Try   HackMD

Linux 核心專題: kHTTPd 改進

執行人: Paintako
GitHub
專題講解影片
期末自我評量

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
提問清單

  • ?

任務簡述

依據 ktcp 的指示,持續改進 sysprog21/khttpd 的程式碼,打造出高效且穩定的網頁伺服器。

TODO: 改進 sysprog21/khttpd 的效率

依據 ktcp 的指示,在 sysprog21/khttpd 的基礎之上,利用 CMWQ 一類的機制,打造出高效且穩定的網頁伺服器,需要處理 lock-free 的資源管理議題 (如 RCU)。

搭配閱讀: 〈RCU Usage In the Linux Kernel: One Decade Later〉

An application can change the IP options on a per-socket basis by calling sys_setsockopt,
which eventually causes the kernel to call setsockopt.
setsockopt sets the new IP options, then uses call_rcu
to asynchronously free the memory storing the old IP
options. Using call_rcu ensures all threads that might
be manipulating the old options will have released their
reference by exiting the RCU critical section.

引入 Concurrency Managed Workqueue (cmwq),改寫 kHTTPd,分析效能表現和提出改進方案

引入前:

$ ./htstress http://localhost:8081 -t 3 -c 20 -n 200000

requests:      200000
good requests: 200000 [100%]
bad requests:  0 [0%]
socket errors: 0 [0%]
seconds:       2.690
requests/sec:  74344.219

Complete

引入 CMWQ:
參考 kecho 的做法, 在 http_server.h 中引入:

#include <net/sock.h>
#include <linux/workqueue.h>

struct http_server_param {
    struct socket *listen_socket;
};
 
+struct httpd_service {
+    bool is_stopped;
+    struct list_head head;
+};

extern int http_server_daemon(void *arg);

khttpd 中,原本是用 kthread_run,但在 kecho 中改用 CMWQ:當有新任務被創建成功時,khttpd 是採用 kthread_run 來創建 worker 來處理,而 kecho 則是把任務 push 進 CMWQ 中處理

main.c 中加入 workqueue,以便未來處理 request

+struct workqueue_struct *khttpd_wq;
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0)
 static int set_sock_opt(struct socket *sock,
                         int level,
@@ -160,7 +162,9 @@ static int __init khttpd_init(void)
         return err;
     }
     param.listen_socket = listen_socket;
+    khttpd_wq = alloc_workqueue(MODULE_NAME, 0, 0);
     http_server = kthread_run(http_server_daemon, &param, KBUILD_MODNAME);
+    printk(MODULE_NAME ": successfully init khttpd_init \n");
     if (IS_ERR(http_server)) {
         pr_err("can't start http server daemon\n");
         close_listen_socket(listen_socket);
@@ -173,7 +177,10 @@ static void __exit khttpd_exit(void)
 {
     send_sig(SIGTERM, http_server, 1);
     kthread_stop(http_server);
+    printk("kthread closed\n");
     close_listen_socket(listen_socket);
+    destroy_workqueue(khttpd_wq);
+    printk("destory finished\n");
     pr_info("module unloaded\n");
 }

再來在 http_server.h 中增加新的 struct khttpd_service,用一個 bool 紀錄是否要停止以及用一個 list_head 來紀錄 work。

+#include <linux/module.h>
+#include <linux/workqueue.h>
 #include <net/sock.h>
+#define MODULE_NAME "khttpd"
+
 
 struct http_server_param {
     struct socket *listen_socket;
 };
 
+struct khttpd_service {
+    bool is_stopped;
+    struct list_head worker;
+};
+
 extern int http_server_daemon(void *arg);

接下來,新增 create_work 以及 free_work,前者的作用是當 listen 到一個新的 request 時,在 kernel 中以 kmalloc 分配一段空間給 http_request 結構體,再把 work 放入至 CMWQ 中。

/*
 * Allocate a per-connection http_request, bind it to http_server_worker and
 * track it on the service's worker list.
 *
 * @sk: socket of the accepted connection; stored in the new request.
 *
 * Returns a pointer to the work_struct embedded in the request, or NULL when
 * the allocation fails. This function does not queue the work itself; it only
 * initialises it and returns it to the caller.
 */
static struct work_struct *create_work(struct socket *sk)
{
    struct http_request *work;

    /* Size the allocation from the pointer so it follows the type. */
    work = kmalloc(sizeof(*work), GFP_KERNEL);
    if (!work)
        return NULL;

    work->socket = sk;

    /* Bind the handler that runs once the work is queued. */
    INIT_WORK(&work->khttpd_work, http_server_worker);

    /* Remember the request so free_work() can release it later. */
    list_add(&work->list, &daemon.worker);
    /* Kernel log records are expected to end with a newline. */
    printk(MODULE_NAME ": create work successfully\n");
    return &work->khttpd_work;
}

/*
 * Tear down every request tracked on daemon.worker: shut the socket down,
 * wait for its work item to finish, release the socket and free the request.
 *
 * NOTE(review): the rmmod oops recorded in this document faults in this loop
 * with work->func == NULL (the WARN in __flush_work) and a NULL list.next.
 * That pattern would appear if something zeroes the whole http_request after
 * create_work() has initialised it -- presumably a memset() of the request in
 * a parser callback; verify against http_server.c.
 */
static void free_work(void)
{
    struct http_request *l, *tar;

    printk(MODULE_NAME ": ready to free_work\n");
    /* cppcheck-suppress uninitvar */
    list_for_each_entry_safe (tar, l, &daemon.worker, list) {
        kernel_sock_shutdown(tar->socket, SHUT_RDWR);
        /* Wait for a running handler before touching its socket/memory. */
        flush_work(&tar->khttpd_work);
        sock_release(tar->socket);
        /* Unlink first so the list head never points at freed memory. */
        list_del(&tar->list);
        kfree(tar);
    }
    printk(MODULE_NAME ": done_free_work\n");
}

在前面有提到,新增一個結構體 khttpd_service,裡面的 bool(is_stopped)是給 http_server_worker 這個函式檢查是否該停止用的;只要尚未停止,create_work 就會透過 INIT_WORK 巨集初始化 work 並綁定稍後要執行的函式(INIT_WORK 本身並不會執行該函式)。以下是 INIT_WORK 底層 __INIT_WORK 的定義:

#define __INIT_WORK(_work, _func, _onstack)				\
	do {								\
		static struct lock_class_key __key;			\
									\
		__init_work((_work), _onstack);				\
		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
		lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \
		INIT_LIST_HEAD(&(_work)->entry);			\
		(_work)->func = (_func);				\
	} while (0)

INIT_WORK 可以綁定要執行的函式 http_server_worker,最後將此新建的 work 加入到服務的 worker 鏈結串列中。

引入後:

./htstress http://localhost:8081 -t 3 -c 20 -n 200000

requests:      200000
good requests: 200000 [100%]
bad requests:  0 [0%]
socket errors: 0 [0%]
seconds:       2.143
requests/sec:  93341.747

使用 dmesg 來檢查 kernel 內的錯誤訊息,當 rmmod 時會出現以下錯誤:

[60230.050609] ------------[ cut here ]------------
[60230.050610] WARNING: CPU: 2 PID: 40721 at kernel/workqueue.c:3066 __flush_work.isra.0+0x234/0x280
[60230.050615] Modules linked in: khttpd(OE-) rfcomm cmac algif_hash algif_skcipher af_alg bnep binfmt_misc nouveau mxm_wmi drm_ttm_helper ttm drm_display_helper snd_hda_codec_realtek cec rc_core intel_rapl_msr intel_rapl_common snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp snd_hda_intel btusb drm_kms_helper joydev btrtl coretemp input_leds i2c_algo_bit snd_intel_dspcfg fb_sys_fops snd_seq_midi btbcm syscopyarea kvm_intel sysfillrect btintel snd_seq_midi_event sysimgblt kvm btmtk nls_iso8859_1 snd_intel_sdw_acpi snd_hda_codec snd_hda_core snd_rawmidi mei_pxp snd_hwdep bluetooth snd_pcm mei_hdcp snd_seq crct10dif_pclmul ecdh_generic ecc snd_seq_device snd_timer ghash_clmulni_intel cmdlinepart spi_nor aesni_intel snd mei_me mac_hid ee1004 soundcore crypto_simd mei mtd cryptd rapl intel_cstate eeepc_wmi wmi_bmof acpi_pad acpi_tad sch_fq_codel msr parport_pc ppdev ramoops reed_solomon lp pstore_blk pstore_zone drm parport
[60230.050655]  efi_pstore ip_tables x_tables autofs4 hid_logitech_hidpp hid_logitech_dj hid_generic usbhid hid mfd_aaeon asus_wmi sparse_keymap platform_profile crc32_pclmul nvme r8169 i2c_i801 intel_lpss_pci spi_intel_pci ahci nvme_core i2c_smbus spi_intel realtek intel_lpss xhci_pci libahci idma64 xhci_pci_renesas wmi video pinctrl_tigerlake [last unloaded: khttpd]
[60230.050671] CPU: 2 PID: 40721 Comm: khttpd Tainted: G      D W  OE     5.19.0-43-generic #44~22.04.1-Ubuntu
[60230.050673] Hardware name: ASUS System Product Name/TUF GAMING B560M-PLUS, BIOS 0820 04/26/2021
[60230.050675] RIP: 0010:__flush_work.isra.0+0x234/0x280
[60230.050677] Code: 8b 54 24 08 41 89 c8 48 c1 e9 04 41 83 e0 08 83 e1 0f 41 83 c8 02 89 c8 49 0f ba 2c 24 03 e9 0b ff ff ff 0f 0b e9 3d ff ff ff <0f> 0b 45 31 ed e9 33 ff ff ff e8 0d 6e e2 00 48 89 de 48 c7 c7 80
[60230.050678] RSP: 0018:ffffac0c89d5fe30 EFLAGS: 00010246
[60230.050680] RAX: 0000000000000000 RBX: ffffffffffffff70 RCX: 0000000000000000
[60230.050681] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff998eae178a60
[60230.050682] RBP: ffffac0c89d5feb8 R08: 0000000000000000 R09: 0000000000000000
[60230.050683] R10: 0000000000000000 R11: 0000000000000000 R12: ffff998eae178a60
[60230.050684] R13: 0000000000000001 R14: ffff998e1ef6b480 R15: ffffffffc0db9a60
[60230.050685] FS:  0000000000000000(0000) GS:ffff998f0ba80000(0000) knlGS:0000000000000000
[60230.050687] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[60230.050688] CR2: 0000262f03eea008 CR3: 0000000048010006 CR4: 00000000007706e0
[60230.050689] PKRU: 55555554
[60230.050690] Call Trace:
[60230.050691]  <TASK>
[60230.050694]  ? http_parser_callback_header_value+0x20/0x20 [khttpd]
[60230.050697]  ? vprintk_default+0x1d/0x30
[60230.050700]  ? _raw_spin_unlock_bh+0x1d/0x30
[60230.050702]  ? release_sock+0x8f/0xb0
[60230.050706]  ? http_parser_callback_header_value+0x20/0x20 [khttpd]
[60230.050707]  flush_work+0xe/0x20
[60230.050709]  http_server_daemon.cold+0x56/0xe2 [khttpd]
[60230.050711]  kthread+0xeb/0x120
[60230.050714]  ? kthread_complete_and_exit+0x20/0x20
[60230.050717]  ret_from_fork+0x1f/0x30
[60230.050720]  </TASK>
[60230.050721] ---[ end trace 0000000000000000 ]---
[60230.050728] BUG: kernel NULL pointer dereference, address: 0000000000000000
[60230.050729] #PF: supervisor read access in kernel mode
[60230.050731] #PF: error_code(0x0000) - not-present page
[60230.050732] PGD 0 P4D 0 
[60230.050733] Oops: 0000 [#2] PREEMPT SMP NOPTI
[60230.050735] CPU: 2 PID: 40721 Comm: khttpd Tainted: G      D W  OE     5.19.0-43-generic #44~22.04.1-Ubuntu
[60230.050737] Hardware name: ASUS System Product Name/TUF GAMING B560M-PLUS, BIOS 0820 04/26/2021
[60230.050738] RIP: 0010:http_server_daemon.cold+0x6a/0xe2 [khttpd]
[60230.050740] Code: 02 00 00 00 e8 29 a8 e2 eb 49 8d bc 24 a0 00 00 00 e8 1c 7a 33 eb 49 8b 3c 24 e8 f3 cc e2 eb 4c 89 e7 49 89 dc e8 88 e4 5e eb <48> 8b 83 90 00 00 00 48 8d 98 70 ff ff ff eb b1 48 c7 c7 a8 c2 db
[60230.050741] RSP: 0018:ffffac0c89d5fed8 EFLAGS: 00010246
[60230.050743] RAX: 0000000000000000 RBX: ffffffffffffff70 RCX: 0000000000000000
[60230.050744] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[60230.050745] RBP: ffffac0c89d5ff08 R08: 0000000000000000 R09: 0000000000000000
[60230.050746] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffffffff70
[60230.050747] R13: 0000000fffffffe0 R14: ffff998e1ef6b480 R15: ffffffffc0db9a60
[60230.050748] FS:  0000000000000000(0000) GS:ffff998f0ba80000(0000) knlGS:0000000000000000
[60230.050749] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[60230.050750] CR2: 0000000000000000 CR3: 0000000048010006 CR4: 00000000007706e0
[60230.050751] PKRU: 55555554
[60230.050752] Call Trace:
[60230.050753]  <TASK>
[60230.050754]  kthread+0xeb/0x120
[60230.050756]  ? kthread_complete_and_exit+0x20/0x20
[60230.050758]  ret_from_fork+0x1f/0x30
[60230.050761]  </TASK>
[60230.050762] Modules linked in: khttpd(OE-) rfcomm cmac algif_hash algif_skcipher af_alg bnep binfmt_misc nouveau mxm_wmi drm_ttm_helper ttm drm_display_helper snd_hda_codec_realtek cec rc_core intel_rapl_msr intel_rapl_common snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp snd_hda_intel btusb drm_kms_helper joydev btrtl coretemp input_leds i2c_algo_bit snd_intel_dspcfg fb_sys_fops snd_seq_midi btbcm syscopyarea kvm_intel sysfillrect btintel snd_seq_midi_event sysimgblt kvm btmtk nls_iso8859_1 snd_intel_sdw_acpi snd_hda_codec snd_hda_core snd_rawmidi mei_pxp snd_hwdep bluetooth snd_pcm mei_hdcp snd_seq crct10dif_pclmul ecdh_generic ecc snd_seq_device snd_timer ghash_clmulni_intel cmdlinepart spi_nor aesni_intel snd mei_me mac_hid ee1004 soundcore crypto_simd mei mtd cryptd rapl intel_cstate eeepc_wmi wmi_bmof acpi_pad acpi_tad sch_fq_codel msr parport_pc ppdev ramoops reed_solomon lp pstore_blk pstore_zone drm parport
[60230.050793]  efi_pstore ip_tables x_tables autofs4 hid_logitech_hidpp hid_logitech_dj hid_generic usbhid hid mfd_aaeon asus_wmi sparse_keymap platform_profile crc32_pclmul nvme r8169 i2c_i801 intel_lpss_pci spi_intel_pci ahci nvme_core i2c_smbus spi_intel realtek intel_lpss xhci_pci libahci idma64 xhci_pci_renesas wmi video pinctrl_tigerlake [last unloaded: khttpd]
[60230.050807] CR2: 0000000000000000
[60230.050809] ---[ end trace 0000000000000000 ]---
[60230.308922] RIP: 0010:http_parser_callback_message_complete+0xa9/0xb2 [khttpd]
[60230.308929] Code: 0c 48 c7 c7 a0 b2 db c0 e8 5e aa 0d ec 48 8b 3b 4c 89 e6 e8 ab fe ff ff 31 c0 c7 83 8c 00 00 00 01 00 00 00 5b 41 5c 5d 31 d2 <89> d6 89 d7 c3 cc cc cc cc 48 c7 c7 58 c2 db c0 e8 62 34 0a ec 48
[60230.308931] RSP: 0018:ffffac0c8801fed8 EFLAGS: 00010246
[60230.308933] RAX: 0000000000000000 RBX: ffffffffffffff70 RCX: 0000000000000000
[60230.308934] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[60230.308935] RBP: ffffac0c8801ff08 R08: 0000000000000000 R09: 0000000000000000
[60230.308936] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffffffff70
[60230.308937] R13: 0000000fffffffe0 R14: ffff998e38b459a0 R15: ffffffffc0db9a60
[60230.308938] FS:  0000000000000000(0000) GS:ffff998f0ba80000(0000) knlGS:0000000000000000
[60230.308939] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[60230.308941] CR2: 0000000000000000 CR3: 00000001d7e6a001 CR4: 00000000007706e0
[60230.308942] PKRU: 55555554
[60230.309007] kthread closed
[60230.309020] destory finished
[60230.309021] khttpd: module unloaded

TODO: 解決以上 dereference null pointer 錯誤

提供目錄檔案存取功能,提供基本的 directory listing 功能

首先,在 struct http_request 中加入新成員 struct dir_context

@@ -42,6 +42,7 @@ struct http_request {
     int complete;
     struct list_head list;
     struct work_struct khttpd_work;
+    struct dir_context dir_context; // struct dir_context, defines in fs.h 
 };

加入讀取現行目錄的檔案名稱的函式,如下

/*
 * Answer a request with an HTML listing of the served directory.
 *
 * @request: the request being served; its socket receives the response and
 *           its dir_context is handed to iterate_dir() with tracedir() as the
 *           per-entry callback.
 *
 * Returns true when the listing was sent, false when the method is not GET
 * (a 501 response is sent) or the directory cannot be opened (a 404 response
 * is sent).
 */
static bool handle_directory(struct http_request *request)
{
    struct file *fp;
    char buf[SEND_BUFFER_SIZE] = {0};

    request->dir_context.actor = tracedir;
    if (request->method != HTTP_GET) {
        /*
         * The header block must end with an empty line, and the body must be
         * exactly Content-Length (19) bytes long.
         */
        snprintf(buf, SEND_BUFFER_SIZE,
                 "HTTP/1.1 501 Not Implemented\r\n%s%s%s%s",
                 "Content-Type: text/plain\r\n", "Content-Length: 19\r\n",
                 "Connection: Close\r\n\r\n", "501 Not Implemented");
        http_server_send(request->socket, buf, strlen(buf));
        return false;
    }

    /*
     * Open the directory before any part of the response is sent, so that a
     * failure is not reported after "200 OK" has already gone out.
     */
    fp = filp_open("/home/paintako/linux2023/khttpd", O_RDONLY | O_DIRECTORY, 0);
    if (IS_ERR(fp)) {
        pr_info("Open file failed\n");
        snprintf(buf, SEND_BUFFER_SIZE, "HTTP/1.1 404 Not Found\r\n%s%s%s%s",
                 "Content-Type: text/plain\r\n", "Content-Length: 13\r\n",
                 "Connection: Close\r\n\r\n", "404 Not Found");
        http_server_send(request->socket, buf, strlen(buf));
        return false;
    }

    snprintf(buf, SEND_BUFFER_SIZE, "HTTP/1.1 200 OK\r\n%s%s%s",
             "Connection: Keep-Alive\r\n", "Content-Type: text/html\r\n",
             "Keep-Alive: timeout=5, max=1000\r\n\r\n");
    http_server_send(request->socket, buf, strlen(buf));

    snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "<html><head><style>\r\n",
             "body{font-family: monospace; font-size: 15px;}\r\n",
             "td {padding: 1.5px 6px;}\r\n",
             "</style></head><body><table>\r\n");
    http_server_send(request->socket, buf, strlen(buf));

    /* tracedir() sends one table row per directory entry. */
    iterate_dir(fp, &request->dir_context);
    snprintf(buf, SEND_BUFFER_SIZE, "</table></body></html>\r\n");
    http_server_send(request->socket, buf, strlen(buf));
    filp_close(fp, NULL);
    return true;
}

可以查閱 include/linux/fs.h 以得知以下結構體的定義。

其中,struct file、struct dir_context 以及 iterate_dir 函式都定義在標頭檔 fs.h 中。

關於 dir_context 結構體, fs.h 中有下列說明


/*
 * This is the "filldir" function type, used by readdir() to let
 * the kernel specify what kind of dirent layout it wants to have.
 * This allows the kernel to read directories into kernel space or
 * to have different dirent layouts depending on the binary type.
 * Return 'true' to keep going and 'false' if there are no more entries.
 */
struct dir_context;
typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
			 unsigned);

struct dir_context {
	filldir_t actor;
	loff_t pos;
};

加入自定的 filldir_t 函式,讓 dir_context 的 function pointer(actor)指向它。這邊要用這種方式的原因是 iterate_dir 函式的定義要求必須傳入一個 pointer to dir_context

// callback for 'iterate_dir', trace entry.
/*
 * Callback for iterate_dir(): send one HTML table row linking to the entry.
 *
 * @dir_context: the dir_context embedded in the http_request being served;
 *               the owning request is recovered with container_of().
 * @name:        entry name; used together with @namelen and not assumed to be
 *               NUL-terminated.
 * @namelen:     length of @name in bytes.
 * @offset, @ino, @d_type: unused.
 *
 * Always returns 0.
 * NOTE(review): 0 means "continue" only where filldir_t returns int; kernels
 * whose filldir_t returns bool expect true to continue -- confirm against the
 * target kernel version.
 * NOTE(review): names are emitted without HTML escaping.
 */
static int tracedir(struct dir_context *dir_context,
                    const char *name,
                    int namelen,
                    loff_t offset,
                    u64 ino,
                    unsigned int d_type)
{
    struct http_request *request;
    char buf[SEND_BUFFER_SIZE] = {0};

    /* Skip the "." and ".." entries; compare by length, not by terminator. */
    if (namelen == 1 && name[0] == '.')
        return 0;
    if (namelen == 2 && name[0] == '.' && name[1] == '.')
        return 0;

    request = container_of(dir_context, struct http_request, dir_context);

    /* "%.*s" prints exactly namelen bytes of the entry name. */
    snprintf(buf, SEND_BUFFER_SIZE,
             "<tr><td><a href=\"%.*s\">%.*s</a></td></tr>\r\n", namelen, name,
             namelen, name);
    http_server_send(request->socket, buf, strlen(buf));
    return 0;
}

Callback function:
為了解決當一遇到函式需要等待,但其他函式又與該等待的函式有關連時,就會使用CallBack Function的時機點來處理。
i.e. 確保程式不會因為 timer 之類的影響執行的順序

http_server_response 做修改,只要把 request 傳給 handle_directory 就可以了

但這樣做,會出現錯誤,當 insmod 後,會馬上出錯並且馬上 unload module, 但奇怪的是存取 localhost 依然可以取得目錄 content,以下是報錯訊息:

至此,可以顯示 filp_open 開啟的的目錄結構,但只要路徑不對就會出錯,缺乏彈性。

filp_open(open file in kernel)

/**
 * filp_open - open file and return file pointer
 *
 * @filename:	path to open
 * @flags:	open flags as per the open(2) second argument
 * @mode:	mode for the new file if O_CREAT is set, else ignored
 *
 * This is the helper to open a file from kernelspace if you really
 * have to.  But in generally you should not do this, so please move
 * along, nothing to see here..
 */
struct file *filp_open(const char *filename, int flags, umode_t mode)
{
	struct filename *name = getname_kernel(filename);
	struct file *file = ERR_CAST(name);
	
	if (!IS_ERR(name)) {
		file = file_open_name(name, flags, mode);
		putname(name);
	}
	return file;
}

Note:
在一般 user space 時,想要引用外部函式時,使用 #include 就可以引入,但在 kernel 模組中只能呼叫已透過 EXPORT_SYMBOL 匯出的核心函式,否則在編譯時會出錯。

掛載模組時,指定要開啟的路徑,使用巨集 module_param_string 新增參數。

#define PATH_SIZE   100
static char WWWROOT[PATH_SIZE] = {0};
module_param_string(WWWROOT, WWWROOT, PATH_SIZE, 0);

module_param_string 可以指定陣列大小(PATH_SIZE),這樣做的好處是可以設定模組變數的預設值。

以下是更改紀錄:

  • http_server.h
 struct khttpd_service {
     bool is_stopped;
+    char *dir_path; // dir_path is used to record the path passed by the user for future use when using `insmod`.
     struct list_head worker;
 };

  • main.c
 #define DEFAULT_PORT 8081
 #define DEFAULT_BACKLOG 100
+#define PATH_SIZE 100
+
+static char WWWROOT[PATH_SIZE] = {0};
+module_param_string(WWWROOT,WWWROOT,PATH_SIZE,0);
+
+extern struct khttpd_service daemon;
 
 static ushort port = DEFAULT_PORT;
 module_param(port, ushort, S_IRUGO);
@@ -156,6 +162,10 @@ static void close_listen_socket(struct socket *socket)
 
 static int __init khttpd_init(void)
 {
+    if (!*WWWROOT) // prevent empty input from user
+        WWWROOT[0] = '/';
+    daemon.dir_path = WWWROOT;
+    

這裡使用 extern 的理由是因為 khttpd_service 宣告在 http_server.c 當中,成員當中的 is_stopped 用來告訴 CMWQ 是否要繼續執行。

  • http_server.c 更改寫死的路徑成 WWWROOT
--- a/http_server.c
+++ b/http_server.c
@@ -126,7 +126,7 @@ static bool handle_directory(struct http_request *request)
              "</style></head><body><table>\r\n");
     http_server_send(request->socket, buf, strlen(buf));
 
-    fp = filp_open("/home/paintako/", O_RDONLY | O_DIRECTORY, 0);
+    fp = filp_open(daemon.dir_path, O_RDONLY | O_DIRECTORY, 0);
     if (IS_ERR(fp)) {

結果是: 編譯成功,但是需要特別注意使用者輸入必須符合規範,或者針對使用者輸入做後處理,不然容易出現 invalid for parameter WWWROOT,如下

[64544.354650] khttpd: loading out-of-tree module taints kernel.
[64544.354683] khttpd: module verification failed: signature and/or required key missing - tainting kernel
[64544.354973] khttpd: `' invalid for parameter `WWWROOT'
[64544.354976] khttpd: unknown parameter '=' ignored
[64544.354977] khttpd: unknown parameter '/home/paintako' ignored
[64566.309479] khttpd: `' invalid for parameter `WWWROOT'
[64566.309483] khttpd: unknown parameter '="/home/paintako"' ignored

$ sudo insmod khttpd.ko WWWROOT='/home/paintako/linux2023'

讀取檔案內容

添加函式來讀取檔案內容,在 kernel space 中開啟檔案使用 filp_open 函式,並且通過前面添加的 WWWROOT 加上 URL 來指定開啟檔案的路徑,並且利用 include/uapi/linux/stat.h 當中定義的 macro 來判斷 filp_open 所開啟之檔案的類型

#define S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)
#define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
#define S_ISDIR(m)	(((m) & S_IFMT) == S_IFDIR)
#define S_ISCHR(m)	(((m) & S_IFMT) == S_IFCHR)
#define S_ISBLK(m)	(((m) & S_IFMT) == S_IFBLK)
#define S_ISFIFO(m)	(((m) & S_IFMT) == S_IFIFO)
#define S_ISSOCK(m)	(((m) & S_IFMT) == S_IFSOCK)

新增函式 catstr,顧名思義將兩個路徑串接起來,使用一個 buffer 來紀錄當前的路徑 pwd ,將 pwd 與 url 串接起來。

// concatenate string
/*
 * Concatenate two strings into a BUFFER_SIZE-byte buffer.
 *
 * @res:    destination buffer of BUFFER_SIZE bytes; always NUL-terminated on
 *          return.
 * @first:  NUL-terminated prefix.
 * @second: NUL-terminated suffix.
 *
 * The combined string is truncated to BUFFER_SIZE - 1 bytes when it would not
 * fit, instead of writing past the end of @res.
 */
static void catstr(char *res, char *first, char *second)
{
    const size_t capacity = BUFFER_SIZE - 1; /* keep one byte for the NUL */
    size_t first_size = strlen(first);
    size_t second_size = strlen(second);

    /* Clamp both parts so that first + second never exceeds the buffer. */
    if (first_size > capacity)
        first_size = capacity;
    if (second_size > capacity - first_size)
        second_size = capacity - first_size;

    /* Zero-filling first guarantees the terminator after the copied bytes. */
    memset(res, 0, BUFFER_SIZE);
    memcpy(res, first, first_size);
    memcpy(res + first_size, second, second_size);
}

修改 tracedir

struct http_request *request =
    container_of(dir_context, struct http_request, dir_context);
+char buf[SEND_BUFFER_SIZE] = {0};
+char *url = !strcmp(request->request_url, "/") ? "" : request->request_url;

修改 handle_directory,判斷現在開啟的是目錄還是檔案

   if (S_ISDIR(fp->f_inode->i_mode)) {
        char buf[SEND_BUFFER_SIZE] = {0};
        snprintf(buf, SEND_BUFFER_SIZE, "HTTP/1.1 200 OK\r\n%s%s%s",
                 "Connection: Keep-Alive\r\n", "Content-Type: text/html\r\n",
                 "Keep-Alive: timeout=5, max=1000\r\n\r\n");
        http_server_send(request->socket, buf, strlen(buf));

        snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "<html><head><style>\r\n",
                 "body{font-family: monospace; font-size: 15px;}\r\n",
                 "td {padding: 1.5px 6px;}\r\n",
                 "</style></head><body><table>\r\n");
        http_server_send(request->socket, buf, strlen(buf));

        iterate_dir(fp, &request->dir_context);

        snprintf(buf, SEND_BUFFER_SIZE, "</table></body></html>\r\n");
        http_server_send(request->socket, buf, strlen(buf));

    } else if (S_ISREG(fp->f_inode->i_mode)) {
        char *read_data = kmalloc(fp->f_inode->i_size, GFP_KERNEL);
        int ret = read_file(fp, read_data);

        send_http_header(request->socket, HTTP_STATUS_OK,
                         http_status_str(HTTP_STATUS_OK), "text/plain",
                         fp->f_inode->i_size, "close");
        http_server_send(request->socket, read_data, ret);

        kfree(read_data);

至此,可以在網頁上看到給定目錄的內容以及檔案內容。

測試 request 效率,因為有 open file 帶來的額外 I/O 開銷,所以 throughput 變少。

使用 Chunked transfer encoding 送出目錄資料

HTTP header 是在請求(request)或回應(response)行(一條訊息的第一行內容)之後傳輸的。協定頭的欄位是以明文的字串格式傳輸,是以冒號分隔的鍵名與鍵值對,以 Enter (CR) 加換行 (LF) 符號序列結尾。

使用 telnet 發送 GET 時,可以看到以下內容:

$ telnet localhost 8081

Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
GET / HTTP/1.1

HTTP/1.1 200 OK
Connection: Keep-Alive
Content-Type: text/html
Keep-Alive: timeout=5, max=1000

<html><head><style>
body{font-family: monospace; font-size: 15px;}
td {padding: 1.5px 6px;}
</style></head><body><table>
<tr><td><a href="/snap">snap</a></td></tr>
<tr><td><a href="/.gitconfig">.gitconfig</a></td></tr>
<tr><td><a href="/web">web</a></td></tr>
<tr><td><a href="/riscv">riscv</a></td></tr>
<tr><td><a href="/linux2023">linux2023</a></td></tr>
<tr><td><a href="/CO">CO</a></td></tr>
<tr><td><a href="/.cache">.cache</a></td></tr>
<tr><td><a href="/Music">Music</a></td></tr>
<tr><td><a href="/Downloads">Downloads</a></td></tr>
<tr><td><a href="/.viminfo">.viminfo</a></td></tr>
<tr><td><a href="/.bash_logout">.bash_logout</a></td></tr>
<tr><td><a href="/Templates">Templates</a></td></tr>
<tr><td><a href="/F100">F100</a></td></tr>
<tr><td><a href="/Desktop">Desktop</a></td></tr>
<tr><td><a href="/.ssh">.ssh</a></td></tr>
<tr><td><a href="/.config">.config</a></td></tr>
<tr><td><a href="/.profile">.profile</a></td></tr>
<tr><td><a href="/.pki">.pki</a></td></tr>
<tr><td><a href="/.sudo_as_admin_successful">.sudo_as_admin_successful</a></td></tr>
<tr><td><a href="/.vim">.vim</a></td></tr>
<tr><td><a href="/.wget-hsts">.wget-hsts</a></td></tr>
<tr><td><a href="/csapp">csapp</a></td></tr>
<tr><td><a href="/.vscode">.vscode</a></td></tr>
<tr><td><a href="/Videos">Videos</a></td></tr>
<tr><td><a href="/.debug">.debug</a></td></tr>
<tr><td><a href="/.python_history">.python_history</a></td></tr>
<tr><td><a href="/Documents">Documents</a></td></tr>
<tr><td><a href="/.gnome">.gnome</a></td></tr>
<tr><td><a href="/.bashrc">.bashrc</a></td></tr>
<tr><td><a href="/.gnupg">.gnupg</a></td></tr>
<tr><td><a href="/.local">.local</a></td></tr>
<tr><td><a href="/Pictures">Pictures</a></td></tr>
<tr><td><a href="/Public">Public</a></td></tr>
<tr><td><a href="/.lesshst">.lesshst</a></td></tr>
<tr><td><a href="/.chewing">.chewing</a></td></tr>
</table></body></html>
Connection closed by foreign host.

返回的資訊被 \r\n 隔開,上面是 HTTP header 的內容,下面是 Payload,也就是 HTML 內容

原本的實作中,每次傳送 header 都要傳入 Content-Length,而使用 Chunked encoding 就可以不用額外發送 Content-Length,此方法的好處是當大量資料要傳給 client 時可以不用等整個 response 處理完畢才知道大小。

Note:
macro 中使用 ... (可變數量參數) 可使 macro 接受不同數量的參數

根據 C11 規格書(6.10.3 節第 5 段),對於可變數量參數的處理方式如下所述:

The identifier __VA_ARGS__ shall occur only in the replacement list of a function-like macro that uses the ellipsis notation in the parameters.

新增 macro SEND_HTTP_MSG

/*
 * Format a message into buf (at most SEND_BUFFER_SIZE bytes) and send it on
 * socket with http_server_send().
 *
 * Wrapped in do { } while (0) so that the two statements behave as a single
 * statement, e.g. as the body of an unbraced if/else. At least one argument
 * must follow format, because __VA_ARGS__ is used after a comma.
 */
#define SEND_HTTP_MSG(socket, buf, format, ...)                   \
    do {                                                          \
        snprintf((buf), SEND_BUFFER_SIZE, format, __VA_ARGS__);   \
        http_server_send((socket), (buf), strlen(buf));           \
    } while (0)

透過以上 macro 來減少重複的程式碼。

改寫後:

$ telnet localhost 8081

Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
GET / HTTP/1.1

HTTP/1.1 200 OK
Content-Type: text/html
Transfer-Encoding: chunked

7B
<html><head><style>
body{font-family: monospace; font-size: 15px;}
td {padding: 1.5px 6px;}
</style></head><body><table>
2e
<tr><td><a href="/others">others</a></td></tr>
2a
<tr><td><a href="/perf">perf</a></td></tr>
3c
<tr><td><a href="/c_syntax_test">c_syntax_test</a></td></tr>
2c
<tr><td><a href="/kecho">kecho</a></td></tr>
2e
<tr><td><a href="/khttpd">khttpd</a></td></tr>
32
<tr><td><a href="/tiny_web">tiny_web</a></td></tr>
30
<tr><td><a href="/example">example</a></td></tr>
3c
<tr><td><a href="/kernal_module">kernal_module</a></td></tr>
2e
<tr><td><a href="/origin">origin</a></td></tr>
16
</table></body></html>
0
 static void catstr(char *res, char *first, char *second)
 {
@@ -113,10 +93,9 @@ static int tracedir(struct dir_context *dir_context,
         char *url =
             !strcmp(request->request_url, "/") ? "" : request->request_url;
 
-        snprintf(buf, SEND_BUFFER_SIZE,
-                 "<tr><td><a href=\"%s/%s\">%s</a></td></tr>\r\n", url, name,
-                 name);
-        http_server_send(request->socket, buf, strlen(buf));
+        SEND_HTTP_MSG(request->socket, buf,
+                      "%lx\r\n<tr><td><a href=\"%s/%s\">%s</a></td></tr>\r\n",
+                      34 + strlen(url) + (namelen << 1), url, name, name);
     }
     return 0;
 }
@@ -125,13 +104,13 @@ static bool handle_directory(struct http_request *request)
 {
     struct file *fp;
     char pwd[BUFFER_SIZE] = {0};
-
+    char buf[SEND_BUFFER_SIZE] = {0};
     request->dir_context.actor = tracedir;
     if (request->method != HTTP_GET) {
-        send_http_header(request->socket, HTTP_STATUS_NOT_IMPLEMENTED,
-                         http_status_str(HTTP_STATUS_NOT_IMPLEMENTED),
-                         "text/plain", 19, "close");
-        send_http_content(request->socket, "501 Not Implemented");
+        SEND_HTTP_MSG(request->socket, buf, "%s%s%s%s%s",
+                      "HTTP/1.1 501 Not Implemented\r\n",
+                      "Content-Type: text/plain\r\n", "Content-Length: 19\r\n",
+                      "Connection: Close\r\n\r\n", "501 Not Implemented");
         return false;
     }
@@ -139,40 +118,39 @@ static bool handle_directory(struct http_request *request)
     fp = filp_open(pwd, O_RDONLY, 0);
 
     if (IS_ERR(fp)) {
-        send_http_header(request->socket, HTTP_STATUS_NOT_FOUND,
-                         http_status_str(HTTP_STATUS_NOT_FOUND), "text/plain",
-                         14, "close");
-        send_http_content(request->socket, "404 Not Found");
+        SEND_HTTP_MSG(request->socket, buf, "%s%s%s%s%s",
+                      "HTTP/1.1 404 Not Found\r\n",
+                      "Content-Type: text/plain\r\n", "Content-Length: 13\r\n",
+                      "Connection: Close\r\n\r\n", "404 Not Found");
         kernel_sock_shutdown(request->socket, SHUT_RDWR);
         return false;
     }
 
     if (S_ISDIR(fp->f_inode->i_mode)) {
         char buf[SEND_BUFFER_SIZE] = {0};
-        snprintf(buf, SEND_BUFFER_SIZE, "HTTP/1.1 200 OK\r\n%s%s%s",
-                 "Connection: Keep-Alive\r\n", "Content-Type: text/html\r\n",
-                 "Keep-Alive: timeout=5, max=1000\r\n\r\n");
-        http_server_send(request->socket, buf, strlen(buf));
-
-        snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "<html><head><style>\r\n",
-                 "body{font-family: monospace; font-size: 15px;}\r\n",
-                 "td {padding: 1.5px 6px;}\r\n",
-                 "</style></head><body><table>\r\n");
-        http_server_send(request->socket, buf, strlen(buf));
+        SEND_HTTP_MSG(request->socket, buf, "%s%s%s", "HTTP/1.1 200 OK\r\n",
+                      "Content-Type: text/html\r\n",
+                      "Transfer-Encoding: chunked\r\n\r\n");
+        SEND_HTTP_MSG(
+            request->socket, buf, "7B\r\n%s%s%s%s", "<html><head><style>\r\n",
+            "body{font-family: monospace; font-size: 15px;}\r\n",
+            "td {padding: 1.5px 6px;}\r\n", "</style></head><body><table>\r\n");
 
         iterate_dir(fp, &request->dir_context);
 
-        snprintf(buf, SEND_BUFFER_SIZE, "</table></body></html>\r\n");
-        http_server_send(request->socket, buf, strlen(buf));
+        SEND_HTTP_MSG(request->socket, buf, "%s",
+                      "16\r\n</table></body></html>\r\n");
+        SEND_HTTP_MSG(request->socket, buf, "%s", "0\r\n\r\n");
 
     } else if (S_ISREG(fp->f_inode->i_mode)) {
         char *read_data = kmalloc(fp->f_inode->i_size, GFP_KERNEL);
         int ret = read_file(fp, read_data);
 
-        send_http_header(request->socket, HTTP_STATUS_OK,
-                         http_status_str(HTTP_STATUS_OK), "text/plain",
-                         fp->f_inode->i_size, "close");
-        http_server_send(request->socket, read_data, ret);
+        SEND_HTTP_MSG(request->socket, buf, "%s%s%s%d%s", "HTTP/1.1 200 OK\r\n",
+                      "Content-Type: text/plain\r\n", "Content-Length: ", ret,
+                      "\r\nConnection: Close\r\n\r\n");
+        http_server_send(request->socket, read_data, strlen(read_data));
+        ;
 
         kfree(read_data);
     }

效能:

requests:      100000
good requests: 100000 [100%]
bad requests:  0 [0%]
socket errors: 0 [0%]
seconds:       8.011
requests/sec:  12482.545

MIME 型態

前面有提到 Content type 是用來表示資源的 media type,而 MIME 可以標示傳送的資料型態,對於後端來說是可以增加資料正確性的手段。

MIME 是由主要型態(type)、次要型態 (subtype)、參數組成。Type 是廣義分類,subtype 則是資料精確型態;MIME 永遠都有主要型態和次要型態,而後面可以加上參數提供更多細節,如下: type/subtype;parameter=value

在原本的實作中,只會顯示文字檔,只要遇到非文字檔如 .jpg 就無法正確的顯示,所以需要透過提供 MIME 使此類非文字檔正常顯示。

增加標頭檔 mime_type.h ,在此標頭檔中定義不同類型的 MIME,

在增加 MIME 後,在瀏覽器中檢查 http header 可以顯示不同類型的檔案類型。

// return mime type string
/*
 * Map the file extension of a request URL to a MIME type string.
 *
 * @request_url: NUL-terminated URL path of the request.
 *
 * The extension is the text from the last '.' of the final path component
 * (dot included) and is compared against the type field of each mime_types
 * entry; the table is scanned until an entry whose type is NULL.
 *
 * Returns the matching entry's string, or "text/plain" when the URL has no
 * extension or the extension is not in the table.
 */
const char *get_mime_str(char *request_url)
{
    /*
     * Search only the last path component, and from the right, so that
     * "/dir.d/readme" has no extension and "/a.tar.gz" yields ".gz".
     */
    char *last_slash = strrchr(request_url, '/');
    char *request_type = strrchr(last_slash ? last_slash : request_url, '.');
    int index = 0;

    pr_info("%s , %s\n", request_url, request_type);
    if (!request_type)
        return "text/plain";

    while (mime_types[index].type) {
        if (!strcmp(mime_types[index].type, request_type))
            return mime_types[index].string;
        index++;
    }
    return "text/plain";
}

此函式用來區分檔案名稱及副檔名,再到定義好的陣列中比對是否存在該副檔名,若存在就返回對應的 MIME type 。

TODO:
上面的方式有待改進,因為每次收到 request 後都進行 linear search,即便比對數量不多,但勢必會影響效能,可改用 hash table 的方式來改善搜尋速度。

HTTP keep alive

HTTP 採用 請求 request-回應 response 模式,非 KeepAlive 模式時,每個請求/應答客戶和伺服器都要新建一個連接,完成 之後立即斷開連接(HTTP 爲 stateless 的通訊協定);當使用 Keep-Alive 模式(又稱持久連接、連接重用)時, Keep-Alive 功能使客戶端到伺服器端的連接持續有效,當出現對伺服器的後繼請求時, Keep-Alive 功能避免了建立或者重新建立連接。

使用 timer 主動關閉連線

保持閒置連線的優點是:

Note:
關閉連線時可以採用 graceful shutdown,避免尚有資料在傳輸給某個 client 時就馬上被關閉。

TODO: 檢視 I/O 模型並尋求效能改進空間

以 ftrace 檢視 kHTTPd 運作的細節,檢視 I/O 模型並尋求效能改進空間

確認可被追蹤的函式

先確認目前的系統是否有 ftrace

$ cat /boot/config-`uname -r` | grep CONFIG_HAVE_FUNCTION_TRACER
CONFIG_HAVE_FUNCTION_TRACER=y

確認系統內有 ftrace 後,找出 khttpd 內所有可以被追蹤的函式

$ sudo cat /sys/kernel/debug/tracing/available_filter_functions | grep khttpd

parse_url_char.part.0 [khttpd]
http_message_needs_eof [khttpd]
http_should_keep_alive [khttpd]
http_parser_execute [khttpd]
http_method_str [khttpd]
http_status_str [khttpd]
http_parser_init [khttpd]
http_parser_settings_init [khttpd]
http_errno_name [khttpd]
http_errno_description [khttpd]
http_parser_url_init [khttpd]
http_parser_parse_url [khttpd]
http_parser_pause [khttpd]
http_body_is_final [khttpd]
http_parser_version [khttpd]
http_parser_set_max_header_size [khttpd]
http_parser_callback_header_field [khttpd]
http_parser_callback_headers_complete [khttpd]
http_parser_callback_message_begin [khttpd]
http_parser_callback_request_url [khttpd]
http_parser_callback_body [khttpd]
http_server_recv.constprop.0 [khttpd]
http_server_worker [khttpd]
http_server_send.isra.0 [khttpd]
tracedir.part.0 [khttpd]
tracedir [khttpd]
http_parser_callback_header_value [khttpd]
http_server_daemon [khttpd]
get_mime_str [khttpd]
handle_directory [khttpd]
http_parser_callback_message_complete [khttpd]

確認後,參考 鳥哥私房菜,並且參考 《Demystifying the Linux CPU Scheduler》第六章 撰寫 shell script

#!/bin/bash
# Trace http_server_worker with the function_graph tracer while htstress
# sends 2000 requests to the server on localhost:8081.
# NOTE(review): writing to the tracing directory presumably requires root.
TRACE_DIR=/sys/kernel/debug/tracing

# Reset: stop tracing, empty both function filters and select the nop tracer.
echo 0 > $TRACE_DIR/tracing_on
echo > $TRACE_DIR/set_graph_function
echo > $TRACE_DIR/set_ftrace_filter
echo nop > $TRACE_DIR/current_tracer

# Configure: use function_graph, set max_graph_depth to 3 and graph only
# http_server_worker.
echo function_graph > $TRACE_DIR/current_tracer
echo 3 > $TRACE_DIR/max_graph_depth
echo http_server_worker > $TRACE_DIR/set_graph_function

# Run: tracing is switched on only for the duration of the load test.
echo 1 > $TRACE_DIR/tracing_on
./htstress localhost:8081 -n 2000
echo 0 > $TRACE_DIR/tracing_on

echo function_graph > $TRACE_DIR/current_tracer 是把 function_graph 設成當前的 tracer (其他 tracer 可參考 Ftrace)

echo http_server_worker > $TRACE_DIR/set_graph_function 是把 http_server_worker 設成要 trace 的函式。

$ sudo cat /sys/kernel/debug/tracing/trace > ./output.txt

節錄部份結果:

# CPU  DURATION                  FUNCTION CALLS
# |     |   |                     |   |   |   |
 11)   4.756 us    |          filp_open();
 11) + 15.848 us   |          http_server_send.isra.0 [khttpd]();
 11) + 13.042 us   |          http_server_send.isra.0 [khttpd]();
 11) ! 431.909 us  |          iterate_dir();
 11)   8.933 us    |          http_server_send.isra.0 [khttpd]();
 11) + 10.592 us   |          http_server_send.isra.0 [khttpd]();
 11)   8.016 us    |          kernel_sock_shutdown();
 11)   0.469 us    |          filp_close();
 11) ! 495.127 us  |        }
 11) ! 498.186 us  |      }
 11) ! 500.548 us  |    }
 11)   0.081 us    |    http_should_keep_alive [khttpd]();

可以看到花的大部分時間都是在 iterate_dir 函式中

改進 iterate_dir 函式

參考 Jerejere0808 以及 terry23304,為了避免每次請求都要執行一次 iterate_dir(),可以將 response 快取下來,下次有同樣的請求時就不必再執行一次 iterate_dir();並加入 RCU 同步機制,以支援多個連線同時讀取快取的需求(允許多個 reader (port) 在單一 writer (cache) 更新資料的同時,在不需要 lock 的前提下正確讀取資料)。

在 Linux 核心中,RCU 用來保護以下場景:

  • 指標
  • linked list
  • hlist (hash list)

實作 content cache

使用 hashtable 來實作 content cache

增加 content_cache.h 來實作 content cache

+ struct content_cache_entry {
+     const char *request_url;
+     const char *response;
+     struct hlist_node node;
+     spinlock_t lock;
+ };

content_cache.c 中實作 content_cache 的功能

DEFINE_READ_MOSTLY_HASHTABLE(content_cache_table, 8); // declare a hash table with 2^8 = 256 buckets (the second argument is the number of bits, not the number of entries)

在收到新的 request 後,會先檢查該 request 是否存在於 hashtable 中,如果存在那就返回,反之插入 hashtable 當中


void insert_content_cache(char *request_url, char *cache_buffer)
{
    struct content_cache_entry *entry =
        kmalloc(sizeof(struct content_cache_entry), GFP_KERNEL);
    if (!entry)
        return;

    entry->request_url = request_url;
    entry->response = cache_buffer;

    spin_lock_init(&entry->lock);
    spin_lock(&entry->lock);
    hash_add(content_cache_table, &entry->node,
             request_hash(request_url));  // lock on while accessing hash table
    spin_unlock(&entry->lock);
}

需要注意的是當插入 hashtable 時,需要保證資料的正確性,避免 reader 讀取錯誤資料,故在存取該結構體時,需要使用到結構體內的成員 lock

定義好 hashtable 後,之後在存取之前都會先檢查該 request 的 url 是否存在裡面,如果有便可以把 cache 的回應返回,反之如果不存在就插入 table 內。

加入 content cache 後,重新測試效能,由於每一測試皆是訪問同一個 url,故改善了 requests/sec ,並且隨著 WWWROOT 的改變,由於要讀取的 file 數目不同,request 也會有所改變

$ ./htstress localhost:8081 -n 20000
0 requests
2000 requests
4000 requests
6000 requests
8000 requests
10000 requests
12000 requests
14000 requests
16000 requests
18000 requests

requests:      20000
good requests: 20000 [100%]
bad requests:  0 [0%]
socket errors: 0 [0%]
seconds:       0.852
requests/sec:  23482.116

request/sec 由 12482.545 上升到了 23482.116 。