Linux 核心專題: kHTTPd 改進

# Linux 核心專題: kHTTPd 改進 > 執行人: Paintako > [GitHub](https://github.com/Paintako/khttpd) > [專題講解影片](https://youtu.be/9pTO70MxsN8) > [期末自我評量](http://wiki.csie.ncku.edu.tw/User/Paintako?revision=8318d343b86e26a5d2c99a6a54d51ecd834309df#disqus_thread) :::success :question: 提問清單 * ? ::: ## 任務簡述依據 [ktcp](https://hackmd.io/@sysprog/linux2023-ktcp) 的指示，持續改進 [sysprog21/khttpd](https://github.com/sysprog21/khttpd) 的程式碼，打造出高效且穩定的網頁伺服器。 ## TODO: 改進 [sysprog21/khttpd](https://github.com/sysprog21/khttpd) 的效率依據 [ktcp](https://hackmd.io/@sysprog/linux2023-ktcp) 的指示，在 [sysprog21/khttpd](https://github.com/sysprog21/khttpd) 的基礎之上，利用 CMWQ 一類的機制，打造出高效且穩定的網頁伺服器，需要處理 lock-free 的資源管理議題 (如 RCU)。搭配閱讀: 〈[RCU Usage In the Linux Kernel: One Decade Later](https://pdos.csail.mit.edu/6.828/2020/readings/rcu-decade-later.pdf)〉 > An application can change the IP options on a per-socket basis by calling sys_setsockopt, which eventually causes the kernel to call setsockopt. setsockopt sets the new IP options, then uses call_ rcu to asynchronously free the memory storing the old IP options. Using `call_rcu` ensures all threads that might be manipulating the old options will have released their reference by exiting the RCU critical section. 
### 引入 Concurrency Managed Workqueue (cmwq)，改寫 kHTTPd，分析效能表現和提出改進方案引入前: ```shell $ ./htstress http://localhost:8081 -t 3 -c 20 -n 200000 requests: 200000 good requests: 200000 [100%] bad requests: 0 [0%] socket errors: 0 [0%] seconds: 2.690 requests/sec: 74344.219 Complete ``` 引入 CMWQ: 參考 `kecho` 的做法，在 `http_server.h` 中引入： ```diff #include <net/sock.h> #include <linux/workqueue.h> struct http_server_param { struct socket *listen_socket; }; +struct httpd_service { + bool is_stopped; + struct list_head head; +}; extern int http_server_daemon(void *arg); ``` 在 `khttp` 中，原本是用 `kthread_run`，但在 `kecho` 中改用 `CMWQ`，當有新任務被創建成功時， `khttp` 是採用 `kthread_run` 來創建 worker 來處理，但 `kecho` 是把任務 push 進 CMWQ 中處理在 `main.c` 中加入 `workqueue`，以便未來處理 request ```diff +struct workqueue_struct *khttpd_wq; + #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) static int set_sock_opt(struct socket *sock, int level, @@ -160,7 +162,9 @@ static int __init khttpd_init(void) return err; } param.listen_socket = listen_socket; + khttpd_wq = alloc_workqueue(MODULE_NAME, 0, 0); http_server = kthread_run(http_server_daemon, &param, KBUILD_MODNAME); + printk(MODULE_NAME ": successfully init khttpd_init \n"); if (IS_ERR(http_server)) { pr_err("can't start http server daemon\n"); close_listen_socket(listen_socket); @@ -173,7 +177,10 @@ static void __exit khttpd_exit(void) { send_sig(SIGTERM, http_server, 1); kthread_stop(http_server); + printk("kthread closed\n"); close_listen_socket(listen_socket); + destroy_workqueue(khttpd_wq); + printk("destory finished\n"); pr_info("module unloaded\n"); } ``` 再來在 `http_server.h` 中增加新的 struct `khttpd_service`，用一個 bool 紀錄是否要停止以及用一個 `list_head` 來紀錄 work。 ```diff +#include <linux/module.h> +#include <linux/workqueue.h> #include <net/sock.h> +#define MODULE_NAME "khttpd" + struct http_server_param { struct socket *listen_socket; }; +struct khttpd_service { + bool is_stopped; + struct list_head worker; +}; + extern int http_server_daemon(void *arg); ``` 接下來，新增 `create_work` 以及 
`free_work`，前者的作用是當 listen 到一個新的 request 時，在 kernal kmalloc 分配一段空間給 `http_request` 結構體，再把 work 放入至 CMWQ 中。 ```c static struct work_struct *create_work(struct socket *sk) { struct http_request *work; if (!(work = kmalloc(sizeof(struct http_request), GFP_KERNEL))) return NULL; work->socket = sk; INIT_WORK(&work->khttpd_work, http_server_worker); list_add(&work->list, &daemon.worker); printk(MODULE_NAME ": create work successfully"); return &work->khttpd_work; } static void free_work(void) { struct http_request *l, *tar; /* cppcheck-suppress uninitvar */ printk(MODULE_NAME ": ready to free_work"); list_for_each_entry_safe (tar, l, &daemon.worker, list) { kernel_sock_shutdown(tar->socket, SHUT_RDWR); flush_work(&tar->khttpd_work); sock_release(tar->socket); kfree(tar); } printk(MODULE_NAME ": done_free_work"); } ``` 在前面有提到，新增一個結構體 `khttpd_service`，裡面的 bool 是給 `http_server_worker` 這個函式用的，如果 `!= is_stopped` ，那在 `create_work`就會透過 `INIT_WORK` 這個函式來執行，以下是 `INIT_WORK` 的定義： ```c #define __INIT_WORK(_work, _func, _onstack) \ do { \ static struct lock_class_key __key; \ \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) ``` `INIT_WORK` 可以綁定要執行的函式 http_server_worker，最後將此新建的 work 加入到服務的 worker 鏈結串列中。引入後： ```shell ./htstress http://localhost:8081 -t 3 -c 20 -n 200000 requests: 200000 good requests: 200000 [100%] bad requests: 0 [0%] socket errors: 0 [0%] seconds: 2.143 requests/sec: 93341.747 ``` 使用 `dmesg` 來檢查 kernal 內的錯誤訊息，當 `rmmod` 時會出現以下錯誤： :::spoiler ```shell [60230.050609] ------------[ cut here ]------------ [60230.050610] WARNING: CPU: 2 PID: 40721 at kernel/workqueue.c:3066 __flush_work.isra.0+0x234/0x280 [60230.050615] Modules linked in: khttpd(OE-) rfcomm cmac algif_hash algif_skcipher af_alg bnep binfmt_misc nouveau mxm_wmi drm_ttm_helper ttm drm_display_helper 
snd_hda_codec_realtek cec rc_core intel_rapl_msr intel_rapl_common snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp snd_hda_intel btusb drm_kms_helper joydev btrtl coretemp input_leds i2c_algo_bit snd_intel_dspcfg fb_sys_fops snd_seq_midi btbcm syscopyarea kvm_intel sysfillrect btintel snd_seq_midi_event sysimgblt kvm btmtk nls_iso8859_1 snd_intel_sdw_acpi snd_hda_codec snd_hda_core snd_rawmidi mei_pxp snd_hwdep bluetooth snd_pcm mei_hdcp snd_seq crct10dif_pclmul ecdh_generic ecc snd_seq_device snd_timer ghash_clmulni_intel cmdlinepart spi_nor aesni_intel snd mei_me mac_hid ee1004 soundcore crypto_simd mei mtd cryptd rapl intel_cstate eeepc_wmi wmi_bmof acpi_pad acpi_tad sch_fq_codel msr parport_pc ppdev ramoops reed_solomon lp pstore_blk pstore_zone drm parport [60230.050655] efi_pstore ip_tables x_tables autofs4 hid_logitech_hidpp hid_logitech_dj hid_generic usbhid hid mfd_aaeon asus_wmi sparse_keymap platform_profile crc32_pclmul nvme r8169 i2c_i801 intel_lpss_pci spi_intel_pci ahci nvme_core i2c_smbus spi_intel realtek intel_lpss xhci_pci libahci idma64 xhci_pci_renesas wmi video pinctrl_tigerlake [last unloaded: khttpd] [60230.050671] CPU: 2 PID: 40721 Comm: khttpd Tainted: G D W OE 5.19.0-43-generic #44~22.04.1-Ubuntu [60230.050673] Hardware name: ASUS System Product Name/TUF GAMING B560M-PLUS, BIOS 0820 04/26/2021 [60230.050675] RIP: 0010:__flush_work.isra.0+0x234/0x280 [60230.050677] Code: 8b 54 24 08 41 89 c8 48 c1 e9 04 41 83 e0 08 83 e1 0f 41 83 c8 02 89 c8 49 0f ba 2c 24 03 e9 0b ff ff ff 0f 0b e9 3d ff ff ff <0f> 0b 45 31 ed e9 33 ff ff ff e8 0d 6e e2 00 48 89 de 48 c7 c7 80 [60230.050678] RSP: 0018:ffffac0c89d5fe30 EFLAGS: 00010246 [60230.050680] RAX: 0000000000000000 RBX: ffffffffffffff70 RCX: 0000000000000000 [60230.050681] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff998eae178a60 [60230.050682] RBP: ffffac0c89d5feb8 R08: 0000000000000000 R09: 0000000000000000 [60230.050683] 
R10: 0000000000000000 R11: 0000000000000000 R12: ffff998eae178a60 [60230.050684] R13: 0000000000000001 R14: ffff998e1ef6b480 R15: ffffffffc0db9a60 [60230.050685] FS: 0000000000000000(0000) GS:ffff998f0ba80000(0000) knlGS:0000000000000000 [60230.050687] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [60230.050688] CR2: 0000262f03eea008 CR3: 0000000048010006 CR4: 00000000007706e0 [60230.050689] PKRU: 55555554 [60230.050690] Call Trace: [60230.050691] <TASK> [60230.050694] ? http_parser_callback_header_value+0x20/0x20 [khttpd] [60230.050697] ? vprintk_default+0x1d/0x30 [60230.050700] ? _raw_spin_unlock_bh+0x1d/0x30 [60230.050702] ? release_sock+0x8f/0xb0 [60230.050706] ? http_parser_callback_header_value+0x20/0x20 [khttpd] [60230.050707] flush_work+0xe/0x20 [60230.050709] http_server_daemon.cold+0x56/0xe2 [khttpd] [60230.050711] kthread+0xeb/0x120 [60230.050714] ? kthread_complete_and_exit+0x20/0x20 [60230.050717] ret_from_fork+0x1f/0x30 [60230.050720] </TASK> [60230.050721] ---[ end trace 0000000000000000 ]--- [60230.050728] BUG: kernel NULL pointer dereference, address: 0000000000000000 [60230.050729] #PF: supervisor read access in kernel mode [60230.050731] #PF: error_code(0x0000) - not-present page [60230.050732] PGD 0 P4D 0 [60230.050733] Oops: 0000 [#2] PREEMPT SMP NOPTI [60230.050735] CPU: 2 PID: 40721 Comm: khttpd Tainted: G D W OE 5.19.0-43-generic #44~22.04.1-Ubuntu [60230.050737] Hardware name: ASUS System Product Name/TUF GAMING B560M-PLUS, BIOS 0820 04/26/2021 [60230.050738] RIP: 0010:http_server_daemon.cold+0x6a/0xe2 [khttpd] [60230.050740] Code: 02 00 00 00 e8 29 a8 e2 eb 49 8d bc 24 a0 00 00 00 e8 1c 7a 33 eb 49 8b 3c 24 e8 f3 cc e2 eb 4c 89 e7 49 89 dc e8 88 e4 5e eb <48> 8b 83 90 00 00 00 48 8d 98 70 ff ff ff eb b1 48 c7 c7 a8 c2 db [60230.050741] RSP: 0018:ffffac0c89d5fed8 EFLAGS: 00010246 [60230.050743] RAX: 0000000000000000 RBX: ffffffffffffff70 RCX: 0000000000000000 [60230.050744] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 
0000000000000000 [60230.050745] RBP: ffffac0c89d5ff08 R08: 0000000000000000 R09: 0000000000000000 [60230.050746] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffffffff70 [60230.050747] R13: 0000000fffffffe0 R14: ffff998e1ef6b480 R15: ffffffffc0db9a60 [60230.050748] FS: 0000000000000000(0000) GS:ffff998f0ba80000(0000) knlGS:0000000000000000 [60230.050749] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [60230.050750] CR2: 0000000000000000 CR3: 0000000048010006 CR4: 00000000007706e0 [60230.050751] PKRU: 55555554 [60230.050752] Call Trace: [60230.050753] <TASK> [60230.050754] kthread+0xeb/0x120 [60230.050756] ? kthread_complete_and_exit+0x20/0x20 [60230.050758] ret_from_fork+0x1f/0x30 [60230.050761] </TASK> [60230.050762] Modules linked in: khttpd(OE-) rfcomm cmac algif_hash algif_skcipher af_alg bnep binfmt_misc nouveau mxm_wmi drm_ttm_helper ttm drm_display_helper snd_hda_codec_realtek cec rc_core intel_rapl_msr intel_rapl_common snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp snd_hda_intel btusb drm_kms_helper joydev btrtl coretemp input_leds i2c_algo_bit snd_intel_dspcfg fb_sys_fops snd_seq_midi btbcm syscopyarea kvm_intel sysfillrect btintel snd_seq_midi_event sysimgblt kvm btmtk nls_iso8859_1 snd_intel_sdw_acpi snd_hda_codec snd_hda_core snd_rawmidi mei_pxp snd_hwdep bluetooth snd_pcm mei_hdcp snd_seq crct10dif_pclmul ecdh_generic ecc snd_seq_device snd_timer ghash_clmulni_intel cmdlinepart spi_nor aesni_intel snd mei_me mac_hid ee1004 soundcore crypto_simd mei mtd cryptd rapl intel_cstate eeepc_wmi wmi_bmof acpi_pad acpi_tad sch_fq_codel msr parport_pc ppdev ramoops reed_solomon lp pstore_blk pstore_zone drm parport [60230.050793] efi_pstore ip_tables x_tables autofs4 hid_logitech_hidpp hid_logitech_dj hid_generic usbhid hid mfd_aaeon asus_wmi sparse_keymap platform_profile crc32_pclmul nvme r8169 i2c_i801 intel_lpss_pci spi_intel_pci ahci nvme_core i2c_smbus spi_intel realtek intel_lpss 
xhci_pci libahci idma64 xhci_pci_renesas wmi video pinctrl_tigerlake [last unloaded: khttpd] [60230.050807] CR2: 0000000000000000 [60230.050809] ---[ end trace 0000000000000000 ]--- [60230.308922] RIP: 0010:http_parser_callback_message_complete+0xa9/0xb2 [khttpd] [60230.308929] Code: 0c 48 c7 c7 a0 b2 db c0 e8 5e aa 0d ec 48 8b 3b 4c 89 e6 e8 ab fe ff ff 31 c0 c7 83 8c 00 00 00 01 00 00 00 5b 41 5c 5d 31 d2 <89> d6 89 d7 c3 cc cc cc cc 48 c7 c7 58 c2 db c0 e8 62 34 0a ec 48 [60230.308931] RSP: 0018:ffffac0c8801fed8 EFLAGS: 00010246 [60230.308933] RAX: 0000000000000000 RBX: ffffffffffffff70 RCX: 0000000000000000 [60230.308934] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [60230.308935] RBP: ffffac0c8801ff08 R08: 0000000000000000 R09: 0000000000000000 [60230.308936] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffffffff70 [60230.308937] R13: 0000000fffffffe0 R14: ffff998e38b459a0 R15: ffffffffc0db9a60 [60230.308938] FS: 0000000000000000(0000) GS:ffff998f0ba80000(0000) knlGS:0000000000000000 [60230.308939] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [60230.308941] CR2: 0000000000000000 CR3: 00000001d7e6a001 CR4: 00000000007706e0 [60230.308942] PKRU: 55555554 [60230.309007] kthread closed [60230.309020] destory finished [60230.309021] khttpd: module unloaded ``` ::: TODO: 解決以上 `dereference null pointer` 錯誤 ### 提供目錄檔案存取功能，提供基本的 directory listing 功能首先，在 `struct http_request` 中加入新成員 `struct dir_context` ```diff @@ -42,6 +42,7 @@ struct http_request { int complete; struct list_head list; struct work_struct khttpd_work; + struct dir_context dir_context; // struct dir_context, defines in fs.h }; ``` 加入讀取現行目錄的檔案名稱的函式，如下 ```c static bool handle_directory(struct http_request *request) { struct file *fp; char buf[SEND_BUFFER_SIZE] = {0}; request->dir_context.actor = tracedir; if (request->method != HTTP_GET) { snprintf(buf, SEND_BUFFER_SIZE, "HTTP/1.1 501 Not Implemented\r\n%s%s%s%s", "Content-Type: text/plain\r\n", "Content-Length: 19\r\n", 
"Connection: Close\r\n", "501 Not Implemented\r\n"); http_server_send(request->socket, buf, strlen(buf)); return false; } snprintf(buf, SEND_BUFFER_SIZE, "HTTP/1.1 200 OK\r\n%s%s%s", "Connection: Keep-Alive\r\n", "Content-Type: text/html\r\n", "Keep-Alive: timeout=5, max=1000\r\n\r\n"); http_server_send(request->socket, buf, strlen(buf)); snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "<html><head><style>\r\n", "body{font-family: monospace; font-size: 15px;}\r\n", "td {padding: 1.5px 6px;}\r\n", "</style></head><body><table>\r\n"); http_server_send(request->socket, buf, strlen(buf)); fp = filp_open("/home/paintako/linux2023/khttpd", O_RDONLY | O_DIRECTORY, 0); if (IS_ERR(fp)) { pr_info("Open file failed"); return false; } iterate_dir(fp, &request->dir_context); snprintf(buf, SEND_BUFFER_SIZE, "</table></body></html>\r\n"); http_server_send(request->socket, buf, strlen(buf)); filp_close(fp, NULL); return true; } ``` 可以查閱 [include/linux/fs.h](https://elixir.bootlin.com/linux/latest/source/include/linux/fs.h#L94) 以得知以下結構體的定義。其中, `struct file`， `struct dir_context`，`iterate_dir` 函式都定義在標頭檔 `fs.h` 中。關於 `dir_context` 結構體， `fs.h` 中有下列說明 ```c /* * This is the "filldir" function type, used by readdir() to let * the kernel specify what kind of dirent layout it wants to have. * This allows the kernel to read directories into kernel space or * to have different dirent layouts depending on the binary type. * Return 'true' to keep going and 'false' if there are no more entries. */ struct dir_context; typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, unsigned); struct dir_context { filldir_t actor; loff_t pos; }; ``` 加入自定的 `filldir_t` 函式，讓 `dir_context` 的 `function pointer` 指向它，這邊要用這種方式的原因是因為 `iterate_dir` 函式的定義要求傳入一個指向 `dir_context` 的指標 ```c // callback for 'iterate_dir', trace entry. 
static int tracedir(struct dir_context *dir_context, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { if (strcmp(name, ".") && strcmp(name, "..")) { struct http_request *request = container_of(dir_context, struct http_request, dir_context); char buf[SEND_BUFFER_SIZE] = {0}; snprintf(buf, SEND_BUFFER_SIZE, "<tr><td><a href=\"%s\">%s</a></td></tr>\r\n", name, name); http_server_send(request->socket, buf, strlen(buf)); } return 0; } ``` :::warning Callback function: 當某個函式需要等待特定事件，而其他函式的執行又與該等待中的函式相關時，可改用 callback function，讓相關處理在事件發生的時機點才被呼叫。 i.e. 確保程式不會因為 timer 之類的因素影響執行的順序 ::: 對 `http_server_response` 做修改，只要把 request 傳給 `handle_directory` 就可以了 ~~但這樣做，會出現錯誤，當 `insmod` 後，會馬上出錯並且馬上 unload module，但奇怪的是存取 localhost 依然可以取得目錄 content，以下是報錯訊息：~~ 至此，可以顯示 `filp_open` 開啟的目錄結構，但只要路徑不對就會出錯，缺乏彈性。（`filp_open`：open file in kernel） ```c /** * filp_open - open file and return file pointer * * @filename: path to open * @flags: open flags as per the open(2) second argument * @mode: mode for the new file if O_CREAT is set, else ignored * * This is the helper to open a file from kernelspace if you really * have to. But in generally you should not do this, so please move * along, nothing to see here.. 
*/ struct file *filp_open(const char *filename, int flags, umode_t mode) { struct filename *name = getname_kernel(filename); struct file *file = ERR_CAST(name); if (!IS_ERR(name)) { file = file_open_name(name, flags, mode); putname(name); } return file; } ``` :::warning Note: 在一般 user space 時，想要引用外部函式時，使用 `#include` 就可以引入，但在 keranl 中想要引入外部函式只能使用 `EXPORT_SYMBOL`，否則在編譯時會出錯。 ::: 掛載模組時，指定要開啟的路徑，使用巨集 `module_param_string` 新增參數。 ```c #define PATH_SIZE 100 static char WWWROOT[PATH_SIZE] = {0}; module_param_string(WWWROOT, WWWROOT, PATH_SIZE, 0); ``` `module_param_string` 可以指定陣列大小（`PATH_SIZE`)，這樣做的好處是可以設定模組變數的預設值。以下是更改紀錄： * `http_server.h` ```diff struct khttpd_service { bool is_stopped; + char *dir_path; // dir_path is used to record the path passed by the user for future use when using `insmod`. struct list_head worker; }; ``` * `main.c` ```diff #define DEFAULT_PORT 8081 #define DEFAULT_BACKLOG 100 +#define PATH_SIZE 100 + +static char WWWROOT[PATH_SIZE] = {0}; +module_param_string(WWWROOT,WWWROOT,PATH_SIZE,0); + +extern struct khttpd_service daemon; static ushort port = DEFAULT_PORT; module_param(port, ushort, S_IRUGO); @@ -156,6 +162,10 @@ static void close_listen_socket(struct socket *socket) static int __init khttpd_init(void) { + if (!*WWWROOT) // prevent empty input from user + WWWROOT[0] = '/'; + daemon.dir_path = WWWROOT; + ``` 這裡使用 `extern` 的理由是因為 `khttpd_service` 宣告在 `http_server.c` 當中，成員當中的 `is_stopped` 用來告訴 CMWQ 是否要繼續執行。 * `http_server.c` 更改寫死的路徑成 `WWWROOT` ```diff --- a/http_server.c +++ b/http_server.c @@ -126,7 +126,7 @@ static bool handle_directory(struct http_request *request) "</style></head><body><table>\r\n"); http_server_send(request->socket, buf, strlen(buf)); - fp = filp_open("/home/paintako/", O_RDONLY | O_DIRECTORY, 0); + fp = filp_open(daemon.dir_path, O_RDONLY | O_DIRECTORY, 0); if (IS_ERR(fp)) { ``` 結果是：編譯成功，但是需要特別注意使用者輸入必須符合規範，或者針對使用者輸入做後處理，不然容易出現 `invalid for parameter WWWROOT`，如下 ```shell [64544.354650] khttpd: loading out-of-tree module 
taints kernel. [64544.354683] khttpd: module verification failed: signature and/or required key missing - tainting kernel [64544.354973] khttpd: `' invalid for parameter `WWWROOT' [64544.354976] khttpd: unknown parameter '=' ignored [64544.354977] khttpd: unknown parameter '/home/paintako' ignored [64566.309479] khttpd: `' invalid for parameter `WWWROOT' [64566.309483] khttpd: unknown parameter '="/home/paintako"' ignored ``` ```shell $ sudo insmod khttpd.ko WWWROOT='/home/paintako/linux2023' ``` ![](https://hackmd.io/_uploads/SkGhpKBv3.png) ### 讀取檔案內容添加函式來讀取檔案內容，在 kernal space 中開啟檔案使用 `filp_open` 函式，並且通過前面添加的 `WWWROOT` 加上 URL 來指定開啟檔案的路徑，並且利用 [include/uapi/linux/stat.h](https://github.com/torvalds/linux/blob/master/include/uapi/linux/stat.h) 當中定義的 macro 來判斷 `filp_open` 的類型 ```c #define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) #define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) #define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) #define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) #define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) ``` 新增函式 `catstr`，顧名思義將兩個路徑串接起來，使用一個 buffer 來紀錄當前的路徑 `pwd` ，將 `pwd` 與 `url` 串接起來。 ```c // concatenate string static void catstr(char *res, char *first, char *second) { int first_size = strlen(first); int second_size = strlen(second); memset(res, 0, BUFFER_SIZE); memcpy(res, first, first_size); memcpy(res + first_size, second, second_size); } ``` 修改 `tracedir` ```diff struct http_request *request = container_of(dir_context, struct http_request, dir_context); +char buf[SEND_BUFFER_SIZE] = {0}; +char *url = !strcmp(request->request_url, "/") ? 
"" : request->request_url; ``` 修改 `handle_directory`，判斷現在開啟的是目錄還是檔案 ```c if (S_ISDIR(fp->f_inode->i_mode)) { char buf[SEND_BUFFER_SIZE] = {0}; snprintf(buf, SEND_BUFFER_SIZE, "HTTP/1.1 200 OK\r\n%s%s%s", "Connection: Keep-Alive\r\n", "Content-Type: text/html\r\n", "Keep-Alive: timeout=5, max=1000\r\n\r\n"); http_server_send(request->socket, buf, strlen(buf)); snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "<html><head><style>\r\n", "body{font-family: monospace; font-size: 15px;}\r\n", "td {padding: 1.5px 6px;}\r\n", "</style></head><body><table>\r\n"); http_server_send(request->socket, buf, strlen(buf)); iterate_dir(fp, &request->dir_context); snprintf(buf, SEND_BUFFER_SIZE, "</table></body></html>\r\n"); http_server_send(request->socket, buf, strlen(buf)); } else if (S_ISREG(fp->f_inode->i_mode)) { char *read_data = kmalloc(fp->f_inode->i_size, GFP_KERNEL); int ret = read_file(fp, read_data); send_http_header(request->socket, HTTP_STATUS_OK, http_status_str(HTTP_STATUS_OK), "text/plain", fp->f_inode->i_size, "close"); http_server_send(request->socket, read_data, ret); kfree(read_data); ``` 至此，可以在網頁上看到給定目錄的內容以及檔案內容。 ![](https://hackmd.io/_uploads/By0KaGjDh.png) 測試 request 效率，因為有 open file 帶來的額外 I/O 開銷，所以 throughput 變少。 ### 使用 Chunked transfer encoding 送出目錄資料 [HTTP header](https://en.wikipedia.org/wiki/List_of_HTTP_header_fields) 是在請求（request）或回應（response）行（一條訊息的第一行內容）之後傳輸的。協定頭的欄位是以明文的字串格式傳輸，是以冒號分隔的鍵名與鍵值對，以 Enter (CR) 加換行 (LF) 符號序列結尾。使用 `telnet` 發送 `GET` 時，可以看到以下內容： ```shell $ telnet localhost 8081 Trying 127.0.0.1... Connected to localhost. Escape character is '^]'. 
GET / HTTP/1.1 HTTP/1.1 200 OK Connection: Keep-Alive Content-Type: text/html Keep-Alive: timeout=5, max=1000 <html><head><style> body{font-family: monospace; font-size: 15px;} td {padding: 1.5px 6px;} </style></head><body><table> <tr><td><a href="/snap">snap</a></td></tr> <tr><td><a href="/.gitconfig">.gitconfig</a></td></tr> <tr><td><a href="/web">web</a></td></tr> <tr><td><a href="/riscv">riscv</a></td></tr> <tr><td><a href="/linux2023">linux2023</a></td></tr> <tr><td><a href="/CO">CO</a></td></tr> <tr><td><a href="/.cache">.cache</a></td></tr> <tr><td><a href="/Music">Music</a></td></tr> <tr><td><a href="/Downloads">Downloads</a></td></tr> <tr><td><a href="/.viminfo">.viminfo</a></td></tr> <tr><td><a href="/.bash_logout">.bash_logout</a></td></tr> <tr><td><a href="/Templates">Templates</a></td></tr> <tr><td><a href="/F100">F100</a></td></tr> <tr><td><a href="/Desktop">Desktop</a></td></tr> <tr><td><a href="/.ssh">.ssh</a></td></tr> <tr><td><a href="/.config">.config</a></td></tr> <tr><td><a href="/.profile">.profile</a></td></tr> <tr><td><a href="/.pki">.pki</a></td></tr> <tr><td><a href="/.sudo_as_admin_successful">.sudo_as_admin_successful</a></td></tr> <tr><td><a href="/.vim">.vim</a></td></tr> <tr><td><a href="/.wget-hsts">.wget-hsts</a></td></tr> <tr><td><a href="/csapp">csapp</a></td></tr> <tr><td><a href="/.vscode">.vscode</a></td></tr> <tr><td><a href="/Videos">Videos</a></td></tr> <tr><td><a href="/.debug">.debug</a></td></tr> <tr><td><a href="/.python_history">.python_history</a></td></tr> <tr><td><a href="/Documents">Documents</a></td></tr> <tr><td><a href="/.gnome">.gnome</a></td></tr> <tr><td><a href="/.bashrc">.bashrc</a></td></tr> <tr><td><a href="/.gnupg">.gnupg</a></td></tr> <tr><td><a href="/.local">.local</a></td></tr> <tr><td><a href="/Pictures">Pictures</a></td></tr> <tr><td><a href="/Public">Public</a></td></tr> <tr><td><a href="/.lesshst">.lesshst</a></td></tr> <tr><td><a href="/.chewing">.chewing</a></td></tr> </table></body></html> 
Connection closed by foreign host. ``` 返回的資訊被 `\r\n` 隔開，上面是 `HTTP header` 的內容，下面是 `Payload`，也就是 HTML 內容原本的實作中，每次傳送 header 都要傳入 `Content-Length`，而使用 `Chunked encoding` 就可以不用額外發送 `Content-Length`，此方法的好處是當大量資料要傳給 client 時可以不用等整個 response 處理完畢才知道大小。 :::warning Note: macro 中使用 `...` (可變數量參數) 可使 macro 接受不同數量的參數根據 C11 規格書（6.10.3.1 節），對於可變數量參數的處理方式如下所述： > The identifier `__VA_ARGS__` shall occur only in the replacement list of a function-like macro that uses the ellipsis notation in the parameters. ::: 新增 macro `SEND_HTTP_MSG` ```c #define SEND_HTTP_MSG(socket, buf, format, ...) \ snprintf(buf, SEND_BUFFER_SIZE, format, __VA_ARGS__); \ http_server_send(socket, buf, strlen(buf)) ``` 透過以上 macro 來減少重複的程式碼。改寫後： ```shell $ telnet localhost 8081 Trying 127.0.0.1... Connected to localhost. Escape character is '^]'. GET / HTTP/1.1 HTTP/1.1 200 OK Content-Type: text/html Transfer-Encoding: chunked 7B <html><head><style> body{font-family: monospace; font-size: 15px;} td {padding: 1.5px 6px;} </style></head><body><table> 2e <tr><td><a href="/others">others</a></td></tr> 2a <tr><td><a href="/perf">perf</a></td></tr> 3c <tr><td><a href="/c_syntax_test">c_syntax_test</a></td></tr> 2c <tr><td><a href="/kecho">kecho</a></td></tr> 2e <tr><td><a href="/khttpd">khttpd</a></td></tr> 32 <tr><td><a href="/tiny_web">tiny_web</a></td></tr> 30 <tr><td><a href="/example">example</a></td></tr> 3c <tr><td><a href="/kernal_module">kernal_module</a></td></tr> 2e <tr><td><a href="/origin">origin</a></td></tr> 16 </table></body></html> 0 ``` ```diff static void catstr(char *res, char *first, char *second) { @@ -113,10 +93,9 @@ static int tracedir(struct dir_context *dir_context, char *url = !strcmp(request->request_url, "/") ? 
"" : request->request_url; - snprintf(buf, SEND_BUFFER_SIZE, - "<tr><td><a href=\"%s/%s\">%s</a></td></tr>\r\n", url, name, - name); - http_server_send(request->socket, buf, strlen(buf)); + SEND_HTTP_MSG(request->socket, buf, + "%lx\r\n<tr><td><a href=\"%s/%s\">%s</a></td></tr>\r\n", + 34 + strlen(url) + (namelen << 1), url, name, name); } return 0; } ``` ```diff @@ -125,13 +104,13 @@ static bool handle_directory(struct http_request *request) { struct file *fp; char pwd[BUFFER_SIZE] = {0}; - + char buf[SEND_BUFFER_SIZE] = {0}; request->dir_context.actor = tracedir; if (request->method != HTTP_GET) { - send_http_header(request->socket, HTTP_STATUS_NOT_IMPLEMENTED, - http_status_str(HTTP_STATUS_NOT_IMPLEMENTED), - "text/plain", 19, "close"); - send_http_content(request->socket, "501 Not Implemented"); + SEND_HTTP_MSG(request->socket, buf, "%s%s%s%s%s", + "HTTP/1.1 501 Not Implemented\r\n", + "Content-Type: text/plain\r\n", "Content-Length: 19\r\n", + "Connection: Close\r\n\r\n", "501 Not Implemented"); return false; } ``` ```diff @@ -139,40 +118,39 @@ static bool handle_directory(struct http_request *request) fp = filp_open(pwd, O_RDONLY, 0); if (IS_ERR(fp)) { - send_http_header(request->socket, HTTP_STATUS_NOT_FOUND, - http_status_str(HTTP_STATUS_NOT_FOUND), "text/plain", - 14, "close"); - send_http_content(request->socket, "404 Not Found"); + SEND_HTTP_MSG(request->socket, buf, "%s%s%s%s%s", + "HTTP/1.1 404 Not Found\r\n", + "Content-Type: text/plain\r\n", "Content-Length: 13\r\n", + "Connection: Close\r\n\r\n", "404 Not Found"); kernel_sock_shutdown(request->socket, SHUT_RDWR); return false; } if (S_ISDIR(fp->f_inode->i_mode)) { char buf[SEND_BUFFER_SIZE] = {0}; - snprintf(buf, SEND_BUFFER_SIZE, "HTTP/1.1 200 OK\r\n%s%s%s", - "Connection: Keep-Alive\r\n", "Content-Type: text/html\r\n", - "Keep-Alive: timeout=5, max=1000\r\n\r\n"); - http_server_send(request->socket, buf, strlen(buf)); - - snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "<html><head><style>\r\n", - 
"body{font-family: monospace; font-size: 15px;}\r\n", - "td {padding: 1.5px 6px;}\r\n", - "</style></head><body><table>\r\n"); - http_server_send(request->socket, buf, strlen(buf)); + SEND_HTTP_MSG(request->socket, buf, "%s%s%s", "HTTP/1.1 200 OK\r\n", + "Content-Type: text/html\r\n", + "Transfer-Encoding: chunked\r\n\r\n"); + SEND_HTTP_MSG( + request->socket, buf, "7B\r\n%s%s%s%s", "<html><head><style>\r\n", + "body{font-family: monospace; font-size: 15px;}\r\n", + "td {padding: 1.5px 6px;}\r\n", "</style></head><body><table>\r\n"); iterate_dir(fp, &request->dir_context); - snprintf(buf, SEND_BUFFER_SIZE, "</table></body></html>\r\n"); - http_server_send(request->socket, buf, strlen(buf)); + SEND_HTTP_MSG(request->socket, buf, "%s", + "16\r\n</table></body></html>\r\n"); + SEND_HTTP_MSG(request->socket, buf, "%s", "0\r\n\r\n"); } else if (S_ISREG(fp->f_inode->i_mode)) { char *read_data = kmalloc(fp->f_inode->i_size, GFP_KERNEL); int ret = read_file(fp, read_data); - send_http_header(request->socket, HTTP_STATUS_OK, - http_status_str(HTTP_STATUS_OK), "text/plain", - fp->f_inode->i_size, "close"); - http_server_send(request->socket, read_data, ret); + SEND_HTTP_MSG(request->socket, buf, "%s%s%s%d%s", "HTTP/1.1 200 OK\r\n", + "Content-Type: text/plain\r\n", "Content-Length: ", ret, + "\r\nConnection: Close\r\n\r\n"); + http_server_send(request->socket, read_data, strlen(read_data)); + ; kfree(read_data); } ``` 效能： ```shell requests: 100000 good requests: 100000 [100%] bad requests: 0 [0%] socket errors: 0 [0%] seconds: 8.011 requests/sec: 12482.545 ``` ### MIME 型態前面有提到 `Content type` 是用來表示資源的 `media type`，而 MIME 可以標示傳送的資料型態，對於後端來說是可以增加資料正確性的手段。 MIME 是由主要型態（type)、次要型態 (subtype)、參數組成 Type 是廣義分類， subtype 則是資料精確型態，MIME 永遠都有主要型態和次要型態，而後面可以加上參數提供更多細節，如下： `type/subtype;parameter=value` 在原本的實作中，只會顯示文字檔，只要遇到非文字檔如 `.jpg` 就無法正確的顯示，所以需要透過提供 MIME 使此類非文字檔正常顯示。增加標頭檔 `mime_type.h` ，在此標頭檔中定義不同類型的 MIME，在增加 MIME 後，在瀏覽器中檢查 http header 可以顯示不同類型的檔案類型。 
![](https://hackmd.io/_uploads/r1N9AuaPh.png) ```c // return mime type string const char *get_mime_str(char *request_url) { char *request_type = strchr(request_url, '.'); pr_info("%s , %s\n",request_url, request_type); int index = 0; if (!request_type) return "text/plain"; while (mime_types[index].type) { if (!strcmp(mime_types[index].type, request_type)) return mime_types[index].string; index++; } return "text/plain"; } ``` 此函式用來區分檔案名稱及副檔名，再來去定義好的數組中比對是否存在該檔名，若存在就返回對應的 MIME type 。 :::danger TODO: 上面的方式有待改進，因為每次收到 request 後都進行 linear search，即便比對數量不多，但勢必會影響效能，可改用 `hash table` 的方式來改善搜尋速度。 ::: ### HTTP keep alive HTTP 採用 `請求 request`-`回應 response` 模式，非 `KeepAlive` 模式時，每個請求/應答客戶和伺服器都要新建一個連接，完成之後立即斷開連接（HTTP 爲 stateless 的通訊協定）；當使用 `Keep-Alive` 模式（又稱持久連接、連接重用）時， `Keep-Alive` 功能使客戶端到伺服器端的連接持續有效，當出現對伺服器的後繼請求時， `Keep-Alive` 功能避免了建立或者重新建立連接。 ### 使用 timer 主動關閉連線保持閒置連線的優點是： :::warning Note: 關閉連線時可以採用 `graceful shutdown`，避免尚有資料在傳輸給某個 client 時就馬上被關閉。 ::: ## TODO: 檢視 I/O 模型並尋求效能改進空間以 ftrace 檢視 kHTTPd 運作的細節，檢視 I/O 模型並尋求效能改進空間 ### 確認可用被追蹤的函式先確認目前的系統是否有 ftrace ```shell $ cat /boot/config-`uname -r` | grep CONFIG_HAVE_FUNCTION_TRACER CONFIG_HAVE_FUNCTION_TRACER=y ``` 確認系統內有 ftrace 後，找出 `khttpd` 內所有可以被追蹤的函式 ```shell $ sudo cat /sys/kernel/debug/tracing/available_filter_functions | grep khttpd parse_url_char.part.0 [khttpd] http_message_needs_eof [khttpd] http_should_keep_alive [khttpd] http_parser_execute [khttpd] http_method_str [khttpd] http_status_str [khttpd] http_parser_init [khttpd] http_parser_settings_init [khttpd] http_errno_name [khttpd] http_errno_description [khttpd] http_parser_url_init [khttpd] http_parser_parse_url [khttpd] http_parser_pause [khttpd] http_body_is_final [khttpd] http_parser_version [khttpd] http_parser_set_max_header_size [khttpd] http_parser_callback_header_field [khttpd] http_parser_callback_headers_complete [khttpd] http_parser_callback_message_begin [khttpd] http_parser_callback_request_url [khttpd] http_parser_callback_body [khttpd] 
http_server_recv.constprop.0 [khttpd] http_server_worker [khttpd] http_server_send.isra.0 [khttpd] tracedir.part.0 [khttpd] tracedir [khttpd] http_parser_callback_header_value [khttpd] http_server_daemon [khttpd] get_mime_str [khttpd] handle_directory [khttpd] http_parser_callback_message_complete [khttpd] ``` 確認後，參考 [鳥哥私房菜](https://linux.vbird.org/linux_basic/centos7/0340bashshell-scripts.php)，並且參考 `《Demystifying the Linux CPU Scheduler》第六章` 撰寫 `shell script` 。 ```shell #!/bin/bash TRACE_DIR=/sys/kernel/debug/tracing # clear echo 0 > $TRACE_DIR/tracing_on echo > $TRACE_DIR/set_graph_function echo > $TRACE_DIR/set_ftrace_filter echo nop > $TRACE_DIR/current_tracer # setting echo function_graph > $TRACE_DIR/current_tracer echo 3 > $TRACE_DIR/max_graph_depth echo http_server_worker > $TRACE_DIR/set_graph_function # execute echo 1 > $TRACE_DIR/tracing_on ./htstress localhost:8081 -n 2000 echo 0 > $TRACE_DIR/tracing_on ``` `echo function_graph > $TRACE_DIR/current_tracer` 是把 `function_graph` 設成當前的 tracer (其他 tracer 可參考 [Ftrace](https://docs.kernel.org/trace/ftrace.html)) `echo http_server_worker > $TRACE_DIR/set_graph_function` 是把 `http_server_worker` 設成要 trace 的函式。 ```shell $ sudo cat /sys/kernel/debug/tracing/trace > ./output.txt ``` 節錄部份結果： ```shell # CPU DURATION FUNCTION CALLS # | | | | | | | 11) 4.756 us | filp_open(); 11) + 15.848 us | http_server_send.isra.0 [khttpd](); 11) + 13.042 us | http_server_send.isra.0 [khttpd](); 11) ! 431.909 us | iterate_dir(); 11) 8.933 us | http_server_send.isra.0 [khttpd](); 11) + 10.592 us | http_server_send.isra.0 [khttpd](); 11) 8.016 us | kernel_sock_shutdown(); 11) 0.469 us | filp_close(); 11) ! 495.127 us | } 11) ! 498.186 us | } 11) ! 
500.548 us | } 11) 0.081 us | http_should_keep_alive [khttpd](); ``` 可以看到花的大部分時間都是在 `iterate_dir` 函式中 ### 改進 `iterate_dir` 函式參考 [Jerejere0808](https://hackmd.io/@sysprog/HkzcnhOHn#%E4%BB%A5-content-cache-%E6%94%B9%E9%80%B2%E4%BC%BA%E6%9C%8D%E5%99%A8%E8%99%95%E7%90%86%E6%95%88%E7%8E%87) 以及 [terry23304](https://hackmd.io/@sysprog/r1vb6bhH2)，為了避免每次請求時都要執行一次 `iterate_dir()`，可以將 response 給快取下來，如果下次有同樣的請求時可以避免再執行一次 `iterate_dir()`，並加入 [RCU 同步機制](https://hackmd.io/@sysprog/linux-rcu)，以支援多個端同時存取快取的需求（允許多個 reader (port) 在單一 writer (cache) 更新資料的同時，在不需要 lock 的前提下正確讀取資料）。在 Linux 核心中，RCU 用來保護以下場景: * 指標 * linked list * hlist (hash list) ### 實作 content cache 使用 `hashtable` 來實作 `content cache` 增加 `content_cache.h` 來實作 `content cache` ```diff + struct content_cache_entry { + const char *request_url; + const char *response; + struct hlist_node node; + spinlock_t lock; + }; ``` 在 `content_cache.c` 中實作 `content_cache` 的功能 ```c DEFINE_READ_MOSTLY_HASHTABLE(content_cache_table, 8); // declare a hashtable with 2^8 = 256 buckets (the second argument is the number of bits). 
``` 在收到新的 request 後，會先檢查該 request 是否存在於 `hashtable` 中，如果存在那就返回，反之插入 `hashtable` 當中 ```c void insert_content_cache(char *request_url, char *cache_buffer) { struct content_cache_entry *entry = kmalloc(sizeof(struct content_cache_entry), GFP_KERNEL); if (!entry) return; entry->request_url = request_url; entry->response = cache_buffer; spin_lock_init(&entry->lock); spin_lock(&entry->lock); hash_add(content_cache_table, &entry->node, request_hash(request_url)); // lock on while accessing hash table spin_unlock(&entry->lock); } ``` 需要注意的是當插入 `hashtable` 時，需要保證資料的正確性，避免 reader 讀取錯誤資料，故在存取該結構體時，需要使用到結構體內的成員 `lock`。定義好 `hashtable` 後，之後在存取之前都會先檢查該 request 的 url 是否存在裡面，如果有便可以把 cache 的回應返回，反之如果不存在就插入 table 內。加入 content cache 後，重新測試效能，由於每一次測試皆是訪問同一個 url，故改善了 requests/sec ，並且隨著 `WWWROOT` 的改變，由於要讀取的 file 數目不同，requests/sec 也會有所改變 ```shell $ ./htstress localhost:8081 -n 20000 0 requests 2000 requests 4000 requests 6000 requests 8000 requests 10000 requests 12000 requests 14000 requests 16000 requests 18000 requests requests: 20000 good requests: 20000 [100%] bad requests: 0 [0%] socket errors: 0 [0%] seconds: 0.852 requests/sec: 23482.116 ``` requests/sec 由 12482.545 上升到了 23482.116。