Try   HackMD

2023q1 Homework7 (ktcp)

contributed by < fewletter >

開發環境

$ gcc --version
gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0

$ lscpu
Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   39 bits physical, 48 bits virtual
CPU(s):                          6
On-line CPU(s) list:             0-5
Thread(s) per core:              1
Core(s) per socket:              6
Socket(s):                       1
NUMA node(s):                    1
Vendor ID:                       GenuineIntel
CPU family:                      6
Model:                           158
Model name:                      Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz
Stepping:                        10
CPU MHz:                         3000.000
CPU max MHz:                     4400.0000
CPU min MHz:                     800.0000
BogoMIPS:                        6000.00
Virtualization:                  VT-x
L1d cache:                       192 KiB
L1i cache:                       192 KiB
L2 cache:                        1.5 MiB
L3 cache:                        9 MiB
NUMA node0 CPU(s):               0-5

CMWQ

給定的 kecho 已使用 CMWQ,請陳述其優勢和用法
Concurrency Managed Workqueue (cmwq)

從 CMWQ 文件中的段落可以先知道幾個名詞

When such an asynchronous execution context is needed, a work item describing which function to execute is put on a queue. An independent thread serves as the asynchronous execution context. The queue is called workqueue and the thread is called worker.

  • work
    要被執行的程式碼 ( function )
  • worker
    執行程式碼的執行緒 ( thread )
  • workqueue
    要被執行的程式碼序列 ( queue )

為何需要使用 CMWQ ?

  • 文件中提到,在原本的 workqueue 實作中,多執行緒的 workqueue 會在每一個 CPU 上各建立一個 worker 執行緒,單執行緒的 workqueue 則只有一個 worker 執行緒。隨著 CPU 核心數越來越多,又或是使用者建立越來越多的 workqueue,有些系統光是啟動 kernel 就會耗盡 PID 空間。
  • 假設有兩個 work A 和 work B,work B 依賴 work A 的執行結果。如果這兩個 work 都被排程到同一個 worker 上,且 work B 先被執行並等待 work A 完成,那麼 work A 就永遠沒有機會被執行,因而發生 deadlock。

CMWQ 的優勢

  • CMWQ 不會為每個任務各自建立 worker,而是讓既有的 worker 在沒有任務時保持在等待任務的狀態,待新任務到來時重複利用,既避免 worker 被建立後長期閒置,也省下反覆建立 worker 的時間。

引入 CMWQ 改寫 khttpd

以下實作參考 作業說明: CMWQ (Concurrency Managed Workqueue) ,首先在掛載模組時建立 CMWQ ,並且在卸載模組時結束 CMWQ 。

建立 CMWQ

/*
 * Module entry point: open the listening socket, create the workqueue used
 * by the connection workers, then start the http server daemon thread.
 *
 * Returns 0 on success or a negative errno. Every resource acquired before
 * a failing step is released again before returning.
 */
static int __init khttpd_init(void)
{
    int err = open_listen_socket(port, backlog, &listen_socket);
    if (err < 0) {
        pr_err("can't open listen socket\n");
        return err;
    }
    param.listen_socket = listen_socket;

+   /* create the CMWQ onto which the daemon queues its work items */
+   khttpd_wq = alloc_workqueue("khttpd_wq", WQ_UNBOUND, 0);
+   if (!khttpd_wq) {
+       /* alloc_workqueue() returns NULL on failure */
+       pr_err("can't allocate workqueue\n");
+       close_listen_socket(listen_socket);
+       return -ENOMEM;
+   }
    http_server = kthread_run(http_server_daemon, &param, KBUILD_MODNAME);
    if (IS_ERR(http_server)) {
        pr_err("can't start http server daemon\n");
        close_listen_socket(listen_socket);
+       /* do not leak the workqueue when the daemon cannot be started */
+       destroy_workqueue(khttpd_wq);
        return PTR_ERR(http_server);
    }
    return 0;
}

結束 CMWQ

/*
 * Module exit: signal the http server daemon thread with SIGTERM (its accept
 * loop breaks out when a signal is pending), wait for it with kthread_stop(),
 * close the listening socket, and finally destroy the khttpd_wq workqueue
 * before logging that the module is unloaded.
 */
static void __exit khttpd_exit(void)
{
    send_sig(SIGTERM, http_server, 1);
    kthread_stop(http_server);
    close_listen_socket(listen_socket);
+   destroy_workqueue(khttpd_wq);
    pr_info("module unloaded\n");
}

接著參考 kecho 當中建立 workqueue 的模式在 khttpd 重新建立一次。

  • 結構體 khttpd_server 利用 list_head 指向 workqueue 的 work。
  • 結構體 khttpd 是為了將 socket 中的資訊傳給 worker ,讓 worker 能夠從指定的函式中執行任務,list_head 是讓所有的 worker 能夠以佇列的形式存在並且在釋放記憶體空間時能夠從頭到尾遍歷後釋放。
/* Global state shared between the server daemon and its connection workers. */
struct khttpd_server {
    bool is_stopped;              /* set to true by the daemon on exit; workers poll it */
    struct list_head worker_head; /* head of the list of struct khttpd entries */
};

/* Per-connection record handed to the workqueue. */
struct khttpd
{
    struct socket *socket;     /* socket returned by kernel_accept() for this connection */
    struct list_head list;     /* node linked onto daemon.worker_head */
    struct work_struct worker; /* work item; its handler is http_server_worker() */
};
  • 接著宣告一個 workqueue daemon,並且設定狀態為執行中 .is_stopped = false
  • create_work() 中將所有的 work 串接在 &daemon.worker_head 後面,同時啟動 worker 要執行的函式。
  • free_work() 將遍歷整個佇列並將記憶體釋放。
/*
 * Server-wide state, initially "running". Only is_stopped is initialised
 * here; worker_head is set up by INIT_LIST_HEAD() in http_server_daemon().
 */
struct khttpd_server daemon = {.is_stopped = false};
...
/*
 * Allocate a per-connection khttpd record for @sk, bind its work item to
 * http_server_worker() and link the record onto daemon.worker_head.
 *
 * Returns a pointer to the embedded work_struct, or NULL when the
 * allocation fails.
 */
static struct work_struct *create_work(struct socket *sk)
{
    struct khttpd *conn = kmalloc(sizeof(*conn), GFP_KERNEL);

    if (!conn)
        return NULL;

    conn->socket = sk;
    INIT_WORK(&conn->worker, http_server_worker);
    list_add(&conn->list, &daemon.worker_head);

    return &conn->worker;
}

/*
 * Tear down every connection record linked on daemon.worker_head.
 *
 * For each record: shut the socket down, wait for its work item with
 * flush_work(), then release the socket and free the record. The "safe"
 * iterator is required because the current entry is freed inside the loop.
 */
static void free_work(void)
{
    struct list_head *pos, *next;

    list_for_each_safe (pos, next, &daemon.worker_head) {
        struct khttpd *conn = list_entry(pos, struct khttpd, list);

        kernel_sock_shutdown(conn->socket, SHUT_RDWR);
        flush_work(&conn->worker);
        sock_release(conn->socket);
        kfree(conn);
    }
}

修改 http_server_daemon(),並以結構體 khttpd 的成員改寫,主要修改的地方有

  • 在建立連線以前就啟動 workqueue
  • 將 worker 排入在 main.c 中建立的 khttpd_wq
/*
 * Accept loop of the http server (shown as a diff; unchanged lines elided).
 *
 * Initialises daemon.worker_head, then accepts connections until the thread
 * is asked to stop. An accept error ends the loop when a signal is pending,
 * otherwise it is logged and the loop continues. Each accepted socket is
 * wrapped by create_work() and queued on khttpd_wq; if that fails the socket
 * is shut down and released. After the loop, daemon.is_stopped is set and
 * free_work() releases every connection record. Always returns 0.
 */
int http_server_daemon(void *arg)
{
-   struct task_struct *worker;
+   struct work_struct *worker;
...
+   INIT_LIST_HEAD(&daemon.worker_head);
    while (!kthread_should_stop()) {
        int err = kernel_accept(param->listen_socket, &socket, 0);
        if (err < 0) {
            if (signal_pending(current))
                break;
            pr_err("kernel_accept() error: %d\n", err);
            continue;
        }
-       worker = kthread_run(http_server_worker, socket, KBUILD_MODNAME);
+       if (!(worker = create_work(socket))) {
+           pr_err("can't create more worker process\n");
+           kernel_sock_shutdown(socket, SHUT_RDWR);
+           sock_release(socket);
+           continue;
+        }
+      /* start server worker */
+      queue_work(khttpd_wq, worker);
    }
+   daemon.is_stopped = true; /* notify all worker to stop */
+   free_work();
    return 0;
}

修改 http_server_worker(),主要修改的地方有

  • 利用 container_of(),由傳入的 work_struct(結構體 khttpd 的成員 worker)的位址反推出包含它的結構體 khttpd 的記憶體位置
  • 其他地方則是讓 socket 變成由 worker 管理的成員
-static int http_server_worker(void *arg)
+static void http_server_worker(struct work_struct *work)
{
    char *buf;
+   struct khttpd *worker = container_of(work, struct khttpd, worker);
    ...
    if (!buf) {
        pr_err("can't allocate memory!\n");
-       return -1;
+       return;
    }
    ...
-   request.socket = socket;
+   request.socket = worker->socket;
    http_parser_init(&parser, HTTP_REQUEST);
    parser.data = &request;
-   while (!kthread_should_stop()) {
-       int ret = http_server_recv(socket, buf, RECV_BUFFER_SIZE - 1);
+   while (!daemon.is_stopped) {
+       int ret = http_server_recv(worker->socket, buf, RECV_BUFFER_SIZE - 1);
        if (ret <= 0) {
            if (ret)
                pr_err("recv error: %d\n", ret);
    ...
-   kernel_sock_shutdown(socket, SHUT_RDWR);
+   kernel_sock_shutdown(worker->socket, SHUT_RDWR);
-   sock_release(socket);

改好程式碼後,利用 make check 來觀察測試結果
引入 CMWQ 之前

$ make check

0 requests
10000 requests
20000 requests
30000 requests
40000 requests
50000 requests
60000 requests
70000 requests
80000 requests
90000 requests

requests:      100000
good requests: 100000 [100%]
bad requests:  0 [0%]
socket errors: 0 [0%]
seconds:       2.587
requests/sec:  40156.498

Complete

引入 CMWQ 之後

$ make check

0 requests
10000 requests
20000 requests
30000 requests
40000 requests
50000 requests
60000 requests
70000 requests
80000 requests
90000 requests

requests:      100000
good requests: 100000 [100%]
bad requests:  0 [0%]
socket errors: 0 [0%]
seconds:       1.280
requests/sec:  78106.633

實作 directory listing 功能

參考自 作業說明:實作 directory listing 功能,為了能夠在網頁中顯示檔案目錄,首先第一步是要先讀取檔案目錄並且需要了解將資料傳給網頁的語法。

  • 讀取檔案目錄
    首先可以從這篇文章linux/fs.h 了解到讀取檔案的大致架構需要有基本的 filp_open, filp_close, iterate_dir,跟結構體 struct dir_context,而從 linux/fs.h 中的註解可以看到 struct dir_context 允許將目錄讀到核心。
/*
 * This is the "filldir" function type, used by readdir() to let
 * the kernel specify what kind of dirent layout it wants to have.
 * This allows the kernel to read directories into kernel space or
 * to have different dirent layouts depending on the binary type.
 */
struct dir_context;

tracedir 的功能就在提供讀取檔案目錄到核心並且利用 html 的語法將目錄名稱傳到 client 中。

/*
 * dir_context actor: called for each directory entry found by iterate_dir().
 *
 * Emits one HTML table row linking to the entry and sends it on the
 * request's socket. The "." and ".." entries are skipped.
 *
 * @dir_context: embedded in the struct http_request being served
 * @name:        entry name, @namelen bytes long
 * @namelen:     length of @name in bytes
 * @offset, @ino, @d_type: unused
 *
 * Always returns 0.
 */
static int tracedir(struct dir_context *dir_context,
                    const char *name,
                    int namelen,
                    loff_t offset,
                    u64 ino,
                    unsigned int d_type)
{
    struct http_request *request;
    char buf[SEND_BUFFER_SIZE] = {0};

    /*
     * The callback is handed an explicit length, so bound every access by
     * namelen instead of relying on @name being NUL-terminated.
     */
    if (namelen == 1 && name[0] == '.')
        return 0;
    if (namelen == 2 && name[0] == '.' && name[1] == '.')
        return 0;

    request = container_of(dir_context, struct http_request, dir_context);

    snprintf(buf, SEND_BUFFER_SIZE,
             "<tr><td><a href=\"%.*s\">%.*s</a></td></tr>\r\n", namelen, name,
             namelen, name);
    http_server_send(request->socket, buf, strlen(buf));
    return 0;
}
  • 資料傳輸
    • 首先要先確認請求的方法是否為 GET:if (request->method != HTTP_GET),若不是則回應 501 Not Implemented
    • filp_open 打開資料夾,並且透過 iterate_dir 遍歷整個資料夾目錄
    • tracedir 會將資料夾目錄名稱透過 http_server_send 傳給網頁
    • 最後 filp_close 關閉資料夾
/*
 * Send an HTML listing of the served directory on the request's socket.
 *
 * Non-GET requests are answered with "501 Not Implemented". For GET, the
 * response header and the opening HTML are sent, the directory is opened
 * and walked with iterate_dir() (tracedir() sends one row per entry), and
 * the closing HTML is sent.
 *
 * Returns true when the listing was sent, false when the method is not GET
 * or the directory cannot be opened.
 */
static bool handle_directory(struct http_request *request)
{
    struct file *fp;
    char buf[SEND_BUFFER_SIZE] = {0};

    request->dir_context.actor = tracedir;
    if (request->method != HTTP_GET) {
        /*
         * The header section must end with an empty line, and
         * Content-Length must match the body: "501 Not Implemented\r\n"
         * is 21 bytes long.
         */
        snprintf(buf, SEND_BUFFER_SIZE,
                 "HTTP/1.1 501 Not Implemented\r\n%s%s%s%s",
                 "Content-Type: text/plain\r\n", "Content-Length: 21\r\n",
                 "Connection: Close\r\n\r\n", "501 Not Implemented\r\n");
        http_server_send(request->socket, buf, strlen(buf));
        return false;
    }

    snprintf(buf, SEND_BUFFER_SIZE, "HTTP/1.1 200 OK\r\n%s%s%s",
             "Connection: Keep-Alive\r\n", "Content-Type: text/html\r\n",
             "Keep-Alive: timeout=5, max=1000\r\n\r\n");
    http_server_send(request->socket, buf, strlen(buf));

    /* opening HTML: inline style and the start of the table */
    snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "<html><head><style>\r\n",
             "body{font-family: monospace; font-size: 15px;}\r\n",
             "td {padding: 1.5px 6px;}\r\n",
             "</style></head><body><table>\r\n");
    http_server_send(request->socket, buf, strlen(buf));

    /* NOTE(review): the served directory is hard-coded to the author's tree */
    fp = filp_open("/home/fewletter/linux2023/khttpd/", O_RDONLY | O_DIRECTORY, 0);
    if (IS_ERR(fp)) {
        pr_info("Open file failed\n");
        return false;
    }

    /* tracedir() sends one table row per directory entry */
    iterate_dir(fp, &request->dir_context);
    snprintf(buf, SEND_BUFFER_SIZE, "</table></body></html>\r\n");
    http_server_send(request->socket, buf, strlen(buf));
    filp_close(fp, NULL);
    return true;
}

其所產生的 html 檔案如下,可以看到其與 handle_directory 中所傳遞的字串皆相同

<html><head><style>
body{font-family: monospace; font-size: 15px;}
td {padding: 1.5px 6px;}
</style></head><body><table>
<tr><td><a href="README.md">README.md</a></td></tr>
<tr><td><a href="http_server.o">http_server.o</a></td></tr>
...
<tr><td><a href="http_parser.c">http_parser.c</a></td></tr>
<tr><td><a href="http_server.c">http_server.c</a></td></tr>
<tr><td><a href="http_parser.o">http_parser.o</a></td></tr>
<tr><td><a href="index.html.2">index.html.2</a></td></tr>
<tr><td><a href="main.c">main.c</a></td></tr>
<tr><td><a href="main.o">main.o</a></td></tr>
</table></body></html>

最後透過載入模組並且連線到網頁瀏覽器便可得下面結果