Try   HackMD

Linux 核心專題: kHTTPd 改進

執行人: terry23304
專題解說錄影
GitHub

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
提問清單

  • ?

任務簡述

依據 ktcp 的指示,持續改進 sysprog21/khttpd 的程式碼,打造出高效且穩定的網頁伺服器。

TODO: 改進 sysprog21/khttpd 的效率

依據 ktcp 的指示,在 sysprog21/khttpd 的基礎之上,利用 CMWQ 一類的機制,打造出高效且穩定的網頁伺服器,需要處理 lock-free 的資源管理議題 (如 RCU)。

搭配閱讀: 〈RCU Usage In the Linux Kernel: One Decade Later

An application can change the IP options on a per-socket basis by calling sys_setsockopt,
which eventually causes the kernel to call setsockopt.
setsockopt sets the new IP options, then uses call_
rcu to asynchronously free the memory storing the old IP
options. Using call_rcu ensures all threads that might
be manipulating the old options will have released their
reference by exiting the RCU critical section.

TODO: 檢視 I/O 模型並尋求效能改進空間

以 ftrace 檢視 kHTTPd 運作的細節,檢視 I/O 模型並尋求效能改進空間

引入 Concurrency Managed Workqueue

參考 kecho 把所有 work 使用一個鏈結串列做管理,用 is_stopped 判斷是否結束連線

struct httpd_service {
    bool is_stopped;
    struct list_head head;
};
extern struct httpd_service daemon;

原本的 kthread-based 實作是每個 request 都會執行一次 kthread_run,改成 CMWQ 後改為呼叫 create_work 來建立 work,並把建立好的 work 放入 queue 中,server 結束時呼叫 free_work 釋放管理 work 的串列

int http_server_daemon(void *arg)
{
    ...
    INIT_LIST_HEAD(&daemon.head);

    while (!kthread_should_stop()) {
        ...
        if (unlikely(!(work = create_work(socket)))) {
            printk(KERN_ERR MODULE_NAME
                   ": create work error, connection closed\n");
            kernel_sock_shutdown(socket, SHUT_RDWR);
            sock_release(socket);
            continue;
        }
        queue_work(khttpd_wq, work);
    }
    daemon.is_stopped = true;
    free_work();

    return 0;
}

配置完空間後,呼叫 INIT_WORK 建立新的 work,並初始化再把 work 加進串列中,當 work 被執行時會呼叫 http_server_worker 來完成任務

static struct work_struct *create_work(struct socket *sk)
{
    struct http_request *work;

    if (!(work = kmalloc(sizeof(struct http_request), GFP_KERNEL)))
        return NULL;

    work->socket = sk;

    INIT_WORK(&work->khttpd_work, http_server_worker);

    list_add(&work->node, &daemon.head);

    return &work->khttpd_work;
}

執行命令 ./htstress http://localhost:8081 -t 3 -c 20 -n 200000 測試

requests:      200000
good requests: 200000 [100%]
bad requests:  0 [0%]
socket errors: 0 [0%]
seconds:       4.972
requests/sec:  40223.732

CMWQ:
seconds:       3.366
requests/sec:  59425.669
kthread CMWQ
throughput 40223.732 59425.669

實作 Directory listing

使用 linux/fs.h 中的 dir_contextfilp_openiterate_dir 函式

呼叫 http_server_send 送出 header 與 html,若目錄無法開啟則回傳 404,若可以則回傳 200 OK ,在執行 tracedir 時把目錄裡每個檔案寫在 html table 中並傳送到 client 讓網頁顯示所有的檔案

static void directory_listing(struct http_request *request)
{
    struct file *fp;
    char buf[SEND_BUFFER_SIZE] = {0};
    char request_url[REQUEST_URL_SIZE] = {0};

    request->dir_context.actor = tracedir;

    snprintf(request_url, REQUEST_URL_SIZE, "%s%s", DIR, request->request_url);
    fp = filp_open(request_url, O_RDONLY, 0);
    pr_info("request_url: %s\n", request_url);

    if (IS_ERR(fp)) {
        pr_info("Open file failed");
        http_server_send(request->socket, HTTP_RESPONSE_404,
                         strlen(HTTP_RESPONSE_404));
        return;
    }

    snprintf(buf, SEND_BUFFER_SIZE, HTTP_RESPONSE_200_KEEPALIVE_DUMMY);
    http_server_send(request->socket, buf, strlen(buf));
    snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "<html><head><style>\r\n",
             "body{font-family: monospace; font-size: 15px;}\r\n",
             "td {padding: 1.5px 6px;}\r\n",
             "</style></head><body><table>\r\n");
    http_server_send(request->socket, buf, strlen(buf));

    iterate_dir(fp, &request->dir_context);
    snprintf(buf, SEND_BUFFER_SIZE, "</table></body></html>\r\n");
    http_server_send(request->socket, buf, strlen(buf));
    filp_close(fp, NULL);

    return;
}

傳送每個檔案的檔名與檔案或子目錄的連結,並用 html 的 table 包起來,並發送到 request->socket

static int tracedir(struct dir_context *dir_context,
                    const char *name,
                    int namelen,
                    loff_t offset,
                    u64 ino,
                    unsigned int d_type)
{
    if (strcmp(name, ".")) {
        struct http_request *request =
            container_of(dir_context, struct http_request, dir_context);
        char buf[SEND_BUFFER_SIZE] = {0};

        snprintf(buf, SEND_BUFFER_SIZE,
                 "<tr><td><a href=\"%s\">%s</a></td></tr>\r\n", name, name);
        http_server_send(request->socket, buf, strlen(buf));
    }

    return 0;
}

結果:

Image Not Showing Possible Reasons
  • The image was uploaded to a note which you don't have access to
  • The note which the image was originally uploaded to has been deleted
Learn More →

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
修正上圖的存取權限。
Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

讀取檔案並顯示

原本的實作只會顯示目錄中的所有檔案,新增判斷開啟的是目錄或檔案來回傳檔案內容

static void directory_listing(struct http_request *request)
{
    ...
    if (S_ISDIR(fp->f_inode->i_mode)) {
        ...
    } else if (S_ISREG(fp->f_inode->i_mode)) {
        http_server_send(request->socket, buf, strlen(buf));

        char *file_content = kmalloc(fp->f_inode->i_size, GFP_KERNEL);
        if (!file_content) {
            pr_info("malloc failed");
            filp_close(fp, NULL);
            return;
        }

        int ret = kernel_read(fp, file_content, fp->f_inode->i_size, 0);
        http_server_send(request->socket, file_content, ret);
        kfree(file_content);
    }

    filp_close(fp, NULL);
    return;
}

使用 MIME 判斷副檔名

閱讀 include/linux/hashtable.h

MIME type 加入 hashtable 中做查詢

hashtable 結構:

struct mime_map_entry {
    const char *extension;
    const char *mime_type;
    struct hlist_node node;
};

把 MIME type 跟對應的副檔名加入 hashtable 中,在 moudle 初始化時呼叫

void init_mime_map_table(void)
{
    struct mime_map_entry *entry;

    hash_init(mime_map_table);

    for (entry = mime_map; entry->extension != NULL; entry++) {
        hash_add(mime_map_table, &entry->node, mime_hash(entry->extension));
    }
}

查詢 hashtable 得到 MIME type

const char *get_mime_type(const char *extension)
{
    if (!extension)
        return "text/plain";

    struct mime_map_entry *entry;
    struct hlist_node *node;

    hash_for_each_possible(mime_map_table, entry, node, mime_hash(extension))
    {
        if (!strcmp(entry->extension, extension))
            return entry->mime_type;
    }

    return "text/plain";
}

根據不同副檔名傳送不同 content-type

static void directory_listing(struct http_request *request)
{
    ...
    if (S_ISDIR(fp->f_inode->i_mode)) {
        ...
    } else if (S_ISREG(fp->f_inode->i_mode)) {
+        const char *extension = strchr(request->request_url, '.');
+        snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "HTTP/1.1 200 OK\r\n",
+                 "Content-Type: ", get_mime_type(extension),
+                 "\r\nConnection: Keep-Alive\r\n\r\n");
        http_server_send(request->socket, buf, strlen(buf));
        ...
        int ret = kernel_read(fp, file_content, fp->f_inode->i_size, 0);
        http_server_send(request->socket, file_content, ret);
        kfree(file_content);
    }

    filp_close(fp, NULL);
    return;
}

可以開啟並顯示圖片

利用 Ftrace 找出 khttpd 核心模組的效能瓶頸

  0)   0.130 us    |        http_should_keep_alive [khttpd]();
  0)               |        directory_listing [khttpd]() {
  0)   8.311 us    |          filp_open();
  0)   4.862 us    |          _printk();
  0) + 24.243 us   |          http_server_send.isra.0 [khttpd]();
  0) + 21.330 us   |          http_server_send.isra.0 [khttpd]();
  0) ! 815.615 us  |          iterate_dir();
  0) + 17.077 us   |          http_server_send.isra.0 [khttpd]();
  0)   0.732 us    |          filp_close();
  0) ! 893.672 us  |        }
  0) ! 894.180 us  |      }
  0) ! 897.131 us  |    }

可以發現 iterate_dir() 花最多時間,其次是 http_server_send(),參考 Jerejere0808 的開發紀錄使用 content cache 來加快 iterate_dir() 的執行時間

實作 content cache 改進處理效率

為了避免每次請求時都執行 iterate_dir()kernel_read(),可以將回應快取下來,以便在下次有相同的請求時不需要再次讀取。
使用 kernel 的 hashtable API 來實作快取,並且由於檔案系統符合資料讀取頻繁,資料寫入較少的情況,加入 Read-Copy Update (RCU) 同步機制,以支援多個讀取端同時存取快取的需求。

快取項目結構:

struct content_cache_entry {
    const char *request_url;
    const char *response;
    struct hlist_node node;
    spinlock_t lock;
};

每次 request 先根據 request->url 查詢 hashtable,若有就直接傳送,若無則跟原本一樣判斷 request 是檔案還是目錄,讀取完後再插入到 hashtable 中

static void directory_listing(struct http_request *request)
{
    ...
    // get cache content
    if (strlen(request->cache_buffer) != 0) {
        http_server_send(request->socket, request->cache_buffer,
                         strlen(request->cache_buffer));
        return;
    }

    ...
    // If not found in the cache, then insert the content into cache.
    insert_content_cache(request_url, request->cache_buffer);

    filp_close(fp, NULL);
    return;
}

實作 timer 定期釋放 cache content

參考 sehttpd作業說明使用 min heap 實作 timer

timer 與 queue 的結構

typedef struct {
    size_t key;
    size_t pos;
    struct hlist_node hash_node;
} timer_node;

typedef struct {
    void **priv;
    atomic_t nalloc;
    atomic_t size;
    prio_queue_comparator comp;
} prio_queue_t;

在將 response content 插入 cache 時呼叫 cache_add_timer 把 timer 插入 queue 中

void insert_content_cache(char *request_url, char *cache_buffer)
{
    ...
    cache_add_timer(entry, TIMEOUT_DEFAULT);

    spin_lock_init(&entry->lock);  
    spin_lock(&entry->lock);
    hash_add_rcu(content_cache_table, &entry->node, request_hash(request_url));
    spin_unlock(&entry->lock);
}

設定好 timer_node 後呼叫 prio_queue_insert() 確認節點在 min heap 中的位置

void cache_add_timer(struct content_cache_entry *entry, size_t timeout)
{
    timer_node *node = kmalloc(sizeof(timer_node), GFP_KERNEL);
    if (!node)
        return;

    time_update();
    entry->timer = node;
    node->key = atomic_read(&current_msec) + timeout;
    node->pos = atomic_read(&timer.nalloc) + 1;
    node->hash_node = entry->node;

    prio_queue_insert(&timer, node);
}

在每次 request 檢查有沒有 timer expired,因為是 min heap 所以只要檢查 heap 中的第一個即可

void handle_expired_timers()
{
    while (!prio_queue_is_empty(&timer)) {
        time_update();
        timer_node *node = prio_queue_min(&timer);

        if (node->key > atomic_read(&current_msec))
            return;
        prio_queue_delmin(&timer);
    }
}

用 do-while 來確保沒有被 min heap 沒有被同時修改,並把第一個節點跟最後一個有效的節點互換,交換完成之後把有效節點 nalloc 減一,再呼叫 prio_queue_sink 把被交換的節點下沉到正確的位置,最後再呼叫 hash_del_rcu 來把快取的內容移除,移除完之後呼叫 synchronize_rcu 來同步讀者

static bool prio_queue_delmin(prio_queue_t *ptr)
{
    ...
    do {
        if (prio_queue_is_empty(ptr))
            return true;

        nalloc = atomic_read(&ptr->nalloc);
        prio_queue_swap(ptr, 1, nalloc);

        if (nalloc == atomic_read(&ptr->nalloc)) {
            node = ptr->priv[nalloc];
            break;
        }
        prio_queue_swap(ptr, 1, nalloc);
    } while (1);

    atomic_dec(&ptr->nalloc);
    prio_queue_sink(ptr, 1);

    hash_del_rcu(&node->hash_node);
    synchronize_rcu();

    return true;
}

並在有相同 request 時延長 timer 的時間

void cache_timer_update(timer_node *node, size_t timeout)
{
    time_update();
    node->key = atomic_read(&current_msec) + timeout;
    node->pos = prio_queue_sink(&timer, node->pos);
}

執行 ./htstress http://localhost:8081 -n 20000 測試

requests:      20000
good requests: 20000 [100%]
bad requests:  0 [0%]
socket errors: 0 [0%]
seconds:       1.096
requests/sec:  18254.904

因為兩萬次 request 都是相同的,所以只有第一次需要讀檔,requests/sec 從 7474.638 提升到 18254.904