依據 ktcp 的指示,持續改進 sysprog21/khttpd 的程式碼,打造出高效且穩定的網頁伺服器。
依據 ktcp 的指示,在 sysprog21/khttpd 的基礎之上,利用 CMWQ 一類的機制,打造出高效且穩定的網頁伺服器,需要處理 lock-free 的資源管理議題 (如 RCU)。
搭配閱讀: 〈RCU Usage In the Linux Kernel: One Decade Later〉
An application can change the IP options on a per-socket basis by calling sys_setsockopt,
which eventually causes the kernel to call setsockopt.
setsockopt sets the new IP options, then uses call_
rcu to asynchronously free the memory storing the old IP
options. Using call_rcu
ensures all threads that might
be manipulating the old options will have released their
reference by exiting the RCU critical section.
以 ftrace 檢視 kHTTPd 運作的細節,檢視 I/O 模型並尋求效能改進空間
參考 kecho 把所有 work 使用一個鏈結串列做管理,用 is_stopped
判斷是否結束連線
/* Global server state shared between the accept loop and the workers.
 * is_stopped: set by http_server_daemon() at shutdown so in-flight
 *             workers know the server is going away.
 * head:       list of all allocated work items; walked by free_work()
 *             at teardown to reclaim them. */
struct httpd_service {
bool is_stopped;
struct list_head head;
};
extern struct httpd_service daemon;
原本的 kthread-based 實作是每個 request 都會執行一次 kthread_run
,改成 CMWQ 後改為呼叫 create_work
來建立 work,並把建立好的 work 放入 queue 中,server 結束時呼叫 free_work
釋放管理 work 的串列
/* Accept loop, run as a kthread: for each accepted connection, wrap the
 * socket in a work item via create_work() and queue it on khttpd_wq.
 * (Excerpt; elided parts marked "...".) Returns 0 when asked to stop. */
int http_server_daemon(void *arg)
{
...
INIT_LIST_HEAD(&daemon.head);
while (!kthread_should_stop()) {
...
/* Allocation failure: refuse this connection cleanly and keep going. */
if (unlikely(!(work = create_work(socket)))) {
printk(KERN_ERR MODULE_NAME
": create work error, connection closed\n");
kernel_sock_shutdown(socket, SHUT_RDWR);
sock_release(socket);
continue;
}
queue_work(khttpd_wq, work);
}
/* Signal workers to wind down before freeing their bookkeeping list. */
daemon.is_stopped = true;
free_work();
return 0;
}
配置完空間後,呼叫 INIT_WORK
建立新的 work,並初始化再把 work 加進串列中,當 work 被執行時會呼叫 http_server_worker
來完成任務
/* Allocate and initialize a work item for an accepted connection.
 * @sk: connected client socket; ownership stays with the caller on failure.
 * Returns the embedded work_struct (to hand to queue_work()) or NULL on OOM.
 * The item is also linked into daemon.head so free_work() can reclaim it.
 * NOTE(review): list_add() is unsynchronized; this is safe only while the
 * accept thread is the sole writer of daemon.head and free_work() runs
 * after the daemon thread has stopped -- confirm that ordering. */
static struct work_struct *create_work(struct socket *sk)
{
    /* sizeof(*work) instead of sizeof(struct http_request): stays correct
     * if the pointed-to type ever changes. */
    struct http_request *work = kmalloc(sizeof(*work), GFP_KERNEL);
    if (!work)
        return NULL;
    work->socket = sk;
    INIT_WORK(&work->khttpd_work, http_server_worker);
    list_add(&work->node, &daemon.head);
    return &work->khttpd_work;
}
執行命令 ./htstress http://localhost:8081 -t 3 -c 20 -n 200000
測試
requests: 200000
good requests: 200000 [100%]
bad requests: 0 [0%]
socket errors: 0 [0%]
seconds: 4.972
requests/sec: 40223.732
CMWQ:
seconds: 3.366
requests/sec: 59425.669
| | kthread | CMWQ |
|---|---|---|
| throughput (requests/sec) | 40223.732 | 59425.669 |
使用 linux/fs.h 中的 dir_context
、filp_open
、iterate_dir
函式
呼叫 http_server_send
送出 header 與 html,若目錄無法開啟則回傳 404,若可以則回傳 200 OK ,在執行 tracedir
時把目錄裡每個檔案寫在 html table 中並傳送到 client 讓網頁顯示所有的檔案
/* Serve a directory listing for the requested URL.
 * Maps the URL under the DIR document root, replies 404 if the path
 * cannot be opened, otherwise streams a 200 keep-alive header, an HTML
 * table filled row-by-row by the tracedir() actor via iterate_dir(),
 * and a closing footer. */
static void directory_listing(struct http_request *request)
{
    struct file *fp;
    char buf[SEND_BUFFER_SIZE] = {0};
    char request_url[REQUEST_URL_SIZE] = {0};

    request->dir_context.actor = tracedir;
    /* Build the on-disk path: document root + request path. */
    snprintf(request_url, REQUEST_URL_SIZE, "%s%s", DIR, request->request_url);
    fp = filp_open(request_url, O_RDONLY, 0);
    pr_info("request_url: %s\n", request_url);
    if (IS_ERR(fp)) {
        /* Fix: kernel log messages must end with '\n' or they are held
         * in the printk buffer until the next message flushes them. */
        pr_info("Open file failed\n");
        http_server_send(request->socket, HTTP_RESPONSE_404,
                         strlen(HTTP_RESPONSE_404));
        return;
    }
    snprintf(buf, SEND_BUFFER_SIZE, HTTP_RESPONSE_200_KEEPALIVE_DUMMY);
    http_server_send(request->socket, buf, strlen(buf));
    snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "<html><head><style>\r\n",
             "body{font-family: monospace; font-size: 15px;}\r\n",
             "td {padding: 1.5px 6px;}\r\n",
             "</style></head><body><table>\r\n");
    http_server_send(request->socket, buf, strlen(buf));
    /* tracedir() emits one <tr> per directory entry. */
    iterate_dir(fp, &request->dir_context);
    snprintf(buf, SEND_BUFFER_SIZE, "</table></body></html>\r\n");
    http_server_send(request->socket, buf, strlen(buf));
    filp_close(fp, NULL);
}
傳送每個檔案的檔名與檔案或子目錄的連結,並用 html 的 table 包起來,並發送到 request->socket
/* dir_context actor invoked by iterate_dir() once per directory entry:
 * emits an HTML table row linking to the entry, skipping only ".".
 * IMPORTANT: @name is NOT NUL-terminated -- it is exactly @namelen bytes.
 * The original used strcmp() and "%s" on it, which read past the entry
 * name; fixed here with a length-bounded compare and "%.*s". */
static int tracedir(struct dir_context *dir_context,
                    const char *name,
                    int namelen,
                    loff_t offset,
                    u64 ino,
                    unsigned int d_type)
{
    /* Skip the "." self entry; keep ".." so clients can navigate up. */
    if (!(namelen == 1 && name[0] == '.')) {
        struct http_request *request =
            container_of(dir_context, struct http_request, dir_context);
        char buf[SEND_BUFFER_SIZE] = {0};
        snprintf(buf, SEND_BUFFER_SIZE,
                 "<tr><td><a href=\"%.*s\">%.*s</a></td></tr>\r\n", namelen,
                 name, namelen, name);
        http_server_send(request->socket, buf, strlen(buf));
    }
    return 0;
}
結果:
原本的實作只會顯示目錄中的所有檔案,新增判斷開啟的是目錄或檔案來回傳檔案內容
/* Extended directory_listing(): distinguishes directories from regular
 * files and streams a regular file's raw contents to the client.
 * (Excerpt; elided parts marked "...".) */
static void directory_listing(struct http_request *request)
{
...
if (S_ISDIR(fp->f_inode->i_mode)) {
...
} else if (S_ISREG(fp->f_inode->i_mode)) {
http_server_send(request->socket, buf, strlen(buf));
/* NOTE(review): allocating i_size bytes in one kmalloc does not scale
 * to large files -- a fixed-size chunked read loop would be safer. */
char *file_content = kmalloc(fp->f_inode->i_size, GFP_KERNEL);
if (!file_content) {
pr_info("malloc failed");
filp_close(fp, NULL);
return;
}
/* NOTE(review): kernel_read()'s last parameter is a loff_t *pos;
 * passing literal 0 means a NULL position pointer -- confirm the
 * kernel version in use accepts that. ret is the byte count read. */
int ret = kernel_read(fp, file_content, fp->f_inode->i_size, 0);
http_server_send(request->socket, file_content, ret);
kfree(file_content);
}
filp_close(fp, NULL);
return;
}
把 MIME type 加入 hashtable 中做查詢
hashtable 結構:
/* One (file extension -> MIME type) mapping; node chains the entry into
 * the mime_map_table hashtable bucket selected by mime_hash(extension). */
struct mime_map_entry {
const char *extension;
const char *mime_type;
struct hlist_node node;
};
把 MIME type 跟對應的副檔名加入 hashtable 中,在 module 初始化時呼叫
/* Populate mime_map_table from the NULL-terminated mime_map array,
 * hashing each entry by its file extension. Called once at module
 * initialization. */
void init_mime_map_table(void)
{
    struct mime_map_entry *e = mime_map;

    hash_init(mime_map_table);
    while (e->extension) {
        hash_add(mime_map_table, &e->node, mime_hash(e->extension));
        e++;
    }
}
查詢 hashtable 得到 MIME type
const char *get_mime_type(const char *extension)
{
if (!extension)
return "text/plain";
struct mime_map_entry *entry;
struct hlist_node *node;
hash_for_each_possible(mime_map_table, entry, node, mime_hash(extension))
{
if (!strcmp(entry->extension, extension))
return entry->mime_type;
}
return "text/plain";
}
根據不同副檔名傳送不同 content-type
/* directory_listing() with Content-Type negotiation: derives the MIME
 * type from the first '.' in the request URL and builds the 200 header
 * with it. Diff excerpt from the write-up: lines prefixed "+" are the
 * additions; "..." marks elided code.
 * NOTE(review): strchr() finds the FIRST dot, so "archive.tar.gz" maps
 * to the ".tar.gz" lookup key -- confirm the mime table expects that. */
static void directory_listing(struct http_request *request)
{
...
if (S_ISDIR(fp->f_inode->i_mode)) {
...
} else if (S_ISREG(fp->f_inode->i_mode)) {
+ const char *extension = strchr(request->request_url, '.');
+ snprintf(buf, SEND_BUFFER_SIZE, "%s%s%s%s", "HTTP/1.1 200 OK\r\n",
+ "Content-Type: ", get_mime_type(extension),
+ "\r\nConnection: Keep-Alive\r\n\r\n");
http_server_send(request->socket, buf, strlen(buf));
...
int ret = kernel_read(fp, file_content, fp->f_inode->i_size, 0);
http_server_send(request->socket, file_content, ret);
kfree(file_content);
}
filp_close(fp, NULL);
return;
}
可以開啟並顯示圖片
0) 0.130 us | http_should_keep_alive [khttpd]();
0) | directory_listing [khttpd]() {
0) 8.311 us | filp_open();
0) 4.862 us | _printk();
0) + 24.243 us | http_server_send.isra.0 [khttpd]();
0) + 21.330 us | http_server_send.isra.0 [khttpd]();
0) ! 815.615 us | iterate_dir();
0) + 17.077 us | http_server_send.isra.0 [khttpd]();
0) 0.732 us | filp_close();
0) ! 893.672 us | }
0) ! 894.180 us | }
0) ! 897.131 us | }
可以發現 iterate_dir()
花最多時間,其次是 http_server_send
(),參考 Jerejere0808 的開發紀錄使用 content cache 來加快 iterate_dir()
的執行時間
為了避免每次請求時都執行 iterate_dir()
或 kernel_read()
,可以將回應快取下來,以便在下次有相同的請求時不需要再次讀取。
使用 kernel 的 hashtable API 來實作快取,並且由於檔案系統符合資料讀取頻繁,資料寫入較少的情況,加入 Read-Copy Update (RCU) 同步機制,以支援多個讀取端同時存取快取的需求。
快取項目結構:
/* One cached HTTP response, keyed by request URL and chained into the
 * RCU-protected content_cache_table.
 * NOTE(review): a per-entry spinlock cannot serialize concurrent
 * insertions into the SHARED table; this is only safe with a single
 * writer or an additional table-wide lock -- verify the writer model. */
struct content_cache_entry {
const char *request_url;
const char *response;
struct hlist_node node;
spinlock_t lock;
};
每次 request 先根據 request->url
查詢 hashtable,若有就直接傳送,若無則跟原本一樣判斷 request 是檔案還是目錄,讀取完後再插入到 hashtable 中
/* Cache-aware directory_listing(): serve a previously cached response
 * when one exists for this URL, otherwise build the response as before
 * and insert it into the cache. (Excerpt; elided parts marked "...".) */
static void directory_listing(struct http_request *request)
{
...
// get cache content
/* Cache hit: request->cache_buffer was filled by the lookup above;
 * a non-empty buffer means the whole response can be replayed. */
if (strlen(request->cache_buffer) != 0) {
http_server_send(request->socket, request->cache_buffer,
strlen(request->cache_buffer));
return;
}
...
// If not found in the cache, then insert the content into cache.
insert_content_cache(request_url, request->cache_buffer);
filp_close(fp, NULL);
return;
}
參考 sehttpd、作業說明使用 min heap 實作 timer
timer 與 queue 的結構
/* Heap node for a cache-entry timer.
 * key:       absolute expiry time in milliseconds (current_msec + timeout).
 * pos:       the node's index within the heap array.
 * hash_node: hashtable linkage of the cached entry, so expiry can call
 *            hash_del_rcu() on it. */
typedef struct {
size_t key;
size_t pos;
struct hlist_node hash_node;
} timer_node;
/* Min-heap priority queue ordered by comp.
 * priv:   backing array of node pointers.
 * nalloc: number of live nodes; size: presumably the array capacity
 *         -- confirm against the allocator. Both atomic because the
 *         heap is touched from multiple contexts. */
typedef struct {
void **priv;
atomic_t nalloc;
atomic_t size;
prio_queue_comparator comp;
} prio_queue_t;
在將 response content 插入 cache 時呼叫 cache_add_timer
把 timer 插入 queue 中
/* Insert a freshly built response into the content cache and arm its
 * expiry timer. (Excerpt; elided parts marked "...".) */
void insert_content_cache(char *request_url, char *cache_buffer)
{
...
cache_add_timer(entry, TIMEOUT_DEFAULT);
spin_lock_init(&entry->lock);
/* NOTE(review): locking the entry's own, just-initialized lock does
 * not protect content_cache_table from concurrent writers inserting
 * OTHER entries -- confirm only one thread ever inserts. */
spin_lock(&entry->lock);
hash_add_rcu(content_cache_table, &entry->node, request_hash(request_url));
spin_unlock(&entry->lock);
}
設定好 timer_node
後呼叫 prio_queue_insert()
確認節點在 min heap 中的位置
/* Attach an expiry timer to a cache entry and push it onto the min heap.
 * @timeout: lifetime in milliseconds from now.
 * Silently does nothing on allocation failure (the entry just never
 * expires via the heap). */
void cache_add_timer(struct content_cache_entry *entry, size_t timeout)
{
    timer_node *node = kmalloc(sizeof(timer_node), GFP_KERNEL);
    if (!node)
        return;
    time_update();
    entry->timer = node;
    /* Fix: "&current_msec" was mojibake ("¤t_msec", a stray HTML
     * &curren; entity) in the original text. */
    node->key = atomic_read(&current_msec) + timeout;
    node->pos = atomic_read(&timer.nalloc) + 1;
    /* NOTE(review): this copies the hlist_node BY VALUE; a copied list
     * node does not relink its neighbours, so hash_del_rcu() on the copy
     * is suspect -- it likely should keep a pointer to the entry instead. */
    node->hash_node = entry->node;
    prio_queue_insert(&timer, node);
}
在每次 request 檢查有沒有 timer expired,因為是 min heap 所以只要檢查 heap 中的第一個即可
void handle_expired_timers()
{
while (!prio_queue_is_empty(&timer)) {
time_update();
timer_node *node = prio_queue_min(&timer);
if (node->key > atomic_read(¤t_msec))
return;
prio_queue_delmin(&timer);
}
}
用 do-while 來確保 min heap 沒有被同時修改,並把第一個節點跟最後一個有效的節點互換,交換完成之後把有效節點 nalloc 減一,再呼叫 prio_queue_sink
把被交換的節點下沉到正確的位置,最後再呼叫 hash_del_rcu
來把快取的內容移除,移除完之後呼叫 synchronize_rcu
來同步讀者
/* Remove the minimum (earliest-deadline) node from the heap, then
 * unlink the corresponding cache entry under RCU.
 * (Excerpt; elided parts marked "...".) */
static bool prio_queue_delmin(prio_queue_t *ptr)
{
...
do {
if (prio_queue_is_empty(ptr))
return true;
/* Snapshot the live count, swap root with the last live node, then
 * re-check the count: if another thread changed the heap meanwhile,
 * undo the swap and retry the whole sequence. */
nalloc = atomic_read(&ptr->nalloc);
prio_queue_swap(ptr, 1, nalloc);
if (nalloc == atomic_read(&ptr->nalloc)) {
node = ptr->priv[nalloc];
break;
}
/* Conflict detected: swap back and retry.
 * NOTE(review): swap/undo is not atomic as a pair, so a concurrent
 * reader can still observe the transiently swapped heap -- confirm
 * callers tolerate that. */
prio_queue_swap(ptr, 1, nalloc);
} while (1);
atomic_dec(&ptr->nalloc);
/* Sink the promoted last element to restore the heap invariant. */
prio_queue_sink(ptr, 1);
/* Unpublish the cached entry, then wait for all RCU readers that may
 * still hold a reference to drain before the caller frees it. */
hash_del_rcu(&node->hash_node);
synchronize_rcu();
return true;
}
並在有相同 request 時延長 timer 的時間
/* Extend @node's deadline on a repeated request (cache hit) and restore
 * heap ordering afterwards. @timeout: new lifetime in ms from now. */
void cache_timer_update(timer_node *node, size_t timeout)
{
    time_update();
    /* Fix: "&current_msec" was mojibake ("¤t_msec") in the
     * original text. */
    node->key = atomic_read(&current_msec) + timeout;
    /* NOTE(review): this relies on prio_queue_sink() returning the new
     * position, yet prio_queue_delmin() ignores its return value --
     * confirm the function's actual signature supports this use. */
    node->pos = prio_queue_sink(&timer, node->pos);
}
執行 ./htstress http://localhost:8081 -n 20000
測試
requests: 20000
good requests: 20000 [100%]
bad requests: 0 [0%]
socket errors: 0 [0%]
seconds: 1.096
requests/sec: 18254.904
因為兩萬次 request 都是相同的,所以只有第一次需要讀檔,requests/sec 從 7474.638 提升到 18254.904