contributed by < ppodds
>
$ gcc --version
gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
$ lscpu
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
Address sizes: 39 bits physical, 48 bits virtual
CPU(s): 12
On-line CPU(s) list: 0-11
Thread(s) per core: 2
Core(s) per socket: 6
Socket(s): 1
NUMA node(s): 1
struct list_head *q_new()
{
// allocate space for queue node
struct list_head *head = malloc(sizeof(struct list_head));
if (!head)
return NULL;
// initialize list member
INIT_LIST_HEAD(head);
return head;
}
一開始我搞不清楚狀況,拿 element_t
來當 head ,寫到後面才發現自己應該弄錯了,所以後來改成現在這樣。
void q_free(struct list_head *l)
{
// check if list is NULL
if (l == NULL)
return;
struct list_head *li, *tmp;
list_for_each_safe (li, tmp, l) {
element_t *elem = list_entry(li, element_t, list);
list_del(li);
q_release_element(elem);
}
// free queue head
free(l);
}
有 list_for_each_safe
可以用,這樣就可以在迴圈內安全呼叫 free
函式以釋放目前節點所在的記憶體空間。
list_for_each_safe
其實是多用一個暫存來記錄指向下個節點的指標,所以只保證刪除目前的節點不會出錯
bool q_insert_head(struct list_head *head, char *s)
{
// check if queue is NULL
if (!head)
return false;
element_t *node = malloc(sizeof(element_t));
if (!node)
return false;
INIT_LIST_HEAD(&node->list);
// insert the node to the beginning of the queue
list_add(&node->list, head);
node->value = strdup(s);
if (!node->value) {
list_del(&node->list);
free(node);
return false;
}
return true;
}
一開始漏掉 strdup
的記憶體配置檢查,導致有三個測試跑不過,最後加個檢查就過了。
bool q_insert_tail(struct list_head *head, char *s)
{
// check if queue is NULL
if (!head)
return false;
element_t *node = malloc(sizeof(element_t));
if (!node)
return false;
INIT_LIST_HEAD(&node->list);
// insert the node to the end of the queue
list_add_tail(&node->list, head);
node->value = strdup(s);
if (!node->value) {
list_del(&node->list);
free(node);
return false;
}
return true;
}
兩個函式的實作只差在把 list_add
換成 list_add_tail
。
差不多一樣
工程人員說話要精準,避免說「差不多」
element_t *q_remove_head(struct list_head *head, char *sp, size_t bufsize)
{
// if the queue is NULL or empty, return NULL
if (!head || list_empty(head))
return NULL;
element_t *node = list_first_entry(head, element_t, list);
list_del(&node->list);
// check if sp is non-NULL
if (sp) {
strncpy(sp, node->value, bufsize - 1);
sp[bufsize - 1] = '\0';
}
return node;
}
一開始以為 strncpy
會幫忙加 \0
截斷字串,後來發現沒有就自己補上了。
int q_size(struct list_head *head)
{
if (!head)
return 0;
int len = 0;
struct list_head *li;
list_for_each (li, head)
len++;
return len;
}
新的實作
int q_size(struct list_head *head)
{
if (!head)
return 0;
int len = 0;
struct list_head *li;
for (li = head->next; li != head; li = li->next->next) {
if (li->next == head) {
len++;
break;
}
len += 2;
}
return len;
}
cmd> new
l = []
cmd> it a 10000000
l = [a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a ... ]
cmd> time size
Queue size = 10000000
l = [a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a ... ]
old:
Delta time = 0.858
0.855 ~ 0.86 之間
new:
Delta time = 0.813
0.81 ~ 0.83 之間
老師給的實作,就沒另外再動了。
如果一次移動兩格,可以降低變數修改的次數,可以稍稍提高一點速度,實測結果如上面一樣,簡略測試可以提升4~5%。缺點是程式碼變長變髒,也變得更不直覺。或許是有改進空間,但是實際上有沒有必要又是另一回事了。
TODO: 寫程式做個測試圖表
指出後續的改進空間!
感謝老師提點讓我有做實驗測試的機會!
bool q_delete_mid(struct list_head *head)
{
if (!head || list_empty(head))
return false;
struct list_head *slow;
struct list_head *fast = head->next;
list_for_each (slow, head) {
// find the middle node
if (fast == head || fast->next == head)
break;
fast = fast->next->next;
}
list_del(slow);
q_release_element(list_entry(slow, element_t, list));
return true;
// https://leetcode.com/problems/delete-the-middle-node-of-a-linked-list/
}
用Robert W. Floy 提出的循環偵測演算法來找中間點,操作數量大約是
應描述為 Robert W. Floy 提出的循環偵測演算法
bool q_delete_dup(struct list_head *head)
{
if (!head)
return false;
struct list_head *li, *safe;
bool dup = false;
for (li = (head)->next, safe = li->next; li != (head) && safe != (head);
li = safe, safe = li->next) {
element_t *cur = list_entry(li, element_t, list);
element_t *next = list_entry(li->next, element_t, list);
if (strcmp(cur->value, next->value) == 0) {
list_del(li);
q_release_element(cur);
dup = true;
} else if (dup) {
list_del(li);
q_release_element(cur);
dup = false;
}
}
return true;
// https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/
}
這裡的寫法就比較髒了,大意上是先找到第一個重複的項目,之後直到找到不重複的項目為止都會持續刪除目前的節點。當碰到內容不重複時,一樣刪除節點,接下來開始尋找下一個新的重複項目。操作數量已經壓到
void q_swap(struct list_head *head)
{
struct list_head *cur = head->next;
while (cur != head && cur->next != head) {
list_move(cur, cur->next);
cur = cur->next;
}
// https://leetcode.com/problems/swap-nodes-in-pairs/
}
這裡用了 list_move
來實作 swap ,但是實際上 list_move
應該是在list
之間移動節點用的,這裡只是剛好可以用上(因為 list_move
會把傳入的節點插在傳入的 head 後面,剛好符合需求)。
void q_reverse(struct list_head *head)
{
// if the queue is NULL or its size <= 1, return
if (!head || list_empty(head) || list_is_singular(head))
return;
struct list_head *cur = head->prev;
head->prev = head->next;
head->next = cur;
while (cur != head) {
struct list_head *tmp = cur->prev;
cur->prev = cur->next;
cur->next = tmp;
cur = tmp;
}
}
把全部的 link 倒過來就變成 reverse 了。
待補,現在的版本是把 linux 裡面的 sort
放進來。除了刪掉 if (unlikely(!++count)) cmp(priv, b, b);
以外基本沒有改動。教授希望我們把核心的想法融入到自己的 sort
當中,但是原本的版本實在太強了,甚至連看懂都花了我很多時間,還做了一份流程分析方便自己理解過程。因此這部分我想先擱著,等到把其他部分都完成有空餘時間再嘗試精進。
幻滅是成長的開始。不要太早說「有空餘時間」,哪裡不懂,就強化哪。
shuffle
因為 queue.h
用 hash 檢查把檔案鎖住沒辦法變更,所以只能把程式碼直接寫在 qtest.c
裡面。
先新增一個指令的實作函式,待會再來加進指令清單。(參數檢查和其他處理可以從 do_swap
複製下來用)
static bool do_shuffle(int argc, char *argv[])
{
if (argc != 1) {
report(1, "%s takes no arguments", argv[0]);
return false;
}
if (!l_meta.l)
report(3, "Warning: Try to access null queue");
error_check();
set_noallocate_mode(true);
if (exception_setup(true)) {
if (l_meta.l->next != l_meta.l && l_meta.l->next->next != l_meta.l) {
// do shuffle
srand(time(NULL));
struct list_head *tail = l_meta.l->prev;
for (int i = q_size(l_meta.l); i > 1; i--) {
int choose = rand() % i;
struct list_head *cur = l_meta.l->next;
for (int j = choose; j > 0; j--) {
cur = cur->next;
}
// swap tail and cur
element_t *a = list_entry(cur, element_t, list);
element_t *b = list_entry(tail, element_t, list);
char *tmp = b->value;
b->value = a->value;
a->value = tmp;
// update tail position
tail = tail->prev;
}
}
}
exception_cancel();
set_noallocate_mode(false);
show_queue(3);
return !error_check();
}
利用老師提到的 Fisher–Yates shuffle 的 Modern implementation 可以很快寫出符合功能要求的程式碼。但是這裡使用的資料結構是 linked list ,會有大量查找元素時的操作,會對演算法複雜度造成影響。可選的改善方案是把 queue 內的所有資料複製出來做一個 array ,之後在 array 裡面做 shuffle 操作, shuffle 操作結束後再把數值複製回 linked list 。 這種做法的好處是時間複雜度只有
補一個我自己的實作
static bool do_shuffle2(int argc, char *argv[])
{
if (argc != 1) {
report(1, "%s takes no arguments", argv[0]);
return false;
}
if (!l_meta.l)
report(3, "Warning: Try to access null queue");
error_check();
if (exception_setup(true)) {
if (l_meta.l->next != l_meta.l && l_meta.l->next->next != l_meta.l) {
int n = q_size(l_meta.l);
char **values = malloc(n * sizeof(char *));
if (!values) {
report(1, "ERROR: Out of memory");
return false;
}
// copy values from queue
struct list_head *li;
int cnt = 0;
list_for_each (li, l_meta.l) {
element_t *e = list_entry(li, element_t, list);
values[cnt] = e->value;
cnt++;
}
// do shuffle
srand(time(NULL));
for (int i = n; i > 1; i--) {
int choose = rand() % i;
// swap tail and cur
char *tmp = values[i - 1];
values[i - 1] = values[choose];
values[choose] = tmp;
}
// copy data from values
cnt = 0;
list_for_each (li, l_meta.l) {
element_t *e = list_entry(li, element_t, list);
e->value = values[cnt];
cnt++;
}
free(values);
}
}
exception_cancel();
show_queue(3);
return !error_check();
}
web
要在 qtest中提供 web
指令不是容易的事,要考量到下述幾點:
為了達成這幾點,我們要對 console.c
做修改,內容等待會再說明。
先建立 web.h
和 web.c
,作為 web
指令的程式碼模組。
web.h
#ifndef LAB0_WEB_H
#define LAB0_WEB_H
#include <netinet/in.h>
/* Simplifies calls to bind(), connect(), and accept() */
typedef struct sockaddr SA;
extern int listening;
extern int noise;
int open_listenfd(int port);
char *process(int connfd, struct sockaddr_in *clientaddr);
#endif /* LAB0_WEB_H */
web.c
#include "web.h"
#include <arpa/inet.h> /* inet_ntoa */
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/tcp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/sendfile.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#define LISTENQ 1024 /* second argument to listen() */
#define MAXLINE 1024 /* max length of a line */
#ifndef RIO_BUFSIZE
#define RIO_BUFSIZE 1024
#endif
#ifndef NO_LOG_ACCESS
#define LOG_ACCESS
#endif
typedef struct {
int rio_fd; /* descriptor for this buf */
int rio_cnt; /* unread byte in this buf */
char *rio_bufptr; /* next unread byte in this buf */
char rio_buf[RIO_BUFSIZE]; /* internal buffer */
} rio_t;
typedef struct {
char function_name[512];
} http_request;
char *default_mime_type = "text/plain";
int listening = -1;
int noise = 1;
void rio_readinitb(rio_t *rp, int fd)
{
rp->rio_fd = fd;
rp->rio_cnt = 0;
rp->rio_bufptr = rp->rio_buf;
}
/*
* rio_read - This is a wrapper for the Unix read() function that
* transfers min(n, rio_cnt) bytes from an internal buffer to a user
* buffer, where n is the number of bytes requested by the user and
* rio_cnt is the number of unread bytes in the internal buffer. On
* entry, rio_read() refills the internal buffer via a call to
* read() if the internal buffer is empty.
*/
/* $begin rio_read */
static ssize_t rio_read(rio_t *rp, char *usrbuf, size_t n)
{
int cnt;
while (rp->rio_cnt <= 0) { /* refill if buf is empty */
rp->rio_cnt = read(rp->rio_fd, rp->rio_buf, sizeof(rp->rio_buf));
if (rp->rio_cnt < 0) {
if (errno != EINTR) { /* interrupted by sig handler return */
return -1;
}
} else if (rp->rio_cnt == 0) { /* EOF */
return 0;
}
rp->rio_bufptr = rp->rio_buf; /* reset buffer ptr */
}
/* Copy min(n, rp->rio_cnt) bytes from internal buf to user buf */
cnt = n;
if (rp->rio_cnt < n) {
cnt = rp->rio_cnt;
}
memcpy(usrbuf, rp->rio_bufptr, cnt);
rp->rio_bufptr += cnt;
rp->rio_cnt -= cnt;
return cnt;
}
/*
* rio_readlineb - robustly read a text line (buffered)
*/
ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen)
{
int n;
char c, *bufp = usrbuf;
for (n = 1; n < maxlen; n++) {
int rc;
if ((rc = rio_read(rp, &c, 1)) == 1) {
*bufp++ = c;
if (c == '\n') {
break;
}
} else if (rc == 0) {
if (n == 1) {
return 0; /* EOF, no data read */
} else {
break; /* EOF, some data was read */
}
} else {
return -1; /* error */
}
}
*bufp = 0;
return n;
}
int open_listenfd(int port)
{
int listenfd, optval = 1;
struct sockaddr_in serveraddr;
/* Create a socket descriptor */
if ((listenfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
return -1;
}
/* Eliminates "Address already in use" error from bind. */
if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, (const void *) &optval,
sizeof(int)) < 0) {
return -1;
}
// 6 is TCP's protocol number
// enable this, much faster : 4000 req/s -> 17000 req/s
if (setsockopt(listenfd, 6, TCP_CORK, (const void *) &optval, sizeof(int)) <
0) {
return -1;
}
/* Listenfd will be an endpoint for all requests to port
on any IP address for this host */
memset(&serveraddr, 0, sizeof(serveraddr));
serveraddr.sin_family = AF_INET;
serveraddr.sin_addr.s_addr = htonl(INADDR_ANY);
serveraddr.sin_port = htons((unsigned short) port);
if (bind(listenfd, (SA *) &serveraddr, sizeof(serveraddr)) < 0) {
return -1;
}
/* Make it a listening socket ready to accept connection requests */
if (listen(listenfd, LISTENQ) < 0) {
return -1;
}
return listenfd;
}
void parse_request(int fd, http_request *req)
{
rio_t rio;
char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], format[64];
rio_readinitb(&rio, fd);
rio_readlineb(&rio, buf, MAXLINE);
snprintf(format, 64, "%%%ds %%%ds", MAXLINE - 1, MAXLINE - 1);
sscanf(buf, format, method, uri); /* version is not cared */
/* read all */
while (buf[0] != '\n' && buf[1] != '\n') { /* \n || \r\n */
rio_readlineb(&rio, buf, MAXLINE);
}
char *function_name = uri;
if (uri[0] == '/') {
function_name = uri + 1;
int length = strlen(function_name);
// don't care url query
for (int i = 0; i < length; i++) {
if (function_name[i] == '?') {
function_name[i] = '\0';
break;
}
}
}
strncpy(req->function_name, function_name, 512);
}
#ifdef LOG_ACCESS
void log_access(int status, struct sockaddr_in *c_addr, http_request *req)
{
printf("%s:%d %d - '%s'\n", inet_ntoa(c_addr->sin_addr),
ntohs(c_addr->sin_port), status, req->function_name);
}
#endif
/* replace '/' with ' ' */
void handle_request(int fd, char *func_name)
{
while ((*func_name) != '\0') {
if (*func_name == '/')
*func_name = ' ';
func_name++;
}
}
char *process(int fd, struct sockaddr_in *clientaddr)
{
#ifdef LOG_ACCESS
printf("accept request, fd is %d, pid is %d\n", fd, getpid());
#endif
http_request req;
parse_request(fd, &req);
handle_request(fd, req.function_name);
int status = 200;
#ifdef LOG_ACCESS
log_access(status, clientaddr, &req);
#endif
char *ret = malloc(strlen(req.function_name) + 1);
strncpy(ret, req.function_name, strlen(req.function_name) + 1);
return ret;
}
listening
用來記錄正在監聽的檔案描述元, noise
是用來確認是否要開啟 linenoise
的變數。 因為在開啟 web server的時候,不需要讓 linenoise
幫忙處理輸入,所以在此直接用一個變數來關閉功能。缺點是會造成 web
功能開啟用, linenoise
的自動完成等功能都沒辦法使用。
接下來是 console.c
int cmd_select(int nfds,
fd_set *readfds,
fd_set *writefds,
fd_set *exceptfds,
struct timeval *timeout)
{
int infd;
fd_set local_readset;
if (cmd_done())
return 0;
if (!block_flag) {
/* Process any commands in input buffer */
if (!readfds)
readfds = &local_readset;
/* Add input fd to readset for select */
infd = buf_stack->fd;
FD_ZERO(readfds);
FD_SET(infd, readfds);
/* If web not ready listen */
if (listening != -1)
FD_SET(listening, readfds);
if (infd == STDIN_FILENO && prompt_flag) {
printf("%s", prompt);
fflush(stdout);
prompt_flag = true;
}
if (infd >= nfds)
nfds = infd + 1;
if (listening >= nfds)
nfds = listening + 1;
}
if (nfds == 0)
return 0;
int result = select(nfds, readfds, writefds, exceptfds, timeout);
if (result <= 0)
return result;
infd = buf_stack->fd;
if (readfds && FD_ISSET(infd, readfds)) {
/* Commandline input available */
FD_CLR(infd, readfds);
result--;
char *cmdline;
cmdline = readline();
if (cmdline)
interpret_cmd(cmdline);
} else if (readfds && FD_ISSET(listening, readfds)) {
FD_CLR(listening, readfds);
result--;
int connfd;
struct sockaddr_in clientaddr;
socklen_t clientlen = sizeof(clientaddr);
connfd = accept(listening, (SA *) &clientaddr, &clientlen);
char *p = process(connfd, &clientaddr);
if (p)
interpret_cmd(p);
free(p);
close(connfd);
}
return result;
}
bool run_console(char *infile_name)
{
if (!push_file(infile_name)) {
report(1, "ERROR: Could not open source file '%s'", infile_name);
return false;
}
if (!has_infile) {
char *cmdline;
while (noise && (cmdline = linenoise(prompt)) != NULL) {
interpret_cmd(cmdline);
linenoiseHistoryAdd(cmdline); /* Add to the history. */
linenoiseHistorySave(HISTORY_FILE); /* Save the history on disk. */
linenoiseFree(cmdline);
}
if (!noise) {
while (!cmd_done()) {
cmd_select(0, NULL, NULL, NULL, NULL);
}
}
} else {
while (!cmd_done())
cmd_select(0, NULL, NULL, NULL, NULL);
}
return err_cnt == 0;
}
修改了兩個 function , run_console
的修改讓我們可以藉由 noise
來控制 linenoise
功能的開關。 cmd_select
則是加入 listening
到 select
監視的內容中。如此,我們便能利用 select
同時監測到來自命令列和 web server 的指令。
最後在 qtest.c
內加入指令就完成了
static bool do_web(int argc, char *argv[])
{
if (argc != 1) {
report(1, "%s takes no arguments", argv[0]);
return false;
}
if (exception_setup(true)) {
const int PORT = 9999;
if (listening == -1) {
listening = open_listenfd(PORT);
signal(SIGPIPE, SIG_IGN);
noise = false;
set_echo(false);
printf("listen on port %d, fd is %d\n", PORT, listening);
} else {
report(1, "web server is already turned on");
}
}
exception_cancel();
return !error_check();
}
// in console_init()
ADD_COMMAND(web, " | open simple http server");