2022q1 Homework1 (lab0)

contributed by < ppodds >

實驗環境

$ gcc --version
gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0

$ lscpu
Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   39 bits physical, 48 bits virtual
CPU(s):                          12
On-line CPU(s) list:             0-11
Thread(s) per core:              2
Core(s) per socket:              6
Socket(s):                       1
NUMA node(s):                    1

開發過程

q_new

struct list_head *q_new()
{
    // allocate space for queue node
    struct list_head *head = malloc(sizeof(struct list_head));
    if (!head)
        return NULL;
    // initialize list member
    INIT_LIST_HEAD(head);
    return head;
}

一開始我搞不清楚狀況，拿 element_t 來當 head ，寫到後面才發現自己應該弄錯了，所以後來改成現在這樣。

q_free

void q_free(struct list_head *l)
{
    // check if list is NULL
    if (l == NULL)
        return;
    struct list_head *li, *tmp;
    list_for_each_safe (li, tmp, l) {
        element_t *elem = list_entry(li, element_t, list);
        list_del(li);
        q_release_element(elem);
    }
    // free queue head
    free(l);
}

有 list_for_each_safe 可以用，這樣就可以在迴圈內安全呼叫 free 函式以釋放目前節點所在的記憶體空間。

list_for_each_safe 其實是多用一個暫存來記錄指向下個節點的指標，所以只保證刪除目前的節點不會出錯

q_insert_head

bool q_insert_head(struct list_head *head, char *s)
{
    // check if queue is NULL
    if (!head)
        return false;
    element_t *node = malloc(sizeof(element_t));
    if (!node)
        return false;
    INIT_LIST_HEAD(&node->list);
    // insert the node to the beginning of the queue
    list_add(&node->list, head);
    node->value = strdup(s);
    if (!node->value) {
        list_del(&node->list);
        free(node);
        return false;
    }
    return true;
}

一開始漏掉 strdup 的記憶體配置檢查，導致有三個測試跑不過，最後加個檢查就過了。

q_insert_tail

bool q_insert_tail(struct list_head *head, char *s)
{
    // check if queue is NULL
    if (!head)
        return false;
    element_t *node = malloc(sizeof(element_t));
    if (!node)
        return false;
    INIT_LIST_HEAD(&node->list);
    // insert the node to the end of the queue
    list_add_tail(&node->list, head);
    node->value = strdup(s);
    if (!node->value) {
        list_del(&node->list);
        free(node);
        return false;
    }
    return true;
}

兩個函式的實作只差在把 list_add 換成 list_add_tail 。

~~差不多一樣~~

工程人員說話要精準，避免說「差不多」

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

jserv

q_remove_head

element_t *q_remove_head(struct list_head *head, char *sp, size_t bufsize)
{
    // if the queue is NULL or empty, return NULL
    if (!head || list_empty(head))
        return NULL;

    element_t *node = list_first_entry(head, element_t, list);
    list_del(&node->list);
    // check if sp is non-NULL
    if (sp) {
        strncpy(sp, node->value, bufsize - 1);
        sp[bufsize - 1] = '\0';
    }
    return node;
}

一開始以為 strncpy 會幫忙加 \0 截斷字串，後來發現沒有就自己補上了。

q_size

int q_size(struct list_head *head)
{
    if (!head)
        return 0;

    int len = 0;
    struct list_head *li;

    list_for_each (li, head)
        len++;
    return len;
}

新的實作

int q_size(struct list_head *head)
{
    if (!head)
        return 0;

    int len = 0;
    struct list_head *li;

    for (li = head->next; li != head; li = li->next->next) {
        if (li->next == head) {
            len++;
            break;
        }
        len += 2;
    }
    return len;
}

cmd> new 
l = []
cmd> it a 10000000
l = [a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a ... ]
cmd> time size
Queue size = 10000000
l = [a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a ... ]

old:
Delta time = 0.858

0.855 ~ 0.86 之間

new:
Delta time = 0.813

0.81 ~ 0.83 之間

~~老師給的實作，就沒另外再動了。~~

如果一次移動兩格，可以降低變數修改的次數，可以稍稍提高一點速度，實測結果如上面一樣，簡略測試可以提升4~5%。缺點是程式碼變長變髒，也變得更不直覺。或許是有改進空間，但是實際上有沒有必要又是另一回事了。

TODO: 寫程式做個測試圖表

指出後續的改進空間!

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

jserv

感謝老師提點讓我有做實驗測試的機會!

q_delete_mid

bool q_delete_mid(struct list_head *head)
{
    if (!head || list_empty(head))
        return false;

    struct list_head *slow;
    struct list_head *fast = head->next;

    list_for_each (slow, head) {
        // find the middle node
        if (fast == head || fast->next == head)
            break;
        fast = fast->next->next;
    }
    list_del(slow);
    q_release_element(list_entry(slow, element_t, list));
    return true;
    // https://leetcode.com/problems/delete-the-middle-node-of-a-linked-list/
}

用Robert W. Floy 提出的循環偵測演算法來找中間點，操作數量大約是

\frac{n}{2}

，快還要更快

應描述為 Robert W. Floy 提出的循環偵測演算法

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

jserv

q_delete_dup

bool q_delete_dup(struct list_head *head)
{
    if (!head)
        return false;

    struct list_head *li, *safe;
    bool dup = false;
    for (li = (head)->next, safe = li->next; li != (head) && safe != (head);
         li = safe, safe = li->next) {
        element_t *cur = list_entry(li, element_t, list);
        element_t *next = list_entry(li->next, element_t, list);
        if (strcmp(cur->value, next->value) == 0) {
            list_del(li);
            q_release_element(cur);
            dup = true;
        } else if (dup) {
            list_del(li);
            q_release_element(cur);
            dup = false;
        }
    }
    return true;
    // https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/
}

這裡的寫法就比較髒了，大意上是先找到第一個重複的項目，之後直到找到不重複的項目為止都會持續刪除目前的節點。當碰到內容不重複時，一樣刪除節點，接下來開始尋找下一個新的重複項目。操作數量已經壓到

n

左右了，再來的改進應該也只會是美觀的調整。

q_swap

void q_swap(struct list_head *head)
{
    struct list_head *cur = head->next;
    while (cur != head && cur->next != head) {
        list_move(cur, cur->next);
        cur = cur->next;
    }
    // https://leetcode.com/problems/swap-nodes-in-pairs/
}

這裡用了 list_move 來實作 swap ，但是實際上 list_move 應該是在list 之間移動節點用的，這裡只是剛好可以用上(因為 list_move 會把傳入的節點插在傳入的 head 後面，剛好符合需求)。

q_reverse

void q_reverse(struct list_head *head)
{
    // if the queue is NULL or its size <= 1, return
    if (!head || list_empty(head) || list_is_singular(head))
        return;

    struct list_head *cur = head->prev;
    head->prev = head->next;
    head->next = cur;

    while (cur != head) {
        struct list_head *tmp = cur->prev;
        cur->prev = cur->next;
        cur->next = tmp;
        cur = tmp;
    }
}

把全部的 link 倒過來就變成 reverse 了。

q_sort

待補，現在的版本是把 linux 裡面的 sort 放進來。除了刪掉 if (unlikely(!++count)) cmp(priv, b, b); 以外基本沒有改動。教授希望我們把核心的想法融入到自己的 sort 當中，但是原本的版本實在太強了，甚至連看懂都花了我很多時間，還做了一份流程分析方便自己理解過程。因此這部分我想先擱著，等到把其他部分都完成有空餘時間再嘗試精進。

幻滅是成長的開始。不要太早說「有空餘時間」，哪裡不懂，就強化哪。

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

jserv

在 qtest 提供新的命令 `shuffle`

因為 queue.h 用 hash 檢查把檔案鎖住沒辦法變更，所以只能把程式碼直接寫在 qtest.c 裡面。

先新增一個指令的實作函式，待會再來加進指令清單。(參數檢查和其他處理可以從 do_swap 複製下來用)

static bool do_shuffle(int argc, char *argv[])
{
    if (argc != 1) {
        report(1, "%s takes no arguments", argv[0]);
        return false;
    }

    if (!l_meta.l)
        report(3, "Warning: Try to access null queue");
    error_check();

    set_noallocate_mode(true);
    if (exception_setup(true)) {
        if (l_meta.l->next != l_meta.l && l_meta.l->next->next != l_meta.l) {
            // do shuffle
            srand(time(NULL));
            struct list_head *tail = l_meta.l->prev;
            for (int i = q_size(l_meta.l); i > 1; i--) {
                int choose = rand() % i;
                struct list_head *cur = l_meta.l->next;
                for (int j = choose; j > 0; j--) {
                    cur = cur->next;
                }
                // swap tail and cur
                element_t *a = list_entry(cur, element_t, list);
                element_t *b = list_entry(tail, element_t, list);
                char *tmp = b->value;
                b->value = a->value;
                a->value = tmp;
                // update tail position
                tail = tail->prev;
            }
        }
    }
    exception_cancel();

    set_noallocate_mode(false);

    show_queue(3);
    return !error_check();
}

利用老師提到的 Fisher–Yates shuffle 的 Modern implementation 可以很快寫出符合功能要求的程式碼。但是這裡使用的資料結構是 linked list ，會有大量查找元素時的操作，會對演算法複雜度造成影響。可選的改善方案是把 queue 內的所有資料複製出來做一個 array ，之後在 array 裡面做 shuffle 操作， shuffle 操作結束後再把數值複製回 linked list 。這種做法的好處是時間複雜度只有

O (n)

(實際操作數大約

3 n

左右) ，不過需要

O (n)

的空間複雜度。

補一個我自己的實作

static bool do_shuffle2(int argc, char *argv[])
{
    if (argc != 1) {
        report(1, "%s takes no arguments", argv[0]);
        return false;
    }

    if (!l_meta.l)
        report(3, "Warning: Try to access null queue");
    error_check();

    if (exception_setup(true)) {
        if (l_meta.l->next != l_meta.l && l_meta.l->next->next != l_meta.l) {
            int n = q_size(l_meta.l);
            char **values = malloc(n * sizeof(char *));
            if (!values) {
                report(1, "ERROR:  Out of memory");
                return false;
            }
            // copy values from queue
            struct list_head *li;
            int cnt = 0;
            list_for_each (li, l_meta.l) {
                element_t *e = list_entry(li, element_t, list);
                values[cnt] = e->value;
                cnt++;
            }
            // do shuffle
            srand(time(NULL));
            for (int i = n; i > 1; i--) {
                int choose = rand() % i;
                // swap tail and cur
                char *tmp = values[i - 1];
                values[i - 1] = values[choose];
                values[choose] = tmp;
            }
            // copy data from values
            cnt = 0;
            list_for_each (li, l_meta.l) {
                element_t *e = list_entry(li, element_t, list);
                e->value = values[cnt];
                cnt++;
            }
            free(values);
        }
    }
    exception_cancel();

    show_queue(3);
    return !error_check();
}

在 qtest 裡面提供新的命令 `web`

要在 qtest中提供 web 指令不是容易的事，要考量到下述幾點:

web server的持續監聽
不能中斷原本 qtest 的指令輸入

為了達成這幾點，我們要對 console.c 做修改，內容等待會再說明。

先建立 web.h 和 web.c ，作為 web 指令的程式碼模組。

web.h

#ifndef LAB0_WEB_H
#define LAB0_WEB_H
#include <netinet/in.h>

/* Simplifies calls to bind(), connect(), and accept() */
typedef struct sockaddr SA;

extern int listening;
extern int noise;

int open_listenfd(int port);

char *process(int connfd, struct sockaddr_in *clientaddr);

#endif /* LAB0_WEB_H */

web.c

#include "web.h"
#include <arpa/inet.h> /* inet_ntoa */
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/tcp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/sendfile.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#define LISTENQ 1024 /* second argument to listen() */
#define MAXLINE 1024 /* max length of a line */

#ifndef RIO_BUFSIZE
#define RIO_BUFSIZE 1024
#endif

#ifndef NO_LOG_ACCESS
#define LOG_ACCESS
#endif

typedef struct {
    int rio_fd;                /* descriptor for this buf */
    int rio_cnt;               /* unread byte in this buf */
    char *rio_bufptr;          /* next unread byte in this buf */
    char rio_buf[RIO_BUFSIZE]; /* internal buffer */
} rio_t;

typedef struct {
    char function_name[512];
} http_request;

char *default_mime_type = "text/plain";

int listening = -1;
int noise = 1;

void rio_readinitb(rio_t *rp, int fd)
{
    rp->rio_fd = fd;
    rp->rio_cnt = 0;
    rp->rio_bufptr = rp->rio_buf;
}

/*
 * rio_read - This is a wrapper for the Unix read() function that
 *    transfers min(n, rio_cnt) bytes from an internal buffer to a user
 *    buffer, where n is the number of bytes requested by the user and
 *    rio_cnt is the number of unread bytes in the internal buffer. On
 *    entry, rio_read() refills the internal buffer via a call to
 *    read() if the internal buffer is empty.
 */
/* $begin rio_read */
static ssize_t rio_read(rio_t *rp, char *usrbuf, size_t n)
{
    int cnt;
    while (rp->rio_cnt <= 0) { /* refill if buf is empty */

        rp->rio_cnt = read(rp->rio_fd, rp->rio_buf, sizeof(rp->rio_buf));
        if (rp->rio_cnt < 0) {
            if (errno != EINTR) { /* interrupted by sig handler return */
                return -1;
            }
        } else if (rp->rio_cnt == 0) { /* EOF */
            return 0;
        }
        rp->rio_bufptr = rp->rio_buf; /* reset buffer ptr */
    }

    /* Copy min(n, rp->rio_cnt) bytes from internal buf to user buf */
    cnt = n;
    if (rp->rio_cnt < n) {
        cnt = rp->rio_cnt;
    }
    memcpy(usrbuf, rp->rio_bufptr, cnt);
    rp->rio_bufptr += cnt;
    rp->rio_cnt -= cnt;
    return cnt;
}

/*
 * rio_readlineb - robustly read a text line (buffered)
 */
ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen)
{
    int n;
    char c, *bufp = usrbuf;

    for (n = 1; n < maxlen; n++) {
        int rc;
        if ((rc = rio_read(rp, &c, 1)) == 1) {
            *bufp++ = c;
            if (c == '\n') {
                break;
            }
        } else if (rc == 0) {
            if (n == 1) {
                return 0; /* EOF, no data read */
            } else {
                break; /* EOF, some data was read */
            }
        } else {
            return -1; /* error */
        }
    }
    *bufp = 0;
    return n;
}

int open_listenfd(int port)
{
    int listenfd, optval = 1;
    struct sockaddr_in serveraddr;

    /* Create a socket descriptor */
    if ((listenfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        return -1;
    }

    /* Eliminates "Address already in use" error from bind. */
    if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, (const void *) &optval,
                   sizeof(int)) < 0) {
        return -1;
    }

    // 6 is TCP's protocol number
    // enable this, much faster : 4000 req/s -> 17000 req/s
    if (setsockopt(listenfd, 6, TCP_CORK, (const void *) &optval, sizeof(int)) <
        0) {
        return -1;
    }

    /* Listenfd will be an endpoint for all requests to port
       on any IP address for this host */
    memset(&serveraddr, 0, sizeof(serveraddr));
    serveraddr.sin_family = AF_INET;
    serveraddr.sin_addr.s_addr = htonl(INADDR_ANY);
    serveraddr.sin_port = htons((unsigned short) port);
    if (bind(listenfd, (SA *) &serveraddr, sizeof(serveraddr)) < 0) {
        return -1;
    }

    /* Make it a listening socket ready to accept connection requests */
    if (listen(listenfd, LISTENQ) < 0) {
        return -1;
    }
    return listenfd;
}

void parse_request(int fd, http_request *req)
{
    rio_t rio;
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], format[64];

    rio_readinitb(&rio, fd);
    rio_readlineb(&rio, buf, MAXLINE);
    snprintf(format, 64, "%%%ds %%%ds", MAXLINE - 1, MAXLINE - 1);
    sscanf(buf, format, method, uri); /* version is not cared */
    /* read all */
    while (buf[0] != '\n' && buf[1] != '\n') { /* \n || \r\n */
        rio_readlineb(&rio, buf, MAXLINE);
    }
    char *function_name = uri;
    if (uri[0] == '/') {
        function_name = uri + 1;
        int length = strlen(function_name);
        // don't care url query
        for (int i = 0; i < length; i++) {
            if (function_name[i] == '?') {
                function_name[i] = '\0';
                break;
            }
        }
    }
    strncpy(req->function_name, function_name, 512);
}

#ifdef LOG_ACCESS
void log_access(int status, struct sockaddr_in *c_addr, http_request *req)
{
    printf("%s:%d %d - '%s'\n", inet_ntoa(c_addr->sin_addr),
           ntohs(c_addr->sin_port), status, req->function_name);
}
#endif

/* replace '/' with ' ' */
void handle_request(int fd, char *func_name)
{
    while ((*func_name) != '\0') {
        if (*func_name == '/')
            *func_name = ' ';
        func_name++;
    }
}

char *process(int fd, struct sockaddr_in *clientaddr)
{
#ifdef LOG_ACCESS
    printf("accept request, fd is %d, pid is %d\n", fd, getpid());
#endif
    http_request req;
    parse_request(fd, &req);

    handle_request(fd, req.function_name);

    int status = 200;

#ifdef LOG_ACCESS
    log_access(status, clientaddr, &req);
#endif
    char *ret = malloc(strlen(req.function_name) + 1);
    strncpy(ret, req.function_name, strlen(req.function_name) + 1);

    return ret;
}

listening 用來記錄正在監聽的檔案描述元， noise 是用來確認是否要開啟 linenoise 的變數。因為在開啟 web server的時候，不需要讓 linenoise 幫忙處理輸入，所以在此直接用一個變數來關閉功能。缺點是會造成 web 功能開啟用， linenoise 的自動完成等功能都沒辦法使用。

接下來是 console.c

int cmd_select(int nfds,
               fd_set *readfds,
               fd_set *writefds,
               fd_set *exceptfds,
               struct timeval *timeout)
{
    int infd;
    fd_set local_readset;

    if (cmd_done())
        return 0;

    if (!block_flag) {
        /* Process any commands in input buffer */
        if (!readfds)
            readfds = &local_readset;

        /* Add input fd to readset for select */
        infd = buf_stack->fd;
        FD_ZERO(readfds);
        FD_SET(infd, readfds);

        /* If web not ready listen */
        if (listening != -1)
            FD_SET(listening, readfds);

        if (infd == STDIN_FILENO && prompt_flag) {
            printf("%s", prompt);
            fflush(stdout);
            prompt_flag = true;
        }

        if (infd >= nfds)
            nfds = infd + 1;
        if (listening >= nfds)
            nfds = listening + 1;
    }
    if (nfds == 0)
        return 0;

    int result = select(nfds, readfds, writefds, exceptfds, timeout);
    if (result <= 0)
        return result;

    infd = buf_stack->fd;
    if (readfds && FD_ISSET(infd, readfds)) {
        /* Commandline input available */
        FD_CLR(infd, readfds);
        result--;
        char *cmdline;
        cmdline = readline();
        if (cmdline)
            interpret_cmd(cmdline);
    } else if (readfds && FD_ISSET(listening, readfds)) {
        FD_CLR(listening, readfds);
        result--;

        int connfd;
        struct sockaddr_in clientaddr;
        socklen_t clientlen = sizeof(clientaddr);
        connfd = accept(listening, (SA *) &clientaddr, &clientlen);

        char *p = process(connfd, &clientaddr);
        if (p)
            interpret_cmd(p);
        free(p);
        close(connfd);
    }
    return result;
}

bool run_console(char *infile_name)
{
    if (!push_file(infile_name)) {
        report(1, "ERROR: Could not open source file '%s'", infile_name);
        return false;
    }

    if (!has_infile) {
        char *cmdline;
        while (noise && (cmdline = linenoise(prompt)) != NULL) {
            interpret_cmd(cmdline);
            linenoiseHistoryAdd(cmdline);       /* Add to the history. */
            linenoiseHistorySave(HISTORY_FILE); /* Save the history on disk. */
            linenoiseFree(cmdline);
        }
        if (!noise) {
            while (!cmd_done()) {
                cmd_select(0, NULL, NULL, NULL, NULL);
            }
        }
    } else {
        while (!cmd_done())
            cmd_select(0, NULL, NULL, NULL, NULL);
    }

    return err_cnt == 0;
}

修改了兩個 function ， run_console 的修改讓我們可以藉由 noise 來控制 linenoise 功能的開關。 cmd_select 則是加入 listening 到 select 監視的內容中。如此，我們便能利用 select 同時監測到來自命令列和 web server 的指令。

最後在 qtest.c 內加入指令就完成了

static bool do_web(int argc, char *argv[])
{
    if (argc != 1) {
        report(1, "%s takes no arguments", argv[0]);
        return false;
    }

    if (exception_setup(true)) {
        const int PORT = 9999;

        if (listening == -1) {
            listening = open_listenfd(PORT);
            signal(SIGPIPE, SIG_IGN);
            noise = false;
            set_echo(false);
            printf("listen on port %d, fd is %d\n", PORT, listening);
        } else {
            report(1, "web server is already turned on");
        }
    }
    exception_cancel();

    return !error_check();
}

// in console_init()
ADD_COMMAND(web, "                | open simple http server");

2022q1 Homework1 (lab0)

實驗環境

開發過程

q_new

q_free

q_insert_head

q_insert_tail

q_remove_head

q_size

q_delete_mid

q_delete_dup

q_swap

q_reverse

q_sort

在 qtest 提供新的命令 shuffle

在 qtest 裡面提供新的命令 web

Read more

Linux Kernel Project 1

2022q1 Homework2 (quiz2)

在 qtest 提供新的命令 `shuffle`

在 qtest 裡面提供新的命令 `web`