Try   HackMD

2022q1 Homework1 (lab0)

contributed by < ppodds >

實驗環境

$ gcc --version
gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0

$ lscpu
Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   39 bits physical, 48 bits virtual
CPU(s):                          12
On-line CPU(s) list:             0-11
Thread(s) per core:              2
Core(s) per socket:              6
Socket(s):                       1
NUMA node(s):                    1

開發過程

q_new

struct list_head *q_new()
{
    // allocate space for queue node
    struct list_head *head = malloc(sizeof(struct list_head));
    if (!head)
        return NULL;
    // initialize list member
    INIT_LIST_HEAD(head);
    return head;
}

一開始我搞不清楚狀況,拿 element_t 來當 head ,寫到後面才發現自己應該弄錯了,所以後來改成現在這樣。

q_free

void q_free(struct list_head *l)
{
    // check if list is NULL
    if (l == NULL)
        return;
    struct list_head *li, *tmp;
    list_for_each_safe (li, tmp, l) {
        element_t *elem = list_entry(li, element_t, list);
        list_del(li);
        q_release_element(elem);
    }
    // free queue head
    free(l);
}

list_for_each_safe 可以用,這樣就可以在迴圈內安全呼叫 free 函式以釋放目前節點所在的記憶體空間。

list_for_each_safe 其實是多用一個暫存來記錄指向下個節點的指標,所以只保證刪除目前的節點不會出錯

q_insert_head

bool q_insert_head(struct list_head *head, char *s)
{
    // check if queue is NULL
    if (!head)
        return false;
    element_t *node = malloc(sizeof(element_t));
    if (!node)
        return false;
    INIT_LIST_HEAD(&node->list);
    // insert the node to the beginning of the queue
    list_add(&node->list, head);
    node->value = strdup(s);
    if (!node->value) {
        list_del(&node->list);
        free(node);
        return false;
    }
    return true;
}

一開始漏掉 strdup 的記憶體配置檢查,導致有三個測試跑不過,最後加個檢查就過了。

q_insert_tail

bool q_insert_tail(struct list_head *head, char *s)
{
    // check if queue is NULL
    if (!head)
        return false;
    element_t *node = malloc(sizeof(element_t));
    if (!node)
        return false;
    INIT_LIST_HEAD(&node->list);
    // insert the node to the end of the queue
    list_add_tail(&node->list, head);
    node->value = strdup(s);
    if (!node->value) {
        list_del(&node->list);
        free(node);
        return false;
    }
    return true;
}

兩個函式的實作只差在把 list_add 換成 list_add_tail

差不多一樣

工程人員說話要精準,避免說「差不多」

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

q_remove_head

element_t *q_remove_head(struct list_head *head, char *sp, size_t bufsize)
{
    // if the queue is NULL or empty, return NULL
    if (!head || list_empty(head))
        return NULL;

    element_t *node = list_first_entry(head, element_t, list);
    list_del(&node->list);
    // check if sp is non-NULL
    if (sp) {
        strncpy(sp, node->value, bufsize - 1);
        sp[bufsize - 1] = '\0';
    }
    return node;
}

一開始以為 strncpy 會幫忙加 \0 截斷字串,後來發現沒有就自己補上了。

q_size

int q_size(struct list_head *head)
{
    if (!head)
        return 0;

    int len = 0;
    struct list_head *li;

    list_for_each (li, head)
        len++;
    return len;
}

新的實作

int q_size(struct list_head *head)
{
    if (!head)
        return 0;

    int len = 0;
    struct list_head *li;

    for (li = head->next; li != head; li = li->next->next) {
        if (li->next == head) {
            len++;
            break;
        }
        len += 2;
    }
    return len;
}
cmd> new 
l = []
cmd> it a 10000000
l = [a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a ... ]
cmd> time size
Queue size = 10000000
l = [a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a ... ]

old:
Delta time = 0.858

0.855 ~ 0.86 之間

new:
Delta time = 0.813

0.81 ~ 0.83 之間

老師給的實作,就沒另外再動了。

如果一次移動兩格,可以降低變數修改的次數,可以稍稍提高一點速度,實測結果如上面一樣,簡略測試可以提升4~5%。缺點是程式碼變長變髒,也變得更不直覺。或許是有改進空間,但是實際上有沒有必要又是另一回事了

TODO: 寫程式做個測試圖表

指出後續的改進空間!

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

感謝老師提點讓我有做實驗測試的機會!

q_delete_mid

bool q_delete_mid(struct list_head *head)
{
    if (!head || list_empty(head))
        return false;

    struct list_head *slow;
    struct list_head *fast = head->next;

    list_for_each (slow, head) {
        // find the middle node
        if (fast == head || fast->next == head)
            break;
        fast = fast->next->next;
    }
    list_del(slow);
    q_release_element(list_entry(slow, element_t, list));
    return true;
    // https://leetcode.com/problems/delete-the-middle-node-of-a-linked-list/
}

Robert W. Floy 提出的循環偵測演算法來找中間點,操作數量大約是

n2,快還要更快

應描述為 Robert W. Floy 提出的循環偵測演算法

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

q_delete_dup

bool q_delete_dup(struct list_head *head)
{
    if (!head)
        return false;

    struct list_head *li, *safe;
    bool dup = false;
    for (li = (head)->next, safe = li->next; li != (head) && safe != (head);
         li = safe, safe = li->next) {
        element_t *cur = list_entry(li, element_t, list);
        element_t *next = list_entry(li->next, element_t, list);
        if (strcmp(cur->value, next->value) == 0) {
            list_del(li);
            q_release_element(cur);
            dup = true;
        } else if (dup) {
            list_del(li);
            q_release_element(cur);
            dup = false;
        }
    }
    return true;
    // https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/
}

這裡的寫法就比較髒了,大意上是先找到第一個重複的項目,之後直到找到不重複的項目為止都會持續刪除目前的節點。當碰到內容不重複時,一樣刪除節點,接下來開始尋找下一個新的重複項目。操作數量已經壓到

n 左右了,再來的改進應該也只會是美觀的調整。

q_swap

void q_swap(struct list_head *head)
{
    struct list_head *cur = head->next;
    while (cur != head && cur->next != head) {
        list_move(cur, cur->next);
        cur = cur->next;
    }
    // https://leetcode.com/problems/swap-nodes-in-pairs/
}

這裡用了 list_move 來實作 swap ,但是實際上 list_move 應該是在list 之間移動節點用的,這裡只是剛好可以用上(因為 list_move 會把傳入的節點插在傳入的 head 後面,剛好符合需求)。

q_reverse

void q_reverse(struct list_head *head)
{
    // if the queue is NULL or its size <= 1, return
    if (!head || list_empty(head) || list_is_singular(head))
        return;

    struct list_head *cur = head->prev;
    head->prev = head->next;
    head->next = cur;

    while (cur != head) {
        struct list_head *tmp = cur->prev;
        cur->prev = cur->next;
        cur->next = tmp;
        cur = tmp;
    }
}

把全部的 link 倒過來就變成 reverse 了。

q_sort

待補,現在的版本是把 linux 裡面的 sort 放進來。除了刪掉 if (unlikely(!++count)) cmp(priv, b, b); 以外基本沒有改動。教授希望我們把核心的想法融入到自己的 sort 當中,但是原本的版本實在太強了,甚至連看懂都花了我很多時間,還做了一份流程分析方便自己理解過程。因此這部分我想先擱著,等到把其他部分都完成有空餘時間再嘗試精進。

幻滅是成長的開始。不要太早說「有空餘時間」,哪裡不懂,就強化哪。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

在 qtest 提供新的命令 shuffle

因為 queue.h 用 hash 檢查把檔案鎖住沒辦法變更,所以只能把程式碼直接寫在 qtest.c 裡面。

先新增一個指令的實作函式,待會再來加進指令清單。(參數檢查和其他處理可以從 do_swap 複製下來用)

static bool do_shuffle(int argc, char *argv[])
{
    if (argc != 1) {
        report(1, "%s takes no arguments", argv[0]);
        return false;
    }

    if (!l_meta.l)
        report(3, "Warning: Try to access null queue");
    error_check();

    set_noallocate_mode(true);
    if (exception_setup(true)) {
        if (l_meta.l->next != l_meta.l && l_meta.l->next->next != l_meta.l) {
            // do shuffle
            srand(time(NULL));
            struct list_head *tail = l_meta.l->prev;
            for (int i = q_size(l_meta.l); i > 1; i--) {
                int choose = rand() % i;
                struct list_head *cur = l_meta.l->next;
                for (int j = choose; j > 0; j--) {
                    cur = cur->next;
                }
                // swap tail and cur
                element_t *a = list_entry(cur, element_t, list);
                element_t *b = list_entry(tail, element_t, list);
                char *tmp = b->value;
                b->value = a->value;
                a->value = tmp;
                // update tail position
                tail = tail->prev;
            }
        }
    }
    exception_cancel();

    set_noallocate_mode(false);

    show_queue(3);
    return !error_check();
}

利用老師提到的 Fisher–Yates shuffle 的 Modern implementation 可以很快寫出符合功能要求的程式碼。但是這裡使用的資料結構是 linked list ,會有大量查找元素時的操作,會對演算法複雜度造成影響。可選的改善方案是把 queue 內的所有資料複製出來做一個 array ,之後在 array 裡面做 shuffle 操作, shuffle 操作結束後再把數值複製回 linked list 。 這種做法的好處是時間複雜度只有

O(n) (實際操作數大約
3n
左右) ,不過需要
O(n)
的空間複雜度。

補一個我自己的實作

static bool do_shuffle2(int argc, char *argv[])
{
    if (argc != 1) {
        report(1, "%s takes no arguments", argv[0]);
        return false;
    }

    if (!l_meta.l)
        report(3, "Warning: Try to access null queue");
    error_check();

    if (exception_setup(true)) {
        if (l_meta.l->next != l_meta.l && l_meta.l->next->next != l_meta.l) {
            int n = q_size(l_meta.l);
            char **values = malloc(n * sizeof(char *));
            if (!values) {
                report(1, "ERROR:  Out of memory");
                return false;
            }
            // copy values from queue
            struct list_head *li;
            int cnt = 0;
            list_for_each (li, l_meta.l) {
                element_t *e = list_entry(li, element_t, list);
                values[cnt] = e->value;
                cnt++;
            }
            // do shuffle
            srand(time(NULL));
            for (int i = n; i > 1; i--) {
                int choose = rand() % i;
                // swap tail and cur
                char *tmp = values[i - 1];
                values[i - 1] = values[choose];
                values[choose] = tmp;
            }
            // copy data from values
            cnt = 0;
            list_for_each (li, l_meta.l) {
                element_t *e = list_entry(li, element_t, list);
                e->value = values[cnt];
                cnt++;
            }
            free(values);
        }
    }
    exception_cancel();

    show_queue(3);
    return !error_check();
}

在 qtest 裡面提供新的命令 web

要在 qtest中提供 web 指令不是容易的事,要考量到下述幾點:

  1. web server的持續監聽
  2. 不能中斷原本 qtest 的指令輸入

為了達成這幾點,我們要對 console.c 做修改,內容等待會再說明。

先建立 web.hweb.c ,作為 web 指令的程式碼模組。

web.h

#ifndef LAB0_WEB_H
#define LAB0_WEB_H
#include <netinet/in.h>

/* Simplifies calls to bind(), connect(), and accept() */
typedef struct sockaddr SA;

extern int listening;
extern int noise;

int open_listenfd(int port);

char *process(int connfd, struct sockaddr_in *clientaddr);

#endif /* LAB0_WEB_H */

web.c

#include "web.h"
#include <arpa/inet.h> /* inet_ntoa */
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/tcp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/sendfile.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#define LISTENQ 1024 /* second argument to listen() */
#define MAXLINE 1024 /* max length of a line */

#ifndef RIO_BUFSIZE
#define RIO_BUFSIZE 1024
#endif

#ifndef NO_LOG_ACCESS
#define LOG_ACCESS
#endif

typedef struct {
    int rio_fd;                /* descriptor for this buf */
    int rio_cnt;               /* unread byte in this buf */
    char *rio_bufptr;          /* next unread byte in this buf */
    char rio_buf[RIO_BUFSIZE]; /* internal buffer */
} rio_t;

typedef struct {
    char function_name[512];
} http_request;

char *default_mime_type = "text/plain";

int listening = -1;
int noise = 1;

void rio_readinitb(rio_t *rp, int fd)
{
    rp->rio_fd = fd;
    rp->rio_cnt = 0;
    rp->rio_bufptr = rp->rio_buf;
}

/*
 * rio_read - This is a wrapper for the Unix read() function that
 *    transfers min(n, rio_cnt) bytes from an internal buffer to a user
 *    buffer, where n is the number of bytes requested by the user and
 *    rio_cnt is the number of unread bytes in the internal buffer. On
 *    entry, rio_read() refills the internal buffer via a call to
 *    read() if the internal buffer is empty.
 */
/* $begin rio_read */
static ssize_t rio_read(rio_t *rp, char *usrbuf, size_t n)
{
    int cnt;
    while (rp->rio_cnt <= 0) { /* refill if buf is empty */

        rp->rio_cnt = read(rp->rio_fd, rp->rio_buf, sizeof(rp->rio_buf));
        if (rp->rio_cnt < 0) {
            if (errno != EINTR) { /* interrupted by sig handler return */
                return -1;
            }
        } else if (rp->rio_cnt == 0) { /* EOF */
            return 0;
        }
        rp->rio_bufptr = rp->rio_buf; /* reset buffer ptr */
    }

    /* Copy min(n, rp->rio_cnt) bytes from internal buf to user buf */
    cnt = n;
    if (rp->rio_cnt < n) {
        cnt = rp->rio_cnt;
    }
    memcpy(usrbuf, rp->rio_bufptr, cnt);
    rp->rio_bufptr += cnt;
    rp->rio_cnt -= cnt;
    return cnt;
}

/*
 * rio_readlineb - robustly read a text line (buffered)
 */
ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen)
{
    int n;
    char c, *bufp = usrbuf;

    for (n = 1; n < maxlen; n++) {
        int rc;
        if ((rc = rio_read(rp, &c, 1)) == 1) {
            *bufp++ = c;
            if (c == '\n') {
                break;
            }
        } else if (rc == 0) {
            if (n == 1) {
                return 0; /* EOF, no data read */
            } else {
                break; /* EOF, some data was read */
            }
        } else {
            return -1; /* error */
        }
    }
    *bufp = 0;
    return n;
}

int open_listenfd(int port)
{
    int listenfd, optval = 1;
    struct sockaddr_in serveraddr;

    /* Create a socket descriptor */
    if ((listenfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        return -1;
    }

    /* Eliminates "Address already in use" error from bind. */
    if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, (const void *) &optval,
                   sizeof(int)) < 0) {
        return -1;
    }

    // 6 is TCP's protocol number
    // enable this, much faster : 4000 req/s -> 17000 req/s
    if (setsockopt(listenfd, 6, TCP_CORK, (const void *) &optval, sizeof(int)) <
        0) {
        return -1;
    }

    /* Listenfd will be an endpoint for all requests to port
       on any IP address for this host */
    memset(&serveraddr, 0, sizeof(serveraddr));
    serveraddr.sin_family = AF_INET;
    serveraddr.sin_addr.s_addr = htonl(INADDR_ANY);
    serveraddr.sin_port = htons((unsigned short) port);
    if (bind(listenfd, (SA *) &serveraddr, sizeof(serveraddr)) < 0) {
        return -1;
    }

    /* Make it a listening socket ready to accept connection requests */
    if (listen(listenfd, LISTENQ) < 0) {
        return -1;
    }
    return listenfd;
}

void parse_request(int fd, http_request *req)
{
    rio_t rio;
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], format[64];

    rio_readinitb(&rio, fd);
    rio_readlineb(&rio, buf, MAXLINE);
    snprintf(format, 64, "%%%ds %%%ds", MAXLINE - 1, MAXLINE - 1);
    sscanf(buf, format, method, uri); /* version is not cared */
    /* read all */
    while (buf[0] != '\n' && buf[1] != '\n') { /* \n || \r\n */
        rio_readlineb(&rio, buf, MAXLINE);
    }
    char *function_name = uri;
    if (uri[0] == '/') {
        function_name = uri + 1;
        int length = strlen(function_name);
        // don't care url query
        for (int i = 0; i < length; i++) {
            if (function_name[i] == '?') {
                function_name[i] = '\0';
                break;
            }
        }
    }
    strncpy(req->function_name, function_name, 512);
}

#ifdef LOG_ACCESS
void log_access(int status, struct sockaddr_in *c_addr, http_request *req)
{
    printf("%s:%d %d - '%s'\n", inet_ntoa(c_addr->sin_addr),
           ntohs(c_addr->sin_port), status, req->function_name);
}
#endif

/* replace '/' with ' ' */
void handle_request(int fd, char *func_name)
{
    while ((*func_name) != '\0') {
        if (*func_name == '/')
            *func_name = ' ';
        func_name++;
    }
}

char *process(int fd, struct sockaddr_in *clientaddr)
{
#ifdef LOG_ACCESS
    printf("accept request, fd is %d, pid is %d\n", fd, getpid());
#endif
    http_request req;
    parse_request(fd, &req);

    handle_request(fd, req.function_name);

    int status = 200;

#ifdef LOG_ACCESS
    log_access(status, clientaddr, &req);
#endif
    char *ret = malloc(strlen(req.function_name) + 1);
    strncpy(ret, req.function_name, strlen(req.function_name) + 1);

    return ret;
}

listening 用來記錄正在監聽的檔案描述元, noise 是用來確認是否要開啟 linenoise 的變數。 因為在開啟 web server的時候,不需要讓 linenoise 幫忙處理輸入,所以在此直接用一個變數來關閉功能。缺點是會造成 web 功能開啟用, linenoise 的自動完成等功能都沒辦法使用。

接下來是 console.c

int cmd_select(int nfds,
               fd_set *readfds,
               fd_set *writefds,
               fd_set *exceptfds,
               struct timeval *timeout)
{
    int infd;
    fd_set local_readset;

    if (cmd_done())
        return 0;

    if (!block_flag) {
        /* Process any commands in input buffer */
        if (!readfds)
            readfds = &local_readset;

        /* Add input fd to readset for select */
        infd = buf_stack->fd;
        FD_ZERO(readfds);
        FD_SET(infd, readfds);

        /* If web not ready listen */
        if (listening != -1)
            FD_SET(listening, readfds);

        if (infd == STDIN_FILENO && prompt_flag) {
            printf("%s", prompt);
            fflush(stdout);
            prompt_flag = true;
        }

        if (infd >= nfds)
            nfds = infd + 1;
        if (listening >= nfds)
            nfds = listening + 1;
    }
    if (nfds == 0)
        return 0;

    int result = select(nfds, readfds, writefds, exceptfds, timeout);
    if (result <= 0)
        return result;

    infd = buf_stack->fd;
    if (readfds && FD_ISSET(infd, readfds)) {
        /* Commandline input available */
        FD_CLR(infd, readfds);
        result--;
        char *cmdline;
        cmdline = readline();
        if (cmdline)
            interpret_cmd(cmdline);
    } else if (readfds && FD_ISSET(listening, readfds)) {
        FD_CLR(listening, readfds);
        result--;

        int connfd;
        struct sockaddr_in clientaddr;
        socklen_t clientlen = sizeof(clientaddr);
        connfd = accept(listening, (SA *) &clientaddr, &clientlen);

        char *p = process(connfd, &clientaddr);
        if (p)
            interpret_cmd(p);
        free(p);
        close(connfd);
    }
    return result;
}

bool run_console(char *infile_name)
{
    if (!push_file(infile_name)) {
        report(1, "ERROR: Could not open source file '%s'", infile_name);
        return false;
    }

    if (!has_infile) {
        char *cmdline;
        while (noise && (cmdline = linenoise(prompt)) != NULL) {
            interpret_cmd(cmdline);
            linenoiseHistoryAdd(cmdline);       /* Add to the history. */
            linenoiseHistorySave(HISTORY_FILE); /* Save the history on disk. */
            linenoiseFree(cmdline);
        }
        if (!noise) {
            while (!cmd_done()) {
                cmd_select(0, NULL, NULL, NULL, NULL);
            }
        }
    } else {
        while (!cmd_done())
            cmd_select(0, NULL, NULL, NULL, NULL);
    }

    return err_cnt == 0;
}

修改了兩個 function , run_console 的修改讓我們可以藉由 noise 來控制 linenoise 功能的開關。 cmd_select 則是加入 listeningselect 監視的內容中。如此,我們便能利用 select 同時監測到來自命令列和 web server 的指令。

最後在 qtest.c 內加入指令就完成了

static bool do_web(int argc, char *argv[])
{
    if (argc != 1) {
        report(1, "%s takes no arguments", argv[0]);
        return false;
    }

    if (exception_setup(true)) {
        const int PORT = 9999;

        if (listening == -1) {
            listening = open_listenfd(PORT);
            signal(SIGPIPE, SIG_IGN);
            noise = false;
            set_echo(false);
            printf("listen on port %d, fd is %d\n", PORT, listening);
        } else {
            report(1, "web server is already turned on");
        }
    }
    exception_cancel();

    return !error_check();
}
// in console_init()
ADD_COMMAND(web, "                | open simple http server");