Try   HackMD

2022q1 Homework1 (lab0)

contributed by < AmyLin0210 >

作業要求

基本實做

首先我們要先找到 linked list 的資料型態

list.h 裡面定義了 struct list_head

struct list_head {
    struct list_head *prev;
    struct list_head *next;
};

queue.h 裡面定義了 element_t

typedef struct {
    char *value;
    struct list_head list;
} element_t;

在這邊需要使用到 container_of 來拿 element_t 內的字串,在 list.h 中同樣可以看到取得 entry containing node 的指標的程式碼

#define list_entry(node, type, member) container_of(node, type, member)

在這裡要實作的 circular doubly-linked list 架構示意圖如下:







list_add_tail



head

prev

next
head



L1

value

prev

next
L1



head:n->L1





L2

value

prev

next
L2



head:p->L2





L1:p->head





L1:n->L2





L2:n->head





L2:p->L1





q_insert_head

由於在 q_insert_headq_insert_tail 中同樣都有需要增加一個 element 的需求,因此把新增一個 element 這件事情獨立出來,實做一個 new_element 的函式。

struct list_head *new_element(char *s)
{
    element_t *tmp = malloc(sizeof(element_t));

    if (!tmp)
        return NULL;

    tmp->list.prev = NULL;
    tmp->list.next = NULL;

    if (s) {
        tmp->value = strdup(s);
        if (!tmp->value) {
            free(tmp);
            return NULL;
        }
    }
    else {
        tmp->value = NULL;
    }

    return &(tmp->list);
}

以下是 q_insert_head 的實做。

bool q_insert_head(struct list_head *head, char *s)
{
    if (!head)
        return false;

    struct list_head *tmp = new_element(s);

    if (!tmp)
        return false;

    list_add(tmp, head);

    return true;
}

q_insert_tail

bool q_insert_tail(struct list_head *head, char *s)
{
    if (!head)
        return false;

    struct list_head *tmp = new_element(s);
    if (!tmp)
        return false;

    list_add_tail(tmp, head);

    return true;
}

q_remove_head

element_t *q_remove_head(struct list_head *head, char *sp, size_t bufsize)
{
    if (!head || list_empty(head))
        return NULL;

    element_t *tmp = list_entry(head->next, element_t, list);

    if (sp) {
        strncpy(sp, tmp->value, bufsize);
        strcpy(sp + bufsize - 1, "\0");
    }

    list_del(head->next);

    return tmp;
}

q_remove_tail

element_t *q_remove_tail(struct list_head *head, char *sp, size_t bufsize)
{
    if (!head || list_empty(head))
        return false;

    element_t *tmp = list_entry(head->prev, element_t, list);

    if (sp){
        strncpy(sp, tmp->value, bufsize);
        strcpy(sp + bufsize - 1, "\0");
    }

    list_del(head->prev);

    return tmp;
}

q_size

int q_size(struct list_head *head)
{
    if (!head) return 0;

    int len = 0;
    struct list_head *li;

    list_for_each (li, head)
        len++;

    return len;
}

q_delete_mid

在這邊使用到了快慢指標的技巧,每一次快指標( fast ) 往下走兩個位置,慢指標 ( slow )就往下走一個位置,當快指標走回 head 的時候,慢指標會正好走到整條 linked list 的中央。

bool q_delete_mid(struct list_head *head)
{
    if (!head || list_empty(head))
        return false;

    struct list_head *slow = head->next;
    for (struct list_head *fast = head->next; fast != head && fast->next != head; fast = fast->next->next) {
        slow = slow->next;
    }

    list_del(slow);
    q_release_element(list_entry(slow, element_t, list));

    return true;
}

q_reverse

在這裡的 reverse 想法是,從頭開始迭代一條 linked list,每一次都將走訪到的節點挪置 head,當走訪完整條 linked list,那這條 linked list 就同時被 reverse 了。

示意圖:

        。
head -> node1 -> node2 -> node3
                 。
head -> node1 -> node2 -> node3    // 將 node2 挪置最前方
head -> node2 -> node1 -> node3
                          。
head -> node2 -> node1 -> node3    // 將 node3 挪置最前方,迭代完成
head -> node3 -> node2 -> node1

程式碼:

void q_reverse(struct list_head *head) {
    if (!head || list_empty(head))
        return;

    struct list_head *iter, *next;
    list_for_each_safe(iter, next, head) {
        list_move(iter, head);
    }
}

TODO: 一併討論針對 singly-linked list vs. doubly-linked list,這類 reverse 操作的實作考量。有些科技公司面試會出這樣的題目。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

在 doubly-linked list 中,有許多作法都可以達成 reverse 這項操作,除了上述在遍歷節點後將節點挪至最前端外,也可以直接將節點中的 prevnext 做 swap,這樣的優點是可以減少一些變數的使用。
以下圖範例程式碼為例,我只需要儲存 headiter 就能夠完成整個 reverse 的操作。

void q_reverse(struct list_head *head)
{
    if (!head || list_empty(head))
        return;

    struct list_head *iter = head;

    do {
        iter->next = (void *)((uintptr_t)(iter->prev) ^ (uintptr_t)(iter->next));
        iter->prev = (void *)((uintptr_t)(iter->prev) ^ (uintptr_t)(iter->next));
        iter->next = (void *)((uintptr_t)(iter->prev) ^ (uintptr_t)(iter->next));

        iter = iter->prev;
    } while (iter != head);
}

在上方程式碼不使用 list_h 內提供的 API 的原因是,list_for_each_safe 不會遍歷到 head 這個節點,但我希望可以將所有的 prevnext 做 swap (含 head) 而不要處理太多例外,因此使用 while 迴圈來實做。

而在 singly-linked list 中,想要 reverse 需要得到下面的資訊:正在處理的節點、節點的前一個節點、節點的後一個節點。而且也沒辦法像上面的 doubly-linked list 一樣使用對指標 swap 的方式處理,必然是將節點拆開重接。

下面是 singly-linked list 的測試程式碼

struct node_s {
    int val;
    struct node_s *next;
};

void reverse (struct node_s *head) {
    struct node_s *iter = head;
    struct node_s *prev = NULL;
    struct node_s *next = head->next;

    do {
        iter->next = prev;

        prev = iter;
        iter = next;
        next = next->next;
    } while (iter != head);

    head->next = prev;
}

q_sort

在這邊我使用的演算法為 merge sort

void q_sort(struct list_head *head)
{
    if (!head || list_empty(head))
        return;

    head->prev->next = NULL;
    struct list_head *sorted = mergesort(head->next);

    head->next = sorted;
    sorted->prev = head;

    while(sorted->next) {
        sorted = sorted->next;
    }

    sorted->next = head;
    head->prev = sorted;
}

struct list_head *mergesort(struct list_head *head) {
    if (!head->next) {
        return head;
    }

    struct list_head *mid = head;
    for (struct list_head *fast = head->next; fast && fast->next; fast = fast->next->next) {
        mid = mid->next;
    }

    struct list_head *right = mergesort(mid->next);
    mid->next = NULL;
    struct list_head *left = mergesort(head);

    head = merge(left, right);
    return head;
}

struct list_head *merge(struct list_head *l1, struct list_head *l2) {
    struct list_head *head = l1;
    struct list_head *prev = NULL;
    struct list_head **ptr = &head;
    struct list_head **node;

    for (node = NULL; l1 && l2; *node = (*node)->next) {
        node = (strcmp(list_entry(l1, element_t, list)->value, list_entry(l2, element_t, list)->value) < 0) ? &l1 : &l2;
        (*node)->prev = prev;
        prev = *node;
        *ptr = *node;
        ptr = &(*ptr)->next;
    }

    *ptr = (l1)? l1 : l2;
    (*ptr)->prev = prev;

    return head;
}

TODO: 避免使用遞迴呼叫,並從 Linux 核心原始程式碼的 list_sort.c 探討後續改進。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

q_swap

void q_swap(struct list_head *head)
{
    if (!head || list_empty(head))
        return;

    struct list_head *first = head->next, *second = head->next->next;
    for (;first != head && second != head; first = first->next, second = first->next) {
        first->prev->next = second;
        first->next = second->next;

        second->next->prev = first;
        second->next = first;
        second->prev = first->prev;

        first->prev = second;
    }
}

q_delete_dup

bool q_delete_dup(struct list_head *head)
{
    if (!head || list_empty(head))
        return false;

    struct list_head *cur = head->next;
    struct list_head *tmp = cur;

    while (cur != head && cur->next != head) {
        char *c1 = list_entry(cur, element_t, list)->value;
        char *c2 = list_entry(cur->next, element_t, list)->value;
        tmp = cur->next;

        if (strcmp(c1, c2) == 0) {
            while (tmp != head && strcmp(c1, c2) == 0) {
                cur->next = tmp->next;
                tmp->next->prev = cur;

                q_release_element(list_entry(tmp, element_t, list));

                tmp = cur->next;
                c2 = list_entry(tmp, element_t, list)->value;
            }

            cur->prev->next = tmp;
            tmp->prev = cur->prev;
            q_release_element(list_entry(cur, element_t, list));

            cur = tmp;
        }
        else {
            cur = cur->next;
        }

    }
    return true;
}

運用 Valgrind 排除 qtest 實作的記憶體錯誤

在單純使用 valgrind 去觀察執行 ./qtest 的時候沒有問題

$ valgrind ./qtest
cmd> new
l = []
cmd> ih aaa
l = [aaa]
cmd> ih bbb
l = [bbb aaa]
cmd> reverse
l = [aaa bbb]
cmd> quit
Freeing queue

但發現若在 ./qtest 加上參數 -f ,從檔案來測試時,就會出現 memory leak。

$ valgrind ./qtest -f ./traces-01-ops.cmd
...
==71277== 41 bytes in 1 blocks are still reachable in loss record 1 of 3
==71277==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==71277==    by 0x4A4F50E: strdup (strdup.c:42)
==71277==    by 0x1108BA: linenoiseHistoryAdd (linenoise.c:1236)
==71277==    by 0x11144D: linenoiseHistoryLoad (linenoise.c:1325)
==71277==    by 0x10C6B0: main (qtest.c:951)
==71277== 
==71277== 160 bytes in 1 blocks are still reachable in loss record 2 of 3
==71277==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==71277==    by 0x11087A: linenoiseHistoryAdd (linenoise.c:1224)
==71277==    by 0x11144D: linenoiseHistoryLoad (linenoise.c:1325)
==71277==    by 0x10C6B0: main (qtest.c:951)
==71277== 
==71277== 223 bytes in 19 blocks are still reachable in loss record 3 of 3
==71277==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==71277==    by 0x4A4F50E: strdup (strdup.c:42)
==71277==    by 0x11082E: linenoiseHistoryAdd (linenoise.c:1236)
==71277==    by 0x11144D: linenoiseHistoryLoad (linenoise.c:1325)
==71277==    by 0x10C6B0: main (qtest.c:951)
==71277== 

從上面發現的特性,可以去找找看從 file 讀取資料與從 command line 輸入資料兩種模式會有哪些不同的行為。

qtest.c 裡的第 910 行,可以找到它去判斷 option 的程式碼,發現如果有給 -f 這個參數,會將檔案名稱丟入 infile_name

case 'f':
    strncpy(buf, optarg, BUFSIZE);
    buf[BUFSIZE - 1] = '\0';
    infile_name = buf;
    break;

再來找到 infile_name 這個參數會在哪裡被使用到,可以發現同樣在 qtest.c 裡的 962 行呼叫了 run_console(file_name),這個函式被定義在 console.c 的 639 行。

從以下程式碼可以發現,若有 infile_name,那檔案的內容會在第三行的 push_file 函式處理,由於不會執行到第 8 至 15 行,所以不會執行到 linenoise.c 的內容。

bool run_console(char *infile_name) { if (!push_file(infile_name)) { report(1, "ERROR: Could not open source file '%s'", infile_name); return false; } if (!has_infile) { char *cmdline; while ((cmdline = linenoise(prompt)) != NULL) { interpret_cmd(cmdline); linenoiseHistoryAdd(cmdline); /* Add to the history. */ linenoiseHistorySave(HISTORY_FILE); /* Save the history on disk. */ linenoiseFree(cmdline); } } else { while (!cmd_done()) cmd_select(0, NULL, NULL, NULL, NULL); } return err_cnt == 0; }

回到 qtest.c 這個檔案,在第 951 行的地方呼叫了 linenoiseHistoryLoad(HISTORY_FILE),在這邊 HISTORY_FILE 的內容是 .cmd_history,所以它會讀檔案的內容並存起來,但由於後續不會去釋放掉相關的記憶體,所以就造成了 memory leak。

準備提交 pull request?

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

在提交 pull request 之前,已經有對這份 project 做過一些變更 (e.g. queue.c) 並 commit,但不希望在這個檔案內的內容同樣被 pr 上去,所以做了以下處理:

  1. 當前進度完成後,在本地端 commit
  2. 開一條新的 branch ,取名作 patch-qtest
  3. 將其內容回朔到變更 queue.c 之前
  4. 將已經在原專案的變更 pull 回來
  5. 修改想要變更的內容並 commit

經過以下操作,在 patch-qtest 這條分支內的變更內容就會只剩下想要對 qtest.c 的變更。

觀察 linenoiseHistoryLoad(HISTORY_FILE) ,它所做的事情是將過去 command line 所執行的指令個重新讀入再寫進檔案,如果是想要留下所有的歷史紀錄的話,在沒有下 -f 這個參數時執行就好,因此變更 qtest.c 的程式碼為:

if (!infile_name) {
    /* Trigger call back function(auto completion) */
    linenoiseSetCompletionCallback(completion);
    linenoiseHistorySetMaxLen(HISTORY_LEN);
    linenoiseHistoryLoad(HISTORY_FILE); /* Load the history at startup */
}

發現在給定錯誤的測試檔案路徑時,也會造成 memory leak

$ valgrind ./qtest -f ./traces/trace-01-ops.cm
ERROR: Could not open source file './traces/trace-01-ops.cm'
==136232== 32 bytes in 1 blocks are still reachable in loss record 1 of 28
==136232==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==136232==    by 0x10CD61: malloc_or_fail (report.c:189)
==136232==    by 0x10D7EB: add_cmd (console.c:89)
==136232==    by 0x10DB75: init_cmd (console.c:408)
==136232==    by 0x10C4C1: main (qtest.c:944)
==136232== 
==136232== 32 bytes in 1 blocks are still reachable in loss record 2 of 28
==136232==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==136232==    by 0x10CD61: malloc_or_fail (report.c:189)
==136232==    by 0x10D7EB: add_cmd (console.c:89)
==136232==    by 0x10DB8F: init_cmd (console.c:409)
==136232==    by 0x10C4C1: main (qtest.c:944)
==136232== 
==136232== 32 bytes in 1 blocks are still reachable in loss record 3 of 28
==136232==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==136232==    by 0x10CD61: malloc_or_fail (report.c:189)
==136232==    by 0x10D7EB: add_cmd (console.c:89)
==136232==    by 0x10DBA9: init_cmd (console.c:410)
==136232==    by 0x10C4C1: main (qtest.c:944)
...

找到 console.crun_console 函式,可以發現當他找不到對應的檔案時,會執行以下的程式碼:

if (!push_file(infile_name)) {
    report(1, "ERROR: Could not open source file '%s'", infile_name);
    return false;
}

再回到 qtest.c ,觀察以下程式碼,會發現若檔案路徑錯誤,會回傳 false,然後在第 3 行的地方,由於使用了 && ,所以當 okfalse 時,會跳過 finish_cmd() 這個函式呼叫,故更改成以下程式碼,讓 finish_cmd() 先執行。

bool ok = true;
ok = ok && run_console(infile_name);
+ ok = finish_cmd() && ok;
- ok = ok && finish_cmd();

在還沒有對程式碼做變更的階段,使用 massif 來看

$ valgrind --tool=massif ./qtest -f ./traces/trace-01-ops.cmd
$ ms_print massif.out.101289
ms_print massif.out.101289
--------------------------------------------------------------------------------
Command:            ./qtest -f ./traces/trace-01-ops.cmd
Massif arguments:   (none)
ms_print arguments: massif.out.101289
--------------------------------------------------------------------------------


    KB
14.59^                                                             ###        
     |                                                             #  ::: ::  
     |                                                             #  :  ::   
     |                                                             #  :  ::   
     |                                                             #  :  :: : 
     |                                                     :@:::@@@#  :  @: : 
     |                                                     :@:::@@@#  :  @: : 
     |                                                     :@:::@@@#  :  @: : 
     |                                                     :@:::@@@#  :  @: : 
     |                                                     :@:::@@@#  :  @: : 
     |                                                     :@:::@@@#  :  @: : 
     |                                                     :@:::@@@#  :  @: : 
     |                                                   :@@@:::@@@#  :  @: : 
     |                                                   :@@@:::@@@#  :  @: : 
     |                                                   :@@@:::@@@#  :  @: : 
     |                                                   :@@@:::@@@#  :  @: : 
     |                                                   :@@@:::@@@#  :  @: @ 
     |                                                   :@@@:::@@@#  :  @: @ 
     |                                                   @@@@:::@@@#  :  @: @:
     |                                                  @@@@@:::@@@#  :  @: @:
   0 +----------------------------------------------------------------------->ki
     0                                                                   305.6

變更 qtest.c 後印出的東西為

$ ms_print massif.out.133040
--------------------------------------------------------------------------------
Command:            ./qtest -f ./traces/trace-01-ops.cmd
Massif arguments:   (none)
ms_print arguments: massif.out.133040
--------------------------------------------------------------------------------


    KB
13.81^                                                             #          
     |                                                             #: ::::::  
     |                                                             #  :  :    
     |                                                             #  :  :    
     |                                                             # ::  :  : 
     |                                                     :::::@@@# ::  :  : 
     |                                                     :::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                     @::::@@@# ::  :  @ 
     |                                                   :@@::::@@@# ::  :  @:
   0 +----------------------------------------------------------------------->ki
     0                                                                   295.8

在 qtest 提供新的命令 shuffle

在這邊會使用到 Fisher–Yates shuffle 的演算法。

流程大致如下:

原始 array: [0, 1, 2, 3, 4]

     隨機數 原陣列內容         變更後陣列內容
0-4  3     [0, 1, 2, 3, 4]  [0, 1, 2, 4, 3]
0-3  1     [0, 1, 2, 4, 3]  [0, 2, 4, 3, 1]
0-2. 1     [0, 2, 4, 3, 1]  [0, 4, 3, 1, 2]
0-1  0.    [0, 4, 3, 1, 2]  [4, 3, 1, 2, 0]

在資料型態為 array 的狀態中,時間複雜度為 O(n),但是若資料型態為 linked list,由於會有把 node 從特定的位置挪至最後面,時間複雜度會增加到 O(n2)

由於不能夠修改 queue.h 這份檔案,因此只好將 q_shuffle 這個函式實作在 qtest.c

static void q_shuffle(struct list_head *head)
{
    if (!head || list_empty(head))
        return;

    srand(time(NULL));
    int size = q_size(head);

    for (int i = 0; i < size - 1; ++i) {
        struct list_head *tmp = head->next;
        int n = rand() % (size - i - 1);

        for (int j = 0; j < n; ++j) {
            tmp = tmp->next;
        }

        list_move_tail(tmp, head);
    }
}

static bool do_shuffle(int argc, char *argv[])
{
    if (argc != 1) {
        report(1, "%s takes no arguments", argv[0]);
        return false;
    }

    if (!l_meta.l)
        report(3, "Warning: Calling shuffle on null queue");
    error_check();

    set_noallocate_mode(true);
    if (exception_setup(true))
        q_shuffle(l_meta.l);
    exception_cancel();

    set_noallocate_mode(false);
    show_queue(3);
    return !error_check();
}

並同時新增下面的程式碼至 test.c 內,使其可以執行 shuffle。

ADD_COMMAND(shuffle, "                | Shuffle queue");

參考資料