Try   HackMD

2023q1 Homework1 (lab0)

contributed by < brianlin314 >

開發環境

$ gcc --version
gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0

$ lscpu
Architecture:            x86_64
  CPU op-mode(s):        32-bit, 64-bit
  Address sizes:         39 bits physical, 48 bits virtual
  Byte Order:            Little Endian
CPU(s):                  12
  On-line CPU(s) list:   0-11
Vendor ID:               GenuineIntel
  Model name:            12th Gen Intel(R) Core(TM) i5-12400
    CPU family:          6
    Model:               151
    Thread(s) per core:  2
    Core(s) per socket:  6
    Socket(s):           1
    Stepping:            2
    CPU max MHz:         5600.0000
    CPU min MHz:         800.0000
    BogoMIPS:            4992.00

C Programming lab

q_new

/* Create an empty queue */
struct list_head *q_new()
{
    struct list_head *head = malloc(sizeof(struct list_head));
    if (!head) {
        return NULL;
    }
    INIT_LIST_HEAD(head);
    return head;
}

malloc 會在 size 為 0 時回傳 NULL,因此需要在 malloc 後檢查 head 是否為 NULL

q_free

/* Free all storage used by佇列*/
void q_free(struct list_head *l) 
{
    if(!l) {
        return;
    }
    element_t *cur, *tmp;
    list_for_each_entry_safe(cur, tmp, l, list) {
        q_release_element(cur);
    }
    free(l);
    return;
}

釋放佇列的所有節點及佇列本身,用 list_for_each_entry_safe 避免釋放 cur 後,無法往下一個節點前進,最後 free(l) 釋放 head

改進漢語描述。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv
已修正
Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

q_insert_headq_insert_tail

/* Insert an element at head of佇列*/
bool q_insert_head(struct list_head *head, char *s)
{
    int str_len = strlen(s);
    if (!head) { 
        return false;
    }
    element_t *element = malloc(sizeof(element_t));
    if (!element) {
        free(element);
        return false;
    }
    element->value = malloc(sizeof(char) * (str_len + 1));
    if (!element->value) {
        free(element);
        return false;
    }
    strncpy(element->value, s, str_len);
    *(element->value + str_len) = '\0';
    list_add(&element->list, head);
    return true;
}
/* Insert an element at tail of佇列*/
bool q_insert_tail(struct list_head *head, char *s)
{
    int str_len = strlen(s);
    if (!head) {
        return false;
    }
    element_t *element = malloc(sizeof(element_t));
    if (!element) {
        free(element);
        return false;
    }
    element->value = malloc(sizeof(char) * (str_len + 1));
    if (!element->value) {
        free(element);
        return false;
    }
    strncpy(element->value, s, str_len);
    *(element->value + str_len) = '\0';
    list_add_tail(&element->list, head);
    return true;
}

新增一個節點,並在最後引用 list_addlist_add_tail ,分別將其加入到 list 的頭或尾。

在實作 q_insert_headq_insert_tail 時,我學到若 strncpy 傳入的字元個數為 str_len + 1 ,那 strncpy 會自動補上 \0 ,但我還是選擇額外多寫一行程式碼存入\0

為何「選擇額外多寫一行程式碼存入\0 」?請詳述你的考量,以及日後如何改進。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

q_remove_headq_remove_tail

/* Remove an element from head of佇列*/
element_t *q_remove_head(struct list_head *head, char *sp, size_t bufsize)
{
    if (!head || list_empty(head)) {
        return NULL;
    }
    element_t *first = list_first_entry(head, element_t, list);
    list_del(&first->list);
    if (sp) {
        strncpy(sp, first->value, bufsize - 1);
        *(sp + bufsize - 1) = '\0';
    }
    return first;
}
/* Remove an element from tail of佇列*/
element_t *q_remove_tail(struct list_head *head, char *sp, size_t bufsize)
{
    if (!head || list_empty(head)) {
        return NULL;
    }
    element_t *last = list_last_entry(head, element_t, list);
    list_del(&last->list);
    if (sp) {
        strncpy(sp, last->value, bufsize - 1);
        *(sp + bufsize - 1) = '\0';
    }
    return last;

先使用 list_first_entrylist_last_entry 取得頭尾節點的位址後,再使用 list_del 移除節點。

q_size

/* Return number of elements in佇列*/
int q_size(struct list_head *head)
{
    if(!head || list_empty(head)) {
        return 0;
    }
    int cnt = 0;
    struct list_head *node, *safe;
    list_for_each_safe(node, safe, head) {
        cnt++;
    }
    return cnt;
}

使用 list_for_each_safe 走訪整個list,以此算出節點數。

q_delete_mid

/* Delete the middle node in佇列*/
bool q_delete_mid(struct list_head *head)
{
    // https://leetcode.com/problems/delete-the-middle-node-of-a-linked-list/
    if (!head || list_empty(head))
        return false;
    struct list_head *forward = head->next;
    struct list_head *backward = head->prev;
    while (backward->next != forward && backward != forward) {
        forward = forward->next;
        backward = backward->prev;
    }
    list_del(forward);

    // delete entry
    element_t *middle = list_entry(forward, element_t, list);
    q_release_element(middle);
    return true;
}

forwardbackward 分別指向 head 的下一個與前一個節點,While 迴圈的中止條件為:

  1. forwardbackward 跑到同一節點上
  2. forwardbackward 擦肩而過時

因為本題需要進行 delete 而非 remove ,所以最後刪除 forward 所在的節點。

q_delete_dup

沒必要完整張貼程式碼,開發紀錄著重於你的思考過程和遇到的技術問題。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv
了解~之後會改進
Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

/* Delete all nodes that have duplicate string */
bool q_delete_dup(struct list_head *head)
{
    // https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/
    if (head == NULL || list_empty(head)) {
        return false;
    }
    struct list_head *node = head;
    element_t *p = NULL, *q = NULL, *tmp = NULL;
    char *tmp_value = NULL;
    while (node->next != head && node->next->next != head) {
        p = list_entry(node->next, element_t, list);
        q = list_entry(node->next->next, element_t, list);
        int str_len = strlen(p->value);
        tmp_value = malloc(sizeof(char) * (str_len + 1));
        strncpy(tmp_value, p->value, str_len);
        *(tmp_value + str_len) = '\0';
        if (!strcmp(p->value, q->value)) {
            tmp = list_entry(node->next, element_t, list);
            do {
                list_del_init(&tmp->list);
                q_release_element(tmp);
                tmp = list_entry(node->next, element_t, list);
            } while(node->next != head && !strcmp(tmp->value, tmp_value));
        }
        else {
            node = node->next;
        }
        free(tmp_value);
    }
    return true;
}

指標 node 指向 head,然後對 list 進行走訪,若 node->nextnode->next->next 對應的 value 相同,就將 node->next 及所有後面有相同 value 的節點刪除,記下元素值 tmp_value,之後不斷將 node->next 從list中移除,直到 node->next 指到 head 或者節點 value 不等於 tmp_value 時,即可刪除list中 value 為 tmp_value 的所有節點。

node->nextnode->next->next 對應的 value 不同,就執行 node = node->next

queue.c:168:31: style: Condition 'node->next!=head' is always true [knownConditionTrueFalse]
            while (node->next != head && !strcmp(tmp->value, tmp_value)) {
                              ^

Fail to pass static analysis.

此版本在進行 git commit 時,卻遇到以下錯誤,但若不加 node->next != head ,就無法通過 [3,2,1,1,1] 的測試,且此種寫法確實會造成其他人 code review 的不易,於是捨棄此種寫法,更改為以下:

改用 diff 呈現變更,避免張貼重複的程式碼。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv
收到
Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

@@ -1,31 +1,22 @@
 bool q_delete_dup(struct list_head *head)
 {
     // https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/
-    if (head == NULL || list_empty(head)) {
+    if (!head || list_empty(head) || list_is_singular(head)){
         return false;
     }
-    struct list_head *node = head;
-    element_t *p = NULL, *q = NULL, *tmp = NULL;
-    char *tmp_value = NULL;
-    while (node->next != head && node->next->next != head) {
-        p = list_entry(node->next, element_t, list);
-        q = list_entry(node->next->next, element_t, list);
-        int str_len = strlen(p->value);
-        tmp_value = malloc(sizeof(char) * (str_len + 1));
-        strncpy(tmp_value, p->value, str_len);
-        *(tmp_value + str_len) = '\0';
-        if (!strcmp(p->value, q->value)) {
-            tmp = list_entry(node->next, element_t, list);
-            do {
-                list_del_init(&tmp->list);
-                q_release_element(tmp);
-                tmp = list_entry(node->next, element_t, list);
-            } while(node->next != head && !strcmp(tmp->value, tmp_value));
+
+    element_t *curr = NULL, *next = NULL;
+    bool check = false;
+    list_for_each_entry_safe (curr, next, head, list) {
+        if (&next->list != head && !strcmp(curr->value, next->value)) {
+            list_del_init(&curr->list);
+            q_release_element(curr);
+            check = true;
+        } else if (check) {
+            list_del_init(&curr->list);
+            q_release_element(curr);
+            check = false;
         }
-        else {
-            node = node->next;
-        }
-        free(tmp_value);
     }
     return true;
 }

使用 list_for_each_entry_safe 走訪 list ,得到當前與下一個節點的位址,並使用 strcmp 比對 value 是否相同,若相同,則刪除 curr 節點,並設 check = true; 若不同,則判斷若 check = true,要刪除 curr 節點。

q_swap

/* Swap every two adjacent nodes */
void q_swap(struct list_head *head)
{
    // https://leetcode.com/problems/swap-nodes-in-pairs/
    if(head == NULL || list_is_singular(head)) {
        return;
    }
    struct list_head *first = head->next;
    while(first != head && first->next != head) {
        struct list_head *second = first->next;
        list_del_init(first);
        list_add(first,second);
        first = first->next;
    }
    return;
}

先判斷 head 是否為 NULL 或 list 是否只存在一個節點,先將第一個節點 first 移除,再將其加回第二個節點的頭,即可完成兩個節點的變換位置,而此時 first->next 所指的節點也剛好會是尚未變換位置的第一個節點。

q_reverse

/* Reverse elements in queue */
void q_reverse(struct list_head *head) 
{
    if(!head || list_empty(head)){
        return;
    }
    struct list_head *node, *safe;
    list_for_each_safe(node, safe, head) {
        list_move(node, head);
    }
    return;
}

使用 list_for_each_safe 走訪整個list,並將走訪到的 node ,移回 head 的頭,如此一來,就可以很簡單的實做 reverse

q_descend

int q_descend(struct list_head *head)
{
    // https://leetcode.com/problems/remove-nodes-from-linked-list/
    if (head == NULL || list_empty(head)) {
        return 0;
    }
    struct list_head *node = head->prev;
    int total = 1, delete = 0;
    element_t *curr = NULL, *check = NULL;
    char *num = NULL;
    while (&check->list != head && node->prev != head) {
        curr = list_entry(node, element_t, list);
        check = list_entry(node->prev, element_t, list);
        int str_len = strlen(curr->value);
        num = malloc(sizeof(char) * (str_len + 1));
        strncpy(num, curr->value, str_len);
        *(num + str_len) = '\0';
        if (strcmp(check->value, num) < 0) {
            list_del(&check->list);
            q_release_element(check);
            delete += 1;
        } else {
            node = node->prev;
        }
        free(num);
        total += 1;
    }
    return total - delete; 
}

架構 實做方法與 q_delete_dup 相同,只是將 next 全部改為 prev,即對 list 做反向操作,若 node->prev 的 value 小於node 的 value,則刪除該節點,若不小於,則 node = node->prev

q_reverseK

/* Reverse the nodes of the list k at a time */
void q_reverseK(struct list_head *head, int k)
{
    // https://leetcode.com/problems/reverse-nodes-in-k-group/
    if (!head || list_empty(head) || list_is_singular(head) || k <= 2) {
        return;
    }

    int list_length = q_size(head);
    struct list_head **change = NULL;
    struct list_head *header = NULL, *curr = head->next;
    while (curr != head && curr->next != head) {
        change = &curr->next;
        header = curr->prev;
        for (int i = 1; i < k; i++) {
            if (list_length >= k) {
                list_move(*change, header);
            }
        }
        list_length -= k;
        curr = curr->next;
    }
    return;
}

作法與 reverse 相同,但改為 K 個一組進行反轉,且若該組的節點數小於 K ,則維持原樣 header 紀錄每一組的頭節點,change 紀錄要插入到該組的頭的節點,是一個 pointer to pointer ,作用為避免因為指標的改變,而喪失目標位置,可以達到簡化程式碼的長度。

q_sort

參考了你所不知道的C 語言: linked list 和非連續記憶體中的 Merge Sort ,分成三個部份:merge_two_lists, merge_sort, q_sort

merge_two_lists
struct list_head *merge_two_list(struct list_head *left,
                                 struct list_head *right)
{
    struct list_head *head = NULL, **ptr = &head;
    element_t *left_node = NULL, *right_node = NULL;
    for (; left && right; ptr = &(*ptr)->next) {
        left_node = list_entry(left, element_t, list);
        right_node = list_entry(right, element_t, list);
        if (strcmp(left_node->value, right_node->value) < 0) {
            *ptr = left;
            left = left->next;
        } else {
            *ptr = right;
            right = right->next;
        }
    }
    *ptr = (struct list_head *) ((uintptr_t) left | (uintptr_t) right);
    return head;
}
}

將兩個不同的 list 合併並排序,使用 leftright 指標分別指向兩個 list 的開頭,比較指標內容,將較小的節點放到新的 list 中。

迴圈迭代完成後,會剩下其中一個 list 中尚存在節點,採 bit-wsie 找出非空的節點且能加速運算。

merge_sort
struct list_head *merge_sort(struct list_head *head)
{
    if (!head || !head->next) {
        return head;
    }

    struct list_head *fast, *slow = head;
    for (fast = head->next; fast && fast->next; fast = fast->next->next) {
        slow = slow->next;
    }

    struct list_head *left, *right;

    right = slow->next;
    slow->next = NULL;
    left = merge_sort(head);
    right = merge_sort(right);
    return merge_two_list(left, right);
}

使用快慢指針技巧找到第一個與中間節點,並遞迴呼叫 merge_sort ,將左邊的串列與右邊的串列也各自找出中間節點,最後會切成一個一個節點,過程中要將切完的串列的最後一個節點的 next 指向 NULL ,最後呼叫 merge_two_lists 將兩個串合併。

q_sort
/* Sort elements of queue in ascending order */
void q_sort(struct list_head *head)
{
    if (!head || list_empty(head) || list_is_singular(head)) {
        return;
    }

    head->prev->next = NULL;
    head->next = merge_sort(head->next);

    struct list_head *curr = head, *node = head->next;
    while (node) {
        node->prev = curr;
        curr = node;
        node = node->next;
    }
    curr->next = head;
    head->prev = curr;
    return;
}
}

先將串列的最後一個節點指向 NULL,作為其餘兩個函式的終止條件,並呼叫 merge_sort 進行排序,最後的 while 迴圈,是為了將所有節點的 prev 重新串接回去,使其重新符合 circular doubly-linked list。

注意書寫: doubly-linked list 裡頭連字號的位置。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv
感謝老師提醒~
Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

q_merge

/* Merge all the queues into one sorted queue, which is in ascending order */
int q_merge(struct list_head *head)
{
    // https://leetcode.com/problems/merge-k-sorted-lists/
    if(!head || list_empty(head)) {
        return 0;
    }
    if(list_is_singular(head)) {
        return list_entry(head->next, queue_contex_t, chain)->size;
    }
    queue_contex_t *merged_list = list_entry(head->next, queue_contex_t, chain);
    struct list_head *node = NULL, *safe = NULL;
    list_for_each_safe (node, safe, head) {
        if(node == head->next) {
            continue;
        }
        queue_contex_t *tmp = list_entry(node, queue_contex_t, chain);
        list_splice_init(tmp->q, merged_list->q);
    }
    q_sort(merged_list->q);
    return merged_list->size;
}

實作 q_merge 前,必須先熟知 queue_contex_t 這個資料結構,將各queue_contex_t 所指的佇列全部串連起來,最後使用實做過的 q_sort 排序以串連的串列即可。


研讀 lib/list_sort.c

與其逐行張貼程式碼,你應該闡述 Linux 核心開發者的考量因素,還有這些工程取捨如何反映到整體的設計中。避免「舉燭」!

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv
學生收到
Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

避免非必要的條列 (及 - 開頭的項目符號),用清晰且完整的漢語展現。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv
了解,會改進使用 - 的時機點

上方筆記也該調整

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

torvalds/linux list_sort.c

  • 分為 3 個函式:
    • merge
    • merge_final
    • list_sort

研讀 list_sort 函式提供的註解,此外list_sort 需要3個參數:

  1. priv 是一個 private data,用來傳遞 cmp 所需的參數

  2. head 為要被排序的list

  3. cmp 是一個元素比較大小的函式指標

    ​​​​typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *,
    ​​​​    const struct list_head *, const struct list_head *);
    
    • @cmp 的回傳值是 int 型態,@cmp 的第一個參數負責接收 @priv。第二個參數 @a 與第三個參數 @b 為串列的 Node。
    • 如果 @a 應該排在 @b 之後,即 @cmp 回傳 >0 時,表示要進行 ascending sort
    • 如果 @a 應該排在 @b 之前,即 @cmp 回傳 <=0 時,表示要進行 descending sort 或保留原本狀態
    • 且維持 stable sort,因此不需要區分 @a < @b@a == @b 的情況。
    ​​​​* The comparison function @cmp must return > 0 if @a should sort after
    ​​​​ * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should
    ​​​​ * sort before @b *or* their original order should be preserved.  It is
    ​​​​ * always called with the element that came first in the input in @a,
    ​​​​ * and list_sort is a stable sort, so it is not necessary to distinguish
    ​​​​ * the @a < @b and @a == @b cases.
    

接下來提到merge sort的實作細節,方法會保持 2 : 1 的 list,假設有兩個 2k 大小的 list , merge sort 不會直接合併,而是等到有第 3 個長度為 2k 的 list 才進行合併,變成一個 2k+1 長度的 list 及一個 2k 長度的 list 。
只要這 2 : 1 的 list 都可以被放進 cache 裡,就可以避免 Cache thrashing 問題。

 * This mergesort is as eager as possible while always performing at least
 * 2:1 balanced merges.  Given two pending sublists of size 2^k, they are
 * merged to a size-2^(k+1) list as soon as we have 2^k following elements.
 *
 * Thus, it will avoid cache thrashing as long as 3*2^k elements can
 * fit into the cache.  Not quite as good as a fully-eager bottom-up
 * mergesort, but it does use 0.2*n fewer comparisons, so is faster in
 * the common case that everything fits into L1.

要實作上述方法,會透過一個變數 count 去計算目前 pending 上的節點總數,當每次 count 加1時,會將第 k 個 bit 設為 1 ,而 k-1 到 0 bit 則設為 0,且當 count 來到 2k 時,就把兩個 2k 長度的 list 做合併。

初始化

struct list_head *list = head->next, *pending = NULL;
	size_t count = 0;	/* Count of pending */

	if (list == head->prev)	/* Zero or one elements */
		return;

	/* Convert to a null-terminated singly-linked list. */
	head->prev->next = NULL;

先檢查 list 是否為空或只有一個節點,接著將最後一個節點的 next 指向 NULL







ele_list



e2

Node 1

prev

next



e3

Node 2

prev

next



e2:right->e3:top





e3:left->e2:top





e4

.....

prev

next



e3:right->e4:top





e4:left->e3:top





e5

Node N

prev

next



e4:right->e5:top





e5:left->e4:top





NULL
NULL



e5:right->NULL





p
pending



p->NULL





l
list



l->e2





tail
tail



tail->p





走訪整個 list 並執行 merge

 do {
        size_t bits;
        struct list_head **tail = &pending;

        /* Find the least-significant clear bit in count */
        for (bits = count; bits & 1; bits >>= 1)
            tail = &(*tail)->prev;
        /* Do the indicated merge */
        if (likely(bits)) {
            struct list_head *a = *tail, *b = a->prev;
            a = merge(priv, cmp, b, a);
            /* Install the merged result in place of the inputs */
            a->prev = b->prev;
            *tail = a;
        }

        /* Move one element from input list to pending */
        list->prev = pending;
        pending = list;
        list = list->next;
        pending->next = NULL;
        count++;
    } while (list);

以下為 count 0 到 15 的狀態:

  • 第三欄為為每次 do while 迴圈迭代完成後,所有 pending sublist 的狀態,每個以逗號隔開的數字代表該 sublist 的 Node 數量,且一個數字自為一個sublist。
    • count = 4 為例,[2,1,1,1] 的第一個 2 代表這個 sublist 中有 2 個node,分別是 node1、 node2,接下來的 1 各分別代表 node3、node4、node5,因為第2和3個的sublist長度都為 20 (即只有一個節點),且第 4 個sublist長度也是 20,所以進行 merge,因此最終狀態變為[2,2,1]。

      count<十進制> count<二進制> sublist 的狀態
      0
      0b0000
      [1]
      1
      0b0001
      [1,1]
      2 (merge)
      0b0010
      [1,1,1] -> [2,1]
      3
      0b0011
      [2,1,1]
      4 (merge)
      0b0100
      [2,1,1,1] -> [2,2,1]
      5 (merge)
      0b0101
      [2,2,1,1] -> [4,1,1]
      6 (merge)
      0b0110
      [4,1,1,1] -> [4,2,1]
      7
      0b0111
      [4,2,1,1]
      8 (merge)
      0b1000
      [4,2,1,1,1] -> [4,2,2,1]
      9 (merge)
      0b1001
      [4,2,2,1,1] -> [4,4,1,1]
      10 (merge)
      0b1010
      [4,4,1,1,1] -> [4,4,2,1]
      11 (merge)
      0b1011
      [4,4,2,1,1] -> [8,2,1,1]
      12 (merge)
      0b1100
      [8,2,1,1,1] -> [8,2,2,1]
      13 (merge)
      0b1101
      [8,2,2,1,1] -> [8,4,1,1]
      14 (merge)
      0b1110
      [8,4,1,1,1] -> [8,4,2,1]
      15
      0b1111
      [8,4,2,1,1]

由以上程式碼與例子可得知:

  • pending 是已排序好,但尚未被 merge 的 sublist

    • 每個排序好的 sublist 長度為 2k
  • bits 用於判斷何時必須 merge ,會檢查目前的 sublist 是否滿足 2k 個 Node,

    • 若目前有 2k 個 Node,if(likely(bits)) 就會成立,並呼叫 merge 來合併最新的兩個 pending sublists,然後讓 list 指向下一個 Node;
    • 反之,list 指向下一個 Node。
  • 在每次迭代時,list 會指向第 count 個 Node,pending 會指向前一個 Node,indirect pointer tail 會指向 &pending 並在 bits 向右移時,一直指向 tail = &(*tail)->prev 藉以把自己移動到可能將被 merge 的 sublist,並在 sublist 被 merge 後更新 prev

將剩餘的 sublist merge,並使其再次成為 circular doubly-linked list

當 do while 迭代完成後,以上述例子 n = 15 為例,最後會剩下 [8,4,2,1] 4 個 pending sublist ,透過 for 迴圈合併:[8,4,2,1] -> [8,4,3] -> [8,7]。最後呼叫 merge_final 來合併剩餘的 [8,7],且 merge_final 會將整個 list 恢復成 circular doubly- linked list。

for (;;) { struct list_head *next = pending->prev; if (!next) break; list = merge(priv, cmp, pending, list); pending = next; }

list_sort.c 引入到 lab0-c 專案

參考 linhoward/linux2023-lab0 進行實做
研讀 qtest 命令直譯器的實作後,了解到要新增 qtestcmd 的流程,紀錄在下方。

  1. list_sort.c的程式碼加入到 qtest.c 中。
  2. 在程式碼中有使用到 likelyunlikely 這兩個巨集,因此也需加到qtest.c 中。
    ​​​​ # define likely(x) __builtin_expect(!!(x), 1) ​​​​ # define unlikely(x) __builtin_expect(!!(x), 0)
  3. list_cmp_func_t 的定義加入到 qtest.c,並實作該 function。
    ​​​​typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *, ​​​​ const struct list_head *, const struct list_head *);
    ​​​​static int cmp_fun(void *priv, ​​​​ const struct list_head *a, ​​​​ const struct list_head *b) ​​​​{ ​​​​ return strcmp(list_entry(a, element_t, list)->value, ​​​​ list_entry(b, element_t, list)->value); ​​​​}
  4. 實做 do_linux_sort,且在 console_init 新增 command。
    ​​​​ADD_COMMAND(linux_sort, "Sort queue in ascending order by linux kerenl","");
  5. 編譯時會出現錯誤訊息:
    ​​​​qtest.c: In function ‘merge_final’:
    ​​​​qtest.c:901:9: error: unknown type name ‘u8’
    ​​​​  901 |         u8 count = 0;
    ​​​​      |         ^~
    
    需將 merge_final 中的 u8 改成 uint8_t

比較自己實作的 merge sort 和 Linux 核心程式碼之間效能落差

參考 perf 原理和實務linhoward/linux2023-lab0 進行安裝與實做

可使用以下命令查看是否開啟 perf

cat "/boot/config-`uname -r`" | grep "PERF_EVENT"

若想要檢測 cache miss event ,需要先取消 kernel pointer 的禁用。

sudo sh -c " echo 0 > /proc/sys/kernel/kptr_restrict"

先撰寫想要進行測試的 .cmd 檔( RAND_1000.cmd ),以下為測試 q_sort ,若想要測試 linux list_sort ,則將 q_sort 改為 list_sort

option fail 0
option malloc 0
new
ih RAND 1000
q_sort
free

使用 shell script 來執行 linux_sortq_sort 的效能測試,shell script 撰寫參考 鳥哥私房菜-學習 Shell Scripts

#!/bin/bash
for i in ./traces/traces_perf_linux_sort/*.cmd
do
    perf stat --repeat 5 -o "${i%.cmd}"_report -e cache-misses,branches,instructions,context-switches ./qtest -v 0 -f "$i"
done

以下為跑完測試的 .report 檔與比較表格

# started on Tue Feb 28 13:05:42 2023


 Performance counter stats for './qtest -v 0 -f ./traces/traces_perf_linux_sort/RAND_1000.cmd' (5 runs):

              2014      cpu_core/cache-misses/                                        ( +-103.03% )
           77,8034      cpu_core/branches/                                            ( +-  0.11% )
          514,7021      cpu_core/instructions/                                        ( +-  0.09% )
                 0      context-switches                                            

          0.004317 +- 0.000648 seconds time elapsed  ( +- 15.01% )


  • linux list_sort

    # node cache-misses branches instructions context-switches time
    1000 2014 77,8034 514,7021 0 0.004317
    10000 5333 615,8033 4291,8014 0 0.006202
    100000 39,5825 6296,2214 4,3431,3455 0 0.058937
    1000000 3577,2642 6,6064,3230 44,8546,9541 5 0.927776
  • q_sort

    # node cache-misses branches instructions context-switches time
    1000 3091 77,8123 510,0095 0 0.004366
    10000 1986 621,7308 4251,1116 0 0.00755
    100000 61,7447 6376,2295 4,3054,4077 0 0.061952
    1000000 4560,3884 6,6678,2837 44,2331,4748 4 0.99760

從我的實驗數據中可以發現兩者的差異不大,雖然隨機產生節點資料可能偶爾會讓 q_sort 的效能看起來更好,但從執行時間上來看都還是 list_sort 快了一點。

善用 gnuplot 製圖,你該分析效能落差的成因。

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv


Valgrind 排除 qtest 實作的記憶體錯誤

Valgrind 提供動態分析,用來追蹤及分析程式效能,幫助使用者偵測記憶體錯誤,諸如使用未初始化的記憶體、不當的記憶體配置、或取消配置記憶體,並加以分析。

執行命令 make valgrind 後,trace-13-malloc 得到 0 分

+++ TESTING trace trace-13-malloc:
# Test of malloc failure on new
==10966== 32 bytes in 1 blocks are still reachable in loss record 1 of 3
==10966==    at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==10966==    by 0x10CBE6: do_new (qtest.c:146)
==10966==    by 0x10DE12: interpret_cmda (console.c:181)
==10966==    by 0x10E3C7: interpret_cmd (console.c:201)
==10966==    by 0x10E7C8: cmd_select (console.c:610)
==10966==    by 0x10F0B4: run_console (console.c:705)
==10966==    by 0x10D204: main (qtest.c:1228)
==10966== 
==10966== 160 bytes in 5 blocks are possibly lost in loss record 2 of 3
==10966==    at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==10966==    by 0x10CBE6: do_new (qtest.c:146)
==10966==    by 0x10DE12: interpret_cmda (console.c:181)
==10966==    by 0x10E3C7: interpret_cmd (console.c:201)
==10966==    by 0x10E7C8: cmd_select (console.c:610)
==10966==    by 0x10F0B4: run_console (console.c:705)
==10966==    by 0x10D204: main (qtest.c:1228)
==10966== 
==10966== 168 bytes in 3 blocks are still reachable in loss record 3 of 3
==10966==    at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==10966==    by 0x10F128: test_malloc (harness.c:133)
==10966==    by 0x10F52D: q_new (queue.c:18)
==10966==    by 0x10CC1F: do_new (qtest.c:150)
==10966==    by 0x10DE12: interpret_cmda (console.c:181)
==10966==    by 0x10E3C7: interpret_cmd (console.c:201)
==10966==    by 0x10E7C8: cmd_select (console.c:610)
==10966==    by 0x10F0B4: run_console (console.c:705)
==10966==    by 0x10D204: main (qtest.c:1228)
==10966== 
---	trace-13-malloc	0/6

尋著錯誤訊息,找到在 qtest.c 中的 q_quit 函式可能存在錯誤,該函式功能為釋放佇列的所有記憶體,首先要先使用 q_free 釋放所有 queue_context_t 指向的 q 的所有記憶體,再釋放 queue_context_t 的所有記憶體,所以執行 make valgrind 會顯示還有記憶體未釋放。

因為在此函式中的第一行,直接執行 return true ,所以將這行註解掉即可。

修改後,再次執行 make valgrind ,發現 trace-02-ops 沒有過,於是去檢查 delete_mid 的實做,發現我只有 free(middle) ,但是應該要先進行 free(middle->value) 才對,所以更改程式碼為 q_release_element(middle)

+++ TESTING trace trace-02-ops:
# Test of insert_head, insert_tail, remove_head, remove_tail, and delete_mid
ERROR: Freed queue, but 2 blocks are still allocated
==13726== 47 bytes in 1 blocks are still reachable in loss record 1 of 2
==13726==    at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==13726==    by 0x10F1FB: test_malloc (harness.c:133)
==13726==    by 0x10F6B9: q_insert_head (queue.c:51)
==13726==    by 0x10CBD2: do_ih (qtest.c:224)
==13726==    by 0x10DEE5: interpret_cmda (console.c:181)
==13726==    by 0x10E49A: interpret_cmd (console.c:201)
==13726==    by 0x10E89B: cmd_select (console.c:610)
==13726==    by 0x10F187: run_console (console.c:705)
==13726==    by 0x10D2D7: main (qtest.c:1228)
==13726== 
==13726== 48 bytes in 1 blocks are still reachable in loss record 2 of 2
==13726==    at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==13726==    by 0x10F1FB: test_malloc (harness.c:133)
==13726==    by 0x10F757: q_insert_tail (queue.c:72)
==13726==    by 0x10C8C7: do_it (qtest.c:313)
==13726==    by 0x10DEE5: interpret_cmda (console.c:181)
==13726==    by 0x10E49A: interpret_cmd (console.c:201)
==13726==    by 0x10E89B: cmd_select (console.c:610)
==13726==    by 0x10F187: run_console (console.c:705)
==13726==    by 0x10D2D7: main (qtest.c:1228)
==13726== 
---	trace-02-ops	0/6

再次執行make valgrind ,就沒有再顯示任何記憶體錯誤資訊。

brian@brian-linux:~/linux/lab0-c$ make valgrind
# Explicitly disable sanitizer(s)
make clean SANITIZER=0 qtest
make[1]: Entering directory '/home/brian/linux/lab0-c'
rm -f qtest.o report.o console.o harness.o queue.o random.o dudect/constant.o dudect/fixture.o dudect/ttest.o shannon_entropy.o linenoise.o web.o .qtest.o.d .report.o.d .console.o.d .harness.o.d .queue.o.d .random.o.d .dudect/constant.o.d .dudect/fixture.o.d .dudect/ttest.o.d .shannon_entropy.o.d .linenoise.o.d .web.o.d *~ qtest /tmp/qtest.*
rm -rf .dudect
rm -rf *.dSYM
(cd traces; rm -f *~)
  CC	qtest.o
  CC	report.o
  CC	console.o
  CC	harness.o
  CC	queue.o
  CC	random.o
  CC	dudect/constant.o
  CC	dudect/fixture.o
  CC	dudect/ttest.o
  CC	shannon_entropy.o
  CC	linenoise.o
  CC	web.o
  LD	qtest
make[1]: Leaving directory '/home/brian/linux/lab0-c'
cp qtest /tmp/qtest.XEguXt
chmod u+x /tmp/qtest.XEguXt
sed -i "s/alarm/isnan/g" /tmp/qtest.XEguXt
scripts/driver.py -p /tmp/qtest.XEguXt --valgrind 
---	Trace		Points
+++ TESTING trace trace-01-ops:
# Test of insert_head and remove_head
---	trace-01-ops	5/5
+++ TESTING trace trace-02-ops:
# Test of insert_head, insert_tail, remove_head, remove_tail, and delete_mid
---	trace-02-ops	6/6
+++ TESTING trace trace-03-ops:
# Test of insert_head, insert_tail, remove_head, reverse and merge
---	trace-03-ops	6/6
+++ TESTING trace trace-04-ops:
# Test of insert_head, insert_tail, size, swap, and sort
---	trace-04-ops	6/6
+++ TESTING trace trace-05-ops:
# Test of insert_head, insert_tail, remove_head, reverse, size, swap, and sort
---	trace-05-ops	6/6
+++ TESTING trace trace-06-ops:
# Test of insert_head, insert_tail, delete duplicate, sort, descend and reverseK
---	trace-06-ops	6/6
+++ TESTING trace trace-07-string:
# Test of truncated strings
---	trace-07-string	6/6
+++ TESTING trace trace-08-robust:
# Test operations on empty queue
---	trace-08-robust	6/6
+++ TESTING trace trace-09-robust:
# Test remove_head with NULL argument
---	trace-09-robust	6/6
+++ TESTING trace trace-10-robust:
# Test operations on NULL queue
---	trace-10-robust	6/6
+++ TESTING trace trace-11-malloc:
# Test of malloc failure on insert_head
---	trace-11-malloc	6/6
+++ TESTING trace trace-12-malloc:
# Test of malloc failure on insert_tail
---	trace-12-malloc	6/6
+++ TESTING trace trace-13-malloc:
# Test of malloc failure on new
---	trace-13-malloc	6/6
+++ TESTING trace trace-14-perf:
# Test performance of insert_tail, reverse, and sort
---	trace-14-perf	6/6
+++ TESTING trace trace-15-perf:
# Test performance of sort with random and descending orders
# 10000: all correct sorting algorithms are expected pass
# 50000: sorting algorithms with O(n^2) time complexity are expected failed
# 100000: sorting algorithms with O(nlogn) time complexity are expected pass
---	trace-15-perf	6/6
+++ TESTING trace trace-16-perf:
# Test performance of insert_tail
---	trace-16-perf	6/6
+++ TESTING trace trace-17-complexity:
# Test if time complexity of q_insert_tail, q_insert_head, q_remove_tail, and q_remove_head is constant
Probably constant time
Probably constant time
Probably constant time
Probably constant time
---     trace-17-complexity     5/5
---	TOTAL		100/100

Test with specific case by running command:
scripts/driver.py -p /tmp/qtest.XEguXt --valgrind -t <tid>

開啟 address sanitizer ,修正 qtest 執行過程中錯誤

make clean
make SANITIZER=1
make test

開啟 address sanitizer,並重新編譯

執行 make test 後,並沒有出現任何記憶體有關之錯誤。

Massif 視覺化

透過 massif 查看記憶體狀況,可以觀察到建立的動態記憶體歸為 0,表示記憶體已被釋放。

$ valgrind --tool=massif ./qtest -f traces/trace-15-perf.cmd
$ massif-visualizer massif.out.*