--- tags: linux2023 --- # 2023q1 Homework1 (lab0) contributed by < `brianlin314` > ## 開發環境 ```shell $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 $ lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Address sizes: 39 bits physical, 48 bits virtual Byte Order: Little Endian CPU(s): 12 On-line CPU(s) list: 0-11 Vendor ID: GenuineIntel Model name: 12th Gen Intel(R) Core(TM) i5-12400 CPU family: 6 Model: 151 Thread(s) per core: 2 Core(s) per socket: 6 Socket(s): 1 Stepping: 2 CPU max MHz: 5600.0000 CPU min MHz: 800.0000 BogoMIPS: 4992.00 ``` ### C Programming lab #### `q_new` ```c /* Create an empty queue */ struct list_head *q_new() { struct list_head *head = malloc(sizeof(struct list_head)); if (!head) { return NULL; } INIT_LIST_HEAD(head); return head; } ``` `malloc` 會在 size 為 0 時回傳 `NULL`,因此需要在 `malloc` 後檢查 head 是否為 `NULL`。 #### `q_free` ```c /* Free all storage used by佇列*/ void q_free(struct list_head *l) { if(!l) { return; } element_t *cur, *tmp; list_for_each_entry_safe(cur, tmp, l, list) { q_release_element(cur); } free(l); return; } ``` 釋放佇列的所有節點及佇列本身,用 `list_for_each_entry_safe` 避免釋放 `cur` 後,無法往下一個節點前進,最後 `free(l)` 釋放 `head`。 :::warning 改進漢語描述。 :notes: jserv 已修正 :+1: ::: #### `q_insert_head` 與 `q_insert_tail` ```c /* Insert an element at head of佇列*/ bool q_insert_head(struct list_head *head, char *s) { int str_len = strlen(s); if (!head) { return false; } element_t *element = malloc(sizeof(element_t)); if (!element) { free(element); return false; } element->value = malloc(sizeof(char) * (str_len + 1)); if (!element->value) { free(element); return false; } strncpy(element->value, s, str_len); *(element->value + str_len) = '\0'; list_add(&element->list, head); return true; } ``` ```c /* Insert an element at tail of佇列*/ bool q_insert_tail(struct list_head *head, char *s) { int str_len = strlen(s); if (!head) { return false; } element_t *element = malloc(sizeof(element_t)); if (!element) { free(element); return false; } element->value = malloc(sizeof(char) * (str_len + 1)); if (!element->value) { free(element); return false; } strncpy(element->value, s, str_len); *(element->value + str_len) = '\0'; list_add_tail(&element->list, head); return true; } ``` 新增一個節點,並在最後引用 `list_add` 、 `list_add_tail` ,分別將其加入到 list 的頭或尾。 在實作 `q_insert_head` 與 `q_insert_tail` 時,我學到若 `strncpy` 傳入的字元個數為 `str_len + 1` ,那 `strncpy` 會自動補上 `\0` ,但我還是選擇額外多寫一行程式碼存入`\0` 。 :::warning 為何「選擇額外多寫一行程式碼存入`\0` 」?請詳述你的考量,以及日後如何改進。 :notes: jserv ::: #### `q_remove_head` 與 `q_remove_tail` ```c /* Remove an element from head of佇列*/ element_t *q_remove_head(struct list_head *head, char *sp, size_t bufsize) { if (!head || list_empty(head)) { return NULL; } element_t *first = list_first_entry(head, element_t, list); list_del(&first->list); if (sp) { strncpy(sp, first->value, bufsize - 1); *(sp + bufsize - 1) = '\0'; } return first; } ``` ```c /* Remove an element from tail of佇列*/ element_t *q_remove_tail(struct list_head *head, char *sp, size_t bufsize) { if (!head || list_empty(head)) { return NULL; } element_t *last = list_last_entry(head, element_t, list); list_del(&last->list); if (sp) { strncpy(sp, last->value, bufsize - 1); *(sp + bufsize - 1) = '\0'; } return last; ``` 先使用 `list_first_entry` 與 `list_last_entry` 取得頭尾節點的位址後,再使用 `list_del` 移除節點。 #### `q_size` ```c /* Return number of elements in佇列*/ int q_size(struct list_head *head) { if(!head || list_empty(head)) { return 0; } int cnt = 0; struct list_head *node, *safe; list_for_each_safe(node, safe, head) { cnt++; } return cnt; } ``` 使用 `list_for_each_safe` 走訪整個list,以此算出節點數。 #### `q_delete_mid` ```c /* Delete the middle node in佇列*/ bool q_delete_mid(struct list_head *head) { // https://leetcode.com/problems/delete-the-middle-node-of-a-linked-list/ if (!head || list_empty(head)) return false; struct list_head *forward = head->next; struct list_head *backward = head->prev; while (backward->next != forward && backward != forward) { forward = forward->next; backward = backward->prev; } list_del(forward); // delete entry element_t *middle = list_entry(forward, element_t, list); q_release_element(middle); return true; } ``` 設 `forward` 和 `backward` 分別指向 `head` 的下一個與前一個節點,While 迴圈的中止條件為: 1. 當 `forward` 與 `backward` 跑到同一節點上 2. 當 `forward` 與 `backward` 擦肩而過時 因為本題需要進行 delete 而非 remove ,所以最後刪除 `forward` 所在的節點。 #### `q_delete_dup` :::warning 沒必要完整張貼程式碼,開發紀錄著重於你的思考過程和遇到的技術問題。 :notes: jserv 了解~之後會改進 :+1: ::: ```c /* Delete all nodes that have duplicate string */ bool q_delete_dup(struct list_head *head) { // https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/ if (head == NULL || list_empty(head)) { return false; } struct list_head *node = head; element_t *p = NULL, *q = NULL, *tmp = NULL; char *tmp_value = NULL; while (node->next != head && node->next->next != head) { p = list_entry(node->next, element_t, list); q = list_entry(node->next->next, element_t, list); int str_len = strlen(p->value); tmp_value = malloc(sizeof(char) * (str_len + 1)); strncpy(tmp_value, p->value, str_len); *(tmp_value + str_len) = '\0'; if (!strcmp(p->value, q->value)) { tmp = list_entry(node->next, element_t, list); do { list_del_init(&tmp->list); q_release_element(tmp); tmp = list_entry(node->next, element_t, list); } while(node->next != head && !strcmp(tmp->value, tmp_value)); } else { node = node->next; } free(tmp_value); } return true; } ``` 指標 `node` 指向 head,然後對 list 進行走訪,若 `node->next` 與 `node->next->next` 對應的 value 相同,就將 `node->next` 及所有後面有相同 value 的節點刪除,記下元素值 `tmp_value`,之後不斷將 `node->next` 從list中移除,直到 `node->next` 指到 `head` 或者節點 `value` 不等於 `tmp_value` 時,即可刪除list中 value 為 `tmp_value` 的所有節點。 若 `node->next` 與 `node->next->next` 對應的 value 不同,就執行 `node = node->next` 。 ``` queue.c:168:31: style: Condition 'node->next!=head' is always true [knownConditionTrueFalse] while (node->next != head && !strcmp(tmp->value, tmp_value)) { ^ Fail to pass static analysis. ``` 此版本在進行 git commit 時,卻遇到以下錯誤,但若不加 `node->next != head` ,就無法通過 [3,2,1,1,1] 的測試,且此種寫法確實會造成其他人 code review 的不易,於是捨棄此種寫法,更改為以下: :::warning 改用 diff 呈現變更,避免張貼重複的程式碼。 :notes: jserv 收到 :+1: ::: ```diff @@ -1,31 +1,22 @@ bool q_delete_dup(struct list_head *head) { // https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/ - if (head == NULL || list_empty(head)) { + if (!head || list_empty(head) || list_is_singular(head)){ return false; } - struct list_head *node = head; - element_t *p = NULL, *q = NULL, *tmp = NULL; - char *tmp_value = NULL; - while (node->next != head && node->next->next != head) { - p = list_entry(node->next, element_t, list); - q = list_entry(node->next->next, element_t, list); - int str_len = strlen(p->value); - tmp_value = malloc(sizeof(char) * (str_len + 1)); - strncpy(tmp_value, p->value, str_len); - *(tmp_value + str_len) = '\0'; - if (!strcmp(p->value, q->value)) { - tmp = list_entry(node->next, element_t, list); - do { - list_del_init(&tmp->list); - q_release_element(tmp); - tmp = list_entry(node->next, element_t, list); - } while(node->next != head && !strcmp(tmp->value, tmp_value)); + + element_t *curr = NULL, *next = NULL; + bool check = false; + list_for_each_entry_safe (curr, next, head, list) { + if (&next->list != head && !strcmp(curr->value, next->value)) { + list_del_init(&curr->list); + q_release_element(curr); + check = true; + } else if (check) { + list_del_init(&curr->list); + q_release_element(curr); + check = false; } - else { - node = node->next; - } - free(tmp_value); } return true; } ``` 使用 `list_for_each_entry_safe` 走訪 list ,得到當前與下一個節點的位址,並使用 `strcmp` 比對 value 是否相同,若相同,則刪除 `curr` 節點,並設 `check = true`; 若不同,則判斷若 `check = true`,要刪除 `curr` 節點。 #### `q_swap` ```c /* Swap every two adjacent nodes */ void q_swap(struct list_head *head) { // https://leetcode.com/problems/swap-nodes-in-pairs/ if(head == NULL || list_is_singular(head)) { return; } struct list_head *first = head->next; while(first != head && first->next != head) { struct list_head *second = first->next; list_del_init(first); list_add(first,second); first = first->next; } return; } ``` 先判斷 `head` 是否為 `NULL` 或 list 是否只存在一個節點,先將第一個節點 `first` 移除,再將其加回第二個節點的頭,即可完成兩個節點的變換位置,而此時 `first->next` 所指的節點也剛好會是尚未變換位置的第一個節點。 #### `q_reverse` ```c /* Reverse elements in queue */ void q_reverse(struct list_head *head) { if(!head || list_empty(head)){ return; } struct list_head *node, *safe; list_for_each_safe(node, safe, head) { list_move(node, head); } return; } ``` 使用 `list_for_each_safe` 走訪整個list,並將走訪到的 node ,移回 head 的頭,如此一來,就可以很簡單的實做 `reverse`。 #### `q_descend` ```c int q_descend(struct list_head *head) { // https://leetcode.com/problems/remove-nodes-from-linked-list/ if (head == NULL || list_empty(head)) { return 0; } struct list_head *node = head->prev; int total = 1, delete = 0; element_t *curr = NULL, *check = NULL; char *num = NULL; while (&check->list != head && node->prev != head) { curr = list_entry(node, element_t, list); check = list_entry(node->prev, element_t, list); int str_len = strlen(curr->value); num = malloc(sizeof(char) * (str_len + 1)); strncpy(num, curr->value, str_len); *(num + str_len) = '\0'; if (strcmp(check->value, num) < 0) { list_del(&check->list); q_release_element(check); delete += 1; } else { node = node->prev; } free(num); total += 1; } return total - delete; } ``` <s>架構</s> 實做方法與 `q_delete_dup` 相同,只是將 `next` 全部改為 `prev`,即對 list 做反向操作,若 `node->prev` 的 value 小於`node` 的 value,則刪除該節點,若不小於,則 `node = node->prev`。 #### `q_reverseK` ```c /* Reverse the nodes of the list k at a time */ void q_reverseK(struct list_head *head, int k) { // https://leetcode.com/problems/reverse-nodes-in-k-group/ if (!head || list_empty(head) || list_is_singular(head) || k <= 2) { return; } int list_length = q_size(head); struct list_head **change = NULL; struct list_head *header = NULL, *curr = head->next; while (curr != head && curr->next != head) { change = &curr->next; header = curr->prev; for (int i = 1; i < k; i++) { if (list_length >= k) { list_move(*change, header); } } list_length -= k; curr = curr->next; } return; } ``` 作法與 `reverse` 相同,但改為 K 個一組進行反轉,且若該組的節點數小於 K ,則維持原樣 `header` 紀錄每一組的頭節點,`change` 紀錄要插入到該組的頭的節點,是一個 pointer to pointer ,作用為避免因為指標的改變,而喪失目標位置,可以達到簡化程式碼的長度。 #### `q_sort` 參考了[你所不知道的C 語言: linked list 和非連續記憶體](https://hackmd.io/@sysprog/c-linked-list#Linked-list-%E5%9C%A8-Linux-%E6%A0%B8%E5%BF%83%E5%8E%9F%E5%A7%8B%E7%A8%8B%E5%BC%8F%E7%A2%BC)中的 `Merge Sort` ,分成三個部份:`merge_two_lists`, `merge_sort`, `q_sort` ##### `merge_two_lists` ```c struct list_head *merge_two_list(struct list_head *left, struct list_head *right) { struct list_head *head = NULL, **ptr = &head; element_t *left_node = NULL, *right_node = NULL; for (; left && right; ptr = &(*ptr)->next) { left_node = list_entry(left, element_t, list); right_node = list_entry(right, element_t, list); if (strcmp(left_node->value, right_node->value) < 0) { *ptr = left; left = left->next; } else { *ptr = right; right = right->next; } } *ptr = (struct list_head *) ((uintptr_t) left | (uintptr_t) right); return head; } } ``` 將兩個不同的 list 合併並排序,使用 `left` 與 `right` 指標分別指向兩個 list 的開頭,比較指標內容,將較小的節點放到新的 list 中。 迴圈迭代完成後,會剩下其中一個 list 中尚存在節點,採 bit-wsie 找出非空的節點且能加速運算。 ##### `merge_sort` ```c struct list_head *merge_sort(struct list_head *head) { if (!head || !head->next) { return head; } struct list_head *fast, *slow = head; for (fast = head->next; fast && fast->next; fast = fast->next->next) { slow = slow->next; } struct list_head *left, *right; right = slow->next; slow->next = NULL; left = merge_sort(head); right = merge_sort(right); return merge_two_list(left, right); } ``` 使用快慢指針技巧找到第一個與中間節點,並遞迴呼叫 `merge_sort` ,將左邊的串列與右邊的串列也各自找出中間節點,最後會切成一個一個節點,過程中要將切完的串列的最後一個節點的 `next` 指向 `NULL` ,最後呼叫 `merge_two_lists` 將兩個串合併。 ##### `q_sort` ```c /* Sort elements of queue in ascending order */ void q_sort(struct list_head *head) { if (!head || list_empty(head) || list_is_singular(head)) { return; } head->prev->next = NULL; head->next = merge_sort(head->next); struct list_head *curr = head, *node = head->next; while (node) { node->prev = curr; curr = node; node = node->next; } curr->next = head; head->prev = curr; return; } } ``` 先將串列的最後一個節點指向 `NULL`,作為其餘兩個函式的終止條件,並呼叫 `merge_sort` 進行排序,最後的 while 迴圈,是為了將所有節點的 `prev` 重新串接回去,使其重新符合 circular doubly-linked list。 :::warning 注意書寫: doubly-linked list 裡頭連字號的位置。 :notes: jserv 感謝老師提醒~ :+1: ::: #### `q_merge` ```c /* Merge all the queues into one sorted queue, which is in ascending order */ int q_merge(struct list_head *head) { // https://leetcode.com/problems/merge-k-sorted-lists/ if(!head || list_empty(head)) { return 0; } if(list_is_singular(head)) { return list_entry(head->next, queue_contex_t, chain)->size; } queue_contex_t *merged_list = list_entry(head->next, queue_contex_t, chain); struct list_head *node = NULL, *safe = NULL; list_for_each_safe (node, safe, head) { if(node == head->next) { continue; } queue_contex_t *tmp = list_entry(node, queue_contex_t, chain); list_splice_init(tmp->q, merged_list->q); } q_sort(merged_list->q); return merged_list->size; } ``` 實作 `q_merge` 前,必須先熟知 `queue_contex_t` 這個資料結構,將各`queue_contex_t` 所指的佇列全部串連起來,最後使用實做過的 `q_sort` 排序以串連的串列即可。 --- ### 研讀 lib/list_sort.c :::danger 與其逐行張貼程式碼,你應該闡述 Linux 核心開發者的考量因素,還有這些工程取捨如何反映到整體的設計中。避免「舉燭」! :notes: jserv 學生收到 :+1: ::: :::warning 避免非必要的條列 (及 `-` 開頭的項目符號),用清晰且完整的漢語展現。 :notes: jserv 了解,會改進使用 `-` 的時機點 > 上方筆記也該調整 > :notes: jserv ::: torvalds/linux [list_sort.c](https://github.com/torvalds/linux/blob/master/lib/list_sort.c) - 分為 3 個函式: - merge - merge_final - list_sort 研讀 `list_sort` 函式提供的註解,此外`list_sort` 需要3個參數: 1. `priv` 是一個 private data,用來傳遞 `cmp` 所需的參數 2. `head` 為要被排序的list 3. `cmp` 是一個元素比較大小的函式指標 ```c typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *, const struct list_head *, const struct list_head *); ``` - `@cmp` 的回傳值是 int 型態,`@cmp` 的第一個參數負責接收 `@priv`。第二個參數 `@a` 與第三個參數 `@b` 為串列的 Node。 - 如果 `@a` 應該排在 `@b` 之後,即 `@cmp` 回傳 >0 時,表示要進行 ascending sort - 如果 `@a` 應該排在 `@b` 之前,即 `@cmp` 回傳 <=0 時,表示要進行 descending sort 或保留原本狀態 - 且維持 stable sort,因此不需要區分 `@a < @b` 和 `@a == @b` 的情況。 ``` * The comparison function @cmp must return > 0 if @a should sort after * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should * sort before @b *or* their original order should be preserved. It is * always called with the element that came first in the input in @a, * and list_sort is a stable sort, so it is not necessary to distinguish * the @a < @b and @a == @b cases. ``` 接下來提到`merge sort`的實作細節,方法會保持 2 : 1 的 list,假設有兩個 2^k^ 大小的 list , `merge sort` 不會直接合併,而是等到有第 3 個長度為 2^k^ 的 list 才進行合併,變成一個 2^k+1^ 長度的 list 及一個 2^k^ 長度的 list 。 只要這 2 : 1 的 list 都可以被放進 cache 裡,就可以避免 Cache thrashing 問題。 ``` * This mergesort is as eager as possible while always performing at least * 2:1 balanced merges. Given two pending sublists of size 2^k, they are * merged to a size-2^(k+1) list as soon as we have 2^k following elements. * * Thus, it will avoid cache thrashing as long as 3*2^k elements can * fit into the cache. Not quite as good as a fully-eager bottom-up * mergesort, but it does use 0.2*n fewer comparisons, so is faster in * the common case that everything fits into L1. ``` 要實作上述方法,會透過一個變數 `count` 去計算目前 `pending` 上的節點總數,當每次 `count` 加1時,會將第 k 個 bit 設為 1 ,而 k-1 到 0 bit 則設為 0,且當 count 來到 2^k^ 時,就把兩個 2^k^ 長度的 list 做合併。 #### 初始化 ```c struct list_head *list = head->next, *pending = NULL; size_t count = 0; /* Count of pending */ if (list == head->prev) /* Zero or one elements */ return; /* Convert to a null-terminated singly-linked list. */ head->prev->next = NULL; ``` 先檢查 list 是否為空或只有一個節點,接著將最後一個節點的 `next` 指向 `NULL`。 ```graphviz digraph ele_list { rankdir=LR; node[shape=record]; e2 [label="<top>Node 1|{<left>prev|<right>next}", style="bold", pos="0,1!"]; e3 [label="<top>Node 2|{<left>prev|<right>next}", style="bold"]; e4 [label="<top>.....|{<left>prev|<right>next}", style="bold"]; e5 [label="<top>Node N|{<left>prev|<right>next}", style="bold"]; p [label="pending", shape = plaintext]; l [label="list", shape = plaintext]; NULL [label="NULL", shape = plaintext]; tail [label="tail", shape = plaintext]; e3:left -> e2:top; e4:left -> e3:top; e5:left -> e4:top; e5:right -> NULL ; e2:right -> e3:top; e3:right -> e4:top; e4:right -> e5:top; p -> NULL; l -> e2; tail -> p; } ``` #### 走訪整個 list 並執行 merge ```c do { size_t bits; struct list_head **tail = &pending; /* Find the least-significant clear bit in count */ for (bits = count; bits & 1; bits >>= 1) tail = &(*tail)->prev; /* Do the indicated merge */ if (likely(bits)) { struct list_head *a = *tail, *b = a->prev; a = merge(priv, cmp, b, a); /* Install the merged result in place of the inputs */ a->prev = b->prev; *tail = a; } /* Move one element from input list to pending */ list->prev = pending; pending = list; list = list->next; pending->next = NULL; count++; } while (list); ``` 以下為 `count` 0 到 15 的狀態: - 第三欄為為每次 do while 迴圈迭代完成後,所有 pending sublist 的狀態,每個以逗號隔開的數字代表該 sublist 的 Node 數量,且一個數字自為一個sublist。 - 舉 `count = 4` 為例,[2,1,1,1] 的第一個 2 代表這個 sublist 中有 2 個node,分別是 node1、 node2,接下來的 1 各分別代表 node3、node4、node5,因為第2和3個的sublist長度都為 2^0^ (即只有一個節點),且第 4 個sublist長度也是 2^0^,所以進行 merge,因此最終狀態變為[2,2,1]。 | count<十進制> | count<二進制> | sublist 的狀態 | | --------------- | ------------------- |------------------------ | | 0 | $0b0000$ | [1] | | 1 | $0b0001$ | [1,1] | | 2 (merge) | $0b0010$ | [1,1,1] -> [2,1] | | 3 | $0b0011$ | [2,1,1] | | 4 (merge) | $0b0100$ | [2,1,1,1] -> [2,2,1] | | 5 (merge) | $0b0101$ | [2,2,1,1] -> [4,1,1] | | 6 (merge) | $0b0110$ | [4,1,1,1] -> [4,2,1] | | 7 | $0b0111$ | [4,2,1,1] | | 8 (merge) | $0b1000$ | [4,2,1,1,1] -> [4,2,2,1] | | 9 (merge) | $0b1001$ | [4,2,2,1,1] -> [4,4,1,1] | | 10 (merge) | $0b1010$ | [4,4,1,1,1] -> [4,4,2,1] | | 11 (merge) | $0b1011$ | [4,4,2,1,1] -> [8,2,1,1] | | 12 (merge) | $0b1100$ | [8,2,1,1,1] -> [8,2,2,1] | | 13 (merge) | $0b1101$ | [8,2,2,1,1] -> [8,4,1,1] | | 14 (merge) | $0b1110$ | [8,4,1,1,1] -> [8,4,2,1] | | 15 | $0b1111$ | [8,4,2,1,1] | 由以上程式碼與例子可得知: - `pending` 是已排序好,但尚未被 merge 的 sublist - 每個排序好的 sublist 長度為 2^k^ - `bits` 用於判斷何時必須 merge ,會檢查目前的 sublist 是否滿足 2^k^ 個 Node, - 若目前有 2^k^ 個 Node,`if(likely(bits))` 就會成立,並呼叫 `merge` 來合併最新的兩個 pending sublists,然後讓 `list` 指向下一個 Node; - 反之,`list` 指向下一個 Node。 - 在每次迭代時,list 會指向第 `count` 個 Node,`pending` 會指向前一個 Node,indirect pointer `tail` 會指向 `&pending` 並在 `bits` 向右移時,一直指向 `tail = &(*tail)->prev` 藉以把自己移動到可能將被 merge 的 sublist,並在 sublist 被 merge 後更新 `prev`。 #### 將剩餘的 sublist merge,並使其再次成為 circular doubly-linked list 當 do while 迭代完成後,以上述例子 `n = 15` 為例,最後會剩下 [8,4,2,1] 4 個 pending sublist ,透過 for 迴圈合併:[8,4,2,1] -> [8,4,3] -> [8,7]。最後呼叫 merge_final 來合併剩餘的 [8,7],且 merge_final 會將整個 list 恢復成 circular doubly- linked list。 ```c= for (;;) { struct list_head *next = pending->prev; if (!next) break; list = merge(priv, cmp, pending, list); pending = next; } ``` --- ### 將 `list_sort.c` 引入到 `lab0-c` 專案 參考 [linhoward/linux2023-lab0](https://hackmd.io/@linhoward/linux2023-lab0#%E7%A0%94%E8%AE%80-Linux-%E6%A0%B8%E5%BF%83%E5%8E%9F%E5%A7%8B%E7%A8%8B%E5%BC%8F%E7%A2%BC%E7%9A%84-liblist_sortc) 進行實做 研讀 [`qtest` 命令直譯器的實作](https://hackmd.io/@sysprog/linux2023-lab0/%2F%40sysprog%2Flinux2023-lab0-b#qtest-%E5%91%BD%E4%BB%A4%E7%9B%B4%E8%AD%AF%E5%99%A8%E7%9A%84%E5%AF%A6%E4%BD%9C)後,了解到要新增 `qtest` 的 `cmd` 的流程,紀錄在下方。 1. 將 [`list_sort.c`](https://github.com/torvalds/linux/blob/master/lib/list_sort.c)的程式碼加入到 `qtest.c` 中。 2. 在程式碼中有使用到 `likely` 與 `unlikely` 這兩個巨集,因此也需加到`qtest.c` 中。 ```c= # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) ``` 3. 將 `list_cmp_func_t` 的定義加入到 `qtest.c`,並實作該 function。 ```c= typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *, const struct list_head *, const struct list_head *); ``` ```c= static int cmp_fun(void *priv, const struct list_head *a, const struct list_head *b) { return strcmp(list_entry(a, element_t, list)->value, list_entry(b, element_t, list)->value); } ``` 4. 實做 `do_linux_sort`,且在 `console_init` 新增 command。 ```c= ADD_COMMAND(linux_sort, "Sort queue in ascending order by linux kerenl",""); ``` 5. 編譯時會出現錯誤訊息: ``` qtest.c: In function ‘merge_final’: qtest.c:901:9: error: unknown type name ‘u8’ 901 | u8 count = 0; | ^~ ``` 需將 `merge_final` 中的 `u8` 改成 `uint8_t`。 --- ### 比較自己實作的 merge sort 和 Linux 核心程式碼之間效能落差 參考 [perf 原理和實務](https://hackmd.io/@sysprog/gnu-linux-dev/https%3A%2F%2Fhackmd.io%2Fs%2FB11109rdg) 與 [linhoward/linux2023-lab0](https://hackmd.io/@linhoward/linux2023-lab0#%E7%A0%94%E8%AE%80-Linux-%E6%A0%B8%E5%BF%83%E5%8E%9F%E5%A7%8B%E7%A8%8B%E5%BC%8F%E7%A2%BC%E7%9A%84-liblist_sortc) 進行安裝與實做 可使用以下命令查看是否開啟 `perf` ```cmd cat "/boot/config-`uname -r`" | grep "PERF_EVENT" ``` 若想要檢測 cache miss event ,需要先取消 kernel pointer 的禁用。 ```cmd sudo sh -c " echo 0 > /proc/sys/kernel/kptr_restrict" ``` 先撰寫想要進行測試的 `.cmd` 檔( RAND_1000.cmd ),以下為測試 `q_sort` ,若想要測試 `linux list_sort` ,則將 `q_sort` 改為 `list_sort` ``` option fail 0 option malloc 0 new ih RAND 1000 q_sort free ``` 使用 `shell script` 來執行 `linux_sort` 和 `q_sort` 的效能測試,`shell script` 撰寫參考 [鳥哥私房菜-學習 Shell Scripts](https://linux.vbird.org/linux_basic/centos7/0340bashshell-scripts.php) ``` #!/bin/bash for i in ./traces/traces_perf_linux_sort/*.cmd do perf stat --repeat 5 -o "${i%.cmd}"_report -e cache-misses,branches,instructions,context-switches ./qtest -v 0 -f "$i" done ``` 以下為跑完測試的 `.report` 檔與比較表格 ``` # started on Tue Feb 28 13:05:42 2023 Performance counter stats for './qtest -v 0 -f ./traces/traces_perf_linux_sort/RAND_1000.cmd' (5 runs): 2014 cpu_core/cache-misses/ ( +-103.03% ) 77,8034 cpu_core/branches/ ( +- 0.11% ) 514,7021 cpu_core/instructions/ ( +- 0.09% ) 0 context-switches 0.004317 +- 0.000648 seconds time elapsed ( +- 15.01% ) ``` - `linux list_sort` | # node | cache-misses | branches | instructions | context-switches | time | |:------:|:-----------:|:--:|:--:|:--:|:--:| |1000|2014|77,8034|514,7021|0|0.004317| |10000|5333|615,8033|4291,8014|0|0.006202| |100000|39,5825|6296,2214|4,3431,3455|0|0.058937| |1000000|3577,2642|6,6064,3230|44,8546,9541|5|0.927776| - `q_sort` | # node | cache-misses | branches | instructions | context-switches | time | |:------:|:-----------:|:--:|:--:|:--:|:--:| |1000|3091|77,8123|510,0095|0|0.004366| |10000|1986|621,7308|4251,1116|0|0.00755| |100000|61,7447|6376,2295|4,3054,4077|0|0.061952| |1000000|4560,3884|6,6678,2837|44,2331,4748|4|0.99760| 從我的實驗數據中可以發現兩者的差異不大,雖然隨機產生節點資料可能偶爾會讓 `q_sort` 的效能看起來更好,但從執行時間上來看都還是 `list_sort` 快了一點。 :::warning 善用 gnuplot 製圖,你該分析效能落差的成因。 :notes: jserv ::: --- ### Valgrind 排除 qtest 實作的記憶體錯誤 [Valgrind](https://valgrind.org/) 提供動態分析,用來追蹤及分析程式效能,幫助使用者偵測記憶體錯誤,諸如使用未初始化的記憶體、不當的記憶體配置、或取消配置記憶體,並加以分析。 執行命令 `make valgrind` 後,`trace-13-malloc` 得到 0 分 ``` +++ TESTING trace trace-13-malloc: # Test of malloc failure on new ==10966== 32 bytes in 1 blocks are still reachable in loss record 1 of 3 ==10966== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==10966== by 0x10CBE6: do_new (qtest.c:146) ==10966== by 0x10DE12: interpret_cmda (console.c:181) ==10966== by 0x10E3C7: interpret_cmd (console.c:201) ==10966== by 0x10E7C8: cmd_select (console.c:610) ==10966== by 0x10F0B4: run_console (console.c:705) ==10966== by 0x10D204: main (qtest.c:1228) ==10966== ==10966== 160 bytes in 5 blocks are possibly lost in loss record 2 of 3 ==10966== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==10966== by 0x10CBE6: do_new (qtest.c:146) ==10966== by 0x10DE12: interpret_cmda (console.c:181) ==10966== by 0x10E3C7: interpret_cmd (console.c:201) ==10966== by 0x10E7C8: cmd_select (console.c:610) ==10966== by 0x10F0B4: run_console (console.c:705) ==10966== by 0x10D204: main (qtest.c:1228) ==10966== ==10966== 168 bytes in 3 blocks are still reachable in loss record 3 of 3 ==10966== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==10966== by 0x10F128: test_malloc (harness.c:133) ==10966== by 0x10F52D: q_new (queue.c:18) ==10966== by 0x10CC1F: do_new (qtest.c:150) ==10966== by 0x10DE12: interpret_cmda (console.c:181) ==10966== by 0x10E3C7: interpret_cmd (console.c:201) ==10966== by 0x10E7C8: cmd_select (console.c:610) ==10966== by 0x10F0B4: run_console (console.c:705) ==10966== by 0x10D204: main (qtest.c:1228) ==10966== --- trace-13-malloc 0/6 ``` 尋著錯誤訊息,找到在 `qtest.c` 中的 `q_quit` 函式可能存在錯誤,該函式功能為釋放佇列的所有記憶體,首先要先使用 `q_free` 釋放所有 `queue_context_t` 指向的 `q` 的所有記憶體,再釋放 `queue_context_t` 的所有記憶體,所以執行 `make valgrind` 會顯示還有記憶體未釋放。 因為在此函式中的第一行,直接執行 `return true` ,所以將這行註解掉即可。 修改後,再次執行 `make valgrind` ,發現 `trace-02-ops` 沒有過,於是去檢查 `delete_mid` 的實做,發現我只有 `free(middle)` ,但是應該要先進行 `free(middle->value)` 才對,所以更改程式碼為 `q_release_element(middle)`。 ``` +++ TESTING trace trace-02-ops: # Test of insert_head, insert_tail, remove_head, remove_tail, and delete_mid ERROR: Freed queue, but 2 blocks are still allocated ==13726== 47 bytes in 1 blocks are still reachable in loss record 1 of 2 ==13726== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==13726== by 0x10F1FB: test_malloc (harness.c:133) ==13726== by 0x10F6B9: q_insert_head (queue.c:51) ==13726== by 0x10CBD2: do_ih (qtest.c:224) ==13726== by 0x10DEE5: interpret_cmda (console.c:181) ==13726== by 0x10E49A: interpret_cmd (console.c:201) ==13726== by 0x10E89B: cmd_select (console.c:610) ==13726== by 0x10F187: run_console (console.c:705) ==13726== by 0x10D2D7: main (qtest.c:1228) ==13726== ==13726== 48 bytes in 1 blocks are still reachable in loss record 2 of 2 ==13726== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==13726== by 0x10F1FB: test_malloc (harness.c:133) ==13726== by 0x10F757: q_insert_tail (queue.c:72) ==13726== by 0x10C8C7: do_it (qtest.c:313) ==13726== by 0x10DEE5: interpret_cmda (console.c:181) ==13726== by 0x10E49A: interpret_cmd (console.c:201) ==13726== by 0x10E89B: cmd_select (console.c:610) ==13726== by 0x10F187: run_console (console.c:705) ==13726== by 0x10D2D7: main (qtest.c:1228) ==13726== --- trace-02-ops 0/6 ``` 再次執行`make valgrind` ,就沒有再顯示任何記憶體錯誤資訊。 ``` brian@brian-linux:~/linux/lab0-c$ make valgrind # Explicitly disable sanitizer(s) make clean SANITIZER=0 qtest make[1]: Entering directory '/home/brian/linux/lab0-c' rm -f qtest.o report.o console.o harness.o queue.o random.o dudect/constant.o dudect/fixture.o dudect/ttest.o shannon_entropy.o linenoise.o web.o .qtest.o.d .report.o.d .console.o.d .harness.o.d .queue.o.d .random.o.d .dudect/constant.o.d .dudect/fixture.o.d .dudect/ttest.o.d .shannon_entropy.o.d .linenoise.o.d .web.o.d *~ qtest /tmp/qtest.* rm -rf .dudect rm -rf *.dSYM (cd traces; rm -f *~) CC qtest.o CC report.o CC console.o CC harness.o CC queue.o CC random.o CC dudect/constant.o CC dudect/fixture.o CC dudect/ttest.o CC shannon_entropy.o CC linenoise.o CC web.o LD qtest make[1]: Leaving directory '/home/brian/linux/lab0-c' cp qtest /tmp/qtest.XEguXt chmod u+x /tmp/qtest.XEguXt sed -i "s/alarm/isnan/g" /tmp/qtest.XEguXt scripts/driver.py -p /tmp/qtest.XEguXt --valgrind --- Trace Points +++ TESTING trace trace-01-ops: # Test of insert_head and remove_head --- trace-01-ops 5/5 +++ TESTING trace trace-02-ops: # Test of insert_head, insert_tail, remove_head, remove_tail, and delete_mid --- trace-02-ops 6/6 +++ TESTING trace trace-03-ops: # Test of insert_head, insert_tail, remove_head, reverse and merge --- trace-03-ops 6/6 +++ TESTING trace trace-04-ops: # Test of insert_head, insert_tail, size, swap, and sort --- trace-04-ops 6/6 +++ TESTING trace trace-05-ops: # Test of insert_head, insert_tail, remove_head, reverse, size, swap, and sort --- trace-05-ops 6/6 +++ TESTING trace trace-06-ops: # Test of insert_head, insert_tail, delete duplicate, sort, descend and reverseK --- trace-06-ops 6/6 +++ TESTING trace trace-07-string: # Test of truncated strings --- trace-07-string 6/6 +++ TESTING trace trace-08-robust: # Test operations on empty queue --- trace-08-robust 6/6 +++ TESTING trace trace-09-robust: # Test remove_head with NULL argument --- trace-09-robust 6/6 +++ TESTING trace trace-10-robust: # Test operations on NULL queue --- trace-10-robust 6/6 +++ TESTING trace trace-11-malloc: # Test of malloc failure on insert_head --- trace-11-malloc 6/6 +++ TESTING trace trace-12-malloc: # Test of malloc failure on insert_tail --- trace-12-malloc 6/6 +++ TESTING trace trace-13-malloc: # Test of malloc failure on new --- trace-13-malloc 6/6 +++ TESTING trace trace-14-perf: # Test performance of insert_tail, reverse, and sort --- trace-14-perf 6/6 +++ TESTING trace trace-15-perf: # Test performance of sort with random and descending orders # 10000: all correct sorting algorithms are expected pass # 50000: sorting algorithms with O(n^2) time complexity are expected failed # 100000: sorting algorithms with O(nlogn) time complexity are expected pass --- trace-15-perf 6/6 +++ TESTING trace trace-16-perf: # Test performance of insert_tail --- trace-16-perf 6/6 +++ TESTING trace trace-17-complexity: # Test if time complexity of q_insert_tail, q_insert_head, q_remove_tail, and q_remove_head is constant Probably constant time Probably constant time Probably constant time Probably constant time --- trace-17-complexity 5/5 --- TOTAL 100/100 Test with specific case by running command: scripts/driver.py -p /tmp/qtest.XEguXt --valgrind -t <tid> ``` ### 開啟 address sanitizer ,修正 `qtest` 執行過程中錯誤 ``` make clean make SANITIZER=1 make test ``` 開啟 address sanitizer,並重新編譯 執行 `make test` 後,並沒有出現任何記憶體有關之錯誤。 ### Massif 視覺化 透過 `massif` 查看記憶體狀況,可以觀察到建立的動態記憶體歸為 0,表示記憶體已被釋放。 ``` $ valgrind --tool=massif ./qtest -f traces/trace-15-perf.cmd $ massif-visualizer massif.out.* ``` ![](https://i.imgur.com/GcxywpO.png)