2023q1 Homework1 (quiz1)

--- tags: linux kernel class --- # 2023q1 Homework1 (quiz1) contributed by < `willwillhi1` > ### 測驗一 ```c= static void list_sort(struct list_head *head) { if (list_empty(head) || list_is_singular(head)) return; struct list_head list_less, list_greater; INIT_LIST_HEAD(&list_less); INIT_LIST_HEAD(&list_greater); struct item *pivot = list_first_entry(head, AAA, BBB); list_del(&pivot->list); struct item *itm = NULL, *is = NULL; CCC (itm, is, head, list) { if (cmpint(&itm->i, &pivot->i) < 0) DDD (&itm->list, &list_less); else EEE (&itm->list, &list_greater); } list_sort(&list_less); list_sort(&list_greater); list_add(&pivot->list, head); list_splice(&list_less, head); FFF(&list_greater, head); } ``` * 此為遞迴版本的 ```quick sort```，是一種 ```Divide and Conquer``` 的方法 1. 先選出一個 ```pivot```，然後調整數列，使得「所有在 ```pivot``` 左邊的數，都比 ```pivot``` 還小」，而「在 ```pivot``` 右邊的數都比 ```pivot``` 大」。 2. 接著，將所有在 ```pivot``` 左邊的數視為「新的數列」，所有在 ```pivot``` 右邊的數視為「另一個新的數列」，「分別」重複上述步驟(選 ```pivot```、調整數列)，直到分不出「新的數列」為止。 ```graphviz digraph G { compound=true node[shape=record, height=.1];input result divide_41 divide_21 divide_31 divide_11 divide_12 divide_13 divide_14 merge_21 merge_41 merge_42 merge_11 merge_12 merge_13 merge_14 merge_15 merge_16 merge_17 merge_18 merge_19 merge_110 merge_111 merge_31; input[label="<f0>5|<f1>2|<f2>4|<f3>6|<f4>8|<f5>1|<f6>7|<f7>3"] result[label="<f0>1|<f1>2|<f2>3|<f3>4|<f4>5|<f5>6|<f6>7|<f7>8"] divide_41[label="<f1>2|<f2>4|<f3>1|<f4>3"] divide_31[label="<f1>6|<f2>8|<f3>7"] divide_11[label="<f0>1"] divide_21[label="<f0>4|<f1>3"] divide_12[label="<f0>3"] divide_13[label="<f0>8"] divide_14[label="<f0>7"] merge_21[label="<f0>3|<f1>4"] merge_11[label="<f0>1"] merge_12[label="<f0>2"] merge_13[label="<f0>5"] merge_14[label="<f0>6"] merge_15[label="<f0>8"] merge_16[label="<f0>7"] merge_41[label="<f0>1|<f1>2|<f2>3|<f3>4"] merge_17[label="<f0>5"] merge_18[label="<f0>6"] merge_19[label="<f0>8"] merge_110[label="<f0>7"] merge_42[label="<f0>1|<f1>2|<f2>3|<f3>4"] merge_111[label="<f0>5"] merge_31[label="<f0>6|<f1>7|<f2>8"] subgraph cluster_0 { label="divide"; divide_pad[label="----------------------", style=invis] divide_pad -> divide_21[style=invis] divide_pad -> divide_13[style=invis] input -> divide_41 input -> divide_31 divide_41 -> divide_21 divide_41 -> divide_11 divide_31 -> divide_13 divide_31 -> divide_14 divide_21 -> divide_12 } divide_12 -> pad[style=invis] subgraph cluster_2 { label="merge"; merge_pad[label="--------------------", style=invis] rank=same; merge_11; merge_12; merge_13; merge_14; merge_15; merge_16; merge_21 rank=same; merge_41; merge_17; merge_18; merge_19; merge_110 rank=same; merge_111; merge_42; merge_31 pad[label="----------------------", style=invis] pad -> merge_13[style=invis] pad -> merge_42[style=invis] pad -> merge_31[style=invis] //pad -> merge_17[style=invis] //merge_11 -> merge_12[style=invis] //merge_23 -> merge_pad[style=invis] //merge_pad -> result[style=invis] merge_11 -> merge_41 merge_12 -> merge_41 merge_21 -> merge_41 merge_13 -> merge_17 merge_15 -> merge_19 merge_16 -> merge_110 merge_14 -> merge_18 merge_41 -> merge_42 merge_17 -> merge_111 merge_18 -> merge_31 merge_19 -> merge_31 merge_110 -> merge_31 merge_42 -> result merge_111 -> result merge_31 -> result } } ``` * 程式碼中的 ```list_less```, ```list_greater``` 分別代表第一步中的兩個佇列的 ```head```。 * 用 ```list_first_entry``` 拿原本佇列的第一個節點當作 ```pivot```，所以 ```AAA = item```, ```BBB = list```。 * 13 ~ 19 行就是要把原本的佇列通過與 ```pivot``` 比較分到 ```list_less```, ```list_greater``` 之中，所以要用 ```CCC = list_for_each_entry_safe``` 來走訪整個佇列 * 若節點的值小於 ```pivot```，就用 ```DDD = list_move_tail``` 將該節點從原佇列移動到 ```list_less``` 的尾端。 * 反之若大於等於 ```pivot```，就用 ```EEE = list_move_tail``` 將改節點移動到 ```list_greater``` 的尾端。 * 分出 ```list_less``` 和 ```list_greater``` 後，就是進入第二步，重複的對其做 ```list_sort``` 直到分不出新的佇列。 * 最後 24 ~ 26 就是把 ```list_less```, ```pivot```, ```list_greater``` 接起來，所以 ```FFF = list_splice_tail``` ### 測驗二 ```c= #define MAX_DEPTH 512 void q_sort(struct list_head *head) { if (list_empty(head) || list_is_singular(head)) return; struct list_head stack[MAX_DEPTH]; for (int i = 0; i < MAX_DEPTH; i++) INIT_LIST_HEAD(&stack[i]); int top = -1; list_splice_init(head, &stack[++top]); // struct list_head partition; // INIT_LIST_HEAD(&partition); LIST_HEAD(partition); while (top >= 0) { INIT_LIST_HEAD(&partition); // list_splice_init(&stack[top--], &partition); if (!list_empty(&stack[top]) && !list_is_singular(&stack[top])) { list_splice_init(&stack[top--], &partition); struct list_head list_less, list_greater; INIT_LIST_HEAD(&list_less); INIT_LIST_HEAD(&list_greater); element_t *pivot = list_first_entry(&partition, element_t, list); list_del(&pivot->list); INIT_LIST_HEAD(&pivot->list); element_t *itm = NULL, *is = NULL; list_for_each_entry_safe (itm, is, &partition, list) { // list_del(&itm->list); if (strcmp(itm->value, pivot->value) < 0) list_move(&itm->list, &list_less); else list_move(&itm->list, &list_greater); } list_move_tail (&pivot->list, &list_less); if (!list_empty(&list_greater)) list_splice_tail(&list_greater, &stack[++top]); if (!list_empty(&list_less)) list_splice_tail(&list_less, &stack[++top]); } else { // top++; // list_splice_tail(&partition, &stack[top]); while (top >= 0 && list_is_singular(&stack[top])) { // element_t *tmp = list_first_entry(&stack[top], element_t, list); // list_del(&tmp->list); // INIT_LIST_HEAD(&stack[top--]); // list_add_tail(&tmp->list, head); list_splice_tail_init(&stack[top--], head); } } } } ``` ```struct list_head stack[MAX_DEPTH]```是用來模擬遞迴的操作，先對每一層做初始化 ```INIT_LIST_HEAD(&stack[i])```，```top``` 從 -1 開始，然後 ```list_splice_init(head, &stack[++top])``` 把 ```head``` 的資料移到 ```stack[0]```，接著底下這兩行 ```c= struct list_head partition; INIT_LIST_HEAD(&partition); ``` 可以用以下代替 ```c= LIST_HEAD(partition); ``` 進入 ```while``` 迴圈後，把 ```stack[top]``` 目前的資料移到 ```partition```，並初始化 ```stack[top]``` 因為 ```stack[top]``` 目前的資料被拿出來了，所以 ```top``` 要減一。 ```c= INIT_LIST_HEAD(&partition); list_splice_init(&stack[top--], &partition); ``` * 發現可以把上述的 ```c= list_splice_init(&stack[top--], &partition); ``` 加入到底下的 ```if``` 內，所以 ```if``` 的判斷式的 ```&partition``` 要改成 ```&stack[top]```，然後就可以不執行 ```else``` 裡的: ```c= top++; list_splice_tail(&partition, &stack[top]); ``` 回到 ```if``` 在做的事，如果 ```partition``` 如果不是空或者只有一點，就把 ```partition``` 依照 ```pivot``` 分成 ```list_less``` 和 ```list_greater``` 兩個佇列。其中 ```list_del(&itm->list)``` 是多餘的，後面的 ```list_move``` 已經包含相同的效果。 ```c= if (!list_empty(&partition) && \ !list_is_singular(&partition)) ``` 接著把 ```pivot``` 接到 ```list_less``` 尾端，然後判斷如果 ```list_greater``` 不為空，就把它加到 ```stack[++top]``` 裡，```list_less``` 同理，所以 ```stack``` 會先放大的再放小的。 ```c= list_move_tail (&pivot->list, &list_less); if (!list_empty(&list_greater)) list_splice_tail(&list_greater, &stack[++top]); if (!list_empty(&list_less)) list_splice_tail(&list_less, &stack[++top]); ``` 如果 `partition` 為空或者只有一個節點且 `stack[top]` 只有一節點，就把當前的 `stack[top]` 的一個節點加到 `head` 的尾端，所以可以把下面 `while` 的四行用 `list_splice_tail_init` 代替。 ```c= else { while (top >= 0 && list_is_singular(&stack[top])) { // element_t *tmp = list_first_entry(&stack[top], element_t, list); // list_del(&tmp->list); // INIT_LIST_HEAD(&stack[top--]); // list_add_tail(&tmp->list, head); list_splice_tail_init(&stack[top--], head); } ``` ### 效能比較以下為用 qtest time 做的測試，每筆分別做五次 * recursion * 10000 筆： 0.013, 0.016, 0.006, 0.006, 0.007 * 100000 筆： 0.126, 0.139, 0.125, 0.124, 0.122 * 200000 筆： 0.282, 0.287, 0.280, 0.287, 0.202 * 300000 筆： 0.461, 0.474, 0.455, 0.448, 0.463 * 400000 筆： 0.631, 0.648, 0.624, 0.619, 0.626 * 500000 筆： 0.834, 0.815, 0.795, 0.793, 0.729 * 600000 筆： 1.010, 0.873, 1.011, 1.053, 0.922 * 700000 筆： 1.223, 1.151, 1.040, 1.237, 1.237 * 800000 筆： 1.409, 1.198, 1.153, 1.223, 1.349 * 900000 筆： Time limit exceeded * iteration * 10000 筆： 0.008, 0.009, 0.009, 0.010, 0.010 * 100000 筆： 0.172, 0.149, 0.153, 0.149, 0.158 * 200000 筆： 0.341, 0.35, 0.34, 0.344, 0.34 * 300000 筆： 0.567, 0.541, 0.508, 0.588, 0.551 * 400000 筆： 0.799, 0.792, 0.788, 0.742, 0.791 * 500000 筆： 1.077, 1.016, 0.952, 0.995, 0.915 * 600000 筆： 1.280, 1.175, 1.222, 1.214, 1.203 * 700000 筆： Time limit exceeded ### 用 perf 做測試首先建立一個測試用的 cmd，然後用以下指令做測試 ```c= sudo perf stat -e cache-misses,cache-references,instructions,cycles ./qtest -f sort.cmd ``` ```c= will@will:~/linux2023/lab0-c$ cat sort.cmd option fail 0 option malloc 0 new ih RAND 10000 sort free ``` * recursion ```c= Performance counter stats for './qtest -f sort.cmd': 76,501 cache-misses # 6.726 % of all cache refs 1,137,407 cache-references 45,705,227 instructions # 1.56 insn per cycle 29,351,662 cycles 0.018867434 seconds time elapsed 0.014206000 seconds user 0.004735000 seconds sys ``` * iteration ```c= Performance counter stats for './qtest -f sort.cmd': 139,941 cache-misses # 9.913 % of all cache refs 1,411,675 cache-references 51,237,005 instructions # 1.54 insn per cycle 33,255,167 cycles 0.033091868 seconds time elapsed 0.020052000 seconds user 0.012031000 seconds sys ```