# 2024q1 Homework2 (quiz1+2) contributed by < `johnny69527` > ## [quiz1 - 測驗 1](https://hackmd.io/@sysprog/linux2024-quiz1#%E6%B8%AC%E9%A9%97-1) 實作中利用 `begin` 與 `end` 陣列模擬 recursive 的堆疊,並以排序串列長度 2 倍為堆疊的大小。 當測試資料量達到 1,000,000 時,因為 `begin` 與 `end` 陣列配置記憶體太大,發生 segmentation fault。 ### 把 non recursive quick sort 整合到 lab0-c 在 `lab0-c` 中加入此 quick sort,使用 Linux 核心風格的 List API 改寫上述程式,並使用鏈結串列模擬堆疊,排除 segmentation fault。此變更使 quick sort 不是 in-place algorithms: ```clike typedef struct { struct list_head value; struct list_head list; } stack_t; stack_t *stack_push(struct list_head *head) { stack_t *stk = malloc(sizeof(stack_t)); if (!stk) return NULL; INIT_LIST_HEAD(&(stk->value)); list_add_tail(&(stk->list), head); return stk; } stack_t *stack_pop(struct list_head *head) { if (!head || list_empty(head)) return NULL; /* cppcheck-suppress nullPointer */ stack_t *stk = list_last_entry(head, stack_t, list); list_del(&stk->list); return stk; } ``` #### 加入 quick sort 實作程式碼 [`quick_sort.h`](https://github.com/johnny69527/lab0-c/blob/master/quick_sort.h) 和 [`quick_sort.c`](https://github.com/johnny69527/lab0-c/blob/master/quick_sort.c) #### 加入 quick_sort 命令 修改 `qtest.c`: ```c diff --git a/qtest.c b/qtest.c index 095239d..41fcd33 100644 --- a/qtest.c +++ b/qtest.c @@ -44,6 +44,7 @@ extern int show_entropy; */ #include "list_sort.h" #include "queue.h" +#include "quick_sort.h" #include "console.h" #include "report.h" @@ -616,12 +617,14 @@ bool _do_sort(int argc, char *argv[], int mode) report(3, "Warning: Calling sort on single node"); error_check(); - set_noallocate_mode(true); + set_noallocate_mode(false); if (current && exception_setup(true)) { if (mode == 0) q_sort(current->q, descend); else if (mode == 1) list_sort(NULL, current->q, descend ? cmp_descend : cmp); + else if (mode == 2) + quick_sort(current->q, descend); } exception_cancel(); @@ -663,6 +666,11 @@ bool do_linux_sort(int argc, char *argv[]) return _do_sort(argc, argv, 1); } +bool do_quick_sort(int argc, char *argv[]) +{ + return _do_sort(argc, argv, 2); +} + static bool do_dm(int argc, char *argv[]) { if (argc != 1) { @@ -1077,6 +1085,7 @@ static void console_init() ADD_COMMAND(reverse, "Reverse queue", ""); ADD_COMMAND(sort, "Sort queue in ascending/descening order", ""); ADD_COMMAND(linux_sort, "Sort queue with linux lib/list_sort.c", ""); + ADD_COMMAND(quick_sort, "Sort queue with Non recursive quick sort", ""); ADD_COMMAND(size, "Compute queue size n times (default: n == 1)", "[n]"); ADD_COMMAND(show, "Show queue contents", ""); ADD_COMMAND(dm, "Delete middle node in queue", ""); ``` #### 效能比較 建立[亂序測試資料的效能數據 script](https://github.com/johnny69527/lab0-c/blob/master/traces/trace-perf-quick-sort.cmd)(真實世界中罕見的情境)和在 Makefile 加入測試命令: ```bash diff --git a/Makefile b/Makefile index 1fe3fd0..ecda85e 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,7 @@ $(GIT_HOOKS): OBJS := qtest.o report.o console.o harness.o queue.o \ random.o dudect/constant.o dudect/fixture.o dudect/ttest.o \ shannon_entropy.o \ - linenoise.o web.o list_sort.o + linenoise.o web.o list_sort.o quick_sort.o deps := $(OBJS:%.o=.%.o.d) @@ -83,19 +83,30 @@ clean: distclean: clean rm -f .cmd_history -sort_perf: +linux_sort.dat: list_sort.o ./qtest -v 2 -f traces/trace-perf-linux-sort.cmd -l linux_sort.dat cat linux_sort.dat | grep -v 'Delta time =' | sed 's/# //g' > linux_sort.dat01 cat linux_sort.dat | grep 'Delta time =' | sed 's/Delta time = //g' > linux_sort.dat02 pr -m -T linux_sort.dat01 linux_sort.dat02 > linux_sort.dat rm linux_sort.dat01 linux_sort.dat02 + +merge_sort.dat: queue.o ./qtest -v 2 -f traces/trace-perf-merge-sort.cmd -l merge_sort.dat cat merge_sort.dat | grep -v 'Delta time =' | sed 's/# //g' > merge_sort.dat01 cat merge_sort.dat | grep 'Delta time =' | sed 's/Delta time = //g' > merge_sort.dat02 pr -m -T merge_sort.dat01 merge_sort.dat02 > merge_sort.dat rm merge_sort.dat01 merge_sort.dat02 + +quick_sort.dat: quick_sort.o + ./qtest -v 2 -f traces/trace-perf-quick-sort.cmd -l quick_sort.dat + cat quick_sort.dat | grep -v 'Delta time =' | sed 's/# //g' > quick_sort.dat01 + cat quick_sort.dat | grep 'Delta time =' | sed 's/Delta time = //g' > quick_sort.dat02 + pr -m -T quick_sort.dat01 quick_sort.dat02 > quick_sort.dat + rm quick_sort.dat01 quick_sort.dat02 + +sort_perf: linux_sort.dat merge_sort.dat quick_sort.dat gnuplot scripts/sort_perf.gp - rm linux_sort.dat merge_sort.dat + # rm linux_sort.dat merge_sort.dat quick_sort.dat xdg-open sort_perf.png -include $(deps) ``` 修改 gunplot script - `sort_perf.gp`,加入 quick sort 效能數據: ``` diff --git a/scripts/sort_perf.gp b/scripts/sort_perf.gp index 071cf0d..db8899c 100644 --- a/scripts/sort_perf.gp +++ b/scripts/sort_perf.gp @@ -3,5 +3,5 @@ set xlabel 'size of list' set title 'performance comparison' set term png enhanced font 'Verdana,10' set output 'sort_perf.png' -plot "linux_sort.dat" title "linux list sort" w lp, "merge_sort.dat" title "merge sort" w lp +plot "linux_sort.dat" title "linux list sort" w lp, "merge_sort.dat" title "merge sort" w lp, "quick_sort.dat" title "quick sort" w lp ``` non recursive quick sort 和我實作的 merge sort 及 liunx list sort 進行比較。 ![sort_perf](https://hackmd.io/_uploads/By1zkA9Ta.png) ## [quiz1 - 測驗 2](https://hackmd.io/@sysprog/linux2024-quiz1#%E6%B8%AC%E9%A9%97-2) ### 把 Tim sort 整合到 lab0-c #### 加入 Tim sort 實作程式碼 [`sort_impl.h`](https://github.com/johnny69527/lab0-c/blob/master/sort_impl.h) 和 [`timsort.c`](https://github.com/johnny69527/lab0-c/blob/master/timsort.c) #### 加入 tim_sort 命令 修改 `qtest.c`: ```c diff --git a/qtest.c b/qtest.c index 41fcd33..4c151eb 100644 --- a/qtest.c +++ b/qtest.c @@ -45,6 +45,7 @@ extern int show_entropy; #include "list_sort.h" #include "queue.h" #include "quick_sort.h" +#include "sort_impl.h" #include "console.h" #include "report.h" @@ -625,6 +626,8 @@ bool _do_sort(int argc, char *argv[], int mode) list_sort(NULL, current->q, descend ? cmp_descend : cmp); else if (mode == 2) quick_sort(current->q, descend); + else if (mode == 3) + timsort(NULL, current->q, descend ? cmp_descend : cmp); } exception_cancel(); @@ -671,6 +674,11 @@ bool do_quick_sort(int argc, char *argv[]) return _do_sort(argc, argv, 2); } +bool do_tim_sort(int argc, char *argv[]) +{ + return _do_sort(argc, argv, 3); +} + static bool do_dm(int argc, char *argv[]) { if (argc != 1) { @@ -1086,6 +1094,7 @@ static void console_init() ADD_COMMAND(sort, "Sort queue in ascending/descening order", ""); ADD_COMMAND(linux_sort, "Sort queue with linux lib/list_sort.c", ""); ADD_COMMAND(quick_sort, "Sort queue with Non recursive quick sort", ""); + ADD_COMMAND(tim_sort, "Sort queue with tim sort", ""); ADD_COMMAND(size, "Compute queue size n times (default: n == 1)", "[n]"); ADD_COMMAND(show, "Show queue contents", ""); ADD_COMMAND(dm, "Delete middle node in queue", ""); ``` #### 效能比較 建立[亂序測試資料的效能數據 script](https://github.com/johnny69527/lab0-c/blob/master/traces/trace-perf-tim-sort.cmd)(真實世界中罕見的情境)和在 Makefile 加入測試命令: ```bash diff --git a/Makefile b/Makefile index ecda85e..2d8881d 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,7 @@ $(GIT_HOOKS): OBJS := qtest.o report.o console.o harness.o queue.o \ random.o dudect/constant.o dudect/fixture.o dudect/ttest.o \ shannon_entropy.o \ - linenoise.o web.o list_sort.o quick_sort.o + linenoise.o web.o list_sort.o quick_sort.o timsort.o deps := $(OBJS:%.o=.%.o.d) @@ -104,7 +104,14 @@ quick_sort.dat: quick_sort.o pr -m -T quick_sort.dat01 quick_sort.dat02 > quick_sort.dat rm quick_sort.dat01 quick_sort.dat02 -sort_perf: linux_sort.dat merge_sort.dat quick_sort.dat +tim_sort.dat: timsort.o + ./qtest -v 2 -f traces/trace-perf-tim-sort.cmd -l tim_sort.dat + cat tim_sort.dat | grep -v 'Delta time =' | sed 's/# //g' > tim_sort.dat01 + cat tim_sort.dat | grep 'Delta time =' | sed 's/Delta time = //g' > tim_sort.dat02 + pr -m -T tim_sort.dat01 tim_sort.dat02 > tim_sort.dat + rm tim_sort.dat01 tim_sort.dat02 + +sort_perf: linux_sort.dat merge_sort.dat quick_sort.dat tim_sort.dat gnuplot scripts/sort_perf.gp # rm linux_sort.dat merge_sort.dat quick_sort.dat xdg-open sort_perf.png ``` 修改 gunplot script - `sort_perf.gp`,加入 Tim sort 效能數據: ```bash diff --git a/scripts/sort_perf.gp b/scripts/sort_perf.gp index db8899c..780f16c 100644 --- a/scripts/sort_perf.gp +++ b/scripts/sort_perf.gp @@ -3,5 +3,5 @@ set xlabel 'size of list' set title 'performance comparison' set term png enhanced font 'Verdana,10' set output 'sort_perf.png' -plot "linux_sort.dat" title "linux list sort" w lp, "merge_sort.dat" title "merge sort" w lp, "quick_sort.dat" title "quick sort" w lp +plot "linux_sort.dat" title "linux list sort" w lp, "merge_sort.dat" title "merge sort" w lp, "quick_sort.dat" title "quick sort" w lp, "tim_sort.dat" title "tim sort" w lp ``` Tim sort 在亂序測試資料的情境下,當測試資料量達到 7,000,000 以上時,效能比 linux list sort 還好: ![sort_perf](https://hackmd.io/_uploads/HkZMqbAa6.png) #### quick sort worst case 效能比較 分別建立排序已排序資料的 script: [trace-linux-sort-qwc.cmd](https://github.com/johnny69527/lab0-c/blob/master/traces/trace-perf-linux-sort-qwc.cmd), [trace-merge-sort-qwc.cmd](https://github.com/johnny69527/lab0-c/blob/master/traces/trace-perf-merge-sort-qwc.cmd), [trace-quick-sort-qwc.cmd](https://github.com/johnny69527/lab0-c/blob/master/traces/trace-perf-quick-sort-qwc.cmd) 和 [trace-tim-sort-qwc.cmd](https://github.com/johnny69527/lab0-c/blob/master/traces/trace-perf-tim-sort-qwc.cmd)。 在 Makefile 加入測試命令: ```bash diff --git a/Makefile b/Makefile index 2d8881d..a0e0a25 100644 --- a/Makefile +++ b/Makefile @@ -116,4 +116,37 @@ sort_perf: linux_sort.dat merge_sort.dat quick_sort.dat tim_sort.dat # rm linux_sort.dat merge_sort.dat quick_sort.dat xdg-open sort_perf.png +linux_sort_qwc.dat: list_sort.o + ./qtest -v 2 -f traces/trace-perf-linux-sort-qwc.cmd -l linux_sort_qwc.dat + cat linux_sort_qwc.dat | grep -v 'Delta time =' | sed 's/# //g' > linux_sort_qwc.dat01 + cat linux_sort_qwc.dat | grep 'Delta time =' | sed 's/Delta time = //g' > linux_sort_qwc.dat02 + pr -m -T linux_sort_qwc.dat01 linux_sort_qwc.dat02 > linux_sort_qwc.dat + rm linux_sort_qwc.dat01 linux_sort_qwc.dat02 + +merge_sort_qwc.dat: queue.o + ./qtest -v 2 -f traces/trace-perf-merge-sort-qwc.cmd -l merge_sort_qwc.dat + cat merge_sort_qwc.dat | grep -v 'Delta time =' | sed 's/# //g' > merge_sort_qwc.dat01 + cat merge_sort_qwc.dat | grep 'Delta time =' | sed 's/Delta time = //g' > merge_sort_qwc.dat02 + pr -m -T merge_sort_qwc.dat01 merge_sort_qwc.dat02 > merge_sort_qwc.dat + rm merge_sort_qwc.dat01 merge_sort_qwc.dat02 + +quick_sort_qwc.dat: quick_sort.o + ./qtest -v 2 -f traces/trace-perf-quick-sort-qwc.cmd -l quick_sort_qwc.dat + cat quick_sort_qwc.dat | grep -v 'Delta time =' | sed 's/# //g' > quick_sort_qwc.dat01 + cat quick_sort_qwc.dat | grep 'Delta time =' | sed 's/Delta time = //g' > quick_sort_qwc.dat02 + pr -m -T quick_sort_qwc.dat01 quick_sort_qwc.dat02 > quick_sort_qwc.dat + rm quick_sort_qwc.dat01 quick_sort_qwc.dat02 + +tim_sort_qwc.dat: timsort.o + ./qtest -v 2 -f traces/trace-perf-tim-sort-qwc.cmd -l tim_sort_qwc.dat + cat tim_sort_qwc.dat | grep -v 'Delta time =' | sed 's/# //g' > tim_sort_qwc.dat01 + cat tim_sort_qwc.dat | grep 'Delta time =' | sed 's/Delta time = //g' > tim_sort_qwc.dat02 + pr -m -T tim_sort_qwc.dat01 tim_sort_qwc.dat02 > tim_sort_qwc.dat + rm tim_sort_qwc.dat01 tim_sort_qwc.dat02 + +sort_perf_qwc: linux_sort_qwc.dat merge_sort_qwc.dat quick_sort_qwc.dat tim_sort_qwc.dat + gnuplot scripts/sort_perf_qwc.gp + # rm linux_sort.dat merge_sort.dat quick_sort.dat + xdg-open sort_perf_qwc.png + -include $(deps) ``` 建立 gunplot script - [`sort_perf_qwc.gp`](https://github.com/johnny69527/lab0-c/blob/master/scripts/sort_perf_qwc.gp)。 排序已排序資料的效能比較顯示,Tim sort 效能最好。 ![sort_perf_qwc](https://hackmd.io/_uploads/H16QDGR6a.png) 在排序已排序資料的效能比較中,merge sort 比 linux list sort 好,是因在 circular doubly-linked list 可以 $O(1)$ 找到 last node,在 merge sort 中 merge 時可以檢查是不是存在其中一 list 的 last node(最大值)小於另一 list 的 first node(最小值),存在的話便可以直接接上兩個 list,而不用遍歷兩個 list 的 node: ```c // merge if (!list_empty(&left) && !list_empty(&right)) { // fast case 1 element_t *l_f = list_first_entry(&left, element_t, list); element_t *r_l = list_last_entry(&right, element_t, list); if (valuecmp(r_l->value, l_f->value, descend) <= 0) { list_splice_tail(&left, &right); list_splice(&right, head); return; } // fast case 2 element_t *l_l = list_last_entry(&left, element_t, list); element_t *r_f = list_first_entry(&right, element_t, list); if (valuecmp(l_l->value, r_f->value, descend) <= 0) { list_splice_tail(&right, &left); list_splice(&left, head); return; } } ``` 若拿掉上述程式碼,merge sort 的效能便不及 linux list sort: ![sort_perf_qwc](https://hackmd.io/_uploads/Sk6ty8pAp.png) ## [quiz2 - 測驗 1](https://hackmd.io/@sysprog/linux2024-quiz2#%E6%B8%AC%E9%A9%97-1) 實作中利用 hash table 來記錄 inorder 的 value 的 index。然後,在用 preorder 建樹時,快速找到 root(perorder 的第一個 value)在 inorder 中的 index,從而得知左右子樹大小,便可以在 preorder 裡分割出左右子樹。 接著,左右子樹 recursively 建樹。 測試程式碼: ```c int tree_height(struct TreeNode *tree, int height) { if (tree == NULL) return height; int left_height = tree_height(tree->left, height + 1); int right_height = tree_height(tree->right, height + 1); return left_height > right_height ? left_height : right_height; } void tree_print(struct TreeNode *tree, struct TreeNode *output[], int index) { if (tree == NULL) return; output[index] = tree; tree_print(tree->left, output, (index << 1) + 1); tree_print(tree->right, output, (index << 1) + 2); } int main() { int preorder[] = {3, 9, 20, 15, 7}; int inorder[] = {9, 3, 15, 20, 7}; /* int preorder[] = {-1}; */ /* int inorder[] = {-1}; */ int size = sizeof(preorder) / sizeof(int); if (!size) { printf("[]\n"); return 0; } struct TreeNode *tree = buildTree(preorder, size, inorder, size); int height = tree_height(tree, 0); int output_size = (1 << height) - 1; struct TreeNode *output[output_size]; int i; for (i = 0; i < output_size; i++) output[i] = NULL; tree_print(tree, output, 0); printf("["); for (i = 0; i < output_size - 1; i++) { if (output[i] == NULL) printf("null, "); else { printf("%d, ", output[i]->val); } } if (output[i] == NULL) printf("null]\n"); else { printf("%d]\n", output[i]->val); } return 0; } ``` ## [quiz2 - 測驗 2](https://hackmd.io/@sysprog/linux2024-quiz2#%E6%B8%AC%E9%A9%97-2) 實作用 hash table 來存取資料,用 linked list 來記錄 Least Recently Used。所以資料結構中包含了 `struct hlist_node` 和 `struct list_head`: ```c typedef struct { int key; int value; struct hlist_node node; struct list_head link; } LRUNode; ``` 當資料從 hash table 取出時,便會把資料移到 linked list 的最前面,代表 Least Recently Used。 當新資料要存到 hash table 時,需要檢查容量是否已滿了,如果是的話,便要把 linked list 最後面的資料,用新資料取代,並且把它移到 linked list 的最前面,代表 Least Recently Used;否則,配置新的資料儲存空間,放到 hash table 和加到 linked list 的最前面。 ## [quiz2 - 測驗 3](https://hackmd.io/@sysprog/linux2024-quiz2#%E6%B8%AC%E9%A9%97-3)