Try   HackMD

2024q1 Homework2 (quiz1+2)

contributed by < johnny69527 >

quiz1 - 測驗 1

實作中利用 beginend 陣列模擬 recursive 的堆疊,並以排序串列長度 2 倍為堆疊的大小。
當測試資料量達到 1,000,000 時,因為 beginend 陣列配置記憶體太大,發生 segmentation fault。

把 non recursive quick sort 整合到 lab0-c

lab0-c 中加入此 quick sort,使用 Linux 核心風格的 List API 改寫上述程式,並使用鏈結串列模擬堆疊,排除 segmentation fault。此變更使 quick sort 不是 in-place algorithms:

typedef struct {
    struct list_head value;
    struct list_head list;
} stack_t;

stack_t *stack_push(struct list_head *head)
{
    stack_t *stk = malloc(sizeof(stack_t));
    if (!stk)
        return NULL;

    INIT_LIST_HEAD(&(stk->value));

    list_add_tail(&(stk->list), head);
    return stk;
}

stack_t *stack_pop(struct list_head *head)
{
    if (!head || list_empty(head))
        return NULL;

    /* cppcheck-suppress nullPointer */
    stack_t *stk = list_last_entry(head, stack_t, list);

    list_del(&stk->list);

    return stk;
}

加入 quick sort 實作程式碼

quick_sort.hquick_sort.c

加入 quick_sort 命令

修改 qtest.c

diff --git a/qtest.c b/qtest.c
index 095239d..41fcd33 100644
--- a/qtest.c
+++ b/qtest.c
@@ -44,6 +44,7 @@ extern int show_entropy;
  */
 #include "list_sort.h"
 #include "queue.h"
+#include "quick_sort.h"
 
 #include "console.h"
 #include "report.h"
@@ -616,12 +617,14 @@ bool _do_sort(int argc, char *argv[], int mode)
         report(3, "Warning: Calling sort on single node");
     error_check();
 
-    set_noallocate_mode(true);
+    set_noallocate_mode(false);
     if (current && exception_setup(true)) {
         if (mode == 0)
             q_sort(current->q, descend);
         else if (mode == 1)
             list_sort(NULL, current->q, descend ? cmp_descend : cmp);
+        else if (mode == 2)
+            quick_sort(current->q, descend);
     }
 
     exception_cancel();
@@ -663,6 +666,11 @@ bool do_linux_sort(int argc, char *argv[])
     return _do_sort(argc, argv, 1);
 }
 
+bool do_quick_sort(int argc, char *argv[])
+{
+    return _do_sort(argc, argv, 2);
+}
+
 static bool do_dm(int argc, char *argv[])
 {
     if (argc != 1) {
@@ -1077,6 +1085,7 @@ static void console_init()
     ADD_COMMAND(reverse, "Reverse queue", "");
     ADD_COMMAND(sort, "Sort queue in ascending/descening order", "");
     ADD_COMMAND(linux_sort, "Sort queue with linux lib/list_sort.c", "");
+    ADD_COMMAND(quick_sort, "Sort queue with Non recursive quick sort", "");
     ADD_COMMAND(size, "Compute queue size n times (default: n == 1)", "[n]");
     ADD_COMMAND(show, "Show queue contents", "");
     ADD_COMMAND(dm, "Delete middle node in queue", "");

效能比較

建立亂序測試資料的效能數據 script(真實世界中罕見的情境)和在 Makefile 加入測試命令:

diff --git a/Makefile b/Makefile
index 1fe3fd0..ecda85e 100644
--- a/Makefile
+++ b/Makefile
@@ -40,7 +40,7 @@ $(GIT_HOOKS):
 OBJS := qtest.o report.o console.o harness.o queue.o \
         random.o dudect/constant.o dudect/fixture.o dudect/ttest.o \
         shannon_entropy.o \
-        linenoise.o web.o list_sort.o
+        linenoise.o web.o list_sort.o quick_sort.o
 
 deps := $(OBJS:%.o=.%.o.d)
 
@@ -83,19 +83,30 @@ clean:
 distclean: clean
 	rm -f .cmd_history
 
-sort_perf: 
+linux_sort.dat: list_sort.o
 	./qtest -v 2 -f traces/trace-perf-linux-sort.cmd -l linux_sort.dat
 	cat linux_sort.dat | grep -v 'Delta time =' | sed 's/# //g' > linux_sort.dat01
 	cat linux_sort.dat | grep 'Delta time =' | sed 's/Delta time = //g' > linux_sort.dat02
 	pr -m -T linux_sort.dat01 linux_sort.dat02 > linux_sort.dat
 	rm linux_sort.dat01 linux_sort.dat02
+
+merge_sort.dat: queue.o
 	./qtest -v 2 -f traces/trace-perf-merge-sort.cmd -l merge_sort.dat
 	cat merge_sort.dat | grep -v 'Delta time =' | sed 's/# //g' > merge_sort.dat01
 	cat merge_sort.dat | grep 'Delta time =' | sed 's/Delta time = //g' > merge_sort.dat02
 	pr -m -T merge_sort.dat01 merge_sort.dat02 > merge_sort.dat
 	rm merge_sort.dat01 merge_sort.dat02
+
+quick_sort.dat: quick_sort.o
+	./qtest -v 2 -f traces/trace-perf-quick-sort.cmd -l quick_sort.dat
+	cat quick_sort.dat | grep -v 'Delta time =' | sed 's/# //g' > quick_sort.dat01
+	cat quick_sort.dat | grep 'Delta time =' | sed 's/Delta time = //g' > quick_sort.dat02
+	pr -m -T quick_sort.dat01 quick_sort.dat02 > quick_sort.dat
+	rm quick_sort.dat01 quick_sort.dat02
+
+sort_perf: linux_sort.dat merge_sort.dat quick_sort.dat
 	gnuplot scripts/sort_perf.gp
-	rm linux_sort.dat merge_sort.dat
+	# rm linux_sort.dat merge_sort.dat quick_sort.dat
 	xdg-open sort_perf.png
 
 -include $(deps)

修改 gunplot script - sort_perf.gp,加入 quick sort 效能數據:

diff --git a/scripts/sort_perf.gp b/scripts/sort_perf.gp
index 071cf0d..db8899c 100644
--- a/scripts/sort_perf.gp
+++ b/scripts/sort_perf.gp
@@ -3,5 +3,5 @@ set xlabel 'size of list'
 set title 'performance comparison'
 set term png enhanced font 'Verdana,10'
 set output 'sort_perf.png'
-plot "linux_sort.dat" title "linux list sort" w lp, "merge_sort.dat" title "merge sort" w lp
+plot "linux_sort.dat" title "linux list sort" w lp, "merge_sort.dat" title "merge sort" w lp, "quick_sort.dat" title "quick sort" w lp

non recursive quick sort 和我實作的 merge sort 及 liunx list sort 進行比較。

Image Not Showing Possible Reasons
  • The image was uploaded to a note which you don't have access to
  • The note which the image was originally uploaded to has been deleted
Learn More →

quiz1 - 測驗 2

把 Tim sort 整合到 lab0-c

加入 Tim sort 實作程式碼

sort_impl.htimsort.c

加入 tim_sort 命令

修改 qtest.c

diff --git a/qtest.c b/qtest.c
index 41fcd33..4c151eb 100644
--- a/qtest.c
+++ b/qtest.c
@@ -45,6 +45,7 @@ extern int show_entropy;
 #include "list_sort.h"
 #include "queue.h"
 #include "quick_sort.h"
+#include "sort_impl.h"
 
 #include "console.h"
 #include "report.h"
@@ -625,6 +626,8 @@ bool _do_sort(int argc, char *argv[], int mode)
             list_sort(NULL, current->q, descend ? cmp_descend : cmp);
         else if (mode == 2)
             quick_sort(current->q, descend);
+        else if (mode == 3)
+            timsort(NULL, current->q, descend ? cmp_descend : cmp);
     }
 
     exception_cancel();
@@ -671,6 +674,11 @@ bool do_quick_sort(int argc, char *argv[])
     return _do_sort(argc, argv, 2);
 }
 
+bool do_tim_sort(int argc, char *argv[])
+{
+    return _do_sort(argc, argv, 3);
+}
+
 static bool do_dm(int argc, char *argv[])
 {
     if (argc != 1) {
@@ -1086,6 +1094,7 @@ static void console_init()
     ADD_COMMAND(sort, "Sort queue in ascending/descening order", "");
     ADD_COMMAND(linux_sort, "Sort queue with linux lib/list_sort.c", "");
     ADD_COMMAND(quick_sort, "Sort queue with Non recursive quick sort", "");
+    ADD_COMMAND(tim_sort, "Sort queue with tim sort", "");
     ADD_COMMAND(size, "Compute queue size n times (default: n == 1)", "[n]");
     ADD_COMMAND(show, "Show queue contents", "");
     ADD_COMMAND(dm, "Delete middle node in queue", "");

效能比較

建立亂序測試資料的效能數據 script(真實世界中罕見的情境)和在 Makefile 加入測試命令:

diff --git a/Makefile b/Makefile
index ecda85e..2d8881d 100644
--- a/Makefile
+++ b/Makefile
@@ -40,7 +40,7 @@ $(GIT_HOOKS):
 OBJS := qtest.o report.o console.o harness.o queue.o \
         random.o dudect/constant.o dudect/fixture.o dudect/ttest.o \
         shannon_entropy.o \
-        linenoise.o web.o list_sort.o quick_sort.o
+        linenoise.o web.o list_sort.o quick_sort.o timsort.o
 
 deps := $(OBJS:%.o=.%.o.d)
 
@@ -104,7 +104,14 @@ quick_sort.dat: quick_sort.o
 	pr -m -T quick_sort.dat01 quick_sort.dat02 > quick_sort.dat
 	rm quick_sort.dat01 quick_sort.dat02
 
-sort_perf: linux_sort.dat merge_sort.dat quick_sort.dat
+tim_sort.dat: timsort.o
+	./qtest -v 2 -f traces/trace-perf-tim-sort.cmd -l tim_sort.dat
+	cat tim_sort.dat | grep -v 'Delta time =' | sed 's/# //g' > tim_sort.dat01
+	cat tim_sort.dat | grep 'Delta time =' | sed 's/Delta time = //g' > tim_sort.dat02
+	pr -m -T tim_sort.dat01 tim_sort.dat02 > tim_sort.dat
+	rm tim_sort.dat01 tim_sort.dat02
+	
+sort_perf: linux_sort.dat merge_sort.dat quick_sort.dat tim_sort.dat
 	gnuplot scripts/sort_perf.gp
 	# rm linux_sort.dat merge_sort.dat quick_sort.dat
 	xdg-open sort_perf.png

修改 gunplot script - sort_perf.gp,加入 Tim sort 效能數據:

diff --git a/scripts/sort_perf.gp b/scripts/sort_perf.gp
index db8899c..780f16c 100644
--- a/scripts/sort_perf.gp
+++ b/scripts/sort_perf.gp
@@ -3,5 +3,5 @@ set xlabel 'size of list'
 set title 'performance comparison'
 set term png enhanced font 'Verdana,10'
 set output 'sort_perf.png'
-plot "linux_sort.dat" title "linux list sort" w lp, "merge_sort.dat" title "merge sort" w lp, "quick_sort.dat" title "quick sort" w lp
+plot "linux_sort.dat" title "linux list sort" w lp, "merge_sort.dat" title "merge sort" w lp, "quick_sort.dat" title "quick sort" w lp, "tim_sort.dat" title "tim sort" w lp

Tim sort 在亂序測試資料的情境下,當測試資料量達到 7,000,000 以上時,效能比 linux list sort 還好:

sort_perf

quick sort worst case 效能比較

分別建立排序已排序資料的 script: trace-linux-sort-qwc.cmd, trace-merge-sort-qwc.cmd, trace-quick-sort-qwc.cmdtrace-tim-sort-qwc.cmd

在 Makefile 加入測試命令:

diff --git a/Makefile b/Makefile
index 2d8881d..a0e0a25 100644
--- a/Makefile
+++ b/Makefile
@@ -116,4 +116,37 @@ sort_perf: linux_sort.dat merge_sort.dat quick_sort.dat tim_sort.dat
 	# rm linux_sort.dat merge_sort.dat quick_sort.dat
 	xdg-open sort_perf.png
 
+linux_sort_qwc.dat: list_sort.o
+	./qtest -v 2 -f traces/trace-perf-linux-sort-qwc.cmd -l linux_sort_qwc.dat
+	cat linux_sort_qwc.dat | grep -v 'Delta time =' | sed 's/# //g' > linux_sort_qwc.dat01
+	cat linux_sort_qwc.dat | grep 'Delta time =' | sed 's/Delta time = //g' > linux_sort_qwc.dat02
+	pr -m -T linux_sort_qwc.dat01 linux_sort_qwc.dat02 > linux_sort_qwc.dat
+	rm linux_sort_qwc.dat01 linux_sort_qwc.dat02
+
+merge_sort_qwc.dat: queue.o
+	./qtest -v 2 -f traces/trace-perf-merge-sort-qwc.cmd -l merge_sort_qwc.dat
+	cat merge_sort_qwc.dat | grep -v 'Delta time =' | sed 's/# //g' > merge_sort_qwc.dat01
+	cat merge_sort_qwc.dat | grep 'Delta time =' | sed 's/Delta time = //g' > merge_sort_qwc.dat02
+	pr -m -T merge_sort_qwc.dat01 merge_sort_qwc.dat02 > merge_sort_qwc.dat
+	rm merge_sort_qwc.dat01 merge_sort_qwc.dat02
+
+quick_sort_qwc.dat: quick_sort.o
+	./qtest -v 2 -f traces/trace-perf-quick-sort-qwc.cmd -l quick_sort_qwc.dat
+	cat quick_sort_qwc.dat | grep -v 'Delta time =' | sed 's/# //g' > quick_sort_qwc.dat01
+	cat quick_sort_qwc.dat | grep 'Delta time =' | sed 's/Delta time = //g' > quick_sort_qwc.dat02
+	pr -m -T quick_sort_qwc.dat01 quick_sort_qwc.dat02 > quick_sort_qwc.dat
+	rm quick_sort_qwc.dat01 quick_sort_qwc.dat02
+
+tim_sort_qwc.dat: timsort.o
+	./qtest -v 2 -f traces/trace-perf-tim-sort-qwc.cmd -l tim_sort_qwc.dat
+	cat tim_sort_qwc.dat | grep -v 'Delta time =' | sed 's/# //g' > tim_sort_qwc.dat01
+	cat tim_sort_qwc.dat | grep 'Delta time =' | sed 's/Delta time = //g' > tim_sort_qwc.dat02
+	pr -m -T tim_sort_qwc.dat01 tim_sort_qwc.dat02 > tim_sort_qwc.dat
+	rm tim_sort_qwc.dat01 tim_sort_qwc.dat02
+	
+sort_perf_qwc: linux_sort_qwc.dat merge_sort_qwc.dat quick_sort_qwc.dat tim_sort_qwc.dat
+	gnuplot scripts/sort_perf_qwc.gp
+	# rm linux_sort.dat merge_sort.dat quick_sort.dat
+	xdg-open sort_perf_qwc.png
+
 -include $(deps)

建立 gunplot script - sort_perf_qwc.gp

排序已排序資料的效能比較顯示,Tim sort 效能最好。

sort_perf_qwc

在排序已排序資料的效能比較中,merge sort 比 linux list sort 好,是因在 circular doubly-linked list 可以

O(1) 找到 last node,在 merge sort 中 merge 時可以檢查是不是存在其中一 list 的 last node(最大值)小於另一 list 的 first node(最小值),存在的話便可以直接接上兩個 list,而不用遍歷兩個 list 的 node:

    // merge
    if (!list_empty(&left) && !list_empty(&right)) {
        // fast case 1
        element_t *l_f = list_first_entry(&left, element_t, list);
        element_t *r_l = list_last_entry(&right, element_t, list);
        if (valuecmp(r_l->value, l_f->value, descend) <= 0) {
            list_splice_tail(&left, &right);
            list_splice(&right, head);
            return;
        }

        // fast case 2
        element_t *l_l = list_last_entry(&left, element_t, list);
        element_t *r_f = list_first_entry(&right, element_t, list);
        if (valuecmp(l_l->value, r_f->value, descend) <= 0) {
            list_splice_tail(&right, &left);
            list_splice(&left, head);
            return;
        }
    }

若拿掉上述程式碼,merge sort 的效能便不及 linux list sort:

sort_perf_qwc

quiz2 - 測驗 1

實作中利用 hash table 來記錄 inorder 的 value 的 index。然後,在用 preorder 建樹時,快速找到 root(perorder 的第一個 value)在 inorder 中的 index,從而得知左右子樹大小,便可以在 preorder 裡分割出左右子樹。 接著,左右子樹 recursively 建樹。

測試程式碼:

int tree_height(struct TreeNode *tree, int height) {

    if (tree == NULL)
        return height;

    int left_height = tree_height(tree->left, height + 1);
    int right_height = tree_height(tree->right, height + 1);

    return left_height > right_height ? left_height : right_height;
}

void tree_print(struct TreeNode *tree, struct TreeNode *output[], int index) {

    if (tree == NULL)
        return;

    output[index] = tree;

    tree_print(tree->left, output, (index << 1) + 1);
    tree_print(tree->right, output, (index << 1) + 2);
}

int main() {
    int preorder[] = {3, 9, 20, 15, 7};
    int inorder[] = {9, 3, 15, 20, 7};
    /* int preorder[] = {-1}; */
    /* int inorder[] = {-1}; */

    int size = sizeof(preorder) / sizeof(int);

    if (!size) {
        printf("[]\n");
        return 0;
    }

    struct TreeNode *tree = buildTree(preorder, size, inorder, size);

    int height = tree_height(tree, 0);

    int output_size = (1 << height) - 1;

    struct TreeNode *output[output_size];

    int i;

    for (i = 0; i < output_size; i++)
        output[i] = NULL;

    tree_print(tree, output, 0);

    printf("[");

    for (i = 0; i < output_size - 1; i++) {
        if (output[i] == NULL)
            printf("null, ");
        else {
            printf("%d, ", output[i]->val);
        }
    }
    
    if (output[i] == NULL)
        printf("null]\n");
    else {
        printf("%d]\n", output[i]->val);
    }

    return 0;
}

quiz2 - 測驗 2

實作用 hash table 來存取資料,用 linked list 來記錄 Least Recently Used。所以資料結構中包含了 struct hlist_nodestruct list_head

typedef struct {
    int key;
    int value;
    struct hlist_node node;
    struct list_head link;
} LRUNode;

當資料從 hash table 取出時,便會把資料移到 linked list 的最前面,代表 Least Recently Used。

當新資料要存到 hash table 時,需要檢查容量是否已滿了,如果是的話,便要把 linked list 最後面的資料,用新資料取代,並且把它移到 linked list 的最前面,代表 Least Recently Used;否則,配置新的資料儲存空間,放到 hash table 和加到 linked list 的最前面。

quiz2 - 測驗 3