2023q1 Homework1 (quiz1)

--- tags: linux kernel class --- # 2023q1 Homework1 (quiz1) contributed by < `Jerejere0808` > ### 測驗一 :::spoiler ```c static void list_sort(struct list_head *head) { if (list_empty(head) || list_is_singular(head)) return; struct list_head list_less, list_greater; INIT_LIST_HEAD(&list_less); INIT_LIST_HEAD(&list_greater); struct item *pivot = list_first_entry(head, AAA, BBB); list_del(&pivot->list); struct item *itm = NULL, *is = NULL; CCC (itm, is, head, list) { if (cmpint(&itm->i, &pivot->i) < 0) DDD (&itm->list, &list_less); else EEE (&itm->list, &list_greater); } list_sort(&list_less); list_sort(&list_greater); list_add(&pivot->list, head); list_splice(&list_less, head); FFF(&list_greater, head); } ``` :::danger 不用抄題目，善用超連結。 :notes: jserv ::: ::: 遞迴版的 quick sort 1. 拆掉 head 並把第一個 node 當作 pivot 2. 所有比 pivot 小的 node 擺放在list_less ，所有比 pivot 大的 node 擺放在list_greater，個別針對兩個新的 list 以第回方式持續分割直到剩下一個節點，如下圖可以再繼續分解 partirion1 和 partirion2 3. 若無法再分解就直接返回到上一層之後與另一半再次結合到原本的 head 上 ```graphviz digraph G { rankdir=LR; node[shape=record, height=.1]; // Linked list nodes node3[label="<f0>3"]; node4[label="<f0>4"]; node1[label="<f0>1"]; node9[label="<f0>9"]; node5[label="<f0>5"]; node8[label="<f0>8"]; node0[label="<f0>0"]; // Linked list edges node3 -> node4; node4 -> node1; node1 -> node9; node9 -> node5; node5 -> node8; node8 -> node0; // Quicksort partitions subgraph cluster_0 { label = "Partition 1"; rank=same; node_1[label="<f0>3"]; node_2[label="<f0>1"]; node_3[label="<f0>0"]; node_1 -> node_2; node_2 -> node_3; } subgraph cluster_1 { label = "Partition 2"; rank=same; node_4[label="<f0>4"]; node_5[label="<f0>9"]; node_6[label="<f0>5"]; node_7[label="<f0>8"]; node_4 -> node_5; node_5 -> node_6; node_6 -> node_7; } // Quicksort pivot subgraph cluster_2 { label = "Pivot"; pivot[label="<f0>3"]; } } ``` AAA = item BBB = list list_first_entry 以 struct member list 來定位並取首個節點的地址。 CCC = list_for_each_entry_safe 走訪每個 node 並與 pivot比較大小 DDD = list_move_tail 若 node 值小於 pivot，用 list_move_tail 將 node 從原佇列移動到 list_less 尾端 EEE = list_move_tail 若 node 值大於 pivot，用 list_move_tail 將 node 從原佇列移動到 list_greater 尾端 FFF = list_splice_tail 把 pivot 歸納到 list_less 針對 Quick sort 提出改進方案並實作 Balanced Quick Sort 改進 pivot 的選擇，採用的策略為取出佇列最前端與最後端以及中間的元素，並以中間值作為 pivot。這樣一來可以避免選擇到最大或最小 pivot 導致 $O(N^2^)$ 最壞狀況。 ```c element_t* middleOfThree(element_t *a, element_t *b, element_t *c) { // Checking for b if ((strcmp(a->value, b->value) <= 0 && strcmp(b->value, c->value) <= 0) || (strcmp(c->value, b->value) <= 0 && strcmp(b->value, a->value) <= 0)) return b; // Checking for a else if ((strcmp(a->value, b->value) <= 0 && strcmp(c->value, a->value) <= 0) || (strcmp(a->value, c->value) <= 0 && strcmp(b->value, a->value) <= 0)) return a; else return c; } void q_sort(struct list_head *head) { if (list_empty(head) || list_is_singular(head)) return; struct list_head list_less, list_greater; INIT_LIST_HEAD(&list_less); INIT_LIST_HEAD(&list_greater); element_t *left = list_first_entry(head, element_t, list); element_t *right = list_last_entry(head, element_t, list); element_t *middle; struct list_head *l = head->next; struct list_head *r = head->prev; while (1) { if (l == r) { middle = list_entry(l, element_t, list); break; } else if (l->next == r) { // remove later one middle = list_entry(l, element_t, list); break; } l = l->next; r = r->prev; } element_t *pivot = middleOfThree(left, right, middle); list_del(&pivot->list); element_t *itm = NULL, *is = NULL; list_for_each_entry_safe (itm, is, head, list) { if (strcmp(itm->value, pivot->value) < 0) list_move_tail (&itm->list, &list_less); else list_move_tail (&itm->list, &list_greater); } q_sort(&list_less); q_sort(&list_greater); list_add(&pivot->list, head); list_splice(&list_less, head); list_splice_tail(&list_greater, head); } ``` 引入 hybrid sort 策略 (實作 introsort) 這裡的 insort() 的作法有點像是半衰期的概念，每次 pivot 分割完後都把前半部(小的部分)給下一層insort()遞迴下去，並且depth_limit - 1再去處理後半部，這裡用array儲存每個 node，所以 middleOfThree 中，找中間節點就只需要0(1)的時間( (low+high)/2 )，只是資料量若太大就會空間不足。 ```c #define MAX_NUM 1000000 element_t* middleOfThree(element_t *a, element_t *b, element_t *c) { // Checking for b if ((strcmp(a->value, b->value) <= 0 && strcmp(b->value, c->value) <= 0) || (strcmp(c->value, b->value) <= 0 && strcmp(b->value, a->value) <= 0)) return b; // Checking for a else if ((strcmp(a->value, b->value) <= 0 && strcmp(c->value, a->value) <= 0) || (strcmp(a->value, c->value) <= 0 && strcmp(b->value, a->value) <= 0)) return a; else return c; } void heapify(element_t **array, int n, int i) { int largest = i; int left = 2 * i + 1; int right = 2 * i + 2; if (left < n && strcmp(array[left]->value, array[largest]->value) > 0) { largest = left; } if (right < n && strcmp(array[right]->value, array[largest]->value) > 0) { largest = right; } if (largest != i) { element_t *tmp = array[i]; array[i] = array[largest]; array[largest] = tmp; heapify(array, n, largest); } } void heapsort(element_t **array, int n) { for (int i = n / 2 - 1; i >= 0; i--) { heapify(array, n, i); } for (int i = n - 1; i >= 0; i--) { element_t *tmp = array[0]; array[0] = array[i]; array[i] = tmp; heapify(array, i, 0); } } int partition(element_t **array, int low, int high) { element_t *pivot = middleOfThree(array[low], array[(low+high)/2], array[high]); int i = low - 1; int j = high + 1; while (1) { do { i++; } while (strcmp(array[i]->value, pivot->value) < 0); do { j--; } while (strcmp(array[j]->value, pivot->value) > 0); if (i >= j) { return j; } element_t *tmp = array[i]; array[i] = array[j]; array[j] = tmp; } } void insertion_sort(element_t **array, int low, int high) { for (int i = low + 1; i <= high; i++) { element_t *key = array[i]; int j = i - 1; while (j >= low && strcmp(array[j]->value, key->value) > 0) { array[j+1] = array[j]; j--; } array[j+1] = key; } } void introsort(element_t **array, int low, int high, int depth_limit) { while (high - low > 16) { if (depth_limit == 0) { heapsort(array + low, high - low + 1); return; } depth_limit--; int p = partition(array, low, high); introsort(array, low, p, depth_limit); low = p + 1; } insertion_sort(array, low, high); } void q_sort(struct list_head *head) { if (list_empty(head) || list_is_singular(head)) return; int size = 0; element_t *it; list_for_each_entry (it, head, list) { size++; } element_t *array[MAX_NUM]; int i = 0; list_for_each_entry (it, head, list) { array[i++] = it; } introsort(array, 0, size-1, (int)log2(size) * 2); INIT_LIST_HEAD(head); for (i = 0; i < size; i++) { list_add_tail(&array[i]->list, head); } } ``` --- ### 測驗二迭代版的 quick :::spoiler ```c= #define MAX_DEPTH 512 static void list_sort_nr(struct list_head *head) { if (list_empty(head) || list_is_singular(head)) return; struct list_head stack[MAX_DEPTH]; for (int i = 0; i < MAX_DEPTH; i++) INIT_LIST_HEAD(&stack[i]); int top = -1; list_splice_init(head, &stack[++top]); struct list_head partition; INIT_LIST_HEAD(&partition); while (top >= 0) { INIT_LIST_HEAD(&partition); list_splice_init(&stack[top--], &partition); if (!list_empty(&partition) && !list_is_singular(&partition)) { struct list_head list_less, list_greater; INIT_LIST_HEAD(&list_less); INIT_LIST_HEAD(&list_greater); struct item *pivot = list_first_entry(&partition, struct item, list); list_del(&pivot->list); INIT_LIST_HEAD(&pivot->list); struct item *itm = NULL, *is = NULL; GGGG (itm, is, &partition, list) { list_del(&itm->list); if (cmpint(&itm->i, &pivot->i) < 0) list_move(&itm->list, &list_less); else list_move(&itm->list, &list_greater); } HHHH (&pivot->list, &list_less); if (!list_empty(&list_greater)) list_splice_tail(&list_greater, IIII); if (!list_empty(&list_less)) list_splice_tail(&list_less, JJJJ); } else { top++; list_splice_tail(&partition, &stack[top]); while (top >= 0 && list_is_singular(&stack[top])) { struct item *tmp = list_first_entry(&stack[top], struct item, list); list_del(&tmp->list); INIT_LIST_HEAD(KKKK); list_add_tail(&tmp->list, head); } } } } ``` ::: 用 stack 來取代遞迴版的 quick sort 分割動作，當還可以繼續分割 list 時，就選擇一個 pivot 並且將比 pivot 小的 node 串成 list_less，比 pivot 大的 node 串成 list_greater，並將後者放置到 stack 下一個位置，前者放置到stack 下下個位置，並移動到 stack 最頂層(也就是剛剛list_less放置的地方)並持續此動作來分割 list 直到剩一個節點無法分割，此時就模擬 stack pop 的動作，將最上層的 node 加入到 head 最後方，因為都是先把小的 node 往上堆，所以可以觀察到越小的 node 會越先被加到 head 。 GGGG = list_for_each_entry_safe 走訪每個 node 並與 pivot比較大小 HHHH = list_add_tail 把 pivot 接到 list_less 後面 (為什麼不是前面? 因為假設有 3 2 在 list， 3 為 pivot 分割後 list_less 有 2 ，若又把 3 放前面， list_less 變成 3 2 還是回到未分割狀態造成永遠無法分割。若放後面，2 3 下回合 3 就會被分到list_greater了) IIII = &stack[++top] 把list_greater 堆進 stack JJJJ = &stack[++top] 再把list_less 堆進 stack KKKK = &stack[top- -] 往下 pop stack 這個方法最大的缺點就是依賴 MAX_DEPTH 再資料量很大時可能導致記憶體空間分配不足的問題，至於要如何避免依賴 MAX_DEPTH 目前想不到方法，因為其為top down，若不用 recursion 就要使用額外記憶體來暫存某些狀態，所以 iteration quick sort 無法達到memory cost O(1) 。 :::info #更正後來參考到[***yanjiew1***](https://hackmd.io/@yanjiew/linux2023q1-quiz1)的作法，發現其實可以將原本雙向 linked list 的 prev pointer 拿來串接每個被分割後的 list 來當作一個 stack。如下圖list_less 透過 prev poniter 連接到 list_greater 。下階段把 list_less 分割後，把新產生的 list_greater 也是透過 prev list 跟原本的 list_greater 接起來，如此就能產生 stack 的效果，經過越多個 prev pointer 代表越接近 stack 底層，所以其實可以 memory cost O(1)。 ```graphviz digraph graphname{ compound=true rankdir=RL node[shape=box] D[label=2] F[label=3] B[label=5] C[label=7] E[label=4] { rank=same } subgraph { subgraph cluster_less { labeljust=l label="list_greater" E->C->B; } subgraph cluster_greater { labeljust=l label="list_less" F->D; F->E [label="prev"] [labeljust=r] // Arrow from F to E with "prev" label } E->D[style=invis] } } ``` ::: ### recursive quick sort v.s iterative quick sort 效能比較用 qtest time 做測試，每筆分別做三次 ***recursion sort*** * 10000 筆： * Delta time = 0.002 * Delta time = 0.008 * Delta time = 0.008 * 100000 筆： * Delta time = 0.082 * Delta time = 0.093 * Delta time = 0.081 * 500000 筆： * Delta time = 0.600 * Delta time = 0.581 * Delta time = 0.596 * 1000000 筆： * Delta time = 1.486 ***recursion balanced sort*** * 10000 筆： * Delta time = 0.003 * Delta time = 0.004 * Delta time = 0.005 * 100000 筆： * Delta time = 0.080 * Delta time = 0.081 * Delta time = 0.097 * 500000 筆： * Delta time = 0.657 * Delta time = 0.654 * Delta time = 0.647 * 1000000 筆： * Time limit exceeded ***iteration*** * 10000 筆： * Delta time = 0.003 * Delta time = 0.004 * Delta time = 0.004 * 100000 筆： * Delta time = 0.090 * Delta time = 0.093 * Delta time = 0.097 * 500000 筆： * Delta time = 0.755 * Delta time = 0.842 * Delta time = 0.781 * 1000000 筆： * Time limit exceeded 測出來的結果 balanced recursion quick sort 速度並沒有比 recursion quick sort 快，後來發現是因為沒有思考到選 pivot 的過程花0(N)的時間尋找中間節點，原版的找第一個節點只需O(1) 之後會再試試別的方法，像是找第二個節點取代找中間節點。但是令我訝異的是 iteration quick sort 並沒有比 recursion quick sort 快，不要說有時會比 recursion quick sort 慢，基本上大部分時候都是是三者最慢 = =，這點我還在尋找答案，目前<s>收尋到的答案</s>是因為用 stack 模擬 recursion 會需要反覆存取 stack 多次而增加額外的執行時間。 :::danger 不用花太多時間「搜尋」，用自身力量解決問題，不是等網際網路上面陌生人的施捨。 :notes: jserv ::: --- 測驗三 ***XOR linked list*** :::info ```c typedef struct _xorlist_node { struct _xorlist_node *cmp; } xor_node_t; ``` struct _xorlist_node 裡的 cmp 內容其實就是把兩邊的 node address 做 XOR 的結果，若某一邊沒有節點，其地址就視為 0。假設 linked list 為 “A - B - C” : 將 address 透過 XOR 達成 doubly linked list 之 prev & next 效果 B 的 cmp 跟 address of C 進行 XOR 可得到 address of A (B->prev = A) B 的 cmp 跟 address of A 進行 XOR 可得到 address of C (B->next = C) ::: LLLL = (uintptr_t)a ^ (uintptr_t)b MMMM = &list->tail 當 list 為空也就是 head 直接接 tail 的情況， real_next 就應為 address of tail PPPP = real_next->cmp 要先利用 XOR 的特性消除掉上一個 node 的 address 也就是 real_prev (因為跟上一個 node 之間已經有新的 node 插入了)

Syntax	Example	Reference
# Header	Header	基本排版
- Unordered List	Unordered List
1. Ordered List	Ordered List
- [ ] Todo List	Todo List
> Blockquote	Blockquote
Bold font	Bold font
Italics font	Italics font
~~Strikethrough~~	~~Strikethrough~~
19^th^	19^th
H~2~O	H₂O
++Inserted text++	Inserted text
==Marked text==	Marked text
[link text](https:// "title")	Link
![image alt](https:// "title")	Image
`Code`	`Code`	在筆記中貼入程式碼
```javascript var i = 0; ```	`var i = 0;`
:smile:		Emoji list
{%youtube youtube_id %}	Externals
$L^aT_eX$	L^aT_eX
:::info This is a alert area. :::	This is a alert area.