2023q1 Homework1 (lab0)

# 2023q1 Homework1 (lab0) contributed by < [`chunplusplus`](https://github.com/chunplusplus/lab0-c) > ## 開發環境 ```bash $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 $ lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Address sizes: 39 bits physical, 48 bits virtual Byte Order: Little Endian CPU(s): 16 On-line CPU(s) list: 0-15 Vendor ID: GenuineIntel Model name: Intel(R) Core(TM) i7-10875H CPU @ 2.30GHz CPU family: 6 Model: 165 Thread(s) per core: 2 Core(s) per socket: 8 Socket(s): 1 Stepping: 2 CPU max MHz: 5100.0000 CPU min MHz: 800.0000 BogoMIPS: 4599.93 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_go od nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm 2 ssse3 sdbg fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsa ve avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi 2 erms invpcid mpx rdseed adx smap clflushopt intel_pt xsaveopt xsavec xgetbv1 xsaves dtherm ida arat pln pts hwp hwp_notify hwp_act_window hwp_epp pku ospke md_clear flush_l1d arch_capabilities Virtualization features: Virtualization: VT-x Caches (sum of all): L1d: 256 KiB (8 instances) L1i: 256 KiB (8 instances) L2: 2 MiB (8 instances) L3: 16 MiB (1 instance) NUMA: NUMA node(s): 1 NUMA node0 CPU(s): 0-15 ``` ## 實作 queue.c ### `q_delete_mid` 運用 circular doubly-linked list 的特性，分別建立向前遍歷和向後遍歷的 forward 和 backward 的 list_head 指標。當 forward 或 forward->next 和 barkward "相遇" 時，forward 便是中間節點。 ```c /* Delete the middle node in queue */ bool q_delete_mid(struct list_head *head) { // https://leetcode.com/problems/delete-the-middle-node-of-a-linked-list/ if (!head || list_empty(head)) return false; struct list_head *forward = head->next; struct list_head *backward = head->prev; while (forward != backward && forward->next != backward) { forward = forward->next; backward = backward->prev; } element_t *e = list_entry(forward, element_t, list); free(e->value); list_del(&e->list); free(e); return true; } ``` ### `q_delete_dup` 用變數 `e` 記錄 current node, 用 `next` 記錄 current node 之後的節點。遍歷每一個節點，每次遍歷都要檢查之後有沒有 duplicate string 的節點。如果有的話要把它刪掉，並且在該次遍歷後要把 current node 刪掉，所以需要用 bool `dup` 來記錄有沒有 duplicate string 的節點。因為遍歷的過程中有可能刪除 current node 和 next node, 所以此 function 不能使用 `list_for_each_safe` 遍歷。 ```c /* Delete all nodes that have duplicate string */ bool q_delete_dup(struct list_head *head) { // https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/ if (!head) return false; if (list_empty(head) || list_is_singular(head)) return true; element_t *e = list_first_entry(head, element_t, list); struct list_head *next = e->list.next; while (next != head) { bool dup = false; while (next != head) { element_t *next_e = list_entry(next, element_t, list); next = next->next; if (strcmp(e->value, next_e->value) == 0) { dup = true; free(next_e->value); list_del(&next_e->list); free(next_e); } else { break; } } next = e->list.next; // use next to temporarily storing the next e if (dup) { free(e->value); list_del(&e->list); free(e); } if (next != head) { e = list_entry(next, element_t, list); next = e->list.next; } } return true; } ``` ### `q_swap` 以 aa <-> a <-> b <-> bb 四個相連節點為例，swap a 和 b 需要變更 a 和 b 之間的 3 個 next 和 3 個 prev 指標。 swap 後 a 便賦值為 bb, b 賦值為 bb->next, aa 賦值為 a->prev, bb 賦值為 b->next。重複 swap a 和 b 直至 a 或 b 為 head 為止。 ```c /* Swap every two adjacent nodes */ void q_swap(struct list_head *head) { // https://leetcode.com/problems/swap-nodes-in-pairs/ if (!head || list_empty(head) || list_is_singular(head)) return; // aa <-> a <-> b <-> bb struct list_head *a = head->next; struct list_head *b = a->next; struct list_head *aa = a->prev; struct list_head *bb = b->next; while (a != head && b != head) { aa->next = b; b->prev = aa; b->next = a; a->prev = b; a->next = bb; bb->prev = a; a = bb; b = a->next; aa = a->prev; bb = b->next; } return; } ``` ### `q_reverse` 可使用 `list_for_each_safe` 遍歷，swap current node 的 next 和 prev。 ```c /* Reverse elements in queue */ void q_reverse(struct list_head *head) { if (!head || list_empty(head) || list_is_singular(head)) return; struct list_head *node, *safe, *tmp; list_for_each_safe (node, safe, head) { tmp = node->next; node->next = node->prev; node->prev = tmp; } tmp = head->next; head->next = head->prev; head->prev = tmp; return; } ``` ### `q_reverseK` 不斷從輸入的 list 前端切出 k 個 node 成一新的 list `to`, 把 `to` 給 `q_reverse` 反轉，反轉的結果接到用來暫存的 `tmp`。最後把 `tmp` 接到輸入的 list 前端。 ```c /* Reverse the nodes of the list k at a time */ void q_reverseK(struct list_head *head, int k) { // https://leetcode.com/problems/reverse-nodes-in-k-group/ if (!head || list_empty(head) || list_is_singular(head) || k < 2) return; struct list_head tmp; INIT_LIST_HEAD(&tmp); while (q_size(head) >= k) { struct list_head *p = head; for (int i = 0; i < k; i++) { p = p->next; } struct list_head to; INIT_LIST_HEAD(&to); list_cut_position(&to, head, p); q_reverse(&to); list_splice_tail(&to, &tmp); } list_splice(&tmp, head); return; } ``` ### `q_sort` 用 recursive 實作 top down 的 merge sort。因為 circular doubly-linked list 可以 $O(1)$ 找到 last node，所以 merge 時可以檢查是不是存在其中一 list 的 last node（最大值）小於另一 list 的 first node（最小值），存在的話便可以直接接上兩個 list, 而不用遍歷兩個 list 的 node。 ```c /* Sort elements of queue in ascending order */ void q_sort(struct list_head *head) { if (!head || list_empty(head) || list_is_singular(head)) return; struct list_head *forward = head->next; struct list_head *backward = head->prev; while (forward != backward && forward->next != backward) { forward = forward->next; backward = backward->prev; } struct list_head left, right; INIT_LIST_HEAD(&left); INIT_LIST_HEAD(&right); list_cut_position(&left, head, forward); list_splice_init(head, &right); q_sort(&left); q_sort(&right); // merge two lists if (!list_empty(&left) && !list_empty(&right)) { // quick case 1 element_t *l_f = list_first_entry(&left, element_t, list); element_t *r_l = list_last_entry(&right, element_t, list); if (strcmp(r_l->value, l_f->value) <= 0) { list_splice_tail(&left, &right); list_splice(&right, head); return; } // quic case 2 element_t *l_l = list_last_entry(&left, element_t, list); element_t *r_f = list_first_entry(&right, element_t, list); if (strcmp(l_l->value, r_f->value) <= 0) { list_splice_tail(&right, &left); list_splice(&left, head); return; } } int l_size = q_size(&left), r_size = q_size(&right); while (l_size > 0 && r_size > 0) { element_t *l = list_first_entry(&left, element_t, list); element_t *r = list_first_entry(&right, element_t, list); if (strcmp(l->value, r->value) > 0) { list_move_tail(&r->list, head); r_size--; } else { list_move_tail(&l->list, head); l_size--; } } if (l_size > 0) list_splice_tail(&left, head); else list_splice_tail(&right, head); return; } ``` ### q_descend 向後遍歷把比 current node 小的節點刪掉。當遇到比 current node 大或一樣的節點時便把它作為 current node。 ```c /* Remove every node which has a node with a strictly greater value anywhere to * the right side of it */ int q_descend(struct list_head *head) { // https://leetcode.com/problems/remove-nodes-from-linked-list/ if (!head || list_empty(head) || list_is_singular(head)) return q_size(head); element_t *e = list_last_entry(head, element_t, list); while (e->list.prev != head) { element_t *p = list_entry(e->list.prev, element_t, list); if (strcmp(e->value, p->value) <= 0) { e = p; } else { list_del(&p->list); free(p->value); free(p); } } return q_size(head); } ``` ### q_merge 用 `null_queues` 暫存已 merge 的空 queue。每次從輸入的 queue of queues 抽出最前面兩個 queue 做 merge, 直到剩下一個 queue 為止。已 merge 的 queue 放到 queue of queues 的尾端，以避免最前面的 queue 越來越長影響 merge 的效能。這裡的 merge 用到 `q_sort` 的 merge 的直接連接檢查以提高效率。最後把 `null_queues` 連回 queue of queues 的尾端。 ```c /* Merge all the queues into one sorted queue, which is in ascending order */ int q_merge(struct list_head *head) { // https://leetcode.com/problems/merge-k-sorted-lists/ if (!head || list_empty(head)) return 0; struct list_head null_queues; // temporary storage of NULL queues INIT_LIST_HEAD(&null_queues); while (!list_is_singular(head)) { queue_contex_t *a = list_first_entry(head, queue_contex_t, chain); queue_contex_t *b = list_entry(a->chain.next, queue_contex_t, chain); struct list_head *left = a->q; struct list_head *right = b->q; struct list_head tmp; INIT_LIST_HEAD(&tmp); // merge two lists int l_size = a->size, r_size = b->size; a->size += b->size; bool done = false; if (!list_empty(left) && !list_empty(right)) { // quick case 1 element_t *l_f = list_first_entry(left, element_t, list); element_t *r_l = list_last_entry(right, element_t, list); // quic case 2 element_t *l_l = list_last_entry(left, element_t, list); element_t *r_f = list_first_entry(right, element_t, list); if (strcmp(r_l->value, l_f->value) <= 0) { list_splice_tail_init(left, right); list_splice_init(right, &tmp); done = true; } else if (strcmp(l_l->value, r_f->value) <= 0) { list_splice_tail_init(right, left); list_splice_init(left, &tmp); done = true; } } if (!done) { while (l_size > 0 && r_size > 0) { element_t *l = list_first_entry(left, element_t, list); element_t *r = list_first_entry(right, element_t, list); if (strcmp(l->value, r->value) > 0) { list_move_tail(&r->list, &tmp); r_size--; } else { list_move_tail(&l->list, &tmp); l_size--; } } if (l_size > 0) list_splice_tail_init(left, &tmp); else list_splice_tail_init(right, &tmp); } list_splice(&tmp, a->q); /* b->q = NULL; */ b->size = 0; list_move_tail(&b->chain, &null_queues); list_move_tail(&a->chain, head); // move the 'long' queue to tail } list_splice_tail(&null_queues, head); queue_contex_t *a = list_first_entry(head, queue_contex_t, chain); return a->size; } ``` ## 測試 ### `make test` 自動評分系不知道為何在 trace-17-complexity 中的幾個單純的操作被認為可能不是 constant time? > follow up: 研究 dudect 相關程式 ```shell # Test performance of sort with random and descending orders # 10000: all correct sorting algorithms are expected pass # 50000: sorting algorithms with O(n^2) time complexity are expected failed # 100000: sorting algorithms with O(nlogn) time complexity are expected pass --- trace-15-perf 6/6 +++ TESTING trace trace-16-perf: # Test performance of insert_tail --- trace-16-perf 6/6 +++ TESTING trace trace-17-complexity: # Test if time complexity of q_insert_tail, q_insert_head, q_remove_tail, and q_remove_head is constant ERROR: Probably not constant time or wrong implementation ERROR: Probably not constant time or wrong implementation ERROR: Probably not constant time or wrong implementation ERROR: Probably not constant time or wrong implementation --- trace-17-complexity 0/5 --- TOTAL 95/100 make: *** [Makefile:60: test] Error 1 ``` ### `make valgrind` 分析記憶體問題 ```shell +++ TESTING trace trace-01-ops: # Test of insert_head and remove_head ==1449213== 32 bytes in 1 blocks are still reachable in loss record 1 of 2 ==1449213== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==1449213== by 0x10CBE0: do_new (qtest.c:146) ==1449213== by 0x10DE0C: interpret_cmda (console.c:181) ==1449213== by 0x10E3C1: interpret_cmd (console.c:201) ==1449213== by 0x10E7C2: cmd_select (console.c:610) ==1449213== by 0x10F0AE: run_console (console.c:705) ==1449213== by 0x10D1FE: main (qtest.c:1224) ==1449213== ==1449213== 56 bytes in 1 blocks are still reachable in loss record 2 of 2 ==1449213== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==1449213== by 0x10F122: test_malloc (harness.c:133) ==1449213== by 0x10F527: q_new (queue.c:17) ==1449213== by 0x10CC19: do_new (qtest.c:150) ==1449213== by 0x10DE0C: interpret_cmda (console.c:181) ==1449213== by 0x10E3C1: interpret_cmd (console.c:201) ==1449213== by 0x10E7C2: cmd_select (console.c:610) ==1449213== by 0x10F0AE: run_console (console.c:705) ==1449213== by 0x10D1FE: main (qtest.c:1224) ==1449213== --- trace-01-ops 5/5 ``` 由 Valgrind 的報告看出 "blocks are still reachable" 是由 `q_new` 引起的，查看 `qtest.c` 發現 `q_quit` 固意沒有執行 "Freeing queue" 程式碼。修改 `q_quit` 如下: ```clike static bool q_quit(int argc, char *argv[]) { /* return true; */ report(3, "Freeing queue"); if (current && current->size > BIG_LIST_SIZE) set_cautious_mode(false); ``` 再一次執行 make valgrind, 在 trace-03-ops 的測試中，"Freed queue" 後還是有 "2 blocks are still allocated" 。 ```shell --- Trace Points +++ TESTING trace trace-01-ops: # Test of insert_head and remove_head --- trace-01-ops 5/5 +++ TESTING trace trace-02-ops: # Test of insert_head, insert_tail, remove_head, remove_tail, and delete_mid --- trace-02-ops 6/6 +++ TESTING trace trace-03-ops: # Test of insert_head, insert_tail, remove_head, reverse and merge ERROR: Freed queue, but 2 blocks are still allocated ==1449761== 112 bytes in 2 blocks are still reachable in loss record 1 of 1 ==1449761== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==1449761== by 0x10F1F5: test_malloc (harness.c:133) ==1449761== by 0x10F5FA: q_new (queue.c:17) ==1449761== by 0x10CCEC: do_new (qtest.c:150) ==1449761== by 0x10DEDF: interpret_cmda (console.c:181) ==1449761== by 0x10E494: interpret_cmd (console.c:201) ==1449761== by 0x10E895: cmd_select (console.c:610) ==1449761== by 0x10F181: run_console (console.c:705) ==1449761== by 0x10D2D1: main (qtest.c:1224) ==1449761== --- trace-03-ops 0/6 +++ TESTING trace trace-04-ops: # Test of insert_head, insert_tail, size, swap, and sort --- trace-04-ops 6/6 +++ TESTING trace trace-05-ops: ``` 發現在 `q_merge` 的過程中不能把已被 merge 的 queue 設成 NULL, 否則不能在 `q_quit` 的 "Freeing queue" 釋放。修改 `q_merge` 如下： ```c list_splice(&tmp, a->q); /* b->q = NULL; */ b->size = 0; ``` 可是，`queue.h` 註解中提及把它設為 NULL, 是我的理解有誤還是註解有誤呢? ```c=226 * 'q' since they will be released externally. However, q_merge() is responsible * for making the queues to be NULL-queue, except the first one. ``` 修改後便能通過 Valgrind 測試。 ```c scripts/driver.py -p /tmp/qtest.XcF5oR --valgrind --- Trace Points +++ TESTING trace trace-01-ops: # Test of insert_head and remove_head --- trace-01-ops 5/5 +++ TESTING trace trace-02-ops: # Test of insert_head, insert_tail, remove_head, remove_tail, and delete_mid --- trace-02-ops 6/6 +++ TESTING trace trace-03-ops: # Test of insert_head, insert_tail, remove_head, reverse and merge --- trace-03-ops 6/6 +++ TESTING trace trace-04-ops: # Test of insert_head, insert_tail, size, swap, and sort --- trace-04-ops 6/6 +++ TESTING trace trace-05-ops: # Test of insert_head, insert_tail, remove_head, reverse, size, swap, and sort --- trace-05-ops 6/6 +++ TESTING trace trace-06-ops: # Test of insert_head, insert_tail, delete duplicate, sort, descend and reverseK --- trace-06-ops 6/6 +++ TESTING trace trace-07-string: # Test of truncated strings --- trace-07-string 6/6 +++ TESTING trace trace-08-robust: # Test operations on empty queue --- trace-08-robust 6/6 +++ TESTING trace trace-09-robust: # Test remove_head with NULL argument --- trace-09-robust 6/6 +++ TESTING trace trace-10-robust: # Test operations on NULL queue --- trace-10-robust 6/6 +++ TESTING trace trace-11-malloc: # Test of malloc failure on insert_head --- trace-11-malloc 6/6 +++ TESTING trace trace-12-malloc: # Test of malloc failure on insert_tail --- trace-12-malloc 6/6 +++ TESTING trace trace-13-malloc: # Test of malloc failure on new --- trace-13-malloc 6/6 +++ TESTING trace trace-14-perf: # Test performance of insert_tail, reverse, and sort --- trace-14-perf 6/6 +++ TESTING trace trace-15-perf: # Test performance of sort with random and descending orders # 10000: all correct sorting algorithms are expected pass # 50000: sorting algorithms with O(n^2) time complexity are expected failed # 100000: sorting algorithms with O(nlogn) time complexity are expected pass --- trace-15-perf 6/6 +++ TESTING trace trace-16-perf: # Test performance of insert_tail --- trace-16-perf 6/6 +++ TESTING trace trace-17-complexity: # Test if time complexity of q_insert_tail, q_insert_head, q_remove_tail, and q_remove_head is constant Probably constant time ERROR: Probably not constant time or wrong implementation Probably constant time Probably constant time --- trace-17-complexity 0/5 --- TOTAL 95/100 make: *** [Makefile:72: valgrind] Error 1 ``` ## 引入 [lib/list_sort.c](https://github.com/torvalds/linux/blob/master/lib/list_sort.c) ### 加入相關程式碼 `list_sort.h` ```c #include "queue.h" #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) typedef int (*list_cmp_func_t)(void *, struct list_head *, struct list_head *); void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp); ``` `list_sort.c` ```c #include <stdio.h> #include <stdlib.h> #include <string.h> #include "list_sort.h" static struct list_head *merge(void *priv, list_cmp_func_t cmp, struct list_head *a, struct list_head *b) { struct list_head *head = NULL, **tail = &head; for (;;) { /* if equal, take 'a' -- important for sort stability */ if (cmp(priv, a, b) <= 0) { *tail = a; tail = &a->next; a = a->next; if (!a) { *tail = b; break; } } else { *tail = b; tail = &b->next; b = b->next; if (!b) { *tail = a; break; } } } return head; } static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, struct list_head *a, struct list_head *b) { struct list_head *tail = head; /* u8 count = 0; */ uint count = 0; for (;;) { /* if equal, take 'a' -- important for sort stability */ if (cmp(priv, a, b) <= 0) { tail->next = a; a->prev = tail; tail = a; a = a->next; if (!a) break; } else { tail->next = b; b->prev = tail; tail = b; b = b->next; if (!b) { b = a; break; } } } /* Finish linking remainder of list b on to tail */ tail->next = b; do { /* * If the merge is highly unbalanced (e.g. the input is * already sorted), this loop may run many iterations. * Continue callbacks to the client even though no * element comparison is needed, so the client's cmp() * routine can invoke cond_resched() periodically. */ if (unlikely(!++count)) cmp(priv, b, b); b->prev = tail; tail = b; b = b->next; } while (b); /* And the final links to make a circular doubly-linked list */ tail->next = head; head->prev = tail; } void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp) { struct list_head *list = head->next, *pending = NULL; size_t count = 0; /* Count of pending */ if (list == head->prev) /* Zero or one elements */ return; /* Convert to a null-terminated singly-linked list. */ head->prev->next = NULL; /* * Data structure invariants: * - All lists are singly linked and null-terminated; prev * pointers are not maintained. * - pending is a prev-linked "list of lists" of sorted * sublists awaiting further merging. * - Each of the sorted sublists is power-of-two in size. * - Sublists are sorted by size and age, smallest & newest at front. * - There are zero to two sublists of each size. * - A pair of pending sublists are merged as soon as the number * of following pending elements equals their size (i.e. * each time count reaches an odd multiple of that size). * That ensures each later final merge will be at worst 2:1. * - Each round consists of: * - Merging the two sublists selected by the highest bit * which flips when count is incremented, and * - Adding an element from the input as a size-1 sublist. */ do { size_t bits; struct list_head **tail = &pending; /* Find the least-significant clear bit in count */ for (bits = count; bits & 1; bits >>= 1) tail = &(*tail)->prev; /* Do the indicated merge */ if (likely(bits)) { struct list_head *a = *tail, *b = a->prev; a = merge(priv, cmp, b, a); /* Install the merged result in place of the inputs */ a->prev = b->prev; *tail = a; } /* Move one element from input list to pending */ list->prev = pending; pending = list; list = list->next; pending->next = NULL; count++; } while (list); /* End of input; merge together all the pending lists. */ list = pending; pending = pending->prev; for (;;) { struct list_head *next = pending->prev; if (!next) break; list = merge(priv, cmp, pending, list); pending = next; } /* The final merge, rebuilding prev links */ merge_final(priv, cmp, head, pending, list); } ``` ### 加入 linux_sort 命令 `qtest.c` 加入以下程式碼。 ```clike #include "list_sort.h" ... .. . static int cmp(void *priv, struct list_head *a, struct list_head *b) { element_t *l = list_entry(a, element_t, list); element_t *r = list_entry(b, element_t, list); return strcmp(l->value, r->value); } bool _do_sort(int argc, char *argv[], int mode) { ... .. . set_noallocate_mode(true); if (current && exception_setup(true)) { if (mode == 0) q_sort(current->q); if (mode == 1) list_sort(NULL, current->q, cmp); } exception_cancel(); set_noallocate_mode(false); ... .. . return ok && !error_check(); } bool do_sort(int argc, char *argv[]) { return _do_sort(argc, argv, 0); } bool do_linux_sort(int argc, char *argv[]) { return _do_sort(argc, argv, 1); } ... .. . ADD_COMMAND(sort, "Sort queue in ascending order", ""); ADD_COMMAND(linux_sort, "Sort queue with linux lib/list_sort.c", ""); ADD_COMMAND(size, "Compute queue size n times (default: n == 1)", "[n]"); ... .. ``` ### 效能比較因為執行 `RAND 1,000,000` 的 sorting 時發生 Time limit exceeded Error。 ```shell l = [] l = [dtswcfi euocvhn byaelpmbt ajfckzo srkbkhur zsiblg whrjz mjreyday wfapiptm ipnxr zrpgn tarlmw aavoymfdj mfhexl qrerlam mzdcttzow zcgxs lmflt xrcok dcywnh tpngee oruzfo qqkvx vueihgud nsjxlzcm lnyrog qnlkjxpx voefzvmgl wldfrzl akccxddhl ... ] ERROR: Time limit exceeded. Either you are in an infinite loop, or your code is too inefficient ``` 修改 `harness.c` 調整 `alarm` 的 interval ```c -static int time_limit = 1; +static int time_limit = 10; ``` linux sort 排序 `RAND 1000000` 的 queue, 大概 75% 效能提升 (1.327 / 0.756 = 1.755) ```shell cmd> new cmd> ih RAND 1000000 cmd> time sort Delta time = 1.327 cmd> free cmd> new cmd> ih RAND 1000000 cmd> time linux_sort Delta time = 0.756 ``` linux sort 排序已排序和反排序的 queue, 效能反而分別下降 23% (0.616 / 0.793 = 0.777) 和 13% (0.631 / 0.722 = 0.874) ```shell cmd> new cmd> show Current queue ID: 0 l = [] cmd> ih RAND 1000000 cmd> sort cmd> time sort Delta time = 0.616 cmd> time linux_sort Delta time = 0.793 cmd> reverse cmd> time sort Delta time = 0.631 cmd> reverse cmd> time linux_sort Delta time = 0.722 ``` linux sort 排序大量重複 entry 的 queue, 有大概 16% 效能提升 (0.379 / 0.327 = 1.159) ```shell cmd> new cmd> show Current queue ID: 0 l = [] cmd> ih dolphin 1000000 cmd> it gerbil 1000000 cmd> reverse cmd> time sort Delta time = 0.379 cmd> reverse cmd> time linux_sort Delta time = 0.327 ``` ### 分析 > TODO ## 整合 web 命令和 linenoise 套件 ### 解決 favicon.ico 的問題按照作業說明的做法解決 favicon.ico 的問題。 ```c char *p = web_recv(web_connfd, &clientaddr); char *buffer = "HTTP/1.1 200 OK\r\n%s%s%s%s%s%s" "Content-Type: text/html\r\n\r\n" "<html><head><style>" "body{font-family: monospace; font-size: 13px;}" "td {padding: 1.5px 6px;}" "</style><link rel=\"shortcut icon\" href=\"#\">" "</head><body><table>\n"; web_send(web_connfd, buffer); ``` 但是引申了另一個問題 [page loads twice in Google chrome](https://stackoverflow.com/questions/2009092/page-loads-twice-in-google-chrome), 導致 chrome 每次發出兩個 request。 ```shell cmd> web listen on port 9999, fd is 3 cmd> l = [] cmd> l = [] cmd> l = [1] cmd> l = [1 1] cmd> l = [2 1 1] cmd> l = [2 2 1 1] cmd> ``` 在 `console.c` 加入一個不做任何事的 favicon.ico 命令迴避這個問題。 ```c static bool do_nothing(int arc, char *argv[]) { return true; } ... .. . ADD_COMMAND(web, "Read commands from builtin web server", "[port]"); add_cmd("favicon.ico", do_nothing, "Do nothing command for favicon.ico request from browser", ""); ``` ### I/O Multiplexing 在 `linenoise.c` 中的 `line_edit` 的 `while(1)` 迴圈裡採用 `select` 系統呼叫查詢哪個 File Descriptor 有內容可以讀取，便不會造成 web 和 stdio 相互阻塞的情況。 ```c extern int web_fd; extern int web_connfd; static int line_edit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt) { ... .. . while (1) { fd_set set; FD_ZERO(&set); if (web_fd) FD_SET(web_fd, &set); FD_SET(l.ifd, &set); int rv = web_fd ? select(web_fd + 1, &set, NULL, NULL, NULL) : select(l.ifd + 1, &set, NULL, NULL, NULL); switch (rv) { case -1: perror("select"); /* an error occurred */ continue; case 0: printf("timeout occurred\n"); /* a timeout occurred */ continue; default: if (web_fd && FD_ISSET(web_fd, &set)) { struct sockaddr_in clientaddr; socklen_t clientlen = sizeof clientaddr; web_connfd = accept(web_fd, (struct sockaddr *) &clientaddr, &clientlen); char *p = web_recv(web_connfd, &clientaddr); strncpy(buf, p, strlen(p) + 1); char *buffer = "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\n"; web_send(web_connfd, buffer); free(p); return strlen(p) + 1; } else if (FD_ISSET(l.ifd, &set)) { signed char c; int nread; char seq[5]; nread = read(l.ifd, &c, 1); if (nread <= 0) return l.len; ``` ## Fisher-Yates shuffle ### `q_shuffle` ```c /* Shuffle elements in queue */ void q_shuffle(struct list_head *head) { if (!head || list_empty(head) || list_is_singular(head)) return; /* srand(time(0)); */ int len = q_size(head); struct list_head *i_th = head->prev; for (int i = len; i > 0; i--) { struct list_head *r_th = head; int r = rand() % i + 1; if (r != i) { // find r_th for (int j = 0; j < r; j++) r_th = r_th->next; // swap r_th and i_th element element_t *e_rth = list_entry(r_th, element_t, list); element_t *e_ith = list_entry(i_th, element_t, list); char *temp = e_rth->value; e_rth->value = e_ith->value; e_ith->value = temp; } i_th = i_th->prev; } } ``` ### entropy 在作業說明提供的統計分析 script 中加入 entropy 計算。 ```python from math import log2 entropy = 0 for i in c: entropy += (i / test_count) * log2(i / test_count) entropy = -entropy print('entropy: %.3f bits' % entropy) ``` ### 統計分析如果以時間作為亂數種子 `srand(time(0))`，產生明顯不平均的分佈。距離 entropy 理想值 log2(24)=4.584962501 有一點距離。用[查表](https://homepage.divms.uiowa.edu/~mbognar/applets/chisq.html)的方式查出自由度 24，$X^2$=557983.6738827821 時，p=0=0% 經由卡方檢定計算出的 p 值小於設定的信心水準 (如0.05) 拒絕虛無假說，不符合平均分佈。 ```shell Expectation: 41666 Observation: {'1234': 71761, '1243': 36587, '1324': 85797, '1342': 13520, '1423': 31009, '1432': 13412, '2134': 5772, '2143': 17290, '2314': 55601, '2341': 32905, '2413': 113339, '2431': 11512, '3124': 30696, '3142': 42459, '3214': 13427, '3241': 24843, '3412': 38443, '3421': 23283, '4123': 117198, '4132': 27165, '4213': 13634, '4231': 83435, '4312': 34703, '4321': 62209} chi square sum: 557983.6738827821 entropy: 4.218 bits ``` ![](https://i.imgur.com/Wke3Gde.png) 拿掉上述 `srand(time(0))`，即原用 `qtest` 的 `main` 設定： ```c /* A better seed can be obtained by combining getpid() and its parent ID * with the Unix time. */ srand(os_random(getpid() ^ getppid())); ``` 得到明顯平均分佈的結果。與 entropy 理想值幾乎一樣。用[查表](https://homepage.divms.uiowa.edu/~mbognar/applets/chisq.html)的方式查出自由度 24，$X^2$=19.01766428262852 時，p=0.75105=75.105%。$H_0$ 不顯著，無證據說此分佈不均勻。為何在一個 process 中，亂數種子都是一個整數，但產生出來的亂數在分佈上會有明顯的差異呢？ ```shell Expectation: 41666 Observation: {'1234': 41584, '1243': 41488, '1324': 41643, '1342': 41714, '1423': 41606, '1432': 41681, '2134': 41594, '2143': 41413, '2314': 42103, '2341': 41925, '2413': 41550, '2431': 41609, '3124': 41362, '3142': 41696, '3214': 41639, '3241': 41557, '3412': 41716, '3421': 41968, '4123': 41752, '4132': 41760, '4213': 41852, '4231': 41829, '4312': 41648, '4321': 41311} chi square sum: 19.01766428262852 entropy: 4.585 bits ``` ![](https://i.imgur.com/ngrAaql.png) ## Xorshift PRNG 實作 Xorshift 和在 `ih` 命令加入 `RAND_XOR` 選項切換 Xorshift。 ```c +struct xorshift32_state { + uint32_t a; +}; + +struct xorshift32_state xor_state; + +uint32_t xorshift32() +{ + xor_state.a ^= xor_state.a << 13; + xor_state.a ^= xor_state.a >> 17; + xor_state.a ^= xor_state.a << 5; + + return xor_state.a; +} + +static void fill_xor_string(char *buf, size_t buf_size) +{ + size_t len = 0; + while (len < MIN_RANDSTR_LEN) + len = rand() % buf_size; + + for (size_t n = 0; n < len; n++) + buf[n] = charset[xorshift32() % (sizeof(charset) - 1)]; + buf[len] = '\0'; +} + /* insert head */ static bool do_ih(int argc, char *argv[]) { @@ -202,7 +228,7 @@ static bool do_ih(int argc, char *argv[]) char *lasts = NULL; char randstr_buf[MAX_RANDSTR_LEN]; int reps = 1; - bool ok = true, need_rand = false; + bool ok = true, need_rand = false, need_rand_xor = false; if (argc != 2 && argc != 3) { report(1, "%s needs 1-2 arguments", argv[0]); return false; @@ -219,6 +245,9 @@ static bool do_ih(int argc, char *argv[]) if (!strcmp(inserts, "RAND")) { need_rand = true; inserts = randstr_buf; + } else if (!strcmp(inserts, "RAND_XOR")) { + need_rand_xor = true; + inserts = randstr_buf; } if (!current || !current->q) @@ -229,6 +258,8 @@ static bool do_ih(int argc, char *argv[]) for (int r = 0; ok && r < reps; r++) { if (need_rand) fill_rand_string(randstr_buf, sizeof(randstr_buf)); + if (need_rand_xor) + fill_xor_string(randstr_buf, sizeof(randstr_buf)); bool rval = q_insert_head(current->q, inserts); if (rval) { current->size++; @@ -1248,6 +1279,7 @@ int main(int argc, char *argv[]) * with the Unix time. */ srand(os_random(getpid() ^ getppid())); + xor_state.a = os_random(getpid() ^ getppid()); ``` 執行結果 ```shell cmd> option entropy 1 cmd> new l = [] cmd> ih RAND 20 l = [icbepfs(32.81%) pmcbvj(29.69%) zqtvups(32.81%) rvxslflhc(35.94%) aiuicw(26.56%) gvrniealg(35.94%) kedwaojgu(37.50%) cguxirumy(35.94%) jeqcpj(26.56%) hzaeal(26.56%) bxzmcn(29.69%) bimvnltnl(32.81%) trteu(21.88%) nqmxytufd(37.50%) djokclzd(32.81%) wlfuybf(29.69%) xsicg(26.56%) vqtspqlau(35.94%) gwkjydgd(29.69%) jxjdtzm(29.69%)] cmd> new l = [] cmd> ih RAND_XOR 20 l = [doaykxmo(32.81%) ipcids(26.56%) xscqpbqpr(32.81%) xxezunnih(32.81%) dfuapviuh(35.94%) stfbjadps(35.94%) dfljtfyk(32.81%) oroojgdt(28.12%) wcaamw(21.88%) xpgtdyr(32.81%) rnjizwoil(35.94%) zuynzrbzl(31.25%) ddemvhmu(29.69%) tjcyopave(37.50%) ywbuqkml(35.94%) rgetliqb(35.94%) pwwufko(29.69%) noarvrlcy(35.94%) zxhkhjkm(29.69%) jnyimveik(35.94%)] cmd> ``` ### 統計分析工具，比較亂數產生器 (如 Xorshift vs. 內建) 的品質 > 還在想。。。 ## 研讀論文〈Dude, is my code constant time?〉解釋本程式的 “simulation” 模式是如何透過以實驗而非理論分析，達到驗證時間複雜度，需要解釋 Student’s t-distribution 及程式實作的原理 ### simulation 模式 `lab0-c` 共有 `ih`, `it`, `rh`, `rt` 4個命令支援 simulation 模式，分別呼叫 preprocessor 展開 `fixture.c` 的巨集後的 `is_insert_head_const()`, `is_insert_tail_const()`, `is_remove_head_const()`, `is_remove_tail_const()` 函式來進行實驗，並判斷該實驗是否為常數時間。 > #: Stringification/Stringizing (字串化): 讓一個表示式變成字串，在 assert 巨集用到 > ##: concatenation (連結，接續) > > https://hackmd.io/@sysprog/c-preprocessor#%E9%96%8B%E7%99%BC%E7%89%A9%E4%BB%B6%E5%B0%8E%E5%90%91%E7%A8%8B%E5%BC%8F%E6%99%82%EF%BC%8C%E5%96%84%E7%94%A8%E5%89%8D%E7%BD%AE%E8%99%95%E7%90%86%E5%99%A8%E5%8F%AF%E5%A4%A7%E5%B9%85%E7%B0%A1%E5%8C%96%E9%96%8B%E7%99%BC `dudect/fixture.h` ```c #include <stdbool.h> #include "constant.h" /* Interface to test if function is constant */ #define _(x) bool is_##x##_const(void); DUT_FUNCS #undef _ ``` `dudect/constant.h` ```c #define DUT_FUNCS \ _(insert_head) \ _(insert_tail) \ _(remove_head) \ _(remove_tail) #define DUT(x) DUT_##x enum { #define _(x) DUT(x), DUT_FUNCS #undef _ }; ``` 展開後 ```c /* Interface to test if function is constant */ bool is_insert_head_const(void); bool is_insert_tail_const(void); bool is_remove_head_const(void); bool is_remove_tail_const(void); enum { DUT_insert_head, DUT_insert_tail, DUT_remove_head, DUT_remove_tail, } ``` 上述四個函式的實作也是通過 preprocessor 展開, 呼叫 `test_const` `dudect/fixture.c` ```c=173 #define DUT_FUNC_IMPL(op) \ bool is_##op##_const(void) { return test_const(#op, DUT(op)); } #define _(x) DUT_FUNC_IMPL(x) DUT_FUNCS #undef _ ``` 展開後 ```c bool is_insert_head_const(void) { return test_const("insert_head", DUT_insert_head); } bool is_insert_tail_const(void) { return test_const("insert_tail", DUT_insert_tail)); } bool is_insert_tail_const(void) { return test_const("remove_head", DUT_remove_head)); } bool is_insert_tail_const(void) { return test_const("remove_tail", DUT_remove_tail)); } ``` `test_const` 進行 TEST_TRIES 次測試，每次測試 ENOUGH_MEASURE / (N_MEASURES - DROP_SIZE * 2) + 1 次 `doit`。 ```c static bool test_const(char *text, int mode) { bool result = false; t = malloc(sizeof(t_context_t)); for (int cnt = 0; cnt < TEST_TRIES; ++cnt) { printf("Testing %s...(%d/%d)\n\n", text, cnt, TEST_TRIES); init_once(); for (int i = 0; i < ENOUGH_MEASURE / (N_MEASURES - DROP_SIZE * 2) + 1; ++i) result = doit(mode); printf("\033[A\033[2K\033[A\033[2K"); if (result) break; } free(t); return result; } ``` `doit` 中先執行 `measure` ，`measure` 執行 `N_MEASURES - DROP_SIZE` 次測試命令，並記錄每次測試命令前後的 CPU cycles。接著 `differentiate` 算出測試命令用了多少 CPU cycle 數，`update_statistics` 呼叫 `t_push` 算出 mean 和 variances。 ```c prepare_inputs(input_data, classes); bool ret = measure(before_ticks, after_ticks, input_data, mode); differentiate(exec_times, before_ticks, after_ticks); update_statistics(exec_times, classes); ret &= report(); ``` `report` 呼叫 `t_compute` 算 t value，最後用 t value 比較 `t_threshold_bananas` 和 `t_threshold_moderate` 決定是否常數時間。 ```c static bool report(void) { double max_t = fabs(t_compute(t)); double number_traces_max_t = t->n[0] + t->n[1]; double max_tau = max_t / sqrt(number_traces_max_t); printf("\033[A\033[2K"); printf("meas: %7.2lf M, ", (number_traces_max_t / 1e6)); if (number_traces_max_t < ENOUGH_MEASURE) { printf("not enough measurements (%.0f still to go).\n", ENOUGH_MEASURE - number_traces_max_t); return false; } /* max_t: the t statistic value * max_tau: a t value normalized by sqrt(number of measurements). * this way we can compare max_tau taken with different * number of measurements. This is sort of "distance * between distributions", independent of number of * measurements. * (5/tau)^2: how many measurements we would need to barely * detect the leak, if present. "barely detect the * leak" = have a t value greater than 5. */ printf("max t: %+7.2f, max tau: %.2e, (5/tau)^2: %.2e.\n", max_t, max_tau, (double) (5 * 5) / (double) (max_tau * max_tau)); /* Definitely not constant time */ if (max_t > t_threshold_bananas) return false; /* Probably not constant time. */ if (max_t > t_threshold_moderate) return false; /* For the moment, maybe constant time. */ return true; } ``` ### 解釋 Student’s t-distribution 及程式實作的原理 `ttest.c` 裡包含了 Student's t-distribution 的 Student's t-test 的 Welch's t-test 的實作，暫時還未看得懂。 ```c void t_push(t_context_t *ctx, double x, uint8_t class) { assert(class == 0 || class == 1); ctx->n[class]++; /* Welford method for computing online variance * in a numerically stable way. */ double delta = x - ctx->mean[class]; ctx->mean[class] = ctx->mean[class] + delta / ctx->n[class]; ctx->m2[class] = ctx->m2[class] + delta * (x - ctx->mean[class]); } double t_compute(t_context_t *ctx) { double var[2] = {0.0, 0.0}; var[0] = ctx->m2[0] / (ctx->n[0] - 1); var[1] = ctx->m2[1] / (ctx->n[1] - 1); double num = (ctx->mean[0] - ctx->mean[1]); double den = sqrt(var[0] / ctx->n[0] + var[1] / ctx->n[1]); double t_value = num / den; return t_value; } void t_init(t_context_t *ctx) { for (int class = 0; class < 2; class ++) { ctx->mean[class] = 0.0; ctx->m2[class] = 0.0; ctx->n[class] = 0.0; } return; } ``` ## [第 1 週作業檢討](https://www.youtube.com/watch?v=pB0YOqkNk4Y&t=11492s) 參考作業內容 [yanjiew1](https://hackmd.io/@yanjiew/linux2023q1-lab0) * https://hackmd.io/@yanjiew/linux2023q1-lab0#%E7%B3%BB%E7%B5%B1%E5%85%A7%E5%BB%BA%E4%BA%82%E6%95%B8%E6%B8%AC%E8%A9%A6 * https://hackmd.io/@yanjiew/linux2023q1-lab0#%E7%A0%94%E8%AE%80%E8%AB%96%E6%96%87%E3%80%88Dude-is-my-code-constant-time%E3%80%89 [1:30:00] [ItisCaleb](https://hackmd.io/@ItisCaleb/linux2023-lab0#dudect) * https://hackmd.io/@ItisCaleb/linux2023-lab0#dudect [1:59:00] * linux list_sort.c 你認為Linux核心想要解決甚麼問題? [2:04:00]