linux2023: Julian-Chu

## 測驗 α−1: 解釋上述程式碼運作原理 S-Tree 是一種 binary search tree 的變形，相較於單純的 BST ，S-Tree 具有自平衡分支高度的功能，與 BST 雷同的部分就不提及 ```c /// S-Tree 的資料結構: struct st_node { // 該節點下的最長深度, 利用 hint 來平衡節點高度 short hint; struct st_node *parent; struct st_node *left, *right; }; struct st_root { struct st_node *root; }; ``` ```c /// 根據左右子節點的 hint 算出樹高的差異, 作為自平衡的依據 static inline int st_balance(struct st_node *n) { int l = 0, r = 0; if (st_left(n)) l = st_left(n)->hint + 1; if (st_right(n)) r = st_right(n)->hint + 1; return l - r; } /// 節點高度計算, 取左右兩邊中最高 static inline int st_max_hint(struct st_node *n) { int l = 0, r = 0; if (st_left(n)) l = st_left(n)->hint + 1; if (st_right(n)) r = st_right(n)->hint + 1; return l > r ? l : r; } ``` ```c // 自平衡 static inline void st_update(struct st_node **root, struct st_node *n) { if (!n) return; // 取得兩邊的樹高差異 int b = st_balance(n); // 暫存目前的最長樹高作為平衡後的比對值 int prev_hint = n->hint; struct st_node *p = st_parent(n); if (b < -1) { /* leaning to the right */ if (n == *root) *root = st_right(n); st_rotate_right(n); } else if (b > 1) { /* leaning to the left */ if (n == *root) *root = st_left(n); st_rotate_left(n); } // 新的樹高 n->hint = st_max_hint(n); // hint == 0 表示新插入的節點, // 在新插入節點跟當前節點樹高有變化時需要對親代節點重新平衡且計算新樹高 if (n->hint == 0 || n->hint != prev_hint) st_update(root, p); } ``` 刪除與新增節點都有可能導致樹高的改變, 所以在這兩種操作中需要計算新樹高且判斷是否需要旋轉維持平衡, ```c void st_remove(struct st_node **root, struct st_node *del) void st_insert(struct st_node **root, struct st_node *p, struct st_node *n, enum st_dir d) ``` ## 測驗 α−2 :::warning todo: 研讀 AVL tree, AVL 也是高度差異超過 1 時會發生自平衡, 與 S-Tree 的根本差異是？ ::: ## 測驗 α−3 --- ## 測驗 β-1 ```c #include <stdint.h> static inline uintptr_t align_up(uintptr_t sz, size_t alignment) { uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { /* power of two? */ return (sz + mask) & ~mask; } return (((sz + mask) / alignment) * alignment); } ``` 函數的主要目的是將 sz round up 到最近的 alignment 倍數 (`alignment * k`)。 `((sz + mask) / alignment) * alignment` 是一般通用的做法, 將 sz 加上 `(alignment - 1)`, 使得 `sz <= alignment * k < sz + (alignment - 1) < alignment * ( k + 1 )`, 除以 `alignment` 即可取得 `k` 。但該方法涉及乘除法運算, 是否有更快速的做法？是的，當 `alignment` 為 2 的 n 次方的時候, 可以利用位元運算。首先我們先觀察 `2^n` 與 `2^n -1`(mask) 在位元上的特性: - `2^n` : Most siginificant bit 以外的 bit 皆為 0, e.g., 1000, 100000 - `2^n` - 1: 會將 MSB 變為 0, 其餘的有效位為 1, e.g., 0111, 011111 所以 `2^n` 與 `2^n -1` 在位元上沒有重疊的 1 , 所以 `2^n & 2^n - 1 ==0` 接著利用該特性判斷 alignment 是否為 `2^n` 。 - ~(2^n - 1): 會將小於 MSB 的 bit 設為 0, 且大於等於 MSB 的部分設為 1, e.g. : 11110000 , 11100000 由於 mask 是 (alignment - 1), 目前的大小關係是 `alignment * k <= ( sz + mask ) < alignment * ( k + 1 )` , 可以想像成 `alignment * k + 餘數 = ( sz + mask )` 然後由於 alignment 是 `2^n` 所以餘數 <= `2^n - 1 (mask)`, 所以 `&~mask` 運算會將所有涉及餘數的 bit 設為 0 。 ## 測驗 β-2 ref: - align https://elixir.bootlin.com/linux/latest/source/include/linux/align.h - round_up https://elixir.bootlin.com/linux/latest/source/include/linux/math.h#L25 - PAGE_ALIGN https://elixir.bootlin.com/linux/latest/source/tools/include/linux/mm.h#L19 --- ## 測驗 γ-1 - 建立 thread pool, 特別注意的是 goto f1, f2, f3 針對不同程度的初始化失敗釋放資源 ```c /* The multithreaded qsort public interface */ void qsort_mt(void *a, size_t n, size_t es, cmp_t *cmp, int maxthreads, int forkelem) { struct qsort *qs; struct common c; int i, islot; bool bailout = true; if (n < forkelem) goto f1; errno = 0; /* Try to initialize the resources we need. */ // init mutex for pool, if fail, goto normal qsort if (pthread_mutex_init(&c.mtx_al, NULL) != 0) goto f1; // init pool, if fail, goto destroy mtx_al mutex and normal qsort if (pthread_mutex_init(&c.mtx_al, NULL) != 0) if ((c.pool = calloc(maxthreads, sizeof(struct qsort))) == NULL) goto f2; for (islot = 0; islot < maxthreads; islot++) { qs = &c.pool[islot]; if (pthread_mutex_init(&qs->mtx_st, NULL) != 0) goto f3; if (pthread_cond_init(&qs->cond_st, NULL) != 0) { verify(pthread_mutex_destroy(&qs->mtx_st)); goto f3; } qs->st = ts_idle; qs->common = &c; if (pthread_create(&qs->id, NULL, qsort_thread, qs) != 0) { verify(pthread_mutex_destroy(&qs->mtx_st)); verify(pthread_cond_destroy(&qs->cond_st)); goto f3; } } /* All systems go. */ bailout = false; /* Initialize common elements. */ c.swaptype = ((char *) a - (char *) 0) % sizeof(long) || es % sizeof(long) ? 2 : es == sizeof(long) ? 0 : 1; c.es = es; c.cmp = cmp; c.forkelem = forkelem; c.idlethreads = c.nthreads = maxthreads; /* Hand out the first work batch. */ qs = &c.pool[0]; verify(pthread_mutex_lock(&qs->mtx_st)); qs->a = a; qs->n = n; qs->st = ts_work; c.idlethreads--; verify(pthread_cond_signal(&qs->cond_st)); verify(pthread_mutex_unlock(&qs->mtx_st)); /* Wait for all threads to finish, and free acquired resources. */ f3: for (i = 0; i < islot; i++) { qs = &c.pool[i]; if (bailout) { verify(pthread_mutex_lock(&qs->mtx_st)); qs->st = ts_term; verify(pthread_cond_signal(&qs->cond_st)); verify(pthread_mutex_unlock(&qs->mtx_st)); } verify(pthread_join(qs->id, NULL)); verify(pthread_mutex_destroy(&qs->mtx_st)); verify(pthread_cond_destroy(&qs->cond_st)); } free(c.pool); f2: verify(pthread_mutex_destroy(&c.mtx_al)); if (bailout) { fprintf(stderr, "Resource initialization failed; bailing out.\n"); f1: qsort(a, n, es, cmp); } } ``` - 從 thread pool 裡面拿取 thread 做 subrange 的 partition 運算 ```c /* Thread-callable quicksort. */ static void qsort_algo(struct qsort *qs) { char *pa, *pb, *pc, *pd, *pl, *pm, *pn; int d, r, swaptype, swap_cnt; void *a; /* Array of elements. */ size_t n, es; /* Number of elements; size. */ cmp_t *cmp; int nl, nr; struct common *c; struct qsort *qs2; /* Initialize qsort arguments. */ c = qs->common; es = c->es; cmp = c->cmp; swaptype = c->swaptype; a = qs->a; n = qs->n; top: /* From here on qsort(3) business as usual. */ swap_cnt = 0; if (n < 7) { for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) for (pl = pm; pl > (char *) a && CMP(thunk, pl - es, pl) > 0; pl -= es) swap(pl, pl - es); return; } pm = (char *) a + (n / 2) * es; if (n > 7) { pl = (char *) a; pn = (char *) a + (n - 1) * es; if (n > 40) { d = (n / 8) * es; pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk); pm = med3(pm - d, pm, pm + d, cmp, thunk); pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk); } pm = med3(pl, pm, pn, cmp, thunk); } swap(a, pm); pa = pb = (char *) a + es; pc = pd = (char *) a + (n - 1) * es; for (;;) { while (pb <= pc && (r = CMP(thunk, pb, a)) <= 0) { if (r == 0) { swap_cnt = 1; swap(pa, pb); pa += es; } pb += es; } while (pb <= pc && (r = CMP(thunk, pc, a)) >= 0) { if (r == 0) { swap_cnt = 1; swap(pc, pd); pd -= es; } pc -= es; } if (pb > pc) break; swap(pb, pc); swap_cnt = 1; pb += es; pc -= es; } pn = (char *) a + n * es; r = min(pa - (char *) a, pb - pa); vecswap(a, pb - r, r); r = min(pd - pc, pn - pd - es); vecswap(pb, pn - r, r); if (swap_cnt == 0) { /* Switch to insertion sort */ r = 1 + n / 4; /* n >= 7, so r >= 2 */ for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) for (pl = pm; pl > (char *) a && CMP(thunk, pl - es, pl) > 0; pl -= es) { swap(pl, pl - es); if (++swap_cnt > r) goto nevermind; } return; } nevermind: nl = (pb - pa) / es; nr = (pd - pc) / es; /* Now try to launch subthreads. */ if (nl > c->forkelem && nr > c->forkelem && (qs2 = allocate_thread(c)) != NULL) { qs2->a = a; qs2->n = nl; verify(pthread_cond_signal(&qs2->cond_st)); verify(pthread_mutex_unlock(&qs2->mtx_st)); } else if (nl > 0) { qs->a = a; qs->n = nl; qsort_algo(qs); } if (nr > 0) { a = pn - nr * es; n = nr; goto top; } } ``` :::warning todo: thunk 的用途? 並沒有實際用到 ::: ## 測驗 γ-2 ## 測驗 γ-3