# 2023q1 Homework4 (quiz4) contributed by < `ItisCaleb` > ## 測驗 `1` 測驗中所使用的紅黑樹的節點只存左節點跟右節點 ```c /* Node structure */ #define rb_node(x_type) \ struct { \ x_type *left, *right_red; \ } ``` ### insert 首先會走訪樹來找到要插入的位置,並且把走過的路徑及比較的結果紀錄下來到 `path` 裡 而 `pathp` 則指向現在走到的節點 ```c x_prefix##path_entry_t path[RB_MAX_DEPTH]; \ x_prefix##path_entry_t *pathp; \ rbt_node_new(x_type, x_field, rbtree, node); \ /* Wind. */ \ path->node = rbtree->root; \ for (pathp = path; pathp->node; pathp++) { \ int cmp = pathp->cmp = x_cmp(node, pathp->node); \ assert(cmp != 0); \ if (cmp < 0) { \ pathp[1].node = rbtn_left_get(x_type, x_field, pathp->node); \ } else { \ pathp[1].node = rbtn_right_get(x_type, x_field, pathp->node); \ } \ } \ pathp->node = node; ``` 接下來則是從最底下往回調整樹的結構 ```c for (pathp--; (uintptr_t) pathp >= (uintptr_t) path; pathp--) { ... } ``` 如果 `pathp->cmp` < 0 則表示現在的路徑是往左 反之則是往右 而 `cnode` 則指向當前的節點 如果現在的路徑是往左 則必須去確認是否有連續兩個節點 (`left` & `leftleft`) 是紅色的情況 如果有則先把原本 `leftleft` 設成黑色,並且以現在的節點 `cnode` 當作 root 去作右旋,最後將現在指向的節點設成 `left` ```c if (pathp->cmp < 0) { \ x_type *left = pathp[1].node; \ rbtn_left_set(x_type, x_field, cnode, left); \ if (!rbtn_red_get(x_type, x_field, left)) \ return; \ x_type *leftleft = rbtn_left_get(x_type, x_field, left); \ if (leftleft && rbtn_red_get(x_type, x_field, leftleft)) { \ / * Fix up 4-node. */ \ x_type *tnode; \ rbtn_black_set(x_type, x_field, leftleft); \ rbtn_rotate_right(x_type, x_field, cnode, tnode); \ cnode = tnode; \ } \ } ``` 如果現在的路徑是往右 會有兩種情況需要判斷 第一種是去確認 `cnode` 的 `right` 跟 `left` 使否都是紅色 如果是的話便把 `left` 跟 `right` 設成黑色 然後把 `cnode` 設成紅色 第二種情況則是只有 `right` 是紅色 就將 `cnode` 向左旋轉 同時把他設成黑色 最後將現在指向的節點設成 `right` ```c else { \ x_type *right = pathp[1].node; \ rbtn_right_set(x_type, x_field, cnode, right); \ if (!rbtn_red_get(x_type, x_field, right)) \ return; \ x_type *left = rbtn_left_get(x_type, x_field, cnode); \ if (left && rbtn_red_get(x_type, x_field, left)) { \ /* Split 4-node. */ \ rbtn_black_set(x_type, x_field, left); \ rbtn_black_set(x_type, x_field, right); \ rbtn_red_set(x_type, x_field, cnode); \ } else { \ /* Lean left. */ \ x_type *tnode; \ bool tred = rbtn_red_get(x_type, x_field, cnode); \ rbtn_rotate_left(x_type, x_field, cnode, tnode); \ rbtn_color_set(x_type, x_field, tnode, tred); \ rbtn_black_set(x_type, x_field, cnode); \ cnode = tnode; \ } \ } ``` 最後把 root 設成黑色 ```c /* Set root, and make it black. */ \ rbtree->root = path->node; \ rbtn_black_set(x_type, x_field, rbtree->root); ``` ### remove 要 remove 節點同樣要去走訪紅黑樹,並且紀錄走訪過的路徑 而當找到要 remove 的節點時 (`cmp` = 0),便把 `nodep` 設成當前的路徑,並同時把右子樹最左邊的節點當成後繼節點,以準備等下跟要 remove 的節點交換 ```c x_prefix##path_entry_t path[RB_MAX_DEPTH]; \ x_prefix##path_entry_t *pathp; \ x_prefix##path_entry_t *nodep; \ /* This is just to silence a compiler warning. */ \ nodep = NULL; \ /* Wind. */ \ path->node = rbtree->root; \ for (pathp = path; pathp->node; pathp++) { \ int cmp = pathp->cmp = x_cmp(node, pathp->node); \ if (cmp < 0) { \ pathp[1].node = rbtn_left_get(x_type, x_field, pathp->node); \ } else { \ pathp[1].node = rbtn_right_get(x_type, x_field, pathp->node); \ if (cmp == 0) { \ /* Find node's successor, in preparation for swap. */ \ pathp->cmp = 1; \ nodep = pathp; \ for (pathp++; pathp->node; pathp++) { \ pathp->cmp = -1; \ pathp[1].node = \ rbtn_left_get(x_type, x_field, pathp->node); \ } \ break; \ } \ } \ } ``` 這邊就是把後繼節點的顏色設成要 remove 的節點的顏色並且與之替換 而若要刪除的節點是 root ,則把 `rbtree` 的 root 設成後繼節點 ```c pathp--; \ if (pathp->node != node) { \ /* Swap node with its successor. */ \ bool tred = rbtn_red_get(x_type, x_field, pathp->node); \ rbtn_color_set(x_type, x_field, pathp->node, \ rbtn_red_get(x_type, x_field, node)); \ rbtn_left_set(x_type, x_field, pathp->node, \ rbtn_left_get(x_type, x_field, node)); \ /* If node's successor is its right child, the following code */ \ /* will do the wrong thing for the right child pointer. */ \ /* However, it doesn't matter, because the pointer will be */ \ /* properly set when the successor is pruned. */ \ rbtn_right_set(x_type, x_field, pathp->node, \ rbtn_right_get(x_type, x_field, node)); \ rbtn_color_set(x_type, x_field, node, tred); \ /* The pruned leaf node's child pointers are never accessed */ \ /* again, so don't bother setting them to nil. */ \ nodep->node = pathp->node; \ pathp->node = node; \ if (nodep == path) { \ rbtree->root = nodep->node; \ } else { \ if (nodep[-1].cmp < 0) { \ rbtn_left_set(x_type, x_field, nodep[-1].node, \ nodep->node); \ } else { \ rbtn_right_set(x_type, x_field, nodep[-1].node, \ nodep->node); \ } \ } \ } ``` 而若發現要 remove 的節點只有一個左子節點 就直接把左子節點設成黑色跟要 remove 的節點替換 同樣的若要刪除的節點是 root ,則 root 設成 NULL 這邊做完可以不需要去修復樹的顏色所以會直接 return ```c else { \ x_type *left = rbtn_left_get(x_type, x_field, node); \ if (left) { \ /* node has no successor, but it has a left child. */ \ /* Splice node out, without losing the left child. */ \ assert(!rbtn_red_get(x_type, x_field, node)); \ assert(rbtn_red_get(x_type, x_field, left)); \ rbtn_black_set(x_type, x_field, left); \ if (pathp == path) { \ rbtree->root = left; \ /* Nothing to summarize -- the subtree rooted at the */ \ /* node's left child hasn't changed, and it's now the */ \ /* root. */ \ } else { \ if (pathp[-1].cmp < 0) { \ rbtn_left_set(x_type, x_field, pathp[-1].node, left); \ } else { \ rbtn_right_set(x_type, x_field, pathp[-1].node, left); \ } \ } \ return; \ } else if (pathp == path) { \ /* The tree only contained one node. */ \ rbtree->root = NULL; \ return; \ } \ } ``` ## 測驗 `2` timsort 是一種 insertion sort 跟 merge sort 的 hybrid sort 演算法的內容大致上就是把資料分成小部份的 run,接著用 insertion sort 把這些 run 排序好後放進 stack 裡面 而當 stack 裡面 run 的資料量符合一定的規則後就把 run merge起來完成排序 首先會先宣告一個 stack ```c void timsort(void *f, void *l, size_t size, bool (*cmp)(const void *, const void *)) { char *first = (char *) f, *last = (char *) l; /* Assume last is always larger than first. */ void *buf = malloc(last - first); /* FIXME: avoid alloca */ run_t *stack = alloca(sizeof(run_t) * ilog2((last - first) / size) * 2); size_t top = 0; char *cur = first; ``` 接著就是開始處理資料 會先用 `next_partition` 找出下一個 run 並且使用 insertion sort 排序 之後就直接放進 `stack` 裡 而為了要保持 balanced merge,演算法會考慮 stack 最上面的三個 run,分別為 X Y Z 並且讓他們符合以下的規則,這邊的絕對值代表的是 run 的長度 * $|Z| > |X| + |Y|$ * $|Y| > |X|$ 如果不符合上述兩個規則就執行 merge ```c while (cur < last) { size_t len = next_partition(cur, last, size, cmp); stack[top].first = cur; stack[top].last = stack[top].first + len * size; cur = stack[top].last; while (merge_rule(stack, top)) { if (buf) __merge(stack[top - 1].first, stack[top - 1].last, stack[top].first, stack[top].last, buf, size, cmp); else __inplace_merge(stack[top - 1].first, stack[top - 1].last, stack[top].first, stack[top].last, size, cmp); stack[top - 1].last = stack[top].last; top--; } top++; } ``` 這邊的 merge 有兩種版本,如果沒有提供 buffer 那就是用 `__inplace_merge` ```c static void __inplace_merge(char *first_1, char *last_1, char *first_2, char *last_2, size_t size, bool (*cmp)(const void *, const void *)) { while (first_1 < last_2 && first_2 < last_2) { char *cur_1 = first_1, *cur_2 = first_2; while (cur_1 < last_2 && !cmp(cur_2, cur_1)) cur_1 += size; if (cur_1 == last_2 || cur_1 > first_2) break; while (cur_2 < last_2 && cmp(cur_2, cur_1)) cur_2 += size; __reverse(cur_1, first_2, size); __reverse(first_2, cur_2, size); __reverse(cur_1, cur_2, size); first_1 = cur_1 + (cur_2 - first_2); first_2 = cur_2; } } ``` `last1` 在這邊沒用我就不標了 首先由於兩邊的 run 都是排序好的狀態 所以我們只要在右邊找到可以放進左邊的區間然後一直交換直到尾端就好 初始狀態 ```graphviz digraph { node [shape=plaintext, fontcolor=black, fontsize=16]; "Pointers:" -> "Values:" [color=white]; node [shape=record, fontcolor=black, fontsize=14, width=4.75, fixedsize=true]; pointers [label="<f0> cur1/first1 | <f1> cur2/first2 | <f2> last2", color=white]; values1 [label="<f0> 1 | <f1> 2 | <f2> 3 | <f3> 6 | <f4> 10"]; values2 [label="<f0> 4 | <f1> 5 | <f2> 7 | <f3> 9 | <f4> 12 | <f5> 14| <f6> 17"]; { rank=same; "Pointers:"; pointers } { rank=same; "Values:"; values1 } edge [color=blue]; pointers:f0 -> values1:f0; pointers:f1 -> values2:f0; pointers:f2 -> values2:f6; } ``` 在左邊的區段找到比 `first2` 大的元素並且設成 `cur1` 找到之後換在右邊的區段找到比 `cur1` 大的元素 這樣就能找到區間 [6, 10] 跟 [4, 5] ```graphviz digraph { node [shape=plaintext, fontcolor=black, fontsize=16]; "Pointers:" -> "Values:" [color=white]; node [shape=record, fontcolor=black, fontsize=14, width=4.75, fixedsize=true]; pointers [label="<f0> first1 |<f1> cur1 | <f2> first2 | <f3> cur2 | <f4> last2", color=white]; values1 [label="<f0> 1 | <f1> 2 | <f2> 3 | <f3> 6 | <f4> 10"]; values2 [label="<f0> 4 | <f1> 5 | <f2> 7 | <f3> 9 | <f4> 12 | <f5> 14| <f6> 17"]; { rank=same; "Pointers:"; pointers } { rank=same; "Values:"; values1 } edge [color=blue]; pointers:f0 -> values1:f0; pointers:f1 -> values1:f3; pointers:f2 -> values2:f0; pointers:f3 -> values2:f2; pointers:f4 -> values2:f6; } ``` 接著我們便透過三次 reverse 來把兩者交換 * [6, 10] -> [10, 6] * [4, 5] -> [5, 4] * [10, 6, 5, 4] -> [4, 5, 6, 10] ```graphviz digraph { node [shape=plaintext, fontcolor=black, fontsize=16]; "Pointers:" -> "Values:" [color=white]; node [shape=record, fontcolor=black, fontsize=14, width=4.75, fixedsize=true]; pointers [label="<f0> first1 |<f1> cur1 | <f2> first2 | <f3> cur2 | <f4> last2", color=white]; values1 [label="<f0> 1 | <f1> 2 | <f2> 3 | <f3> 4 | <f4> 5 "]; values2 [label="<f0> 6 | <f1> 10 | <f2> 7 | <f3> 9 | <f4> 12 | <f5> 14| <f6> 17"]; { rank=same; "Pointers:"; pointers } { rank=same; "Values:"; values1 } edge [color=blue]; pointers:f0 -> values1:f0; pointers:f1 -> values1:f3; pointers:f2 -> values2:f0; pointers:f3 -> values2:f2; pointers:f4 -> values2:f6; } ``` 最後我們把 `first1` 跟 `first2` 分別設成 `cur1 + (cur2 - first2)` 跟 `cur2` 重複做剛才的步驟直到排序好 ```graphviz digraph { node [shape=plaintext, fontcolor=black, fontsize=16]; "Pointers:" -> "Values:" [color=white]; node [shape=record, fontcolor=black, fontsize=14, width=4.75, fixedsize=true]; pointers [label="<f0> first1/ cur1 | <f1> first2/cur2 | <f2> last2", color=white]; values2 [label="<f0> 6 | <f1> 10 | <f2> 7 | <f3> 9 | <f4> 12 | <f5> 14| <f6> 17"]; { rank=same; "Pointers:"; pointers } edge [color=blue]; pointers:f0 -> values2:f0; pointers:f1 -> values2:f2; pointers:f2 -> values2:f6; } ``` ## 測驗 `3`