2021q1 Homework2 (quiz2)

# 2021q1 Homework2 (quiz2) contribute by < `jameshuang890902` > ## 作業要求 - [ ] 重新回答第 2 周測驗題 (解釋程式運作原理時，應提供對應的 Graphviz 圖例) - [ ] 附帶的「延伸問題」 ## 測驗一 ### 重新作答 ``` COND1 = ? (c) fast->next == list COND2 = ? (b) fast->next->next == list MMM = ? (e) &get_middle(&q->list)->list TTT = ? (a) slow ``` ### 程式碼運作原理 #### `list.h` - **container_of** ```c= /** * container_of() - Calculate address of object that contains address ptr * @ptr: pointer to member variable * @type: type of the structure containing ptr * @member: name of the member variable in struct @type * * Return: @type pointer of object containing ptr */ #ifndef container_of #define container_of(ptr, type, member) \ __extension__({ \ const __typeof__(((type *) 0)->member) *__pmember = (ptr); \ (type *) ((char *) __pmember - offsetof(type, member)); \ }) #endif ``` - **struct list_head** ```c= /** * struct list_head - Head and node of a doubly-linked list * @prev: pointer to the previous node in the list * @next: pointer to the next node in the list */ struct list_head { struct list_head *prev, *next; }; ``` - **LIST_HEAD** ```c= /** * LIST_HEAD - Declare list head and initialize it * @head: name of the new object */ #define LIST_HEAD(head) struct list_head head = {&(head), &(head)} ``` - **INIT_LIST_HEAD** ```c= /** * INIT_LIST_HEAD() - Initialize empty list head * @head: pointer to list head */ static inline void INIT_LIST_HEAD(struct list_head *head) { head->next = head; head->prev = head; } ``` - **list_add_tail** ```c= /** * list_add_tail() - Add a list node to the end of the list * @node: pointer to the new node * @head: pointer to the head of the list */ static inline void list_add_tail(struct list_head *node, struct list_head *head) { struct list_head *prev = head->prev; prev->next = node; node->next = head; node->prev = prev; head->prev = node; } ``` - 這個函式將 node 放在 head 所指向的 tail of list 位置。 - 示意圖如下： ```graphviz digraph foo { rankdir=LR; node [shape=record]; a [label="prev of head"] c [label="head"]; a:ref:c -> c:data [ dir=both]; } ``` ```graphviz digraph foo { rankdir=LR; node [shape=record]; a [label="prev of head"] b [label="node"]; c [label="head"]; a:ref:c -> b:data [ dir=both]; b:ref:c -> c:data [ dir=both]; } ``` - **list_del** ```c= /** * list_del() - Remove a list node from the list * @node: pointer to the node */ static inline void list_del(struct list_head *node) { struct list_head *next = node->next, *prev = node->prev; next->prev = prev; prev->next = next; } ``` - 這個函式將 node 從 list 中脫離。 - 示意圖如下： ```graphviz digraph foo { rankdir=LR; node [shape=record]; a [label="prev of head"] b [label="node"]; c [label="head"]; d [label="other nodes"]; e [label="other nodes"]; a:ref:c -> b:data [ dir=both]; b:ref:c -> c:data [ dir=both]; d:ref:c -> a:data [ dir=both]; c:ref:c -> e:data [ dir=both]; } ``` ```graphviz digraph foo { rankdir=LR; node [shape=record]; a [label="prev of head"] c [label="head"]; d [label="other nodes"]; e [label="other nodes"]; a:ref:c -> c:data [ dir=both]; d:ref:c -> a:data [ dir=both]; c:ref:c -> e:data [ dir=both]; } ``` - **list_empty** ```c= /** * list_empty() - Check if list head has no nodes attached * @head: pointer to the head of the list */ static inline int list_empty(const struct list_head *head) { return (head->next == head); } ``` - **list_is_singular** ```c= /** * list_is_singular() - Check if list head has exactly one node attached * @head: pointer to the head of the list */ static inline int list_is_singular(const struct list_head *head) { return (!list_empty(head) && head->prev == head->next); } ``` - **list_splice_tail** ```c= /** * list_splice_tail() - Add list nodes from a list to end of another list * @list: pointer to the head of the list with the node entries * @head: pointer to the head of the list */ static inline void list_splice_tail(struct list_head *list, struct list_head *head) { struct list_head *head_last = head->prev; struct list_head *list_first = list->next, *list_last = list->prev; if (list_empty(list)) return; head->prev = list_last; list_last->next = head; list_first->prev = head_last; head_last->next = list_first; } ``` - 這個函式將 `list` 接到 `head` 所指 list 的末端。 - 示意圖如下： ```graphviz digraph foo { rankdir=LR; node [shape=record]; a [label="prev of head"] b [label="head"]; c [label="prev of list"]; d [label="list"]; e [label="next of list"]; f [label="other nodes"]; g [label="other nodes"]; h [label="other nodes"]; i [label="other nodes"]; a:ref:c -> b:data [ dir=both]; b:ref:c -> g:data [ dir=both]; f:ref:c -> a:data [ dir=both]; h:ref:c -> c:data [ dir=both]; c:ref:c -> d:data [ dir=both]; d:ref:c -> e:data [ dir=both]; e:ref:c -> i:data [ dir=both]; } ``` ```graphviz digraph foo { rankdir=LR; node [shape=record]; a [label="prev of head"] b [label="head"]; c [label="prev of list"]; d [label="list"]; e [label="next of list"]; f [label="other nodes"]; g [label="other nodes"]; h [label="other nodes"]; g:ref:c -> c:data [ dir=both]; c:ref:c -> b:data [ dir=both]; b:ref:c -> f:data [ dir=both]; f:ref:c -> a:data [ dir=both]; a:ref:c -> e:data [ dir=both]; e:ref:c -> h:data [ dir=both]; d->c; d->e; } ``` - **list_cut_position** ```c= /** * list_cut_position() - Move beginning of a list to another list * @head_to: pointer to the head of the list which receives nodes * @head_from: pointer to the head of the list * @node: pointer to the node in which defines the cutting point */ static inline void list_cut_position(struct list_head *head_to, struct list_head *head_from, struct list_head *node) { struct list_head *head_from_first = head_from->next; if (list_empty(head_from)) return; if (head_from == node) { INIT_LIST_HEAD(head_to); return; } head_from->next = node->next; head_from->next->prev = head_from; head_to->prev = node; node->next = head_to; head_to->next = head_from_first; head_to->next->prev = head_to; } ``` - 這個函式將 list 從 node 處分割成兩個 sublist 。 - 示意圖如下： ```graphviz digraph foo { rankdir=LR; node [shape=record]; a [label="head_to"] b [label="head_from"]; c [label="head_from_first"]; d [label="node"]; e [label="next of node"]; g [label="other nodes"]; h [label="other nodes"]; i [label="other nodes"]; b:ref:c -> c:data [ dir=both]; c:ref:c -> g:data [ dir=both]; g:ref:c -> d:data [ dir=both]; d:ref:c -> e:data [ dir=both]; e:ref:c -> h:data [ dir=both]; a:ref:c -> i:data [ dir=both]; } ``` ```graphviz digraph foo { rankdir=LR; node [shape=record]; a [label="head_to"] b [label="head_from"]; c [label="head_from_first"]; d [label="node"]; e [label="next of node"]; g [label="other nodes"]; i [label="other nodes"]; j [label="other nodes"]; g:ref:c -> d:data [ dir=both]; e:ref:c -> i:data [ dir=both]; b:ref:c -> e:data [ dir=both]; d:ref:c -> a:data [ dir=both]; a:ref:c -> c:data [ dir=both]; c:ref:c -> j:data [ dir=both]; } ``` - **list_entry** ```c= /** * list_entry() - Calculate address of entry that contains list node * @node: pointer to list node * @type: type of the entry containing the list node * @member: name of the list_head member variable in struct @type */ #define list_entry(node, type, member) container_of(node, type, member) ``` - **list_first_entry** ```c= /** * list_first_entry() - get first entry of the list * @head: pointer to the head of the list * @type: type of the entry containing the list node * @member: name of the list_head member variable in struct @type */ #define list_first_entry(head, type, member) \ list_entry((head)->next, type, member) ``` - **list_for_each** ```c= /** * list_for_each - iterate over list nodes * @node: list_head pointer used as iterator * @head: pointer to the head of the list */ #define list_for_each(node, head) \ for (node = (head)->next; node != (head); node = node->next) ``` #### `merge_sort.c` ```c= typedef struct __element { char *value; struct __element *next; struct list_head list; } list_ele_t; ``` ```c= typedef struct { list_ele_t *head; /* Linked list of elements */ list_ele_t *tail; size_t size; struct list_head list; } queue_t; ``` ##### `get_middle` 的目的是找到一個近乎可以將 `list` 等分的 node 。 - **get_middle** ```c= static list_ele_t *get_middle(struct list_head *list) { struct list_head *fast = list->next, *slow; list_for_each (slow, list) { if (fast->next == list || fast->next->next == list) break; fast = fast->next->next; } return list_entry(slow, list_ele_t, list); } ``` - `list_head *fast = list->next` 讓 `fast` 指向鏈接list 的第一（`*next`）個 `struct list_head` 。 - `list_for_each (slow, list)` 是將 `slow` 輪流照順序指向每個 `struct list_head` 的迴圈。 - `if (fast->next == list || fast->next->next == list) break;` 是讓 `fast` 再回到 `list` 的前一步或前兩步停下。 - `fast = fast->next->next;` 使 `fast` 在每次迴圈中前進兩步，而`slow` 則前進一步。 - `return list_entry(slow, list_ele_t, list);`反還 `slow` 下一個（`next`）`list_ele_t` 的位置。因為迴圈執行完時 `fast` 約略在整個 list 的末端，而 `slow` 在 `fast` 與起始位置中間一半的位置，因此可以得到一個約略將 list 等分的位置。 - 在等分時可能會有下面兩種情況： - node 的個數是奇數，迴圈終止在 `fast->next == list` 。 ```graphviz digraph doubly_linked_list { rankdir=LR; node [shape=record]; a [label="{ <data> list | <ref> }", width=1.2] b [label="{ <data> node 1 | <ref> }"]; c [label="{ <data> node 2 | <ref> }"]; d [label="{ <data> node 3 | <ref> }", width=1.2] e [label="{ <data> node 4 | <ref> }"]; f [label="{ <data> node 5 | <ref> }"]; g [label="fast_step_1",shape= none]; h [label="fast_step_2",shape= none]; i [label="fast_step_3",shape= none]; j [label="slow_step_1",shape= none]; k [label="slow_step_2",shape= none]; l [label="slow_step_3",shape= none]; a:ref:c -> b:data [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false, arrowsize=1.2]; b:ref:c -> c:data [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; c:ref:c -> d [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; d:ref:c -> e [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; e:ref:c -> f [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; f:ref:c -> a [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; g->b; h->d; i->f; j->b; k->c; l->e; } ``` - node 的個數是偶數，迴圈終止在 `fast->next->next == list` 。 ```graphviz digraph doubly_linked_list { rankdir=LR; node [shape=record]; a [label="{ <data> list | <ref> }", width=1.2] b [label="{ <data> node 1 | <ref> }"]; c [label="{ <data> node 2 | <ref> }"]; d [label="{ <data> node 3 | <ref> }", width=1.2] e [label="{ <data> node 4 | <ref> }"]; f [label="{ <data> node 5 | <ref> }"]; m [label="{ <data> node 6 | <ref> }"]; g [label="fast_step_1",shape= none]; h [label="fast_step_2",shape= none]; i [label="fast_step_3",shape= none]; j [label="slow_step_1",shape= none]; k [label="slow_step_2",shape= none]; l [label="slow_step_3",shape= none]; a:ref:c -> b:data [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false, arrowsize=1.2]; b:ref:c -> c:data [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; c:ref:c -> d [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; d:ref:c -> e [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; e:ref:c -> f [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; f:ref:c -> m [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; m:ref:c -> a [arrowhead=vee, arrowtail=dot, dir=both, tailclip=false]; g->b; h->d; i->f; j->b; k->c; l->e; } ``` - **list_merge** ```c= static void list_merge(struct list_head *lhs, struct list_head *rhs, struct list_head *head) { INIT_LIST_HEAD(head); if (list_empty(lhs)) { list_splice_tail(lhs, head); return; } if (list_empty(rhs)) { list_splice_tail(rhs, head); return; } while (!list_empty(lhs) && !list_empty(rhs)) { char *lv = list_entry(lhs->next, list_ele_t, list)->value; char *rv = list_entry(rhs->next, list_ele_t, list)->value; struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next; list_del(tmp); list_add_tail(tmp, head); } list_splice_tail(list_empty(lhs) ? rhs : lhs, head); } ``` - `INIT_LIST_HEAD(head);` 將 `head` 初始化，`head->next = head; head->prev = head;`。 - `if (list_empty(lhs))` ，判斷如果 `lhs` 並沒有鏈接任何節點，就直接用 `list_splice_tail(lhs, head);` 將 `lhs` 整串接在 `head` 上，不需要 merge。`if (list_empty(rhs))` 則剛好邏輯顛倒。 :::danger 記得檢查是否有錯誤! ```c= if (list_empty(lhs)) { list_splice_tail(rhs, head); return; } if (list_empty(rhs)) { list_splice_tail(lhs, head); return; } ``` ::: - `while (!list_empty(lhs) && !list_empty(rhs))` ，當 `lhs, rhs` 皆不為空時執行迴圈。 - `char *lv = list_entry(lhs->next, list_ele_t, list)->value;` ，使 `lv` 指向 `lhs` 的第一個（`next`）`list_ele_t`。 - `char *rv = list_entry(rhs->next, list_ele_t, list)->value;` ，使 `rv` 指向 `rhs` 的第一個（`next`）`list_ele_t`。 - `struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next;` 讓 `tmp` 指向比較小（strcmp的比較方法）的第一個（`next`）`list_ele_t`。 - `list_del(tmp);list_add_tail(tmp, head);` 將 `tmp` 從原本鏈結中移除，並接在 `head` 的末端（tail）。 - **list_merge_sort** ```c= void list_merge_sort(queue_t *q) { if (list_is_singular(&q->list)) return; queue_t left; struct list_head sorted; INIT_LIST_HEAD(&left.list); list_cut_position(&left.list, &q->list, &get_middle(&q->list)->list); list_merge_sort(&left); list_merge_sort(q); list_merge(&left.list, &q->list, &sorted); INIT_LIST_HEAD(&q->list); list_splice_tail(&sorted, &q->list); } ``` - `if (list_is_singular(&q->list)) return;` 當 `list` 只有唯一一個 element，不需要 sort ，直接 return。 - `list_cut_position(&left.list, &q->list, &get_middle(&q->list)->list);` ，把從` &get_middle(&q->list)->list` 分割後的鏈結串到 `&left.list` 上。 - `list_merge_sort(&left); list_merge_sort(q);` 分別對 `&left` 與 `q` 再做 `list_merge_sort` ，遞迴下去。 - `list_merge(&left.list, &q->list, &sorted);` 把 sort 好的 `&left.list` 與 `&q->list` 串在 `&sorted` 上。 - `INIT_LIST_HEAD(&q->list);` 已初始化清除所有 `q->list` 的鏈結。 - `list_splice_tail(&sorted, &q->list);` 把 `sorted` 串在 `q->list` 上。 - **validate** ```c= static bool validate(queue_t *q) { struct list_head *node; list_for_each (node, &q->list) { if (node->next == &q->list) break; if (strcmp(list_entry(node, list_ele_t, list)->value, list_entry(node->next, list_ele_t, list)->value) > 0) return false; } return true; } ``` - 逐一檢查每個 node 的 `value` 是否由小到大排列。 - 如果全部由小到大排列， `return true` ; 當一檢查到兩個 node 先大再小，立刻 `return false` 。 - **q_new** ```c= static queue_t *q_new() { queue_t *q = malloc(sizeof(queue_t)); if (!q) return NULL; q->head = q->tail = NULL; q->size = 0; INIT_LIST_HEAD(&q->list); return q; } ``` - 建立一的新的 queue_t *q ，做記憶體配置並初始化。 - **q_free** ```c= static void q_free(queue_t *q) { if (!q) return; list_ele_t *current = q->head; while (current) { list_ele_t *tmp = current; current = current->next; free(tmp->value); free(tmp); } free(q); } ``` - 釋放整個 `queue_t *q` 所鏈結到的記憶體。 - **q_insert_head** ```c= bool q_insert_head(queue_t *q, char *s) { if (!q) return false; list_ele_t *newh = malloc(sizeof(list_ele_t)); if (!newh) return false; char *new_value = strdup(s); if (!new_value) { free(newh); return false; } newh->value = new_value; newh->next = q->head; q->head = newh; if (q->size == 0) q->tail = newh; q->size++; list_add_tail(&newh->list, &q->list); return true; } ``` - 新建一個 `list_ele_t *newh` ，放置在 `q->list` 的 `tail` 。 - **main** ```c= int main(void) { FILE *fp = fopen("cities.txt", "r"); if (!fp) { perror("failed to open cities.txt"); exit(EXIT_FAILURE); } queue_t *q = q_new(); char buf[256]; while (fgets(buf, 256, fp)) q_insert_head(q, buf); fclose(fp); list_merge_sort(q); assert(validate(q)); q_free(q); return 0; } ``` - `FILE *fp = fopen("cities.txt", "r");` 開啟這個內含世界超過 9 萬個都市的名稱的檔案 `cities.txt`。 - `if (!fp)` ，如果開啟失敗， `fp = NULL` ，開始處理錯誤訊息。 - `queue_t *q = q_new();` 建立一個新的 queue。 - `fgets(buf, 256, fp)` ，用來讀取最大長度 256 的字串（至 `'\0'` 或 `EOF` 停止）。 - `q_insert_head(q, buf);` 將字串插入 queue 的最前端。 - `fclose(fp);` 關閉檔案。 - `list_merge_sort(q);` 對 queue 做 merge sort 。 - `assert(validate(q));` 檢查是否 sort 成功。 - `q_free(q);` 釋放 queue 的記憶體。 ### 延伸問題 1. 解釋上述程式碼運作原理，指出改進空間並著手實作 2. 研讀 Linux 核心的 [lib/list_sort.c](https://github.com/torvalds/linux/blob/master/lib/list_sort.c) 原始程式碼，學習 [sysprog21/linux-list](https://github.com/sysprog21/linux-list) 的手法，將 [lib/list_sort.c](https://github.com/torvalds/linux/blob/master/lib/list_sort.c) 的實作抽離為可單獨執行 (standalone) 的使用層級應用程式，並設計效能評比的程式碼，說明 Linux 核心的 [lib/list_sort.c](https://github.com/torvalds/linux/blob/master/lib/list_sort.c) 最佳化手法 3. 將你在 quiz1 提出的改進實作，和 Linux 核心的 [lib/list_sort.c](https://github.com/torvalds/linux/blob/master/lib/list_sort.c) 進行效能比較，需要提出涵蓋足夠廣泛的 benchmark ## 測驗二 ### 重新作答 ``` X = ? (b) 2 Y = ? (d) 4 Z = ? (h) 8 ``` 上面選項的在下面的**程式碼運作原理**中解釋。 ### 程式碼運作原理下面的函式 `func` 接受一個 16 位元無號整數 N ，並回傳小於或等於 N 的 power-of-2 。 ```c= uint16_t func(uint16_t N) { /* change all right side bits to 1 */ N |= N >> 1; N |= N >> 2; N |= N >> 4; N |= N >> 8; return (N + 1) >> 1; } ``` #### 以 `func(512)` 得到 512 為例這是一個 N = 512 的例子，已知小於或等於 N 的 power-of-2 是 512 ，為了分析函式中的 `>>8` 所以選擇了一個最左邊的 1 在第九位的數。下面將整函數每一步的 bit-wise 操作列出。 ``` 0000000100000000 0000000010000000 0000000110000000 0000000001100000 0000000111100000 0000000000011110 0000000111111110 0000000000000001 0000000111111111 0000001000000000 0000000100000000 ``` 從結果往回看，因為要求的是小於或等於 N 的 power-of-2 ，根據二位元表示法特性，當只有一個 1 存在，而其餘為零，那個數就一定屬於 power-of-2 。可能會有兩中情況： 1. N 是 power-of-2。 2. 要找最大小於 N 的 power-of-2。無論哪一種情況，答案都是只有 N 的 MSB 為 1 的二元排列，再來就研究如何只留下原來最左邊的 1 為 1。要達到只有只留下原來最左邊的 1 為 1，程式中的做法是將原來最左邊的 1 與小於原來最左邊的 1 的位元都設為 1 ，其餘設為 0 ，靠著 +1 留下唯一一個 1 在原先原來最左邊的 1 的左邊一位，接個在做 `>>1` ，就可以得到我們要的結果，留下只有 N 原來的的原來最左邊的 1 為 1 的二元排列。過程如下： ``` 0000000111111111 0000001000000000 0000000100000000 ``` 再來要分析如何達成將原來最左邊的 1 與小於 MSB 的位元都設為原來最左邊的 1 的操作。 1. `N |= N >> 1;` 這一步目的是為了讓原來最左邊的 1 與其右邊一位都變成 1 。 ``` 0000000100000000 N 0000000010000000 N >> 1 0000000110000000 N |= N >> 1 ``` 2. `N |= N >> 2;` 上一步確保原來最左邊的 1 與其右邊一位都為 1 ，所以這一步是讓原來最左邊的 1 與其右邊 3 位都為 1 。 ``` 0000000110000000 N 0000000001100000 N >> 2 0000000111100000 N |= N >> 2 ``` 3. `N |= N >> 4;` 上一步確保原來最左邊的 1 與其右邊 3 位都為 1 ，所以這一步是讓原來最左邊的 1 與其右邊 7 位都為 1 。 ``` 0000000111100000 N 0000000000011110 N >> 4 0000000111111110 N |= N >> 4 ``` 4. `N |= N >> 8;` 上一步確保原來最左邊的 1 與其右邊 7 位都為 1 ，所以這一步是讓原來最左邊的 1 與其右邊 15 位都為 1 。該範圍可以涵蓋整個 `int16_t` 的範圍。 ``` 0000000111111110 N 0000000000000001 N >> 8 0000000111111111 N |= N >> 8 ``` 上面的方法同樣一可以運用在 N 不是 power-of-2 的情況，因為上面只用到原來最左邊的 1 為 1 的性質。另外因為 `N = 0` 的答案也會是 0 所以也適用上面的方法。 ### 延伸問題 #### 在 [The Linux Kernel API](https://www.kernel.org/doc/html/latest/core-api/kernel-api.html) 頁面搜尋 “power of 2”，可見 is_power_of_2，查閱 Linux 核心原始程式碼，舉例說明其作用和考量（特別是 round up/down power of 2特別留意 __roundup_pow_of_two 和 __rounddown_pow_of_two 的實作機制）原始碼來自 [torvalds/linux/include/linux/log2.h](https://github.com/torvalds/linux/blob/f986e350833347cb605d9d1ed517325c9a97808d/include/linux/log2.h) ##### `is_power_of_2` ```c= /** * is_power_of_2() - check if a value is a power of two * @n: the value to check * * Determine whether some value is a power of two, where zero is * *not* considered a power of two. * Return: true if @n is a power of 2, otherwise false. */ static inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); } ``` - 如果有一個數是 power of 2 那當寫成二進制時，一定只會有一個 1 存在其位元組中。 - 如果 n 滿足上面條件，假設 n = 11000 ，則 n-1 = 10111 ，可以觀察到最低為 1 的位元會歸 0 ，而其右邊原先為 0 的位元會變 1，其左邊的位元則都不變。 - 將上面的例子帶入 `n & (n - 1)` ，會得到 `11000＆10111 = 10000` ，可以發現原先最低為 1 的位元與其右邊位元全部歸零。 - 藉由 `(n & (n - 1)) == 0` 可以得知 n 是否只有一個 1 存在其位元組中，也就是 n 是否為 power or 2 (除了 0)。 - `n != 0 && ((n & (n - 1)) == 0)` ，排除 `n != 0` 的情況。 ```c= /* include/linux/bitops.h */ static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); return fls64(l); } ``` ```c= /* linux/arch/s390/include/asm/bitops.h */ /** * fls64 - find last set bit in a 64-bit word * @word: the word to search * * This is defined in a similar way as the libc and compiler builtin * ffsll, but returns the position of the most significant set bit. * * fls64(value) returns 0 if value is 0 or the position of the last * set bit if value is nonzero. The last (most significant) bit is * at position 64. */ static inline int fls64(unsigned long word) { unsigned long mask = 2 * BITS_PER_LONG - 1; return (1 + (__flogr(word) ^ (BITS_PER_LONG - 1))) & mask; } ``` ```c= /* linux/arch/openrisc/include/asm/bitops/fls.h */ static inline int fls(unsigned int x) { int ret; __asm__ ("l.fl1 %0,%1" : "=r" (ret) : "r" (x)); return ret; } ``` > The fls(), flsl() and flsll() functions find the last (most significant) bit set in value and return the index of that bit. Bits are numbered starting at 1, the least significant bit. A return value of zero from any of these functions means that the argument was zero. ##### `__roundup_pow_of_two` ```c= /** * __roundup_pow_of_two() - round up to nearest power of two * @n: value to round up */ static inline __attribute__((const)) unsigned long __roundup_pow_of_two(unsigned long n) { return 1UL << fls_long(n - 1); } ``` - `1UL` 表示 `unsigned long 1` 。 - 如果 n = 0101 ， n - 1 = 0100 ，執行 `fls_long(n - 1);` 可以得到為 1 的最高位元的 index ，在這個例子中是 3 。 - 假設 UL1 = 0001 (位元較少較好解釋) ，執行 `1UL << fls_long(n - 1)` 等同於 `0001 << 3 = 0100` 。 ##### `__rounddown_pow_of_two` ```c= /** * __rounddown_pow_of_two() - round down to nearest power of two * @n: value to round down */ static inline __attribute__((const)) unsigned long __rounddown_pow_of_two(unsigned long n) { return 1UL << (fls_long(n) - 1); } ``` - `1UL` 表示 `unsigned long 1` 。 - - 如果 n = 0101 ，執行 `fls_long(n);` 可以得到為 1 的最高位元的 index ，在這個例子中是 3 。 - 假設 UL1 = 0001 (位元較少較好解釋) ，執行 `1UL << (fls_long(n) - 1)` 等同於 `0001 << 2 = 0010` 。 #### 研讀 [slab allocator](https://en.wikipedia.org/wiki/Slab_allocation)，探索 Linux 核心的 slab 實作，找出運用 power-of-2 的程式碼和解讀 :::info 待完成在 [linux/include/linux/slab.h](https://github.com/torvalds/linux/blob/d310ec03a34e92a77302edb804f7d68ee4f01ba0/include/linux/slab.h) 有使用到 power of 2 應用的片斷如下： ```c= /* * Figure out which kmalloc slab an allocation of a certain size * belongs to. * 0 = zero alloc * 1 = 65 .. 96 bytes * 2 = 129 .. 192 bytes * n = 2^(n-1)+1 .. 2^n */ static __always_inline unsigned int kmalloc_index(size_t size) { if (!size) return 0; if (size <= KMALLOC_MIN_SIZE) return KMALLOC_SHIFT_LOW; if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) return 1; if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) return 2; if (size <= 8) return 3; if (size <= 16) return 4; if (size <= 32) return 5; if (size <= 64) return 6; if (size <= 128) return 7; if (size <= 256) return 8; if (size <= 512) return 9; if (size <= 1024) return 10; if (size <= 2 * 1024) return 11; if (size <= 4 * 1024) return 12; if (size <= 8 * 1024) return 13; if (size <= 16 * 1024) return 14; if (size <= 32 * 1024) return 15; if (size <= 64 * 1024) return 16; if (size <= 128 * 1024) return 17; if (size <= 256 * 1024) return 18; if (size <= 512 * 1024) return 19; if (size <= 1024 * 1024) return 20; if (size <= 2 * 1024 * 1024) return 21; if (size <= 4 * 1024 * 1024) return 22; if (size <= 8 * 1024 * 1024) return 23; if (size <= 16 * 1024 * 1024) return 24; if (size <= 32 * 1024 * 1024) return 25; if (size <= 64 * 1024 * 1024) return 26; BUG(); /* Will never be reached. Needed because the compiler may complain */ return -1; } ``` ::: ## 測驗三 ### 重新作答 ``` RRR = ? (a) data <<= read_lhs DDD = ? (b) mask |= write_mask[write_lhs + bitsize] ``` ### 程式碼運作原理 #### `bitcpy` ```c= void bitcpy(void *_dest, /* Address of the buffer to write to */ size_t _write, /* Bit offset to start writing to */ const void *_src, /* Address of the buffer to read from */ size_t _read, /* Bit offset to start reading from */ size_t count) ``` - _src: 長度為 8 個 uint8 陣列 (總共 64 位元)。注意: 其位元順序布局由每個位元組的 MSB (Most Significant Bit) 往上增加，如下圖一所示。 - _dest: 長度為 8 個 uint8 陣列 (總共 64 位元)，其位元順序布局如 input/_dest 所述。 - _write: 從 _dest 的第 _write 個位元開始寫入 _count 位元數。 - _read: 從 _src 的第 _read 個位元開始讀取 _count 位元數。 - count: 讀取/寫入的位元數。 ```c= size_t read_lhs = _read & 7; size_t read_rhs = 8 - read_lhs; const uint8_t *source = (const uint8_t *) _src + (_read / 8); size_t write_lhs = _write & 7; size_t write_rhs = 8 - write_lhs; uint8_t *dest = (uint8_t *) _dest + (_write / 8); ``` ```c= size_t read_lhs = _read & 7; size_t write_lhs = _write & 7; ``` - `_read & 7` 作用等同於 `_read % 8`；`_write & 7` 作用等同於 `_write % 8`。 - `read_lhs` 、 `write_lhs` 分別表示從起始位元組的 MSB 開始向右數的第幾位。 - 當 `read_lhs > 0` 或 `write_lhs > 0`，皆表示起始位元沒有對齊位元組的 MSB。 ```c= size_t read_rhs = 8 - read_lhs; size_t write_rhs = 8 - write_lhs; ``` - `read_rhs` 、 `write_rhs` 分別表示`_read` 、`_write` 從結束位元組的 MSB 開始向右數的第幾位。 ```c= const uint8_t *source = (const uint8_t *) _src + (_read / 8); uint8_t *dest = (uint8_t *) _dest + (_write / 8); ``` - `*source` 指向開始讀取的 byte 。 - `_read / 8` 從 _src 的第幾個 byte 開始讀取。 - `*dest` 指向開始寫入的 byte 。 - `_write / 8` 從 _dest 的第幾個 byte 開始寫入。 ```c= static const uint8_t write_mask[] = { 0xFF, /* == 0 11111111b */ 0x7F, /* == 1 01111111b */ 0x3F, /* == 2 00111111b */ 0x1F, /* == 3 00011111b */ 0x0F, /* == 4 00001111b */ 0x07, /* == 5 00000111b */ 0x03, /* == 6 00000011b */ 0x01, /* == 7 00000001b */ 0x00 /* == 8 00000000b */ }; ``` ```c= while (count > 0) { uint8_t data = *source++; size_t bitsize = (count > 8) ? 8 : count; ... count -= bitsize; } ``` - 由於是 `while (count > 0)` 的迴圈，如果要複製的字數為 0 ，則一開始就完全不執行。 - `uint8_t data = *source++;` 每次從 `source` 中抓取 8 個 bit 存在 `data`，下一次再取後沒取過的 8 個 bit 。 - `(count > 8)`判斷要讀取的資料有沒有超過一個 byte ，所以 `size_t bitsize = (count > 8) ? 8 : count;` 將 `bitsize` 設成在這個 byte中要讀取的 bite 數量。 - 透過迴圈最後的 `count -= bitsize;` 把 `count` 更新成之後還須要讀取的 bite 數量。 ```c= if (read_lhs > 0) { data <<= read_lhs; if (bitsize > read_rhs) data |= (*source >> read_rhs); ``` - `read_lhs > 0`，表示起始位元沒有對齊位元組的 MSB，因此執行 `data <<= read_lhs;` ，將 data 的位元向左移 `read_lhs` ，使得 data 的第一個位元變成要讀取的位元。 - 這時候如果 `bitsize > read_rhs` 表示這次（迴圈）所要讀取的資料的最右位置超過最後讀取的在一個 byte 中的位置，因為傷一部執行過 `data <<= read_lhs;` 會造成目前讀取的位元組的最右邊有 0 ，不是要讀取的部分。因而執行 `data |= (*source >> read_rhs);` ，將這些 0 的位置以下一個位元組的資料補齊，此時的`*source` 因為之前有執行 `uint8_t data = *source++;` ，所以是指在下一個位元組。 ```c= if (bitsize < 8) data &= read_mask[bitsize]; ``` - 唯有 `count > 8` 時才會有`bitsize < 8` 的情況，代表讀取到最後一個位元組了。 - `data &= read_mask[bitsize];` 這一步是為了把 `data` 位元組中超過最後要讀取的 bit （左邊數來的 `bitsize` 後）的其他bit 全部設為 0 。 - 下面是`read_mask` 的特徵： ```c= static const uint8_t read_mask[] = { 0x00, /* == 0 00000000b */ 0x80, /* == 1 10000000b */ 0xC0, /* == 2 11000000b */ 0xE0, /* == 3 11100000b */ 0xF0, /* == 4 11110000b */ 0xF8, /* == 5 11111000b */ 0xFC, /* == 6 11111100b */ 0xFE, /* == 7 11111110b */ 0xFF /* == 8 11111111b */ }; ``` ```c= uint8_t original = *dest; uint8_t mask = read_mask[write_lhs]; ``` - 用 `original` 儲存 `*dest` 指到的第一個位元組。 ```c= if (bitsize > write_rhs) { /* Cross multiple bytes */ *dest++ = (original & mask) | (data >> write_lhs); original = *dest & write_mask[bitsize - write_rhs]; *dest = original | (data << write_rhs); } else { // Since write_lhs + bitsize is never >= 8, no out-of-bound access. mask |= write_mask[write_lhs + bitsize]; *dest++ = (original & mask) | (data >> write_lhs); } ``` - `if (bitsize > write_rhs)` 是判斷要寫入的資料是否橫跨多個位元組。 - `bitsize > write_rhs`的情況： - `*dest++ = (original & mask) | (data >> write_lhs);` 中的 `data >> write_lhs` 是為了使得要被寫入的 `data` 對齊每次八個 bit 寫入的起始位置。而 `original & mask` 是把目前位元組中左邊數來第 `write_lhs` 前的位元全部設為 1 。 - 由於上一部執行過 `*dest++` ，所以目前 `*dest` 是指在下一個位元組， `original = *dest & write_mask[bitsize - write_rhs];` 中的 `bitsize - write_rhs` 是代表在`*dest` 指向的位元組中，左邊有幾個位元是這一個迴圈要寫入的，透過這行程式將要寫入位元歸 0 的結果存到 `original` 。 - `*dest = original | (data << write_rhs);` ，這一步是將 `data` 最尾端還沒被寫入的資料中的第一位對其`*dest` 指向的位元組，並與上一步歸 0 的結果合併，寫入 `*dest` 。 - `bitsize <= write_rhs`的情況： - `mask |= write_mask[write_lhs + bitsize];` ，等價於 `read_mask[write_lhs]|= write_mask[write_lhs + bitsize];` ，得到一個要寫入區段為 0 ，其餘為 1 的位元組。 - `*dest++ = (original & mask) | (data >> write_lhs);` 中 `original & mask` 是讓上一梯次寫入的資料（左邊數來第 `write_lhs` 個位元前）寫入 `mask` 在上一不歸 0 的左邊部分，而 `data >> write_lhs` 是將要寫入的資料對齊 `write_lhs` 的位置，最後一並寫入 `*dest` 。 ```c= static uint8_t output[8], input[8]; ``` - `output[]` 用來存 `bitcpy` 後的資料，`output[i]` 八個 bite 所以這個 array 一共 64 個 byte 大小。 - `input[]` 用來存要 `bitcpy` 的資料，`input[]` 八個 bite 所以這個 array 一共 64 個 byte 大小。 ```c= static inline void dump_8bits(uint8_t _data) { for (int i = 0; i < 8; ++i) printf("%d", (_data & (0x80 >> i)) ? 1 : 0); } ``` `static inline void dump_8bits(uint8_t _data)` 這個函數適用於 `print` 出一個 `uint8_t` 的 8 個 bit 。關鍵在於`(_data & (0x80 >> i)) ? 1 : 0` ，其中 `0x80` = 10000000 (binary) 。藉由 `0x80 >> i` ， i 的範圍 0~7 ，所以 for loop 中可以得到下面八種組合： ``` 10000000 0x80 >> 0 01000000 0x80 >> 1 00100000 0x80 >> 2 00010000 0x80 >> 3 00001000 0x80 >> 4 00000100 0x80 >> 5 00000010 0x80 >> 6 00000001 0x80 >> 7 ``` 舉例來說， `_data & (0x80 >> 1)` 等價於 `_data & 01000000` ，有下面兩種可能： **X 代表可以是 0 or 1 的位元** 1. `_data = X0XXXXXX`，`X0XXXXXX & 01000000 = 0` 3. `_data = X1XXXXXX`，`X1XXXXXX & 01000000 = 1` 透過以上方法，可以知道左邊數來第二位元是 0 or 1 。 ```c= static inline void dump_binary(uint8_t *_buffer, size_t _length) { for (int i = 0; i < _length; ++i) dump_8bits(*_buffer++); } ``` - `uint8_t *_buffer`：指向要印出來的 bytes 中的第一個 bytes 的第一個 bite 。 - `size_t _length` : 共有幾個要用`dump_8bits` 印出來的 bytes。 - 這個函數的目的是用 binary 的形式以 `dump_8bits` 印出從 `*_buffer` 開始的 `_length` 個 bytes。 ```c= int main(int _argc, char **_argv) { memset(&input[0], 0xFF, sizeof(input)); for (int i = 1; i <= 32; ++i) { for (int j = 0; j < 16; ++j) { for (int k = 0; k < 16; ++k) { memset(&output[0], 0x00, sizeof(output)); printf("%2d:%2d:%2d ", i, k, j); bitcpy(&output[0], k, &input[0], j, i); dump_binary(&output[0], 8); printf("\n"); } } } return 0; } ``` 1. `memset(&input[0], 0xFF, sizeof(input));` 因為 `0xFF = 11111111 (in binary)` 所以在經過 `memset` 後 `input[]`裡面的 bite 全部會變 1，用於初始化。 3. `memset(&output[0], 0x00, sizeof(output));` 因為 `0xFF = 00000000 (in binary)` 所以在經過 `memset` 後 `input[]`裡面的 bite 全部會變 0，用於初始化。 4. `printf("%2d:%2d:%2d ", i, k, j);` - `i`: - `k`: - `j`: 5. `bitcpy(&output[0], k, &input[0], j, i);` 將 `output[]` 中的 bites 拷貝到 `input[]` 中。 7. `dump_binary(&output[0], 8); printf("\n");` 用 binary 的形式以 `dump_8bits` 印出從 `&output[0]` 開始的 8 個 bytes。 ### 延伸問題 1. 解釋上述程式碼運作原理，並嘗試重寫為同樣功能但效率更高的實作 2. 在 Linux 核心原始程式碼找出逐 bit 進行資料複製的程式碼，並解說相對應的情境 ## 測驗四 ### 重新作答 ``` CCC = ? (c) s->hash_size ``` ### 程式碼運作原理 #### `cstr.h` - `#pragma once` :::info 補充：[Pragmas](https://gcc.gnu.org/onlinedocs/cpp/Pragmas.html) ::: ```c= enum { CSTR_PERMANENT = 1, CSTR_INTERNING = 2, CSTR_ONSTACK = 4, }; ``` - **CSTR_INTERNING_SIZE, CSTR_STACK_SIZE** ```c= #define CSTR_INTERNING_SIZE (32) #define CSTR_STACK_SIZE (128) ``` - **__cstr_data, cstring** ```c= typedef struct __cstr_data { char *cstr; uint32_t hash_size; uint16_t type; uint16_t ref; } * cstring; ``` - `__cstr_data` 是個存放 string inerning 必要資料的結構。 - `* cstring` 是一個另外宣告指向 `__cstr_data` 的指標。 - `*cstr` 指向 string 所存放的位置，將其擺放在 struct 的第一位可以讓 print `cstring` 時可以 print 出 `cstr` 。 - `hash_size` : - `type` 用於標記 string 所存放的狀態，共有三種情形，分別為： 1. `CSTR_ONSTACK`，string 存放於 stack。 2. `CSTR_PERMANENT` 3. `CSTR_INTERNING`，string 存放在可interning 的位置。 - `ref` : reference counting :::info Q: hash_size, ref 的用途？ ::: - **__cstr_buffer** ```c= typedef struct __cstr_buffer { cstring str; } cstr_buffer[1]; ``` - 用於存放一串 `cstring` 的 buffer。 :::info Q: 為什麼要建構一個結構只存放單一物件？為什麼要cstr_buffer[1] not cstr_buffer? ::: - **CSTR_S(s)** ```c= #define CSTR_S(s) ((s)->str) ``` - **CSTR_BUFFER** ```c= #define CSTR_BUFFER(var) \ char var##_cstring[CSTR_STACK_SIZE] = {0}; \ struct __cstr_data var##_cstr_data = {var##_cstring, 0, CSTR_ONSTACK, 0}; \ cstr_buffer var; \ var->str = &var##_cstr_data; ``` - `char var##_cstring[CSTR_STACK_SIZE] = {0};` 用到 [3.5 Concatenation](https://gcc.gnu.org/onlinedocs/cpp/Concatenation.html#Concatenation) 的技巧，創建一個名稱為 `VAR_cstring[128]` 的陣列，並初始化每個值為 0 。(VAR 表示變數 var 的值) - 建立一個名為 `VAR_cstr_data` 的 `struct __cstr_data` ，並做初始化 `{VAR_cstring, 0, CSTR_ONSTACK, 0};` ，等同於 `char *cstr = VAR_cstring; uint32_t hash_size = 0; uint16_t type = CSTR_ONSTACK; uint16_t ref = 0;` - `cstr_buffer var; var->str = &VAR_cstr_data;` ，宣告一個 `cstr_buffer VAR` ，指向剛才建立的 `VAR_cstr_data` 。 - **CSTR_LITERAL** ```c= #define CSTR_LITERAL(var, cstr) \ static cstring var = NULL; \ if (!var) { \ cstring tmp = cstr_clone("" cstr, (sizeof(cstr) / sizeof(char)) - 1); \ if (tmp->type == 0) { \ tmp->type = CSTR_PERMANENT; \ tmp->ref = 0; \ } \ if (!__sync_bool_compare_and_swap(&var, NULL, tmp)) { \ if (tmp->type == CSTR_PERMANENT) \ free(tmp); \ } \ } ``` - 用 macro 定義應該是為了使用 `var` 作為變數名稱。 :::info __sync_bool_compare_and_swap 的用途？ ::: - **CSTR_CLOSE** ```c= #define CSTR_CLOSE(var) \ do { \ if (!(var)->str->type) \ cstr_release((var)->str); \ } while (0) ``` - - **cstr_grab, cstr_clone, cstr_cat, cstr_equal, cstr_release** ```c= /* Public API */ cstring cstr_grab(cstring s); cstring cstr_clone(const char *cstr, size_t sz); cstring cstr_cat(cstr_buffer sb, const char *str); int cstr_equal(cstring a, cstring b); void cstr_release(cstring s); ``` #### `cstr.c` - **INTERNING_POOL_SIZE, HASH_START_SIZE** ```c= #define INTERNING_POOL_SIZE 1024 #define HASH_START_SIZE 16 /* must be power of 2 */ ``` - **struct __cstr_node** ```c= struct __cstr_node { char buffer[CSTR_INTERNING_SIZE]; struct __cstr_data str; struct __cstr_node *next; }; ``` - **struct __cstr_pool** ```c= struct __cstr_pool { struct __cstr_node node[INTERNING_POOL_SIZE]; }; ``` - 用於存放所有的 `struct __cstr_node` 。 - `INTERNING_POOL_SIZE` 是 interning pool 的最大容量。 - **__cstr_interning** ```c= struct __cstr_interning { int lock; int index; unsigned size; unsigned total; struct __cstr_node **hash; struct __cstr_pool *pool; }; static struct __cstr_interning __cstr_ctx; ``` - `lock`: 與 `CSTR_LOCK()` 、 `CSTR_UNLOCK()` 操作相關。 - `index`: - `size`: - `total`: - `hash`: - `pool` : - **CSTR_LOCK, CSTR_UNLOCK** ```c= /* FIXME: use C11 atomics */ #define CSTR_LOCK() \ ({ \ while (__sync_lock_test_and_set(&(__cstr_ctx.lock), 1)) { \ } \ }) #define CSTR_UNLOCK() ({ __sync_lock_release(&(__cstr_ctx.lock)); }) ``` >https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html > type __sync_lock_test_and_set (type *ptr, type value, ...) >This built-in function, as described by Intel, is not a traditional test-and-set operation, but rather an atomic exchange operation. It writes value into *ptr, and returns the previous contents of *ptr. Many targets have only minimal support for such locks, and do not support a full exchange operation. In this case, a target may support reduced functionality here by which the only valid value to store is the immediate constant 1. The exact value actually stored in *ptr is implementation defined. This built-in function is not a full barrier, but rather an acquire barrier. This means that references after the operation cannot move to (or be speculated to) before the operation, but previous memory stores may not be globally visible yet, and previous memory loads may not yet be satisfied. >void __sync_lock_release (type *ptr, ...) This built-in function releases the lock acquired by __sync_lock_test_and_set. Normally this means writing the constant 0 to *ptr. This built-in function is not a full barrier, but rather a release barrier. This means that all previous memory stores are globally visible, and all previous memory loads have been satisfied, but following memory reads are not prevented from being speculated to before the barrier. - **xalloc** ```c= static void *xalloc(size_t n) { void *m = malloc(n); if (!m) exit(-1); return m; } ``` - 配置 `size_t n` 大小的記憶體，如果不成功則 `exit(-1);` ；成功則返還記憶體位置。是由 `malloc` 包裝而成的函式。 - **insert_node** ```c= static inline void insert_node(struct __cstr_node **hash, int sz, struct __cstr_node *node) { uint32_t h = node->str.hash_size; int index = h & (sz - 1); node->next = hash[index]; hash[index] = node; } ``` - **expand** ```c= static void expand(struct __cstr_interning *si) { unsigned new_size = si->size * 2; if (new_size < HASH_START_SIZE) new_size = HASH_START_SIZE; struct __cstr_node **new_hash = xalloc(sizeof(struct __cstr_node *) * new_size); memset(new_hash, 0, sizeof(struct __cstr_node *) * new_size); for (unsigned i = 0; i < si->size; ++i) { struct __cstr_node *node = si->hash[i]; while (node) { struct __cstr_node *tmp = node->next; insert_node(new_hash, new_size, node); node = tmp; } } free(si->hash); si->hash = new_hash; si->size = new_size; } ``` - **interning** ```c= static cstring interning(struct __cstr_interning *si, const char *cstr, size_t sz, uint32_t hash) { if (!si->hash) return NULL; int index = (int) (hash & (si->size - 1)); struct __cstr_node *n = si->hash[index]; while (n) { if (n->str.hash_size == hash) { if (!strcmp(n->str.cstr, cstr)) return &n->str; } n = n->next; } // 80% (4/5) threshold if (si->total * 5 >= si->size * 4) return NULL; if (!si->pool) { si->pool = xalloc(sizeof(struct __cstr_pool)); si->index = 0; } n = &si->pool->node[si->index++]; memcpy(n->buffer, cstr, sz); n->buffer[sz] = 0; cstring cs = &n->str; cs->cstr = n->buffer; cs->hash_size = hash; cs->type = CSTR_INTERNING; cs->ref = 0; n->next = si->hash[index]; si->hash[index] = n; return cs; } ``` - `if (!si->hash) return NULL;` ，如果 `si` 沒有經過 hash 則不需要 interning 。 - `int index = (int) (hash & (si->size - 1));` ，因為 `x % power_of_2 == x & (power_of_2 - 1)` ，所以 `hash & (si->size - 1)` 等同於 `hash % si->size`（當 `si->size` 是 power_or_2）。 :::info 需要完成 - `struct __cstr_node *n = si->hash[index];` ::: - 當 `si->total` 為 `si->size` 的 80% 時， `return NULL` 。 ```c= // 80% (4/5) threshold if (si->total * 5 >= si->size * 4) return NULL; ``` - 如果 `si->pool = NULL` 還未指向任何記憶體，則使用 `xalloc` 指派一個 `struct __cstr_pool` 的儲存空間，並且初始化 `si->index` 為 0 。 ```c= if (!si->pool) { si->pool = xalloc(sizeof(struct __cstr_pool)); si->index = 0; } ``` :::info 研究中 ```c n = &si->pool->node[si->index++]; memcpy(n->buffer, cstr, sz); n->buffer[sz] = 0; cstring cs = &n->str; cs->cstr = n->buffer; cs->hash_size = hash; cs->type = CSTR_INTERNING; cs->ref = 0; n->next = si->hash[index]; si->hash[index] = n; return cs; ``` ::: - **cstr_interning** ```c= static cstring cstr_interning(const char *cstr, size_t sz, uint32_t hash) { cstring ret; CSTR_LOCK(); ret = interning(&__cstr_ctx, cstr, sz, hash); if (!ret) { expand(&__cstr_ctx); ret = interning(&__cstr_ctx, cstr, sz, hash); } ++__cstr_ctx.total; CSTR_UNLOCK(); return ret; } ``` :::info 需要搞懂： - `CSTR_LOCK();` 與 `({ \ while (__sync_lock_test_and_set(&(__cstr_ctx.lock), 1)) { \ } \ })` 等價。 - `CSTR_UNLOCK()` 與 `({ __sync_lock_release(&(__cstr_ctx.lock)); })` 等價。 ::: - **hash_blob** ```c= static inline uint32_t hash_blob(const char *buffer, size_t len) { const uint8_t *ptr = (const uint8_t *) buffer; size_t h = len; size_t step = (len >> 5) + 1; for (size_t i = len; i >= step; i -= step) h = h ^ ((h << 5) + (h >> 2) + ptr[i - 1]); return h == 0 ? 1 : h; } ``` - `const uint8_t *ptr = (const uint8_t *) buffer;` ，設一個 `const uint8_t *ptr` 指到 `buffer` 的前八個位元。 - `size_t h = len;` ，將 `len` 拷貝到 `h`。 - `size_t step = (len >> 5) + 1;` ，相當於 `len/32 + 1` 。 - `for (size_t i = len; i >= step; i -= step)`，這個迴圈約略會執行作多 32 次。 - `h = h ^ ((h << 5) + (h >> 2) + ptr[i - 1]);`，這行應該是 hash function ，但因為 `+ ptr[i - 1]` ，所以只會用到部分的 `*buffer` 在 hash 的過程中。 - `return h == 0 ? 1 : h;` 返回不等於 0 的 hash 結果。 - **cstr_clone** ```c= cstring cstr_clone(const char *cstr, size_t sz) { if (sz < CSTR_INTERNING_SIZE) return cstr_interning(cstr, sz, hash_blob(cstr, sz)); cstring p = xalloc(sizeof(struct __cstr_data) + sz + 1); if (!p) return NULL; void *ptr = (void *) (p + 1); p->cstr = ptr; p->type = 0; p->ref = 1; memcpy(ptr, cstr, sz); ((char *) ptr)[sz] = 0; p->hash_size = 0; return p; } ``` - **cstr_grab** ```c= cstring cstr_grab(cstring s) { if (s->type & (CSTR_PERMANENT | CSTR_INTERNING)) return s; if (s->type == CSTR_ONSTACK) return cstr_clone(s->cstr, s->hash_size); if (s->ref == 0) s->type = CSTR_PERMANENT; else __sync_add_and_fetch(&s->ref, 1); return s; } ``` - **cstr_release** ```c= void cstr_release(cstring s) { if (s->type || !s->ref) return; if (__sync_sub_and_fetch(&s->ref, 1) == 0) free(s); } ``` - **cstr_hash** ```c= static size_t cstr_hash(cstring s) { if (s->type == CSTR_ONSTACK) return hash_blob(s->cstr, s->hash_size); if (s->hash_size == 0) s->hash_size = hash_blob(s->cstr, strlen(s->cstr)); return s->hash_size; } ``` - **cstr_equal** ```c= int cstr_equal(cstring a, cstring b) { if (a == b) return 1; if ((a->type == CSTR_INTERNING) && (b->type == CSTR_INTERNING)) return 0; if ((a->type == CSTR_ONSTACK) && (b->type == CSTR_ONSTACK)) { if (a->hash_size != b->hash_size) return 0; return memcmp(a->cstr, b->cstr, a->hash_size) == 0; } uint32_t hasha = cstr_hash(a); uint32_t hashb = cstr_hash(b); if (hasha != hashb) return 0; return !strcmp(a->cstr, b->cstr); } ``` - **cstr_cat2** ```c= static cstring cstr_cat2(const char *a, const char *b) { size_t sa = strlen(a), sb = strlen(b); if (sa + sb < CSTR_INTERNING_SIZE) { char tmp[CSTR_INTERNING_SIZE]; memcpy(tmp, a, sa); memcpy(tmp + sa, b, sb); tmp[sa + sb] = 0; return cstr_interning(tmp, sa + sb, hash_blob(tmp, sa + sb)); } cstring p = xalloc(sizeof(struct __cstr_data) + sa + sb + 1); if (!p) return NULL; char *ptr = (char *) (p + 1); p->cstr = ptr; p->type = 0; p->ref = 1; memcpy(ptr, a, sa); memcpy(ptr + sa, b, sb); ptr[sa + sb] = 0; p->hash_size = 0; return p; } ``` - `size_t sa = strlen(a), sb = strlen(b);` ，以 `sa, sb` 紀錄 `a, b` 的字串長度。 :::info 這裡需要補齊。 - `if (sa + sb < CSTR_INTERNING_SIZE)`，`CSTR_INTERNING_SIZE = 32`。 - 建立一個 `char tmp[CSTR_INTERNING_SIZE];`。 - 以 `memcpy(tmp, a, sa); memcpy(tmp + sa, b, sb);` 依序將 `a. b` 寫入 `temp`。 - 最後在 `tmp[sa + sb]` 放 `'\0'` 。`return cstr_interning(tmp, sa + sb, hash_blob(tmp, sa + sb));` ::: - 當 sa + sb >= 32 時，才會有接下來的步驟。 - `cstring p = xalloc(sizeof(struct __cstr_data) + sa + sb + 1);` ，配置大小為 `sizeof(struct __cstr_data) + sa + sb + 1` 的記憶體給 `p` ，失敗的話 `xalloc` 內會 `exit(1);` 。 - `char *ptr = (char *) (p + 1);` ，建立一個 `char *ptr`使其指向 `p` 的 `struct __cstr_data` 儲存空間後的第一個位址。因為前面配置記憶體時有考慮到 `sa + sb + 1` ，所以不用另外配置記憶體給該字串。 - 做一些初始化 `p->cstr = ptr; p->type = 0; p->ref = 1;` 。 - `memcpy(ptr, a, sa); memcpy(ptr + sa, b, sb);` 依序拷貝字串 `a, b` 到 `ptr` 的空間。 - `ptr[sa + sb] = 0;` 在末端加入 `'\0'` 。 - `p->hash_size = 0;` - **cstr_cat** ```c= cstring cstr_cat(cstr_buffer sb, const char *str) { cstring s = sb->str; if (s->type == CSTR_ONSTACK) { int i = s->hash_size; while (i < CSTR_STACK_SIZE - 1) { s->cstr[i] = *str; if (*str == 0) return s; ++s->hash_size; ++str; ++i; } s->cstr[i] = 0; } cstring tmp = s; sb->str = cstr_cat2(tmp->cstr, str); cstr_release(tmp); return sb->str; } ``` - `cstring s = sb->str;` 將 `sb->str` 拷貝到 `cstring s` 。 - `if (s->type == CSTR_ONSTACK)` ，判斷如果 `s` 有在 stack 中就執行（`CSTR_ONSTACK` = 4）。 - `int i = s->hash_size;`，`hash_size` 代表目前 hash 過幾次，在 `CSTR_BUFFER` 中的初始值為 0。 - `while (i < CSTR_STACK_SIZE - 1)` ，判斷 stack 還沒到達 hash 量的上限，就執行迴圈。 - `s->cstr[i] = *str; ++s->hash_size;++str;++i;` 逐一將 str 的 char 存到 `s->cstr[i]` 注意 `i` 是從上一次 hash 後紀錄的 `hash_size` 開始，所以會避開複寫問題。 - `if (*str == 0) return s;` 當因為之前的 `++str;` 讓 `str = NULL` 時，終止回圈因為寫入結束。 - `cstring tmp = s;`，讓 `tmp` 留住 `s` 所指向的內容。 :::info 這裡需要補齊 - `sb->str = cstr_cat2(tmp->cstr, str);` ， - `cstr_release(tmp);` - `return sb->str;` ::: #### `main.c` - **cmp** ```c= static cstring cmp(cstring t) { CSTR_LITERAL(hello, "Hello string"); CSTR_BUFFER(ret); cstr_cat(ret, cstr_equal(hello, t) ? "equal" : "not equal"); return cstr_grab(CSTR_S(ret)); } ``` - **test_cstr** ```c= static void test_cstr() { CSTR_BUFFER(a); cstr_cat(a, "Hello "); cstr_cat(a, "string"); cstring b = cmp(CSTR_S(a)); printf("%s\n", b->cstr); CSTR_CLOSE(a); cstr_release(b); } ``` - **main** ```c= int main(int argc, char *argv[]) { test_cstr(); return 0; } ``` ### 延伸問題 1. 上述程式碼使用到 gcc atomics builtins，請透過 POSIX Thread 在 GNU/Linux 驗證多執行緒的環境中，string interning 能否符合預期地運作？若不能，請提出解決方案 2. chriso/intern 是個更好的 string interning 實作，請探討其特性和設計技巧，並透過內建的 benchmark 分析其效能表現