# 2021q1 Homework2 (quiz2) contributed by < `qwe661234` > ###### tags: `linux2021` # question 1 `queue element` ``` cpp typedef struct __element { char *value; struct __element *next; struct list_head list; } list_ele_t; ``` `queue` ``` cpp typedef struct { list_ele_t *head; /* Linked list of elements */ list_ele_t *tail; size_t size; struct list_head list; } queue_t; ``` `list head` ``` cpp struct list_head { struct list_head *prev, *next; }; ``` `container_of` ``` cpp #ifndef container_of #define container_of(ptr, type, member) \ __extension__({ \ const __typeof__(((type *) 0)->member) *__pmember = (ptr); \ (type *) ((char *) __pmember - offsetof(type, member)); \ }) #endif ``` ``` __extension__ ``` >GCC uses the __extension__ attribute when using the -ansi flag to avoid warnings in headers with GCC extensions. ``` __typeof__(((type *) 0)->member) ``` >ANSI C 標准允許變量0的常量被強制轉換成任何一種類型的指針,並且轉換的結果是個 NULL, 所以 (type *) 0 就是資料型別為 type *的 NULL 指針, 雖然取用 NULL 指針去指向 member 是非法的, 但前面加上 typeof 後, 編譯器不會去訪問 NULL->member, 而是單純去取 member 的資料型別 ``` offsetof(type, member) ``` >它的作用是獲取 struct 中某個成員相對於該 struct 首元素地址的偏移量 reference: * [C语言高级用法---typeof( ((type *)0)->member )和offset_of()](https://blog.csdn.net/rosetta/article/details/90746936) * [How to use __extension__ and __typeof__ in a minified example in C?](https://stackoverflow.com/questions/5323478/how-to-use-extension-and-typeof-in-a-minified-example-in-c) `list_entry` ``` cpp #define list_first_entry(head, type, member) \ list_entry((head)->next, type, member) ``` `list_add_tail function` ``` cpp static inline void list_add_tail(struct list_head *node, struct list_head *head) { struct list_head *prev = head->prev; prev->next = node; node->next = head; node->prev = prev; head->prev = node; } ``` <font color="blue">Black: original Red: after function</font> ```graphviz digraph structs { rankdir=LR node[shape="box"] edge [dir="both"] node0 [label="head"]; node1 [label="last_ele"]; node2 [label="first_ele"]; node3 [label="newnode"]; prev [label="prev" shape="none"]; { rank = "same"; prev->node1[dir="forward"]; } { node1 -> node0-> node2; node1 -> node3 -> node0-> node2 [color="red"]; } } ``` `list_del function` ``` cpp static inline void list_del(struct list_head *node) { struct list_head *next = node->next, *prev = node->prev; next->prev = prev; prev->next = next; } ``` <font color="blue">Black: original Red: after function</font> ```graphviz digraph structs { rankdir=LR node[shape="box"] edge [dir="both"] node0 [label="node"]; node1 [label="element"]; node2 [label="element"]; { node1 -> node0-> node2; node1 -> node2 [color="red"]; } } ``` `list_empty function` 檢查 list head 是否有連接 node ``` cpp static inline int list_empty(const struct list_head *head) { // point to self return (head->next == head); } ``` `list_is_singular function` 檢查 list head 後面是否只連接一個 node ``` cpp static inline int list_is_singular(const struct list_head *head) { // 1 or 2 return (!list_empty(head) && head->prev == head->next); } ``` `list_splice_tail function` 將所有以 list 為 list head 的 list node 接在以 head 為 list head 的最後一個 list node 之後 ``` cpp static inline void list_splice_tail(struct list_head *list, struct list_head *head) { // head_ last means tail struct list_head *head_last = head->prev; struct list_head *list_first = list->next, *list_last = list->prev; if (list_empty(list)) return; head->prev = list_last; list_last->next = head; list_first->prev = head_last; head_last->next = list_first; } ``` <font color="blue">Black: original Red: after function</font> ```graphviz digraph structs { rankdir=LR node[shape="box"] edge [dir="both"] node0 [label="list"]; node1 [label="list node 1\n (list first)"]; node2 [label="list node 2\n (list last)"]; hnode0 [label="head"]; hnode1 [label="head node 1"]; hnode2 [label="head node 2\n (head last)"]; { node0->node1->node2->node0; hnode0->hnode1->hnode2->hnode0; } { node2->hnode0 [color="red"]; hnode2->node1 [color="red"]; } } ``` Result ```graphviz digraph structs { rankdir=LR node[shape="box"] edge [dir="both"] node1 [label="list node 1\n (list first)"]; node2 [label="list node 2\n (list last)"]; hnode0 [label="head"]; hnode1 [label="head node 1"]; hnode2 [label="head node 2\n (head last)"]; { hnode0->hnode1->hnode2; hnode2->node1[color="red"]; node1->node2; node2->hnode0[color="red"]; } } ``` `list_cut_position` 將由 head_from 為list head 的第一個 list_node 至 node (parameter 中的 node) 接在 list head head_to 之後 ``` cpp static inline void list_cut_position(struct list_head *head_to, struct list_head *head_from, struct list_head *node) { struct list_head *head_from_first = head_from->next; if (list_empty(head_from)) return; if (head_from == node) { INIT_LIST_HEAD(head_to); return; } // begin to cut from headfirst to node, and list it to head_to head_from->next = node->next; head_from->next->prev = head_from; head_to->prev = node; node->next = head_to; head_to->next = head_from_first; head_to->next->prev = head_to; } ``` <font color="blue">Black: original Red: after function</font> ```graphviz digraph structs { rankdir=LR node[shape="box"] edge [dir="both"] node0 [label="head_from"]; node1 [label="node 1\n (head from first)"]; node2 [label="node 2\n (node)"]; node3 [label="node 3\n"]; node4 [label="node 4\n"]; hnode0 [label="head_to"]; { node0->node1->node2->node3->node4->node0; node0->node3[color="red"]; hnode0->node2[color="red"]; hnode0->node1[color="red"]; } } ``` Result ```graphviz digraph structs { rankdir=LR node[shape="box"] edge [dir="both"] node0 [label="head_from"]; node1 [label="node 1\n (head from first)"]; node2 [label="node 2\n (node)"]; node3 [label="node 3\n"]; node4 [label="node 4\n"]; hnode0 [label="head_to"]; { node3->node4->node0; node0->node3[color="red"]; hnode0->node2[color="red"]; node2->node1; hnode0->node1[color="red"]; } } ``` `get_middle function` 以 slow 一次走訪一格 list_node, fast 一次走訪兩格 list_node 的方式, 當fast 走訪一整個 list 將要回到 list head 時, slow 剛好會走到 list 的中間點 ``` cpp static list_ele_t *get_middle(struct list_head *list) { struct list_head *fast = list->next, *slow; list_for_each (slow, list) { /*用來判斷 fast 是否走訪整個 list 即將回到 list head*/ if (fast->next == list || fast->next->next == list) break; fast = fast->next->next; } return list_entry(slow, list_ele_t, list); } ``` :::success 重構`list_merge function` 和 `get_middle function`: 將`list_merge function` 和 `get_middle function` 合寫成一個 function, 因為 list_merge function 中的 head_from 就是 get_middle function 中的 list, 所以合寫後只須傳入兩個參數, 不需要再多傳入 middle node ::: ```cpp list_cut_position(&left.list, &q->list, &get_middle(&q->list)->list); ``` 變更為: ```cpp list_cut_middle(&left.list, &q->list); ``` ``` c static inline void list_cut_middle(struct list_head *head_to, struct list_head *head_from) { struct list_head *fast = head_from->next, *slow; struct list_head *head_from_first = head_from->next; if (list_empty(head_from)) return; // find middle list_for_each (slow, head_from) { if (fast->next == head_from || fast->next->next == head_from) break; fast = fast->next->next; } // begin to cut from headfirst to node, and list it to head_to head_from->next = slow->next; head_from->next->prev = head_from; head_to->prev = slow; slow->next = head_to; head_to->next = head_from_first; head_to->next->prev = head_to; } ``` `list_merge function` 將兩個 list 排序並合併, if 的部份是因為 get_middle 的機制, 如果切分時只有一個 list node, 那個 list node 會接在 lhs 上, 所以只要 lhs 沒有接任何 list node, 那 rhs 必然也沒有接任何 list node, 故可以直接結束 funciton ``` cpp static void list_merge(struct list_head *lhs, struct list_head *rhs, struct list_head *head) { INIT_LIST_HEAD(head); if (list_empty(lhs)) { list_splice_tail(lhs, head); return; } if (list_empty(rhs)) { list_splice_tail(rhs, head); return; } while (!list_empty(lhs) && !list_empty(rhs)) { char *lv = list_entry(lhs->next, list_ele_t, list)->value; char *rv = list_entry(rhs->next, list_ele_t, list)->value; struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next; list_del(tmp); list_add_tail(tmp, head); } list_splice_tail(list_empty(lhs) ? rhs : lhs, head); } ``` :::success 重構 `list_merge function`: 如果 rhs 為空就把 lhs 的 list node 接在 head 之後, lhs 為空則 rhs 必為空,所以行14即可實做, 且 list head 為空就會跳出迴圈, 無須一開始的兩個 if ::: ```cpp static void list_merge(struct list_head *lhs, struct list_head *rhs, struct list_head *head) { INIT_LIST_HEAD(head); while (!list_empty(lhs) && !list_empty(rhs)) { char *lv = list_entry(lhs->next, list_ele_t, list)->value; char *rv = list_entry(rhs->next, list_ele_t, list)->value; struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next; list_del(tmp); list_add_tail(tmp, head); } list_splice_tail(list_empty(lhs) ? rhs : lhs, head); } ``` # question 2 ``` cpp uint16_t func(uint16_t N) { /* change all right side bits to 1 */ N |= N >> 1; N |= N >> 2; N |= N >> 4; N |= N >> 8; return (N + 1) >> 1; } ``` Ex: N = $10000000000000_2$ = $16384_{10}$ 原理是確保16個 bit 都能變為1, 最後做+1是為了得到最接近且比 N 大的 power of two, 接著 >> 1 (也就是除以2), 就可以得到最接近且比 N 小的 power of two ``` N |= N >> 1 0100000000000000 0010000000000000 ---------------- 0110000000000000 N |= N >> 2; 0110000000000000 0001100000000000 ---------------- 0111100000000000 N |= N >> 4; 0111100000000000 0000011110000000 ---------------- 0111111110000000 N |= N >> 8; 0111111110000000 0000000000111111 ---------------- 0111111111111111 (N + 1) = 1000000000000000 (N + 1) >> 1 = 0100000000000000 ``` `is_power_of_2` ```cpp static inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); } ``` 原理: Ex n = $1010_2$, $1010_2$ & $1001_2$ = $1000_2$ != 0, 若 n = $1000_2$, $1000_2$ & $0111_2$ = 0 `roundup_pow_of_two` ``` cpp static inline __attribute__((const)) unsigned long __roundup_pow_of_two(unsigned long n) { return 1UL << fls_long(n - 1); } ``` `rounddown_pow_of_two` ``` cpp static inline __attribute__((const)) unsigned long __rounddown_pow_of_two(unsigned long n) { return 1UL << (fls_long(n) - 1); } ``` 1UL 是代表值為1的 unsigned long roundup_pow_of_two 和 rounddown_pow_of_two 都有使用到 `fls_long` fls 為 find last set bit 的縮寫,就是找到傳入參數的最靠近 MSB 且是1的 bit 是第幾個 bit 並回傳. 將 1UL << 回傳值即可得到比參數 n 大且最靠近的 pow_of_two => `__roundup_pow_of_two`, 將回傳值 - 1 再將 1UL << 回傳值即可得到比參數 n 小且最靠近的 pow_of_two => `__rounddown_pow_of_two`, 在執行 function 前會先考慮系統是64 bit 架構還是32 bit `fls_long` ``` cpp static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); return fls64(l); } ``` ``` cpp #if BITS_PER_LONG == 32 static __always_inline int fls64(__u64 x) { __u32 h = x >> 32; if (h) return fls(h) + 32; return fls(x); } #elif BITS_PER_LONG == 64 static __always_inline int fls64(__u64 x) { if (x == 0) return 0; return __fls(x) + 1; } ``` `fls` and `__fls` ``` cpp static inline __attribute__ ((const)) int __fls(unsigned long x) { if (!x) return 0; else return fls(x) - 1; } static inline __attribute__ ((const)) int fls(unsigned long x) { int n; asm volatile( " fls.f %0, %1 \n" /* 0:31; 0(Z) if src 0 */ " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */ : "=r"(n) /* Early clobber not needed */ : "r"(x) : "cc"); return n; } ``` # question3 ``` cpp /*read from src and write to dest*/ void bitcpy(void *_dest, /* Address of the buffer to write to */ size_t _write, /* Bit offset to start writing to */ const void *_src, /* Address of the buffer to read from */ size_t _read, /* Bit offset to start reading from */ size_t count) { size_t read_lhs = _read & 7; size_t read_rhs = 8 - read_lhs; const uint8_t *source = (const uint8_t *) _src + (_read / 8); size_t write_lhs = _write & 7; size_t write_rhs = 8 - write_lhs; uint8_t *dest = (uint8_t *) _dest + (_write / 8); static const uint8_t read_mask[] = { 0x00, /* == 0 00000000b */ 0x80, /* == 1 10000000b */ 0xC0, /* == 2 11000000b */ 0xE0, /* == 3 11100000b */ 0xF0, /* == 4 11110000b */ 0xF8, /* == 5 11111000b */ 0xFC, /* == 6 11111100b */ 0xFE, /* == 7 11111110b */ 0xFF /* == 8 11111111b */ }; static const uint8_t write_mask[] = { 0xFF, /* == 0 11111111b */ 0x7F, /* == 1 01111111b */ 0x3F, /* == 2 00111111b */ 0x1F, /* == 3 00011111b */ 0x0F, /* == 4 00001111b */ 0x07, /* == 5 00000111b */ 0x03, /* == 6 00000011b */ 0x01, /* == 7 00000001b */ 0x00 /* == 8 00000000b */ }; while (count > 0) { uint8_t data = *source++; // how many bytes you want to write this round size_t bitsize = (count > 8) ? 8 : count; if (read_lhs > 0) { data <<= read_lhs; if (bitsize > read_rhs) // put the context in next bytes to data data |= (*source >> read_rhs); } // keep the number of bit in data you want to write if (bitsize < 8) data &= read_mask[bitsize]; uint8_t original = *dest; uint8_t mask = read_mask[write_lhs]; if (bitsize > write_rhs) { /* Cross multiple bytes */ // write first bytes *dest++ = (original & mask) | (data >> write_lhs); // keep the context of next bytes original = *dest & write_mask[bitsize - write_rhs]; // write next bytes *dest = original | (data << write_rhs); } else { // Since write_lhs + bitsize is never >= 8, no out-of-bound access. // keep context you do not write from tail mask |= write_mask[write_lhs + bitsize]; // write bytes *dest++ = (original & mask) | (data >> write_lhs); } count -= bitsize; } } ``` ``` cpp size_t read_lhs = _read & 7; size_t read_rhs = 8 - read_lhs; ``` _read & 7 與 _read % 8 同義, 用來看8 bit 中 read 的 lhs 有幾個 bit, 剩下 rhs 就是 8 - lhs Ex: _read = 4 ```graphviz digraph structs { rankdir=LR node[shape="record"] node0 [label="{lhs|lhs|lhs|lhs|lhs|rhs|rhs|rhs}"]; } ``` ``` cpp const uint8_t *source = (const uint8_t *) _src + (_read / 8); ``` _read / 8 是用來看要從 src 的第幾個 bytes 開始讀起 Ex: _read = 20 (20 / 8 = 2) 所以 source 會指向 src 的第2個 bytes ``` cpp static const uint8_t read_mask[] = { 0x00, /* == 0 00000000b */ 0x80, /* == 1 10000000b */ 0xC0, /* == 2 11000000b */ 0xE0, /* == 3 11100000b */ 0xF0, /* == 4 11110000b */ 0xF8, /* == 5 11111000b */ 0xFC, /* == 6 11111100b */ 0xFE, /* == 7 11111110b */ 0xFF /* == 8 11111111b */ }; static const uint8_t write_mask[] = { 0xFF, /* == 0 11111111b */ 0x7F, /* == 1 01111111b */ 0x3F, /* == 2 00111111b */ 0x1F, /* == 3 00011111b */ 0x0F, /* == 4 00001111b */ 0x07, /* == 5 00000111b */ 0x03, /* == 6 00000011b */ 0x01, /* == 7 00000001b */ 0x00 /* == 8 00000000b */ }; ``` read_mask 用來避免讀入不該讀的部份 Ex: 要讀入 bit 0 - 5, mask = 11111100b, 用來避免讀入 bit6, 7 write_mask 用來保留不須寫入的部份的原始內容 Ex: 要寫入 bit 0 - 5, mask = 00000011b, 用來保留 bit6, 7 的原始內容 ### 從 src 讀入 ``` cpp size_t bitsize = (count > 8) ? 8 : count; ``` bitsize 是用來看這一輪要寫入幾個 bits, 因為一輪只能最多寫8個 bits, Ex: count = 13, 那第一輪就寫入8個 bits, 第二輪再寫入5個 bits ``` cpp uint8_t data = *source++; // how many bytes you want to write this round size_t bitsize = (count > 8) ? 8 : count; if (read_lhs > 0) { data <<= read_lhs; if (bitsize > read_rhs) // put the context in next bytes to data data |= (*source >> read_rhs); } ``` read_lhs > 0 代表沒有對齊, Ex: lhs = 3, 代表要以 bit 2到下一個 bytes 的 bit 1組成8 bit 來處理 ```graphviz digraph structs { rankdir=LR node[shape="record"] node0 [label="{data2|data3|data4|data5|data6|data7|X|X}"]; text [label="data <<= read_lhs", shape = "none"] text1 [label="data 目前讀的 bytes, *source 是下一個 bytes", shape = "none"] } ``` ```graphviz digraph structs { rankdir=LR node[shape="record"] node0 [label="{X|X|X|X|X|X|source0|source1}"]; text [label="*source >> read_rhs", shape = "none"] } ``` ```graphviz digraph structs { rankdir=LR node[shape="record"] node0 [label="{data2|data3|data4|data5|data6|data7|source0|source1}"]; text [label="data |= (*source >> read_rhs)", shape = "none"] } ``` ``` cpp // keep the number of bit in data you want to write if (bitsize < 8) data &= read_mask[bitsize]; ``` bitsize 如果<8代表要用 read_mask 來避免讀入不該讀的部份 ### 寫入 dest ``` cpp uint8_t mask = read_mask[write_lhs]; if (bitsize > write_rhs) { /* Cross multiple bytes */ // write first bytes *dest++ = (original & mask) | (data >> write_lhs); // keep the context of next bytes original = *dest & write_mask[bitsize - write_rhs]; // write next bytes *dest = original | (data << write_rhs); } ``` write_lhs 是要保留不寫入的部份, write_rhs 是要寫入新 data 的部份, Ex: write_lhs = 2 ```graphviz digraph structs { rankdir=LR node[shape="record"] node0 [label="{original0|original1|X|X|X|X|X|X}"]; text [label="(original & mask)", shape = "none"] } ``` ```graphviz digraph structs { rankdir=LR node[shape="record"] node0 [label="{X|X|data2|data3|data4|data5|data6|data7}"]; text [label="(data >> write_lhs)", shape = "none"] } ``` ```graphviz digraph structs { rankdir=LR node[shape="record"] node0 [label="{original0|original1|data2|data3|data4|data5|data6|data7}"]; text [label="*dest++ = (original & mask) | (data >> write_lhs)", shape = "none"] } ``` ``` cpp original = *dest & write_mask[bitsize - write_rhs]; // write next bytes *dest = original | (data << write_rhs); ``` 這邊是把還沒寫完的 bit 寫入下一個 bytes, 一樣先把不寫入的部份做保留, 然後寫入要寫入的部份 ``` cpp while (count > 0) { ... count -= bitsize; } ``` 這邊是把寫完的 bit 扣掉, 如果 count 還>0 代表要繼續寫下一輪 ## 重構 一次copy 64個bit ```cpp void bitcpy(void *_dest, /* Address of the buffer to write to */ size_t _write, /* Bit offset to start writing to */ const void *_src, /* Address of the buffer to read from */ size_t _read, /* Bit offset to start reading from */ size_t count) { uint64_t data; const uint64_t *source = _src; uint64_t *dest = _dest; data = *source; data = data << _read; // data >> _read + (63 - (_read + count) + 1) data = data >> (64 - count); data = data << (64 - (_read + count)); if(_read > _write){ data = data << (_read - _write); }else{ data = data >> (_write - _read); } *dest |= data; } ``` # question4 cstr buffer ```cpp typedef struct __cstr_data { char *cstr; uint32_t hash_size; uint16_t type; uint16_t ref; // reference counter 有多少 cstring 引用這個cstr } *cstring; typedef struct __cstr_buffer { cstring str; } cstr_buffer[1]; ``` interning pool and hash table ```cpp struct __cstr_node { char buffer[CSTR_INTERNING_SIZE]; struct __cstr_data str; struct __cstr_node *next; }; struct __cstr_pool { struct __cstr_node node[INTERNING_POOL_SIZE]; }; struct __cstr_interning { int lock; int index; unsigned size; unsigned total; struct __cstr_node **hash; struct __cstr_pool *pool; }; ``` 由測試程式的運作來解說 funciton 的功能 `測試程式` ```cpp static cstring cmp(cstring t) { CSTR_LITERAL(hello, "Hello string"); CSTR_BUFFER(ret); cstr_cat(ret, cstr_equal(hello, t) ? "equal" : "not equal"); return cstr_grab(CSTR_S(ret)); } static void test_cstr() { CSTR_BUFFER(a); cstr_cat(a, "Hello "); cstr_cat(a, "string"); cstring b = cmp(CSTR_S(a)); printf("%s\n", b->cstr); CSTR_CLOSE(a); cstr_release(b); } int main(int argc, char *argv[]) { test_cstr(); return 0; } ``` `CSTR_BUFFER(a)` 首先建立一個 cstr_buffer a `cstr_cat(a, "Hello ")` 將字串 "Hello" 接在 buffer 中的 cstring 的 cstr 指標後 如果 buffer 中 cstring 的 type 是 ONSTACK, str 會一個字一個字接上, 如果 buffer 中 cstring 的 type 不是 ONSTACK 或要字串長度超過 CSTR_STACK_SIZE, 則以 tmp 保存目前 buffer 中 cstring 並以 cstr_cat2 連接 ```cpp cstring cstr_cat(cstr_buffer sb, const char *str) { cstring s = sb->str; if (s->type == CSTR_ONSTACK) { int i = s->hash_size; while (i < CSTR_STACK_SIZE - 1) { // the dataType of s->cstr[i] is integer, *str store as ASCII code s->cstr[i] = *str; if (*str == 0)// '/0' == 0 return s; ++s->hash_size; ++str; ++i; } s->cstr[i] = 0; } cstring tmp = s; // concat str size > CSTR_STACK_SIZE // only cat 128 length sb->str = cstr_cat2(tmp->cstr, str); cstr_release(tmp); return sb->str; } ``` `cstr_cat2` 用來連接非 ONSTACK 和長度超過 CSTR_STACK_SIZE 的字串 用 `cstr_cat2` 連接完成的字串長度如果 < CSTR_INTERNING_SIZE 會做 `cstr_interning`, 如果沒有會回傳一個連接完成的 cstring 這個次是程式並沒有進到 `cstr_cat2` ```cpp= static cstring cstr_cat2(const char *a, const char *b) { size_t sa = strlen(a), sb = strlen(b); if (sa + sb < CSTR_INTERNING_SIZE) { char tmp[CSTR_INTERNING_SIZE]; memcpy(tmp, a, sa); memcpy(tmp + sa, b, sb); tmp[sa + sb] = 0; return cstr_interning(tmp, sa + sb, hash_blob(tmp, sa + sb)); } cstring p = xalloc(sizeof(struct __cstr_data) + sa + sb + 1); if (!p) return NULL; char *ptr = (char *) (p + 1); p->cstr = ptr; p->type = 0; p->ref = 1; memcpy(ptr, a, sa); memcpy(ptr + sa, b, sb); ptr[sa + sb] = 0; p->hash_size = 0; return p; } ``` `cstr_cat2` 中的 `hash_blob` 是用來計算 hash table 的 key value 之後會被進一步被 hash function 轉換程 index `hash_blob` ```cpp static inline uint32_t hash_blob(const char *buffer, size_t len) { const uint8_t *ptr = (const uint8_t *) buffer; size_t h = len; size_t step = (len >> 5) + 1; for (size_t i = len; i >= step; i -= step) h = h ^ ((h << 5) + (h >> 2) + ptr[i - 1]); return h == 0 ? 1 : h; } ``` 完成字串連接後會做 CSTR_LITERAL(hello, "Hello string"), 其中 CSTR_LITERAL() 會執行 `cstr_clone` 如果要 clone 的字串長度小於 CSTR_INTERNING_SIZE, 那就會先去 interning pool 中找, 如果有一樣的會回傳已存在 pool 中的字串, 如果沒有則會複製一份新的新字串並存於 interning pool 和 hash_table 中, 回傳新字串, 如果 > CSTR_INTERNING_SIZE 就直接複製一份新的 cstring ```cpp cstring cstr_clone(const char *cstr, size_t sz) { if (sz < CSTR_INTERNING_SIZE) return cstr_interning(cstr, sz, hash_blob(cstr, sz)); cstring p = xalloc(sizeof(struct __cstr_data) + sz + 1); if (!p) return NULL; void *ptr = (void *) (p + 1); p->cstr = ptr; p->type = 0; p->ref = 1; memcpy(ptr, cstr, sz); ((char *) ptr)[sz] = 0; p->hash_size = 0; return p; } ``` `CSTR_LOCK` 鎖上一個 spinlock 等待 interning 完成再 `CSTR_UNLOCK()` release lock 它會先做一次 interning 如果回傳的 cstring 是空的代表 hash table 不存在 或是 hash table 超過 threshold, 此時需要 `expand` `expand` 就是初始化 hash table size 成 HASH_START_SIZE 或是 新簡一個 double size 的 hash table, 並把之前 hash table 中的 node insert 進去 ```cpp static cstring cstr_interning(const char *cstr, size_t sz, uint32_t hash) { cstring ret; CSTR_LOCK(); ret = interning(&__cstr_ctx, cstr, sz, hash); if (!ret) { expand(&__cstr_ctx); ret = interning(&__cstr_ctx, cstr, sz, hash); } ++__cstr_ctx.total; CSTR_UNLOCK(); return ret; } ``` 如果 hash table 存在, 先去 interning pool 中找, 如果有一樣的會回傳已存在 pool 中的字串, 如果沒有則會複製一份新的新字串並存於 interning pool 和 hash table 中, 回傳新字串 ```cpp static cstring interning(struct __cstr_interning *si, const char *cstr, size_t sz, uint32_t hash) { if (!si->hash) return NULL; // hash function int index = (int) (hash & (si->size - 1)); struct __cstr_node *n = si->hash[index]; while (n) { if (n->str.hash_size == hash) { if (!strcmp(n->str.cstr, cstr)) return &n->str; } n = n->next; } // 80% (4/5) threshold if (si->total * 5 >= si->size * 4) return NULL; if (!si->pool) { si->pool = xalloc(sizeof(struct __cstr_pool)); si->index = 0; } n = &si->pool->node[si->index++]; memcpy(n->buffer, cstr, sz); n->buffer[sz] = 0; cstring cs = &n->str; cs->cstr = n->buffer; cs->hash_size = hash; cs->type = CSTR_INTERNING; cs->ref = 0; n->next = si->hash[index]; si->hash[index] = n; return cs; } ```