# 2021q1 Homework2 (quiz2) contributed by < `jonathanyang0227` > # Week 2 - [ ] Linux: 作業系統術語及概念 - [ ] 系統軟體開發思維 - [x] C 語言: 數值系統 - [x] C 語言: Bitwise 操作 - [ ] 題目 / 參考題解 - [ ] 為什麼要深入學習 C 語言? - [ ] 基於 C 語言標準研究與系統程式安全議題 - [?] C 語言:記憶體管理、對齊及硬體特性 - [x] C 語言: bit-field # 測驗一 :::success 1. 解釋上述程式碼運作原理,指出改進空間並著手實作 2. 研讀 Linux 核心的 lib/list_sort.c 原始程式碼,學習 sysprog21/linux-list 的手法,將 lib/list_sort.c 的實作抽離為可單獨執行 (standalone) 的使用層級應用程式,並設計效能評比的程式碼,說明 Linux 核心的 lib/list_sort.c 最佳化手法 3. 將你在 quiz1 提出的改進實作,和 Linux 核心的 lib/list_sort.c 進行效能比較,需要提出涵蓋足夠廣泛的 benchmark ::: ## 程式解釋 ### list_head ```c struct list_head { struct list_head *prev, *next; }; ``` 定義 doubly linked list 資料結構 ```graphviz digraph struct{ node[shape=record]; list[label=" <prev> prev |<head>head| next"] } ``` ### INIT_LIST_HEAD ```c static inline void INIT_LIST_HEAD(struct list_head *head) { head->next = head; head->prev = head; } ``` 設定 head 的 pointer 皆指向自己 ```graphviz digraph struct{ node[shape=record]; list[label=" <prev> prev |<head>head| <next>next"] list:prev->list:prev[ arrowsize=1]; list :next -> list[arrowhead=vee, tailclip=true, arrowsize=1]; } ``` ### list_add_tail ```c static inline void list_add_tail(struct list_head *node, struct list_head *head) { struct list_head *prev = head->prev; prev->next = node; node->next = head; node->prev = prev; head->prev = node; } ``` 新增一個 node 在 pre 及 head 中間 :::warning 看到函式名稱被誤導 一直以為自己理解錯誤 其實不是加在最尾端!!! > TODO: 對照看 Linux 核心原始程式碼的 `include/linux/list.h`,分析實作方式和命名 > :notes: jserv ::: ```graphviz digraph struct{ node[shape=record]; rankdir=LR; { rankdir=LR; node1[label="prev"]; node2[label="node"]; node3[label="head"]; } node1:n->node2[tailclip=true ,dir=both] node2:n->node3[tailclip=true ,dir=both] node3:n->node1:n } ``` ### list_del ```c static inline void list_del(struct list_head *node) { struct list_head *next = node->next, *prev = node->prev; next->prev = prev; prev->next = next; } ``` :::warning 避免書寫 `prev & next`,因為 Linux 核心存在大量的 bitwise 操作,恐怕會誤導。可改為「prev 及 next」 :notes: jserv ::: >已修改 謝謝老師 新增 prev 及 next ,分別接在 node 的前後; 在讓 prev 及 next 接在一起,就可以刪除 node ```graphviz digraph struct{ node[shape=record]; rankdir=LR; { rankdir=LR; node1[label="prev"]; node2[label="node",color=red]; node3[label="next"]; } node1:n->node2[tailclip=true ,dir=both] node2:n->node3[tailclip=true ,dir=both] } ``` ```graphviz digraph struct{ node[shape=record]; rankdir=LR; { rankdir=LR; node1[label="prev"]; node2[label="node",color=red]; node3[label="next"]; } node1:n->node3[tailclip=true ,dir=both] } ``` ### list_empty ```c static inline int list_empty(const struct list_head *head) { return (head->next == head); } ``` 判斷程式是否為空 (即 head 後面是否有接其他 node ) ### list_is_singular ```c static inline int list_is_singular(const struct list_head *head) { return (!list_empty(head) && head->prev == head->next); } ``` 判斷 head 是否為 list 中唯一一個元素 ### list_splice_tail ```c static inline void list_splice_tail(struct list_head *list, struct list_head *head) { struct list_head *head_last = head->prev; struct list_head *list_first = list->next, *list_last = list->prev; if (list_empty(list)) return; head->prev = list_last; list_last->next = head; list_first->prev = head_last; head_last->next = list_first; } ``` 此函數將 list 接在 head 的尾端,但不包含 list 本身 ```graphviz digraph struct{ node[shape=box] { node1[label=head] node2[label=head_last] } { rank=same node4[label=list_first] node3[label=list] node5[label=list_last] } { } } ``` ### list_cut_position ```c static inline void list_cut_position(struct list_head *head_to, struct list_head *head_from, struct list_head *node) { struct list_head *head_from_first = head_from->next; if (list_empty(head_from)) return; if (head_from == node) { INIT_LIST_HEAD(head_to); return; } head_from->next = node->next; head_from->next->prev = head_from; head_to->prev = node; node->next = head_to; head_to->next = head_from_first; head_to->next->prev = head_to; } ``` 函式中,從 head_from 中,切割 node以前的 list ,並接到 head_to ```c head_from->next = node->next; head_from->next->prev = head_from; ``` 將 head_from 連結到 node 的後方,進而切割出要連接的 list ```c head_to->prev = node; node->next = head_to; head_to->next = head_from_first; head_to->next->prev = head_to; ``` ### list_ele_t ```c typedef struct __element { char *value; struct __element *next; struct list_head list; } list_ele_t; ``` ### queue_t ```c typedef struct { list_ele_t *head; /* Linked list of elements */ list_ele_t *tail; size_t size; struct list_head list; } queue_t; ``` ### get_middle ```c static list_ele_t *get_middle(struct list_head *list) { struct list_head *fast = list->next, *slow; list_for_each (slow, list) { if (COND1 || COND2) break; fast = fast->next->next; } return list_entry(TTT, list_ele_t, list); } ``` 在函式中 `list for each` 走訪 list 中所有元素,可得知 slow 每走一步,fast 會走兩步,於是 fast 走到最後的時候,slow 會剛好在中間,故 `COND1` `COND2` 應為終止條件,我們從 `main()` 中 ```c newh->value = new_value; newh->next = q->head; q->head = newh; ``` 可以知道 list 為環狀 linked list,而中止條件應為回到 head,即為 `fast->next == list` or `fast->next->next == list` 而 `TTT` 應該為 slow ### list_merge ```c tatic void list_merge(struct list_head *lhs, struct list_head *rhs, struct list_head *head) { INIT_LIST_HEAD(head); if (list_empty(lhs)) { list_splice_tail(lhs, head); return; } if (list_empty(rhs)) { list_splice_tail(rhs, head); return; } while (!list_empty(lhs) && !list_empty(rhs)) { char *lv = list_entry(lhs->next, list_ele_t, list)->value; char *rv = list_entry(rhs->next, list_ele_t, list)->value; struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next; list_del(tmp); list_add_tail(tmp, head); } list_splice_tail(list_empty(lhs) ? rhs : lhs, head); } ``` 這個函式是用來比大小並進行合併 ``` c struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next; list_del(tmp); list_add_tail(tmp, head); ``` 先利用 `strcmp()` 找到比較小的值,再用 `list_del()` 分離出該節點,最後透過 `list_add_tail()` 接到 head 上 :::warning :question: 不了解 `list_entry()` 的用意,為何不能直接用 lhs->value > 不理解就說不懂,不要說「有點」,裝可愛對討論沒幫助。 > 思考物件封裝的議題,記住 Linux 核心 API 要儘量做到通用 (generic)。 > :notes: jserv ::: ### list_merge_sort ```c void list_merge_sort(queue_t *q) { if (list_is_singular(&q->list)) return; queue_t left; struct list_head sorted; INIT_LIST_HEAD(&left.list); list_cut_position(&left.list, &q->list, MMM); list_merge_sort(&left); list_merge_sort(q); list_merge(&left.list, &q->list, &sorted); INIT_LIST_HEAD(&q->list); list_splice_tail(&sorted, &q->list); } ``` 此函式是用遞迴的方法進行排序即合併,用方法為,找出中間點進行切割,再排列,最後合併 ```c list_cut_position(&left.list, &q->list, MMM); list_merge_sort(&left); list_merge_sort(q); ``` 從這個遞迴中可得知 `list_cut_position()` 應該是要將 list 分成一半,故 MMM 應該是中間的元素,於是我們可以知道應該是要用 `getmiddle()` 這個函式找到 q (未排序 list )的中間元素 故 MMM 應該是 `&get_middle(&q->list)->list` :::warning 感覺 queue_t 的資料結構有 list 會造成很多不必要的麻煩 > 理工人要避免說「感覺」,拿證據說話! > :notes: jserv ::: --- # 測驗二 :::success 1. 解釋上述程式碼運作原理 2. 在The Linux Kernel API 頁面搜尋 “power of 2”,可見 is_power_of_2,查閱 Linux 核心原始程式碼,舉例說明其作用和考量,特別是 round up/down power of 2 * 特別留意 __roundup_pow_of_two 和 __rounddown_pow_of_two 的實作機制 3. 研讀 slab allocator,探索 Linux 核心的 slab 實作,找出運用 power-of-2 的程式碼和解讀 ::: 因為沒有 bitwise 相關基礎,在寫作業前有加強一些相關的基礎 [數值系統](https://hackmd.io/@ENBEKyh8TRSGvqMy1_LUYQ/B1PHONLXO) ## 程式解釋 ```c uint16_t func(uint16_t N) { /* change all right side bits to 1 */ N |= N >> 1; N |= N >> X; N |= N >> Y; N |= N >> Z; return (N + 1) >> 1; } ``` #### 考慮函式 func 接受一個 16 位元無號整數 N,並回傳小於或等於N的 power-of-2 #### 假定 N = 10101~2~ = 21~10~,那麼 func(21) 要得到 16,請補完程式碼,使其運作符合預期。 我們可以從函式會回傳 power of 2 可得知,最後 return 的時候最高位 bit 是 1 其餘都是 0 ```c N |= N >> 1; ``` N `=|` N>>1 是將 N 與 N>>1 做 OR,在 assign 回 N * N ```graphviz digraph struct{ node[shape=record] node1[label="1|0|1|0|1"] } ``` * N >> 1 ```graphviz digraph struct{ node[shape=record] node1[label="0|1|0|1|0"] } ``` * N | N >> 1 = 11111 ```graphviz digraph struct{ node[shape=record] node1[label="1|1|1|1|1"] } ``` * ( N + 1 ) >> 1 ```graphviz digraph struct{ node[shape=record] node1[label="1|0|0|0|0|0"] } ``` 而以上其實已經達到題目要求,所以再往下是要確保其他條件下經過運算後一樣能得到答案 因此我設定 N = 10000000~2~ * N ```graphviz digraph struct{ node[shape=record] node1[label="1|0|0|0|0|0|0|0"] } ``` * N >> 1 ```graphviz digraph struct{ node[shape=record] node1[label="0|1|0|0|0|0|0|0"] } ``` * N | N >> 1 ```graphviz digraph struct{ node[shape=record] node1[label="1|1|0|0|0|0|0|0"] } ``` 到這裡可以看出,要讓後面都變成 1 , X = 2 * N ```graphviz digraph struct{ node[shape=record] node1[label="1|1|0|0|0|0|0|0"] } ``` * N >> 2 ```graphviz digraph struct{ node[shape=record] node1[label="0|0|1|1|0|0|0|0"] } ``` * N | N >> 2 ```graphviz digraph struct{ node[shape=record] node1[label="1|1|1|1|0|0|0|0"] } ``` 從這裡就可以知道每次做完,因為 OR 運算的特性,都會得到原本 "1" 數量2倍的值,故 Y 及 Z 分別為 4 及 8 # 測驗三 :::success 1. 解釋上述程式碼運作原理,並嘗試重寫為同樣功能但效率更高的實作 2. 在 Linux 核心原始程式碼找出逐 bit 進行資料複製的程式碼,並解說相對應的情境 ::: ## 程式解釋 ```c void bitcpy(void *_dest, /* Address of the buffer to write to */ size_t _write, /* Bit offset to start writing to */ const void *_src, /* Address of the buffer to read from */ size_t _read, /* Bit offset to start reading from */ size_t count) { size_t read_lhs = _read & 7; size_t read_rhs = 8 - read_lhs; const uint8_t *source = (const uint8_t *)_src + (_read / 8); size_t write_lhs = _write & 7; size_t write_rhs = 8 - write_lhs; uint8_t *dest = (uint8_t *)_dest + (_write / 8); ``` 首先先看到 ```c size_t read_lhs = _read & 7; size_t read_rhs = 8 - read_lhs; const uint8_t *source = (const uint8_t *)_src + (_read / 8); ``` * `read` 是要讀取的長度 * `_read & 7` 相當於 `read % 8` 是要算出最後一段不滿一個位元組( 8 bit )的長度並存到 `read_lhs` 中 * `write_lhs` `write_rhs` `dest` 也是同理 * `_src` 是從哪裡開始讀 `dest` 則是從哪裡開始寫 * `source` 是 `_src` + `read` /8 ,表示現在是每一組位元組開始的位置, +1 表下一個位元組 :::info TODO: `_read & 7` 相當於 `read % 8` 實作 ::: ```c while (count > 0) { uint8_t data = *source++; size_t bitsize = (count > 8) ? 8 : count; if (read_lhs > 0) { RRR; if (bitsize > read_rhs) data |= (*source >> read_rhs); } if (bitsize < 8) data &= read_mask[bitsize]; uint8_t original = *dest; uint8_t mask = read_mask[write_lhs]; if (bitsize > write_rhs) { /* Cross multiple bytes */ *dest++ = (original & mask) | (data >> write_lhs); original = *dest & write_mask[bitsize - write_rhs]; *dest = original | (data << write_rhs); } else { // Since write_lhs + bitsize is never >= 8, no out-of-bound access. DDD; *dest++ = (original & mask) | (data >> write_lhs); } count -= bitsize; } ``` 這部份研究很久 QQ ```c uint8_t data = *source++; size_t bitsize = (count > 8) ? 8 : count; ``` `data` 為指向 **下次** 進行複製的位元組的開始位置 +1 代表下一組開始的位置 ```c if (read_lhs > 0) { RRR; if (bitsize > read_rhs) data |= (*source >> read_rhs); } ``` 這邊是說看看有沒有每次都從改組的第一個元素開始,若沒有要左移 再來看 `bitsize > read_rhs` 若有跨位元組需要移動,將瘦下的對齊尾端 最後透過 OR 複製其中內容 於是可以得知 `RRR` 是複製起始點開始到該位元組結束 **`RRR` = `data <<= read_lhs`** ```c if (bitsize < 8) data &= read_mask[bitsize]; ``` 如果根本不滿 8 個元素 把多餘的 bit 設為 0 避免多寫入 ```c uint8_t original = *dest; uint8_t mask = read_mask[write_lhs]; ``` 將要被覆寫的地方設為 0 ```c if (bitsize > write_rhs) { /* Cross multiple bytes */ *dest++ = (original & mask) | (data >> write_lhs); original = *dest & write_mask[bitsize - write_rhs]; *dest = original | (data << write_rhs); } ``` * `original & mask` 初始化 `original` 使要被覆蓋的地方為0 * `(original & mask) | (data >> write_lhs);` 將 data 向後移 `write_lhs` 使 data 及 original 可以對在相同位置上 * `original = *dest & write_mask[bitsize - write_rhs];` 再初始化下一個位元組要被覆寫的地方 * `dest = original | (data << write_rhs);` 將 data 向左移使 data 及 original 應對到相同位置 ```c else { // Since write_lhs + bitsize is never >= 8, no out-of-bound access. DDD; *dest++ = (original & mask) | (data >> write_lhs); } ``` 最後就剩下不滿一位元組的要複製 因為 `Since write_lhs + bitsize is never >= 8, no out-of-bound access.` **`DDD` = `mask |= write_mask[write_lhs + bitsize]`** # 測驗四 :::success 1. 解釋上述程式碼運作原理 2. 上述程式碼使用到 gcc atomics builtins,請透過 POSIX Thread 在 GNU/Linux 驗證多執行緒的環境中,string interning 能否符合預期地運作?若不能,請提出解決方案 3. chriso/intern 是個更好的 string interning 實作,請探討其特性和設計技巧,並透過內建的 benchmark 分析其效能表現 ::: ## 程式解釋 * **cstr.h** ```c #pragma once #include <stddef.h> #include <stdint.h> #include <stdlib.h> enum { CSTR_PERMANENT = 1, CSTR_INTERNING = 2, CSTR_ONSTACK = 4, }; #define CSTR_INTERNING_SIZE (32) #define CSTR_STACK_SIZE (128) typedef struct __cstr_data { char *cstr; uint32_t hash_size; uint16_t type; uint16_t ref; } * cstring; typedef struct __cstr_buffer { cstring str; } cstr_buffer[1]; #define CSTR_S(s) ((s)->str) #define CSTR_BUFFER(var) \ char var##_cstring[CSTR_STACK_SIZE] = {0}; \ struct __cstr_data var##_cstr_data = {var##_cstring, 0, CSTR_ONSTACK, 0}; \ cstr_buffer var; \ var->str = &var##_cstr_data; #define CSTR_LITERAL(var, cstr) \ static cstring var = NULL; \ if (!var) { \ cstring tmp = cstr_clone("" cstr, (sizeof(cstr) / sizeof(char)) - 1); \ if (tmp->type == 0) { \ tmp->type = CSTR_PERMANENT; \ tmp->ref = 0; \ } \ if (!__sync_bool_compare_and_swap(&var, NULL, tmp)) { \ if (tmp->type == CSTR_PERMANENT) \ free(tmp); \ } \ } #define CSTR_CLOSE(var) \ do { \ if (!(var)->str->type) \ cstr_release((var)->str); \ } while (0) /* Public API */ cstring cstr_grab(cstring s); cstring cstr_clone(const char *cstr, size_t sz); cstring cstr_cat(cstr_buffer sb, const char *str); int cstr_equal(cstring a, cstring b); void cstr_release(cstring s); ``` 首先我先注意到了在 include 前有 [`#pragma once`](https://en.wikipedia.org/wiki/Pragma_once) 根據維基百科,其用途為 cause the current source file to be included only once in a single compilation. 可以避免被同一個檔案被多次 include [enumeration](https://www.geeksforgeeks.org/enumeration-enum-c/) 可以幫數字取名字,方便閱讀 ```c typedef struct __cstr_data { char *cstr; uint32_t hash_size; uint16_t type; uint16_t ref; } * cstring; typedef struct __cstr_buffer { cstring str; } cstr_buffer[1]; ``` * **cstr.c** ```c #include <errno.h> #include <stdarg.h> #include <stdio.h> #include <string.h> #include "cstr.h" #define INTERNING_POOL_SIZE 1024 #define HASH_START_SIZE 16 /* must be power of 2 */ struct __cstr_node { char buffer[CSTR_INTERNING_SIZE]; struct __cstr_data str; struct __cstr_node *next; }; struct __cstr_pool { struct __cstr_node node[INTERNING_POOL_SIZE]; }; struct __cstr_interning { int lock; int index; unsigned size; unsigned total; struct __cstr_node **hash; struct __cstr_pool *pool; }; static struct __cstr_interning __cstr_ctx; /* FIXME: use C11 atomics */ #define CSTR_LOCK() \ ({ \ while (__sync_lock_test_and_set(&(__cstr_ctx.lock), 1)) { \ } \ }) #define CSTR_UNLOCK() ({ __sync_lock_release(&(__cstr_ctx.lock)); }) static void *xalloc(size_t n) { void *m = malloc(n); if (!m) exit(-1); return m; } static inline void insert_node(struct __cstr_node **hash, int sz, struct __cstr_node *node) { uint32_t h = node->str.hash_size; int index = h & (sz - 1); node->next = hash[index]; hash[index] = node; } static void expand(struct __cstr_interning *si) { unsigned new_size = si->size * 2; if (new_size < HASH_START_SIZE) new_size = HASH_START_SIZE; struct __cstr_node **new_hash = xalloc(sizeof(struct __cstr_node *) * new_size); memset(new_hash, 0, sizeof(struct __cstr_node *) * new_size); for (unsigned i = 0; i < si->size; ++i) { struct __cstr_node *node = si->hash[i]; while (node) { struct __cstr_node *tmp = node->next; insert_node(new_hash, new_size, node); node = tmp; } } free(si->hash); si->hash = new_hash; si->size = new_size; } static cstring interning(struct __cstr_interning *si, const char *cstr, size_t sz, uint32_t hash) { if (!si->hash) return NULL; int index = (int) (hash & (si->size - 1)); struct __cstr_node *n = si->hash[index]; while (n) { if (n->str.hash_size == hash) { if (!strcmp(n->str.cstr, cstr)) return &n->str; } n = n->next; } // 80% (4/5) threshold if (si->total * 5 >= si->size * 4) return NULL; if (!si->pool) { si->pool = xalloc(sizeof(struct __cstr_pool)); si->index = 0; } n = &si->pool->node[si->index++]; memcpy(n->buffer, cstr, sz); n->buffer[sz] = 0; cstring cs = &n->str; cs->cstr = n->buffer; cs->hash_size = hash; cs->type = CSTR_INTERNING; cs->ref = 0; n->next = si->hash[index]; si->hash[index] = n; return cs; } static cstring cstr_interning(const char *cstr, size_t sz, uint32_t hash) { cstring ret; CSTR_LOCK(); ret = interning(&__cstr_ctx, cstr, sz, hash); if (!ret) { expand(&__cstr_ctx); ret = interning(&__cstr_ctx, cstr, sz, hash); } ++__cstr_ctx.total; CSTR_UNLOCK(); return ret; } static inline uint32_t hash_blob(const char *buffer, size_t len) { const uint8_t *ptr = (const uint8_t *) buffer; size_t h = len; size_t step = (len >> 5) + 1; for (size_t i = len; i >= step; i -= step) h = h ^ ((h << 5) + (h >> 2) + ptr[i - 1]); return h == 0 ? 1 : h; } cstring cstr_clone(const char *cstr, size_t sz) { if (sz < CSTR_INTERNING_SIZE) return cstr_interning(cstr, sz, hash_blob(cstr, sz)); cstring p = xalloc(sizeof(struct __cstr_data) + sz + 1); if (!p) return NULL; void *ptr = (void *) (p + 1); p->cstr = ptr; p->type = 0; p->ref = 1; memcpy(ptr, cstr, sz); ((char *) ptr)[sz] = 0; p->hash_size = 0; return p; } cstring cstr_grab(cstring s) { if (s->type & (CSTR_PERMANENT | CSTR_INTERNING)) return s; if (s->type == CSTR_ONSTACK) return cstr_clone(s->cstr, s->hash_size); if (s->ref == 0) s->type = CSTR_PERMANENT; else __sync_add_and_fetch(&s->ref, 1); return s; } void cstr_release(cstring s) { if (s->type || !s->ref) return; if (__sync_sub_and_fetch(&s->ref, 1) == 0) free(s); } static size_t cstr_hash(cstring s) { if (s->type == CSTR_ONSTACK) return hash_blob(s->cstr, s->hash_size); if (s->hash_size == 0) s->hash_size = hash_blob(s->cstr, strlen(s->cstr)); return s->hash_size; } int cstr_equal(cstring a, cstring b) { if (a == b) return 1; if ((a->type == CSTR_INTERNING) && (b->type == CSTR_INTERNING)) return 0; if ((a->type == CSTR_ONSTACK) && (b->type == CSTR_ONSTACK)) { if (a->hash_size != b->hash_size) return 0; return memcmp(a->cstr, b->cstr, a->hash_size) == 0; } uint32_t hasha = cstr_hash(a); uint32_t hashb = cstr_hash(b); if (hasha != hashb) return 0; return !strcmp(a->cstr, b->cstr); } static cstring cstr_cat2(const char *a, const char *b) { size_t sa = strlen(a), sb = strlen(b); if (sa + sb < CSTR_INTERNING_SIZE) { char tmp[CSTR_INTERNING_SIZE]; memcpy(tmp, a, sa); memcpy(tmp + sa, b, sb); tmp[sa + sb] = 0; return cstr_interning(tmp, sa + sb, hash_blob(tmp, sa + sb)); } cstring p = xalloc(sizeof(struct __cstr_data) + sa + sb + 1); if (!p) return NULL; char *ptr = (char *) (p + 1); p->cstr = ptr; p->type = 0; p->ref = 1; memcpy(ptr, a, sa); memcpy(ptr + sa, b, sb); ptr[sa + sb] = 0; p->hash_size = 0; return p; } cstring cstr_cat(cstr_buffer sb, const char *str) { cstring s = sb->str; if (s->type == CSTR_ONSTACK) { int i = CCC; while (i < CSTR_STACK_SIZE - 1) { s->cstr[i] = *str; if (*str == 0) return s; ++s->hash_size; ++str; ++i; } s->cstr[i] = 0; } cstring tmp = s; sb->str = cstr_cat2(tmp->cstr, str); cstr_release(tmp); return sb->str; } ```