Try   HackMD

2021q1 Homework2 (quiz2)

contributed by < qwe661234 >

tags: linux2021

question 1

queue element

typedef struct __element {
    char *value;
    struct __element *next;
    struct list_head list;
} list_ele_t;

queue

typedef struct {
    list_ele_t *head; /* Linked list of elements */
    list_ele_t *tail;
    size_t size;
    struct list_head list;
} queue_t;

list head

struct list_head {
    struct list_head *prev, *next;
};

container_of

#ifndef container_of
#define container_of(ptr, type, member)                            \
    __extension__({                                                \
        const __typeof__(((type *) 0)->member) *__pmember = (ptr); \
        (type *) ((char *) __pmember - offsetof(type, member));    \
    })
#endif
__extension__

GCC uses the extension attribute when using the -ansi flag to avoid warnings in headers with GCC extensions.

__typeof__(((type *) 0)->member)

ANSI C 標准允許變量0的常量被強制轉換成任何一種類型的指針,並且轉換的結果是個 NULL, 所以 (type *) 0 就是資料型別為 type *的 NULL 指針, 雖然取用 NULL 指針去指向 member 是非法的, 但前面加上 typeof 後, 編譯器不會去訪問 NULL->member, 而是單純去取 member 的資料型別

offsetof(type, member)

它的作用是獲取 struct 中某個成員相對於該 struct 首元素地址的偏移量

reference:

list_entry

#define list_first_entry(head, type, member) \
    list_entry((head)->next, type, member)

list_add_tail function

static inline void list_add_tail(struct list_head *node, struct list_head *head)
{
    struct list_head *prev = head->prev;

    prev->next = node;
    node->next = head;
    node->prev = prev;
    head->prev = node;
}

Black: original
Red: after function







structs



node0

head



node2

first_ele



node0->node2






node0->node2






node1

last_ele



node1->node0






node3

newnode



node1->node3






node3->node0






prev
prev



prev->node1





list_del function

static inline void list_del(struct list_head *node)
{
    struct list_head *next = node->next, *prev = node->prev;
    next->prev = prev; prev->next = next;
}

Black: original
Red: after function







structs



node0

node



node2

element



node0->node2






node1

element



node1->node0






node1->node2






list_empty function
檢查 list head 是否有連接 node

    static inline int list_empty(const struct list_head *head)
{
    // point to self 
    return (head->next == head);
}

list_is_singular function
檢查 list head 後面是否只連接一個 node

static inline int list_is_singular(const struct list_head *head)
{
    // 1 or 2
    return (!list_empty(head) && head->prev == head->next);
}

list_splice_tail function
將所有以 list 為 list head 的 list node 接在以 head 為 list head 的最後一個 list node 之後

static inline void list_splice_tail(struct list_head *list,
                                    struct list_head *head)
{
    // head_ last means tail 
    struct list_head *head_last = head->prev;
    struct list_head *list_first = list->next, *list_last = list->prev;

    if (list_empty(list))
        return;

    head->prev = list_last;
    list_last->next = head;

    list_first->prev = head_last;
    head_last->next = list_first;
}

Black: original
Red: after function







structs



node0

list



node1

list node 1
 (list first)



node0->node1






node2

list node 2
 (list last)



node1->node2






node2->node0






hnode0

head



node2->hnode0






hnode1

head node 1



hnode0->hnode1






hnode2

head node 2
 (head last)



hnode1->hnode2






hnode2->node1






hnode2->hnode0






Result







structs



node1

list node 1
 (list first)



node2

list node 2
 (list last)



node1->node2






hnode0

head



node2->hnode0






hnode1

head node 1



hnode0->hnode1






hnode2

head node 2
 (head last)



hnode1->hnode2






hnode2->node1






list_cut_position
將由 head_from 為list head 的第一個 list_node 至 node (parameter 中的 node) 接在 list head head_to 之後

static inline void list_cut_position(struct list_head *head_to,
                                     struct list_head *head_from,
                                     struct list_head *node)
{
    struct list_head *head_from_first = head_from->next;

    if (list_empty(head_from))
        return;

    if (head_from == node) {
        INIT_LIST_HEAD(head_to);
        return;
    }
    // begin to cut from headfirst to node, and list it to head_to 
    head_from->next = node->next;
    head_from->next->prev = head_from;

    head_to->prev = node;
    node->next = head_to;
    head_to->next = head_from_first;
    head_to->next->prev = head_to;
}

Black: original
Red: after function







structs



node0

head_from



node1

node 1
 (head from first)



node0->node1






node3

node 3



node0->node3






node2

node 2
 (node)



node1->node2






node2->node3






node4

node 4



node3->node4






node4->node0






hnode0

head_to



hnode0->node1






hnode0->node2






Result







structs



node0

head_from



node3

node 3



node0->node3






node1

node 1
 (head from first)



node2

node 2
 (node)



node2->node1






node4

node 4



node3->node4






node4->node0






hnode0

head_to



hnode0->node1






hnode0->node2






get_middle function
以 slow 一次走訪一格 list_node, fast 一次走訪兩格 list_node 的方式, 當fast 走訪一整個 list 將要回到 list head 時, slow 剛好會走到 list 的中間點

static list_ele_t *get_middle(struct list_head *list)
{
    struct list_head *fast = list->next, *slow;
    list_for_each (slow, list) {
        /*用來判斷 fast 是否走訪整個 list 即將回到 list head*/
        if (fast->next == list || fast->next->next == list)
            break;
        fast = fast->next->next;
    }
    return list_entry(slow, list_ele_t, list);
}

重構list_merge functionget_middle function
list_merge functionget_middle function 合寫成一個 function, 因為 list_merge function 中的 head_from 就是 get_middle function 中的 list, 所以合寫後只須傳入兩個參數, 不需要再多傳入 middle node

    list_cut_position(&left.list, &q->list, &get_middle(&q->list)->list);

變更為:

    list_cut_middle(&left.list, &q->list);
static inline void list_cut_middle(struct list_head *head_to,
                                   struct list_head *head_from)
{
    struct list_head *fast = head_from->next, *slow;
    struct list_head *head_from_first = head_from->next;
    
    if (list_empty(head_from))
        return;
    // find middle
    list_for_each (slow, head_from) {
        if (fast->next == head_from || fast->next->next == head_from)
            break;
        fast = fast->next->next;
    }
    // begin to cut from headfirst to node, and list it to head_to 
    head_from->next = slow->next;
    head_from->next->prev = head_from;

    head_to->prev = slow;
    slow->next = head_to;
    head_to->next = head_from_first;
    head_to->next->prev = head_to;
}

list_merge function
將兩個 list 排序並合併, if 的部份是因為 get_middle 的機制, 如果切分時只有一個 list node, 那個 list node 會接在 lhs 上, 所以只要 lhs 沒有接任何 list node, 那 rhs 必然也沒有接任何 list node, 故可以直接結束 funciton

static void list_merge(struct list_head *lhs,
                       struct list_head *rhs,
                       struct list_head *head)
{
    INIT_LIST_HEAD(head);
    if (list_empty(lhs)) {
        list_splice_tail(lhs, head);
        return;
    }
    if (list_empty(rhs)) {
        list_splice_tail(rhs, head);
        return;
    }

    while (!list_empty(lhs) && !list_empty(rhs)) {
        char *lv = list_entry(lhs->next, list_ele_t, list)->value;
        char *rv = list_entry(rhs->next, list_ele_t, list)->value;
        struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next;
        list_del(tmp);
        list_add_tail(tmp, head);
    }
    list_splice_tail(list_empty(lhs) ? rhs : lhs, head);
}

重構 list_merge function
如果 rhs 為空就把 lhs 的 list node 接在 head 之後, lhs 為空則 rhs 必為空,所以行14即可實做, 且 list head 為空就會跳出迴圈, 無須一開始的兩個 if

static void list_merge(struct list_head *lhs,
                       struct list_head *rhs,
                       struct list_head *head)
{
    INIT_LIST_HEAD(head);

    while (!list_empty(lhs) && !list_empty(rhs)) {
        char *lv = list_entry(lhs->next, list_ele_t, list)->value;
        char *rv = list_entry(rhs->next, list_ele_t, list)->value;
        struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next;
        list_del(tmp);
        list_add_tail(tmp, head);
    }
    list_splice_tail(list_empty(lhs) ? rhs : lhs, head);
}

question 2

uint16_t func(uint16_t N) {
    /* change all right side bits to 1 */
    N |= N >> 1;
    N |= N >> 2;
    N |= N >> 4;
    N |= N >> 8;

    return (N + 1) >> 1;
}

Ex: N =

100000000000002 =
1638410

原理是確保16個 bit 都能變為1, 最後做+1是為了得到最接近且比 N 大的 power of two, 接著 >> 1 (也就是除以2), 就可以得到最接近且比 N 小的 power of two

 N |= N >> 1
0100000000000000
0010000000000000
----------------
0110000000000000

 N |= N >> 2;
0110000000000000
0001100000000000
----------------
0111100000000000

 N |= N >> 4;
0111100000000000
0000011110000000
----------------
0111111110000000

 N |= N >> 8;
0111111110000000
0000000000111111
----------------
0111111111111111

(N + 1) = 1000000000000000
(N + 1) >> 1 = 0100000000000000

is_power_of_2

static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
	return (n != 0 && ((n & (n - 1)) == 0));
}

原理:
Ex n =

10102,
10102
&
10012
=
10002
!= 0, 若 n =
10002
,
10002
&
01112
= 0

roundup_pow_of_two

static inline __attribute__((const))
unsigned long __roundup_pow_of_two(unsigned long n)
{
	return 1UL << fls_long(n - 1);
}

rounddown_pow_of_two

static inline __attribute__((const))
unsigned long __rounddown_pow_of_two(unsigned long n)
{
	return 1UL << (fls_long(n) - 1);
}

1UL 是代表值為1的 unsigned long
roundup_pow_of_two 和 rounddown_pow_of_two 都有使用到 fls_long
fls 為 find last set bit 的縮寫,就是找到傳入參數的最靠近 MSB 且是1的 bit 是第幾個 bit 並回傳. 將 1UL << 回傳值即可得到比參數 n 大且最靠近的 pow_of_two => __roundup_pow_of_two, 將回傳值 - 1 再將 1UL << 回傳值即可得到比參數 n 小且最靠近的 pow_of_two => __rounddown_pow_of_two, 在執行 function 前會先考慮系統是64 bit 架構還是32 bit
fls_long

static inline unsigned fls_long(unsigned long l)
{
	if (sizeof(l) == 4)
		return fls(l);
	return fls64(l);
}
#if BITS_PER_LONG == 32
static __always_inline int fls64(__u64 x)
{
	__u32 h = x >> 32;
	if (h)
		return fls(h) + 32;
	return fls(x);
}
#elif BITS_PER_LONG == 64
static __always_inline int fls64(__u64 x)
{
	if (x == 0)
		return 0;
	return __fls(x) + 1;
}

fls and __fls

static inline __attribute__ ((const)) int __fls(unsigned long x)
{
	if (!x)
		return 0;
	else
		return fls(x) - 1;
}
static inline __attribute__ ((const)) int fls(unsigned long x)
{
	int n;

	asm volatile(
	"	fls.f	%0, %1		\n"  /* 0:31; 0(Z) if src 0 */
	"	add.nz	%0, %0, 1	\n"  /* 0:31 -> 1:32 */
	: "=r"(n)	/* Early clobber not needed */
	: "r"(x)
	: "cc");

	return n;
}

question3

/*read from src and write to dest*/
void bitcpy(void *_dest,      /* Address of the buffer to write to */
            size_t _write,    /* Bit offset to start writing to */
            const void *_src, /* Address of the buffer to read from */
            size_t _read,     /* Bit offset to start reading from */
            size_t count)
{
    size_t read_lhs = _read & 7;
    size_t read_rhs = 8 - read_lhs;
    const uint8_t *source = (const uint8_t *) _src + (_read / 8);
    size_t write_lhs = _write & 7;
    size_t write_rhs = 8 - write_lhs;
    uint8_t *dest = (uint8_t *) _dest + (_write / 8);

    static const uint8_t read_mask[] = {
        0x00, /*    == 0    00000000b   */
        0x80, /*    == 1    10000000b   */
        0xC0, /*    == 2    11000000b   */
        0xE0, /*    == 3    11100000b   */
        0xF0, /*    == 4    11110000b   */
        0xF8, /*    == 5    11111000b   */
        0xFC, /*    == 6    11111100b   */
        0xFE, /*    == 7    11111110b   */
        0xFF  /*    == 8    11111111b   */
    };

    static const uint8_t write_mask[] = {
        0xFF, /*    == 0    11111111b   */
        0x7F, /*    == 1    01111111b   */
        0x3F, /*    == 2    00111111b   */
        0x1F, /*    == 3    00011111b   */
        0x0F, /*    == 4    00001111b   */
        0x07, /*    == 5    00000111b   */
        0x03, /*    == 6    00000011b   */
        0x01, /*    == 7    00000001b   */
        0x00  /*    == 8    00000000b   */
    };

    while (count > 0) {
        uint8_t data = *source++;
        // how many bytes you want to write this round
        size_t bitsize = (count > 8) ? 8 : count;
        if (read_lhs > 0) {
            data <<= read_lhs;
            if (bitsize > read_rhs)
                // put the context in next bytes to data               
                data |= (*source >> read_rhs);
        }
        // keep the number of bit in data you want to write
        if (bitsize < 8)
            data &= read_mask[bitsize];

        uint8_t original = *dest;
        uint8_t mask = read_mask[write_lhs];
        if (bitsize > write_rhs) {
            /* Cross multiple bytes */
            // write first bytes
            *dest++ = (original & mask) | (data >> write_lhs);
            // keep the context of next bytes
            original = *dest & write_mask[bitsize - write_rhs];
            // write next bytes
            *dest = original | (data << write_rhs);
        } else {
            // Since write_lhs + bitsize is never >= 8, no out-of-bound access.
            
            // keep context you do not write from tail
            mask |= write_mask[write_lhs + bitsize];
            // write bytes
            *dest++ = (original & mask) | (data >> write_lhs);
        }

        count -= bitsize;
    }
}
    size_t read_lhs = _read & 7;
    size_t read_rhs = 8 - read_lhs;

_read & 7 與 _read % 8 同義, 用來看8 bit 中 read 的 lhs 有幾個 bit, 剩下 rhs 就是 8 - lhs
Ex: _read = 4







structs



node0

lhs

lhs

lhs

lhs

lhs

rhs

rhs

rhs



    const uint8_t *source = (const uint8_t *) _src + (_read / 8);

_read / 8 是用來看要從 src 的第幾個 bytes 開始讀起
Ex: _read = 20 (20 / 8 = 2) 所以 source 會指向 src 的第2個 bytes

    static const uint8_t read_mask[] = {
        0x00, /*    == 0    00000000b   */
        0x80, /*    == 1    10000000b   */
        0xC0, /*    == 2    11000000b   */
        0xE0, /*    == 3    11100000b   */
        0xF0, /*    == 4    11110000b   */
        0xF8, /*    == 5    11111000b   */
        0xFC, /*    == 6    11111100b   */
        0xFE, /*    == 7    11111110b   */
        0xFF  /*    == 8    11111111b   */
    };

    static const uint8_t write_mask[] = {
        0xFF, /*    == 0    11111111b   */
        0x7F, /*    == 1    01111111b   */
        0x3F, /*    == 2    00111111b   */
        0x1F, /*    == 3    00011111b   */
        0x0F, /*    == 4    00001111b   */
        0x07, /*    == 5    00000111b   */
        0x03, /*    == 6    00000011b   */
        0x01, /*    == 7    00000001b   */
        0x00  /*    == 8    00000000b   */
    };

read_mask 用來避免讀入不該讀的部份
Ex: 要讀入 bit 0 - 5, mask = 11111100b, 用來避免讀入 bit6, 7
write_mask 用來保留不須寫入的部份的原始內容
Ex: 要寫入 bit 0 - 5, mask = 00000011b, 用來保留 bit6, 7 的原始內容

從 src 讀入

    size_t bitsize = (count > 8) ? 8 : count;

bitsize 是用來看這一輪要寫入幾個 bits, 因為一輪只能最多寫8個 bits, Ex: count = 13, 那第一輪就寫入8個 bits, 第二輪再寫入5個 bits

    uint8_t data = *source++;
    // how many bytes you want to write this round
    size_t bitsize = (count > 8) ? 8 : count;
    if (read_lhs > 0) {
        data <<= read_lhs;
        if (bitsize > read_rhs)
            // put the context in next bytes to data               
            data |= (*source >> read_rhs);
    }

read_lhs > 0 代表沒有對齊, Ex: lhs = 3, 代表要以 bit 2到下一個 bytes 的 bit 1組成8 bit 來處理







structs



node0

data2

data3

data4

data5

data6

data7

X

X



text
data <<= read_lhs



text1
data 目前讀的 bytes, *source 是下一個 bytes









structs



node0

X

X

X

X

X

X

source0

source1



text
*source >> read_rhs









structs



node0

data2

data3

data4

data5

data6

data7

source0

source1



text
data |= (*source >> read_rhs)



// keep the number of bit in data you want to write
    if (bitsize < 8)
        data &= read_mask[bitsize];

bitsize 如果<8代表要用 read_mask 來避免讀入不該讀的部份

寫入 dest

    uint8_t mask = read_mask[write_lhs];
    if (bitsize > write_rhs) {
        /* Cross multiple bytes */
        // write first bytes
        *dest++ = (original & mask) | (data >> write_lhs);
        // keep the context of next bytes
        original = *dest & write_mask[bitsize - write_rhs];
        // write next bytes
        *dest = original | (data << write_rhs);
    } 

write_lhs 是要保留不寫入的部份, write_rhs 是要寫入新 data 的部份, Ex: write_lhs = 2







structs



node0

original0

original1

X

X

X

X

X

X



text
(original & mask)









structs



node0

X

X

data2

data3

data4

data5

data6

data7



text
(data >> write_lhs)









structs



node0

original0

original1

data2

data3

data4

data5

data6

data7



text
*dest++ = (original & mask) | (data >> write_lhs)



    original = *dest & write_mask[bitsize - write_rhs];
    // write next bytes
    *dest = original | (data << write_rhs);

這邊是把還沒寫完的 bit 寫入下一個 bytes, 一樣先把不寫入的部份做保留, 然後寫入要寫入的部份

while (count > 0) {
        ...
        count -= bitsize;
    }

這邊是把寫完的 bit 扣掉, 如果 count 還>0 代表要繼續寫下一輪

重構

一次copy 64個bit

void bitcpy(void *_dest,      /* Address of the buffer to write to */
            size_t _write,    /* Bit offset to start writing to */
            const void *_src, /* Address of the buffer to read from */
            size_t _read,     /* Bit offset to start reading from */
            size_t count)
{
    uint64_t data;
    const uint64_t *source = _src;
    uint64_t *dest = _dest;
    data = *source;
    data = data << _read;
    // data >> _read + (63 - (_read + count) + 1)
    data = data >> (64 - count);
    data = data << (64 - (_read + count));
    if(_read > _write){
        data = data << (_read - _write);
    }else{
        data = data >> (_write - _read);
    }
    *dest |= data;
}

question4

cstr buffer

typedef struct __cstr_data {
    char *cstr;
    uint32_t hash_size;
    uint16_t type;
    uint16_t ref; // reference counter 有多少 cstring 引用這個cstr
} *cstring;

typedef struct __cstr_buffer {
    cstring str;
} cstr_buffer[1];

interning pool and hash table

struct __cstr_node {
    char buffer[CSTR_INTERNING_SIZE];
    struct __cstr_data str;
    struct __cstr_node *next;
};

struct __cstr_pool {
    struct __cstr_node node[INTERNING_POOL_SIZE];
};

struct __cstr_interning {
    int lock;
    int index;
    unsigned size;
    unsigned total;
    struct __cstr_node **hash;
    struct __cstr_pool *pool;
};

由測試程式的運作來解說 funciton 的功能
測試程式

static cstring cmp(cstring t)
{
    CSTR_LITERAL(hello, "Hello string");
    CSTR_BUFFER(ret);
    cstr_cat(ret, cstr_equal(hello, t) ? "equal" : "not equal");
    return cstr_grab(CSTR_S(ret));
}

static void test_cstr()
{
    CSTR_BUFFER(a);
    cstr_cat(a, "Hello ");
    cstr_cat(a, "string");
    cstring b = cmp(CSTR_S(a));
    printf("%s\n", b->cstr);
    CSTR_CLOSE(a);
    cstr_release(b);
}

int main(int argc, char *argv[])
{
    test_cstr();
    return 0;
}

CSTR_BUFFER(a) 首先建立一個 cstr_buffer a
cstr_cat(a, "Hello ") 將字串 "Hello" 接在 buffer 中的 cstring 的 cstr 指標後
如果 buffer 中 cstring 的 type 是 ONSTACK, str 會一個字一個字接上, 如果 buffer 中 cstring 的 type 不是 ONSTACK 或要字串長度超過 CSTR_STACK_SIZE, 則以 tmp 保存目前 buffer 中 cstring 並以 cstr_cat2 連接

cstring cstr_cat(cstr_buffer sb, const char *str)
{
    cstring s = sb->str;
    if (s->type == CSTR_ONSTACK) {
        int i = s->hash_size;
        while (i < CSTR_STACK_SIZE - 1) {
            // the dataType of s->cstr[i] is integer, *str store as ASCII code
            s->cstr[i] = *str;
            if (*str == 0)// '/0' == 0
                return s;
            ++s->hash_size;
            ++str;
            ++i;
        }
        s->cstr[i] = 0;
    }
    cstring tmp = s;
    // concat str size > CSTR_STACK_SIZE
    // only cat 128 length
    sb->str = cstr_cat2(tmp->cstr, str);
    cstr_release(tmp);
    return sb->str;
}

cstr_cat2 用來連接非 ONSTACK 和長度超過 CSTR_STACK_SIZE 的字串
cstr_cat2 連接完成的字串長度如果 < CSTR_INTERNING_SIZE 會做 cstr_interning, 如果沒有會回傳一個連接完成的 cstring
這個次是程式並沒有進到 cstr_cat2

static cstring cstr_cat2(const char *a, const char *b) { size_t sa = strlen(a), sb = strlen(b); if (sa + sb < CSTR_INTERNING_SIZE) { char tmp[CSTR_INTERNING_SIZE]; memcpy(tmp, a, sa); memcpy(tmp + sa, b, sb); tmp[sa + sb] = 0; return cstr_interning(tmp, sa + sb, hash_blob(tmp, sa + sb)); } cstring p = xalloc(sizeof(struct __cstr_data) + sa + sb + 1); if (!p) return NULL; char *ptr = (char *) (p + 1); p->cstr = ptr; p->type = 0; p->ref = 1; memcpy(ptr, a, sa); memcpy(ptr + sa, b, sb); ptr[sa + sb] = 0; p->hash_size = 0; return p; }

cstr_cat2 中的 hash_blob 是用來計算 hash table 的 key value 之後會被進一步被 hash function 轉換程 index
hash_blob

static inline uint32_t hash_blob(const char *buffer, size_t len)
{
    const uint8_t *ptr = (const uint8_t *) buffer;
    size_t h = len;
    size_t step = (len >> 5) + 1;
    for (size_t i = len; i >= step; i -= step)
        h = h ^ ((h << 5) + (h >> 2) + ptr[i - 1]);
    return h == 0 ? 1 : h;
}

完成字串連接後會做 CSTR_LITERAL(hello, "Hello string"), 其中 CSTR_LITERAL() 會執行 cstr_clone
如果要 clone 的字串長度小於 CSTR_INTERNING_SIZE, 那就會先去 interning pool 中找, 如果有一樣的會回傳已存在 pool 中的字串, 如果沒有則會複製一份新的新字串並存於 interning pool 和 hash_table 中, 回傳新字串, 如果 > CSTR_INTERNING_SIZE 就直接複製一份新的 cstring

cstring cstr_clone(const char *cstr, size_t sz)
{
    if (sz < CSTR_INTERNING_SIZE)
        return cstr_interning(cstr, sz, hash_blob(cstr, sz));
    cstring p = xalloc(sizeof(struct __cstr_data) + sz + 1);
    if (!p)
        return NULL;
    void *ptr = (void *) (p + 1);
    p->cstr = ptr;
    p->type = 0;
    p->ref = 1;
    memcpy(ptr, cstr, sz);
    ((char *) ptr)[sz] = 0;
    p->hash_size = 0;
    return p;
}

CSTR_LOCK 鎖上一個 spinlock 等待 interning 完成再 CSTR_UNLOCK() release lock
它會先做一次 interning 如果回傳的 cstring 是空的代表 hash table 不存在 或是 hash table 超過 threshold, 此時需要 expand
expand 就是初始化 hash table size 成 HASH_START_SIZE 或是 新簡一個 double size 的 hash table, 並把之前 hash table 中的 node insert 進去

static cstring cstr_interning(const char *cstr, size_t sz, uint32_t hash)
{
    cstring ret;
    CSTR_LOCK();
    ret = interning(&__cstr_ctx, cstr, sz, hash);
    if (!ret) {
        expand(&__cstr_ctx);
        ret = interning(&__cstr_ctx, cstr, sz, hash);
    }
    ++__cstr_ctx.total;
    CSTR_UNLOCK();
    return ret;
}

如果 hash table 存在, 先去 interning pool 中找, 如果有一樣的會回傳已存在 pool 中的字串, 如果沒有則會複製一份新的新字串並存於 interning pool 和 hash table 中, 回傳新字串

static cstring interning(struct __cstr_interning *si,
                         const char *cstr,
                         size_t sz,
                         uint32_t hash)
{
    if (!si->hash)
        return NULL;
    // hash function
    int index = (int) (hash & (si->size - 1));
    struct __cstr_node *n = si->hash[index];
    while (n) {
        if (n->str.hash_size == hash) {
            if (!strcmp(n->str.cstr, cstr))
                return &n->str;
        }
        n = n->next;
    }
    // 80% (4/5) threshold
    if (si->total * 5 >= si->size * 4)
        return NULL;
    if (!si->pool) {
        si->pool = xalloc(sizeof(struct __cstr_pool));
        si->index = 0;
    }
    n = &si->pool->node[si->index++];
    memcpy(n->buffer, cstr, sz);
    n->buffer[sz] = 0;

    cstring cs = &n->str;
    cs->cstr = n->buffer;
    cs->hash_size = hash;
    cs->type = CSTR_INTERNING;
    cs->ref = 0;

    n->next = si->hash[index];
    si->hash[index] = n;

    return cs;
}