# 2021q1 Homework2 (quiz2)
contributed by < `qwe661234` >
###### tags: `linux2021`
# question 1
`queue element`
``` cpp
typedef struct __element {
char *value;
struct __element *next;
struct list_head list;
} list_ele_t;
```
`queue`
``` cpp
typedef struct {
list_ele_t *head; /* Linked list of elements */
list_ele_t *tail;
size_t size;
struct list_head list;
} queue_t;
```
`list head`
``` cpp
struct list_head {
struct list_head *prev, *next;
};
```
`container_of`
``` cpp
#ifndef container_of
#define container_of(ptr, type, member) \
__extension__({ \
const __typeof__(((type *) 0)->member) *__pmember = (ptr); \
(type *) ((char *) __pmember - offsetof(type, member)); \
})
#endif
```
```
__extension__
```
>GCC uses the __extension__ attribute when using the -ansi flag to avoid warnings in headers with GCC extensions.
```
__typeof__(((type *) 0)->member)
```
>ANSI C 標准允許變量0的常量被強制轉換成任何一種類型的指針,並且轉換的結果是個 NULL, 所以 (type *) 0 就是資料型別為 type *的 NULL 指針, 雖然取用 NULL 指針去指向 member 是非法的, 但前面加上 typeof 後, 編譯器不會去訪問 NULL->member, 而是單純去取 member 的資料型別
```
offsetof(type, member)
```
>它的作用是獲取 struct 中某個成員相對於該 struct 首元素地址的偏移量
reference:
* [C语言高级用法---typeof( ((type *)0)->member )和offset_of()](https://blog.csdn.net/rosetta/article/details/90746936)
* [How to use __extension__ and __typeof__ in a minified example in C?](https://stackoverflow.com/questions/5323478/how-to-use-extension-and-typeof-in-a-minified-example-in-c)
`list_entry`
``` cpp
#define list_first_entry(head, type, member) \
list_entry((head)->next, type, member)
```
`list_add_tail function`
``` cpp
static inline void list_add_tail(struct list_head *node, struct list_head *head)
{
struct list_head *prev = head->prev;
prev->next = node;
node->next = head;
node->prev = prev;
head->prev = node;
}
```
<font color="blue">Black: original
Red: after function</font>
```graphviz
digraph structs {
rankdir=LR
node[shape="box"]
edge [dir="both"]
node0 [label="head"];
node1 [label="last_ele"];
node2 [label="first_ele"];
node3 [label="newnode"];
prev [label="prev" shape="none"];
{
rank = "same";
prev->node1[dir="forward"];
}
{
node1 -> node0-> node2;
node1 -> node3 -> node0-> node2 [color="red"];
}
}
```
`list_del function`
``` cpp
static inline void list_del(struct list_head *node)
{
struct list_head *next = node->next, *prev = node->prev;
next->prev = prev; prev->next = next;
}
```
<font color="blue">Black: original
Red: after function</font>
```graphviz
digraph structs {
rankdir=LR
node[shape="box"]
edge [dir="both"]
node0 [label="node"];
node1 [label="element"];
node2 [label="element"];
{
node1 -> node0-> node2;
node1 -> node2 [color="red"];
}
}
```
`list_empty function`
檢查 list head 是否有連接 node
``` cpp
static inline int list_empty(const struct list_head *head)
{
// point to self
return (head->next == head);
}
```
`list_is_singular function`
檢查 list head 後面是否只連接一個 node
``` cpp
static inline int list_is_singular(const struct list_head *head)
{
// 1 or 2
return (!list_empty(head) && head->prev == head->next);
}
```
`list_splice_tail function`
將所有以 list 為 list head 的 list node 接在以 head 為 list head 的最後一個 list node 之後
``` cpp
static inline void list_splice_tail(struct list_head *list,
struct list_head *head)
{
// head_ last means tail
struct list_head *head_last = head->prev;
struct list_head *list_first = list->next, *list_last = list->prev;
if (list_empty(list))
return;
head->prev = list_last;
list_last->next = head;
list_first->prev = head_last;
head_last->next = list_first;
}
```
<font color="blue">Black: original
Red: after function</font>
```graphviz
digraph structs {
rankdir=LR
node[shape="box"]
edge [dir="both"]
node0 [label="list"];
node1 [label="list node 1\n (list first)"];
node2 [label="list node 2\n (list last)"];
hnode0 [label="head"];
hnode1 [label="head node 1"];
hnode2 [label="head node 2\n (head last)"];
{
node0->node1->node2->node0;
hnode0->hnode1->hnode2->hnode0;
}
{
node2->hnode0 [color="red"];
hnode2->node1 [color="red"];
}
}
```
Result
```graphviz
digraph structs {
rankdir=LR
node[shape="box"]
edge [dir="both"]
node1 [label="list node 1\n (list first)"];
node2 [label="list node 2\n (list last)"];
hnode0 [label="head"];
hnode1 [label="head node 1"];
hnode2 [label="head node 2\n (head last)"];
{
hnode0->hnode1->hnode2;
hnode2->node1[color="red"];
node1->node2;
node2->hnode0[color="red"];
}
}
```
`list_cut_position`
將由 head_from 為list head 的第一個 list_node 至 node (parameter 中的 node) 接在 list head head_to 之後
``` cpp
static inline void list_cut_position(struct list_head *head_to,
struct list_head *head_from,
struct list_head *node)
{
struct list_head *head_from_first = head_from->next;
if (list_empty(head_from))
return;
if (head_from == node) {
INIT_LIST_HEAD(head_to);
return;
}
// begin to cut from headfirst to node, and list it to head_to
head_from->next = node->next;
head_from->next->prev = head_from;
head_to->prev = node;
node->next = head_to;
head_to->next = head_from_first;
head_to->next->prev = head_to;
}
```
<font color="blue">Black: original
Red: after function</font>
```graphviz
digraph structs {
rankdir=LR
node[shape="box"]
edge [dir="both"]
node0 [label="head_from"];
node1 [label="node 1\n (head from first)"];
node2 [label="node 2\n (node)"];
node3 [label="node 3\n"];
node4 [label="node 4\n"];
hnode0 [label="head_to"];
{
node0->node1->node2->node3->node4->node0;
node0->node3[color="red"];
hnode0->node2[color="red"];
hnode0->node1[color="red"];
}
}
```
Result
```graphviz
digraph structs {
rankdir=LR
node[shape="box"]
edge [dir="both"]
node0 [label="head_from"];
node1 [label="node 1\n (head from first)"];
node2 [label="node 2\n (node)"];
node3 [label="node 3\n"];
node4 [label="node 4\n"];
hnode0 [label="head_to"];
{
node3->node4->node0;
node0->node3[color="red"];
hnode0->node2[color="red"];
node2->node1;
hnode0->node1[color="red"];
}
}
```
`get_middle function`
以 slow 一次走訪一格 list_node, fast 一次走訪兩格 list_node 的方式, 當fast 走訪一整個 list 將要回到 list head 時, slow 剛好會走到 list 的中間點
``` cpp
static list_ele_t *get_middle(struct list_head *list)
{
struct list_head *fast = list->next, *slow;
list_for_each (slow, list) {
/*用來判斷 fast 是否走訪整個 list 即將回到 list head*/
if (fast->next == list || fast->next->next == list)
break;
fast = fast->next->next;
}
return list_entry(slow, list_ele_t, list);
}
```
:::success
重構`list_merge function` 和 `get_middle function`:
將`list_merge function` 和 `get_middle function` 合寫成一個 function, 因為 list_merge function 中的 head_from 就是 get_middle function 中的 list, 所以合寫後只須傳入兩個參數, 不需要再多傳入 middle node
:::
```cpp
list_cut_position(&left.list, &q->list, &get_middle(&q->list)->list);
```
變更為:
```cpp
list_cut_middle(&left.list, &q->list);
```
``` c
static inline void list_cut_middle(struct list_head *head_to,
struct list_head *head_from)
{
struct list_head *fast = head_from->next, *slow;
struct list_head *head_from_first = head_from->next;
if (list_empty(head_from))
return;
// find middle
list_for_each (slow, head_from) {
if (fast->next == head_from || fast->next->next == head_from)
break;
fast = fast->next->next;
}
// begin to cut from headfirst to node, and list it to head_to
head_from->next = slow->next;
head_from->next->prev = head_from;
head_to->prev = slow;
slow->next = head_to;
head_to->next = head_from_first;
head_to->next->prev = head_to;
}
```
`list_merge function`
將兩個 list 排序並合併, if 的部份是因為 get_middle 的機制, 如果切分時只有一個 list node, 那個 list node 會接在 lhs 上, 所以只要 lhs 沒有接任何 list node, 那 rhs 必然也沒有接任何 list node, 故可以直接結束 funciton
``` cpp
static void list_merge(struct list_head *lhs,
struct list_head *rhs,
struct list_head *head)
{
INIT_LIST_HEAD(head);
if (list_empty(lhs)) {
list_splice_tail(lhs, head);
return;
}
if (list_empty(rhs)) {
list_splice_tail(rhs, head);
return;
}
while (!list_empty(lhs) && !list_empty(rhs)) {
char *lv = list_entry(lhs->next, list_ele_t, list)->value;
char *rv = list_entry(rhs->next, list_ele_t, list)->value;
struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next;
list_del(tmp);
list_add_tail(tmp, head);
}
list_splice_tail(list_empty(lhs) ? rhs : lhs, head);
}
```
:::success
重構 `list_merge function`:
如果 rhs 為空就把 lhs 的 list node 接在 head 之後, lhs 為空則 rhs 必為空,所以行14即可實做, 且 list head 為空就會跳出迴圈, 無須一開始的兩個 if
:::
```cpp
static void list_merge(struct list_head *lhs,
struct list_head *rhs,
struct list_head *head)
{
INIT_LIST_HEAD(head);
while (!list_empty(lhs) && !list_empty(rhs)) {
char *lv = list_entry(lhs->next, list_ele_t, list)->value;
char *rv = list_entry(rhs->next, list_ele_t, list)->value;
struct list_head *tmp = strcmp(lv, rv) <= 0 ? lhs->next : rhs->next;
list_del(tmp);
list_add_tail(tmp, head);
}
list_splice_tail(list_empty(lhs) ? rhs : lhs, head);
}
```
# question 2
``` cpp
uint16_t func(uint16_t N) {
/* change all right side bits to 1 */
N |= N >> 1;
N |= N >> 2;
N |= N >> 4;
N |= N >> 8;
return (N + 1) >> 1;
}
```
Ex: N = $10000000000000_2$ = $16384_{10}$
原理是確保16個 bit 都能變為1, 最後做+1是為了得到最接近且比 N 大的 power of two, 接著 >> 1 (也就是除以2), 就可以得到最接近且比 N 小的 power of two
```
N |= N >> 1
0100000000000000
0010000000000000
----------------
0110000000000000
N |= N >> 2;
0110000000000000
0001100000000000
----------------
0111100000000000
N |= N >> 4;
0111100000000000
0000011110000000
----------------
0111111110000000
N |= N >> 8;
0111111110000000
0000000000111111
----------------
0111111111111111
(N + 1) = 1000000000000000
(N + 1) >> 1 = 0100000000000000
```
`is_power_of_2`
```cpp
static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
return (n != 0 && ((n & (n - 1)) == 0));
}
```
原理:
Ex n = $1010_2$, $1010_2$ & $1001_2$ = $1000_2$ != 0, 若 n = $1000_2$, $1000_2$ & $0111_2$ = 0
`roundup_pow_of_two`
``` cpp
static inline __attribute__((const))
unsigned long __roundup_pow_of_two(unsigned long n)
{
return 1UL << fls_long(n - 1);
}
```
`rounddown_pow_of_two`
``` cpp
static inline __attribute__((const))
unsigned long __rounddown_pow_of_two(unsigned long n)
{
return 1UL << (fls_long(n) - 1);
}
```
1UL 是代表值為1的 unsigned long
roundup_pow_of_two 和 rounddown_pow_of_two 都有使用到 `fls_long`
fls 為 find last set bit 的縮寫,就是找到傳入參數的最靠近 MSB 且是1的 bit 是第幾個 bit 並回傳. 將 1UL << 回傳值即可得到比參數 n 大且最靠近的 pow_of_two => `__roundup_pow_of_two`, 將回傳值 - 1 再將 1UL << 回傳值即可得到比參數 n 小且最靠近的 pow_of_two => `__rounddown_pow_of_two`, 在執行 function 前會先考慮系統是64 bit 架構還是32 bit
`fls_long`
``` cpp
static inline unsigned fls_long(unsigned long l)
{
if (sizeof(l) == 4)
return fls(l);
return fls64(l);
}
```
``` cpp
#if BITS_PER_LONG == 32
static __always_inline int fls64(__u64 x)
{
__u32 h = x >> 32;
if (h)
return fls(h) + 32;
return fls(x);
}
#elif BITS_PER_LONG == 64
static __always_inline int fls64(__u64 x)
{
if (x == 0)
return 0;
return __fls(x) + 1;
}
```
`fls` and `__fls`
``` cpp
static inline __attribute__ ((const)) int __fls(unsigned long x)
{
if (!x)
return 0;
else
return fls(x) - 1;
}
static inline __attribute__ ((const)) int fls(unsigned long x)
{
int n;
asm volatile(
" fls.f %0, %1 \n" /* 0:31; 0(Z) if src 0 */
" add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */
: "=r"(n) /* Early clobber not needed */
: "r"(x)
: "cc");
return n;
}
```
# question3
``` cpp
/*read from src and write to dest*/
void bitcpy(void *_dest, /* Address of the buffer to write to */
size_t _write, /* Bit offset to start writing to */
const void *_src, /* Address of the buffer to read from */
size_t _read, /* Bit offset to start reading from */
size_t count)
{
size_t read_lhs = _read & 7;
size_t read_rhs = 8 - read_lhs;
const uint8_t *source = (const uint8_t *) _src + (_read / 8);
size_t write_lhs = _write & 7;
size_t write_rhs = 8 - write_lhs;
uint8_t *dest = (uint8_t *) _dest + (_write / 8);
static const uint8_t read_mask[] = {
0x00, /* == 0 00000000b */
0x80, /* == 1 10000000b */
0xC0, /* == 2 11000000b */
0xE0, /* == 3 11100000b */
0xF0, /* == 4 11110000b */
0xF8, /* == 5 11111000b */
0xFC, /* == 6 11111100b */
0xFE, /* == 7 11111110b */
0xFF /* == 8 11111111b */
};
static const uint8_t write_mask[] = {
0xFF, /* == 0 11111111b */
0x7F, /* == 1 01111111b */
0x3F, /* == 2 00111111b */
0x1F, /* == 3 00011111b */
0x0F, /* == 4 00001111b */
0x07, /* == 5 00000111b */
0x03, /* == 6 00000011b */
0x01, /* == 7 00000001b */
0x00 /* == 8 00000000b */
};
while (count > 0) {
uint8_t data = *source++;
// how many bytes you want to write this round
size_t bitsize = (count > 8) ? 8 : count;
if (read_lhs > 0) {
data <<= read_lhs;
if (bitsize > read_rhs)
// put the context in next bytes to data
data |= (*source >> read_rhs);
}
// keep the number of bit in data you want to write
if (bitsize < 8)
data &= read_mask[bitsize];
uint8_t original = *dest;
uint8_t mask = read_mask[write_lhs];
if (bitsize > write_rhs) {
/* Cross multiple bytes */
// write first bytes
*dest++ = (original & mask) | (data >> write_lhs);
// keep the context of next bytes
original = *dest & write_mask[bitsize - write_rhs];
// write next bytes
*dest = original | (data << write_rhs);
} else {
// Since write_lhs + bitsize is never >= 8, no out-of-bound access.
// keep context you do not write from tail
mask |= write_mask[write_lhs + bitsize];
// write bytes
*dest++ = (original & mask) | (data >> write_lhs);
}
count -= bitsize;
}
}
```
``` cpp
size_t read_lhs = _read & 7;
size_t read_rhs = 8 - read_lhs;
```
_read & 7 與 _read % 8 同義, 用來看8 bit 中 read 的 lhs 有幾個 bit, 剩下 rhs 就是 8 - lhs
Ex: _read = 4
```graphviz
digraph structs {
rankdir=LR
node[shape="record"]
node0 [label="{lhs|lhs|lhs|lhs|lhs|rhs|rhs|rhs}"];
}
```
``` cpp
const uint8_t *source = (const uint8_t *) _src + (_read / 8);
```
_read / 8 是用來看要從 src 的第幾個 bytes 開始讀起
Ex: _read = 20 (20 / 8 = 2) 所以 source 會指向 src 的第2個 bytes
``` cpp
static const uint8_t read_mask[] = {
0x00, /* == 0 00000000b */
0x80, /* == 1 10000000b */
0xC0, /* == 2 11000000b */
0xE0, /* == 3 11100000b */
0xF0, /* == 4 11110000b */
0xF8, /* == 5 11111000b */
0xFC, /* == 6 11111100b */
0xFE, /* == 7 11111110b */
0xFF /* == 8 11111111b */
};
static const uint8_t write_mask[] = {
0xFF, /* == 0 11111111b */
0x7F, /* == 1 01111111b */
0x3F, /* == 2 00111111b */
0x1F, /* == 3 00011111b */
0x0F, /* == 4 00001111b */
0x07, /* == 5 00000111b */
0x03, /* == 6 00000011b */
0x01, /* == 7 00000001b */
0x00 /* == 8 00000000b */
};
```
read_mask 用來避免讀入不該讀的部份
Ex: 要讀入 bit 0 - 5, mask = 11111100b, 用來避免讀入 bit6, 7
write_mask 用來保留不須寫入的部份的原始內容
Ex: 要寫入 bit 0 - 5, mask = 00000011b, 用來保留 bit6, 7 的原始內容
### 從 src 讀入
``` cpp
size_t bitsize = (count > 8) ? 8 : count;
```
bitsize 是用來看這一輪要寫入幾個 bits, 因為一輪只能最多寫8個 bits, Ex: count = 13, 那第一輪就寫入8個 bits, 第二輪再寫入5個 bits
``` cpp
uint8_t data = *source++;
// how many bytes you want to write this round
size_t bitsize = (count > 8) ? 8 : count;
if (read_lhs > 0) {
data <<= read_lhs;
if (bitsize > read_rhs)
// put the context in next bytes to data
data |= (*source >> read_rhs);
}
```
read_lhs > 0 代表沒有對齊, Ex: lhs = 3, 代表要以 bit 2到下一個 bytes 的 bit 1組成8 bit 來處理
```graphviz
digraph structs {
rankdir=LR
node[shape="record"]
node0 [label="{data2|data3|data4|data5|data6|data7|X|X}"];
text [label="data <<= read_lhs", shape = "none"]
text1 [label="data 目前讀的 bytes, *source 是下一個 bytes", shape = "none"]
}
```
```graphviz
digraph structs {
rankdir=LR
node[shape="record"]
node0 [label="{X|X|X|X|X|X|source0|source1}"];
text [label="*source >> read_rhs", shape = "none"]
}
```
```graphviz
digraph structs {
rankdir=LR
node[shape="record"]
node0 [label="{data2|data3|data4|data5|data6|data7|source0|source1}"];
text [label="data |= (*source >> read_rhs)", shape = "none"]
}
```
``` cpp
// keep the number of bit in data you want to write
if (bitsize < 8)
data &= read_mask[bitsize];
```
bitsize 如果<8代表要用 read_mask 來避免讀入不該讀的部份
### 寫入 dest
``` cpp
uint8_t mask = read_mask[write_lhs];
if (bitsize > write_rhs) {
/* Cross multiple bytes */
// write first bytes
*dest++ = (original & mask) | (data >> write_lhs);
// keep the context of next bytes
original = *dest & write_mask[bitsize - write_rhs];
// write next bytes
*dest = original | (data << write_rhs);
}
```
write_lhs 是要保留不寫入的部份, write_rhs 是要寫入新 data 的部份, Ex: write_lhs = 2
```graphviz
digraph structs {
rankdir=LR
node[shape="record"]
node0 [label="{original0|original1|X|X|X|X|X|X}"];
text [label="(original & mask)", shape = "none"]
}
```
```graphviz
digraph structs {
rankdir=LR
node[shape="record"]
node0 [label="{X|X|data2|data3|data4|data5|data6|data7}"];
text [label="(data >> write_lhs)", shape = "none"]
}
```
```graphviz
digraph structs {
rankdir=LR
node[shape="record"]
node0 [label="{original0|original1|data2|data3|data4|data5|data6|data7}"];
text [label="*dest++ = (original & mask) | (data >> write_lhs)", shape = "none"]
}
```
``` cpp
original = *dest & write_mask[bitsize - write_rhs];
// write next bytes
*dest = original | (data << write_rhs);
```
這邊是把還沒寫完的 bit 寫入下一個 bytes, 一樣先把不寫入的部份做保留, 然後寫入要寫入的部份
``` cpp
while (count > 0) {
...
count -= bitsize;
}
```
這邊是把寫完的 bit 扣掉, 如果 count 還>0 代表要繼續寫下一輪
## 重構
一次copy 64個bit
```cpp
void bitcpy(void *_dest, /* Address of the buffer to write to */
size_t _write, /* Bit offset to start writing to */
const void *_src, /* Address of the buffer to read from */
size_t _read, /* Bit offset to start reading from */
size_t count)
{
uint64_t data;
const uint64_t *source = _src;
uint64_t *dest = _dest;
data = *source;
data = data << _read;
// data >> _read + (63 - (_read + count) + 1)
data = data >> (64 - count);
data = data << (64 - (_read + count));
if(_read > _write){
data = data << (_read - _write);
}else{
data = data >> (_write - _read);
}
*dest |= data;
}
```
# question4
cstr buffer
```cpp
typedef struct __cstr_data {
char *cstr;
uint32_t hash_size;
uint16_t type;
uint16_t ref; // reference counter 有多少 cstring 引用這個cstr
} *cstring;
typedef struct __cstr_buffer {
cstring str;
} cstr_buffer[1];
```
interning pool and hash table
```cpp
struct __cstr_node {
char buffer[CSTR_INTERNING_SIZE];
struct __cstr_data str;
struct __cstr_node *next;
};
struct __cstr_pool {
struct __cstr_node node[INTERNING_POOL_SIZE];
};
struct __cstr_interning {
int lock;
int index;
unsigned size;
unsigned total;
struct __cstr_node **hash;
struct __cstr_pool *pool;
};
```
由測試程式的運作來解說 funciton 的功能
`測試程式`
```cpp
static cstring cmp(cstring t)
{
CSTR_LITERAL(hello, "Hello string");
CSTR_BUFFER(ret);
cstr_cat(ret, cstr_equal(hello, t) ? "equal" : "not equal");
return cstr_grab(CSTR_S(ret));
}
static void test_cstr()
{
CSTR_BUFFER(a);
cstr_cat(a, "Hello ");
cstr_cat(a, "string");
cstring b = cmp(CSTR_S(a));
printf("%s\n", b->cstr);
CSTR_CLOSE(a);
cstr_release(b);
}
int main(int argc, char *argv[])
{
test_cstr();
return 0;
}
```
`CSTR_BUFFER(a)` 首先建立一個 cstr_buffer a
`cstr_cat(a, "Hello ")` 將字串 "Hello" 接在 buffer 中的 cstring 的 cstr 指標後
如果 buffer 中 cstring 的 type 是 ONSTACK, str 會一個字一個字接上, 如果 buffer 中 cstring 的 type 不是 ONSTACK 或要字串長度超過 CSTR_STACK_SIZE, 則以 tmp 保存目前 buffer 中 cstring 並以 cstr_cat2 連接
```cpp
cstring cstr_cat(cstr_buffer sb, const char *str)
{
cstring s = sb->str;
if (s->type == CSTR_ONSTACK) {
int i = s->hash_size;
while (i < CSTR_STACK_SIZE - 1) {
// the dataType of s->cstr[i] is integer, *str store as ASCII code
s->cstr[i] = *str;
if (*str == 0)// '/0' == 0
return s;
++s->hash_size;
++str;
++i;
}
s->cstr[i] = 0;
}
cstring tmp = s;
// concat str size > CSTR_STACK_SIZE
// only cat 128 length
sb->str = cstr_cat2(tmp->cstr, str);
cstr_release(tmp);
return sb->str;
}
```
`cstr_cat2` 用來連接非 ONSTACK 和長度超過 CSTR_STACK_SIZE 的字串
用 `cstr_cat2` 連接完成的字串長度如果 < CSTR_INTERNING_SIZE 會做 `cstr_interning`, 如果沒有會回傳一個連接完成的 cstring
這個次是程式並沒有進到 `cstr_cat2`
```cpp=
static cstring cstr_cat2(const char *a, const char *b)
{
size_t sa = strlen(a), sb = strlen(b);
if (sa + sb < CSTR_INTERNING_SIZE) {
char tmp[CSTR_INTERNING_SIZE];
memcpy(tmp, a, sa);
memcpy(tmp + sa, b, sb);
tmp[sa + sb] = 0;
return cstr_interning(tmp, sa + sb, hash_blob(tmp, sa + sb));
}
cstring p = xalloc(sizeof(struct __cstr_data) + sa + sb + 1);
if (!p)
return NULL;
char *ptr = (char *) (p + 1);
p->cstr = ptr;
p->type = 0;
p->ref = 1;
memcpy(ptr, a, sa);
memcpy(ptr + sa, b, sb);
ptr[sa + sb] = 0;
p->hash_size = 0;
return p;
}
```
`cstr_cat2` 中的 `hash_blob` 是用來計算 hash table 的 key value 之後會被進一步被 hash function 轉換程 index
`hash_blob`
```cpp
static inline uint32_t hash_blob(const char *buffer, size_t len)
{
const uint8_t *ptr = (const uint8_t *) buffer;
size_t h = len;
size_t step = (len >> 5) + 1;
for (size_t i = len; i >= step; i -= step)
h = h ^ ((h << 5) + (h >> 2) + ptr[i - 1]);
return h == 0 ? 1 : h;
}
```
完成字串連接後會做 CSTR_LITERAL(hello, "Hello string"), 其中 CSTR_LITERAL() 會執行 `cstr_clone`
如果要 clone 的字串長度小於 CSTR_INTERNING_SIZE, 那就會先去 interning pool 中找, 如果有一樣的會回傳已存在 pool 中的字串, 如果沒有則會複製一份新的新字串並存於 interning pool 和 hash_table 中, 回傳新字串, 如果 > CSTR_INTERNING_SIZE 就直接複製一份新的 cstring
```cpp
cstring cstr_clone(const char *cstr, size_t sz)
{
if (sz < CSTR_INTERNING_SIZE)
return cstr_interning(cstr, sz, hash_blob(cstr, sz));
cstring p = xalloc(sizeof(struct __cstr_data) + sz + 1);
if (!p)
return NULL;
void *ptr = (void *) (p + 1);
p->cstr = ptr;
p->type = 0;
p->ref = 1;
memcpy(ptr, cstr, sz);
((char *) ptr)[sz] = 0;
p->hash_size = 0;
return p;
}
```
`CSTR_LOCK` 鎖上一個 spinlock 等待 interning 完成再 `CSTR_UNLOCK()` release lock
它會先做一次 interning 如果回傳的 cstring 是空的代表 hash table 不存在 或是 hash table 超過 threshold, 此時需要 `expand`
`expand` 就是初始化 hash table size 成 HASH_START_SIZE 或是 新簡一個 double size 的 hash table, 並把之前 hash table 中的 node insert 進去
```cpp
static cstring cstr_interning(const char *cstr, size_t sz, uint32_t hash)
{
cstring ret;
CSTR_LOCK();
ret = interning(&__cstr_ctx, cstr, sz, hash);
if (!ret) {
expand(&__cstr_ctx);
ret = interning(&__cstr_ctx, cstr, sz, hash);
}
++__cstr_ctx.total;
CSTR_UNLOCK();
return ret;
}
```
如果 hash table 存在, 先去 interning pool 中找, 如果有一樣的會回傳已存在 pool 中的字串, 如果沒有則會複製一份新的新字串並存於 interning pool 和 hash table 中, 回傳新字串
```cpp
static cstring interning(struct __cstr_interning *si,
const char *cstr,
size_t sz,
uint32_t hash)
{
if (!si->hash)
return NULL;
// hash function
int index = (int) (hash & (si->size - 1));
struct __cstr_node *n = si->hash[index];
while (n) {
if (n->str.hash_size == hash) {
if (!strcmp(n->str.cstr, cstr))
return &n->str;
}
n = n->next;
}
// 80% (4/5) threshold
if (si->total * 5 >= si->size * 4)
return NULL;
if (!si->pool) {
si->pool = xalloc(sizeof(struct __cstr_pool));
si->index = 0;
}
n = &si->pool->node[si->index++];
memcpy(n->buffer, cstr, sz);
n->buffer[sz] = 0;
cstring cs = &n->str;
cs->cstr = n->buffer;
cs->hash_size = hash;
cs->type = CSTR_INTERNING;
cs->ref = 0;
n->next = si->hash[index];
si->hash[index] = n;
return cs;
}
```