# facebooc-q1
###### tags: `facebooc 2022`
**Tribute to:** < `jserv` >
## 作業說明
### `list.h`
改寫自:[`Linux/list.h`](https://github.com/torvalds/linux/blob/master/include/linux/list.h) 的實作。利用 `struct list_head` 資料結構,建構一個巧妙的 circular doubly linked-list,
```c
struct list_head {
struct list_head *prev, *next;
};
```
程式運作原理說明:[Linked list 在 Linux 核心原始程式碼](https://hackmd.io/@sysprog/c-linked-list#Linked-list-%E5%9C%A8-Linux-%E6%A0%B8%E5%BF%83%E5%8E%9F%E5%A7%8B%E7%A8%8B%E5%BC%8F%E7%A2%BC)
**我們提供以下程式碼進行改寫:**
```c
#include <stddef.h>
/**
* container_of() - Calculate address of object that contains address ptr
* @ptr: pointer to member variable
* @type: type of the structure containing ptr
* @member: name of the member variable in struct @type
*
* Return: @type pointer of object containing ptr
*/
#ifndef container_of
#define container_of(ptr, type, member) \
__extension__({ \
const __typeof__(((type *) 0)->member) *__pmember = (ptr); \
(type *) ((char *) __pmember - offsetof(type, member)); \
})
#endif
/**
* struct list_head - Head and node of a doubly-linked list
* @prev: pointer to the previous node in the list
* @next: pointer to the next node in the list
*/
struct list_head {
struct list_head *prev, *next;
};
/**
* LIST_HEAD - Declare list head and initialize it
* @head: name of the new object
*/
#define LIST_HEAD(head) struct list_head head = {&(head), &(head)}
/**
* INIT_LIST_HEAD() - Initialize empty list head
* @head: pointer to list head
*/
static inline void INIT_LIST_HEAD(struct list_head *head)
{
head->next = head;
head->prev = head;
}
/**
* list_add_tail() - Add a list node to the end of the list
* @node: pointer to the new node
* @head: pointer to the head of the list
*/
static inline void list_add_tail(struct list_head *node, struct list_head *head)
{
struct list_head *prev = head->prev;
prev->next = node;
node->next = head;
node->prev = prev;
head->prev = node;
}
/**
* list_remove() - Remove a list node from the list
* @node: pointer to the node
*/
static inline void list_remove(struct list_head *node)
{
struct list_head *next = node->next, *prev = node->prev;
next->prev = prev;
prev->next = next;
}
/**
* list_empty() - Check if list head has no nodes attached
* @head: pointer to the head of the list
*/
static inline int list_empty(const struct list_head *head)
{
return (head->next == head);
}
/**
* list_is_singular() - Check if list head has exactly one node attached
* @head: pointer to the head of the list
*/
static inline int list_is_singular(const struct list_head *head)
{
return (!list_empty(head) && head->prev == head->next);
}
/**
* list_replace - replace old entry by new one
* @old : the element to be replaced
* @new : the new element to insert
*
* If @old was empty, it will be overwritten.
*/
static inline void list_replace(struct list_head *old, struct list_head *new)
{
new->next = old->next;
new->next->prev = new;
new->prev = old->prev;
new->prev->next = new;
}
/**
* list_swap_ptr - swap two pointer's value
* @ptr1: the first pointer
* @ptr2: the second pointer
*/
static inline void list_swap_ptr(char **ptr1, char **ptr2)
{
char *tmp = *ptr1;
*ptr1 = *ptr2;
*ptr2 = tmp;
}
/**
* list_swap - replace entry1 with entry2 and re-add entry1 at entry2's position
* @entry1: the location to place entry2
* @entry2: the location to place entry1
*/
static inline void list_swap(struct list_head *entry1, struct list_head *entry2)
{
struct list_head *pos = entry2->prev;
list_remove(entry2);
list_replace(entry1, entry2);
if (pos == entry1)
pos = entry2;
list_add_tail(entry1, pos);
}
/**
* list_splice_tail() - Add list nodes from a list to end of another list
* @list: pointer to the head of the list with the node entries
* @head: pointer to the head of the list
*/
static inline void list_splice_tail(struct list_head *list,
struct list_head *head)
{
struct list_head *head_last = head->prev;
struct list_head *list_first = list->next, *list_last = list->prev;
if (list_empty(list))
return;
head->prev = list_last;
list_last->next = head;
list_first->prev = head_last;
head_last->next = list_first;
}
/**
* list_cut_position() - Move beginning of a list to another list
* @head_to: pointer to the head of the list which receives nodes
* @head_from: pointer to the head of the list
* @node: pointer to the node in which defines the cutting point
*/
static inline void list_cut_position(struct list_head *head_to,
struct list_head *head_from,
struct list_head *node)
{
struct list_head *head_from_first = head_from->next;
if (list_empty(head_from))
return;
if (list_is_singular(head_from) && (head_from->next != node && head_from != node))
return;
if (head_from == node) {
INIT_LIST_HEAD(head_to);
return;
}
head_from->next = node->next;
head_from->next->prev = head_from;
head_to->prev = node;
node->next = head_to;
head_to->next = head_from_first;
head_to->next->prev = head_to;
}
/**
* list_entry() - Calculate address of entry that contains list node
* @node: pointer to list node
* @type: type of the entry containing the list node
* @member: name of the list_head member variable in struct @type
*/
#define list_entry(node, type, member) container_of(node, type, member)
/**
* list_first_entry() - get first entry of the list
* @head: pointer to the head of the list
* @type: type of the entry containing the list node
* @member: name of the list_head member variable in struct @type
*/
#define list_first_entry(head, type, member) \
list_entry((head)->next, type, member)
/**
* list_for_each - iterate over list nodes
* @node: list_head pointer used as iterator
* @head: pointer to the head of the list
*/
#define list_for_each(node, head) \
for (node = (head)->next; node != (head); node = node->next)
/**
* list_for_each_prev_reverse - iterate over a list backwards for reverse the list
* @node: list_head pointer used as iterator
* @head: pointer to the head of the list
*/
#define list_for_each_next_reverse(node, head) \
for (node = (head->next); node != (head); node = node->prev)
/**
* shamelessly copy from linux/list.h
* list_for_each_safe - iterate over a list safe against removal of list entry
* @pos: the &struct list_head to use as a loop cursor.
* @n: another &struct list_head to use as temporary storage
* @head: the head for your list.
*/
#define list_for_each_safe(pos, n, head) \
for (pos = (head)->next, n = pos->next; pos != (head); \
pos = n, n = pos->next)
```
---
### folly::FBString
[folly::FBString](https://github.com/facebook/folly/blob/main/folly/FBString.h) 實作主要手法透過下方的 union:
```c
union {
struct {
char *data;
size_t size;
size_t capacity;
} heap;
struct {
char data[23];
int8_t length; // align 1
} stack;
};
```
分為「短字串」以及「中長字串」,分開進行處理。「短字串」保存在 `char data[23]`,而「中長字串」則透過 data 這個指標和 `malloc` 的使用,保存在 heap 上。
### `xs.c`
在伺服器運算 ([server-side](https://en.wikipedia.org/wiki/Server-side) computing) 領域中,常會遇到頻繁的短字串處理,為此 Facebook 特別發展 [folly::fbstring](https://github.com/facebook/folly/blob/main/folly/FBString.h) 作為 C++ [std::string](https://legacy.cplusplus.com/reference/string/string/) 的高效能替代品,而之所以可以有效能上的突破,在於透過緊湊的記憶體佈局,施行 SSO (small string optimization) 和 CoW (copy on write) 這兩種最佳化手法:
* 當字串被歸類為「短字串」,會使用 stack 上的空間來保存字串。
* 當字串被視作「中等長度的字串」,採用動態配置記憶體;
* 當字串倍視作「長字串」,會透過 CoW 手段進行最佳化:進行字串複製操作時,倘若字串本身沒有修改,就會共享記憶體空間,從而減少記憶體佔用和省去複製的開銷。換言之,真正的「複製」操作僅發生在字串內容發生變更。
在 stack 上的空間 (也就是不計算透過 malloc 所取得的 heap 空間) 佔用尤其精巧,[folly::FBString](https://github.com/facebook/folly/blob/main/folly/FBString.h) 本體使用 24 個位元組,但卻能表達 23 個位元組長度的字串,沒有任何ㄧ個位元組浪費。相較之下,[std::string](https://legacy.cplusplus.com/reference/string/string/) 本身佔用 32 個位元組,但在 heap 上卻只能表達 16 個位元組的字串。
我們可以效仿 [folly::FBString](https://github.com/facebook/folly/blob/main/folly/FBString.h) 的資料結構實作一個山寨版:
```c
#define MAX_STR_LEN_BITS (54)
#define MAX_STR_LEN ((1UL << MAX_STR_LEN_BITS) - 1)
#define LARGE_STRING_LEN 256
#define MIDDLE_STRING_LEN 16
typedef union {
/* allow strings up to 15 bytes to stay on the stack
* use the last byte as a null terminator and to store flags
* much like fbstring:
* https://github.com/facebook/folly/blob/master/folly/docs/FBString.md
*/
char data[MIDDLE_STRING_LEN];
struct {
uint8_t filler[15],
/* how many free bytes in this stack allocated string
* same idea as fbstring
*/
space_left : 4,
/* if it is on heap, set to 1 */
is_ptr : 1, is_large_string : 1, flag2 : 1, flag3 : 1;
};
/* heap allocated */
struct {
char *ptr;
/* supports strings up to 2^54 - 1 bytes */
size_t size : 54,
/* capacity is always a power of 2 (unsigned)-1 */
capacity : 6;
/* the last 4 bits are important flags */
};
} xs;
```
`xs` `union` 定義 16 個位元組,應用與實作細節如下:
* 字串長度小於或等於 15 個位元組,則放在 stack。
* 字串長度大於或等於 16 個位元組,則放在 heap (透過 `malloc` 系統呼叫配置所需字串大小)
* heap struct
* `ptr`: 8 個位元組指標 (64 位元架構: x86_64/aarch64 等等)。
* `size`: 字串長度。因定義 54 位元,故最大字串長度為 $2^{54}$ − 1
位元組。
* `capacity`: 從 heap 配置的空間大小,其單位為 $2^{capacity}$,故用 6 個位元即可涵蓋 `size` 長度。
* `is_ptr`: 若有從 heap 配置空間,`is_ptr` 值為 1,否則為 0。
* `is_large_string`: 若字串長度 > `LARGE_STRING_LEN`,視為「長字串」,`is_large_string` 值為 1,否則為 0。
* 有 2 個位元沒有定義,是為了避免覆寫另一結構成員: `flag 2` 與 `flag3`。
**我們提供以下程式碼進行改寫:**
```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#define MAX_STR_LEN_BITS (54)
#define MAX_STR_LEN ((1UL << MAX_STR_LEN_BITS) - 1)
#define LARGE_STRING_LEN 256
#define MIDDLE_STRING_LEN 16
/* Memory leaks happen if the string is too long but it is still useful for
* short strings.
*/
#define xs_tmp(x) \
((void) ((struct { \
_Static_assert(sizeof(x) <= MAX_STR_LEN, "it is too big"); \
int dummy; \
}){1}), \
xs_new(&xs_literal_empty(), x))
#define xs_literal_empty() \
(xs) { .space_left = 15 }
enum xs_type {
XS_SHORT = 0,
XS_MEDIUM,
XS_LARGE,
};
typedef union {
/* allow strings up to 15 bytes to stay on the stack
* use the last byte as a null terminator and to store flags
* much like fbstring:
* https://github.com/facebook/folly/blob/master/folly/docs/FBString.md
*/
char data[MIDDLE_STRING_LEN];
struct {
uint8_t filler[15],
/* how many free bytes in this stack allocated string
* same idea as fbstring
*/
space_left : 4,
/* if it is on heap, set to 1 */
is_ptr : 1, is_large_string : 1, flag2 : 1, flag3 : 1;
};
/* heap allocated */
struct {
char *ptr;
/* supports strings up to 2^54 - 1 bytes */
size_t size : 54,
/* capacity is always a power of 2 (unsigned)-1 */
capacity : 6;
/* the last 4 bits are important flags */
};
} xs;
static inline bool xs_is_ptr(const xs *x)
{
return x->is_ptr;
}
static inline bool xs_is_large_string(const xs *x)
{
return x->is_large_string;
}
size_t xs_size(const xs *x)
{
return xs_is_ptr(x) ? x->size : 15 - x->space_left;
}
char *xs_data(const xs *x)
{
if (!xs_is_ptr(x))
return (char *) x->filler;
if (xs_is_large_string(x))
return (char *) (x->ptr + /* HEADER */ 4);
return (char *) x->ptr;
}
int xs_type(const xs *x)
{
if (!x->is_ptr)
return XS_SHORT;
if (x->is_large_string)
return XS_LARGE;
return XS_MEDIUM;
}
size_t xs_capacity(const xs *x)
{
return xs_is_ptr(x) ? ((size_t) 1 << x->capacity) - 1 : 15;
}
static inline void xs_set_refcnt(const xs *x, int val)
{
*((int *) ((size_t) x->ptr)) = val;
}
#define xs_literal_empty() \
(xs) { .space_left = 15 }
/* lowerbound (floor log2) */
static inline int ilog2(uint32_t n)
{
return 32 - __builtin_clz(n) - 1;
}
static void xs_allocate_data(xs *x, size_t len, bool reallocate)
{
/**
* This function can be improved by:
* Delete the passing the arg @len. Therefore, change the if stmt,
* using just x->capacity to determine whether @x is small, middle, or large string.
*/
/* Small string */
if (len < MIDDLE_STRING_LEN) {
return;
}
/* Medium string */
if (len < LARGE_STRING_LEN) {
x->ptr = reallocate ? realloc(x->ptr, (size_t) 1 << x->capacity)
: malloc((size_t) 1 << x->capacity);
x->is_large_string = 0;
return;
}
/* Large string */
x->is_large_string = 1;
/* The extra 4 bytes are used to store the reference count */
x->ptr = reallocate ? realloc(x->ptr, (size_t)(1 << x->capacity) + 4)
: malloc((size_t)(1 << x->capacity) + 4);
xs_set_refcnt(x, 1);
}
static inline void xs_inc_refcnt(const xs *x)
{
if (xs_is_large_string(x))
++(*(int *) ((size_t) x->ptr));
}
static inline int xs_dec_refcnt(const xs *x)
{
if (!xs_is_large_string(x))
return 0;
return --(*(int *) ((size_t) x->ptr));
}
static inline int xs_get_refcnt(const xs *x)
{
if (!xs_is_large_string(x))
return 0;
return *(int *) ((size_t) x->ptr);
}
/**
* xs_new create a xs string, allocate the necessary memory and copy the
* the bytes from p to xs_data(x). If strlen(p) is greater or equal to
* LARGE_STRING_LEN, use string interning to share memory address if possible.
*/
xs *xs_new(xs *x, const char *p)
{
*x = xs_literal_empty();
size_t len = strlen(p) + 1;
if (len > LARGE_STRING_LEN) {
x->capacity = ilog2(len) + 1;
x->size = len - 1;
x->is_ptr = true;
x->is_large_string = true;
/* TODO: String interning method */
} else if (len > MIDDLE_STRING_LEN) {
x->capacity = ilog2(len) + 1;
x->size = len - 1;
x->is_ptr = true;
xs_allocate_data(x, x->size, 0);
memcpy(xs_data(x), p, len);
} else {
memcpy(x->data, p, len);
x->space_left = 15 - (len - 1);
}
return x;
}
/* grow up to specified size */
xs *xs_grow(xs *x, size_t len)
{
if (len <= xs_capacity(x))
return x;
x->capacity = ilog2(len) + 1;
if (xs_is_ptr(x)) {
xs_allocate_data(x, len, 1);
} else {
x->is_ptr = true;
xs_allocate_data(x, len, 0);
}
/* TODO: Large string case */
return x;
}
static inline xs *xs_newempty(xs *x)
{
*x = xs_literal_empty();
return x;
}
static inline xs *xs_free(xs *x)
{
if (xs_is_ptr(x) && xs_dec_refcnt(x) <= 0)
free(x->ptr);
return xs_newempty(x);
}
static bool xs_cow_lazy_copy(xs *x, char **data)
{
if (!x->is_large_string) {
memcpy(xs_data(x), *data, strlen(*data));
return true;
} else if (xs_get_refcnt(x) > 1) {
/* Lazy copy */
xs_dec_refcnt(x);
/* TODO: String interning method */
}
if (data) {
memcpy(xs_data(x), *data, x->size);
}
/* Update the newly allocated pointer */
*data = xs_data(x);
return true;
}
xs *xs_concat(xs *string, const xs *prefix, const xs *suffix)
{
size_t pres = xs_size(prefix), sufs = xs_size(suffix),
size = xs_size(string), capacity = xs_capacity(string);
char *pre = xs_data(prefix), *suf = xs_data(suffix),
*data = xs_data(string);
if (xs_get_refcnt(string) > 1)
xs_cow_lazy_copy(string, &data);
if (size + pres + sufs <= capacity) {
memmove(data + pres, data, size);
memcpy(data, pre, pres);
memcpy(data + pres + size, suf, sufs + 1);
if (xs_is_ptr(string))
string->size = size + pres + sufs;
else
string->space_left = 15 - (size + pres + sufs);
} else {
xs tmps = xs_literal_empty();
xs_grow(&tmps, size + pres + sufs);
char *tmpdata = xs_data(&tmps);
memcpy(tmpdata + pres, data, size);
memcpy(tmpdata, pre, pres);
memcpy(tmpdata + pres + size, suf, sufs + 1);
xs_free(string);
*string = tmps;
string->size = size + pres + sufs;
}
/*
TODO: String interning if is large string
*/
return string;
}
xs *xs_trim(xs *x, const char *trimset)
{
if (!trimset[0])
return x;
char *dataptr = xs_data(x), *orig = dataptr;
if (xs_get_refcnt(x) > 1) {
xs_cow_lazy_copy(x, &dataptr);
orig = dataptr;
}
/* Similar to strspn/strpbrk but it operates on binary data */
uint8_t mask[32] = {0};
#define check_bit(byte) (mask[(uint8_t) byte >> 3] & 1 << (uint8_t) byte & 7)
#define set_bit(byte) (mask[(uint8_t) byte >> 3] |= 1 << (uint8_t) byte & 7)
size_t i, slen = xs_size(x), trimlen = strlen(trimset);
for (i = 0; i < trimlen; i++)
set_bit(trimset[i]);
for (i = 0; i < slen; i++)
if (!check_bit(dataptr[i]))
break;
for (; slen > 0; slen--)
if (!check_bit(dataptr[slen - 1]))
break;
dataptr += i;
slen -= i;
/* reserved space as a buffer on the heap.
* Do not reallocate immediately. Instead, reuse it as possible.
* Do not shrink to in place if < 16 bytes.
*/
memmove(orig, dataptr, slen);
/* do not dirty memory unless it is needed */
if (orig[slen])
orig[slen] = 0;
if (xs_is_ptr(x))
x->size = slen;
else
x->space_left = 15 - slen;
/*
TODO: String interning if is large string
*/
return x;
#undef check_bit
#undef set_bit
}
void xs_erase(xs *x)
{
if (xs_is_ptr(x)) {
free(x->ptr);
}
memset(x, 0, sizeof(xs));
return;
}
```
:::success
**作業要求:**
1. 解釋 `list.h`、`xs.c` 的程式碼以及其運作原理。
2. 利用 `xs.c` 的實作,改寫第一次作業,並且設計實驗,與第一次的作業進行比較,cache 的使用以及效能上的差異。最後解釋你的實驗結果以及猜想。
3. 練習利用 `list.h` 的實作,針對 Linked list 的資料結構,改寫第一次的作業。
4. 研究 [字串駐留 (String Interning)](https://en.wikipedia.org/wiki/String_interning) 的手法,完善 `xs.c` 內部對於「長字串」處理的實作,並且提出對於 `xs.c` 程式碼更好的實作以及改進。
這裡提供 [字串駐留 (String Interning) 的相關實作](https://hackmd.io/@Zxiro/rk4qw64s_) 作為參考。
> Tribute to Zxiro
:::
:::info
貼心提醒:作業的繳交期限沒有限制。
:::
---
## 作業區 (HackMD / Github)
:::info
:exclamation: 無論標題和內文,中文和英文字元之間要有空白字元 (對排版和文字搜尋有利);文字訊息請避免用圖片來表示,否則不好搜尋和分類。
:notes: 「開發紀錄」的 HackMD 網址 **不一定** 要用「固定網址」,也就是如 https://hackmd.io/@itsme/XXXX 的形式,設定公開發表並允許已登入者進行編輯,請留意!
:coffee: 我們在開發的過程中,所有人都能看得到其他人寫的共筆,大家可以互相參考、留下評論,當然也可以拿來引用,但是,引用時,請註明你們引用的出處。
:::
- [ ] XXXX <--- Discord 暱稱
* [開發紀錄](https://) / [Github](https://)
- [ ] Astus
* [開發紀錄](https://hackmd.io/xCOla4f6R0yDXdjS7AoCog?view) / [Github](https://)
- [ ] peter lee
* [開發紀錄](/lLLczFH_StOv1aEqGJ_Uwg) / [Github](https://)