2021q1 Homework2 (quiz2)

# 2021q1 Homework2 (quiz2) contributed by < `Jings1017` > ###### tags: `linux2021` ## 測驗 1 ### 程式碼分析 ### list.h * list_add_tail Add a list node to the end of the list ```c= static inline void list_add_tail(struct list_head *node, struct list_head *head) { struct list_head *prev = head->prev; prev->next = node; node->next = head; node->prev = prev; head->prev = node; } ``` ```graphviz digraph{ rankdir = LR node [shape="box"] edge [dir = "both"] omit1[label="...", shape="none"] pre[label="prev"] head[label="head"] omit2[label="...", shape="none"] omit1->pre head->omit2 pre->head } ``` ```graphviz digraph{ rankdir = LR node [shape="box"] edge [dir = "both"] omit1[label="...", shape="none"] pre[label="prev"] node0[label="node" color="red"] head[label="head"] omit2[label="...", shape="none"] omit1->pre head->omit2 pre->node0[color=red] node0->head[color=red] } ``` * list_del Remove a list node from the list ```c= static inline void list_del(struct list_head *node) { struct list_head *next = node->next, *prev = node->prev; next->prev = prev; prev->next = next; } ``` ```graphviz digraph{ rankdir = LR node [shape="box"] edge [dir = "both"] omit1[label="...", shape="none"] pre[label="prev"] node0[label="node" color="red"] head[label="next"] omit2[label="...", shape="none"] omit1->pre head->omit2 pre->node0[color=red] node0->head[color=red] } ``` ```graphviz digraph{ rankdir = LR node [shape="box"] edge [dir = "both"] omit1[label="...", shape="none"] pre[label="prev"] head[label="next"] omit2[label="...", shape="none"] omit1->pre head->omit2 pre->head } ``` * list_splice_tail Add list nodes from a list to end of another list ```c= static inline void list_splice_tail(struct list_head *list, struct list_head *head) { struct list_head *head_last = head->prev; struct list_head *list_first = list->next, *list_last = list->prev; if (list_empty(list)) return; // blue double arrow head->prev = list_last; list_last->next = head; // red double arrow list_first->prev = head_last; head_last->next = list_first; } ``` ```graphviz digraph{ node [shape=box] edge[dir=both minlen=3] ell1,ell2,ell3,ell4 [label="..." shape=none] lprev[label="prev of list"] lnext[label="next of list"] hprev[label="prev of head"] { rank = same ell1->lprev lprev->list[dir=forward color=gray] list->lprev[dir=forward] list->lnext[dir=forward] lnext->list[dir=forward color=gray] lnext->ell2 } { rank = same ell3->hprev hprev->head[color=gray] head->ell4 } lnext->hprev[color=red] lprev->head[color=blue] } ``` * list_cut_position Move beginning of a list to another list ```c= static inline void list_cut_position(struct list_head *head_to, struct list_head *head_from, struct list_head *node) { struct list_head *head_from_first = head_from->next; if (list_empty(head_from)) return; if (head_from == node) { INIT_LIST_HEAD(head_to); return; } // red double arrow head_from->next = node->next; head_from->next->prev = head_from; // green doube arrow head_to->prev = node; node->next = head_to; // blue double arrow head_to->next = head_from_first; head_to->next->prev = head_to; } ``` ```graphviz digraph{ rankdir = LR node [shape=box] edge [dir=both] ell1,ell2,ell3 [label="..." shape=none] cut [label="node"] { ell1->head_from head_from->head_from_first[color=gray] head_from_first->ell2->cut cut->node_next[color=gray] node_next->ell3 head_from->node_next[color=red] head_to:s->cut:s [color=darkgreen] head_to->head_from_first[color=blue] } } ``` ### mergesort.c #### list_merge( ) 之改善 ```diff= if (list_empty(lhs)) { - list_splice_tail(lhs, head); + list_splice_tail(rhs, head); return; } if (list_empty(rhs)) { - list_splice_tail(rhs, head); + list_splice_tail(lhs, head); return; } ``` 原程式碼邏輯上有誤，若判斷 `lhs` 是否為空，則在敘述中的參數應該為 `rhs` 較為合理同理， line 7 應修正成 line 8 ### COND1 , COND2 ```c= static list_ele_t *get_middle(struct list_head *list) { struct list_head *fast = list->next, *slow; list_for_each (slow, list) { if (COND1 || COND2) break; fast = fast->next->next; } return list_entry(TTT, list_ele_t, list); } ``` 由上方程式碼可知 slow 每經過一個節點， fast 會經過兩個節點，也就是 slow 經過的節點數約為 fast 的一半，所以可推斷出 fast 走到尾端時， slow 約在串列中間的位置。而 if 中需放入的條件為 fast 在尾端的狀態，考慮到串列的節點數有奇有偶，故條件為 `fast->next==list` 或 `fast->next->next==list` * COND1 = ? fast->next == list ```graphviz digraph { rankdir=LR a[shape=record, label="{<h>|<n>}"] b[shape=record, label="{<h>|<n>}"] c[shape=record, label="{<h>|<n>}"] d[shape=record, label="{<h>|<n>}"] e[shape=record, label="{<h>|<n>}"] list[label="list" shape=plaintext , fontcolor=red] fast[label="fast" shape=plaintext , fontcolor=red] slow[label="slow" shape=plaintext , fontcolor=red] a:n->b:h b:n->c:h c:n->d:h d:n->e:h e:n->a:h list->a slow->c fast->e } ``` * COND2 = ? fast->next->next == list ```graphviz digraph { rankdir=LR a[shape=record, label="{<h>|<n>}"] b[shape=record, label="{<h>|<n>}"] c[shape=record, label="{<h>|<n>}"] d[shape=record, label="{<h>|<n>}"] e[shape=record, label="{<h>|<n>}"] f[shape=record, label="{<h>|<n>}"] list[label="list" shape=plaintext , fontcolor=red] fast[label="fast" shape=plaintext , fontcolor=red] slow[label="slow" shape=plaintext , fontcolor=red] a:n->b:h b:n->c:h c:n->d:h d:n->e:h e:n->f:h f:n->a:h list->a fast->e slow->c } ``` ### MMM ```c= void list_merge_sort(queue_t *q) { if (list_is_singular(&q->list)) return; queue_t left; struct list_head sorted; INIT_LIST_HEAD(&left.list); list_cut_position(&left.list, &q->list, MMM); list_merge_sort(&left); list_merge_sort(q); list_merge(&left.list, &q->list, &sorted); INIT_LIST_HEAD(&q->list); list_splice_tail(&sorted, &q->list); } ``` 由 line 9 可知，MMM 應該放入切斷點。又這裡採用 merge sort , 所以切斷點為中間點，可以用 `get_middle(&q->list)`求得。 * MMM = ? &get_middle(&q->list)->list ### TTT ```c= static list_ele_t *get_middle(struct list_head *list) { struct list_head *fast = list->next, *slow; list_for_each (slow, list) { if (COND1 || COND2) break; fast = fast->next->next; } return list_entry(TTT, list_ele_t, list); } ``` 上述分析中已知 slow 指向中間節點，又此函式需回傳中間節點位置，所以 `TTT = slow` * TTT = ? slow --- ## 測驗 2 接受一個 16 位元無號整數 N ，並回傳小於或等於 N 的 power-of-2 ### 想法 2 的冪寫成 pattern 是 `1000...00` 所以我們可以先將原 N 經過運算得到 `11...11111` 再加 1 即變成 `100...000` 最後再 `>> 1` 就是小於或等於 N 的 2 的冪 :::warning 注意[冪](https://zh.wikipedia.org/wiki/%E5%86%AA)的用法。$b^n$ 讀作「b 的 n 次方」或「b 的 **n 次**==冪==」，但不是「冪次」，不管幾次，就是 b 的[冪](https://zh.wikipedia.org/wiki/%E5%86%AA) :notes: jserv ::: ### 解題關鍵在於考慮極值 2^15^ , 每次運算可讓1的數量變為兩倍，十六位數故作 4 次右移，分別為 1, 2, 4, 8 ``` 1000 0000 0000 0000 | 0100 0000 0000 0000 = 1100 0000 0000 0000 1100 0000 0000 0000 | 0011 0000 0000 0000 = 1111 0000 0000 0000 1111 0000 0000 0000 | 1111 1111 0000 0000 = 1111 1111 0000 0000 1111 1111 0000 0000 | 0000 0000 1111 1111 = 1111 1111 1111 1111 1111 1111 1111 1111 + 1 = 1 0000 0000 0000 0000 1 0000 0000 0000 0000 >> 1 = 1000 0000 0000 0000 ``` * X = ? `2` * Y = ? `4` * Z = ? `8` --- ## 測驗 3 * RRR = ? data<<=read_lhs * DDD = ? mask |= write_mask[write_lhs+bitsize] ### 分析 bitcpy() ```c= void bitcpy(void *_dest, /* Address of the buffer to write to */ size_t _write, /* Bit offset to start writing to */ const void *_src, /* Address of the buffer to read from */ size_t _read, /* Bit offset to start reading from */ size_t count) ``` * void *_dest : 可傳入任何記憶體位置, 再將 type 轉成 uint8_t * const void *_src : 同上 * uint8_t : 8 位元 unsigned int ```diff= size_t read_lhs = _read & 7; size_t read_rhs = 8 - read_lhs; - const uint8_t *source = (const uint8_t *)_src + (_read / 8); + const uint8_t *source = (const uint8_t *)_src + (_read >> 3); size_t write_lhs = _write & 7; size_t write_rhs = 8 - write_lhs; - uint8_t *dest = (uint8_t *)_dest + (_write / 8); + uint8_t *dest = (uint8_t *)_dest + (_write >> 3); ``` 上方程式碼 `(_read / 8)` 可用 `(_read >> 3)` 代替，以減少運算成本。同理，`(_write / 8)` 可用 `(_write >> 3)` 代替 ```c= while (count > 0) { uint8_t data = *source++; size_t bitsize = (count > 8) ? 8 : count; if (read_lhs > 0) { data <<= read_lhs; if (bitsize > read_rhs) data |= (*source >> read_rhs); } if (bitsize < 8) data &= read_mask[bitsize]; uint8_t original = *dest; uint8_t mask = read_mask[write_lhs]; if (bitsize > write_rhs) { /* Cross multiple bytes */ *dest++ = (original & mask) | (data >> write_lhs); original = *dest & write_mask[bitsize - write_rhs]; *dest = original | (data << write_rhs); } else { // Since write_lhs + bitsize is never >= 8, no out-of-bound access. mask |= write_mask[write_lhs + bitsize]; *dest++ = (original & mask) | (data >> write_lhs); } count -= bitsize; } ``` * 先判斷 count 是否大於 8 ，是的話 bitsize 設成 8，小於等於則設成 count 之值 * ```RRR = data <<= read_lhs ```的理由是要使 data 補到 8 bits * 根據 line 21 的註解，可知 ```write_lhs + bitsize is never >= 8```，所以 ```DDD = mask |= write_mask[write_lhs + bitsize]; ``` * input/output 變數位元順序示意圖 ![](https://i.imgur.com/WpjBDuk.jpg) 以下舉例說明假設 * ==read_lhs = 3== * ==_read_rhs = 5== * ==bit_size = 7== ```c= if (read_lhs > 0) { data <<= read_lhs; if (bitsize > read_rhs) data |= (*source >> read_rhs); } ``` ```graphviz digraph{ node [shape=record, width=3, fixedsize=true]; a [label="x |x |x |1 |1 |0 |1 |0", fontsize=20]; b [label="1 |1 |0 |1 |0 |0 |0 |0", fontsize=20]; node [shape=plaintext, fontcolor=black, fontsize=100, width=1]; "|" node [shape=record, width=3, fixedsize=true]; c [label="1 |0 |0 |x |x |x |x |x", fontsize=20]; d[label="0 |0 |0 |0 |0 |1 |0 |0", fontsize=20]; node [shape=plaintext, fontcolor=black, fontsize=30, width=1]; "*source"->a [style=invis] "*(source + 1)"->c [style=invis] node [shape=plaintext, fontcolor=black, fontsize=50, width=1]; "=" a->b [label=" << 3", fontsize=25] c->d [label=" >> 5", fontsize=25] b->"=" [label="=", minlen = 3, style=invis] node [shape=record, fontcolor=black, fontsize=14, width=3, fixedsize=true]; e[label="1 |1 |0 |1 |0 |1 |0 |0", color=black, fillcolor=white, style=filled, fontsize=20]; { rank=same; a; c} { rank=same; b; d; "|";} { rank=same; "="; e} } ``` 所以可知結果由 `source` 右 5 bit及 `source+1`左 3 bit 所組成的 8 bit 而 `mask` 是用來將沒有寫入的 bits 保留起來，以防止被 `data` 複寫的機制， ```c= if (bitsize > write_rhs) { /* Cross multiple bytes */ *dest++ = (original & mask) | (data >> write_lhs); original = *dest & write_mask[bitsize - write_rhs]; *dest = original | (data << write_rhs); } else { // Since write_lhs + bitsize is never >= 8, no out-of-bound access. mask |= write_mask[write_lhs + bitsize]; *dest++ = (original & mask) | (data >> write_lhs); } ``` 此段程式碼是用來判斷欲寫入的 bit 是否橫跨了兩個 byte 。如果橫跨了兩個 byte ，則需要 `original & mask` 來記錄不須更改的 bit ，再用 `data >> write_lhs` 來把欲寫入的 bit 移至 `write_lhs` 為開頭的位置，兩者做 `|` 運算，即是 `data` 更新後的結果。`write_mask[bitsize - write_rhs]` 則是下一個 byte 需寫入的 bits 位置，將尚未寫入的 bits (即 `data << write_lhs`)寫入到下一個 byte 中。若沒有橫跨兩個 byte，則 `mask |= write_mask[write_lhs + bitsize]` 用來記錄不須寫入的 bits 的位置，而 `mask` 在欲寫入的位置為 0，其他位置與 `dest` 原先的 bit 相同。最後再將 `(original & mask)` 與 `(data >> write_lhs)` 做 `|` 運算，即可得到更新後的資料。 --- ## 測驗 4 ### 程式目的 string interning 將字串存入　hash 中，當想使用的時候只要將指標指向字串即可，不需再額外存取，如此一來可改善記憶體的使用效率 ### 定義結構 ```c= typedef struct __cstr_data { char *cstr; uint32_t hash_size; uint16_t type; uint16_t ref; } * cstring; ``` ```c= typedef struct __cstr_buffer { cstring str; } cstr_buffer[1]; ``` ```c= struct __cstr_node { char buffer[CSTR_INTERNING_SIZE]; struct __cstr_data str; struct __cstr_node *next; }; struct __cstr_pool { struct __cstr_node node[INTERNING_POOL_SIZE]; }; struct __cstr_interning { int lock; int index; unsigned size; unsigned total; struct __cstr_node **hash; struct __cstr_pool *pool; }; static struct __cstr_interning __cstr_ctx; ``` ### xalloc( ) 記憶體配置，失敗則 `exit(-1)` ```c= static void *xalloc(size_t n) { void *m = malloc(n); if (!m) exit(-1); return m; } ``` ### insert_node( ) ```c= static inline void insert_node(struct __cstr_node **hash, int sz, struct __cstr_node *node) { uint32_t h = node->str.hash_size; int index = h & (sz - 1); node->next = hash[index]; hash[index] = node; } ``` ### expand( ) ```c= static void expand(struct __cstr_interning *si) { unsigned new_size = si->size * 2; if (new_size < HASH_START_SIZE) new_size = HASH_START_SIZE; struct __cstr_node **new_hash = xalloc(sizeof(struct __cstr_node *) * new_size); memset(new_hash, 0, sizeof(struct __cstr_node *) * new_size); for (unsigned i = 0; i < si->size; ++i) { struct __cstr_node *node = si->hash[i]; while (node) { struct __cstr_node *tmp = node->next; insert_node(new_hash, new_size, node); node = tmp; } } free(si->hash); si->hash = new_hash; si->size = new_size; } ``` ### interning( ) ```c= static cstring interning(struct __cstr_interning *si, const char *cstr, size_t sz, uint32_t hash) { if (!si->hash) return NULL; int index = (int) (hash & (si->size - 1)); struct __cstr_node *n = si->hash[index]; while (n) { if (n->str.hash_size == hash) { if (!strcmp(n->str.cstr, cstr)) return &n->str; } n = n->next; } // 80% (4/5) threshold if (si->total * 5 >= si->size * 4) return NULL; if (!si->pool) { si->pool = xalloc(sizeof(struct __cstr_pool)); si->index = 0; } n = &si->pool->node[si->index++]; memcpy(n->buffer, cstr, sz); n->buffer[sz] = 0; cstring cs = &n->str; cs->cstr = n->buffer; cs->hash_size = hash; cs->type = CSTR_INTERNING; cs->ref = 0; n->next = si->hash[index]; si->hash[index] = n; return cs; } ``` ### cstr_interning( ) ```c= static cstring cstr_interning(const char *cstr, size_t sz, uint32_t hash) { cstring ret; CSTR_LOCK(); ret = interning(&__cstr_ctx, cstr, sz, hash); if (!ret) { expand(&__cstr_ctx); ret = interning(&__cstr_ctx, cstr, sz, hash); } ++__cstr_ctx.total; CSTR_UNLOCK(); return ret; } ``` ### hash_blob( ) ```c= static inline uint32_t hash_blob(const char *buffer, size_t len) { const uint8_t *ptr = (const uint8_t *) buffer; size_t h = len; size_t step = (len >> 5) + 1; for (size_t i = len; i >= step; i -= step) h = h ^ ((h << 5) + (h >> 2) + ptr[i - 1]); return h == 0 ? 1 : h; } ``` ### cstr_clone( ) ```c= cstring cstr_clone(const char *cstr, size_t sz) { if (sz < CSTR_INTERNING_SIZE) return cstr_interning(cstr, sz, hash_blob(cstr, sz)); cstring p = xalloc(sizeof(struct __cstr_data) + sz + 1); if (!p) return NULL; void *ptr = (void *) (p + 1); p->cstr = ptr; p->type = 0; p->ref = 1; memcpy(ptr, cstr, sz); ((char *) ptr)[sz] = 0; p->hash_size = 0; return p; } ``` ### cstr_grab( ) ```c= cstring cstr_grab(cstring s) { if (s->type & (CSTR_PERMANENT | CSTR_INTERNING)) return s; if (s->type == CSTR_ONSTACK) return cstr_clone(s->cstr, s->hash_size); if (s->ref == 0) s->type = CSTR_PERMANENT; else __sync_add_and_fetch(&s->ref, 1); return s; } ``` ### cstr_release( ) ```c= void cstr_release(cstring s) { if (s->type || !s->ref) return; if (__sync_sub_and_fetch(&s->ref, 1) == 0) free(s); } ``` ### cstr_hash( ) ```c= static size_t cstr_hash(cstring s) { if (s->type == CSTR_ONSTACK) return hash_blob(s->cstr, s->hash_size); if (s->hash_size == 0) s->hash_size = hash_blob(s->cstr, strlen(s->cstr)); return s->hash_size; } ``` ### cstr_equal( ) ```c= int cstr_equal(cstring a, cstring b) { if (a == b) return 1; if ((a->type == CSTR_INTERNING) && (b->type == CSTR_INTERNING)) return 0; if ((a->type == CSTR_ONSTACK) && (b->type == CSTR_ONSTACK)) { if (a->hash_size != b->hash_size) return 0; return memcmp(a->cstr, b->cstr, a->hash_size) == 0; } uint32_t hasha = cstr_hash(a); uint32_t hashb = cstr_hash(b); if (hasha != hashb) return 0; return !strcmp(a->cstr, b->cstr); } ``` ### cstr_cat2( ) ```c= static cstring cstr_cat2(const char *a, const char *b) { size_t sa = strlen(a), sb = strlen(b); if (sa + sb < CSTR_INTERNING_SIZE) { char tmp[CSTR_INTERNING_SIZE]; memcpy(tmp, a, sa); memcpy(tmp + sa, b, sb); tmp[sa + sb] = 0; return cstr_interning(tmp, sa + sb, hash_blob(tmp, sa + sb)); } cstring p = xalloc(sizeof(struct __cstr_data) + sa + sb + 1); if (!p) return NULL; char *ptr = (char *) (p + 1); p->cstr = ptr; p->type = 0; p->ref = 1; memcpy(ptr, a, sa); memcpy(ptr + sa, b, sb); ptr[sa + sb] = 0; p->hash_size = 0; return p; } ``` ### cstr_cat( ) ```c= cstring cstr_cat(cstr_buffer sb, const char *str) { cstring s = sb->str; if (s->type == CSTR_ONSTACK) { int i = s->hash_size; // CCC while (i < CSTR_STACK_SIZE - 1) { s->cstr[i] = *str; if (*str == 0) return s; ++s->hash_size; ++str; ++i; } s->cstr[i] = 0; } cstring tmp = s; sb->str = cstr_cat2(tmp->cstr, str); cstr_release(tmp); return sb->str; } ```