--- tags: linux2022 --- # 2022q1 Homework1 (quiz1) contributed by < `Xx-oX` > > [題目](https://hackmd.io/@sysprog/linux2022-quiz1#2022q1-%E7%AC%AC-1-%E9%80%B1%E6%B8%AC%E9%A9%97%E9%A1%8C) ## 測驗 `1` 利用 [Hash Table](https://en.wikipedia.org/wiki/Hash_table) 解決 [LeetCode 1. Two Sum](https://leetcode.com/problems/two-sum/) 考慮以下實作,補完(AAA, BBB)並解釋運作原理(延伸題目 1) :::spoiler 題目程式碼 ```c #include <stddef.h> #include <stdlib.h> struct hlist_node { struct hlist_node *next, **pprev; }; struct hlist_head { struct hlist_node *first; }; typedef struct { int bits; struct hlist_head *ht; } map_t; #define MAP_HASH_SIZE(bits) (1 << bits) map_t *map_init(int bits) { map_t *map = malloc(sizeof(map_t)); if (!map) return NULL; map->bits = bits; map->ht = malloc(sizeof(struct hlist_head) * MAP_HASH_SIZE(map->bits)); if (map->ht) { for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++) (map->ht)[i].first = NULL; } else { free(map); map = NULL; } return map; } struct hash_key { int key; void *data; struct hlist_node node; }; #define container_of(ptr, type, member) \ ({ \ void *__mptr = (void *) (ptr); \ ((type *) (__mptr - offsetof(type, member))); \ }) #define GOLDEN_RATIO_32 0x61C88647 static inline unsigned int hash(unsigned int val, unsigned int bits) { /* High bits are more random, so use them. */ return (val * GOLDEN_RATIO_32) >> (32 - bits); } static struct hash_key *find_key(map_t *map, int key) { struct hlist_head *head = &(map->ht)[hash(key, map->bits)]; for (struct hlist_node *p = head->first; p; p = p->next) { struct hash_key *kn = container_of(p, struct hash_key, node); if (kn->key == key) return kn; } return NULL; } void *map_get(map_t *map, int key) { struct hash_key *kn = find_key(map, key); return kn ? kn->data : NULL; } void map_add(map_t *map, int key, void *data) { struct hash_key *kn = find_key(map, key); if (kn) return; kn = malloc(sizeof(struct hash_key)); kn->key = key, kn->data = data; struct hlist_head *h = &map->ht[hash(key, map->bits)]; struct hlist_node *n = &kn->node, *first = h->first; AAA; if (first) first->pprev = &n->next; h->first = n; BBB; } void map_deinit(map_t *map) { if (!map) return; for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++) { struct hlist_head *head = &map->ht[i]; for (struct hlist_node *p = head->first; p;) { struct hash_key *kn = container_of(p, struct hash_key, node); struct hlist_node *n = p; p = p->next; if (!n->pprev) /* unhashed */ goto bail; struct hlist_node *next = n->next, **pprev = n->pprev; *pprev = next; if (next) next->pprev = pprev; n->next = NULL, n->pprev = NULL; bail: free(kn->data); free(kn); } } free(map); } int *twoSum(int *nums, int numsSize, int target, int *returnSize) { map_t *map = map_init(10); *returnSize = 0; int *ret = malloc(sizeof(int) * 2); if (!ret) goto bail; for (int i = 0; i < numsSize; i++) { int *p = map_get(map, target - nums[i]); if (p) { /* found */ ret[0] = i, ret[1] = *p; *returnSize = 2; break; } p = malloc(sizeof(int)); *p = i; map_add(map, nums[i], p); } bail: map_deinit(map); return ret; } ``` ::: ### 運作原理 尋訪所有 nums 中的數字,在 Hash table 中找尋是否有 target 減去該數字的值 如果沒有就將此數字加入 Hash table 加入數字到 Hash table 時會先檢查是否已經存在了,如果沒有就在 `hlist_head` 陣列中相應 index 值的串列開頭加入新的節點 (參考下方說明) ### 資料結構 ```c struct hlist_node { struct hlist_node *next, **pprev; }; struct hlist_head { struct hlist_node *first; }; typedef struct { int bits; struct hlist_head *ht; } map_t; struct hash_key { int key; void *data; struct hlist_node node; }; ``` * `hlist_head` : Hash table 的起始節點,可以節省起始端 `**pprev` 的空間 * `hlist_node` : Hash table 的節點,其中 `**pprev` 是上一個節點的指標的指標,用來直接存取上一個節點的 `*next` ,以便在 $O(1)$ 的時間複雜度內完成加入 list 之類的操作 ```graphviz digraph g_hlist_node{ rankdir = LR node[shape=record] hn1 [label="<a>hlist_node_a|{<pprev>**pprev|<next>*next}"] hn2 [label="<a>hlist_node_b|{<pprev>**pprev|<next>*next}"] hn1:next -> hn2:a hn2:pprev -> hn1:next } ``` * `hash_key` : 用來存放資料的 bucket 底部用 `hlist_node` 連結,跟 `lab0` 中的 `element_t` 相似 ```graphviz digraph g_hash_key{ rankdir = LR node[shape=record] hh [label="<a>hlist_head|<p>first"] hk [label="<a>hash_key|key|data|<p>hlist_node|{**pprev|*next}"] hh:p -> hk:p } ``` * `map_t` : 整個 Hash table 的起始,其中 `bits` 表示 Hash table 的大小,`ht` 則指向 `hlist_head` 的陣列 ```graphviz digraph g_hash_table{ rankdir = LR node[shape=record] m [label="<a>map_t|bits|<p>ht"] hharray [label="<a>hlist_head[MAP_HASH_SIZE(bits)]|{hlist_head_1|<hh1f>first}|{hlist_head_2|<hh2f>first}|..."] hn1_1 [label="<a>hlist_node_1_1|{<pprev>**pprev|<next>*next}"] hn1_2 [label="<a>hlist_node_1_2|{<pprev>**pprev|<next>*next}"] hn2_1 [label="<a>hlist_node_2_1|{<pprev>**pprev|<next>*next}"] node[shape=plaintext] p1 [label="..."] p2 [label="..."] m:p -> hharray:a hharray:hh1f -> hn1_1:a hn1_1:pprev -> hharray:hh1f hn1_1:next -> hn1_2:a hn1_2:pprev -> hn1_1:next hn1_2:next -> p1 hharray:hh2f -> hn2_1:a hn2_1:pprev -> hharray:hh2f hn2_1:next -> p2 } ``` ### 函式 `map_init` : 初始化整個 Hash table 以及 `hlist_head` 陣列 `hash` : 此 Hash table 的 Hash function,回傳`(val * GOLDEN_RATIO_32) >> (32 - bits)` 作為 Hash table 的 index `find_key` : 查詢 Hash table 中有無某個 Key 值的資料,有的話回傳該 `hash_key` `map_get` : 包裝 `find_key`,給最後 `twoSum` 做整數運算 > 回傳型態為 void* ,待後續呼叫作顯式轉型 `map_add` : 新增一筆資料到 Hash table 中 `map_deinit` : 斷開 Hash table 中的連結,並釋放占用的記憶體空間 `two_sum` : 解決問題的主程式,在 Hash table 中查找有無符合條件的數字 ### 填空 ```c void map_add(map_t *map, int key, void *data) { struct hash_key *kn = find_key(map, key); if (kn) return; kn = malloc(sizeof(struct hash_key)); kn->key = key, kn->data = data; struct hlist_head *h = &map->ht[hash(key, map->bits)]; struct hlist_node *n = &kn->node, *first = h->first; AAA; if (first) first->pprev = &n->next; h->first = n; BBB; } ``` AAA 應填入 `n->next = first` 由後三行程式碼可以發現是將新節點 `n` 接在 list 的開頭 BBB 應填入 `n->pprev = &h->first`,將 `n` 與 `h` 相連 (從上一行 `h->first = n` 得知) 結果如下圖 ```graphviz digraph g_aaa{ rankdir = LR node[shape=record] h [label="<a>h|<p>first"] n [label="<a>n|{<p>**pprev|<n>*next}"] o [label="<a>first|{<p>**pprev|*next}"] n:n -> o:a o:p -> n:n h:p -> n:a n:p -> h } ``` ## 測驗 `2` 考慮以下針對 [LeetCode 82. Remove Duplicates from Sorted List II](https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/) 的實作 ```c #include <stddef.h> struct ListNode { int val; struct ListNode *next; }; struct ListNode *deleteDuplicates(struct ListNode *head) { if (!head) return NULL; if (COND1) { /* Remove all duplicate numbers */ while (COND2) head = head->next; return deleteDuplicates(head->next); } head->next = deleteDuplicates(head->next); return head; } ``` 填入 COND1 及 COND2,並 * 嘗試避免遞迴,寫出同樣作用的程式碼 * 以類似 Linux 核心的 circular doubly-linked list 改寫,撰寫遞迴和迭代 (iterative) 的程式碼 ### 填空 目標是要在已排序的串列中刪除重複的節點 故 COND1 應填入 `head->next && head->value == head->next->value` 檢查此節點的值是否等於下個節點的值,其中需要檢查下個節點是否存在 而 COND2 那行的 while 是為了尋訪後續數值相同的節點,所以條件與 COND1 相同,填入 `head->next && head->value == head->next->value` ### 迭代版本的程式碼 ```c #include <stddef.h> struct ListNode { int val; struct ListNode *next; }; void deleteDuplicates(struct ListNode *head) { if (!head) return; struct ListNode *p = head; struct ListNode *pp = &head->next; for (; p->next; pp = &p->next, p = p->next) { if (p->val == p->next->val) { int c = p->val; for (p = p->next; p->next; p = p->next) { if (p->next->val != c) break; } pp->next = p->next; } } } ``` 改寫自 [Xx-oX/lab0-c](https://github.com/Xx-oX/lab0-c/blob/master/queue.c) 當中的 `q_delete_dup`,使用 `pointer to pointer` 的技巧來連接節點 ### Circular doubly-linked list 版本 參見 [Xx-oX/lab0-c](https://github.com/Xx-oX/lab0-c/blob/master/queue.c) 當中的 `q_delete_dup` ## 測驗 `3` 考慮以下針對 [LeetCode 146. LRU Cache](https://leetcode.com/problems/lru-cache/) 的實作 ```c #include <stdio.h> #include <stdlib.h> #include "list.h" typedef struct { int capacity, count; struct list_head dhead, hheads[]; } LRUCache; typedef struct { int key, value; struct list_head hlink, dlink; } LRUNode; LRUCache *lRUCacheCreate(int capacity) { LRUCache *obj = malloc(sizeof(*obj) + capacity * sizeof(struct list_head)); obj->count = 0; obj->capacity = capacity; INIT_LIST_HEAD(&obj->dhead); for (int i = 0; i < capacity; i++) INIT_LIST_HEAD(&obj->hheads[i]); return obj; } void lRUCacheFree(LRUCache *obj) { LRUNode *lru, *n; MMM1 (lru, n, &obj->dhead, dlink) { list_del(&lru->dlink); free(lru); } free(obj); } int lRUCacheGet(LRUCache *obj, int key) { LRUNode *lru; int hash = key % obj->capacity; MMM2 (lru, &obj->hheads[hash], hlink) { if (lru->key == key) { list_move(&lru->dlink, &obj->dhead); return lru->value; } } return -1; } void lRUCachePut(LRUCache *obj, int key, int value) { LRUNode *lru; int hash = key % obj->capacity; MMM3 (lru, &obj->hheads[hash], hlink) { if (lru->key == key) { list_move(&lru->dlink, &obj->dhead); lru->value = value; return; } } if (obj->count == obj->capacity) { lru = MMM4(&obj->dhead, LRUNode, dlink); list_del(&lru->dlink); list_del(&lru->hlink); } else { lru = malloc(sizeof(LRUNode)); obj->count++; } lru->key = key; list_add(&lru->dlink, &obj->dhead); list_add(&lru->hlink, &obj->hheads[hash]); lru->value = value; } ``` 填入 MMM1, MMM2, MMM3 以及 MMM4,並 * 解釋上述程式碼的運作,撰寫完整的測試程式,指出其中可改進之處並實作 * 在 Linux 核心找出 LRU 相關程式碼並探討 ### 填空 MMM1 應填入 `list_for_each_entry_safe`,作用是要釋出記憶體,故使用安全的遍歷版本 MMM2 與 MMM3 應填入 `list_for_each_entry`,作用是在 `obj->hheads[hash]` 中遍歷,並使用對應的 `entry` MMM4 應填入 `list_last_entry`,作用是移除串列最後的節點 ## 測驗 `4` 考慮以下針對 [LeetCode 128. Longest Consecutive Sequence](https://leetcode.com/problems/longest-consecutive-sequence/description/) 的實作 ```c #include <stdio.h> #include <stdlib.h> #include "list.h" struct seq_node { int num; struct list_head link; }; static struct seq_node *find(int num, int size, struct list_head *heads) { struct seq_node *node; int hash = num < 0 ? -num % size : num % size; list_for_each_entry (node, &heads[hash], link) { if (node->num == num) return node; } return NULL; } int longestConsecutive(int *nums, int n_size) { int hash, length = 0; struct seq_node *node; struct list_head *heads = malloc(n_size * sizeof(*heads)); for (int i = 0; i < n_size; i++) INIT_LIST_HEAD(&heads[i]); for (int i = 0; i < n_size; i++) { if (!find(nums[i], n_size, heads)) { hash = nums[i] < 0 ? -nums[i] % n_size : nums[i] % n_size; node = malloc(sizeof(*node)); node->num = nums[i]; list_add(&node->link, &heads[hash]); } } for (int i = 0; i < n_size; i++) { int len = 0; int num; node = find(nums[i], n_size, heads); while (node) { len++; num = node->num; list_del(&node->link); int left = num, right = num; while ((node = find(LLL, n_size, heads))) { len++; list_del(&node->link); } while ((node = find(RRR, n_size, heads))) { len++; list_del(&node->link); } length = len > length ? len : length; } } return length; } ``` 填入 LLL 及 RRR,並 * 解釋上述程式碼的運作,撰寫完整的測試程式,指出其中可改進之處並實作 * 嘗試用 Linux 核心風格的 hash table 重新實作上述程式碼 ### 填空 LLL 應填入 `--left` RRR 應填入 `++right` 從 `left`, `right` 的初始值都是 `num` 即可判斷是要先做加減 (preinc, predec) 而 `left` 往左移動,故減 1 ; `right` 往右移動,故加 1,相當直覺