2022q1 Homework1 (quiz1)

--- tags: linux2022 --- # 2022q1 Homework1 (quiz1) contributed by < [Eddielin0926](https://github.com/Eddielin0926) > > [quiz1 題目](https://hackmd.io/@sysprog/linux2022-quiz1) ## 測驗 1 參考 LeetCode 編號 1 的題目 Two Sum，以 hash table 的方式求解。 ### 結構體定義 `hlist` 用於 hash table 的實作，它的資料結構定義在 [include/linux/types.h](https://github.com/torvalds/linux/blob/master/include/linux/types.h) 中。採用 double link list 的方式，處理 [hash collision](https://en.wikipedia.org/wiki/Hash_collision) 的資料。 ```c struct hlist_node { struct hlist_node *next, **pprev; }; struct hlist_head { struct hlist_node *first; }; typedef struct { int bits; struct hlist_head *ht; } map_t; ``` ![image alt](https://i.imgur.com/QYQLqvC.png) `map_t` 中的 `bits` 用來表示 hash table 的長度，假設 hash 的值是 4 個位元的數值，那麼就有 $2^4$ 種可能，也就是 `1 << 4`， hash table 的長度為 16。 ```c= #define MAP_HASH_SIZE(bits) (1 << bits) ``` ### Hash Table 初始化根據 `bits` 產生長度是 $2^{bits}$ 的 `hlist_head` 的陣列。 ```c= map_t *map_init(int bits) { map_t *map = malloc(sizeof(map_t)); if (!map) return NULL; map->bits = bits; map->ht = malloc(sizeof(struct hlist_head) * MAP_HASH_SIZE(map->bits)); if (map->ht) { for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++) (map->ht)[i].first = NULL; } else { free(map); map = NULL; } return map; } ``` ### Hash #### Hash Key double link list 的節點，包了雜湊值以及原本的資料。 ```c struct hash_key { int key; void *data; struct hlist_node node; }; ``` #### container_of 利用 `offsetof` 反向找到包含這個屬性 (attribute) 的結構體 (struct) 位址。 ```c #define container_of(ptr, type, member) \ ({ \ void *__mptr = (void *) (ptr); \ ((type *) (__mptr - offsetof(type, member))); \ }) ``` #### Hash Value 乘上 `GOLDEN_RATIO_32` 後，只取高位的位元，因為高位的位元經過乘法計算後受到的影響必較大，相對來說會比較隨機。 ```c #define GOLDEN_RATIO_32 0x61C88647 static inline unsigned int hash(unsigned int val, unsigned int bits) { /* High bits are more random, so use them. */ return (val * GOLDEN_RATIO_32) >> (32 - bits); } ``` 將 `key` 做雜湊後，遍歷 hash table 來找對應的雜湊值。 ```c static struct hash_key *find_key(map_t *map, int key) { struct hlist_head *head = &(map->ht)[hash(key, map->bits)]; for (struct hlist_node *p = head->first; p; p = p->next) { struct hash_key *kn = container_of(p, struct hash_key, node); if (kn->key == key) return kn; } return NULL; } ``` ### Map 從 hash table 找對應的資料。 ```c void *map_get(map_t *map, int key) { struct hash_key *kn = find_key(map, key); return kn ? kn->data : NULL; } ``` 當有新的資料進來，如果有找到 key 就直接回傳。如果沒有找到就要插入到 hash table 中。 `AAA` 和 `BBB` 的答案其實就是要將節點插入到 list 的開頭。 ```c void map_add(map_t *map, int key, void *data) { struct hash_key *kn = find_key(map, key); if (kn) return; kn = malloc(sizeof(struct hash_key)); kn->key = key, kn->data = data; struct hlist_head *h = &map->ht[hash(key, map->bits)]; struct hlist_node *n = &kn->node, *first = h->first; n->next = first; // AAA if (first) first->pprev = &n->next; h->first = n; n->pprev = &h->first; // BBB } ``` `deinit` 就要要將 `map` 的記憶體空間釋放出來。 ```c void map_deinit(map_t *map) { if (!map) return; for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++) { struct hlist_head *head = &map->ht[i]; for (struct hlist_node *p = head->first; p;) { struct hash_key *kn = container_of(p, struct hash_key, node); struct hlist_node *n = p; p = p->next; if (!n->pprev) /* unhashed */ goto bail; struct hlist_node *next = n->next, **pprev = n->pprev; *pprev = next; if (next) next->pprev = pprev; n->next = NULL, n->pprev = NULL; bail: free(kn->data); free(kn); } } free(map); } ``` 整個 twoSum 的概念就是用 `taget - nums[i]` 做 hash table，每個 `nums[i]` 再去找是否有對應的 pair，如果有就會是答案。 ```c int *twoSum(int *nums, int numsSize, int target, int *returnSize) { map_t *map = map_init(10); *returnSize = 0; int *ret = malloc(sizeof(int) * 2); if (!ret) goto bail; for (int i = 0; i < numsSize; i++) { int *p = map_get(map, target - nums[i]); if (p) { /* found */ ret[0] = i, ret[1] = *p; *returnSize = 2; break; } p = malloc(sizeof(int)); *p = i; map_add(map, nums[i], p); } bail: map_deinit(map); return ret; } ``` ## 測驗 2 針對 LeetCode [82. Remove Duplicates from Sorted List II](https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/)，以下是可能的合法 C 程式實作: ```c #include <stddef.h> struct ListNode { int val; struct ListNode *next; }; struct ListNode *deleteDuplicates(struct ListNode *head) { if (!head) return NULL; if (head->next && head->val == head->next->val) { // COND1 /* Remove all duplicate numbers */ while (head->next && head->val == head->next->val) // COND2 head = head->next; return deleteDuplicates(head->next); } head->next = deleteDuplicates(head->next); return head; } ``` | Question | Answer | |:-------- |:-------------------------------------------- | | COND1 | `head->next && head->val == head->next->val` | | COND2 | `head->next && head->val == head->next->val` | 這邊的做法簡略了釋放記憶體空間的部分，只保留了移除節點的功能。當目前的節點與下一個節點的值相同，就尋找下一個值不相同得節點，並將兩個節點連接起來。 ### 避免遞迴用 `for` 迴圈取代遞迴的動作，利用指標的指標直接修改 `next` 所指向的下一個節點。 ```c #include <stddef.h> struct ListNode { int val; struct ListNode *next; }; struct ListNode *deleteDuplicates(struct ListNode *head) { if (!head) return NULL; for (struct ListNode **ptr = &head; *ptr != NULL; ptr = &((*ptr)->next)) while ((*ptr)->next && (*ptr)->val == (*ptr)->next->val) *ptr = (*ptr)->next; return head; } ``` ### circular doubly-linked list 改寫 #### 遞迴因為遞迴結束的條件是回到 `head`，因此我多了一層函式來處理 `head`。 ```c #include <stddef.h> struct ListNode { int val; struct ListNode *next; struct ListNode *prev; }; struct ListNode *__deleteDuplicates(struct ListNode *head, struct ListNode *node) { if (!head || !node) return NULL; if (node == head) return node; /* Remove all duplicate numbers */ while (node->next != head && node->val == node->next->val) node = node->next->next; struct ListNode *next = __deleteDuplicates(head, node->next); node->next = next; next->prev = node; return node; } void deleteDuplicates(struct ListNode *head) { struct ListNode *next = __deleteDuplicates(head, head->next); head->next = next; next->prev = head; } ``` #### 迭代與 singly linked list 作法類似，但這邊就不用指標的指標了。 ```c #include <stddef.h> struct ListNode { int val; struct ListNode *next; struct ListNode *prev; }; struct ListNode *deleteDuplicates(struct ListNode *head) { if (!head) return NULL; for (struct ListNode *ptr = head->next; ptr != head; ptr = ptr->next) { while (ptr->next != head && ptr->val == ptr->next->val) { ptr->next->next->prev = ptr; ptr->next = ptr->next->next; } } return head; } ``` ## 測驗 3 針對 LeetCode [146. LRU Cache](https://leetcode.com/problems/lru-cache/)，以下是 [Least Recently Used](https://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU)) (LRU) 可能的合法 C 程式實作 ```c #include <stdio.h> #include <stdlib.h> #include "list.h" typedef struct { int capacity, count; struct list_head dhead, hheads[]; } LRUCache; typedef struct { int key, value; struct list_head hlink, dlink; } LRUNode; LRUCache *lRUCacheCreate(int capacity) { LRUCache *obj = malloc(sizeof(*obj) + capacity * sizeof(struct list_head)); obj->count = 0; obj->capacity = capacity; INIT_LIST_HEAD(&obj->dhead); for (int i = 0; i < capacity; i++) INIT_LIST_HEAD(&obj->hheads[i]); return obj; } void lRUCacheFree(LRUCache *obj) { LRUNode *lru, *n; list_for_each_entry_safe (lru, n, &obj->dhead, dlink) { // MMM1 list_del(&lru->dlink); free(lru); } free(obj); } int lRUCacheGet(LRUCache *obj, int key) { LRUNode *lru; int hash = key % obj->capacity; list_for_each_entry (lru, &obj->hheads[hash], hlink) { // MMM2 if (lru->key == key) { list_move(&lru->dlink, &obj->dhead); return lru->value; } } return -1; } void lRUCachePut(LRUCache *obj, int key, int value) { LRUNode *lru; int hash = key % obj->capacity; list_for_each_entry (lru, &obj->hheads[hash], hlink) { // MMM3 if (lru->key == key) { list_move(&lru->dlink, &obj->dhead); lru->value = value; return; } } if (obj->count == obj->capacity) { lru = list_entry(&obj->dhead, LRUNode, dlink); // MMM4 list_del(&lru->dlink); list_del(&lru->hlink); } else { lru = malloc(sizeof(LRUNode)); obj->count++; } lru->key = key; list_add(&lru->dlink, &obj->dhead); list_add(&lru->hlink, &obj->hheads[hash]); lru->value = value; } ``` | Question | Answer | |:-------- |:------------------------ | | MMM1 | `list_for_each_entry_safe` | | MMM2 | `list_for_each_entry` | | MMMM3 | `list_for_each_entry` | | MMM4 | `list_entry` | ### 解釋上述程式碼的運作 ### 在 Linux 核心找出 LRU 相關程式碼並探討 [linux/include/linux/lru_cache.h](https://github.com/torvalds/linux/blob/9b76d71fa8be8c52dbc855ab516754f0c93e2980/include/linux/lru_cache.h) 和 [linux/lib/lru_cache.c](https://github.com/torvalds/linux/blob/master/lib/lru_cache.c) ## 測驗 4 ```c #include <stdio.h> #include <stdlib.h> #include "list.h" struct seq_node { int num; struct list_head link; }; static struct seq_node *find(int num, int size, struct list_head *heads) { struct seq_node *node; int hash = num < 0 ? -num % size : num % size; list_for_each_entry (node, &heads[hash], link) { if (node->num == num) return node; } return NULL; } int longestConsecutive(int *nums, int n_size) { int hash, length = 0; struct seq_node *node; struct list_head *heads = malloc(n_size * sizeof(*heads)); for (int i = 0; i < n_size; i++) INIT_LIST_HEAD(&heads[i]); for (int i = 0; i < n_size; i++) { if (!find(nums[i], n_size, heads)) { hash = nums[i] < 0 ? -nums[i] % n_size : nums[i] % n_size; node = malloc(sizeof(*node)); node->num = nums[i]; list_add(&node->link, &heads[hash]); } } for (int i = 0; i < n_size; i++) { int len = 0; int num; node = find(nums[i], n_size, heads); while (node) { len++; num = node->num; list_del(&node->link); int left = num, right = num; while ((node = find(LLL, n_size, heads))) { len++; list_del(&node->link); } while ((node = find(RRR, n_size, heads))) { len++; list_del(&node->link); } length = len > length ? len : length; } } return length; } ```