--- tags: linux2022, linux --- # 2022q1 Homework1 (quiz1) contributed by < [Julian-Chu](https://github.com/Julian-Chu) > > [GitHub](https://github.com/Julian-Chu/linux2022-quizzes/quiz1) ## 測驗 1 ## 測驗 2 ### answer ```cpp= struct ListNode *deleteDuplicates(struct ListNode *head){ if(!head) return NULL; if(head->next && head->val == head->next->val){ while(head->next && head->val == head->next->val) head = head->next; //return deleteDuplicates(head->next); } head->next = deleteDuplicates(head->next); return head; } ``` 需要填入的程式碼相當直觀, 需要先檢查 `head->next` 存在才進行比較。 另外題目裏 `line 8` 的 `return` 會導致 linked list 斷開。 #### 簡化後 ```cpp struct ListNode *deleteDuplicates(struct ListNode *head){ if(!head) return NULL; while(head->next && head->val == head->next->val) head = head->next; head->next = deleteDuplicates(head->next); return head; } ``` #### pointer to pointer ```cpp struct ListNode *deleteDuplicates(struct ListNode *head){ struct ListNode **indirect = &head; while(*indirect && (*indirect)->next && (*indirect)->val == (*indirect)->next->val) *indirect = (*indirect)->next; if((*indirect)->next) (*indirect)->next = deleteDuplicates((*indirect)->next); return *indirect; } ``` ### 迭代版本 ```cpp struct ListNode *deleteDuplicates_iter(struct ListNode *head){ if(!head) return NULL; struct ListNode *cur = head; while(cur->next){ if(cur->val == cur->next->val){ cur->next = cur->next->next; continue } cur = cur->next; } return head; } ``` ```cpp struct ListNode *deleteDuplicates_iter(struct ListNode *head){ struct ListNode **indirect = &head; while(*indirect && (*indirect)->next){ if((*indirect)->val == (*indirect)->next->val){ *indirect = (*indirect)->next; continue; } indirect = &(*indirect)->next; } return head; ``` [測試程式碼](https://github.com/Julian-Chu/linux2022-quizzes/blob/main/quiz1/question2/question2.c#L93) ### 用 linux 風格 circular doubly-linked list 改寫 #### 前置準備 ```cpp #include <stddef.h> #include <stdlib.h> #include <stdio.h> #include <assert.h> #define container_of(ptr, type, member) \ ({ \ void *__mptr = (void *) (ptr); \ ((type *) (__mptr - offsetof(type, member))); \ }) struct list_head { struct list_head *next, *prev; }; struct ListNode{ int val; struct list_head node; }; ``` #### 遞迴 ```cpp struct list_head *deleteDuplicates(struct list_head *head) { if(!head) return NULL; struct ListNode *headNode = container_of(head, struct ListNode, node); if(head->next){ struct ListNode *nextNode = container_of(head->next, struct ListNode, node); while(head->next && headNode->val == nextNode->val){ head = head->next; headNode = nextNode; if(head->next) nextNode = container_of(head->next, struct ListNode, node); } } head->next = deleteDuplicates(head->next); return head; } ``` #### 迭代 ```cpp struct list_head *deleteDuplicates_iter(struct list_head *head) { if(!head) return NULL; struct list_head *cur = head; struct ListNode *curNode, *nextNode; curNode = container_of(head, struct ListNode, node); while(cur->next){ nextNode = container_of(cur->next, struct ListNode, node); if(curNode->val == nextNode->val){ cur->next = cur->next->next; }else{ cur = cur->next; curNode = nextNode; } } return head; } ``` [測試程式碼](https://github.com/Julian-Chu/linux2022-quizzes/blob/main/quiz1/question2/question2_linux_style.c#L112) :::info TODO: - 將函數簽章從 list_head 改成 ListNode - 改用 linux/list.h 的 API ::: --- ## 測驗 3 ### answer ```cpp LRUCache *lRUCacheCreate(int capacity) { LRUCache *obj = malloc(sizeof(*obj) + capacity * sizeof(struct list_head)); obj->count = 0; obj->capacity = capacity; INIT_LIST_HEAD(&obj->dhead); for (int i = 0; i < capacity; i++) INIT_LIST_HEAD(&obj->hheads[i]) return obj; } void lRUCacheFree(LRUCache *obj) { LRUNode *lru, *n; list_for_each_entry_safe(lru, n, &obj->dhead, dlink){ list_del(&lru->dlink); free(lru); } free(obj); } int lRUCacheGet(LRUCache *obj, int key){ LRUNode *lru; int hash = key % obj->capacity; list_for_each_entry(lru, &obj->hheads[hash], hlink){ if(lru->key == key){ list_move(&lru->dlink, &obj->dhead); return lru->value; } } return -1; } void lRUCachePut(LRUCache *obj, int key, int value){ LRUNode *lru; int hash = key% obj->capacity; list_for_each_entry(lru, &obj->hheads[hash], hlink){ if(lru->key == key){ list_move(&lru->dlink, &obj->dhead); lru->value = value; return; } } if(obj->count == obj->capacity){ lru = list_last_entry(&obj->dhead, LRUNode, dlink); list_del(&lru->dlink); list_del(&lru->hlink); }else{ lru = malloc(sizeof(LRUNode)); obj->count++; } lru->key = key; list_add(&lru->dlink, &obj->dhead); list_add(&lru->hlink, &obj->hheads[hash]); lru->value = value; } ``` ### 延伸問題 1 ```cpp typedef struct{ int capacity, count; struct list_head dhead, hheads[]; } LRUCache; typedef struct{ int key, value; struct list_head hlink, dlink; } LRUNode; ``` LRUCache 內部使用兩個資料結構,分別爲 list, hashtable #### hashtable ```cpp int hash = key % obj->capacity; // 利用 hash 尋找 hashtable 中的 head, 然後進行 list 查找 list_for_each_entry(lru, &obj->hheads[hash], hlink){ if(lru->key == key){ // 找到後將 cache 更新到 dhead 的最前端, 避免最近使用的 cache 因為容量已滿被移除 list_move(&lru->dlink, &obj->dhead); return lru->value; } } ``` hashtable 用於搜索資料, 相較於單一 list 的全路徑搜尋,利用 hashtable 可以有效的縮短搜尋路徑,單一 list 爲 $O(n)$ 時間複雜度, 搭配 hashtable 後理想狀況可變為 $O(\frac{n}{m})$, m 爲 hashtable 的容量 :::warning 明確說明是「時間」抑或「空間」複雜度。 :notes: jserv ::: #### list ```cpp // 容量已滿 if(obj->count == obj->capacity){ // list 上最後一個 Cache lru = list_last_entry(&obj->dhead, LRUNode, dlink); list_del(&lru->dlink); list_del(&lru->hlink); }else{ lru = malloc(sizeof(LRUNode)); obj->count++; } ``` list 用於判斷該 cache 是否可以移除, 當容量已滿,會將距離上次使用最久的 cache 移除 #### 測試程式碼和改進 測試程式碼 ```cpp int main(void){ printf("testing LRUCache with capacity 4\n"); LRUCache *cache = lRUCacheCreate(4); printf("put kvs: (1,1), (2,2), (3,3), (4,4)\n"); lRUCachePut(cache, 1,1); lRUCachePut(cache, 2,2); lRUCachePut(cache, 3,3); lRUCachePut(cache, 4,4); LRUNode *lru; lru = list_last_entry(&cache->dhead, LRUNode, dlink); assert(lru->key==1); printf("now least recently key is 1\n"); assert(lRUCacheGet(cache, 1) == 1); printf("get key 1 with its value 1\n"); lru = list_last_entry(&cache->dhead, LRUNode, dlink); assert(lru->value==2); printf("now least recently used key is 2\n"); printf("put kv: (5,5)\n"); lRUCachePut(cache, 5, 5); lru = list_first_entry(&cache->dhead, LRUNode, dlink); assert(lru->value==5); assert(lRUCacheGet(cache, 2)==-1); lru = list_last_entry(&cache->dhead, LRUNode, dlink); assert(lru->value==3); printf("now least recently used key is 3, 2 is removed\n"); printf("test passed\n"); } ``` ```shell testing LRUCache with capacity 4 put kvs: (1,1), (2,2), (3,3), (4,4) now least recently used key is 1 get key 1 with its value 1 now least recently used key is 2 put kv: (5,5) now least recently used key is 3, 2 is removed test passed ``` ##### 嘗試改進: - 將 cache 的容量限制爲 2^N 改用 GOLDEN_RATIO位元運算取代除法 hash (仿照測驗1 map) - 考慮 temporal locality 在 Get 與 Put 增加對 dhead 的 key 檢查, ```cpp #define GOLDEN_RATIO_32 0x61C88647 static inline unsigned int hash(unsigned int val, unsigned int bits) { /* High bits are more random, so use them. */ return (val * GOLDEN_RATIO_32) >> (32 - bits); } int lRUCacheGet(LRUCache *obj, int key){ LRUNode *lru; // 新增,檢查第一個 node lru = list_first_entry(&obj->dhead, LRUNode, dlink); if(lru->key == key) return lru->value; list_for_each_entry(lru, &obj->hheads[hash(key, obj->bits)], hlink){ if(lru->key == key){ list_move(&lru->dlink, &obj->dhead); // 新增: 將最新取用的 cache 在 hashtable 的位置移到 head list_move(&lru->hlink, &obj->hheads[hash(key, obj->bits)]); return lru->value; } } return -1; } void lRUCachePut(LRUCache *obj, int key, int value){ LRUNode *lru; list_for_each_entry(lru, &obj->hheads[hash(key, obj->bits)], hlink){ if(lru->key == key){ list_move(&lru->dlink, &obj->dhead); // 新增: 將最新取用的 cache 在 hashtable 的位置移到 head list_move(&lru->hlink, &obj->hheads[hash(key, obj->bits)]); lru->value = value; return; } } if(obj->count == obj->capacity){ lru = list_last_entry(&obj->dhead, LRUNode, dlink); list_del(&lru->dlink); list_del(&lru->hlink); }else{ lru = malloc(sizeof(LRUNode)); obj->count++; } lru->key = key; list_add(&lru->dlink, &obj->dhead); list_add(&lru->hlink, &obj->hheads[hash(key, obj->bits)]); lru->value = value; } ``` TODO: 測試性能差異 ### 延伸問題 2 #### DRBD [Distributed Replicated Block Device](https://en.wikipedia.org/wiki/Distributed_Replicated_Block_Device) [lru_cache.h](https://github.com/torvalds/linux/blob/master/include/linux/lru_cache.h) [lru_cache.c](https://github.com/torvalds/linux/blob/master/lib/lru_cache.c) Distributed Replicated Block Device 是 linux 上的分散式存儲系統, lru_cache 主要用來記錄追蹤遠端節點的狀態, 避免網路故障或是節點故障回復後的全狀態同步, 而用來追蹤狀態的 object 的記憶體空間實際上不會被釋放,會利用 LRU 選出 object 進行重複使用 #### Memory management(Page reclaim/Page Replacement) [Page Frame Reclamation](https://www.kernel.org/doc/gorman/html/understand/understand013.html) [mm/list_lru.c](https://github.com/torvalds/linux/blob/master/mm/list_lru.c) [list_lru.h](https://github.com/torvalds/linux/blob/master/include/linux/list_lru.h) [mm_types.h](https://github.com/torvalds/linux/blob/5bfc75d92efd494db37f5c4c173d3639d4772966/include/linux/mm_types.h#L70) [mm/workingset](https://github.com/torvalds/linux/blob/master/mm/workingset.c) LRU 大量的使用在記憶體管理, 藉由 page 的最近使用時間來決定,當記憶體不足的時候該釋放那部分的記憶體 --- ## 測驗 4 ### answer ```cpp #include <stdio.h> #include <stdlib.h> #include "list.h" #include <assert.h> struct seq_node{ int num; struct list_head link; }; // 根據值與給定的數列長度在 hash table 裡面尋找目標節點 static struct seq_node *find(int num, int size, struct list_head *heads) { struct seq_node *node; int hash = num < 0? -num%size:num%size; // 根據 hash 值從 head 開始查找 list_for_each_entry(node, &heads[hash], link){ // hash 值可能會重複,仍然需要比對數值本身 if (node->num == num) return node; } return NULL; } int longestConsecutive(int *nums, int n_size){ int hash, length = 0; struct seq_node *node; // 劃分記憶體空間給 hashtable struct list_head *heads = malloc(n_size * sizeof(*heads)); // 初始化 hashtable for (int i = 0; i<n_size; i++) INIT_LIST_HEAD(&heads[i]); // 將給定的數列中的數字放入 hashtable 中 for(int i = 0; i < n_size; i++){ if(!find(nums[i], n_size, heads)){ hash = nums[i] < 0? -nums[i] % n_size:nums[i] % n_size; node = malloc(sizeof(*node)); node->num = nums[i]; list_add(&node->link, &heads[hash]); } } for (int i = 0; i < n_size; i++){ int len = 0; int num; // 在 hashtable 中取出對應的節點 node = find(nums[i], n_size, heads); while(node){ len++; num = node->num; // 將已取用的節點從 hashtable 中移除 list_del(&node->link); // 以 node 的值爲中心點, 加減一尋找下一個相鄰的數字 int left = num, right = num; // 需要將值 -1 後在傳入,所以使用 --left 而不是 left-- while ((node = find(--left, n_size, heads))){ // 找到相鄰的節點長度加一並從 hashtable 中移除該節點 len++; list_del(&node->link); } // 需要將值 +1 後在傳入,所以使用 ++right 而不是 right++ while ((node = find(++right, n_size, heads))){ len++; list_del(&node->link); } // 新的長度大於已知的最大長度就更新值 length = len > length ? len:length; } } return length; } ``` ### 測試程式碼 ```cpp int main(void){ int nums1[4] = {0, 1, 2, 4}; assert(longestConsecutive(nums1, 4)== 3); int nums2[5] = {-1, 0, 1, 3, 4}; assert(longestConsecutive(nums2, 5)== 3); printf("tests passed\n"); } ```