---
tags: linux2022
---
# 2022q1 Homework1 (quiz1)
contributed by < [Eddielin0926](https://github.com/Eddielin0926) >
> [quiz1 題目](https://hackmd.io/@sysprog/linux2022-quiz1)
## 測驗 1
參考 LeetCode 編號 1 的題目 Two Sum,以 hash table 的方式求解。
### 結構體定義
`hlist` 用於 hash table 的實作,它的資料結構定義在 [include/linux/types.h](https://github.com/torvalds/linux/blob/master/include/linux/types.h) 中。
採用 double link list 的方式,處理 [hash collision](https://en.wikipedia.org/wiki/Hash_collision) 的資料。
```c
struct hlist_node { struct hlist_node *next, **pprev; };
struct hlist_head { struct hlist_node *first; };
typedef struct { int bits; struct hlist_head *ht; } map_t;
```
![image alt](https://i.imgur.com/QYQLqvC.png)
`map_t` 中的 `bits` 用來表示 hash table 的長度,假設 hash 的值是 4 個位元的數值,那麼就有 $2^4$ 種可能,也就是 `1 << 4`, hash table 的長度為 16。
```c=
#define MAP_HASH_SIZE(bits) (1 << bits)
```
### Hash Table 初始化
根據 `bits` 產生長度是 $2^{bits}$ 的 `hlist_head` 的陣列。
```c=
map_t *map_init(int bits) {
map_t *map = malloc(sizeof(map_t));
if (!map)
return NULL;
map->bits = bits;
map->ht = malloc(sizeof(struct hlist_head) * MAP_HASH_SIZE(map->bits));
if (map->ht) {
for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++)
(map->ht)[i].first = NULL;
} else {
free(map);
map = NULL;
}
return map;
}
```
### Hash
#### Hash Key
double link list 的節點,包了雜湊值以及原本的資料。
```c
struct hash_key {
int key;
void *data;
struct hlist_node node;
};
```
#### container_of
利用 `offsetof` 反向找到包含這個屬性 (attribute) 的結構體 (struct) 位址。
```c
#define container_of(ptr, type, member) \
({ \
void *__mptr = (void *) (ptr); \
((type *) (__mptr - offsetof(type, member))); \
})
```
#### Hash Value
乘上 `GOLDEN_RATIO_32` 後,只取高位的位元,因為高位的位元經過乘法計算後受到的影響必較大,相對來說會比較隨機。
```c
#define GOLDEN_RATIO_32 0x61C88647
static inline unsigned int hash(unsigned int val, unsigned int bits) {
/* High bits are more random, so use them. */
return (val * GOLDEN_RATIO_32) >> (32 - bits);
}
```
將 `key` 做雜湊後,遍歷 hash table 來找對應的雜湊值。
```c
static struct hash_key *find_key(map_t *map, int key) {
struct hlist_head *head = &(map->ht)[hash(key, map->bits)];
for (struct hlist_node *p = head->first; p; p = p->next) {
struct hash_key *kn = container_of(p, struct hash_key, node);
if (kn->key == key)
return kn;
}
return NULL;
}
```
### Map
從 hash table 找對應的資料。
```c
void *map_get(map_t *map, int key)
{
struct hash_key *kn = find_key(map, key);
return kn ? kn->data : NULL;
}
```
當有新的資料進來,如果有找到 key 就直接回傳。
如果沒有找到就要插入到 hash table 中。
`AAA` 和 `BBB` 的答案其實就是要將節點插入到 list 的開頭。
```c
void map_add(map_t *map, int key, void *data)
{
struct hash_key *kn = find_key(map, key);
if (kn)
return;
kn = malloc(sizeof(struct hash_key));
kn->key = key, kn->data = data;
struct hlist_head *h = &map->ht[hash(key, map->bits)];
struct hlist_node *n = &kn->node, *first = h->first;
n->next = first; // AAA
if (first)
first->pprev = &n->next;
h->first = n;
n->pprev = &h->first; // BBB
}
```
`deinit` 就要要將 `map` 的記憶體空間釋放出來。
```c
void map_deinit(map_t *map)
{
if (!map)
return;
for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++) {
struct hlist_head *head = &map->ht[i];
for (struct hlist_node *p = head->first; p;) {
struct hash_key *kn = container_of(p, struct hash_key, node);
struct hlist_node *n = p;
p = p->next;
if (!n->pprev) /* unhashed */
goto bail;
struct hlist_node *next = n->next, **pprev = n->pprev;
*pprev = next;
if (next)
next->pprev = pprev;
n->next = NULL, n->pprev = NULL;
bail:
free(kn->data);
free(kn);
}
}
free(map);
}
```
整個 twoSum 的概念就是用 `taget - nums[i]` 做 hash table,每個 `nums[i]` 再去找是否有對應的 pair,如果有就會是答案。
```c
int *twoSum(int *nums, int numsSize, int target, int *returnSize)
{
map_t *map = map_init(10);
*returnSize = 0;
int *ret = malloc(sizeof(int) * 2);
if (!ret)
goto bail;
for (int i = 0; i < numsSize; i++) {
int *p = map_get(map, target - nums[i]);
if (p) { /* found */
ret[0] = i, ret[1] = *p;
*returnSize = 2;
break;
}
p = malloc(sizeof(int));
*p = i;
map_add(map, nums[i], p);
}
bail:
map_deinit(map);
return ret;
}
```
## 測驗 2
針對 LeetCode [82. Remove Duplicates from Sorted List II](https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/),以下是可能的合法 C 程式實作:
```c
#include <stddef.h>
struct ListNode {
int val;
struct ListNode *next;
};
struct ListNode *deleteDuplicates(struct ListNode *head)
{
if (!head)
return NULL;
if (head->next && head->val == head->next->val) { // COND1
/* Remove all duplicate numbers */
while (head->next && head->val == head->next->val) // COND2
head = head->next;
return deleteDuplicates(head->next);
}
head->next = deleteDuplicates(head->next);
return head;
}
```
| Question | Answer |
|:-------- |:-------------------------------------------- |
| COND1 | `head->next && head->val == head->next->val` |
| COND2 | `head->next && head->val == head->next->val` |
這邊的做法簡略了釋放記憶體空間的部分,只保留了移除節點的功能。當目前的節點與下一個節點的值相同,就尋找下一個值不相同得節點,並將兩個節點連接起來。
### 避免遞迴
用 `for` 迴圈取代遞迴的動作,利用指標的指標直接修改 `next` 所指向的下一個節點。
```c
#include <stddef.h>
struct ListNode {
int val;
struct ListNode *next;
};
struct ListNode *deleteDuplicates(struct ListNode *head)
{
if (!head)
return NULL;
for (struct ListNode **ptr = &head; *ptr != NULL; ptr = &((*ptr)->next))
while ((*ptr)->next && (*ptr)->val == (*ptr)->next->val)
*ptr = (*ptr)->next;
return head;
}
```
### circular doubly-linked list 改寫
#### 遞迴
因為遞迴結束的條件是回到 `head`,因此我多了一層函式來處理 `head`。
```c
#include <stddef.h>
struct ListNode
{
int val;
struct ListNode *next;
struct ListNode *prev;
};
struct ListNode *__deleteDuplicates(struct ListNode *head, struct ListNode *node)
{
if (!head || !node)
return NULL;
if (node == head)
return node;
/* Remove all duplicate numbers */
while (node->next != head && node->val == node->next->val)
node = node->next->next;
struct ListNode *next = __deleteDuplicates(head, node->next);
node->next = next;
next->prev = node;
return node;
}
void deleteDuplicates(struct ListNode *head) {
struct ListNode *next = __deleteDuplicates(head, head->next);
head->next = next;
next->prev = head;
}
```
#### 迭代
與 singly linked list 作法類似,但這邊就不用指標的指標了。
```c
#include <stddef.h>
struct ListNode
{
int val;
struct ListNode *next;
struct ListNode *prev;
};
struct ListNode *deleteDuplicates(struct ListNode *head)
{
if (!head)
return NULL;
for (struct ListNode *ptr = head->next; ptr != head; ptr = ptr->next)
{
while (ptr->next != head && ptr->val == ptr->next->val)
{
ptr->next->next->prev = ptr;
ptr->next = ptr->next->next;
}
}
return head;
}
```
## 測驗 3
針對 LeetCode [146. LRU Cache](https://leetcode.com/problems/lru-cache/),以下是 [Least Recently Used](https://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU)) (LRU) 可能的合法 C 程式實作
```c
#include <stdio.h>
#include <stdlib.h>
#include "list.h"
typedef struct {
int capacity, count;
struct list_head dhead, hheads[];
} LRUCache;
typedef struct {
int key, value;
struct list_head hlink, dlink;
} LRUNode;
LRUCache *lRUCacheCreate(int capacity)
{
LRUCache *obj = malloc(sizeof(*obj) + capacity * sizeof(struct list_head));
obj->count = 0;
obj->capacity = capacity;
INIT_LIST_HEAD(&obj->dhead);
for (int i = 0; i < capacity; i++)
INIT_LIST_HEAD(&obj->hheads[i]);
return obj;
}
void lRUCacheFree(LRUCache *obj)
{
LRUNode *lru, *n;
list_for_each_entry_safe (lru, n, &obj->dhead, dlink) { // MMM1
list_del(&lru->dlink);
free(lru);
}
free(obj);
}
int lRUCacheGet(LRUCache *obj, int key)
{
LRUNode *lru;
int hash = key % obj->capacity;
list_for_each_entry (lru, &obj->hheads[hash], hlink) { // MMM2
if (lru->key == key) {
list_move(&lru->dlink, &obj->dhead);
return lru->value;
}
}
return -1;
}
void lRUCachePut(LRUCache *obj, int key, int value)
{
LRUNode *lru;
int hash = key % obj->capacity;
list_for_each_entry (lru, &obj->hheads[hash], hlink) { // MMM3
if (lru->key == key) {
list_move(&lru->dlink, &obj->dhead);
lru->value = value;
return;
}
}
if (obj->count == obj->capacity) {
lru = list_entry(&obj->dhead, LRUNode, dlink); // MMM4
list_del(&lru->dlink);
list_del(&lru->hlink);
} else {
lru = malloc(sizeof(LRUNode));
obj->count++;
}
lru->key = key;
list_add(&lru->dlink, &obj->dhead);
list_add(&lru->hlink, &obj->hheads[hash]);
lru->value = value;
}
```
| Question | Answer |
|:-------- |:------------------------ |
| MMM1 | `list_for_each_entry_safe` |
| MMM2 | `list_for_each_entry` |
| MMMM3 | `list_for_each_entry` |
| MMM4 | `list_entry` |
### 解釋上述程式碼的運作
### 在 Linux 核心找出 LRU 相關程式碼並探討
[linux/include/linux/lru_cache.h](https://github.com/torvalds/linux/blob/9b76d71fa8be8c52dbc855ab516754f0c93e2980/include/linux/lru_cache.h) 和 [linux/lib/lru_cache.c](https://github.com/torvalds/linux/blob/master/lib/lru_cache.c)
## 測驗 4
```c
#include <stdio.h>
#include <stdlib.h>
#include "list.h"
struct seq_node {
int num;
struct list_head link;
};
static struct seq_node *find(int num, int size, struct list_head *heads)
{
struct seq_node *node;
int hash = num < 0 ? -num % size : num % size;
list_for_each_entry (node, &heads[hash], link) {
if (node->num == num)
return node;
}
return NULL;
}
int longestConsecutive(int *nums, int n_size)
{
int hash, length = 0;
struct seq_node *node;
struct list_head *heads = malloc(n_size * sizeof(*heads));
for (int i = 0; i < n_size; i++)
INIT_LIST_HEAD(&heads[i]);
for (int i = 0; i < n_size; i++) {
if (!find(nums[i], n_size, heads)) {
hash = nums[i] < 0 ? -nums[i] % n_size : nums[i] % n_size;
node = malloc(sizeof(*node));
node->num = nums[i];
list_add(&node->link, &heads[hash]);
}
}
for (int i = 0; i < n_size; i++) {
int len = 0;
int num;
node = find(nums[i], n_size, heads);
while (node) {
len++;
num = node->num;
list_del(&node->link);
int left = num, right = num;
while ((node = find(LLL, n_size, heads))) {
len++;
list_del(&node->link);
}
while ((node = find(RRR, n_size, heads))) {
len++;
list_del(&node->link);
}
length = len > length ? len : length;
}
}
return length;
}
```