---
tags: linux kernel
---
# 2022q1 Homework1 (quiz1)
contributed by < `zoanana990` >
> [GitHub](https://hackmd.io/@sysprog/linux2022-quiz1)
## 測驗 `1`
:::info
題目
- [x] 請補完程式碼
- [x] 解釋上述程式碼運作原理
- [x] 研讀 Linux 核心原始程式碼 include/linux/hashtable.h 及對應的文件 How does the kernel implements Hashtables?,解釋 hash table 的設計和實作手法,並留意到 tools/include/linux/hash.h 的 GOLDEN_RATIO_PRIME,探討其實作考量
:::
本題分兩個部分進行討論:
1. `linux kernel` 實作雜湊表的資料結構、雜湊函數
2. 本題實作 `two sum` 的程式碼
### 雜湊表
#### 基本資料結構
```c
struct hlist_node { struct hlist_node *next, **pprev; };
```
```graphviz
digraph{
struct [shape=record, label= "{{<p>pprev|<n>next}}"];
}
```
```c
struct hlist_head { struct hlist_node *first; };
```
```graphviz
digraph{
struct [shape=record, label= "first"];
}
```
```c
typedef struct { int bits; struct hlist_head *ht; } map_t;
```
```graphviz
digraph{
struct [shape=record, label= "bits|hash list head"];
}
```
```c
struct hash_key {
int key;
void *data;
struct hlist_node node;
};
```
```graphviz
digraph{
rankdir="LR";
struct [shape=record, label= "key|data|{<p>pprev|<n>next}"];
}
```
#### 雜湊表初始化
- 雜湊表的尺寸定義如下
```c
#define MAP_HASH_SIZE(bits) (1 << bits)
```
- 尺寸為 n bits ↔ $2^n$ 格
- 雜湊表初始化函數
```c
map_t *map_init(int bits) {
// malloc a space for the hash table
map_t *map = malloc(sizeof(map_t));
if (!map)
return NULL;
map->bits = bits;
// the size of hashtable us 2^n bits
map->ht = malloc(sizeof(struct hlist_head) * MAP_HASH_SIZE(map->bits));
if (map->ht) {
// let every list head point to NULL
for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++)
(map->ht)[i].first = NULL;
} else {
free(map);
map = NULL;
}
return map;
}
```
- 圖表化:初始化結果
```graphviz
digraph{
rankdir="LR";
map_t [shape=record, label= "map|bits|<h>hlist_head"];
hlist_head[shape=record, label="<h>hlist_head|<1>key_1|<2>key_2|<3>key_3|<4>key_4|<5>..."]
map_t:h->hlist_head:h
hlist_head:1->NULL
hlist_head:2->NULL
hlist_head:3->NULL
hlist_head:4->NULL
hlist_head:5->NULL
}
```
#### 雜湊函數
瞭解基本的雜湊表資料結構後,現在進行雜湊函數的頗析
```c
#define GOLDEN_RATIO_32 0x61C88647
static inline unsigned int hash(unsigned int val, unsigned int bits) {
/* High bits are more random, so use them. */
return (val * GOLDEN_RATIO_32) >> (32 - bits);
}
```
在了解什麼是 `0x61c88647` 之前,我們先要了解什麼是[雜湊函數(hash function)](https://en.wikipedia.org/wiki/Hash_function),雜湊函數目的是將 `<key, value>` 進行映射 (mapping) ,期望輸入 `key` 可以利用雜湊函數得到期望的 `value` ,這個結果可以得到
本題使用的雜湊函數為[斐波那契法](https://book.huihoo.com/data-structures-and-algorithms-with-object-oriented-design-patterns-in-c++/html/page214.html),其中 `0x61c88647` 為 `INT32` 除以黃金比例所得到的結果,其可以讓一個亂數序列取得映射函數,並可以最大化分散輸入值,如下圖
![](https://i.imgur.com/nf2fUb0.png)
如果是使用餘數法,則有可能造成一個鍵 (key) 對應多個值 (value) ,這樣就不是一個好的雜湊函數,如下所示:
![](https://i.imgur.com/ee5lb0b.png)
測試原始碼,這邊用 `python` 比較方便,因此用 `python` 撰寫:
```python
import numpy as np
import matplotlib.pyplot as plt
if __name__ == '__main__':
N = 30
bit = 10
x = np.random.randint(10000,size=N)
print(x)
y = np.right_shift(np.uint64(x * 0x61C88647),np.uint64(22))
z = x % 4
plt.scatter(x, y)
plt.show()
```
如果使用其他的數字呢?不使用黃金比例對於雜湊函數有什麼影響?
這邊推薦一個連結
### 雜湊函數
```c
static struct hash_key *find_key(map_t *map, int key) {
// find the list_head by hash function
struct hlist_head *head = &(map->ht)[hash(key, map->bits)];
for (struct hlist_node *p = head->first; p; p = p->next) {
struct hash_key *kn = container_of(p, struct hash_key, node);
if (kn->key == key)
return kn;
}
return NULL;
}
```
上面我們知道,`hlist_head` 其實是一個陣列,因此我們可以在 $O(1)$ 的時間內利用雜湊函數找到對應的節點,然後對後續的節點進行走訪取得想要的資料,其圖示化結果如下:
```graphviz
digraph{
rankdir="LR";
map_t [shape=record, label= "map|bits|<h>hlist_head"];
hlist_head[shape=record, label="<h>hlist_head|<1>key_1|<2>key_2|<3>key_3|<4>key_4|<5>..."]
hash_key_1 [shape=record, label= "key_1|<d>data|{<p>pprev|<n>next}"];
hash_key_2 [shape=record, label= "key_2|<d>data|{<p>pprev|<n>next}"];
hash_key_3 [shape=record, label= "key_3|<d>data|{<p>pprev|<n>next}"];
map_t:h->hlist_head:h
hlist_head:1->hash_key_1:d
hash_key_1:n->NULL
hash_key_1:p->hlist_head:1
hlist_head:2->hash_key_2:d
hash_key_2:n->NULL
hash_key_2:p->hlist_head:2
hlist_head:3->hash_key_3:d
hash_key_3:n->NULL
hash_key_3:p->hlist_head:3
hlist_head:4->NULL
hlist_head:5->NULL
}
```
假設今天使用雜湊函數算出來是 `key_1` 的話,會進行節點走訪,並且對應其鍵(key)值有沒有對應,若值正確,則可以使用 `*map_get` 函數進行資料回傳
```c
void *map_get(map_t *map, int key)
{
struct hash_key *kn = find_key(map, key);
return kn ? kn->data : NULL;
}
```
插入節點
```c
void map_add(map_t *map, int key, void *data)
{
struct hash_key *kn = find_key(map, key);
if (kn)
return;
kn = malloc(sizeof(struct hash_key));
kn->key = key, kn->data = data;
struct hlist_head *h = &map->ht[hash(key, map->bits)];
struct hlist_node *n = &kn->node, *first = h->first;
AAA;
if (first)
first->pprev = &n->next;
h->first = n;
BBB;
}
```
這邊的程式碼是想要插入節點,需要考慮到 `pprev` 是使用間接指標,以及程式碼中需要考慮到 `first` 存在的問題
```c
if (first)
first->pprev = &n->next;
```
也就是說,插入的節點是在 `h->first` 的位置,因此可以 `AAA` 可以填入 `n->next=first` , `BBB` 則可以填入 `n->pprev = &h->first;` 因為插入節點的前一個位置是 `hlist_head` ,填完程式碼如下
```c
void map_add(map_t *map, int key, void *data)
{
struct hash_key *kn = find_key(map, key);
if (kn)
return;
kn = malloc(sizeof(struct hash_key));
kn->key = key, kn->data = data;
struct hlist_head *h = &map->ht[hash(key, map->bits)];
struct hlist_node *n = &kn->node, *first = h->first;
n->next = first;
if (first)
first->pprev = &n->next;
h->first = n;
n->pprev = &h->first;
}
```
接下來是將 `hash table` 刪除,這個過程比較繁瑣,但是大體上是與 `linked list` 一樣的。先把各個節點獨立,將指標項釋放,再將節點釋放,節點都是放完成後將 `hlist_head` 釋放,最後將一開始的 `map` 釋放,就完成了
:::spoiler 完整程式碼
#include <stddef.h>
#include <stdlib.h>
struct hlist_node { struct hlist_node *next, **pprev; };
struct hlist_head { struct hlist_node *first; };
typedef struct { int bits; struct hlist_head *ht; } map_t;
#define MAP_HASH_SIZE(bits) (1 << bits)
map_t *map_init(int bits) {
map_t *map = malloc(sizeof(map_t));
if (!map)
return NULL;
map->bits = bits;
map->ht = malloc(sizeof(struct hlist_head) * MAP_HASH_SIZE(map->bits));
if (map->ht) {
for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++)
(map->ht)[i].first = NULL;
} else {
free(map);
map = NULL;
}
return map;
}
struct hash_key {
int key;
void *data;
struct hlist_node node;
};
#define container_of(ptr, type, member) \
({ \
void *__mptr = (void *) (ptr); \
((type *) (__mptr - offsetof(type, member))); \
})
#define GOLDEN_RATIO_32 0x61C88647
static inline unsigned int hash(unsigned int val, unsigned int bits) {
/* High bits are more random, so use them. */
return (val * GOLDEN_RATIO_32) >> (32 - bits);
}
static struct hash_key *find_key(map_t *map, int key) {
struct hlist_head *head = &(map->ht)[hash(key, map->bits)];
for (struct hlist_node *p = head->first; p; p = p->next) {
struct hash_key *kn = container_of(p, struct hash_key, node);
if (kn->key == key)
return kn;
}
return NULL;
}
void *map_get(map_t *map, int key)
{
struct hash_key *kn = find_key(map, key);
return kn ? kn->data : NULL;
}
void map_add(map_t *map, int key, void *data)
{
struct hash_key *kn = find_key(map, key);
if (kn)
return;
kn = malloc(sizeof(struct hash_key));
kn->key = key, kn->data = data;
struct hlist_head *h = &map->ht[hash(key, map->bits)];
struct hlist_node *n = &kn->node, *first = h->first;
n->next = first;
if (first)
first->pprev = &n->next;
h->first = n;
n->pprev = &h->first;
}
void map_deinit(map_t *map)
{
if (!map)
return;
for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++) {
struct hlist_head *head = &map->ht[i];
for (struct hlist_node *p = head->first; p;) {
struct hash_key *kn = container_of(p, struct hash_key, node);
struct hlist_node *n = p;
p = p->next;
if (!n->pprev) /* unhashed */
goto bail;
struct hlist_node *next = n->next, **pprev = n->pprev;
*pprev = next;
if (next)
next->pprev = pprev;
n->next = NULL, n->pprev = NULL;
bail:
free(kn->data);
free(kn);
}
}
free(map);
}
int *twoSum(int *nums, int numsSize, int target, int *returnSize)
{
map_t *map = map_init(10);
*returnSize = 0;
int *ret = malloc(sizeof(int) * 2);
if (!ret)
goto bail;
for (int i = 0; i < numsSize; i++) {
int *p = map_get(map, target - nums[i]);
if (p) { /* found */
ret[0] = i, ret[1] = *p;
*returnSize = 2;
break;
}
p = malloc(sizeof(int));
*p = i;
map_add(map, nums[i], p);
}
bail:
map_deinit(map);
return ret;
}
:::
---
## 測驗 `2`
:::info
題目
- [x] 嘗試避免遞迴,寫出同樣作用的程式碼
- [ ] 以類似 Linux 核心的 circular doubly-linked list 改寫,撰寫遞迴和迭代 (iterative) 的程式碼
:::
原始程式碼:
```c
#include <stddef.h>
struct ListNode {
int val;
struct ListNode *next;
};
struct ListNode* deleteDuplicates(struct ListNode* head){
if (!head)
return NULL;
if (/*COND1*/) {
/* Remove all duplicate numbers */
while (/*COND2*/){
head = head->next;
}
return deleteDuplicates(head->next);
}
head->next = deleteDuplicates(head->next);
return head;
}
```
ANS:
```c
CON1 = head->next && head->val == head->next->val
CON2 = head->next && head->val == head->next->val
```
- 嘗試避免遞迴,寫出同樣作用的程式碼:
- 將遞回法改為疊代法,使用間接指標(indirect pointer)進行實作
```c
struct ListNode* deleteDuplicates(struct ListNode* head){
if(!head || !head->next) return head;
struct ListNode **indirect=&head;
while(*indirect){
// dupliacte node
if((*indirect)->next && (*indirect)->next->val==(*indirect)->val){
struct ListNode *temp = *indirect;
while(temp && temp->val == (*indirect)->val)
temp = temp->next;
*indirect = temp;
}
// normal traversal
else indirect = &(*indirect)->next;
}
return head;
}
```
- 以類似 Linux 核心的 circular doubly-linked list 改寫,撰寫遞迴和迭代 (iterative) 的程式碼
- 遞迴
- 迭代,類似學生在 `lab0-c` 裡撰寫的
```c
bool q_delete_dup(struct list_head *head)
{
if (!head || list_empty(head) || list_is_singular(head))
return false;
struct list_head *prev = NULL, *curr = head->next, *next = curr->next;
while (curr != head && next != head) {
element_t *q_curr = list_entry(curr, element_t, list);
element_t *q_next = list_entry(next, element_t, list);
while (next != head && !strcmp(q_curr->value, q_next->value)) {
prev = curr;
list_del(next);
q_release_element(list_entry(next, element_t, list));
next = curr->next;
q_next = list_entry(next, element_t, list);
}
if (prev) {
list_del(prev);
q_release_element(list_entry(prev, element_t, list));
prev = NULL;
}
curr = next;
next = curr->next;
}
return true;
}
```
---
## 測驗 `3`
:::info
題目
- [ ] 補完程式碼
- [ ] 解釋上述程式碼的運作,撰寫完整的測試程式,指出其中可改進之處並實作
- [ ] 在 Linux 核心找出 LRU 相關程式碼並探討
:::
---
## 測驗 `4`
:::info
題目
- [ ] 補完程式碼
- [ ] 解釋上述程式碼的運作,撰寫完整的測試程式,指出其中可改進之處並實作
- [ ] 嘗試用 Linux 核心風格的 hash table 重新實作上述程式碼
:::