2022q1 Homework1 (quiz1)

contributed by < zoanana990 >

GitHub

測驗 `1`

題目

請補完程式碼
解釋上述程式碼運作原理
研讀 Linux 核心原始程式碼 include/linux/hashtable.h 及對應的文件 How does the kernel implements Hashtables?，解釋 hash table 的設計和實作手法，並留意到 tools/include/linux/hash.h 的 GOLDEN_RATIO_PRIME，探討其實作考量

本題分兩個部分進行討論：

linux kernel 實作雜湊表的資料結構、雜湊函數
本題實作 two sum 的程式碼

雜湊表

基本資料結構

struct hlist_node { struct hlist_node *next, **pprev; };

struct hlist_head { struct hlist_node *first; };

typedef struct { int bits; struct hlist_head *ht; } map_t;

struct hash_key {
    int key;
    void *data;
    struct hlist_node node;
};

雜湊表初始化

雜湊表的尺寸定義如下

#define MAP_HASH_SIZE(bits)  (1 << bits)

尺寸為 n bits ↔
$2^{n}$ 格

雜湊表初始化函數

map_t *map_init(int bits) {
    // malloc a space for the hash table
    map_t *map = malloc(sizeof(map_t));
    if (!map)
        return NULL;

    map->bits = bits;
    
    // the size of hashtable us 2^n bits
    map->ht = malloc(sizeof(struct hlist_head) * MAP_HASH_SIZE(map->bits));
    if (map->ht) {
        
        // let every list head point to NULL
        for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++)
            (map->ht)[i].first = NULL;
    } else {
        free(map);
        map = NULL;
    }
    return map;
}

圖表化：初始化結果

雜湊函數

瞭解基本的雜湊表資料結構後，現在進行雜湊函數的頗析

#define GOLDEN_RATIO_32 0x61C88647
static inline unsigned int hash(unsigned int val, unsigned int bits) {
    /* High bits are more random, so use them. */
    return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

在了解什麼是 0x61c88647 之前，我們先要了解什麼是雜湊函數(hash function)，雜湊函數目的是將 <key, value> 進行映射 (mapping) ，期望輸入 key 可以利用雜湊函數得到期望的 value ，這個結果可以得到

本題使用的雜湊函數為斐波那契法，其中 0x61c88647 為 INT32 除以黃金比例所得到的結果，其可以讓一個亂數序列取得映射函數，並可以最大化分散輸入值，如下圖

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

如果是使用餘數法，則有可能造成一個鍵 (key) 對應多個值 (value) ，這樣就不是一個好的雜湊函數，如下所示：

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

測試原始碼，這邊用 python 比較方便，因此用 python 撰寫：

import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
	N = 30
	bit = 10
	x = np.random.randint(10000,size=N)
	print(x)
	y = np.right_shift(np.uint64(x * 0x61C88647),np.uint64(22))
	z = x % 4
	plt.scatter(x, y) 
	plt.show()

如果使用其他的數字呢？不使用黃金比例對於雜湊函數有什麼影響？
這邊推薦一個連結

雜湊函數

static struct hash_key *find_key(map_t *map, int key) {
    // find the list_head by hash function
    struct hlist_head *head = &(map->ht)[hash(key, map->bits)];
    for (struct hlist_node *p = head->first; p; p = p->next) {
        struct hash_key *kn = container_of(p, struct hash_key, node);
        if (kn->key == key)
            return kn;
    }
    return NULL;
}

上面我們知道，hlist_head 其實是一個陣列，因此我們可以在

O (1)

的時間內利用雜湊函數找到對應的節點，然後對後續的節點進行走訪取得想要的資料，其圖示化結果如下：

假設今天使用雜湊函數算出來是 key_1 的話，會進行節點走訪，並且對應其鍵(key)值有沒有對應，若值正確，則可以使用 *map_get 函數進行資料回傳

void *map_get(map_t *map, int key)
{
    struct hash_key *kn = find_key(map, key);
    return kn ? kn->data : NULL;
}

插入節點

void map_add(map_t *map, int key, void *data)
{
    struct hash_key *kn = find_key(map, key);
    if (kn)
        return;

    kn = malloc(sizeof(struct hash_key));
    kn->key = key, kn->data = data;

    struct hlist_head *h = &map->ht[hash(key, map->bits)];
    struct hlist_node *n = &kn->node, *first = h->first;
    
    AAA;    
    if (first)
        first->pprev = &n->next;
    h->first = n;
    BBB;
}

這邊的程式碼是想要插入節點，需要考慮到 pprev 是使用間接指標，以及程式碼中需要考慮到 first 存在的問題

if (first)
    first->pprev = &n->next;

也就是說，插入的節點是在 h->first 的位置，因此可以 AAA 可以填入 n->next=first ， BBB 則可以填入 n->pprev = &h->first; 因為插入節點的前一個位置是 hlist_head ，填完程式碼如下

void map_add(map_t *map, int key, void *data)
{
    struct hash_key *kn = find_key(map, key);
    if (kn)
        return;

    kn = malloc(sizeof(struct hash_key));
    kn->key = key, kn->data = data;

    struct hlist_head *h = &map->ht[hash(key, map->bits)];
    struct hlist_node *n = &kn->node, *first = h->first;
    
    n->next = first;    
    if (first)
        first->pprev = &n->next;
    h->first = n;
    n->pprev = &h->first;
}

接下來是將 hash table 刪除，這個過程比較繁瑣，但是大體上是與 linked list 一樣的。先把各個節點獨立，將指標項釋放，再將節點釋放，節點都是放完成後將 hlist_head 釋放，最後將一開始的 map 釋放，就完成了

完整程式碼

#include <stddef.h>
#include <stdlib.h>

struct hlist_node { struct hlist_node *next, **pprev; };
struct hlist_head { struct hlist_node *first; };
typedef struct { int bits; struct hlist_head *ht; } map_t;

#define MAP_HASH_SIZE(bits) (1 << bits)

map_t *map_init(int bits) {
map_t *map = malloc(sizeof(map_t));
if (!map)
return NULL;

map->bits = bits;
map->ht = malloc(sizeof(struct hlist_head) * MAP_HASH_SIZE(map->bits));
if (map->ht) {
    for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++)
        (map->ht)[i].first = NULL;
} else {
    free(map);
    map = NULL;
}
return map;

}

struct hash_key {
int key;
void *data;
struct hlist_node node;
};

#define container_of(ptr, type, member)
({
void *__mptr = (void *) (ptr);
((type *) (__mptr - offsetof(type, member)));
})

#define GOLDEN_RATIO_32 0x61C88647
static inline unsigned int hash(unsigned int val, unsigned int bits) {
/* High bits are more random, so use them. */
return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

static struct hash_key *find_key(map_t *map, int key) {
struct hlist_head *head = &(map->ht)[hash(key, map->bits)];
for (struct hlist_node *p = head->first; p; p = p->next) {
struct hash_key *kn = container_of(p, struct hash_key, node);
if (kn->key == key)
return kn;
}
return NULL;
}

void *map_get(map_t *map, int key)
{
struct hash_key *kn = find_key(map, key);
return kn ? kn->data : NULL;
}

void map_add(map_t *map, int key, void *data)
{
struct hash_key *kn = find_key(map, key);
if (kn)
return;

kn = malloc(sizeof(struct hash_key));
kn->key = key, kn->data = data;

struct hlist_head *h = &map->ht[hash(key, map->bits)];
struct hlist_node *n = &kn->node, *first = h->first;

n->next = first;    
if (first)
    first->pprev = &n->next;
h->first = n;
n->pprev = &h->first;

}

void map_deinit(map_t *map)
{
if (!map)
return;

for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++) {
    struct hlist_head *head = &map->ht[i];
    for (struct hlist_node *p = head->first; p;) {
        struct hash_key *kn = container_of(p, struct hash_key, node);
        struct hlist_node *n = p;
        p = p->next;

        if (!n->pprev) /* unhashed */
            goto bail;

        struct hlist_node *next = n->next, **pprev = n->pprev;
        *pprev = next;
        if (next)
            next->pprev = pprev;
        n->next = NULL, n->pprev = NULL;

    bail:
        free(kn->data);
        free(kn);
    }
}
free(map);

}

int *twoSum(int *nums, int numsSize, int target, int *returnSize)
{
map_t *map = map_init(10);
*returnSize = 0;
int *ret = malloc(sizeof(int) * 2);
if (!ret)
goto bail;

for (int i = 0; i < numsSize; i++) {
    int *p = map_get(map, target - nums[i]);
    if (p) { /* found */
        ret[0] = i, ret[1] = *p;
        *returnSize = 2;
        break;
    }

    p = malloc(sizeof(int));
    *p = i;
    map_add(map, nums[i], p);
}

bail:
map_deinit(map);
return ret;
}

測驗 `2`

題目

嘗試避免遞迴，寫出同樣作用的程式碼
以類似 Linux 核心的 circular doubly-linked list 改寫，撰寫遞迴和迭代 (iterative) 的程式碼

原始程式碼：

#include <stddef.h>

struct ListNode {
    int val;
    struct ListNode *next;
};

struct ListNode* deleteDuplicates(struct ListNode* head){
    if (!head)
        return NULL;
    if (/*COND1*/) {
        /* Remove all duplicate numbers */
        while (/*COND2*/){
            head = head->next;
        }
        return deleteDuplicates(head->next);
    }
    head->next = deleteDuplicates(head->next);
    return head;
}

ANS:

CON1 = head->next && head->val == head->next->val
CON2 = head->next && head->val == head->next->val

嘗試避免遞迴，寫出同樣作用的程式碼：
- 將遞回法改為疊代法，使用間接指標(indirect pointer)進行實作

struct ListNode* deleteDuplicates(struct ListNode* head){
    if(!head || !head->next) return head;
    struct ListNode **indirect=&head;
    while(*indirect){
        // dupliacte node
        if((*indirect)->next && (*indirect)->next->val==(*indirect)->val){
            struct ListNode *temp = *indirect;
            while(temp && temp->val == (*indirect)->val)
                temp = temp->next;
            *indirect = temp;
        }
        // normal traversal
        else indirect = &(*indirect)->next;
    }
    return head;
}

以類似 Linux 核心的 circular doubly-linked list 改寫，撰寫遞迴和迭代 (iterative) 的程式碼

遞迴
迭代，類似學生在 lab0-c 裡撰寫的

bool q_delete_dup(struct list_head *head)
{
    if (!head || list_empty(head) || list_is_singular(head))
        return false;

    struct list_head *prev = NULL, *curr = head->next, *next = curr->next;
    while (curr != head && next != head) {
        element_t *q_curr = list_entry(curr, element_t, list);
        element_t *q_next = list_entry(next, element_t, list);
        while (next != head && !strcmp(q_curr->value, q_next->value)) {
            prev = curr;
            list_del(next);
            q_release_element(list_entry(next, element_t, list));
            next = curr->next;
            q_next = list_entry(next, element_t, list);
        }
        if (prev) {
            list_del(prev);
            q_release_element(list_entry(prev, element_t, list));
            prev = NULL;
        }
        curr = next;
        next = curr->next;
    }
    return true;
}

測驗 `3`

題目

補完程式碼
解釋上述程式碼的運作，撰寫完整的測試程式，指出其中可改進之處並實作
在 Linux 核心找出 LRU 相關程式碼並探討

測驗 `4`

題目

補完程式碼
解釋上述程式碼的運作，撰寫完整的測試程式，指出其中可改進之處並實作
嘗試用 Linux 核心風格的 hash table 重新實作上述程式碼

2022q1 Homework1 (quiz1)

測驗 1

雜湊表

基本資料結構

雜湊表初始化

雜湊函數

雜湊函數

測驗 2

測驗 3

測驗 4

Read more

biRISC-V

Linux patch 心得

Linux 檔案系統

RISCV ISA

測驗 `1`

測驗 `2`

測驗 `3`

測驗 `4`