2022q1 Homework1 (quiz1)

contributed by < linjohnss >

測驗題目

測驗 1

Leetcode 1. Two Sum

變數結構

變數節有 4 種：

map_t 為 hash table 的結構，包含一個紀錄 hash index 數量的 bit 以及指向一個 hlist_head 陣列的指標。
hlist_node 與 hlist_head 為 Linux 核心中專門為了 hash table 而設計的資料結構，由一個 hlist_head 連接多個 hlist_node 組成非環狀 doubly linked list。
hash_key 為儲存陣列值作為 key 以及其 index 作為 data。可以藉由 container_of 讀取。

struct hlist_node { struct hlist_node *next, **pprev; };
struct hlist_head { struct hlist_node *first; };
typedef struct { int bits; struct hlist_head *ht; } map_t;
struct hash_key {
    int key;
    void *data;
    struct hlist_node node;
};

初始化 hash table

graphviz 參考 cy023 quiz1 開發筆記

先利用 malloc 配置 map_t 空間。
接著配置 hlist_head 指標陣列，共 MAP_HASH_SIZE(map->bits) 個 bucket，全指向 NULL。
MAP_HASH_SIZE 是用於取得 bucket 數量（2^bit）。

#define MAP_HASH_SIZE(bits) (1 << bits)

map_t *map_init(int bits) {
    map_t *map = malloc(sizeof(map_t));
    if (!map)
        return NULL;

    map->bits = bits;
    map->ht = malloc(sizeof(struct hlist_head) * MAP_HASH_SIZE(map->bits));
    if (map->ht) {
        for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++)
            (map->ht)[i].first = NULL;
    } else {
        free(map);
        map = NULL;
    }
    return map;
}

hash function

#define GOLDEN_RATIO_32 0x61C88647
static inline unsigned int hash(unsigned int val, unsigned int bits) {
    /* High bits are more random, so use them. */
    return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

搜尋 hash table

走訪相同 index 的 list，並找出 key 相同的 hash_key，若沒找到則回傳 NULL。

static struct hash_key *find_key(map_t *map, int key) {
    struct hlist_head *head = &(map->ht)[hash(key, map->bits)];
    for (struct hlist_node *p = head->first; p; p = p->next) {
        struct hash_key *kn = container_of(p, struct hash_key, node);
        if (kn->key == key)
            return kn;
    }
    return NULL;
}

void *map_get(map_t *map, int key)
{
    struct hash_key *kn = find_key(map, key);
    return kn ? kn->data : NULL;
}

新增 hash table

用 find_key 進行搜尋，如果有找到相同的 key 表示已經找到答案，故 return。
若沒找到，則配置一個 hash_key 空間，

void map_add(map_t *map, int key, void *data)
{
    struct hash_key *kn = find_key(map, key);
    if (kn)
        return;

    kn = malloc(sizeof(struct hash_key));
    kn->key = key, kn->data = data;

    struct hlist_head *h = &map->ht[hash(key, map->bits)];
    struct hlist_node *n = &kn->node, *first = h->first;
    AAA;
    if (first)
        first->pprev = &n->next;
    h->first = n;
    BBB;
}

AAA：n->next = first
BBB：n->pprev = &h->first

先將 n 放在 first 的前面。
若 first 不為 NULL，則需將 first->pprev 指向 &n-<next。
最後將 h 與 n 連接，即可完成 node 的插入。

    n->next = first;
    if (first)
        first->pprev = &n->next;
    h->first = n;
    n->pprev = &h->first;
}

清除 hash table

走訪整個 hash table 並將記憶體空間釋放掉。

走訪 map->ht 的每一個元素，用 head 指向該元素。
走訪每一個元素的 list，用 kn 指向 hash_key、n 指向該 node。
把 n 從 list remove。
釋放 kn 的記憶體空間。
走訪完整個 hash table 後，釋放 map 的記憶體空間。

void map_deinit(map_t *map)
{
    if (!map)
        return;

    for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++) {
        struct hlist_head *head = &map->ht[i];
        for (struct hlist_node *p = head->first; p;) {
            struct hash_key *kn = container_of(p, struct hash_key, node);
            struct hlist_node *n = p;
            p = p->next;

            if (!n->pprev) /* unhashed */
                goto bail;

            struct hlist_node *next = n->next, **pprev = n->pprev;
            *pprev = next;
            if (next)
                next->pprev = pprev;
            n->next = NULL, n->pprev = NULL;

        bail:
            free(kn->data);
            free(kn);
        }
    }
    free(map);
}

Two Sum

給定一個 int 陣列 num、陣列大小 numSize、目標總和 target 與回傳結果的陣列大小 returnSize。
先初始化 hash table(map_init)，並配置結果陣列的記憶體空間 ret。
找尋 hash table 中是否有與 target 差值的 index 存在。
若存在，則回傳匹配的兩個元素所組成的陣列指標（ret）。
若不存在，則將該元素加入 hash table。
找完所有元素都沒有匹配的話，清除整個 hash table。

int *twoSum(int *nums, int numsSize, int target, int *returnSize)
{
    map_t *map = map_init(10);
    *returnSize = 0;
    int *ret = malloc(sizeof(int) * 2);
    if (!ret)
        goto bail;

    for (int i = 0; i < numsSize; i++) {
        int *p = map_get(map, target - nums[i]);
        if (p) { /* found */
            ret[0] = i, ret[1] = *p;
            *returnSize = 2;
            break;
        }

        p = malloc(sizeof(int));
        *p = i;
        map_add(map, nums[i], p);
    }

bail:
    map_deinit(map);
    return ret;
}

Linux 核心原始程式碼 include/linux/hashtable.h

根據 include/linux/types.h 中定義，hlist_head 以及 hlist_node 的結構如下：

struct hlist_head {
	struct hlist_node *first;
};

struct hlist_node {
	struct hlist_node *next, **pprev;
};

宣告一個有 2^bits 個元素的 hash table，並初始化每一個元素。

#define DEFINE_HASHTABLE(name, bits)						\
	struct hlist_head name[1 << (bits)] =					\
			{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }

根據 hash.h 的註解，提到將輸入乘以一個大的奇數並取高位，可以有效分散數列。其中 GOLDEN_RATIO phi = (sqrt(5)-1)/2 或其負數具有特別好的特性，而取負數 (1 - phi) = phi**2 = (3 - sqrt(5))/2 更方便實做乘法，且對成果不影響，故選擇以下數字作為 GOLDEN_RATIO。

#define GOLDEN_RATIO_32 0x61C88647
#define GOLDEN_RATIO_64 0x61C8864680B583EBull

測驗 2

LeetCode 82. Remove Duplicates from Sorted List II

recursive

#include <stddef.h>

struct ListNode {
    int val;
    struct ListNode *next;
};

struct ListNode *deleteDuplicates(struct ListNode *head)
{
    if (!head)
        return NULL;

    if (COND1) {
        /* Remove all duplicate numbers */
        while (COND2)
            head = head->next;
        return deleteDuplicates(head->next);
    }

    head->next = deleteDuplicates(head->next);
    return head;
}

判斷 head->val 與 head->neat->val 是否相等。
COND1：head->neat && head->val == head->next->val
COND2：head->next && head->val == head->next->val

iterative

struct ListNode* deleteDuplicates(struct ListNode* head){
    if (!head)
        return NULL;
    bool found = false;
    struct ListNode **target = &head;
    while ((*target)->next) {
        if((*target)->val == (*target)->next->val) {
            struct ListNode **indirect = &head;
            while (*indirect != *target)
                indirect = &(*indirect)->next;
            *indirect = (*target)->next;
            found = true;
            target = &(*indirect);
        } else if (found) {
            struct ListNode **indirect = &head;
            while (*indirect != *target)
                indirect = &(*indirect)->next;
            *indirect = (*target)->next;
            found = false;
            target = &(*indirect);
        } else {
            target = &(*target)->next;   
        }
    }
    return head;
}

Linux 核心

struct ListNode *deleteDuplicates(struct list_head *head)
{
    if (!head || list_empty(head))
        return NULL;
    element_t *entry, *safe;
    bool found = NULL;  // use left to delete the last duplicate element
    list_for_each_entry_safe (entry, safe, head, list) {
        if (&safe->list != head && !strcmp(entry->value, safe->value)) {
            list_del(&entry->list);
            free(entry->val);
            free(entry)
            found = true;
        } else if (left) {
            list_del(&entry->list);
            free(entry->val);
            free(entry);
            found = false;
        }
    }
    return true;
}

測驗 3

Leetcode 146. LRU Cache

lRUCacheCreate

用 malloc 配置一個 LRUCache 空間，capacity 為 cache 的容量。
初始化 obj->dhead 與 obj->hheads

LRUCache *lRUCacheCreate(int capacity)
{   
    LRUCache *obj = malloc(sizeof(*obj) + capacity * sizeof(struct list_head));
    obj->count = 0;
    obj->capacity = capacity;
    INIT_LIST_HEAD(&obj->dhead);
    for (int i = 0; i < capacity; i++)
        INIT_LIST_HEAD(&obj->hheads[i]);
    return obj;
}

lRUCacheFree

用 list_for_each_entry_safe 走訪 obj->dhead，釋放 list 節點的記憶體。
釋放全部節點的記憶體後，釋放整個 cache 的記憶體。

void lRUCacheFree(LRUCache *obj)
{       
    LRUNode *lru, *n;
    MMM1 (lru, n, &obj->dhead, dlink) {
        list_del(&lru->dlink);
        free(lru);
    }
    free(obj); 
}

MM1：list_for_each_entry_safe

lRUCacheGet

先計算 key 的 hash。
用 list_for_each_entry_safe 走訪 obj->hheads[hash] 的 list。
尋找相同 key 的節點，並將其移動至最前端，然後回傳 lru->value。
若沒找到，則回傳 -1。

int lRUCacheGet(LRUCache *obj, int key)
{
    LRUNode *lru;
    int hash = key % obj->capacity;
    MMM2 (lru, &obj->hheads[hash], hlink) {
        if (lru->key == key) {
            list_move(&lru->dlink, &obj->dhead);
            return lru->value;
        }
    }
    return -1;
}

MM2：list_for_each_entry

lRUCachePut

計算 key 的 hash。
先利用 list_for_each_entry 走訪 obj->hheads[hash] 的 list，找到是否已經存在，若存在則用 list_move 把 lru->dlink 移到最前端。
若為第一次加入，判斷 cache 是否滿了，如果沒滿，則配置 lru 的空間，並計算 count++。
如果滿了，則移除 list 的最後一個節點（LRU）。
將新的節點加到 list 的最前端。

void lRUCachePut(LRUCache *obj, int key, int value)
{
    LRUNode *lru;
    int hash = key % obj->capacity;
    MMM3 (lru, &obj->hheads[hash], hlink) {
        if (lru->key == key) {
            list_move(&lru->dlink, &obj->dhead);
            lru->value = value;
            return;
        }
    }

    if (obj->count == obj->capacity) {
        lru = MMM4(&obj->dhead, LRUNode, dlink);
        list_del(&lru->dlink);
        list_del(&lru->hlink);
    } else {
        lru = malloc(sizeof(LRUNode));
        obj->count++;
    }
    lru->key = key;
    list_add(&lru->dlink, &obj->dhead);
    list_add(&lru->hlink, &obj->hheads[hash]);
    lru->value = value;
}

MM3：list_for_each_entry
MM4：list_last_entry

測試程式

void show(int value, int key)
{
    if (value != -1)
        printf("GET [%d, %d]\n", key, value);
    else
        printf("Key %d not found!\n", key);
};
int main()
{
    int capacity = 2;
    LRUCache *cache = lRUCacheCreate(capacity);
    lRUCachePut(cache, 1, 1);
    lRUCachePut(cache, 2, 2);
    show(lRUCacheGet(cache, 1), 1);
    lRUCachePut(cache, 3, 3);
    show(lRUCacheGet(cache, 2), 2);
    lRUCachePut(cache, 4, 4);
    show(lRUCacheGet(cache, 1), 1);
    show(lRUCacheGet(cache, 3), 3);
    show(lRUCacheGet(cache, 4), 4);
    lRUCacheFree(cache);
    
    return 0;
}

$ ./q3
GET [1, 1]
Key 2 not found!
Key 1 not found!
GET [3, 3]
GET [4, 4]

Linux 核心 LRU (include/linux/lru_cache.h)

在 linux/mm/swap.c 中有一個函式 folio_add_lru，其運用到 LRU 相關的實做。

void folio_add_lru(struct folio *folio)
{
	struct pagevec *pvec;

	VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio);
	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);

	folio_get(folio);
	local_lock(&lru_pvecs.lock);
	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
	if (pagevec_add_and_need_flush(pvec, &folio->page))
		__pagevec_lru_add(pvec);
	local_unlock(&lru_pvecs.lock);
}
EXPORT_SYMBOL(folio_add_lru);

參考資料
https://www.kernel.org/doc/gorman/html/understand/understand013.html
https://elixir.bootlin.com/linux/latest/source/mm/swap.c#L458

測驗 4

Leetcode 128. Longest Consecutive Sequence

find

計算 hash，需取絕對值。
走訪 heads[hash] 找到是否有相同的值。
若有則回傳該 node ; 否則回傳 NULL。

static struct seq_node *find(int num, int size, struct list_head *heads)
{   
    struct seq_node *node;
    int hash = num < 0 ? -num % size : num % size;
    list_for_each_entry (node, &heads[hash], link) {
        if (node->num == num)
            return node;
    }
    return NULL;
}

longestConsecutive

先配置 head 的記憶體空間（指標陣列），並初始化每個元素。
將每一個 input 元素加入到 hash table(若已經存在，則不加入)。
尋找最長的連續數列
1. 先判斷該數字是否存在 hash table
2. 若存在，則向它的左右節點開始尋找最常的連續數列
3. 判斷是否為最常的連續數列
4. 回傳最長的數列

int longestConsecutive(int *nums, int n_size)
{
    int hash, length = 0;
    struct seq_node *node;
    struct list_head *heads = malloc(n_size * sizeof(*heads));

    for (int i = 0; i < n_size; i++)
        INIT_LIST_HEAD(&heads[i]);

    for (int i = 0; i < n_size; i++) {
        if (!find(nums[i], n_size, heads)) {
            hash = nums[i] < 0 ? -nums[i] % n_size : nums[i] % n_size;
            node = malloc(sizeof(*node));
            node->num = nums[i];
            list_add(&node->link, &heads[hash]);
        }
    }

    for (int i = 0; i < n_size; i++) {
        int len = 0;
        int num;
        node = find(nums[i], n_size, heads);
        while (node) {
            len++;
            num = node->num;
            list_del(&node->link);

            int left = num, right = num;
            while ((node = find(LLL, n_size, heads))) {
                len++;
                list_del(&node->link);
            }

            while ((node = find(RRR, n_size, heads))) {
                len++;
                list_del(&node->link);
            }

            length = len > length ? len : length;
        }
    }

    return length;
}

LLL： –left
RRR：++right

測試程式

int main()
{
    int num[10] = {0,3,7,2,5,8,4,6,0,1};
    int *ptr = num;
    int length = longestConsecutive(num, 10);
    printf("%d\n", length);
    return 0;
}

$ ./q4
9

2022q1 Homework1 (quiz1)

測驗 1

變數結構

初始化 hash table

hash function

搜尋 hash table

新增 hash table

清除 hash table

Two Sum

Linux 核心原始程式碼 include/linux/hashtable.h

測驗 2

recursive

iterative

Linux 核心

測驗 3

lRUCacheCreate

lRUCacheFree

lRUCacheGet

lRUCachePut

測試程式

Linux 核心 LRU (include/linux/lru_cache.h)

測驗 4

find

longestConsecutive

測試程式

tags: linux kernel linux2022

Read more

[rhashtable](https://elixir.bootlin.com/linux/latest/source/include/linux/rhashtable.h) 研究

2022q1 Homework5 (quiz6)

2022q1 Homework5 (quiz5)

2022q1 Homework3 (fibdrv)

tags: `linux kernel` `linux2022`