2022q1 Homework1 (quiz1)

contributed by < rickywu0421 >

第一題 LeetCode 1. Two Sum

Linux style hash table

struct hlist_node {
    struct hlist_node *next;
    struct hlist_node **pprev;
};

struct hlist_head {
    struct hlist_node *first;
};

本題使用到 linux kernel 中 include/linux/types.h 內定義的 hlist_node 與 hlist_head 結構體。

hlist_head 為了節省 hash table 的空間之開銷,只包含一個成員 first, 指向第一個 hlist_node, hlist_node 則為雙向的 linked list, 其特別之處為：不若另一個表示 linkd list 的結構體 list_head 的 prev 成員為指標, 其指向前一個 node 的成員 (精準的來說是指向前一個成員的 next field 的記憶體位址) 為指標的指標 pprev, 其目的在進行 delete 操作時不需要考慮前一個節點是否為 head。

以下為示意圖：

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

以下 function 部分已由 include/linux/list.h 中定義的操作改寫。

map_init

根據 bits , 建立含有 \(2^{bits}\) 個成員的 hash table。

map_t *map_init(int bits)
{
    map_t *map = (map_t *) malloc(sizeof(map_t));
    if (!map)
        return NULL;

    map->bits = bits;
    map->ht = (struct hlist_head *) 
        malloc(sizeof(struct hlist_head) * MAP_HASH_SIZE(map->bits));
    if (!map->ht) {
        for (int i = 0; i < MAP_HASH_SIZE(map->bits); i++)
            (map->ht)[i].first = NULL;
    }
    else {
        free(map);
        map = NULL;
    }
    return map;
}

find_key

首先, 透過 hash() 得到 key 對應的 hash value, 再從 hash table 中得到對應的 hlist_head, 接著走訪此 hlist, 找尋是否有值為 key 的 node。

static struct hash_key *find_key(map_t *map, int key)
{
    struct hlist_head *h = &(map->ht)[hash(key, map->bits)];
    struct hash_key *kn;

    hlist_for_each_entry (kn, h, node) {
        if (kn->key == key)
            return kn;
    }

    return NULL;
}

map_get

呼叫 find_key(), 若得到的回傳值不為 NULL, 則 return index 值。

void *map_get(map_t *map, int key)
{
    struct hash_key *kn = find_key(map, key);
    return kn ? kn->data : NULL;
}

map_add

新增一個 hash_key, 並根據 key 值將此 hash_key 的 list insert 到對應的 hash table 的 head。

void map_add(map_t *map, int key, void *data)
{
    struct hash_key *kn = find_key(map, key);
    if (kn)
        return;

    kn = (struct hash_key *) malloc(sizeof(struct hash_key));
    kn->key = key;
    kn->data = data;

    struct hlist_head *h = &map->ht[hash(key, map->bits)];
    struct hlist_node *n = &kn->node;

    hlist_add_head(n, h);
}

map_deinit

Deallocate hash_table。

void map_deinit(map_t *map)
{
    if (!map)
        return;

    for (int i = 0; i < MAP_HASH_SIZE; i++) {
        struct hlist_head *h = &map->ht[i];
        struct hlist_node *n, *next;
        struct hash_key *kn, *safe;
        
        hlist_for_each_entry_safe (kn, safe, h, node) {
            free(kn->data);
            free(kn);
        }
    }

    free(map);
}

two_sum

主要邏輯之處理。

根據 nums[i], 找到對應的 value = target - nums[i], 並從 hash table 中尋找是否有 key == value 的 node, 若有, 則呼叫 map_get() 得到該 key 的 index 值並 return; 否則透過 map_add() 將 nums[i] 與對應的 index i 加入到 hash table 中。

int *two_sum(int *nums, int numsSize, int target, int *returnSize)
{
    map_t *map = map_init(10);
    *returnSize = 0;
    int *ret = (int *) malloc(sizeof(int) * 2);
    if (!ret)
        goto bail;

    for (int i = 0; i < numsSize; i++) {
        int *p = map_get(map, target - nums[i]);
        if (p) {
            *returnSize = 2;
            ret[0] = i;
            ret[1] = *p;
            break;
        }

        p = (int *) malloc(sizeof(int));
        *p = i;
        map_add(map, nums[i], p);
    }

bail:
    map_deinit(map);
    return ret;
}

第二題 LeetCode 82. Remove Duplicates from Sorted List II

這題的目標是把 list 中擁有相同 val 的 node 從 list 中拿掉, 要注意的是首個 val 相同的 node 也必須拿掉, 示意圖如下：

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

遞迴呼叫版

題目中第一個 if 為判斷節點是否為 NULL, 其作為遞迴呼叫時的終止條件之一。

第二個 if 則為判斷此節點與下一個節點的 val 是否相同, 若是, 則進入 while 迴圈 (這邊我覺得程式可以將 if 拿掉, 效果一樣且程式碼更為精簡), 逐次進行相同判斷, 直到條件為否時即到達重複 val 之連續節點的最後一個 node, 傳入 head->next 進行遞迴呼叫。

若第二個 if 判斷為否, 則設定 head->next 為遞迴呼叫的回傳值, 並回傳 head。

#include <stddef.h>

struct ListNode {
    int val;
    struct ListNode *next;
};

struct ListNode *deleteDuplicates(struct ListNode *head)
{
    if (!head)
        return NULL;

    if (head->next && head->val == head->next->val) {
        /* Remove all duplicate numbers */
        while (head->next && head->val == head->next->val) 
            head = head->next;
        return deleteDuplicates(head->next);
    }

    head->next = deleteDuplicates(head->next);
    return head;
}

迭代 + linux kernel style 版

使用 list_for_each_entry() 走訪整個 linked list, 比較 prev_value 是否等於 pos->value, 若是, 則表示此 Node 為 duplicate node, 此時呼叫 list_del() 將該 Node 的 list unlink。除此之外, 因為第一個重複的 Node 也要被 unlink, 因此需要記錄其為 start, 並在造訪下一種 value 的節點或造訪到 head 時 unlink start。

此函式存在重複之程式碼, 還需要再想辦法精簡之。

#include <stdbool.h>

bool q_delete_dup(struct list_head *head)
{
    element_t *pos, *start = NULL;
    char *prev_value = "";

    if (!head)
        return false;

    list_for_each_entry (pos, head, list) {
        if (!strcmp(pos->value, prev_value)) {
            /* Record the start queue element of the duplicate set,
               which will be delete later */
            if (!start)
                start = list_entry(pos->list.prev, element_t, list);

            list_del(&pos->list);
        } else {
            prev_value = pos->value;

            /* Defered deletion of the start of the duplicate set */
            if (start) {
                list_del(&start->list);
                start = NULL;
            }
        }
    }

    /* Defered deletion of the start of the duplicate set */
    if (start)
        list_del(&start->list);

    return true;
}

第三題 LeetCode 146. LRU Cache

本題需實作資料結構, 使其能夠表現一個 LRU cache。所有的資料在 cache 中以 <key, value> pair 呈現, 以下以 "data" 作為 <key, value> pair 之簡稱。

結構體定義

LRUCache

typedef struct {
    int capacity, count;
    struct list_head dhead, hheads[];
} LRUCache;

LRUCache 為 cache 之表示, 除了記錄 cache 容量與
目前的資料數, 還包含使其成為 curcular doubly-linked list 的 list_head 結構體: dhead 與 hheads。

dhead maintain 了一個 LRU cache, 在 list 的成員從頭到尾依序表示新到舊的 data。

hheads 則作為一個 hash table , 優點是使 list 的搜尋效率增加, 缺點是 list node 在插入時增加了插入到 hash table 的成本。此成員亦為一個 zero-length array: hheads, 其存在使得此結構體成為 variable-length object。需注意的是此用法為 GNU C 的 extension, 參考資料: Zero-Length。

LRUNode

typedef struct {
    int key, value;
    struct list_head dlink, hlink;
} LRUNode;

LRUNode 則為 list 中代表 data 的結構體, 其中 dlink 連接到 LRUCache->dhead 作為頭的 list, hlink 連接到 hheads[hash] 作為頭的 list (hash 由 key 值決定, 透過 hash function 產生, 本題使用最 naive 的手法, 即 hash = key % capacity, 此手法可能沒辦法有效的避免碰撞)

以下示意圖為 capacity 為 100 的情況下, 依序新增 100, 0, 50 三個節點時 dlink 與 hlink 的鏈結情況：

dlink:

hlink:

lRUCacheCreate

配置 LRUCache 的記憶體空間, 要注意的是不能只是 malloc(sizeof(LRUCache), 因為這樣沒有配置到最後一個成員 hheads (zero-lengh array), 要再加上 sizeof(struct list_head) * capacity 的長度。

最後透過 INIT_LIST_HEAD() 將 dlink, hlink 分別指向自身。

LRUCache *lRUCacheCreate(int capacity)
{
    LRUCache *obj = (LRUCache *) 
        malloc(sizeof(LRUCache) + sizeof(struct list_head) * capacity);
    obj->count = 0;
    obj->capacity = capacity;
    INIT_LIST_HEAD(&obj->dhead);
    for (int i = 0; i < capacity; i++)
        INIT_LIST_HEAD(&obj->hheads[i]);
    return obj;
}

lRUCacheFree

透過 list_for_each_entry_safe() 走訪 dlink, unlink 節點並 free() LRUNode。

void lRUCacheFree(LRUCache *obj)
{
    LRUNode *lru, *n;
    list_for_each_entry_safe (lru, n, &obj->dhead, dlink) {
        list_del(lru);
        free(lru);
    }
    free(obj);
}

lRUCacheGet

首先算出 key 的 hash 值, 再從對應 hash 值的 hlink 找尋是否存在 key 值, 若是, 則因為此節點是 least recently used 的節點, 需先將此節點透過 list_move() 移動到 dlink 的頭, 再回傳此節點的 value 值。

int lRUCacheGet(LRUCache *obj, int key)
{
    LRUNode *lru;
    int hash = key % obj->capacity;
    list_for_each_entry (lru, &obj->hheads[hash], hlink) {
        if (lru->key == key) {
            list_move(&lru->dlink, &obj->deads);
            return lru->value;
        }
    }
    return -1;
}

lRUCachePut

首先算出 key 的 hash 值, 再從對應 hash 值的 hlink 找尋是否已經存在 key 值, 若是, 則因為此節點是 least recently used 的節點, 需先將此節點透過 list_move() 移動到 dlink 的頭, 更新此節點的 value 並 return;

若否, 則表示 link 中尚未存在 key 值的節點, 此時需先檢查 cache->count 是否已達到 cache->capacity, 若是, 則需要 remove 最久沒被用到的節點, 即 dlink 的最末端, 透過 list_last_entry() 可以得到 dlink 最末端的節點的 LRUNode, 替換其 key 與 value, 並將其插入至 dlink 的頭;

若否, 則需 malloc() 一個 LRUNode, 並填入 key 與 value, 再將其插入至 dlink 的頭。

void lRUCachePut(LRUCache *obj, int key, int value)
{
    LRUNode *lru;
    int hash = key % obj->capacity;
    list_for_each_entry (lru, &obj->hheads[hash], hlink) {
        if (lru->key == key) {
            list_move(&lru->dlink, &obj->dhead);
            lru->value = value;
            return;
        }
    }

    if (obj->count == obj->capacity) {
        lru = list_last_entry(&obj->dhead, LRUNode, dlink);
        list_del(&lru->dlink);
        list_del(&lru->hlink);
    } else {
        lru = (LRUNode *) malloc(sizeof(LRUNode));
        obj->count++;
    }

    lru->key = key;
    lru->value = value;
    list_add(&lru->dlink, &obj->dhead);
    list_add(&lru->hlink, &obj->hheads[hash]);
}