Try   HackMD

2022q1 Homework1 (quiz1)

contributed by < yaohwang99 >

quiz1
graphviz tutorial

1. Two Sum

Class note

In class note, Uduru0522 showed that it is more efficient to delete a node if we use indirect pointer pprev.
Consider the version with the following data structure:

struct hlist_head {
    struct hlist_node *first;
};

struct hlist_node {
    struct hlist_node *next, *pprev;
};






G



list_head

hlist_head

first



node_1

hlist_node

prev

next



list_head->node_1:m





node_2

hlist_node

prev

next



node_1:n->node_2:m





NULL_1
NULL



node_1:p->NULL_1





node_2:p->node_1:m





node_3

hlist_node

prev

next



node_2:n->node_3:m





node_3:p->node_2:m





NULL_2
NULL



node_3:n->NULL_2





  • prev of the first node is NULL because it can only point to struct hlist_node.
  • prev and next points to struct hlist_node.

In the above version, traversing the nodes would be more efficient because it is a direct pointer. However, for a decent has table, collision rarely occurs so the linked list will not be long. Therefore, the advantage of deleting a node outweigh the disadvantage of traversing the nodes.


Analyze map_add()

void map_add(map_t *map, int key, void *data) { struct hash_key *kn = find_key(map, key); if (kn) return; kn = malloc(sizeof(struct hash_key)); kn->key = key, kn->data = data; struct hlist_head *h = &map->ht[hash(key, map->bits)]; struct hlist_node *n = &kn->node, *first = h->first; n->next = first;//AAA if (first) first->pprev = &n->next; h->first = n; n->pprev = &h->first;//BBB }






G



map_t
map



hash_table

bits

ht



map_t->hash_table





list_head

...

...

hlist_head.first

...

...



hash_table:ht->list_head:h





node_1

key

data

prev

next
hlist_node



list_head:m->node_1





node_1:prev->list_head





node_2

key

data

prev

next
hlist_node



node_1:next->node_2





node_2:prev->node_1:next





NULL
NULL



node_2:next->NULL





Above graph is initial state








G



map_t
map



hash_table

bits

ht



map_t->hash_table





list_head

...

...

hlist_head.first

...

...



hash_table:ht->list_head:h





node_1

key

data

prev

next
hlist_node



list_head:m->node_1





node_1:prev->list_head





node_2

key

data

prev

next
hlist_node



node_1:next->node_2





node_2:prev->node_1:next





NULL
NULL



node_2:next->NULL





node_3

key

data

prev

next



kn
kn



kn->node_3





h
h



h->list_head





n
n



n->node_3:prev





first
first



first->node_1





kn = malloc(sizeof(struct hash_key)); kn->key = key, kn->data = data; struct hlist_head *h = &map->ht[hash(key, map->bits)]; struct hlist_node *n = &kn->node, *first = h->first;







G



map_t
map



hash_table

bits

ht



map_t->hash_table





list_head

...

...

hlist_head.first

...

...



hash_table:ht->list_head:h





node_1

key

data

prev

next
hlist_node



list_head:m->node_1





node_1:prev->list_head





node_2

key

data

prev

next
hlist_node



node_1:next->node_2





node_2:prev->node_1:next





NULL
NULL



node_2:next->NULL





node_3

key

data

prev

next



node_3:next->node_1





kn
kn



kn->node_3





h
h



h->list_head





n
n



n->node_3:prev





first
first



first->node_1





n->next = first;//AAA







G



map_t
map



hash_table

bits

ht



map_t->hash_table





list_head

...

...

hlist_head.first

...

...



hash_table:ht->list_head:h





node_3

key

data

prev

next



list_head:m->node_3





node_1

key

data

prev

next
hlist_node



node_2

key

data

prev

next
hlist_node



node_1:next->node_2





node_1:prev->node_3:next





node_2:prev->node_1:next





NULL
NULL



node_2:next->NULL





node_3:next->node_1





kn
kn



kn->node_3





h
h



h->list_head





n
n



n->node_3:prev





first
first



first->node_1





if (first) first->pprev = &n->next; h->first = n;







G



map_t
map



hash_table

bits

ht



map_t->hash_table





list_head

...

...

hlist_head.first

...

...



hash_table:ht->list_head:h





node_3

key

data

prev

next



list_head:m->node_3





node_1

key

data

prev

next
hlist_node



node_2

key

data

prev

next
hlist_node



node_1:next->node_2





node_1:prev->node_3:next





node_2:prev->node_1:next





NULL
NULL



node_2:next->NULL





node_3:prev->list_head:m





node_3:next->node_1





kn
kn



kn->node_3





h
h



h->list_head





first
first



first->node_1





n
n



n->node_3:prev





n->pprev = &h->first;//BBB

main()

  1. Create map pointer to new map 210 bucket
  2. Set *returnSize to 0 and space for 2 integer.
  3. If nums does not have an answer, returnSize will still be 0 and return ret, implying that *ret contains garbage value.
  4. Iterate through nums, use targer - nums[i] as key to find the index of the corresponding number.
  5. If found, return the answer. If not found, add nums[i] and current index i to map, continue to next element of nums.
int *twoSum(int *nums, int numsSize, int target, int *returnSize)
{
    map_t *map = map_init(10);
    *returnSize = 0;
    int *ret = malloc(sizeof(int) * 2);
    if (!ret)
        goto bail;

    for (int i = 0; i < numsSize; i++) {
        int *p = map_get(map, target - nums[i]);
        if (p) { /* found */
            ret[0] = i, ret[1] = *p;
            *returnSize = 2;
            break;
        }

        p = malloc(sizeof(int));
        *p = i;
        map_add(map, nums[i], p);
    }

bail:
    map_deinit(map);
    return ret;
}

In map_deinit(), I don't understand why n->pprev can be NULL.
cy023 pointed out that it can pass leetcode test even if it is commented.

if (!n->pprev) /* unhashed */ goto bail;

Read hash.h

Note:
Initialize hash using designated initializer.

#define DEFINE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }

Hash function:
Linux kernel will check the word size of the machine and use the corresponding hash function.

If key is 64 bits, use hash_long(), if key is 32 bits, use hash_32().
If word size is 32 bits, hash_long() is hash_32(), if word size is 64 bits, hash_long() is hash_64()
If word size is 32 bits but wants to use hash_64(), hash_32((u32)val ^ __hash_32(val >> 32), bits) produces the same result.

The hash function val * GOLDEN_RATIO_32 >> (32 - bits) uses the first bits number of bits of val * GOLDEN_RATIO_32 as bucket index.
For example, bits = 3, val = 5, then val * GOLDEN_RATIO_32 == 0xE8EA9F63 (discard overflowed bits), bucket index = 0xE.

2. Remove Duplicates from Sorted List II

Sample code

  1. If head->val equals head->next->value, move head to next node until the statement is false or end of the list.
  2. Use head->next as new head of the sublist, repeat the algorithm.
#include <stddef.h>

struct ListNode {
    int val;
    struct ListNode *next;
};

struct ListNode *deleteDuplicates(struct ListNode *head)
{
    if (!head)
        return NULL;

    if (head->next && head->value == head->next->value) {
        /* Remove all duplicate numbers */
        while (head->next && head->value == head->next->value))
            head = head->next;
        return deleteDuplicates(head->next);
    }

    head->next = deleteDuplicates(head->next);
    return head;
}

In the sample code, the removed node is not freed and is not pointed by any pointer.


Iterative method

Create struct ListNode *removeSublist(struct ListNode *head) which removes all the repeated value until the first non-repeated node.

struct ListNode *removeSublist(struct ListNode *head){
    if (!head)
        return NULL;
    while (head->next && head->value == head->next->value)){
        if (head->next && head->value == head->next->value) {
            /* Remove all duplicate numbers */
            while (head->next && head->value == head->next->value))
                head = head->next;
            head = head->next;
        }
    }
    return head;
}

For example, the following list







G



11

1



12

1



11->12





21

2



12->21





22

2



23

2



22->23





21->22





3

3



23->3





4

4



3->4





becomes:







G



3

3



4

4



3->4





after calling struct ListNode *removeSublist(struct ListNode *head).


Using struct ListNode *removeSublist(struct ListNode *head), implement the algorithm by iteration.

  1. Set head to the first non-repeated node by calling removeSublist().
  2. Walk through the list with prev initialized to head
  3. Return head.
#include <stddef.h>

struct ListNode {
    int val;
    struct ListNode *next;
};
struct ListNode *removeSublist(struct ListNode *head){
    if (!head)
        return NULL;
    while (head->next && head->value == head->next->value)){
        if (head->next && head->value == head->next->value) {
            /* Remove all duplicate numbers */
            while (head->next && head->value == head->next->value))
                head = head->next;
            head = head->next;
        }
    }
    return head;
}
struct ListNode *deleteDuplicates(struct ListNode *head)
{
    if (!head)
        return NULL;
    head = removeSublist(head);
    
    struct ListNode *prev= head;
    while (prev) {
        prev -> next = removeSublist(prev->next);
        prev = prev->next;
    }
    
    return head;
}


Circular doublely-linked list

  1. Convert the list to non-circular list by head->prev->next=NULL
  2. Treat the list as singly-linked list and use head->next as head node for deleteDuplicates(), which could be either the recursive or the iterative version.
  3. Restore the prev pointer in each node, set next of last node to head to convert back to circular list.
struct d_ListNode {
    int val;
    struct list_head *next, *prev;
};
struct list_head *d_deleteDuplicates(struct ListNode *head)
{
    if (!head || list_empty(head))
        return head;
    head->prev->next=NULL;
    head->next = deleteDuplicates(head->next);
    struct list_head *curr = head;
    while(curr->next) {
        curr->next->prev=curr;
        curr = curr->next;
    }
    head->prev = curr;
    curr->next = head;
    return head;
}

3. LRU Cache

Define data structure

  1. dhead and hlink forms a list by every node in cache. dhead is the head and the tail is the least recently used node.
  2. hhead[] is each bucket of the hash table, each bucket points to nodes connected by hlink.
  3. capacity is the max number of node the cache can contain. count records current number of nodes in the cache.
typedef struct {
    int capacity, count;             
    struct list_head dhead, hheads[];
} LRUCache;
    
typedef struct {
    int key, value;
    struct list_head hlink, dlink;
} LRUNode;






G


cluster_1

LRUNode


cluster_2

LRUNode


cluster_0

LRUCache



LRUCache

capacity

count

dhead

hheads[0]

hheads[1]

...

hheads[capacity-1]



LRUNode0

key

value

dlink

hlink



LRUCache:dhead->LRUNode0:dlink






LRUCache:hhead0->LRUNode0:hlink






LRUNode1

key

value

dlink

hlink



LRUCache:hhead1->LRUNode1:hlink






LRUNode0:dlink->LRUNode1:dlink






NULL2
NULL



LRUNode0:hlink->NULL2





NULL
NULL



LRUNode1:dlink->NULL





NULL3
NULL



LRUNode1:hlink->NULL3






Create Cache

  1. Create obj pointer to newly allocated memory.
  2. Initialize each element and return obj.
LRUCache *lRUCacheCreate(int capacity)
{   
    LRUCache *obj = malloc(sizeof(*obj) + capacity * sizeof(struct list_head));
    obj->count = 0;
    obj->capacity = capacity;
    INIT_LIST_HEAD(&obj->dhead);
    for (int i = 0; i < capacity; i++)
        INIT_LIST_HEAD(&obj->hheads[i]);
    return obj;
}

When allocating memory, sizeof(*obj) is equaled to 2*sizeof(int)+sizeof(struct list_head).


Free cache

Because dhead list contains every node, we can iterate through it and delete (remove and then free) each node.
We need to delete the current node in each iteration, so we need the safe pointer to the next entry.
MMM1: list_for_each_entry_safe

void lRUCacheFree(LRUCache *obj)
{       
    LRUNode *lru, *n;
    MMM1 (lru, n, &obj->dhead, dlink) {
        list_del(&lru->dlink);
        free(lru);
    }
    free(obj); 
}   

Get

  1. Use modulo as hash function.
  2. Iterate through the nodes in the corresponding bucket.
  3. If found, move the node to the head of the dhead list (most recently used) and return the value.
  4. If not found, return -1.

MMM2: list_for_each_entry

int lRUCacheGet(LRUCache *obj, int key)
{
    LRUNode *lru;
    int hash = key % obj->capacity;
    MMM2 (lru, &obj->hheads[hash], hlink) {
        if (lru->key == key) {
            list_move(&lru->dlink, &obj->dhead);
            return lru->value;
        }
    }
    return -1;
}

Put

Similar to lRUCacheGet.

  1. Use modulo as hash function.
  2. Iterate through the nodes in the corresponding bucket.
  3. If found, move the node to the head of the dhead list (most recently used) and set the value in the node to input value.
  4. If not found:
    1. If the cache is full, remove the last node in the dhead list (least recently used).
    2. Add new key and value to cache.

MMM3: list_for_each_entry
MMM4: list_last_entry

void lRUCachePut(LRUCache *obj, int key, int value)
{
    LRUNode *lru;
    int hash = key % obj->capacity;
    MMM3 (lru, &obj->hheads[hash], hlink) {
        if (lru->key == key) {
            list_move(&lru->dlink, &obj->dhead);
            lru->value = value;
            return;
        }
    }

    if (obj->count == obj->capacity) {
        lru = MMM4(&obj->dhead, LRUNode, dlink);
        list_del(&lru->dlink);
        list_del(&lru->hlink);
    } else {
        lru = malloc(sizeof(LRUNode));
        obj->count++;
    }
    lru->key = key;
    list_add(&lru->dlink, &obj->dhead);
    list_add(&lru->hlink, &obj->hheads[hash]);
    lru->value = value;
}

Test result with leetcode

Add the following code at the top of the script.

#define container_of(ptr, type, member)                            \
    __extension__({                                                \
        const __typeof__(((type *) 0)->member) *__pmember = (ptr); \
        (type *) ((char *) __pmember - offsetof(type, member));    \
    })
#define list_entry(node, type, member) container_of(node, type, member)
#define list_last_entry(head, type, member) \
    list_entry((head)->prev, type, member)
#define list_for_each_entry(entry, head, member)                       \
    for (entry = list_entry((head)->next, __typeof__(*entry), member); \
         &entry->member != (head);                                     \
         entry = list_entry(entry->member.next, __typeof__(*entry), member))
#define list_for_each_entry_safe(entry, safe, head, member)                \
    for (entry = list_entry((head)->next, __typeof__(*entry), member),     \
        safe = list_entry(entry->member.next, __typeof__(*entry), member); \
         &entry->member != (head); entry = safe,                           \
        safe = list_entry(safe->member.next, __typeof__(*entry), member))
struct list_head {
    struct list_head *prev;
    struct list_head *next;
};
typedef struct {
    int capacity, count;             
    struct list_head dhead, hheads[];
} LRUCache;
    
typedef struct {
    int key, value;
    struct list_head hlink, dlink;
} LRUNode;
void INIT_LIST_HEAD(struct list_head *head)
{
    head->next = head;
    head->prev = head;
}
void list_add(struct list_head *node, struct list_head *head)
{
    struct list_head *next = head->next;

    next->prev = node;
    node->next = next;
    node->prev = head;
    head->next = node;
}
void list_del(struct list_head *node)
{
    struct list_head *next = node->next;
    struct list_head *prev = node->prev;

    next->prev = prev;
    prev->next = next;
}
void list_move(struct list_head *node, struct list_head *head)
{

    list_del(node);
    list_add(node, head);
}

Linux lru_cache.h

I only understood a little.

lru_cache:

lru: pointer to list of unused but ready to be reused or recycled element, the least recently used item is kept at lru->prev
free: pointer to list of unused and ready to be recycled element
in_use: pointer to list of element in use
to_be_changed: pointer to
lc_cache: pointer to struct kmem_cache
element_size: size of tracked objects
element_off: offset of struct lc_element member in the tracked object
nr_elements: number of elements (indices)
max_pending_changes: allow to accumulate number of changes
pending_changes: number of elements currently on to_be_changed list
used: number of elements currently on in_use list
hits, misses, starving, locked, changed:number of elements
flags: flag bits (enum) for lru_cache
lc_private:
name:
lc_slot:nr_elements there
lc_element:nr_elements there
lc_find: find which lc_slot bucket the element is in using lc_new_number as key.
lc_get():

  1. Find the element using lc_find().
  2. If found, increase hits and other statistics then move the element to in_use list.
  3. If not found, increase miss.

lc_set(): associate index with label, move element to lru list or free list.
lc_put(): decrease refcnt, if refcnt == 0, move element to lru list.







G


cluster1

lru_cache


cluster2

kmem_cache


cluster0

lc_element


cluster3

lc_element


cluster5

slot


cluster4

element



lru_cache

lru

free

in_use

to_be_changed

lc_cache

element_size

element_off

nr_elements

max_pending_changes

pending_changes

used

hits

misses

starving

locked

changed

flags

lc_private

name

lc_slot

lc_clcment



kmem_cache

kmem_cache



lru_cache:lc_cache->kmem_cache





slot

hlist_head[0]

hlist_head[1]

...



lru_cache:lc_slot->slot





element

[0]

[1]

...



lru_cache:lc_element->element





lc_element0

colision

list

refcnt

lc_index

lc_number

lc_new_number



lc_element3

colision

list

refcnt

lc_index

lc_number

lc_new_number



element:0->lc_element0





element:1->lc_element3






4. Longest Consecutive Sequence

Example code

  1. Iterate through nums and put each num in hash table.
  2. Because of the hash function, consecutive num will be place in adjacent bucket.
  3. For each key:
    1. Check if previous bucket contains key-1, if so, repeat for key-2 and so on until the key is not found, record how many are found.
      LLL: --left
    2. Check if next bucket contains key+1, if so, repeat for key+2 and so on until the key is not found, record how many are found.
      RRR: --right
  4. Return the longest number.
#include <stdio.h>
#include <stdlib.h>
#include "list.h"

struct seq_node {
    int num;
    struct list_head link;
};                                                                                                                                                            
            
static struct seq_node *find(int num, int size, struct list_head *heads)
{   
    struct seq_node *node;
    int hash = num < 0 ? -num % size : num % size; //bit-wise op
    list_for_each_entry (node, &heads[hash], link) {
        if (node->num == num)
            return node;
    }
    return NULL;
}

int longestConsecutive(int *nums, int n_size)
{
    int hash, length = 0;
    struct seq_node *node;
    struct list_head *heads = malloc(n_size * sizeof(*heads));

    for (int i = 0; i < n_size; i++)
        INIT_LIST_HEAD(&heads[i]);

    for (int i = 0; i < n_size; i++) {
        if (!find(nums[i], n_size, heads)) {
            hash = nums[i] < 0 ? -nums[i] % n_size : nums[i] % n_size; //bit-wise op
            node = malloc(sizeof(*node));
            node->num = nums[i];
            list_add(&node->link, &heads[hash]);
        }
    }

    for (int i = 0; i < n_size; i++) {
        int len = 0;
        int num;
        node = find(nums[i], n_size, heads);
        while (node) {
            len++;
            num = node->num;
            list_del(&node->link);

            int left = num, right = num;
            while ((node = find(LLL, n_size, heads))) {
                len++;
                list_del(&node->link);
            }

            while ((node = find(RRR, n_size, heads))) {
                len++;
                list_del(&node->link);
            }

            length = len > length ? len : length;
        }
    }

    return length;
}

Improvement

The above method takes

O(n2) because for each number we have to iterate again to know the length of each consecutive sequence
We can achieve
O(n)
if we record the consecutive sequence when inserting the number in the bucket.







G


cluster0

map


cluster1

setNode



map

map[0]

...

map[map_size-1]



setNode

key

min

max

link



Consider the following example:







G


cluster0

map


cluster1

setNode


cluster2

setNode


cluster4

setNode


cluster5

setNode



map

map[0]

map[1]

map[2]

map[3]

map[4]

map[5]



setNode1

key=1

min=1

max=2

link



map:1->setNode1





setNode2

key=2

min=1

max=2

link



map:2->setNode2





setNode4

key=4

min=4

max=5

link



map:4->setNode4





setNode5

key=5

min=4

max=5

link



map:5->setNode5





If we insert 3, we need to updatemap[3], map[1] and map[5] and don't care about the other keys.
Because 1~5 is a known consecutive sequence, if 1~5 is inserted again, nothing will be changed.







G


cluster0

map


cluster2

setNode


cluster1

setNode


cluster3

setNode


cluster5

setNode


cluster4

setNode



map

map[0]

map[1]

map[2]

map[3]

map[4]

map[5]



setNode1

key=1

min=1

max=5

link



map:1->setNode1





setNode2

key=2

min=1

max=2

link



map:2->setNode2





setNode3

key=3

min=1

max=5

link



map:3->setNode3





setNode4

key=4

min=4

max=5

link



map:4->setNode4





setNode5

key=5

min=1

max=5

link



map:5->setNode5





#define container_of(ptr, type, member)                            \
    __extension__({                                                \
        const __typeof__(((type *) 0)->member) *__pmember = (ptr); \
        (type *) ((char *) __pmember - offsetof(type, member));    \
    })
struct hlist_node {
    struct hlist_node *next, **pprev;
};

struct hlist_head {
    struct hlist_node *first;
};


typedef struct{
    int key, min, max;
    struct hlist_node link;
} setNode;

struct hlist_head *map_init(size_t map_size) {
    struct hlist_head *map = malloc(map_size * sizeof(struct hlist_head *));
    for (size_t i = 0; i < map_size; i++){
        map[i].first = NULL;
    }
    return map;
}
void map_deinit(struct hlist_head *map , size_t map_size){
    for (size_t i = 0; i < map_size; i++){
        struct hlist_node *target = map[i].first;
        while (target) {
            map[i].first = target->next;
            free(container_of(target, setNode, link));
            target = map[i].first;
        }
    }
    free(map);
}
void map_add(struct hlist_head *map, int key, size_t map_size, int min, int max){
    int hash = key % map_size;
    struct hlist_node *head = map[hash].first;
    setNode * e = malloc(sizeof(setNode));

    e->min = min;
    e->max = max;
    e->key = key;
    
    e->link.next = head;
    if(head)
        head->pprev = &e->link.next;
    map[hash].first = &e->link;
    e->link.pprev = &map[hash].first;
}

setNode *map_find(struct hlist_head *map, int key, size_t map_size){
    int hash = key % map_size;
    struct hlist_node *head = map[hash].first;
    setNode *ret = NULL;
    for(;head; head = head->next) {
        ret = container_of(head, setNode, link);
        if (ret->key == key)
            return ret;
    }
        
    return NULL;
}

int longestConsecutive(int* nums, int numsSize){
    int ret = 0;
    size_t map_size = 50000;
    struct hlist_head *map = map_init(map_size);
    for(int i=0; i < numsSize; ++i) {
        if(map_find(map,nums[i],map_size))
            continue;
        setNode *set1 = map_find(map, nums[i]-1, map_size);
        setNode *set2 = map_find(map, nums[i]+1, map_size);

        int min = set1 ? set1->min : nums[i];
        int max = set2 ? set2->max : nums[i]; 

        map_add(map, nums[i], map_size, min, max);
        if (min != nums[i])
            map_add(map, min, map_size, min, max);
        if(max != nums[i])
            map_add(map, max, map_size, min, max);
        ret = ret > max - min + 1 ? ret : max - min + 1;
    }
    // map_deinit(map, map_size);
    return ret;
}

Test result