2023q1 Homework1 (lab0)

contributed by < D4nnyLee >

開發環境

$ gcc --version
gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0

$ lscpu
Architecture:            x86_64
  CPU op-mode(s):        32-bit, 64-bit
  Address sizes:         48 bits physical, 48 bits virtual
  Byte Order:            Little Endian
CPU(s):                  12
  On-line CPU(s) list:   0-11
Vendor ID:               AuthenticAMD
  Model name:            AMD Ryzen 5 7600X 6-Core Processor
    CPU family:          25
    Model:               97
    Thread(s) per core:  2
    Core(s) per socket:  6
    Socket(s):           1
    Stepping:            2
    Frequency boost:     enabled
    CPU max MHz:         5452.7339
    CPU min MHz:         3000.0000
    BogoMIPS:            9382.64

開發過程

實作 queue.c

q_insert_head, q_insert_tail

/* Insert an element at head of queue */
bool q_insert_head(struct list_head *head, char *s)
{
    element_t *e;
    if (!head || !(e = element_new(s)))
        return false;

    list_add(&e->list, head);

    return true;
}

/* Insert an element at tail of queue */
bool q_insert_tail(struct list_head *head, char *s)
{
    element_t *e;
    if (!head || !(e = element_new(s)))
        return false;

    list_add_tail(&e->list, head);

    return true;
}

這兩個函式的操作非常相似，差別只在於將節點新增到 queue 的開頭或是結尾。
其中與新增一個 element_t 相關的程式碼在兩個函式中是一模一樣的，並且邏輯較複雜，因此我將這個部份重新寫成一個函式 element_new，避免以後每次想要改相關的邏輯時都必須在兩個函式中修改一模一樣的程式碼，並且也可以使 q_insert_head 及 q_insert_tail 更容易閱讀。

q_delete_dup
在枚舉節點的過程中，每次都將目前的節點與前一個節點比較是否相等，如果不相等，就將前一個節點刪除。

從 leetcode 題目敘述中可以知道，如果數字 1 出現兩次以上，則所有的 1 都要被刪除，而不是只留下一個，但是若是使用上述枚舉的方法，最後還會留下一個 1。
因此這邊使用一個 bool 變數 remove_on_diff，讓我在遇到目前的節點與前一個節點不同時，有辦法知道該不該刪除前一個節點。

/* Delete all nodes that have duplicate string */
bool q_delete_dup(struct list_head *head)
{
    // https://leetcode.com/problems/remove-duplicates-from-sorted-list-ii/
    if (!head)
        return false;

    /**
     * Remove previous entry when previous entry
     * is different to current entry.
     */
    bool remove_on_diff = false;

    for (struct list_head *node = head->next->next; node != head;
         node = node->next) {
        element_t *prev = list_entry(node->prev, element_t, list);
        element_t *curr = list_entry(node, element_t, list);

        bool same = strcmp(prev->value, curr->value) == 0;
        if (same || remove_on_diff) {
            list_del(&prev->list);
            q_release_element(prev);

            /**
             * same == true:
             *   Set remove_on_diff to true so that we can
             *   delete the last duplicate.
             *
             * same == false:
             *   We have deleted the last duplicate from list,
             *   thus we set remove_on_diff to false.
             */
            remove_on_diff = same;
        }
    }

    if (remove_on_diff) {
        element_t *last = list_last_entry(head, element_t, list);
        list_del(&last->list);
        q_release_element(last);
    }

    return true;
}

q_free
原本的實作如下：

/* Free all storage used by queue */
void q_free(struct list_head *head)
{
    if (!head)
        return;

    element_t *entry, *safe;
    list_for_each_entry_safe (entry, safe, head, list) {
        list_del(&entry->list);
        q_release_element(entry);
    }
    free(head);
}

參考 yanjiew 後，發現因為是用 list_for_each_entry_safe，所以不需要特別使用 list_del 來將節點從 list 中分離，也不會因此造成任何的 use after free，因此可以直接將 list_del 刪除。

q_sort
在讀完了Merge Sort 與它的變化之後，因為實驗結果顯示即使串列的順序是相反的，分割成排序好的串列的速度跟分割成單一節點的速度其實差不多，因此我打算直接實作分割成排序好的串列的版本。

/**
 * Divide list into sorted segments
 */

size_t size = 0;

struct list_head array[100000];
while (!list_empty(head)) {
    element_t *entry;
    list_for_each_entry (entry, head, list) {
        element_t *next = list_entry(entry->list.next, element_t, list);

        if (entry->list.next == head ||
            strcmp(entry->value, next->value) > 0)
            break;
    }

    list_cut_position(&array[size++], head, &entry->list);
}

上面的程式碼中雖然 next 有可能會是 list_entry(head, element_t, list)，這是一個不該以 element_t * 存取的位址，但是我在後面的 if 判斷式中有寫到如果下一個節點是 head 的話就會直接跳出迴圈，因此這樣寫其實不會發生預期外的結果。

此作法只能在長度不超過 100000 時正常運作，需要尋找其他解決方法。

至於合併的部分為了實作方便就用了分段合併的寫法。

/**
 * Merge sorted segments
 */

for (size_t offset = 1; offset < size; offset <<= 1) {
    for (size_t i = offset; i < size; i += (offset << 1)) {
        __q_merge_two_sorted_lists(&array[i - offset], &array[i]);
    }
}

list_splice_tail(&array[0], head);

__q_merge_two_sorted_lists 因為之後實作 q_merge 時應該還會用到，因此特別寫成另一個函式。

/**
 * __q_merge_two_sorted_lists() - Merge two sorted lists
 * @lhs: header of sorted list
 * @rhs: header of sorted list
 *
 * Nodes in @rhs will merged into @lhs in the ascending order.
 */
static void __q_merge_two_sorted_lists(struct list_head *lhs,
                                       struct list_head *rhs)
{
    LIST_HEAD(tmp);

    element_t *lhs_entry;
    list_for_each_entry (lhs_entry, lhs, list) {
        if (list_empty(rhs))
            break;

        element_t *rhs_entry;
        list_for_each_entry (rhs_entry, rhs, list) {
            if (strcmp(lhs_entry->value, rhs_entry->value) <= 0)
                break;
        }

        /**
         * insert the range [rhs->next, rhs_entry->list.prev] before
         * lhs_entry->list
         */

        list_cut_position(&tmp, rhs, rhs_entry->list.prev);

        lhs_entry->list.prev->next = tmp.next;
        tmp.next->prev = lhs_entry->list.prev;

        lhs_entry->list.prev = tmp.prev;
        tmp.prev->next = &lhs_entry->list;
    }

    list_splice_tail_init(rhs, lhs);
}

上面的程式碼在 git commit 時遇到了以下錯誤：

Following files need to be cleaned up:
queue.c
queue.c:280:35: error: Uninitialized variable: lhs_entry->value [uninitvar]
            if (strcmp(lhs_entry->value, rhs_entry->value) <= 0)
                                  ^
queue.c:275:23: note: Assuming condition is false
        if (list_empty(rhs))
                      ^
queue.c:280:35: note: Uninitialized variable: lhs_entry->value
            if (strcmp(lhs_entry->value, rhs_entry->value) <= 0)
                                  ^
queue.c:280:53: error: Uninitialized variable: rhs_entry->value [uninitvar]
            if (strcmp(lhs_entry->value, rhs_entry->value) <= 0)
                                                    ^
queue.c:317:31: error: Uninitialized variable: entry->list [uninitvar]
            element_t *next = list_entry(entry->list.next, element_t, list);
                              ^
list.h:169:19: warning: Uninitialized variable: head->next [uninitvar]
    return (head->next == head);
                  ^
queue.c:314:12: note: Assuming condition is false
    while (!list_empty(head)) {
           ^
queue.c:337:23: note: Calling function 'list_splice_tail', 1st argument 'array' value is <Uninit>
    list_splice_tail(&array[0], head);
                      ^
list.h:226:20: note: Calling function 'list_empty', 1st argument 'list' value is <Uninit>
    if (list_empty(list))
                   ^
list.h:169:19: note: Uninitialized variable: head->next
    return (head->next == head);
                  ^

Fail to pass static analysis.

仔細看了一下發現都是關於沒有初始化的錯誤，因此做了以下的修改來讓 cppcheck 可以通過檢查。

diff --git a/queue.c b/queue.c
index 9d928da..de8fa09 100644
--- a/queue.c
+++ b/queue.c
@@ -270,12 +270,12 @@ static void __q_merge_two_sorted_lists(struct list_head *lhs,
 {
     LIST_HEAD(tmp);

-    element_t *lhs_entry;
+    element_t *lhs_entry = NULL;
     list_for_each_entry (lhs_entry, lhs, list) {
         if (list_empty(rhs))
             break;

-        element_t *rhs_entry;
+        element_t *rhs_entry = NULL;
         list_for_each_entry (rhs_entry, rhs, list) {
             if (strcmp(lhs_entry->value, rhs_entry->value) <= 0)
                 break;
@@ -311,8 +311,11 @@ void q_sort(struct list_head *head)
     size_t size = 0;

     struct list_head array[100000];
+    for (int i = 0; i < 100000; ++i)
+        INIT_LIST_HEAD(&array[i]);
+
     while (!list_empty(head)) {
-        element_t *entry;
+        element_t *entry = NULL;
         list_for_each_entry (entry, head, list) {
             element_t *next = list_entry(entry->list.next, element_t, list);

2023q1 Homework1 (lab0)

開發環境

開發過程

實作 queue.c

Read more

2023q1 Homework2 (quiz2)

2021 Week14 - Number Theory

2021 Week4 - Search

2021 Week5 - Sorting