Try   HackMD

2024q1 Homework1 (lab0)

contributed by < williamlin0518 >

開發環境

$  gcc --version
gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
Copyright (C) 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

$ lscpu
Architecture:            x86_64
  CPU op-mode(s):        32-bit, 64-bit
  Address sizes:         46 bits physical, 48 bits virtual
  Byte Order:            Little Endian
CPU(s):                  20
  On-line CPU(s) list:   0-19
Vendor ID:               GenuineIntel
  Model name:            12th Gen Intel(R) Core(TM) i7-12700K
    CPU family:          6
    Model:               151
    Thread(s) per core:  2
    Core(s) per socket:  10
    Socket(s):           1
    Stepping:            2
    BogoMIPS:            7219.20

指定的佇列操作

q_new

allocate 的翻譯是「配置」,以區格 dispatch (分配/分發) 的翻譯。

如果空間分配 失敗 回傳NULL
使用函式 INIT_LIST_HEAD 初始化

改進你的漢語表達。

struct list_head *q_new() {
    struct list_head *head = malloc(sizeof(struct list_head));
    if (!head) return NULL;

    INIT_LIST_HEAD(head); // Initialize the list to point to itself
    return head;
}

使用指定的程式碼縮排風格。

q_free

藉由 list_entry 巨集,從 list_head 推導出 element_t 結構開頭地址。移除該節點並釋放其空間

深入list_entry巨集,發現是根據 container_ofoffsetof 算出偏移量

offsetof : 將結構的起始位址設為0,得到的MEMBER位址實際上就等於偏移量

#define offsetof(TYPE, MEMBER) ((unsigned int) &((TYPE *)0)->MEMBER)

container_of : 從 current 的位址中減去 member 的偏移量,得到包含該 list 成員的 element_t的起始位址。


#define container_of(ptr,type,member)\
    __extension__({
    const __typeof__(((type *)0)->member) *__pmember =(ptr);
    (type*)((char*) __pmember - offsetof(type,member));
})
void q_free(struct list_head *head) {
    if (!head) return;

    struct list_head *current, *tmp;
    element_t *element;

    list_for_each_safe(current, tmp, head) {
        element = list_entry(current, element_t, list);
        list_del(current); // Remove from list
        free(element->value); 
        free(element);
    }

    free(head); // Free the queue head
}

HackMD 不是讓你張貼完整程式碼的地方,GitHub 才是!因此你在開發紀錄只該列出關鍵程式碼 (善用 diff 標示),可附上對應 GitHub commit 的超連結,列出程式碼是為了「檢討」和「便於他人參與討論」,不是用來「假裝自己有付出」

q_insert_head & q_insert_tail

使用函式 list_add 將節點加在 head 的下一個位置

Before list_add: HEAD -> A -> B -> HEAD

After list_add(new, HEAD): HEAD -> NEW -> A -> B -> HEAD

使用函式 list_add_tail 將節點加在 head 的前一個位置

Before list_add_tail: HEAD -> A -> B -> HEAD

After list_add_tail(new, HEAD): HEAD -> A -> B -> NEW -> HEAD

bool q_insert_head(struct list_head *head, char *s)
{
    if (!head || !s)
        return false;

    element_t *new_element = malloc(sizeof(element_t));
    if (!new_element)
        return false;

    new_element->value = strdup(s);
    if (!new_element->value) {
        free(new_element);
        return false;
    }

    list_add(&new_element->list, head);  // Insert at head
    return true;
}

q_remove_head & q_remove_tail

這邊要注意的是removedelete 的差別,別把資料free掉

移除第一個元素 (head->next) 並且將移除的資料複製到 buffer 中

element_t *q_remove_head(struct list_head *head, char *sp, size_t bufsize)
{
    if (!head || list_empty(head))
        return NULL;

    struct list_head *first = head->next;
    element_t *element = list_entry(first, element_t, list);

    list_del(first);  // Remove from list

    if (sp) {
        strncpy(sp, element->value, bufsize - 1);
        sp[bufsize - 1] = '\0';
    }

    return element;
}

移除最後一個元素 (head->prev) 並且將移除的資料複製到 buffer 中

element_t *q_remove_tail(struct list_head *head, char *sp, size_t bufsize)
{
    if (!head || list_empty(head))
        return NULL;

    struct list_head *last = head->prev;  // Assuming 'head' is circular
    element_t *element = list_entry(last, element_t, list);

    // Copy string to provided buffer
    if (sp) {
        strncpy(sp, element->value, bufsize - 1);
        sp[bufsize - 1] = '\0';
    }

    // Unlink and return the element
    list_del(last);
    return element;
}

HackMD 不是讓你張貼完整程式碼的地方,GitHub 才是!因此你在開發紀錄只該列出關鍵程式碼 (善用 diff 標示),可附上對應 GitHub commit 的超連結,列出程式碼是為了「檢討」和「便於他人參與討論」,不是用來「假裝自己有付出」

q_size

使用 list_for_each 遍歷整個佇列

q_delete_mid

原先使用 q_size 得到佇列大小,再遍歷一遍將中間值刪除

發現可以使用快慢指標,這樣可以減少一次遍歷

struct list_head *fast = head->next, *slow = head->next;
while (fast != head && fast->next != head) {
    fast = fast->next->next;
    slow = slow->next;
}

或使用指標分別往前 (forward) 及往後 (backward) 走,並找到中間的節點

struct list_head *forward = head->next;
struct list_head *backward = head->prev;

while (forward != backward && forward->prev != backward) {
    forward = forward->next;
    backward = backward->prev;
}

q_delete_dup

在排序的佇列中,移走重複內容的節點,針對每個元素,向後尋找擁有相同字串的元素。

bool q_delete_dup(struct list_head *head) {
    if (!head || list_empty(head) || list_is_singular(head)) {
        return false;
    }
    element_t *current_element, *next_element;
    list_for_each_entry_safe(current_element, next_element, head, list) {
        bool is_duplicate = false;
        
        // Keep removing next elements as long as they are duplicates
        while (&next_element->list != head && !strcmp(current_element->value, next_element->value)) {
            list_del(&next_element->list);
            free(next_element->value);
            free(next_element);
            is_duplicate = true;          
            next_element = list_entry(current_element->list.next, element_t, list);

        }
        // If the current element was part of a duplicate sequence, remove it as well
        if (is_duplicate) {
            list_del(&current_element->list);
            free(current_element->value);
            free(current_element);
        }
    }
    return true ;
}

q_reverse q_reverseK

使用 list_cut_position 將要反轉的部份切出來,反轉後再用 list_splice_init 接回原本佇列

這邊需要仔細看過 list_cut_positionlist_splice_init 才能了解如何串接,並注意裁切過後的位置,起初犯了一個錯誤,將*next_segment_start 設為 cut_point->next 便會少切一個位置

while (start->next != head) {
        struct list_head *cut_point = start;
        int count = 0;
        for (int i = 0; i < k-1 && cut_point->next != head; i++) {
            cut_point = cut_point->next;
            count++;
        }
        if (count< k-1) {
            break;  // Less than k elements left, no more reversing
        }
        struct list_head *next_segment_start = cut_point->next;  // Store the start of the next segment.
        INIT_LIST_HEAD(&temp_head);
        list_cut_position(&temp_list, start, cut_point);
        q_reverse(&temp_list);
        list_splice_init(&temp_list, start);
        start = next_segment_start;
    }

更新後的版本,參考 yeh-sudopao0626 後發現可以利用list_for_each_safe 將cut_point的後指標存起。

list_for_each_safe (cut_point, safe, head) {
        count++;
        if (count == k) {
            list_cut_position(&temp_list, start, cut_point);
            q_reverse(&temp_list);
            list_splice_init(&temp_list, start);
            count = 0;
            start = safe->prev;
        }
    }

sort

use list_move_tail 來排序佇列

void merge(struct list_head *head, struct list_head *left, struct list_head *right, bool descend) {
    while (!list_empty(left) && !list_empty(right)) {
        
        element_t *left_entry = list_entry(left->next, element_t, list);
        element_t *right_entry = list_entry(right->next, element_t, list);
        bool condition = descend ? 
                         (strcmp(left_entry->value, right_entry->value) >= 0) : 
                         (strcmp(left_entry->value, right_entry->value) <= 0);
        if (condition) {
            list_move_tail(left->next, head);
        } else {
            list_move_tail(right->next, head);
        }
    }

    // Append any remaining elements
    if (!list_empty(left)) {
        list_splice_tail(left, head);
    }
    if (!list_empty(right)) {
        list_splice_tail(right, head);
    }
}

HackMD 不是讓你張貼完整程式碼的地方,GitHub 才是!因此你在開發紀錄只該列出關鍵程式碼 (善用 diff 標示),可附上對應 GitHub commit 的超連結,列出程式碼是為了「檢討」和「便於他人參與討論」,不是用來「假裝自己有付出」

這邊要注意 list_cut_position(&left, head, slow); 會將head指向斷點右側,須將head段開,並使用right 指向右側佇列

你如何確保目前的測試涵蓋排序演算法的最差狀況?

q_ascend, q_descend

這邊的想法是利用雙向鏈結特性,只要往回走即可

int q_ascend(struct list_head *head)
{
    if (list_empty(head) || list_is_singular(head)) {
        return 0;  // No action needed if the list is empty or has only one
                   // node.
    }

    int count = 0;

    struct list_head *prev, *node = head->prev;
    element_t *last_entry = list_entry(node, element_t, list);
    char *min_value = last_entry->value;

    while (node != head) {
        element_t *entry = list_entry(node, element_t, list);
        prev = node->prev;  // Save the previous node before potentially
                            // deleting the current node.

        if (strcmp(entry->value, min_value) > 0) {
            list_del(node);
            free(entry->value);
            free(entry);

        } else {
            min_value = entry->value;
            count++;  // Increment the count of nodes that were not removed.
        }
        node = prev;
    }

    return count;  // Return the count of removed nodes.
}

q_merge

  • traverse (動詞) 和 traversal (名詞)

根據 Dictionary.com解釋: (作為及物動詞和不及物動詞都有類似的意思,以下列出作為及物動詞的寓意)

  • to pass or move over, along, or through.
  • to go to and fro over or along.

其實這意思很好懂,就像我們「走過」/「穿越」校園一般,於是 traverse a linked list 就會是「(用某種手段) 存取多個鏈結串列的節點」,但這裡卻沒有必要「所有」的範圍:英語的 "move over/through" 用於某個區域時,根本沒有這樣的隱含意義。如果將 traverse 翻譯為「遍歷」,就會導致「超譯」,也就是跳脫「直譯」和「意譯」。

當我們回頭看 "traverse" 所在的技術描述內容,例如 "traverse every node",若翻譯為「遍歷每個節點」,那麼既然都「遍」(意即「全面」、「到處」),又何來「每個」節點呢?於是,合理的翻譯應改為「逐一走訪每個節點」 —— 差異在哪?在 "traverse every node" 的應用場景中,可能是我們嘗試在鏈結串列尋找特定的節點內含的資料,一旦找到就停止,或者我們要偵測給定的鏈結串列是否包含環狀 (circular) 結構 ,並沒有真的要「遍」(到處/全面)「歷」(意即「經過」) 每個節點。在我們的用語中,要區分「意圖」(intention) 和「實際作用」(reaction),濫用「遍歷」會使得語意不清,從而難以推測英語原文的訴求。

還有個更重要的原因是,「遍歷」這詞已在理工領域存在,且廣泛使用,即「遍歷理論」(Ergodic theory),後者是研究具有不變測度 (invariant measure) 的動力系統及其相關問題的一個數學分支。 遍歷理論研究遍歷變換,由試圖證明統計物理中的遍歷假設 (Ergodic hypothesis) 演進而來。

在統計學中,若單一個物理系統在不同時間內重複相同的實驗 (如丟擲一枚硬幣),其取樣數據所得的統計結果 (如硬幣出現正面的機率) 和極多個完全相同的物理系統的系集 (如丟極多個相同的硬幣) 在同時作相同的實驗取樣數據的統計結果假設為相同時,此種假設即稱為「遍歷性假設」或「遍歷假設」。基於這個假設,對時間平均的統計方式及結果,便可由對系集的平均及統計方式得到。在一般物理系統中,尤其在統計力學範圖中,均採用此遍歷性假設為基本的假設。在流體力學中對亂流的實驗分析,亦是採用此假設。

遍歷 (Ergodic) 源於以下二個希臘詞:

  • ergon (對應英語的 work)
  • odos (對應英語的 path 或 way)

最初這是由奧地利物理學家波茲曼 (Ludwig Boltzmann) 於統計力學領域 (statistical mechanics) 提出的概念,其一廣為人知的結果是:在經過長時間後,時間平均將會趨近空間平均。此事實在動力系統扮演極重要的角色,在隨機分析領域亦然。

因此,若貿然將 traverse 翻譯為「遍歷」,一來語意不清,二來增加和理工多項領域專業人員的溝通成本,實在得不償失。

資訊科技詞彙翻譯

這邊我善用了在 q_sort 裡使用的 merge 函式,所以我遍歷 所有佇列,並且倆倆合併

int q_merge(struct list_head *head, bool descend) {
    if (list_empty(head) || list_is_singular(head)) {
        return 0; // No action needed if the list is empty or has only one element.
    }
    queue_contex_t *first_qctx = list_first_entry(head, queue_contex_t, chain);

    // Prepare a temporary list for iterative merging.
    struct list_head new_head;
    INIT_LIST_HEAD(&new_head);
    struct list_head merged;
    INIT_LIST_HEAD(&merged);

    queue_contex_t *entry, *safe;

    // Start with merging into the first queue.
    list_splice_init(first_qctx->q, &merged);

    // Iterate through the remaining queue contexts for merging.
    list_for_each_entry_safe(entry, safe, head, chain) {
        if (entry == first_qctx) continue; // Skip the first queue context as it's already included.

        // Use the merge function to combine the current queue with the merged results.
        struct list_head temp;
        INIT_LIST_HEAD(&temp);
        list_splice_init(entry->q, &temp); // Prepare the current queue.
        merge(&new_head, &merged, &temp, descend);
        INIT_LIST_HEAD(&merged);
        list_splice_init(&new_head, &merged);
    }
    list_splice(&merged, first_qctx->q);
    
    return q_size(first_qctx->q); z
}

HackMD 不是讓你張貼完整程式碼的地方,GitHub 才是!因此你在開發紀錄只該列出關鍵程式碼 (善用 diff 標示),可附上對應 GitHub commit 的超連結,列出程式碼是為了「檢討」和「便於他人參與討論」,不是用來「假裝自己有付出」

比較自己實作的 merge sort 和 Linux 核心程式碼之間效能落差

參考筆記

kdnvt chiangkd

先新增 list_sort.clist_sort.c 到專案中,更改 qtest.c
使其能測量 cpu cycles

if (current && exception_setup(true)) {
        before_ticks = cpucycles();
        q_sort(current->q, descend);
        // list_sort(NULL, current->q, &cmp);
        after_ticks = cpucycles();
        report_noreturn(0, "cpucycles : %d", after_ticks - before_ticks);
        report_noreturn(0, "\n");
    }
cmd> new
cmd> it RAND 100000
cmd> time sort

實驗數據比較

sort cpu cycles time list length
merge sort 5679844 0.002 10000
list_sort 4700340 0.002 10000
merge sort 138283242 0.087 100000
list_sort 125464132 0.076 100000

list_sort 的

優化

些濫用「優化」的語境,根本與「改良」和「改善」無異,那為何不直接講「改良」和「改善」呢?

資訊科技詞彙翻譯

list_sort 透過迭代而非遞歸 遞迴的方式來合併排序好的子列表,在排序過程中,它將待排序的元素轉換成單向的列表,因此簡化了合併操作

注意用語!

迭代方法:通過一系列迭代步驟來合併子列表,逐步建立最終的排序好的列表。這種方法避免了遞迴呼叫的資源,特別是在處理大列表時可能更高效。

[4, 2, 1, 3]
初始化:將列表轉換為單向列表 [4 -> 2 -> 1 -> 3 -> NULL]。

第一輪:對每個元素進行處理,創建大小為 1 的子列表,並在必要時合併它們。

處理 4:子列表 [4]。
處理 2:子列表 [2],與 [4] 合併得到 [2 -> 4]。
處理 1:子列表 [1]。
處理 3:子列表 [3],與 [1] 合併得到 [1 -> 3]。
第二輪:合併上一輪創建的子列表。

將 [2 -> 4] 與 [1 -> 3] 合併得到最終排序好的列表 [1 -> 2 -> 3 -> 4]。
每一步的合併操作都是通過迭代完成的,不需要遞迴呼叫。

注意用語!

遞歸 方法:通過不斷分割列表 ???遞歸 排序各個部分,然後合併這些部分來建立最終的排序好的列表 ???。這種方法在概念上較為直觀,但可能因遞迴呼叫的資源而在某些情況下效率較低。


閱讀論文〈[Dude, is my code constant time?]

Dude, is my code constant time? 提出一項檢測程式複雜度是否為常數的工具,步驟如下:

Step 1: Measure execution time

進行測試的時候分別利用兩組測資,一組是固定測資(全部固定為某個值),另一組則是隨機測資,並對測量結果進行統計分析,以確定兩個輸入資料類別的時序分佈在統計上是否不同。

Step 2: Apply post-processing

在統計分析之前,對每個單獨的測量值進行後處理,包含:

#### 1. Cropping - Objective: To mitigate the impact of outliers and skewness in the timing data, which can be caused by various factors such as system interrupts, measurement artifacts - Process: Cropping involves setting a threshold (usually determined by a percentile of the data) and discarding measurements that exceed this threshold.

2. Higher-Order Preprocessing

  • Objective: To detect and account for complex forms of timing variability that may not be apparent in a simple analysis of mean execution times.
  • Process: involves transforming the timing data before statistical testing by mimicking higher-order Differential Power Analysis (DPA) attacks. A common method is the centered product
  • example [
    t1
    ​,
    t2
    ​,
    t3
    ​,
    t4
    ​,
    t5
    ​]=[100,105,95,110,90],
    t
    =(100+105+95+110+90)/5=100 cycles.
    • Centered Product:
      • (
        t1
        -
        t
        )*(
        t2
        -
        t
        )=0×5=0
      • (
        t1
        -
        t
        )*(
        t3
        -
        t
        )=0×−5=0
      • (
        t1
        -
        t
        )*(
        t4
        -
        t
        )=0×10=0
      • (
        t1
        -
        t
        )*(
        t5
        -
        t
        )=0×−10=0
      • (
        t2
        -
        t
        )*(
        t3
        -
        t
        )=5×−5=−25
      • And so on for all unique pairs.
    • Analysis: 5×10=50 are higher than others, suggesting that certain combinations of inputs have a more pronounced effect on execution time.

Step 3: Apply Statistical Test

  • A statistical test, Welch's t-test, is used to determine if the execution time distributions of the two input classes are significantly different.

以你的認知重新書寫,不要只是「畫重點」,後者是你中學時期的技能,現在應該要更上一層樓。

實作探討

Percentiles in dudect.h

  • Sorting the execution time measurements in ascending order.
  qsort(ctx->exec_times, ctx->config->number_measurements, sizeof(int64_t), (int (*)(const void *, const void *))cmp);
  • Defining several percentile values to determine different thresholds for cropping.
for (size_t i = 0; i < DUDECT_NUMBER_PERCENTILES; i++) {
    ctx->percentiles[i] = percentile(
        ctx->exec_times, 1 - (pow(0.5, 10 * (double)(i + 1) / DUDECT_NUMBER_PERCENTILES)),
        ctx->config->number_measurements);
  }

The prepare_percentiles function computes these threshold values based on the sorted execution times and stores them in the percentiles array.

Abandon the First Batch of Measurements

  • Warm-up Phase
  • Establishing Baselines
dudect_state_t dudect_main(dudect_ctx_t *ctx) {
  prepare_inputs(ctx->config, ctx->input_data, ctx->classes);
  measure(ctx);
  bool first_time = ctx->percentiles[DUDECT_NUMBER_PERCENTILES - 1] == 0;
  dudect_state_t ret = DUDECT_NO_LEAKAGE_EVIDENCE_YET;
  if (first_time) {
    // throw away the first batch of measurements.
    // this helps warming things up.
    prepare_percentiles(ctx);
  } else {
    update_statistics(ctx);
    ret = report(ctx);
  }
  return ret;
}
  • Flow: with DUDECT_NUMBER_PERCENTILES set to 100
    • Execution Times Measured: [101, 102, 103, 199, 200] cycles.
    • ctx->percentiles[99] (the last element, given zero-based indexing) is 0.
    • prepare_percentiles(ctx); is called to calculate and store the percentile thresholds based on the current batch of execution times.
      • The 1st percentile might be close to 101 cycles.
      • The 50th percentile (median) might be close to 150 cycles.
      • The 100th percentile would be 200 cycles.
    • These thresholds are stored in the percentiles array

Compare dudect in lab0

dudect/fixture.c in lab0 dosen't use Cropping

t_context_t 結構中新增 percentiles

static bool doit(int mode)
{
    ...
    prepare_inputs(input_data, classes);
    bool ret = measure(before_ticks, after_ticks, input_data, mode);
    differentiate(exec_times, before_ticks, after_ticks);
+   if (first_time) {
+        prepare_percentiles(t, exec_times);
+    } else {
+        update_statistics(exec_times, classes);
+        ret &= report();
+    }
-    update_statistics(exec_times, classes);
-    ret &= report();
    return ret;
    ...
}

update_statistics 利用 percentiles 進行 cropping。

static void update_statistics(const int64_t *exec_times, uint8_t *classes)
{
    for (size_t i = 10; /* discard the first few measurements */ i < N_MEASURES;
         i++) {
        int64_t difference = exec_times[i];
        /* CPU cycle counter overflowed or dropped measurement */
        if (difference <= 0)
            continue;
+        for (size_t crop_index = 0; crop_index < +DUDECT_NUMBER_PERCENTILES;
+             crop_index++) {
+            if (difference < t->percentiles[crop_index]) {
+                t_push(t, difference, classes[i]);
+            }
+        }
    }
}

根據 dudect.h 完成了對應的 prepare_percentile 實作並成功通過 traces/trace-17-complexity.cmd 測試


整合 tic-tac-toe 遊戲功能

Basic setups

  1. 創建 hlist.h
  2. 修改 qtest 程式,使其新增 ttt 命令
ADD_COMMAND(ttt, "Do tic-tac-toe game in console", "");

Monte Carlo Search Tree

MCTS is an algorithm used for making decisions in games, using random simulations to generate statistical data about which moves are most likely to lead to a victory

most importaint: balances exploration of untried moves with exploitation of moves known to be effective.

Key Components of MCTS

  • Selection: find a leaf node by selection policy to balance exploration and exploitation
  • Expansion: Once a leaf node is reached, one or more child nodes are added to expand the tree
  • Simulation: a simulation (also called a rollout) is performed from the node's game state until a terminal state is reached
  • Backpropagation: The result of the simulation is then propagated back up the tree, updating the statistics

Monte Carlo Search Tree Implementation

commit 1642dbd

Fixed-point operation with MCTS

詳閱作業規範