2020q1 Homework1 (lab0)

contributed by < AndybnACT >

tags: `linux2020`

Image Not Showing Possible Reasons
The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported
Learn More →

作業描述

作業要求

開發紀錄

queue.h

為了讓 q_insert_tail 和 q_size 有 $O(1)$ 的時間複雜度，增加指向串列尾端的指標（tail）、和表示元素數量（nr）的欄位。接下來，只要在後面實作的函式中適當地更新這些資訊即可。

/* Queue bookkeeping: a singly linked list plus a cached tail and length. */
typedef struct {
    list_ele_t *head; /* Linked list of elements */
    list_ele_t *tail; /* Last element; NULL while the queue is empty */
    int nr;           /* Number of elements currently stored */
} queue_t;

queue.c

q_new

建立一個新的 queue ，然後將欄位初始化。若 malloc 失敗則在回報錯誤原因後返回 NULL。

/*
 * Create an empty queue.
 * Returns the new queue, or NULL (after reporting through perror) when the
 * allocation fails.
 */
queue_t *q_new()
{
    queue_t *fresh = malloc(sizeof *fresh);
    if (fresh == NULL) {
        perror("malloc");
        return NULL;
    }

    /* An empty queue has no elements at either end. */
    fresh->nr = 0;
    fresh->head = fresh->tail = NULL;
    return fresh;
}

q_free

將整個 queue 所佔據的記憶體空間釋放，包含每個節點內動態配置的資料欄位 value。在這裡，用 while 迴圈走訪串列中各個節點，同時，用一個 self 指標來避免在刪除節點後，走訪下一個節點時發生 use-after-free。最後，記得把 queue 本身的空間也一並釋放。

/*
 * Release every element of the queue, each element's string, and finally the
 * queue structure itself. A NULL queue is ignored.
 */
void q_free(queue_t *q)
{
    if (!q)
        return;

    list_ele_t *ptr = q->head;
    while (ptr) {
        /* Step to the successor first so the freed node is never read. */
        list_ele_t *self = ptr;
        ptr = ptr->next;

        /* free(NULL) is a no-op, so no guard on value is needed. */
        free(self->value);
        free(self);
    }

    free(q);
}

q_insert_head

將指定的節點內容 s 加入 queue 的最前端，然後更新 nr。記得如果這是第一次在 queue 中插入節點（此時 tail 為空值），要順便將 tail 也一併初始化。過程中若 strdup 失敗，則要一併將新配置的節點空間釋放。

/*
 * Insert a copy of string s at the head of the queue.
 * Returns true on success; false when q or s is NULL or when either
 * allocation fails (nothing is leaked and the queue is left unchanged).
 */
bool q_insert_head(queue_t *q, char *s)
{
    list_ele_t *newh;

    /* strdup(NULL) is undefined, so reject a missing string up front. */
    if (!q || !s) {
        return false;
    }

    newh = malloc(sizeof *newh);
    if (!newh) {
        perror("malloc, cannot allocate space for new element");
        return false;
    }

    /* The element owns its own copy of the string. */
    newh->value = strdup(s);
    if (!newh->value) {
        perror("strdup, cannot allocate space for value");
        free(newh);
        return false;
    }

    newh->next = q->head;
    q->head = newh;
    /* First element of an empty queue is also its tail. */
    if (!q->tail) {
        q->tail = newh;
    }
    q->nr++;
    return true;
}

q_insert_tail

同理，將指定的節點內容 s 加入 queue 的尾端。我們可以直接從 q->tail 得到當前 queue 的尾端位置，直接操作、然後更新即可。這邊也透過 (!q->tail) 來判斷是否為第一次新增節點，然後做相應的處置。由於新增節點部分的程式碼與 q_insert_head 相同，未來開發可以考慮用一個函式呼叫取代之。

/*
 * Append a copy of string s at the tail of the queue, using the cached tail
 * pointer so no traversal is needed.
 * Returns true on success; false when q or s is NULL or when either
 * allocation fails (nothing is leaked and the queue is left unchanged).
 */
bool q_insert_tail(queue_t *q, char *s)
{
    list_ele_t *newt;

    /* strdup(NULL) is undefined, so reject a missing string up front. */
    if (!q || !s) {
        return false;
    }

    newt = malloc(sizeof *newt);
    if (!newt) {
        perror("malloc, cannot allocate space for new element");
        return false;
    }

    /* The element owns its own copy of the string. */
    newt->value = strdup(s);
    if (!newt->value) {
        perror("strdup, cannot allocate space for value");
        free(newt);
        return false;
    }
    newt->next = NULL;

    if (!q->tail) {
        /* Empty queue: the new element is both head and tail. */
        q->tail = newt;
        q->head = newt;
    } else {
        q->tail->next = newt;
        q->tail = newt;
    }

    q->nr++;
    return true;
}

q_remove_head

將 queue 首端的資料節點移除，節點內容複製到 sp 當中，然後釋放節點及資料所佔的空間。因為 strncpy 不會額外將 null terminator 寫在目標字串的尾端，在複製資料前，透過 memset 將 sp 的內容全部寫為零，避免在複製不完全的時候發生資料洩漏。同時，在移除最後一個節點的時候，我們必須將 q->tail 的值清空，避免之後 q_insert_tail 時發生 use-after-free。

/*
 * Remove the element at the head of the queue.
 * When sp is non-NULL and bufsize is non-zero, the removed string is copied
 * into sp, truncated to bufsize - 1 characters and always NUL-terminated.
 * Returns false when q is NULL or the queue is empty, true otherwise.
 */
bool q_remove_head(queue_t *q, char *sp, size_t bufsize)
{
    list_ele_t *rm;
    if (!q)
        return false;
    if (!q->head)
        return false;

    rm = q->head;
    /*
     * bufsize == 0 must be skipped: bufsize - 1 would wrap to SIZE_MAX and
     * strncpy would write past the end of sp.
     */
    if (sp && bufsize > 0) {
        /* Zero the buffer first: strncpy does not terminate on truncation. */
        memset(sp, 0, bufsize);
        if (rm->value) {
            strncpy(sp, rm->value, bufsize - 1);
        }
    }
    free(rm->value);

    q->head = rm->next;
    q->nr--;
    /* Removing the last element: clear tail so it does not dangle. */
    if (rm == q->tail)
        q->tail = NULL;

    free(rm);
    return true;
}

q_size

直接回傳當前紀錄 queue 所擁有的節點數量。目前在透過 dudect 執行時間複雜度測試（trace-17）時，有時會回報為 Probably not constant time，還未細究原因。

/*
 * Report the number of elements in the queue from the cached counter.
 * A NULL queue or a queue without a head element reports 0.
 */
int q_size(queue_t *q)
{
    if (q && q->head) {
        return q->nr;
    }
    return 0;
}

q_reverse

在不額外配置空間的限制下，將 queue 的節點次序反轉。函式用 prev 紀錄前一個節點的位置，然後將當前節點的下一個 cur->next 指向前一個節點、然後更新當前與前一個節點的位置。最後，再更新 queue 頭尾的位置。

/*
 * Reverse the order of the elements in place, without allocating or freeing
 * any element. A NULL or empty queue is left untouched.
 */
void q_reverse(queue_t *q)
{
    if (!q || !q->head)
        return;

    list_ele_t *prev = NULL;
    list_ele_t *cur = q->head;

    /* The old head ends up last. */
    q->tail = cur;

    /* Flip each link to point at the element that used to precede it. */
    while (cur) {
        list_ele_t *following = cur->next;
        cur->next = prev;
        prev = cur;
        cur = following;
    }

    /* prev now rests on the old last element, which becomes the head. */
    q->head = prev;
}

q_sort

透過 insertion sort 依據節點的字串排序 queue 。過程中，用兩個指標 head 及 tail 來紀錄排序後的首尾位置，每次從原本的 queue 拿出一個節點來對 head 指向的串列做插入排序。為了避免多餘的判斷（判斷插入點是否為串列首端），我們用指標的指標 list_ele_t **haystack 指向串列首端的位置 &head ，找到插入點之後，只需要將 *haystack 的位置更新為即將插入的節點、然後再將插入節點的下一個節點位置更新成原本 *haystack 指向的位置即可。記得，若探詢完插入點之後插入點位置為空值，代表即將插入的節點會是最後一個節點，此時要更新 tail 指標。

/*
 * Sort the queue in ascending order (per strcasecmp) with an insertion sort.
 * Nodes are relinked in place; a NULL or empty queue is left untouched.
 */
void q_sort(queue_t *q)
{
    list_ele_t *head, *tail, *e;
    if (!q)
        return;
    if (!q->head)
        return;

    /* The sorted list starts out as just the first node. */
    head = q->head;
    tail = q->head;

    /* e walks the remaining, still unsorted nodes. */
    e = head->next;
    head->next = NULL;

    while (e) {
        list_ele_t *insert = e;
        /* Points at the link to rewrite, so the head needs no special case */
        list_ele_t **haystack = &head;
        list_ele_t *tmp;

        /* Detach the node before it is spliced into the sorted list. */
        e = e->next;
        insert->next = NULL;

        /* Stop at the first sorted node that is not smaller than insert. */
        while (*haystack) {
            if (strcasecmp(insert->value, (*haystack)->value) <= 0)
                break;
            haystack = &((*haystack)->next);
        }
        /* Ran off the end: insert becomes the new last node. */
        if (!*haystack)
            tail = insert;

        /* Splice insert in front of whatever *haystack referred to. */
        tmp = *haystack;
        *haystack = insert;
        insert->next = tmp;
    }

    q->head = head;
    q->tail = tail;
    return;
}

將比較函式改為 natural sort：
首先，將 natural sort 的原始碼下載之後，取 strnatcmp.[c|h] 檔案，放到 lab0-c 的目錄之中。
然後將 q_sort 以及 do_sort 函式中的比較函式由 strcasecmp 換成 strnatcmp，還要加上正確的 include。

queue.c:

#include "strnatcmp.h"
...
void q_sort(queue_t *q)
{
...
        while (*haystack) {
            if (strnatcmp(insert->value, (*haystack)->value) <= 0)
                break;
            haystack = &((*haystack)->next);
        }
...
}

qtest.c: 這邊可以把 #include <string.h> 移除

#include "strnatcmp.h"
...
bool do_sort(int argc, char *argv[])
{
...
            if (strnatcmp(e->value, e->next->value) > 0) {
                report(1, "ERROR: Not sorted in ascending order");
                ok = false;
                break;
            }
...
}

最後，在 Makefile 裡面 OBJS 變數尾端加上相應的 object file，也就是 strnatcmp.o。

OBJS := qtest.o report.o console.o harness.o queue.o \
        random.o dudect/constant.o dudect/fixture.o dudect/ttest.o \
		strnatcmp.o

重新編譯，然後測試。使用的測試修改自 strnatcmp 的 GitHub repository：
traces/trace-18-natsort.cmd

new
ih fred
ih pic2
ih pic100a
ih pic120
ih pic121
ih jane
ih tom
ih pic02a
ih pic3
ih pic4
ih 1-20
ih pic100
ih pic02000
ih 10-20
ih 1-02
ih 1-2
ih x2-y7
ih x8-y8
ih x2-y08
ih x2-g8
ih pic01
ih pic02
ih pic-6
ih pic----7
ih pic-5
ih pic05
ih pic-5 
ih pic-5-something
ih pic-4-else
sort

$ make
$ ./qtest < traces/trace-18-natsort.cmd

測試完成後，發現 commit 時會被 pre-commit.hook 擋下。原因有二，一是新增檔案的 coding-style 不符合 clang-format的要求，可以透過 clang-format -i <file> 解決。另外一個原因是靜態分析器建議將沒用到的函式 strnatcasecmp() 移除、還有一些變數的 scope 可以移至 while 迴圈裡面。

將演算法改為時間複雜度 $O(n \log n)$ 的 merge sort：
將線上測驗的程式碼改寫，可以達成目標。具體而言，在尋找串列中間點的時候參考 linked list sort 之中，快指標與慢指標的概念以在正確的位置分割問題；然後在每次 merge 完成之時，紀錄當前的串列尾於 q->tail 之中。完成之後，make valgrind 以及使用 SANITIZER=1 編譯後的執行檔測試都沒有發現問題。
線上測驗在 merge 時的判斷式非常的漂亮，在這個判斷式中，如果左右指標之一為空，都不會進行比較，直接將不為空的那一側節點插入 merge 的尾端。

(!right || (left && strnatcmp(left->value, right->value) <= 0)

通過測試的完整實作如下，queue.c：

/*
 * Recursively merge-sort the list beginning at start (ascending per
 * strnatcmp) and return the head of the sorted list.
 * After every merge q->tail is set to the last merged node, so once the
 * outermost call returns it refers to the last node of the whole list.
 */
list_ele_t *merge_sort(queue_t *q, list_ele_t *start)
{
    if (!start || !start->next)
        return start;

    /* Split: fast advances two nodes per step, slow one, so slow stops at
     * the end of the first half. */
    list_ele_t *slow = start;
    for (list_ele_t *fast = start->next; fast && fast->next;
         fast = fast->next->next)
        slow = slow->next;

    list_ele_t *right = slow->next;
    slow->next = NULL;

    list_ele_t *left = merge_sort(q, start);
    right = merge_sort(q, right);

    /* link is the pointer slot the next merged node gets stored into:
     * first the list head itself, afterwards the previous node's next. */
    list_ele_t **link = &start;
    list_ele_t *merge = NULL;
    while (left || right) {
        /* Take from left when right is exhausted, or when both remain and
         * left compares not greater; otherwise take from right. */
        list_ele_t **from =
            (!right || (left && strnatcmp(left->value, right->value) <= 0))
                ? &left
                : &right;

        merge = *from;
        *link = merge;
        link = &merge->next;
        *from = merge->next;
    }

    q->tail = merge;
    return start;
}

/*
 * Sort the queue in ascending order by delegating to merge_sort.
 * A NULL or empty queue is left untouched.
 */
void q_sort(queue_t *q)
{
    if (q && q->head) {
        q->head = merge_sort(q, q->head);
    }
}

Valgrind 與 Massif 的運用

Valgrind

透過 make valgrind 檢查是否有記憶體錯誤，初步沒有看到任何錯誤訊息。

Massif

用 valgrind --tool=massif ./qtest 執行前，必須先將 .valgrindrc 裡面的 --show-leak-kinds=all 拿掉，可能是 massif 看不懂這個參數。成功執行 qtest 之後，簡單設計實驗觀察記憶體使用情況：

Massif 實驗：

在 Massif 的參數裡面，可以指定是否追蹤 stack 的使用情形、以及最終圖表橫軸的呈現方式。圖表橫軸可以理解為時間軸，只是時間軸可以用真實的執行時間 (ms)、執行過的指令 (i)、或配置與釋放的記憶體容量總和 (B)。

實驗的方式是執行兩次 qtest，實驗組與對照組，兩次實驗中，首先插入 10 個節點於 queue 裡面；之後執行不需要額外配置記憶體的 reverse、和 (insertion) sort；最後再將 queue 內部的節點一一釋放、結束程式。實驗組、對照組的差別在於實驗組在釋放節點的時候，使用會額外配置記憶體來暫存節點內部字串資訊的指令 rh；而對照組選用單純、不另外配置空間的 rhq 指令來釋放節點。

實驗組：

$ valgrind  --tool=massif --stacks=yes --time-unit=i ./qtest
(qtest) new
(qtest) ih RAND 10
(qtest) reverse
(qtest) sort
(qtest) rh
  ... repeat rh command x 10 times
(qtest) quit

在實驗組的 massif profile 當中，可以看到程式開始執行之後，記憶體用量逐步增加，應是程式初始化還有插入節點所致。然後有段時間用量持平，應該是在執行 reverse 以及 sort。接著，顯著地看見 do_remove_head 導致用量增加。但事實上 do_remove_head 在執行結束前，還是會釋放暫存的字串空間；而 massif 卻沒有如實反映（這段時間折線圖理應在一範圍內上下來回擺動）。釋放完成之後，可以看到由 test_malloc 配置的空間已然消失，代表 queue 內部資料空間有被釋放。

對照組：

$ valgrind  --tool=massif --stacks=yes --time-unit=i ./qtest
(qtest) new
(qtest) ih RAND 10
(qtest) reverse
(qtest) sort
(qtest) rhq
   ... repeat rhq command x 10 times
(qtest) quit

在對照組當中，可以看到前半段（3e5 個指令之前）的 massif profile 大致同實驗組（2e5~3e5 的下降應該也是因為 massif 採樣不足所以略呈下降，理應持平）。後半段因為 rhq 不會另外配置空間暫存資料，所以記憶體用量逐步下降，最後歸零。

Address Sanitizer

透過 Makefile 編譯時，加上選項 SANITIZER=1，然後執行測試腳本

$ make SANITIZER=1 
$ make test

在第 17 道測試時發生錯誤

...
# Test if q_insert_tail and q_size is constant time complexity
=================================================================
==14773==ERROR: AddressSanitizer: global-buffer-overflow on address 0x558840a45ca0 at pc 0x558840a354b4 bp 0x7ffd7c7d1000 sp 0x7ffd7c7d0ff0
READ of size 4 at 0x558840a45ca0 thread T0
    #0 0x558840a354b3 in do_option_cmd /home/andy/linux2020/lab0-c/console.c:368
    #1 0x558840a3406d in interpret_cmda /home/andy/linux2020/lab0-c/console.c:220
    #2 0x558840a34aa4 in interpret_cmd /home/andy/linux2020/lab0-c/console.c:243
    #3 0x558840a35729 in cmd_select /home/andy/linux2020/lab0-c/console.c:571
    #4 0x558840a35ac9 in run_console /home/andy/linux2020/lab0-c/console.c:630
    #5 0x558840a32c29 in main /home/andy/linux2020/lab0-c/qtest.c:770
    #6 0x7f777a56fee2 in __libc_start_main (/usr/lib/libc.so.6+0x26ee2)
    #7 0x558840a3044d in _start (/home/andy/linux2020/lab0-c/qtest+0x744d)

0x558840a45ca1 is located 0 bytes to the right of global variable 'simulation' defined in 'console.c:20:6' (0x558840a45ca0) of size 1

發現程式在 do_option_cmd 的時候，對 simulation 這個變數有越界讀取，詳情請見 Commit #497391f。

Commit 497391f 修補方式：

console.h：

將原本 int *valp 更改為 void *valp 使其支援多種型態的指標，然後用 valsize描述指向區域內容物大小。

/* Optionally supply function that gets invoked when parameter changes */
typedef void (*setter_function)(void *oldvalp, int valsize);

/* One entry of the parameter list. */
struct PELE {
    char *name;
    /* Storage of the parameter value; void * so several types can be used */
    void *valp;
    /* Size in bytes of the object valp points to */
    int valsize;
    char *documentation;
    /* Function that gets called whenever parameter changes */
    setter_function setter;
    /* Next entry in the list (NOTE(review): presumably NULL-terminated) */
    param_ptr next;
};

console.c：

在 do_option_cmd 裡面，將原本對整數指標取值的行為改成 memcpy

memcpy(oldvalp, &value,
       sizeof(value) > plist->valsize ? plist->valsize
                                      : sizeof(value));
if (plist->setter)
    plist->setter(oldvalp, plist->valsize);

然後在 add_param 裡面對 valsize 初始化、並將有關呼叫、宣告更新。

void add_param(char *name,
               void *valp,
               int valsize,
               char *documentation,
               setter_function setter)
{
    ...
    ele->valsize = valsize;
    ...
}

最後，因為 report 這個函式會需要取值，寫一個 generic_plist_getter 暫時滿足其需求

/*
 * Read the current value of parameter p as an unsigned integer.
 * A valsize of 1 or 2 reads exactly that many bytes; every other size is
 * read as a 32-bit value.
 */
uint32_t generic_plist_getter(param_ptr p)
{
    if (p->valsize == 1) {
        return *((uint8_t *) p->valp);
    }
    if (p->valsize == 2) {
        return *((uint16_t *) p->valp);
    }
    return *((uint32_t *) p->valp);
}

修改之後執行還是發生錯誤，這次是發生在 strlen 裡面

---	trace-16-perf	0/6
+++ TESTING trace trace-17-complexity:
# Test if q_insert_tail and q_size is constant time complexity
Testing insert_tail...(0/10)

=================================================================
==15708==ERROR: AddressSanitizer: global-buffer-overflow on address 0x5602c9f2a340 at pc 0x7f6a3fff82d1 bp 0x7ffef2545480 sp 0x7ffef2544c28
READ of size 1 at 0x5602c9f2a340 thread T0
    #0 0x7f6a3fff82d0 in __interceptor_strlen /build/gcc/src/gcc/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:354
    #1 0x5602c9f1a096 in test_strdup /home/andy/linux2020/lab0-c/harness.c:216
    #2 0x5602c9f1a3a4 in q_insert_head /home/andy/linux2020/lab0-c/queue.c:70
    #3 0x5602c9f1b02c in measure dudect/constant.c:64
    #4 0x5602c9f1b37b in doit dudect/fixture.c:136
    #5 0x5602c9f1b698 in is_insert_tail_const dudect/fixture.c:168
    #6 0x5602c9f15992 in do_insert_tail /home/andy/linux2020/lab0-c/qtest.c:259
    #7 0x5602c9f17ec9 in interpret_cmda /home/andy/linux2020/lab0-c/console.c:234
    #8 0x5602c9f18900 in interpret_cmd /home/andy/linux2020/lab0-c/console.c:257
    #9 0x5602c9f197fb in cmd_select /home/andy/linux2020/lab0-c/console.c:586
    #10 0x5602c9f19b9b in run_console /home/andy/linux2020/lab0-c/console.c:645
    #11 0x5602c9f16c3b in main /home/andy/linux2020/lab0-c/qtest.c:770
    #12 0x7f6a3fcadee2 in __libc_start_main (/usr/lib/libc.so.6+0x26ee2)
    #13 0x5602c9f1444d in _start (/home/andy/linux2020/lab0-c/qtest+0x744d)

0x5602c9f2a340 is located 32 bytes to the left of global variable 'q' defined in 'dudect/constant.c:21:17' (0x5602c9f2a360) of size 8
0x5602c9f2a340 is located 0 bytes to the right of global variable 'random_string' defined in 'dudect/constant.c:22:13' (0x5602c9f2a020) of size 800

可見 Address Sanitizer 回報 random_string 尾端有 out-of-bound read 發生，詳情請見 Commit 71b0892。

Commit 71b0892 修補方式：

程式碼中發現在 get_random_string 中，對 random_string 存取的範圍是由 number_measurements 指定，為 150；然而，該陣列在宣告時大小只有 100 。

const size_t number_measurements = 150;
...
static char random_string[100][8];
...
char *get_random_string(void)
{
    random_string_iter = (random_string_iter + 1) % number_measurements;
    return random_string[random_string_iter];
}

所以我們定義了一個巨集，然後讓宣告的陣列大小和 number_measurements 都由同一個巨集指定，順利解決問題。

論文研讀

Dude, is my code constant time? 是一篇 2016 年的論文。這篇論文主要的目的在用統計學「假設檢定」的方式來確認程式的執行時間不會因輸入的測資的差異而改變；進一步可以用來判斷旁通道攻擊中的 timing attack 對被測試程式的威脅程度。

這篇論文採用的實驗模型將測資分為兩個類型（classes），類型一為唯一固定測資、類型二為其他所有隨機測資的集合。採用的假設檢定為 Welch's t-test，檢定兩個母體的平均值是否有「顯著」差異。這種檢定方式不假設兩母體有同樣的標準差、亦不對兩種採樣的採樣數有所限制。

具體而言，lab0-c 實驗分成以下步驟：準備測資、測試、更新統計資料，若資料量足夠則進行假設檢定、反之重複準備測資…

首先，準備測資。固定測資採用長度為零的 empty queue；隨機測資則是不定長度（<1e4）、內容隨機的 queue。兩類型的測資被隨機打散在測試序列當中。每次執行測試時，測資的總數由 number_measurements 規範（實際上會減去兩倍的 drop_size）

/*
 * Fill input_data with random bytes, assign every measurement a random
 * class bit, and regenerate the pool of random strings.
 * For class 0 the leading 16 bits of the measurement's chunk are cleared.
 */
void prepare_inputs(uint8_t *input_data, uint8_t *classes)
{
    randombytes(input_data, number_measurements * chunk_size);

    for (size_t idx = 0; idx < number_measurements; idx++) {
        classes[idx] = randombit();
        if (classes[idx] != 0)
            continue;

        /* Class 0: overwrite the first 16 bits of this chunk with zero. */
        uint16_t *chunk_head = (uint16_t *) (input_data + idx * chunk_size);
        *chunk_head = 0x00;
    }

    for (size_t row = 0; row < NR_MEASURE; ++row) {
        /* Generate random string: 7 random bytes plus a terminator. */
        uint8_t *text = (uint8_t *) random_string[row];
        randombytes(text, 7);
        random_string[row][7] = 0;
    }
}

測試執行的方式如下，先將 queue 準備好，然後紀錄 q_size（或 q_insert_tail）的執行時間。

        for (size_t i = drop_size; i < number_measurements - drop_size; i++) {
            char *s = get_random_string();
            dut_new();
            dut_insert_head(
                get_random_string(),
                *(uint16_t *) (input_data + i * chunk_size) % 10000);
            before_ticks[i] = cpucycles();
            dut_insert_tail(s, 1);
            after_ticks[i] = cpucycles();
            dut_free();
        }

測試序列完成之後，將 before_ticks 與 after_ticks 相減得到執行時間，然後更新統計資訊。其中，t_push 為即時（Online）更新統計資訊的函式。

/*
 * Feed every usable execution time, together with its class, to t_push.
 * Non-positive entries are skipped.
 */
static void update_statistics(int64_t *exec_times, uint8_t *classes)
{
    for (size_t idx = 0; idx < number_measurements; idx++) {
        const int64_t elapsed = exec_times[idx];

        /* Skip when the cycle counter overflowed or the sample was dropped;
         * otherwise do a t-test on the execution time. */
        if (elapsed > 0) {
            t_push(t, elapsed, classes[idx]);
        }
    }
}

值得注意的地方是，根據論文描述，因為作業系統運作時中斷、行程切換的緣故，採樣結果往往會有偏向（skewed）時間較長的傾向；因此，會將執行時間超過一定閾值的採樣點移除。然而，這樣的方法直接應用在這裡（測試是否為 $O(1)$）可能會有困難，所以沒有看到類似的程式碼。建議可以改成「移除前 10% 的資料點」達成類似的目標。

最後，進行假設檢定，此時若採樣點已湊齊（enough_measurements），則假設檢定的結果具有意義，此時若檢定兩採樣的母體具有顯著差異性，就代表執行時間不是 constant time。