2020q3 Homework1 (lab0)

contributed by < quantabase13 >

說明

作業說明

作業要求

實作 queue，此 queue 能從 head 處插入 element。

需要實現的函式：
- q_new - 新增空的 queue
- q_free - 清除整個 queue
- q_insert_head - 在 queue 的 head 前插入元素
- q_insert_tail - 在 queue 的 tail 後插入元素
- q_remove_head - 取出並移除 queue 的第一個元素
- q_size- 給出 queue 中的元素個數
- q_reverse- 反轉整個 queue ，並且只使用原本 queue 中的 element
- q_sort - 對 queue 進行遞增排序(時間複雜度：O(nlog(n))

實作流程

queue.h

修改 queue_t，新增 size和 tail 這兩個 member
- 從程式碼中的註解可得知 q_size 和 q_insert_tail 的時間複雜度限制為 O(1)。透過這種方式可以讓 q_size 直接返回 size 的值 ; 讓 q_insert_tail 不需計算 tail 的位置。










  /* Queue structure */
typedef struct {
    list_ele_t *head; /* Linked list of elements */
    /* TODO: You will need to add more fields to this structure
     *        to efficiently implement q_size and q_insert_tail.
     */
    /* TODO: Remove the above comment when you are about to implement. */
    list_ele_t *tail;
    int size;
} queue_t;

queue.c

要點(不含q_sort部份)
- 注意 malloc 是否有成功分配記憶體
- 注意不再被使用的記憶體要記得 free
要點( q_sort 實作相關)
- merge sort 的實現方式(遞歸或迭代)
q_size













/*
* Return number of elements in queue.
* Return 0 if q is NULL or empty
*/
int q_size(queue_t *q)
{
   /* TODO: You need to write the code for this function */
   /* Remember: It should operate in O(1) time */
   /* TODO: Remove the above comment when you are about to implement. */
   if (!q)
       return 0;
   return q->size;
}

q_size 確認傳入的 pointer 不為 NULL 後才對其取值。同時透過為 queue_t 加入 size 後可讓所需時間為 O(1)。

q_new















/*
 * Create empty queue.
 * Return NULL if could not allocate space.
 */
queue_t *q_new()
{
    queue_t *q = malloc(sizeof(queue_t));
    /* TODO: What if malloc returned NULL? */
    if (q) {
        q->head = NULL;
        q->tail = NULL;
        q->size = 0;
    }
    return q;
}

一樣根據程式碼中的註解，在實作中加入對記憶體分配失敗時的處理。

q_free
















/* Free all storage used by queue */
void q_free(queue_t *q)
{
    /* TODO: How about freeing the list elements and the strings? */
    /* Free queue structure */
    list_ele_t *next;
    if (!q)
        return;
    for (list_ele_t *indirect = q->head; indirect != NULL;) {
        free(indirect->value);
        next = (indirect)->next;
        free(indirect);
        indirect = next;
    }
    free(q);
}

利用 next 和 indirect 兩個 pointer 遍歷整個 queue 來釋放記憶體。釋放記憶體的順序為：

元素結構中的 *char pointer 指向的記憶體
元素結構佔有的記憶體
queue 結構佔有的記憶體

q_insert_head

































bool q_insert_head(queue_t *q, char *s)
{
    list_ele_t *newh;
    char *newstr;
    int len = strlen(s);
    /* TODO: What should you do if the q is NULL? */
    if (!q) {
        return false;
    }

    newh = malloc(sizeof(list_ele_t));
    if (!newh) {
        return false;
    }
    newstr = malloc(sizeof(char) * len + 1);
    if (!newstr) {
        free(newh);
        return false;
    }
    for (int i = 0; i < len; i++) {
        *(newstr + i) = *(s + i);
    }
    *(newstr + len) = '\0';
    /* Don't forget to allocate space for the string and copy it */
    /* What if either call to malloc returns NULL? */
    newh->next = q->head;
    newh->value = newstr;
    q->head = newh;
    if (!(q->tail))
        q->tail = newh;
    (q->size)++;
    return true;
}

根據傳進來 string 的長度，動態配置記憶體這裡必須要注意 malloc 的結果是否成功。為了突顯記憶體分配的問題，作業中的 malloc 其實已經被包裝過，在 harness.h 檔案中可以看到如下程式碼：



/* Tested program use our versions of malloc and free */
#define malloc test_malloc
#define free test_free

可以得知 malloc 在作業裡被替換成 test_malloc 。追蹤到 harness.c 檔後，可以看到 test_malloc 的具體實現：









































/*
 * Implementation of application functions
 */
void *test_malloc(size_t size)
{
    if (noallocate_mode) {
        report_event(MSG_FATAL, "Calls to malloc disallowed");
        return NULL;
    }

    if (fail_allocation()) {
        report_event(MSG_WARN, "Malloc returning NULL");
        return NULL;
    }

    block_ele_t *new_block =
        malloc(size + sizeof(block_ele_t) + sizeof(size_t));
    if (!new_block) {
        report_event(MSG_FATAL, "Couldn't allocate any more memory");
        error_occurred = true;
    }

    // cppcheck-suppress nullPointerRedundantCheck
    new_block->magic_header = MAGICHEADER;
    // cppcheck-suppress nullPointerRedundantCheck
    new_block->payload_size = size;
    *find_footer(new_block) = MAGICFOOTER;
    void *p = (void *) &new_block->payload;
    memset(p, FILLCHAR, size);
    // cppcheck-suppress nullPointerRedundantCheck
    new_block->next = allocated;
    // cppcheck-suppress nullPointerRedundantCheck
    new_block->prev = NULL;

    if (allocated)
        allocated->prev = new_block;
    allocated = new_block;
    allocated_count++;

    return p;
}

可以看到，在呼叫真正 malloc 之前，多了一個 fail_allocation 函式。
在 harness.c 檔裡同樣可看到 fail_allocation 的實現：






/* Should this allocation fail? */
static bool fail_allocation()
{
    double weight = (double) random() / RAND_MAX;
    return (weight < 0.01 * fail_probability);
}

我們從程式碼中可以得知， fail_allocation 代表分配記憶體失敗的機率。在 harness.c 檔裡可以看到， fail_probability 的值預設為0 。
在作業程式碼中搜尋 fail_probaility 時在 q_test.c 有找到以下程式碼：




























static void console_init()
{
    add_cmd("new", do_new, "                | Create new queue");
    add_cmd("free", do_free, "                | Delete queue");
    add_cmd("ih", do_insert_head,
            " str [n]        | Insert string str at head of queue n times. "
            "Generate random string(s) if str equals RAND. (default: n == 1)");
    add_cmd("it", do_insert_tail,
            " str [n]        | Insert string str at tail of queue n times. "
            "Generate random string(s) if str equals RAND. (default: n == 1)");
    add_cmd("rh", do_remove_head,
            " [str]          | Remove from head of queue.  Optionally compare "
            "to expected value str");
    add_cmd(
        "rhq", do_remove_head_quiet,
        "                | Remove from head of queue without reporting value.");
    add_cmd("reverse", do_reverse, "                | Reverse queue");
    add_cmd("sort", do_sort, "                | Sort queue in ascending order");
    add_cmd("size", do_size,
            " [n]            | Compute queue size n times (default: n == 1)");
    add_cmd("show", do_show, "                | Show queue contents");
    add_param("length", &string_length, "Maximum length of displayed string",
              NULL);
    add_param("malloc", &fail_probability, "Malloc failure probability percent",
              NULL);
    add_param("fail", &fail_limit,
              "Number of times allow queue operations to return false", NULL);
}

再搭配用來測試的腳本，以 trace-10-malloc.cmd 的內容為例：









# Test of malloc failure on new
option fail 10
option malloc 50
new
new
new
new
new
new

從這些程式碼，我們得知程式執行期間我們可以輸入的命令及參數。以 option malloc 50 為例，透過這個命令，能將 fail_probability 從原本的 0 修改為 50 。

q_insert_tail



































bool q_insert_tail(queue_t *q, char *s)
{
    /* TODO: You need to write the complete code for this function */
    /* Remember: It should operate in O(1) time */
    /* TODO: Remove the above comment when you are about to implement. */
    list_ele_t *newt;
    char *newstr;
    int len = strlen(s);
    if (!q)
        return false;
    newt = malloc(sizeof(list_ele_t));
    if (!newt) {
        return false;
    }
    newstr = malloc(sizeof(char) * len + 1);
    if (!newstr) {
        free(newt);
        return false;
    }
    for (int i = 0; i < len; i++) {
        *(newstr + i) = *(s + i);
    }
    *(newstr + len) = '\0';
    newt->next = NULL;
    newt->value = newstr;
    if (!q->tail) {
        q->head = newt;
        q->tail = newt;
    } else {
        q->tail->next = newt;
        q->tail = newt;
    }
    (q->size)++;
    return true;
}

q_insert_tail 的實作與 q_insert_head 基本相同。

q_remove_head























bool q_remove_head(queue_t *q, char *sp, size_t bufsize)
{
    /* TODO: You need to fix up this code. */
    /* TODO: Remove the above comment when you are about to implement. */
    list_ele_t *tmp;
    int i;
    if (!q)
        return false;
    if (q->size == 0)
        return false;
    if (!sp)
        return true;
    for (i = 0; (i < bufsize - 1) && (*(q->head->value + i) != '\0'); i++) {
        *(sp + i) = *(q->head->value + i);
    }
    *(sp + i) = '\0';
    tmp = q->head;
    q->head = q->head->next;
    q->size--;
    free(tmp->value);
    free(tmp);
    return true;
}

這裡要注意的是 bufsize 代表可儲存的最大長度，而非一定要放滿 bufsize 的長度。因此在 for 迴圈中需要加入來源 pointer 指向的 char 是否為 '\0' 的檢查。

q_reverse





















void q_reverse(queue_t *q)
{
    /* TODO: You need to write the code for this function */
    /* TODO: Remove the above comment when you are about to implement. */
    if (!q)
        return;
    if (q->size == 0)
        return;
    list_ele_t *cur = NULL;
    list_ele_t *header = q->head;
    while (header) {
        list_ele_t *next = header->next;
        header->next = cur;
        cur = header;
        header = next;
    }
    list_ele_t *tmp = q->head;
    q->head = q->tail;
    q->tail = tmp;
    return;
}

q_reverse 的實現在第一周上課的小考就有出現，畫出各個 pointer 指向的變化就能一目了然。

q_sort

q_sort 我實作了兩次，第一次是用全遞迴的方式：












































list_ele_t *merge(list_ele_t *l1, list_ele_t *l2);
list_ele_t *mergeSort(list_ele_t *head);
void q_sort(queue_t *q)
{
    /* TODO: You need to write the code for this function */
    /* TODO: Remove the above comment when you are about to implement. */
    if (q == NULL || q->size == 0 || q->size == 1) {
        return;
    }
    q->head = mergeSort(q->head);
    while (q->tail->next) {
        q->tail = q->tail->next;
    }
}
list_ele_t *merge(list_ele_t *l1, list_ele_t *l2)
{
    if (!l2)
        return l1;
    if (!l1)
        return l2;
    if (strcmp(l1->value, l2->value) < 0) {
        l1->next = merge(l1->next, l2);
        return l1;
    } else {
        l2->next = merge(l2->next, l1);
        return l2;
    }
}
list_ele_t *mergeSort(list_ele_t *head)
{
    if (!head || !head->next)
        return head;
    list_ele_t *fast = head->next;
    list_ele_t *slow = head;
    while (fast && fast->next) {
        slow = slow->next;
        fast = fast->next->next;
    }
    fast = slow->next;
    slow->next = NULL;
    list_ele_t *l1 = mergeSort(head);
    list_ele_t *l2 = mergeSort(fast);
    return merge(l1, l2);
}

全遞迴的版本無法通過 trace-15 ，會出現 Segmentation fault (core dumped)的錯誤訊息。
確認過程式碼不會 dereference NULL pointer 後，推測是遞迴用到的 stack 空間過多導致。
於是第二次參考ZhuMon，將 merge 的部份以迭代實現：













































void mergeSort(list_ele_t **head);
void q_sort(queue_t *q)
{
    /* TODO: You need to write the code for this function */
    /* TODO: Remove the above comment when you are about to implement. */
    if (q == NULL || q->size == 0 || q->size == 1) {
        return;
    }
    mergeSort(&q->head);
    while (q->tail->next) {
        q->tail = q->tail->next;
    }
}

void mergeSort(list_ele_t **head)
{
    if (!*head || !(*head)->next)
        return;
    list_ele_t *l1 = (*head)->next;
    list_ele_t *l2 = (*head);
    while (l1 && l1->next) {
        l2 = l2->next;
        l1 = l1->next->next;
    }
    l1 = l2->next;
    l2->next = NULL;
    l2 = *head;

    mergeSort(&l1);
    mergeSort(&l2);

    *head = NULL;
    list_ele_t **cur = head;
    while (l1 && l2) {
        if (strcmp(l1->value, l2->value) < 0) {
            *cur = l1;
            l1 = l1->next;
        } else {
            *cur = l2;
            l2 = l2->next;
        }
        cur = &(*cur)->next;
    }
    *cur = l1 ? l1 : l2;
}

程式碼利用 l1 , l2 指向等待被 merge 的 linked list 的第一個元素 , 利用 cur 這個 pointer to pointer 訪問並修改原本 linked list 元素的 next 欄位

採取部份迭代的方法能順利通過 trace-15 ，但若要從根本上解決問題，程式碼必須全部改成迭代。

論文與 simulation 程式碼分析

is_insert_tail_const 函式負責分析 q_insert_tail 的時間複雜度是不是常數：




















bool is_insert_tail_const(void)
{
    bool result = false;
    t = malloc(sizeof(t_ctx));

    for (int cnt = 0; cnt < test_tries; ++cnt) {
        printf("Testing insert_tail...(%d/%d)\n\n", cnt, test_tries);
        init_once();
        for (int i = 0;
             i <
             enough_measurements / (number_measurements - drop_size * 2) + 1;
             ++i)
            result = doit(0);
        printf("\033[A\033[2K\033[A\033[2K");
        if (result == true)
            break;
    }
    free(t);
    return result;
}

比較重要的是 init_once 和 doit 兩個函式。
init_once 負責待測 queue 的初始化，為了避免干擾到真正要進行 q_insert_tail 的 queue ，會另外生成一個 queue:





















static void init_once(void)
{
   init_dut();
   t_init(t);
}

/* Implement the necessary queue interface to simulation */
void init_dut(void)
{
   q = NULL;
}

void t_init(t_ctx *ctx)
{
   for (int class = 0; class < 2; class ++) {
       ctx->mean[class] = 0.0;
       ctx->m2[class] = 0.0;
       ctx->n[class] = 0.0;
   }
   return;
}

doit 則是包含實際負責測量的函式：





























static bool doit(int mode)
{
    int64_t *before_ticks = calloc(number_measurements + 1, sizeof(int64_t));
    int64_t *after_ticks = calloc(number_measurements + 1, sizeof(int64_t));
    int64_t *exec_times = calloc(number_measurements, sizeof(int64_t));
    uint8_t *classes = calloc(number_measurements, sizeof(uint8_t));
    uint8_t *input_data =
        calloc(number_measurements * chunk_size, sizeof(uint8_t));

    if (!before_ticks || !after_ticks || !exec_times || !classes ||
        !input_data) {
        die();
    }

    prepare_inputs(input_data, classes);

    measure(before_ticks, after_ticks, input_data, mode);
    differentiate(exec_times, before_ticks, after_ticks);
    update_statistics(exec_times, classes);
    bool ret = report();

    free(before_ticks);
    free(after_ticks);
    free(exec_times);
    free(classes);
    free(input_data);

    return ret;
}

prepare_inputs 負責初始化 input_data 、 classes 、 random_string，其中：

input_data 有150個數，每個數佔一個 chunk_size ，每個數表示對應 string 的插入個數。如果 classes[i] 對應的值為0(代表 fixed case)，則對應 input_data 中第 i 個 chunk 的值(插入個數)也會設為0。
classes為長度 150 的 array ， array 的第 i 格的值代表第 i 格的 class 。因為只有兩個 class，故只會有 0 、 1 兩種值。
random_string 存有 150 個 string ，每個 string 長度為7。















void prepare_inputs(uint8_t *input_data, uint8_t *classes)
{
    randombytes(input_data, number_measurements * chunk_size);
    for (size_t i = 0; i < number_measurements; i++) {
        classes[i] = randombit();
        if (classes[i] == 0)
            *(uint16_t *) (input_data + i * chunk_size) = 0x00;
    }

    for (size_t i = 0; i < NR_MEASURE; ++i) {
        /* Generate random string */
        randombytes((uint8_t *) random_string[i], 7);
        random_string[i][7] = 0;
    }
}

measure 負責取得 insert tail 之前和之後的 CPU Cycle 。
另外，在執行 dut_insert_tail 前，有先 insert head。從 random_string 取出 array ，從 input_data 指向的第 i 個 chunk 取出要 insert 的數量。































void measure(int64_t *before_ticks,
             int64_t *after_ticks,
             uint8_t *input_data,
             int mode)
{
    assert(mode == test_insert_tail || mode == test_size);
    if (mode == test_insert_tail) {
        for (size_t i = drop_size; i < number_measurements - drop_size; i++) {
            char *s = get_random_string();
            dut_new();
            dut_insert_head(
                get_random_string(),
                *(uint16_t *) (input_data + i * chunk_size) % 10000);
            before_ticks[i] = cpucycles();
            dut_insert_tail(s, 1);
            after_ticks[i] = cpucycles();
            dut_free();
        }
    } else {
        for (size_t i = drop_size; i < number_measurements - drop_size; i++) {
            dut_new();
            dut_insert_head(
                get_random_string(),
                *(uint16_t *) (input_data + i * chunk_size) % 10000);
            before_ticks[i] = cpucycles();
            dut_size(1);
            after_ticks[i] = cpucycles();
            dut_free();
        }
    }
}

differentiate 將其相減後得出 exe_times ，送入 update_statics ， update_statics 會根據第 i 個 measurement 對應的 class (也就是 class [ i ]的值) 進行 t-test。












static void update_statistics(int64_t *exec_times, uint8_t *classes)
{
    for (size_t i = 0; i < number_measurements; i++) {
        int64_t difference = exec_times[i];
        /* Cpu cycle counter overflowed or dropped measurement */
        if (difference <= 0) {
            continue;
        }
        /* do a t-test on the execution time */
        t_push(t, difference, classes[i]);
    }
}

t_push 負責計算

Δ x

、

\bar{x}

、

\sum_{i = 0}^{N} ({x_{i} - \bar{x})}^{2}











void t_push(t_ctx *ctx, double x, uint8_t class)
{
    assert(class == 0 || class == 1);
    ctx->n[class]++;
    /* Welford method for computing online variance
     * in a numerically stable way.
     */
    double delta = x - ctx->mean[class];
    ctx->mean[class] = ctx->mean[class] + delta / ctx->n[class];
    ctx->m2[class] = ctx->m2[class] + delta * (x - ctx->mean[class]);
}

report 中的 t_compute 則會計算出 t

t = \frac{\bar{X_{0}} - \bar{X_{1}}}{\sqrt{\frac{V a r_{0}}{N_{0}} + \frac{V a r_{1}}{N_{1}}}}

其中：

V a r_{0} = \frac{\sum_{i = 0}^{N_{0}} ({x_{i} - {\bar{x}}_{0})}^{2}}{N_{0} - 1}

V a r_{1} = \frac{\sum_{i = 0}^{N_{1}} ({x_{i} - {\bar{x}}_{1})}^{2}}{N_{1} - 1}










double t_compute(t_ctx *ctx)
{
    double var[2] = {0.0, 0.0};
    var[0] = ctx->m2[0] / (ctx->n[0] - 1);
    var[1] = ctx->m2[1] / (ctx->n[1] - 1);
    double num = (ctx->mean[0] - ctx->mean[1]);
    double den = sqrt(var[0] / ctx->n[0] + var[1] / ctx->n[1]);
    double t_value = num / den;
    return t_value;
}

~~另外，仔細查看 t_push 的實作部份會發現，每次計算~~

$m_{2}$ (也就是
$\sum_{i = 0}^{N} ({x_{i} - \bar{x})}^{2}$ )時，所用的
$\bar{x}$ 都不同，但理論上必須使用全體的
$\bar{x}$ 才符合統計學上的意義。
於是對以下檔案進行修改：
後來發現wikipedia 上其實有對 Welford's online algorithm 的仔細敘述，並且還有詳細舉出傳統我認知計算 variance 的方法可能會導致 overflow 的問題。
基於上述，以下的修改實屬多餘。

ttest.h ：為 t_ctx 新增 sum[2] 欄位，記錄對應 class 的 total sum 。

















#ifndef DUDECT_TTEST_H
#define DUDECT_TTEST_H

#include <stdint.h>
typedef struct {
    double sum[2];/*member added to keep total sums*/
    double mean[2];
    double m2[2];
    double n[2];
} t_ctx;

void t_push(t_ctx *ctx, double x, uint8_t class);
void t_mean(t_ctx *ctx, int64_t *exec_times, uint8_t *classes);/*calculate mean for exec_times*/
double t_compute(t_ctx *ctx);
void t_init(t_ctx *ctx);

#endif

ttest.c : 修改 t_push ，只保留計算
$\sum_{i = 0}^{N} ({x_{i} - \bar{x})}^{2}$ 的部份，將計算mean的工作交給新增的 t_mean








































/**
 * Online Welch's t-test.
 *
 * Tests whether two populations have same mean.
 * This is basically Student's t-test for unequal
 * variances and unequal sample sizes.
 *
 * see https://en.wikipedia.org/wiki/Welch%27s_t-test
 *
 */

#include "ttest.h"
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
extern const size_t number_measurements;
void t_push(t_ctx *ctx, double x, uint8_t class)
{
    assert(class == 0 || class == 1);
    // ctx->n[class]++;
    /* Welford method for computing online variance
     * in a numerically stable way.
     */
    double delta = x - ctx->mean[class];
    // ctx->mean[class] = ctx->mean[class] + delta / ctx->n[class];
    ctx->m2[class] = ctx->m2[class] + delta * delta /*(x - ctx->mean[class])*/;
}
/*Use t_mean to calculate the "correct" mean for total sum */
void t_mean(t_ctx *ctx, int64_t *exec_times, uint8_t *classes)
{
    for (size_t i = 0; i< number_measurements; i++) {
        assert(classes[i] == 0 || classes[i] == 1);
        ctx->n[classes[i]]++;
        ctx->sum[classes[i]] += exec_times[i];
    }
    ctx->mean[0] = ctx->sum[0] / ctx->n[0];
    ctx->mean[1] = ctx->sum[1] / ctx->n[1];
}

3.fixture.c : 把 t_mean 加到程式碼的開頭，一開始就把 mean 給計算出來。













static void update_statistics(int64_t *exec_times, uint8_t *classes)
{
    t_mean(t, exec_times, classes);
    for (size_t i = 0; i < number_measurements; i++) {
        int64_t difference = exec_times[i];
        /* Cpu cycle counter overflowed or dropped measurement */
        if (difference <= 0) {
            continue;
        }
        /* do a t-test on the execution time */
        t_push(t, difference, classes[i]);
    }
}

TODO LIST

用 Valgrind 排除 qtest 實作的記憶體錯誤
用 Massif 視覺化記憶體使用量
設計實驗
解釋程式的 "simulation" 模式如何以實驗驗證複雜度

2020q3 Homework1 (lab0)

作業要求

實作流程

queue.h

queue.c

q_size

q_new

q_free

q_insert_head

q_insert_tail

q_remove_head

q_reverse

q_sort

論文與 simulation 程式碼分析

TODO LIST

Read more

2020q3 quiz11

2020q3 Homework3 (quiz3)

2020q3 Homework6 (kcalc)

2020q3 Homework6 (quiz6)

`q_size`