linux2025-homework2

第二周測驗

2-1

延伸問題：

解釋上方程式碼運作原理，改進 random_shuffle_array 也探討 list_for_each_entry_safe 建構的迴圈中，若將 list_move_tail 換為 list_move，為何會無法滿足 stable sorting
將程式碼整合進 lab0 提及的 qtest，並探討環狀雙向鏈結串列的排序效能，並善用 perf 一類的效能分析工具，探討包含 Linux 核心 list_sort 在內排序實作的效能表現並充分解釋

{
    struct list_head list_less, list_greater;
    struct listitem *pivot;
    struct listitem *item = NULL, *is = NULL;

    if (list_empty(head) || list_is_singular(head))
        return;

    INIT_LIST_HEAD(&list_less);
    INIT_LIST_HEAD(&list_greater);

    pivot = list_first_entry(head, struct listitem, list); 
    list_del(&pivot->list); 

    list_for_each_entry_safe (item, is, head, list) {
        if (cmpint(&item->i, &pivot->i) < 0)
            list_move_tail(&item->list, &list_less);
        else
            list_move_tail(&item->list, &list_greater);
    }

    list_quicksort(&list_less);
    list_quicksort(&list_greater);

    list_add(&pivot->list, head);
    list_splice(&list_less, head);
    list_splice_tail(&list_greater, head);
}

為什麼改成 list_move 無法 stable sort ?

list_move() - Move a list node to the beginning of the list
list_move_tail() - Move a list node to the end of the list

list_move 後 head->next 會變成 node，list_move_tail 後 head->prev 會變成 node

cmpint return > 0, if 前面參數 > 後面參數

stable sort 要求相同值的元素保持原始相對順序。list_move_tail 確保元素按遍歷順序追加到 list_less 或 list_greater，從而保留原始順序。

舉例來說:
head, 3, 3^, 3', 3" 排序完後應該不變，但若改為 list_move，則排序結果會變成:
pivot: 3
queue: head, 3^, 3', 3"
greater: 3", 3' ,3^

int main(void)
{
    struct list_head testlist;
    struct listitem *item, *is = NULL;
    size_t i;

    random_shuffle_array(values, (uint16_t) ARRAY_SIZE(values));

    INIT_LIST_HEAD(&testlist);

    assert(list_empty(&testlist));

    for (i = 0; i < ARRAY_SIZE(values); i++) {
        item = (struct listitem *) malloc(sizeof(*item));
        assert(item);
        item->i = values[i];
        list_add_tail(&item->list, &testlist);
    }

    assert(!list_empty(&testlist));

    qsort(values, ARRAY_SIZE(values), sizeof(values[0]), cmpint);
    list_quicksort(&testlist);

    i = 0;
    list_for_each_entry_safe (item, is, &testlist, list) {
        assert(item->i == values[i]);
        list_del(&item->list);
        free(item);
        i++;
    }

    assert(i == ARRAY_SIZE(values));
    assert(list_empty(&testlist));

    return 0;
}

random_shuffle_array
原始實現使用 get_unsigned16() % (i + 1)，這引入了偏誤，因為模運算無法均勻分佈。改進方法是採用 Fisher-Yates (Knuth) 洗牌演算法，並使用更高質量的隨機數生成器：

#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))

static uint16_t values[256];

static inline uint8_t getnum(void)
{
    static uint16_t s1 = UINT16_C(2);
    static uint16_t s2 = UINT16_C(1);
    static uint16_t s3 = UINT16_C(1);

    s1 *= UINT16_C(171);
    s1 %= UINT16_C(30269);

    s2 *= UINT16_C(172);
    s2 %= UINT16_C(30307);

    s3 *= UINT16_C(170);
    s3 %= UINT16_C(30323);

    return s1 ^ s2 ^ s3;
}

static uint16_t get_unsigned16(void)
{
    uint16_t x = 0;
    size_t i;

    for (i = 0; i < sizeof(x); i++) {
        x <<= 8;
        x |= getnum();
    }

    return x;
}

static inline void random_shuffle_array(uint16_t *operations, uint16_t len)
{
    for (uint16_t i = 0; i < len; i++) {
        /* WARNING biased shuffling */
        uint16_t j = get_unsigned16() % (i + 1);
        operations[i] = operations[j];
        operations[j] = i;
    }
}

在 getnum 內的 s1, s2, s3 不會在每次呼叫該 function 時重置，它們會記住上一次呼叫後的值

C99 (6.2.4.3): An object whose identifier is declared with external or internal linkage, or with the
storage-class specifier static has static storage duration. Its lifetime is the entire
execution of the program and its stored value is initialized only once, prior to program
startup.

static 變數只會在程式開始時初始化一次，而不是每次函數呼叫時都重新初始化。

而 global variable 的 values[256] 也同樣具有靜態儲存期間，此外，其他檔案無法透過 extern 訪問

這裡的 getnum 函式其實是一個偽隨機函數 - 線性同餘 ( lcg )，為甚麼要加一個"偽"字是因為這個函數有週期性
LCG 遞迴關係式:

$N_{j + 1} \equiv (A \times N_{j} + B) (\mod M)$

s1 = (171 * s1) % 30269
s2 = (172 * s2) % 30307
s3 = (170 * s3) % 30323

最後 getnum 會截斷 16bit XOR 回傳 8bit結果

get_unsigned16() : 根據剛剛的邏輯，這個函式是用來生成 16-bit 的隨機數。可以看到使用了 for 迴圈，因為宣告變數 x 為 16-bit 的無符號整數，因此 sizeof(x) = 2 ，經過兩次 for 迴圈，便能得到 16-bit 的隨機數。

random_shuffle_array: 在 len = 256 的情況下，random_shuffle_array 最終會生成一個包含 0 到 255 的排列，且每個數字恰好出現一次。然而，可以看到程式中的註解， WARNING biased shuffling ，是因為 get_unsigned16() % (i + 1) 取模的方式會造成某些 index 的選取機率不同。

當 (i+1) 無法整除 get_unsigned16 時，會造成低索引被選擇的機率較高一點點，導致選擇不均勻。

修改如下:

for (uint16_t i = len - 1; i > 0; i--) {
    uint16_t j = get_unsigned16() % (i + 1);
    uint16_t temp = operations[i];
    operations[i] = operations[j];
    operations[j] = temp;
}

2-2

延伸問題：

解釋上述程式碼，並探討擴充為 ⌈(𝑥)⌉ (向上取整數) 實作，該如何調整並保持只用整數加減及位元操作
參照計算機結構測驗題 C 及其注釋，以 C 語言 (搭配 GNU extension) 撰寫不依賴除法指令的 sqrtf，確保符合 IEEE 754 規格。對照 sqrtf, rsqrt, reciprocal implementations in Arm assembly
參照從 √2 的存在談開平方根的快速運算提到的「藉由查表的平方根運算」，以 C 語言實作，並比較不同手法的整數開平方根效能
一旦發現效能改進的機會，可準備提交改進 int_sqrt 程式碼到 Linux 核心

clz2 函式的實作如下：

static const int mask[] = {0, 8, 12, 14};
static const int magic[] = {2, 1, 0, 0};

unsigned clz2(uint32_t x, int c)
{
    if (!x && !c)
        return 32;

    uint32_t upper = (x >> (16 >> c));
    uint32_t lower = (x & (0xFFFF >> mask[c]));
    if (c == 3)
        return upper ? magic[upper] : 2 + magic[lower];
    return upper ? clz2(upper, c + 1) : (16 >> (c)) + clz2(lower, c + 1);
}

用來控制 lower 部分的遮罩位元數，應為 {0, 8, 12, 14}，因為每次遞迴將位元數減半最後兩位使用查表，所以GGGG = 16 - 2 = 14
magic[] 是最後 2 位元的快速查表，應為 {0, 1, 0, 2}，對應 00, 01, 10, 11 的 leading zeros。

mask[] 是用來當作 lower 的 mask

當 c=0，x 為 32 bit，所以 lower 為 x 的低16位 (x & 0xFFFF);
當 c=1，x 為 16 bit，所以 lower 為 x 的低8位 (x & 0xFF);
當 c=1，x 為 8 bit，所以 lower 為 x 的低4位 (x & 0xF);
當 c=1，x 為 4 bit，所以 lower 為 x 的低2位 (x & 0x3);

當 c==3 時，表示剩下2 bit，這時我們採用查表的方式確認 leadind zero

magic[0b00] = 2
magic[0b01] = 1
magic[0b10] = 0
magic[0b11] = 0

實作整數開平方根：

uint64_t sqrti(uint64_t x)
{
    uint64_t m, y = 0;
    if (x <= 1)
        return x;

    int total_bits = 64;

    /* clz64(x) returns the count of leading zeros in x.
     * (total_bits - 1 - clz64(x)) gives the index of the highest set bit.
     * Rounding that index down to an even number ensures our starting m is a
     * power of 4.
     */
    int shift = (total_bits - 1 - clz64(x)) & ~1;
    m = 1ULL << shift;

根據註解，我們可以得知 m 為 4 的冪，所以要確保 shift 為偶數，也就是說 shift 的 LSB 需為 0 。

因此 MMMM = ~1，任何數 & ~1 即為偶數

至此，m 為 highest set bit

    while (m) {
        uint64_t b = y + m;
        y >>= 1;
        if (x >= b) {
            x -= b;
            y += m;
        }
        m >>= 2;
    }
    return y;
}

參考 Digit-by-digit Method

(a + b)^{2} = a^{2} + (2 a + b) b

\begin{aligned} (a + b + c)^{2} & = ((a + b) + c)^{2} \\ = (a + b)^{2} + (a + b) c + c (a + b) + c^{2} \\ = (a + b)^{2} + (2 (a + b) + c) c \end{aligned}

\begin{aligned} (a + b + c + d)^{2} & = ((a + b + c) + d)^{2} \\ = (a + b + c)^{2} + (a + b + c) d + d (a + b + c) + d^{2} \\ = (a + b + c)^{2} + (2 (a + b + c) + d) d \end{aligned}

觀察規律可以發現

𝑚

元平方和可藉由 1 到 𝑚−1 元平方和總和，同時加上一項

Y_{m}

N^{2} = P_{m + 1}^{2} + Y_{m}

Y_{m} = [(2 \sum_{i = m + 1}^{n} a_{i}) + a_{m}] a_{m} = (2 P_{m + 1} + a_{m}) a_{m}

Y_{m}

是新增項

P_{m + 1} = a_{n} + a_{n - 1} + \dots + a_{m + 2} + a_{m + 1}

P_{m + 1}

是之前計算的和

假設

N^{2}

是待開平方的數，我們從最高位

𝑚 = 𝑛

開始，逐步向下計算到

𝑚 = 0

若要改為向上取整:

關於 while 迴圈細節可以參考大雞哥詳細解釋

2-3

延伸問題：

解釋上述程式碼運作原理，應提供測試程式碼

針對 Linux 核心的 hash table 實作，提供更多圖例並揣摩 Linux 核心開發者
進行《The Art of Computer Programming》(vol 3) 一書section 6.4 的 exercise 8，也就是證明 Theorem S

注意: Linux 核心內部大量使用 hash table，但並非每處的雜湊函數都滿足減少碰撞及避免 clustering 現象的原則，也就是存在若干改進空間
探討 Linux 核心中使用 hash table 的應用案例，至少二項，應當列出原始程式碼並分析，斟酌搭配 git log 以得知 Linux 核心開發者變更程式碼的考量因素

linux2025-homework2

第二周測驗

2-1

2-2

2-3

Read more

2025q1 Homework1 (lab0)

linux2025-ideas

xv6-riscv

軟體設計 final_project