2019q3 Homework2 (quiz2)

# 2019q3 Homework2 (quiz2) contributed by < `kaeteyaruyo` > ## 測驗 `1` 考慮下方檔案 `4thought.c` 是 ACM-ICPC 題目 [4 thought](https://open.kattis.com/problems/4thought) 的一個解法，假設程式的輸入符合 [4 thought](https://open.kattis.com/problems/4thought) 的描述，請補完程式碼: ```cpp #include <stdbool.h> #include <stdio.h> enum { opType1 = 0x1 << 0, opType2 = 0x1 << 1, opType3 = 0x1 << 4, opType4 = 0x1 << 5, }; static int operate(int op, int a, int b) { switch (op) { case opType1: return a + b; case opType2: return a - b; case opType3: return a * b; case opType4: return (int) a / b; } return 0; } static char op_to_char(int op) { return "+-*/?"[op - 1]; } static int op_to_prio(int op) { return ((int[]){opType1, opType2, opType3, opType4, -1})[op - 1]; } static int calc(int op1, int op2, int op3) { op1 = op_to_prio(op1); op2 = op_to_prio(op2); op3 = op_to_prio(op3); bool p1 = (op1 & 0x0F) == 0; // = 1 for * or / bool p2 = (op2 & 0x0F) == 0; // else = 0 bool p3 = (op3 & 0x0F) == 0; // (4 + 4 + 4 + 4) or (4 / 4 / 4 / 4) if ((p1 == p2) && (p2 == p3)) return operate(op3, operate(op2, operate(op1, 4, 4), 4), 4); /* Write your code here */ return 0; } int main(void) { int n; scanf("%d", &n); int sol[n]; for (int i = 0; i < n; i++) scanf("%d", &sol[i]); bool validSolution = false; for (int i = 0; i < n; i++) { for (int op1 = 4; op1 > 0; op1--) { for (int op2 = 4; op2 > 0; op2--) { for (int op3 = 4; op3 > 0; op3--) { int sol_checked = calc(op1, op2, op3); if (sol_checked == sol[i]) { validSolution = true; char op1char = op_to_char(op1); char op2char = op_to_char(op2); char op3char = op_to_char(op3); printf("4 %c 4 %c 4 %c 4 = %d\n", op1char, op2char, op3char, sol[i]); op1 = -1; op2 = -1; op3 = -1; break; } } } } if (!validSolution) printf("no solution\n"); validSolution = false; } return 0; } ``` 注意: 你應該要實作 `calc` 函式中標註 `/* Write your code here */` 之後的程式碼。除了撰寫程式，你應該提供對應的程式碼註解。 :::success 延伸問題: 1. 解釋程式運作的原理和推敲背後的思路; 2. 探討 [4 thought](https://open.kattis.com/problems/4thought) 組合出來的數值分佈，並且透過數論解釋; 3. 提出得以改善上述程式碼執行效率的方案，著手分析和實作; ::: ### 解題由 `calc()` 函式中的第一個 `if` 的註解可以得知，以下的 code 的功能在於區別不同運算子組合時，四個運算元的運算優先度如何。`p1`, `p2`, `p3` 依序代表了三個運算子的優先度（如果是乘除法，則為 1,若是加減法，則為 0）因此可以整理出下表： |type|p1|p2|p3|precedence| |-|-|-|-|-| |1|0|0|0|(((4 + 4) + 4) + 4)| |2|0|0|1|((4 + 4) + (4 × 4))| |3|0|1|0|((4 + (4 × 4)) + 4)| |1|1|0|0|(((4 × 4) + 4) + 4)| |4|0|1|1|(4 + ((4 × 4) × 4))| |2|1|0|1|((4 × 4) + (4 × 4))| |1|1|1|0|(((4 × 4) × 4) + 4)| |1|1|1|1|(((4 × 4) × 4) × 4)| （如果運算優先度相同，應由左到右運算，因為減法和除法沒有交換律或結合律） ```cpp static int calc(int op1, int op2, int op3) { op1 = op_to_prio(op1); op2 = op_to_prio(op2); op3 = op_to_prio(op3); bool p1 = (op1 & 0x0F) == 0; // = 1 for * or / bool p2 = (op2 & 0x0F) == 0; // else = 0 bool p3 = (op3 & 0x0F) == 0; // (4 + 4 + 4 + 4) or (4 / 4 / 4 / 4) if ((p1 == p2) && (p2 == p3)) return operate(op3, operate(op2, operate(op1, 4, 4), 4), 4); // (((4 × 4) + 4) + 4) or (((4 × 4) × 4) + 4) if ((p1 == 1) && (p3 == 0)) return operate(op3, operate(op2, operate(op1, 4, 4), 4), 4); // ((4 + 4) + (4 × 4)) or ((4 × 4) + (4 × 4)) if ((p2 == 0) && (p3 == 1)) return operate(op2, operate(op1, 4, 4), operate(op3, 4, 4)); // ((4 + (4 × 4)) + 4) if ((p1 == 0) && (p2 == 1) && (p3 == 0)) return operate(op3, operate(op1, 4, operate(op2, 4, 4)), 4); // (4 + ((4 × 4) × 4)) if ((p1 == 0) && (p2 == 1) && (p3 == 1)) return operate(op1, 4, operate(op3, operate(op2, 4, 4), 4)); return 0; } ``` ## 測驗 `2` 考慮以下程式碼 (`fitbits.c`) 可檢驗輸入的整數 `x` 是否可用 `n` 個位元來表示，例如 (x = 4, n = 9) 要回傳 `true`, 當 (x = 4, n = 2) 回傳 `false`。 ```cpp #include <stdbool.h> bool fit_bits(int x, int n) { /* Write your code here */ return (bool) x; } ``` 實作的程式碼不能有任何邏輯條件判斷 (如 `if`, `else`, `?`) 或迴圈 (如 `for`, `while`, `goto`, `switch`, `case`, `break`, `continue`)，可用的運算子是 `>>`, `<<`, `-`, `+`, `!`, `~`, `&`, `|` 請補完程式碼，作答時需要包含函式宣告及定義。除了撰寫程式，你應該提供對應的程式碼註解。 ### 解題 ```cpp #include <stdbool.h> bool fit_bits(int x, int n) { // Mask 掉右邊 n 位，如果夠用，則 x 會變成 0； // 如果不夠用，至少有一個位數會是 1 x = x & (-1 << n); // 若 x 為 0，回傳真，否則回傳假。 // 由於 C 語言中 0 為假，非 0 為真，因此直接回傳 !x 即可 x = !x; return (bool) x; } ``` ## 測驗 `3` 考慮以下程式碼 (`is-less-equal.c`) 可檢驗輸入的整數 `x` 和 `y`，是否存在 $x <= y$ 的關係。例如 (x = 4, n = 4) 要回傳 `true`, 當 (x = 14, n = 9) 回傳 `false`。 ```cpp #include <stdbool.h> bool is_leq(int x, int y) { int s; /* Write your code here */ return (bool) s; } ``` 實作的程式碼不能有任何邏輯條件判斷 (如 `if`, `else`, `?`) 或迴圈 (如 `for`, `while`, `goto`, `switch`, `case`, `break`, `continue`)，當然也不能用 `>=`, `>`, `<`, `<=`, `-` 等運算子。可用的運算子是 `>>`, `<<`, `+`, `~` 請補完程式碼，作答時需要包含函式宣告及定義。除了撰寫程式，你應該提供對應的程式碼註解。 :::success 延伸問題: 在重視資訊安全的專案中，找出類似用法的程式碼，予以解說並進行相關 information leaks 的實驗 ::: ### 解題由於 $x <= y$ 可以轉換成等價的問題 $y - x >= 0$，又因為 C 語言當中有號整數以二補數表示，因此 $-x = (\sim x + 1)$ ，故可以用`+`與`~`來模擬減法運算，進而進行大小比較的運算。此函式不含溢位檢查的功能。 ```cpp #include <stdbool.h> bool is_leq(int x, int y) { int s; // s = y - x s = y + (~x + 1); // 由於 C 當中對有號數的右移運算是帶號的（算數位移）， // 因此將一個數字右移 31 次，這個數字會被他的符號填滿 s = s >> 31; // 若 y - x >= 0 ，則 s 為 0 ，須返回真， // 否則 s 為 -1，需返回假。 // 由於 C 當中 0 為假，非 0 為真， // 因此將 s 反相過後，就會是正確的邏輯值。 s = ~s; return (bool) s; } ``` ### 延伸問題 ## 測驗 `4` 考慮一種針對短字串的最佳化操作，假設字串總是小於等於 8 bytes，標的硬體是像 x86_64 這樣的 64-bit 架構而且是 [little endian](https://en.wikipedia.org/wiki/Endianness)，於是我們可實作類似以下的程式碼 (`ministr.c`): ```cpp #include <stdint.h> #include <stdio.h> #include <string.h> typedef union { uint64_t integer; char array[8]; } mini_str; static unsigned BitScanReverse(uint64_t x) { return 63 - __builtin_clzll(x); } /** * Find the length of the given mini_str. * @param str string to find length of * @return length of the given string */ unsigned mini_strlen(mini_str str) { // Special case for empty string. if (str.integer == 0) return 0; // Otherwise find first non-zero bit (which will be in the first non-zero // byte), and find which byte it is in. // FIXME: Assumes little-endian. unsigned msb = BitScanReverse(str.integer); return msb / 8 + 1; } /** * Create a new mini_str with length 0. * @return newly created mini_str */ mini_str mini_str_new(void) { // Create string of all null bytes. mini_str str = {.integer = 0}; return str; } /** * Append str2 to the end of str1 and return the reult. * @param str1 first string * @param str2 second string * @return combined string */ mini_str mini_strcat(mini_str str1, mini_str str2) { // Shift str2 along by str1Len characters to move it into position. unsigned str1Len = mini_strlen(str1); str2.integer <<= str1Len * 8; // FIXME: Assumes little-endian. /* Write your code here */ return str1; } #define mini_str_to_c(mini_str) ((const char *) (mini_str).array) #define mini_str_to_cNoConst(mini_str) ((char *) (mini_str).array) /** * Create a mini_str from a standard C character array. * @param cstr Null-terminated C-string to use as input * @return newly created mini_str */ mini_str mini_str_from_c(const char *cstr) { // Create empty string. mini_str mini_str = mini_str_new(); // Copy string. strncpy(mini_str_to_cNoConst(mini_str), cstr, 7); return mini_str; } int main(int argc, char **argv) { mini_str all = mini_str_from_c("All "); mini_str red = mini_str_from_c("red"); mini_str cat = mini_strcat(all, red); printf("%s\n", mini_str_to_c(cat)); return 0; } ``` 這裡的 `__builtin_clzll` 是 [GCC builtin function](https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html)，作用是 [bit scan](https://www.chessprogramming.org/BitScan)，程式預期輸出為: ``` All red ``` 你應該要實作 `calc` 函式中標註 `/* Write your code here */` 之後的程式碼。除了撰寫程式，你應該提供對應的程式碼註解。注意: 實作的程式碼不能有任何邏輯條件判斷 (如 `if`, `else`, `?`) 或迴圈 (如 `for`, `while`, `goto`, `switch`, `case`, `break`, `continue`)，也不能用 `>=`, `>`, `<`, `<=`, `-` 等運算子。 :::success 延伸問題: 1. 指出這樣針對短字串的最佳化效益，並嘗試量化; 2. 什麼樣的情境會出現大量的短字串？請舉例並分析; 3. 程式碼該如何修改，才能適用 big/little-endian 呢？ 4. 考慮到現代的處理器架構支援 [SIMD](https://en.wikipedia.org/wiki/SIMD)，得以一次處理 128-bit, 256-bit, 甚至是 512-bit，請評估這樣最佳化策略的可用性，應當有對應的實驗; ::: ## 測驗 `5` [population count](https://en.wikichip.org/wiki/population_count) 簡稱 popcount 或叫 sideways sum，是計算數值的二進位表示中，有多少位元是 `1`，在一些場合下很有用，例如計算 0-1 稀疏矩陣 (sparse matrix)或 bit array 中非 `0` 元素個數、計算兩個字串的 [Hamming distance](https://en.wikipedia.org/wiki/Hamming_weight)。Intel 在 2008 年 11 月 Nehalem 架構的處理器 Core i7 引入 SSE4.2 指令集，其中就有 `CRC32` 和 `POPCNT` 指令，`POPCNT` 可處理 16-bit, 32-bit, 64-bit 整數。 GCC 提供對應的 builtin function: ${\_\_builtin\_popcount}(x)$: `x` 總共有幾個 `1`。使用示範: ```cpp int x = 5328; // 00000000000000000001010011010000 printf("%d\n", __builtin_popcount(x)); // 5 ``` 以下是個存在實作缺陷的版本: ```cpp int popcnt_naive(int n) { int count = 0; while (n) { if (n & 1) ++count; n = n >> 1; } return count; } ``` 呼叫 `popcnt_naive(-1)` 時，會造成無窮迴圈，請指出錯誤所在，並且重寫為正確的版本。 ### 解題在 C 語言當中，有號數做右移會採用算數位移，因此符號會被保留。所以這個程式碼在傳入負數的情況下， `n` 最後會維持在 -1 的狀態，永遠無法跳出迴圈。簡單的修改法，只要將 while 迴圈改為固定次數的 for 迴圈即可 ```cpp int popcnt_naive(int n) { int count = 0, i; for(i = 0; i < 32; ++i){ if (n & 1) ++count; n = n >> 1; } return count; } ``` ## 測驗 `6` 延伸測驗 `5`，實作 branchless 的 `popcnt` 並附上對應的程式原理解說。 :::success 延伸問題: 1. 指出 `popcnt` 的應用場景; 2. 在 Linux 核心程式碼找出具體用法並解析; ::: ### 解題 branchless 的版本為： ```cpp int popcnt_naive(int n) { unsigned cnt = 0; cnt += (n >> 0) & 1; cnt += (n >> 1) & 1; // ...（以此類推） cnt += (n >> 31) & 1; return cnt; } ``` 但這樣的運算步數其實與使用固定次數的 for 迴圈是一樣的（雖然都是 $O(1)$ 複雜度）。 ### 延伸問題 ## 測驗 `7` 考慮到以下程式 (`alloc.c`) 是 [aligned_alloc](https://linux.die.net/man/3/posix_memalign) 的一種簡易實作: ```cpp #include <stdlib.h> // Number of bytes used for storing the aligned pointer offset. // up to 64KB alignment, a size which is already unlikely to be // used for alignment. typedef uint16_t offset_t; #define PTR_OFFSET_SIZE sizeof(offset_t) #define align_up(num, align) \ (((num) + ((align) - 1)) & ~((align) - 1)) void *aligned_malloc(size_t align, size_t size) { void *ptr = NULL; // size must be a power of two. if (!((align & (align - 1)) == 0)) return ptr; if (align && size) { // allocate extra bytes to meet the alignment uint32_t header_size = PTR_OFFSET_SIZE + (align - 1); void *p = malloc(size + header_size); /* Write your code here */ } return ptr; } ``` 其作用是配置針對 `align` 個 bytes 對齊的記憶體空間，可對照閱讀 [Introduction & Allocators](http://stevenlr.com/posts/handmade-rust-1-allocators/) 以掌握原理。你應該要實作 `aligned_malloc` 函式中標註 `/* Write your code here */` 之後的程式碼。除了撰寫程式，你應該提供對應的程式碼註解。注意: 輸入的 `align` 應該要是 2^N^ (power of 2)，否則就回傳 `NULL`。 :::success 延伸問題: 1. 解釋程式運作的原理和推敲背後的思路; 2. 在開放原始碼的專案中，找尋類似的程式碼，解說並量化具體效益; ::: ### 解題 ```cpp #include <stdlib.h> // 定義 offest_t 型態為無號， 2 byte 長的整數 typedef uint16_t offset_t; // 定義 offset 佔的位元組數，用以在配置空間時計算標頭大小 #define PTR_OFFSET_SIZE sizeof(offset_t) // 將 num 往上對齊成 align 的倍數的函數 #define align_up(num, align) \ (((num) + ((align) - 1)) & ~((align) - 1)) void *aligned_malloc(size_t align, size_t size) { void *ptr = NULL; // size must be a power of two. if (!((align & (align - 1)) == 0)) return ptr; if (align && size) { // allocate extra bytes to meet the alignment uint32_t header_size = PTR_OFFSET_SIZE + (align - 1); void *p = malloc(size + header_size); /* Write your code here */ } return ptr; } ``` ### 延伸問題 ## 測驗 `8` 延伸測驗 `7`，實作 `aligned_free`，其原型宣告如下: ```cpp void aligned_free(void *ptr); ``` 除了撰寫程式，你應該提供對應的程式碼註解。 ## 測驗 `9` 考慮以下 64-bit GCD (greatest common divisor, 最大公因數) 求值函式: ```cpp #include <stdint.h> uint64_t gcd64(uint64_t u, uint64_t v) { if (!u || !v) return u | v; while (v) { uint64_t t = v; v = u % v; u = t; } return u; } ``` 改寫為以下等價實作: ```cpp #include <stdint.h> uint64_t gcd64(uint64_t u, uint64_t v) { if (!u || !v) return u | v; int shift; for (shift = 0; !((u | v) & 1); shift++) { u /= 2, v /= 2; } while (!(u & 1)) u /= 2; do { while (!(v & 1)) v /= 2; if (u < v) { v -= u; } else { uint64_t t = u - v; u = v; v = t; } } while (/* Write your code here */); return /* Write your code here */; } ``` 補完以上程式碼，即標注 `/* Write your code here */` 的部分，需要抄寫 `while` 和 `return` 所在的程式碼。 ### 解題 ```cpp #include <stdint.h> uint64_t gcd64(uint64_t u, uint64_t v) { // 如果有一方為 0，直接 return 非 0 的那個 if (!u || !v) return u | v; // 取出 u 和 v 當中共同都有的 power of 2 的因數 int shift; for (shift = 0; !((u | v) & 1); shift++) { u /= 2, v /= 2; } // 加速版輾轉相除法 // 輾轉相除法的概念就是用大的數字扣小的數字， // 如果兩數有共同因數，則用大的扣小的扣完的結果 // 一定也會有他們共同的因數在。 // 除以 2 有點類似把紙對折之後只要剪一刀就能剪下兩塊 // 一樣的概念，是減少計算步數的優化方法 while (!(u & 1)) u /= 2; do { while (!(v & 1)) v /= 2; if (u < v) { v -= u; } else { uint64_t t = u - v; u = v; v = t; } } while (v > 0); // 直到其中一者為 0，非 0 者就是最大公因數 // shift 記錄的是 u 和 v 共同因數中 // 2 ^ N 的 N，所以最後必須將 // while 迴圈處理完的結果乘上 2 ^ N， // 也就是 1 << shift return （1 << shift） * u; } ``` ## 測驗 `10` 承測驗 `9`, 透過 gcc 內建的 [\_\_builtin_ctz](https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html) (Returns the number of trailing 0-bits in x, starting at the least significant bit position) 改寫程式碼如下: ```clike #include <stdint.h> uint64_t gcd64(uint64_t u, uint64_t v) { if (!u || !v) return u | v; int shift = __builtin_ctzll(/* Write your code here */); u >>= __builtin_ctzll(u); while (v) { v >>= __builtin_ctzll(v); if (u < v) { /* Write your code here */ } else { uint64_t t = u - v; u = v, v = t; } } return /* Write your code here */; } ``` 請補完程式碼，作答時需要一併包含原本函式內容。除了撰寫程式，你應該提供對應的程式碼註解。 :::success 延伸問題: 解釋上述程式程式運作原理，以及在 x86_64 上透過 [\_\_builtin_ctz](https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html) 改寫 GCD 對效能的提升 ::: ### 解題 ```clike #include <stdint.h> uint64_t gcd64(uint64_t u, uint64_t v) { // 如果有一方為 0，直接 return 非 0 的那個 if (!u || !v) return u | v; // 取出 u 和 v 當中共同都有的 power of 2 的因數 // 如果 u 和 v 有 2 的因數的話，每多一個 2， // 二進位表示法就會多一個 0 在尾巴， // u | v 的 trailing zeros 會是 u 和 v 中 // 擁有較少個 2 的人的數量，也就是公因數的部份 int shift = __builtin_ctzll(u | v); // 加速版輾轉相除法（解釋同上題） u >>= __builtin_ctzll(u); while (v) { v >>= __builtin_ctzll(v); if (u < v) { v -= u; } else { uint64_t t = u - v; u = v, v = t; } } return （1 << shift） * u; } ``` ### 延伸問題 ## 測驗 `11` 考慮到 [memcmp](http://man7.org/linux/man-pages/man3/memcmp.3.html) 一種實作如下: (行為和 ISO C90 有出入) ```cpp #include <stdint.h> #include <stddef.h> int memcmp(const uint8_t *m1, const uint8_t *m1, size_t n) { for (size_t i = 0; i < n; ++i ) { int diff = m1[i] - m2[i]; if (diff != 0) return (diff < 0) ? -1 : +1; } return 0; } ``` 我們可能因此承受 [information leakage](https://en.wikipedia.org/wiki/Information_leakage) 的風險，於是著手避免使用 conditional branching 一類的指令，從而避免 [side-channel attack](https://en.wikipedia.org/wiki/Side-channel_attack)。為了避免 conditional branch 指令的出現，我們可將 `(res > 0) - (res < 0)` 替換為 `((res - 1) >> 8) + (res >> 8) + 1`。隨後我們實作下方功能等價但避免 branch 的 ` cst_memcmp`: ```cpp #include <stdint.h> #include <stddef.h> int cst_memcmp(const void *m1, const void *m2, size_t n) { const uint8_t *pm1 = (const uint8_t *) m1 + n; const uint8_t *pm2 = (const uint8_t *) m2 + n; int res = 0; if (n) { do { int diff = *--pm1 - *--pm2; /* Write your code here */ } while (pm1 != m1); } return ((res - 1) >> 8) + (res >> 8) + 1; } ``` 注意: 在 Linux 核心內部的實作方式可見: * [[PATCH] crypto_memcmp: add constant-time memcmp](https://www.spinics.net/lists/linux-crypto/msg09542.html) * [Re: [PATCH] crypto_memcmp: add constant-time memcmp](https://www.spinics.net/lists/linux-crypto/msg09551.html) 請補完程式碼，作答時需要一併包含原本函式內容。除了撰寫程式，你應該提供對應的程式碼註解。注意: 在 `/* Write your code here */` 所在的程式碼作用區域 (scope) 中，不得存任何邏輯條件判斷 (如 `if`, `else`, `?`) 或迴圈 (如 `for`, `while`, `goto`, `switch`, `case`, `break`, `continue`) :::success 延伸問題: 1. 解釋上述程式的原理，需要從機率統計的觀點分析; * 為何不能用事先計算好的表格呢？ (提示: cache 的影響) * 如何驗證程式正確以及 constant-time 呢？ 2. 在 Linux 核心找到這類 constant-time 的操作程式碼，予以解說和設計實驗; ::: ### 解題 ## 測驗 `12` 給定一個 [circular linked list](https://en.wikipedia.org/wiki/Linked_list#Circular_linked_list) 實作如下: (檔案 `list.h`) ```cpp typedef struct __list_t { struct __list_t *prev, *next; } list_t; /* * Initialize a list to empty. Because these are circular lists, an "empty" * list is an entry where both links point to itself. This makes insertion * and removal simpler because they do not need any branches. */ static inline void list_init(list_t *list) { list->prev = list; list->next = list; } /* * Append the provided entry to the end of the list. This assumes the entry * is not in a list already because it overwrites the linked list pointers. */ static inline void list_push(list_t *list, list_t *entry) { list_t *prev = list->prev; entry->prev = prev; entry->next = list; prev->next = entry; list->prev = entry; } /* * Remove the provided entry from whichever list it is currently in. This * assumes that the entry is in a list. You do not need to provide the list * because the lists are circular, so the list's pointers will automatically * be updated if the first or last entries are removed. */ static inline void list_remove(list_t *entry) { list_t *prev = entry->prev; list_t *next = entry->next; prev->next = next; next->prev = prev; } /* * Remove and return the first entry in the list or NULL if the list is empty. */ static inline list_t *list_pop(list_t *list) { /* Write your code here */ } ``` 請依循程式註解的描述，參照 `list_push`, 實作可正確運作的 `list_pop`。作答時需要一併包含原本函式內容。除了撰寫程式，你應該提供對應的程式碼註解。注意: 應善用 `list_remove` 和已實作的函式。 :::success 延伸問題: 1. 解釋上述程式的原理和技巧; 2. 在 Linux 核心找到這類的操作程式碼; ::: ### 解題 ```clike /* * Remove and return the first entry in the list * or NULL if the list is empty. */ static inline list_t *list_pop(list_t *list) { // 如果串列為空指標（list == NULL）或為空 (empty) // 則不做任何事情，直接回傳 NULL if(!list || list->prev == list->next) return NULL; // 若不滿足以上退出條件，表示串列中至少包含兩個元素 // 取出第一個（list 指向的地方是第一個），保存其位址 list_t *tmp = list; // 用 list_remove() 把第一個元素從串列中去除 list_remove(list); // 回傳保存下來的位址 return tmp; } ``` ### 延伸問題 ## 測驗 `13` 考慮一個 [bit array](https://en.wikipedia.org/wiki/Bit_array) 的實作 (`bit-array.c`) 如下: ```cpp #include <errno.h> #include <inttypes.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #define MAX(a, b) (((a) >= (b)) ? (a) : (b)) #define trailing_zeros(x) \ ((x) ? (__typeof(x)) __builtin_ctzll(x) : (__typeof(x)) sizeof(x) * 8) #define leading_zeros(x) \ ((x) ? (__typeof(x)) __builtin_clzll(x) : (__typeof(x)) sizeof(x) * 8) // 64 bit words typedef uint64_t word_t, word_addr_t, bit_index_t; typedef struct { word_t *words; bit_index_t num_of_bits; // Number of words used -- this is just round_up(num_of_bits / 64) // if num_of_bits == 0, this is 0 word_addr_t num_of_words; // For more efficient allocation we use realloc only to double size -- // not for adding every word. Initial size is INIT_CAPACITY_WORDS. word_addr_t capacity_in_words; } BIT_ARRAY; #define roundup_bits2words64(bits) (((bits) + 63) / 64) // Round a number up to the nearest number that is a power of two #define roundup2pow(x) (1UL << (64 - leading_zeros(x))) // Bit array (bitset) #define _TYPESHIFT(arr, word, shift) \ ((__typeof(*(arr)))((__typeof(*(arr)))(word) << (shift))) #define bitsetX_wrd(wrdbits, pos) ((pos) / (wrdbits)) #define bitsetX_idx(wrdbits, pos) ((pos) % (wrdbits)) #define bitset_wrd(arr, pos) bitsetX_wrd(sizeof(*(arr)) * 8, pos) #define bitset_idx(arr, pos) bitsetX_idx(sizeof(*(arr)) * 8, pos) #define bitset64_wrd(pos) ((pos) >> 6) #define bitset64_idx(pos) ((pos) &63) #define bitmask(nbits, type) \ ((nbits) ? ~(type) 0 >> (sizeof(type) * 8 - (nbits)) : (type) 0) #define bitmask32(nbits) bitmask(nbits, uint32_t) #define bitmask64(nbits) bitmask(nbits, uint64_t) #define bitset2_get(arr, wrd, idx) (((arr)[wrd] >> (idx)) & 0x1) #define bitset2_set(arr, wrd, idx) ((arr)[wrd] |= _TYPESHIFT(arr, 1, idx)) #define bitset_op(func, arr, pos) \ func(arr, bitset_wrd(arr, pos), bitset_idx(arr, pos)) #define bitset_op2(func, arr, pos, bit) \ func(arr, bitset_wrd(arr, pos), bitset_idx(arr, pos), bit) #define bitset_get(arr, pos) bitset_op(bitset2_get, arr, pos) #define bitset_set(arr, pos) bitset_op(bitset2_set, arr, pos) #define bit_array_get(arr, i) bitset_get((arr)->words, i) #define bit_array_set(arr, i) bitset_set((arr)->words, i) #define POPCOUNT(x) (unsigned) __builtin_popcountll(x) // word of all 1s #define WORD_MAX (~(word_t) 0) // If cannot allocate memory, set errno to ENOMEM, return NULL BIT_ARRAY *bit_array_alloc(BIT_ARRAY *bitarr, bit_index_t nbits) { bitarr->num_of_bits = nbits; bitarr->num_of_words = roundup_bits2words64(nbits); bitarr->capacity_in_words = MAX(8, roundup2pow(bitarr->num_of_words)); bitarr->words = (word_t *) calloc(bitarr->capacity_in_words, sizeof(word_t)); if (bitarr->words == NULL) { errno = ENOMEM; return NULL; } return bitarr; } // If cannot allocate memory, set errno to ENOMEM, return NULL BIT_ARRAY *bit_array_create(bit_index_t nbits) { BIT_ARRAY *bitarr = (BIT_ARRAY *) malloc(sizeof(BIT_ARRAY)); if (bitarr == NULL || bit_array_alloc(bitarr, nbits) == NULL) { if (bitarr != NULL) free(bitarr); errno = ENOMEM; return NULL; } return bitarr; } // Print this array to a file stream. Prints '0's and '1'. Doesn't print // newline. void bit_array_print(const BIT_ARRAY *bitarr, FILE *fout) { for (bit_index_t i = 0; i < bitarr->num_of_bits; i++) fprintf(fout, "%c", bit_array_get(bitarr, i) ? '1' : '0'); } // set a bit (to 1) at position b void bit_array_set_bit(BIT_ARRAY *bitarr, bit_index_t b) { bit_array_set(bitarr, b); } // Set multiple bits at once. // e.g. set bits 1, 20 & 31: bit_array_set_bits(bitarr, 3, 1,20,31); void bit_array_set_bits(BIT_ARRAY *bitarr, size_t n, ...) { va_list argptr; va_start(argptr, n); for (size_t i = 0; i < n; i++) { unsigned int bit_index = va_arg(argptr, unsigned int); bit_array_set_bit(bitarr, bit_index); } va_end(argptr); } static word_t _next_permutation(word_t v) { // http://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation word_t t = v | (v - 1); // t gets v's least significant 0 bits set to 1 // Next set to 1 the most significant bit to change, // set to 0 the least significant ones, and add the necessary 1 bits. return (t + 1) | (((~t & (t + 1)) - 1) >> (trailing_zeros(v) + 1)); } // Get the next permutation of an array with a fixed size and given number of // bits set. Also known as next lexicographic permutation. // Given a bit array find the next lexicographic orginisation of the bits // Number of possible combinations given by (size choose bits_set) i.e. nCk // 00011 -> 00101 -> 00110 -> 01001 -> 01010 -> // 01100 -> 10001 -> 10010 -> 10100 -> 11000 -> 00011 (back to start) void bit_array_next_permutation(BIT_ARRAY *bitarr) { /* Write your code here */ } int main(void) { BIT_ARRAY *bitarr = bit_array_create(10); bit_array_print(bitarr, stdout); fputc('\n', stdout); bit_array_set_bits(bitarr, 3, 1, 2, 5); bit_array_print(bitarr, stdout); fputc('\n', stdout); return 0; } ``` 其中函式 `bit_array_next_permutation` 可將指定的 bit array 所有排列組合予以列舉 ($^nC_k$)，請依據程式碼註解，提供對應的實作，並且標注必要的註解。 :::success 延伸問題: 1. 解釋 [bit array](https://en.wikipedia.org/wiki/Bit_array) 的應用場景; 2. 在 Linux 核心找到這類的操作程式碼，並予以解釋及分析; 3. 提供改善 bit array 效率的機制並評估; :::