2022q1 Homework4 (quiz4)

# 2022q1 Homework4 (quiz4) contributed by < `jim12312321` > > [測驗題目](https://hackmd.io/@sysprog/linux2022-quiz4) ## 測驗 1 ： ceil $log_2$ ```c= int ceil_log2(uint32_t x) { uint32_t r, shift; x--; r = (x > 0xFFFF) << 4; x >>= r; shift = (x > 0xFF) << 3; x >>= shift; r |= shift; shift = (x > 0xF) << 2; x >>= shift; r |= shift; shift = (x > 0x3) << 1; x >>= shift; return (EXP1) + 1; } ``` 解析：這題跟 [第三周測驗題第 7 題](https://hackmd.io/NJrI-98tT4i0B_Ri-BMXsw#%E6%B8%AC%E9%A9%97-7-%EF%BC%9A-ilog32)想法上大致一致，一樣是用 binary search，每次對半檢查，檢查當前 bits 中的上半部分是否有值，之後再位移 `x` 並將結果相加得到答案。 > 不要加上「的概念」，本質上就是 binary search! > :notes: jserv 本題在第 14 行時檢查到 `0x3` ，也就是剩最後兩個 bits ，經過位移後就回傳答案了，因此我們可知題目將以下步驟濃縮在 `(EXP1)+1` 中。 ```c= r |= shift; shift = (x > 0x1) << 0; //為了寫法一致，所以加上 `<< 0` r |= shift; ``` 其中第三行中的 `shift` 可以直接用 `(x > 1)` 取代，因此目前可以簡化成 $\downarrow$ ```c r |= (shift | (x > 1)); ``` 而又因為現在的 `x` 只剩 2 個 bits ，因此判斷是否大於 1 等價於右移 1 ，程式碼可以進一步簡化 $\downarrow$ ```c r |= (shift | (x >> 1)); ``` ` EXP1` $\rightarrow$ r | shift | x >> 1 ### 延伸問題 2 >改進程式碼，使其得以處理 x = 0 的狀況，並仍是 branchless ```c= int ceil_log2(u_int32_t x) { u_int32_t r, shift; /*if input 0,than x will be 0xFFFFFFFF*/ x--; r = (x > 0xFFFF) << 4; x >>= r; /* let x still be 0xFFFFFFFF if input 0*/ x <<= (x == 0xFFFF) << 4; x += (0xFFFF & -(x == 0xFFFF0000)); shift = (x > 0xFF) << 3; x >>= shift; r |= shift; /* let x still be 0xFFFFFFFF if input 0*/ x <<= (x == 0xFFFFFF) << 3; x += (0xFF & -(x == 0xFFFFFF00)); shift = (x > 0xF) << 2; x >>= shift; r |= shift; /* let x still be 0xFFFFFFFF if input 0*/ x <<= (x == 0xFFFFFFF) << 2; x += (0xF & -(x == 0xFFFFFFF0)); shift = (x > 0x3) << 1; x >>= shift; r |= shift; /* let x still be 0xFFFFFFFF if input 0*/ x <<= (x == 0x3FFFFFFF) << 1; x += (0x3 & -(x == 0xFFFFFFFC)); r |= shift; shift = (x > 0x1) << 0; x >>= 1; r |= shift; /* let x still be 0xFFFFFFFF if input 0*/ x <<= (x == 0x7FFFFFFF) << 1; x += (0x1 & -(x == 0xFFFFFFFE)); return (r | x )+1; } ``` 主要想法就是針對輸入 0 這個特例進行處理。一開始的 `x--`會使值從 0 變為 0xFFFFFFFF ，之後每次位移後都將值補回去讓其隨時都為 0xFFFFFFFF ，最後 0xFFFFFFFF +1 就會等於 0。 :::warning 思考如何藉由巨集來簡化程式碼，注意 bitmask/generator 的設計 :notes: jserv ::: --- ## 測驗 2 ： find first set ```c= #define BITS_PER_BYTE 8 #define BITS_PER_LONG (sizeof(unsigned long) * BITS_PER_BYTE) #include <stddef.h> static inline size_t ffs(unsigned long x) { if (x == 0) return 0; size_t o = 1; unsigned long t = ~0UL; size_t shift = BITS_PER_LONG; shift >>= 1; t >>= shift; while (shift) { if ((EXP2) == 0) { x >>= shift; EXP3; } shift >>= 1; t >>= shift; } return o; } ``` 解析：這題一樣是利用 binary search ，每次對半檢查 `x` 中的下半部 (e.g. 假設現在 `x` 為 16 bits ，那下半部就是第 0 到 7 個 bits) 是否有值，若有值則表示應該繼續找下去，若沒有值則表示 first set bit 在目前的上半部，因此要將當前字串右移並累加結果。 `EXP2` $\rightarrow$ x & t `EXP3` $\rightarrow$ o += shift ### 延伸問題 1 > 迴避 while, for, goto 等關鍵字以改寫出功能等價的實作 naïve 版本：因為本質上是 binary search ，而對於 unsigned long (64 bits in LP64) 中最多就是重複 6 次，直接展開即可。 ```c= static inline size_t ffs(unsigned long x) { if (x == 0) return 0; size_t o = 1; unsigned long t = ~0UL; size_t shift = BITS_PER_LONG; shift >>= 1; t >>= shift; if ((x & t) == 0) { x >>= shift; o += shift; } shift >>= 1; t >>= shift; if ((x & t) == 0) { x >>= shift; o += shift; } shift >>= 1; t >>= shift; if ((x & t) == 0) { x >>= shift; o += shift; } shift >>= 1; t >>= shift; if ((x & t) == 0) { x >>= shift; o += shift; } shift >>= 1; t >>= shift; if ((x & t) == 0) { x >>= shift; o += shift; } shift >>= 1; t >>= shift; if ((x & t) == 0) { x >>= shift; o += shift; } return o; } ``` 遞迴版本：除了比較簡短之外，對應不同型態的數值也不用重新思考要對切幾次，可以直接修改 `BITS_PER_LONG`, `t`, `x` 的資料型態即可，另外要特別注意，因為希望在遞迴執行時 `shift`, `t`, `o` 不用特別傳入且始終為同一個，因此變數宣告時要加上 `static` 修飾詞。 ```c= static inline size_t ffs(unsigned long x) { if (x == 0) return 0; static size_t o = 1; static unsigned long t = ~0UL; static size_t shift = BITS_PER_LONG; shift >>= 1; t >>= shift; if ((x & t) == 0) { x >>= shift; o += shift; } if(shift){ ffs(x); }else{ return o; } } ``` 暫時想不到更精簡的寫法了。 ### 延伸問題 2 >在 Linux 核心原始程式碼的 include/asm-generic/bitops 目錄找出相關程式碼，並探討應用案例 > 在 [include/asm-generic/bitops](https://github.com/torvalds/linux/tree/master/include/asm-generic/bitops) 中可以看到許多相關實現方法，如： [__ffs](https://github.com/torvalds/linux/blob/master/include/asm-generic/bitops/__ffs.h), [__fls](https://github.com/torvalds/linux/blob/master/include/asm-generic/bitops/__fls.h), [ffs](https://github.com/torvalds/linux/blob/master/include/asm-generic/bitops/ffs.h), [fls](https://github.com/torvalds/linux/blob/master/include/asm-generic/bitops/fls.h) 等方法。我們來追蹤 [__ffs](https://github.com/torvalds/linux/blob/master/include/asm-generic/bitops/__ffs.h) 的應用情境，可以在 [drivers/clk/ti/clkt_dpll.c](https://github.com/torvalds/linux/blob/5bfc75d92efd494db37f5c4c173d3639d4772966/drivers/clk/ti/clkt_dpll.c#L181) 中的 `_omap2_dpll_is_in_bypass` 找到他。 ```c=174 /** * _omap2_dpll_is_in_bypass - check if DPLL is in bypass mode or not * @v: bitfield value of the DPLL enable * * Checks given DPLL enable bitfield to see whether the DPLL is in bypass * mode or not. Returns 1 if the DPLL is in bypass, 0 otherwise. */ static int _omap2_dpll_is_in_bypass(u32 v) { u8 mask, val; mask = ti_clk_get_features()->dpll_bypass_vals; /* * Each set bit in the mask corresponds to a bypass value equal * to the bitshift. Go through each set-bit in the mask and * compare against the given register value. */ while (mask) { val = __ffs(mask); mask ^= (1 << val); if (v == val) return 1; } return 0; } ``` 就像註解所說的，在這裡 `__ffs` 的目的就是為了從 mask 的高位 set bit 逐步走訪所有 set bit (而 `mask ^= (1 << val);` 就是為了消除找到的 first set bit) 並且判斷是否為 bypass mode 。 --- ## 測驗 3 ︰ a pointer of a pointer ```c struct foo_consumer { int (*handler)(struct foo_consumer *self, void *); struct foo_consumer *next; }; struct foo { struct foo_consumer *consumers; unsigned long flags; }; #include <stdbool.h> /* * For foo @foo, delete the consumer @fc. * Return true if the @fc is deleted sfccessfully * or return false. */ static bool consumer_del(struct foo *foo, struct foo_consumer *fc) { struct foo_consumer **con; bool ret = false; for (con = &foo->consumers; *con; EXP4) { if (*con == fc) { *con = EXP5; ret = true; break; } } return ret; } ``` 解析︰這題只是單純考驗指標的指標運用技巧，在 `comsumer_del` 中需要走訪 `foo->comsumers` 找到指定移除的結點 `fc` 。所以我們知道在 `EXP4` 中需要讓 `con` 繼續走訪 linked list 並在 `EXP5` 時移除找到的結點。 `EXP4` $\rightarrow$ con = &(*con)->next `EXP5` $\rightarrow$ fc->next 另外在延伸問題 1 中也要求以下問題 $\downarrow$ > 區分 `foo` 和 `foo_consumer` 的好處我認為這樣的用意主要是為了封裝，模組化比起大雜燴來得好維護，另一個好處則是增加程式易讀性，方便理解和維護。 --- ## 測驗 4 ︰ coroutine ```c= static void task_switch(struct list_head *tasklist) { jmp_buf env; if (!list_empty(tasklist)) { struct task *t = list_first_entry(tasklist, struct task, list); EXP6; memcpy(env, t->env, sizeof(jmp_buf)); free(t); longjmp(env, 1); } } ``` ```c= static void task_join(struct list_head *tasklist) { jmp_buf env; while (!list_empty(tasklist)) { struct task *t = list_first_entry(tasklist, struct task, list); EXP7; memcpy(env, t->env, sizeof(jmp_buf)); free(t); longjmp(env, 1); } } ``` 在 `task_switch` 和 `task_join` 中都需要將當前的 task 從 task list 中移除。 `EXP6` $\rightarrow$ list_del(&t->list) `EXP7` $\rightarrow$ list_del(&t->list) ```c= void task0(void *arg) { jmp_buf env; static int n; n = *(int *) arg; printf("Task 0: n = %d\n", n); if (setjmp(env) == 0) { task_add(&tasklist, env); EXP8; } for (int i = 0; i < n; i++) { if (setjmp(env) == 0) { task_add(&tasklist, env); task_switch(&tasklist); } printf("Task 0: resume\n"); } printf("Task 0: complete\n"); longjmp(sched, 1); } ``` ```c= void task1(void *arg) { jmp_buf env; static int n; n = *(int *) arg; printf("Task 1: n = %d\n", n); if (setjmp(env) == 0) { task_add(&tasklist, env); EXP9; } for (int i = 0; i < n; i++) { if (setjmp(env) == 0) { task_add(&tasklist, env); task_switch(&tasklist); } printf("Task 1: resume\n"); } printf("Task 1: complete\n"); longjmp(sched, 1); } ``` 在 `task0` 或 `task1` 中執行完 `task_add` 後需要返回到 `schedule` 中。 `EXP8` $\rightarrow$ longjmp(sched, 1) `EXP9` $\rightarrow$ longjmp(sched, 1) --- ## 測驗 5 ： READ_ONCE ```c= #define READ_ONCE(x) \ ({ \ union { \ typeof(x) __val; \ DECL0; \ } __u; \ __read_once_size(&(x), __u.__c, sizeof(x)); \ __u.__val; \ }) ``` union 有別於 struct 的特點在於 union 所佔的記憶體空間大小以 union 中占最大空間的成員，因此 `DECL0` 需要選擇所占空間最小的型態。 `DECL0` $\rightarrow$ char __c[1] ---