# 2023q1 Homework3 (fibdrv) contribute by < `siwon0619` > ## 開發環境 ```cpp $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 $ lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Address sizes: 39 bits physical, 48 bits virtual Byte Order: Little Endian CPU(s): 12 On-line CPU(s) list: 0-11 Vendor ID: GenuineIntel Model name: Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz CPU family: 6 Model: 166 Thread(s) per core: 2 Core(s) per socket: 6 Socket(s): 1 Stepping: 0 CPU max MHz: 4700.0000 CPU min MHz: 400.0000 ``` ## 前期準備 ```cpp $ uname -r 5.19.0-32-generic $ dpkg -L linux-headers-5.19.0-32-generic | grep "/lib/modules" /lib/modules /lib/modules/5.19.0-32-generic /lib/modules/5.19.0-32-generic/build $ whoami siwon0619 ``` ## 自我檢查清單 - [x] 研讀上述 ==Linux 效能分析的提示== 描述,在自己的實體電腦運作 GNU/Linux,做好必要的設定和準備工作 $\to$ 從中也該理解為何不希望在虛擬機器中進行實驗; - [x] 研讀上述費氏數列相關材料 (包含論文),摘錄關鍵手法,並思考 [clz / ctz](https://en.wikipedia.org/wiki/Find_first_set) 一類的指令對 Fibonacci 數運算的幫助。請列出關鍵程式碼並解說 - [ ] 複習 C 語言 [數值系統](https://hackmd.io/@sysprog/c-numerics) 和 [bitwise operation](https://hackmd.io/@sysprog/c-bitwise),思考 Fibonacci 數快速計算演算法的實作中如何減少乘法運算的成本; - [ ] 學習指出針對大數運算的加速運算和縮減記憶體操作成本的舉措,紀錄你的認知和疑惑 - [ ] 注意到 `fibdrv.c` 存在著 `DEFINE_MUTEX`, `mutex_trylock`, `mutex_init`, `mutex_unlock`, `mutex_destroy` 等字樣,什麼場景中會需要呢?撰寫多執行緒的 userspace 程式來測試,觀察 Linux 核心模組若沒用到 mutex,到底會發生什麼問題。嘗試撰寫使用 [POSIX Thread](https://en.wikipedia.org/wiki/POSIX_Threads) 的程式碼來確認。 $\to$ 搭配閱讀〈[並行和多執行緒程式設計](https://hackmd.io/@sysprog/concurrency)〉 ## 費氏數列 ```cpp static long long fib_sequence(long long k) { /* FIXME: C99 variable-length array (VLA) is not allowed in Linux kernel. */ long long f[k + 2]; f[0] = 0; f[1] = 1; for (int i = 2; i <= k; i++) { f[i] = f[i - 1] + f[i - 2]; } return f[k]; } ``` ### 修正 C99 variable-length array (VLA) is not allowed in Linux kernel ```cpp static long long fib_sequence(long long k) { if (k < 2) return k; long long f_k ; long long f_1 = 1, f_0= 0; for (int i = 2; i <= k; i++) { f_k = f_1 + f_0; f_0 = f_1; f_1 = f_k; } return f_k; } ``` ### 使用Fast Doubling 對應的虛擬碼如下: ```cpp Fast_Fib(n) a = 0; b = 1; // m = 0 for i = (number of binary digit in n) to 1 t1 = a*(2*b - a); t2 = b^2 + a^2; a = t1; b = t2; // m *= 2 if (current binary digit == 1) t1 = a + b; // m++ a = b; b = t1; return a; ``` 可以根據上述程式碼更改為 ```cpp static uint64_t fib_fastdoubling(long long k) { unsigned int h = 0; for (unsigned int i = n ; i ; ++h, i >>= 1); uint64_t a = 0; // F(0) = 0 uint64_t b = 1; // F(1) = 1 // There is only one `1` in the bits of `mask`. The `1`'s position is same as // the highest bit of n(mask = 2^(h-1) at first), and it will be shifted right // iteratively to do `AND` operation with `n` to check `n_j` is odd or even, // where n_j is defined below. for (unsigned int mask = 1 << (h - 1) ; mask ; mask >>= 1) { // Run h times! // Let j = h-i (looping from i = 1 to i = h), n_j = floor(n / 2^j) = n >> j // (n_j = n when j = 0), k = floor(n_j / 2), then a = F(k), b = F(k+1) now. uint64_t c = a * (2 * b - a); // F(2k) = F(k) * [ 2 * F(k+1) – F(k) ] uint64_t d = a * a + b * b; // F(2k+1) = F(k)^2 + F(k+1)^2 if (mask & n) { // n_j is odd: k = (n_j-1)/2 => n_j = 2k + 1 a = d; // F(n_j) = F(2k + 1) b = c + d; // F(n_j + 1) = F(2k + 2) = F(2k) + F(2k + 1) } else { // n_j is even: k = n_j/2 => n_j = 2k a = c; // F(n_j) = F(2k) b = d; // F(n_j + 1) = F(2k + 1) } } return a; } ```