contributed by < LiChiiiii >
題目 - L04: fibdrv
開發環境
$ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. $ lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Address sizes: 39 bits physical, 48 bits virtual Byte Order: Little Endian CPU(s): 8 On-line CPU(s) list: 0-7 Vendor ID: GenuineIntel Model name: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz CPU family: 6 Model: 94 Thread(s) per core: 2 Core(s) per socket: 4 Socket(s): 1 Stepping: 3 CPU max MHz: 4000.0000 CPU min MHz: 800.0000 BogoMIPS: 6799.81
原本的程式碼是利用 dynamic programming 來計算,時間複雜度為
原本的費氏數列定義為:
經過矩陣的推導後,可得到對於奇數和偶數的不同算法:
根據這個公式,每次進入 recursive
的數字一定是前一個數字的一半,而且 recursive tree
不會有增長,每次呼叫只會重複呼叫自己一次而已。
使用 __builtin_clzll
來計算有多少 leading 0s。
遇到 0
: 求
遇到 1
: 求
static long long fib_sequence(long long k)
{
long long a = 0, b = 1;
long long temp1, temp2;
int num_bits = sizeof(k) * 8 - __builtin_clzll(k);
for (int i = num_bits; i >= 1; i--) {
temp1 = a * (2 * b - a);
temp2 = b * b + a * a;
a = temp1;
b = temp2;
// Check the i-th bit of k
if ((k >> (i - 1)) & 1) {
temp1 = a + b;
a = b;
b = temp1;
}
}
return a;
}
以
i | start | 3 | 2 | 1 | result |
---|---|---|---|---|---|
n | - | 110 | 110 | 110 | - |
F(n) | F(0) | F(0*1+1) = F(1) | F(2*1+1) = F(3) | F(2*3) = F(6) | F(6) |
a | 0 | 1 | 2 | 8 | 8 |
F(n) | F(1) | F(0*1+2) = F(2) | F(2*1+2) = F(4) | F(2*3+1) = F(7) | - |
b | 1 | 1 | 3 | 13 | - |
參考 L04: fibdrv
原始程式碼以 long long
來儲存費氏數列的計算結果,但是在 BigN
來儲存 128 位元的整數:
typedef struct BigN {
unsigned long long lower, upper;
}BigN;
其中 lower
表示低位的 64 位元, upper
表示高位的 64 位元。
BigN
結構體的運算,包含加法運算、減法運算、乘法運算、右移 1 bit、左移 1 bit。其中BigN的乘法運算使用左移右移和加法運算來實作,減少原乘法運算的成本。各自計算兩個 64 位元整數的和,若 res.lower < a.lower
則要進位 upper
。
BigN BigN_add(BigN a, BigN b) {
BigN res;
res.lower = a.lower + b.lower;
res.upper = a.upper + b.upper + (res.lower < a.lower);
return res;
}
各自計算兩個 64 位元整數的差,若 a.lower < b.lower
則要借位 upper
。
BigN BigN_sub(BigN a, BigN b) {
BigN res;
res.lower = a.lower - b.lower;
res.upper = a.upper - b.upper - (a.lower < b.lower);
return res;
}
使用了一個 while
循環來將 b
不斷右移,同時將 a
左移。在每次循環中,如果 b
的最低位是1,則將 a
加入到 res 中,最終得到 a
和 b
的乘積 res
。
BigN BigN_multi(BigN a,BigN b) {
BigN res = {0, 0};
while (b.lower || b.upper) {
if (b.lower & 1) {
res = BigN_add(res, a);
}
a = BigN_shift_left(a);
b = BigN_shift_right(b);
}
return res;
}
BigN BigN_shift_right(BigN a) {
BigN res;
res.upper = a.upper >> 1;
res.lower = (a.lower >> 1) | (a.upper << 63);
return res;
}
BigN BigN_shift_left(BigN a) {
struct BigN res;
res.lower = a.lower << 1;
res.upper = (a.upper << 1) | (a.lower >> 63);
return res;
}
最後利用上述自定義的結構和函數來更改程式碼:
static BigN fib_sequence(long long k)
{
BigN a = {0, 0};
BigN b = {1, 0};
// Get the number of binary digits in k
int num_bits = sizeof(k) * 8 - __builtin_clzll(k);
for (int i = num_bits; i >= 1; i--) {
BigN temp1 = BigN_multi(a , BigN_sub(BigN_shift_left(b), a));
BigN temp2 = BigN_add(BigN_multi(b,b) , BigN_multi(a,a));
a = temp1;
b = temp2;
// Check the i-th bit of k
if ((k >> (i - 1)) & 1) {
temp1 = BigN_add(a,b);
a = b;
b = temp1;
}
}
return a;
}
原始程式碼在 fibdrv.c 中的 fib_read()
回傳計算出的 Fibonacci 數,最初想法是將 fib_read()
回傳的 ssize_t
的型態更改成自定義的 BigN
傳給 client ,但是不能隨意更改 read ,且在使用者模式的位址空間配置一個 buffer 空間時,核心裝置驅動不能直接寫入該地址,因此使用到 copy_to_user ,將想傳給使用者模式 (即運作中的 client) 的值複製到到fib_read()
的 buf 參數後,client 端方可接收到此值並印出。
-- static BigN fib_sequence(long long k)
++ static long long fib_sequence(long long k, char *buf)
{
BigN a = {0, 0};
BigN b = {1, 0};
// Get the number of binary digits in k
int num_bits = sizeof(k) * 8 - __builtin_clzll(k);
for (int i = num_bits; i >= 1; i--) {
BigN temp1 = BigN_multi(a , BigN_sub(BigN_shift_left(b), a));
BigN temp2 = BigN_add(BigN_multi(b,b) , BigN_multi(a,a));
a = temp1;
b = temp2;
// Check the i-th bit of k
if ((k >> (i - 1)) & 1) {
temp1 = BigN_add(a,b);
a = b;
b = temp1;
}
}
++ int test = __copy_to_user(buf, &a , sizeof(struct BigN));
++ if (test) {
++ printk("The copy from kernel to user is fail.");
++ return -1;
++ }
-- return a;
++ return sizeof(a);
}
unsigned long long buf[2];
...
for (int i = 0; i <= offset; i++) {
lseek(fd, i, SEEK_SET);
sz = read(fd, buf, sizeof(buf));
printf("Reading from " FIB_DEV
" at offset %d, returned the sequence "
"%016llu , %016llu.\n",
i, buf[1],buf[0]);
}
上述程式碼修正過後發現 BigN_add()
中的 res.lower = a.lower + b.lower
發生了 overflow 。
BigN BigN_add(BigN a, BigN b) {
BigN res;
res.lower = a.lower + b.lower;
res.upper = a.upper + b.upper + (res.lower < a.lower);
return res;
}
我嘗試加上 (a.lower+b.lower) < a.lower
判斷低 64 位元在相加時是否發生 overflow ,若發生則宣告 128 位元的 temp
存放相加後的值,再將 temp & 0xFFFFFFFFFFFFFFFF
取出低 64 位元,並右移 64 位元取得進位的值(第 8 、 9 行)。
BigN BigN_add(struct BigN a, struct BigN b) {
BigN res = {0,0};
int carry = 0;
if((a.lower+b.lower) < a.lower)
{
unsigned __int128 temp = (unsigned __int128)a.lower + b.lower;
res.lower = temp & 0xFFFFFFFFFFFFFFFF;
carry = temp >> 64;
}
...
return res;
}
修正完的結果依然是錯誤的。
舉例來說, BigN_add()
希望可以得到 res.upper = 1
和 res.lower = 9740274219868223167
,但 19740274219868223167
在二進制取出的低 64 位元得到的會是 1293530146158671551
。
因此我嘗試將第 8 行改成使用 mod 來得到我想要的餘數,卻出現以下錯誤訊息:
ERROR: modpost: "__umodti3" [/home/lichi/Documents/fibdrv/fibdrv.ko] undefined!
ERROR: modpost: "__modti3" [/home/lichi/Documents/fibdrv/fibdrv.ko] undefined!
顯然必須避免使用 128 位無符號整數取模運算。
我嘗試將 upper 轉成 __int128 的型態並左移 64 bits(就是乘上
字串反轉
void reverse_str(char *str, int len) {
int start = 0;
int end = len - 1;
while (start < end) {
char temp = str[start];
str[start] = str[end];
str[end] = temp;
start++;
end--;
}
}
128 位元的整數轉成字串
char *uint128_to_string(__int128 num, char *buffer, int bufferSize) {
if (num == 0) {
buffer[0] = '0';
buffer[1] = '\0';
return buffer;
}
int i = 0;
while (num > 0) {
unsigned long remainder = num % 10;
buffer[i++] = '0' + remainder;
num /= 10;
}
buffer[i] = '\0';
reverse_str(buffer, i);
return buffer;
}
兩個字串相加
char *add_str(const char *str1, const char *str2) {
int len1 = strlen(str1);
int len2 = strlen(str2);
int maxLen = len1 > len2 ? len1 : len2;
int carry = 0;
char *result = (char *) malloc(maxLen + 2); // 分配足夠的空間來存儲結果,包括結束符 '\0'
if (result == NULL) {
return NULL;
}
result[maxLen + 1] = '\0';
for (int i = 0; i < maxLen; i++) {
int digit1 = i < len1 ? str1[len1 - 1 - i] - '0' : 0;
int digit2 = i < len2 ? str2[len2 - 1 - i] - '0' : 0;
int sum = digit1 + digit2 + carry;
result[maxLen - i] = (sum % 10) + '0';
carry = sum / 10;
}
if (carry) {
result[0] = carry + '0';
} else {
memmove(result, result + 1, maxLen); // 移除首位的 0,如果存在
result[maxLen] = '\0'; // 縮小結果字串的長度
}
return result;
}
static long long fib_sequence(long long k, char *buf)
{
...
char buf_1[41];
char buf_2[41];
char *lower = uint128_to_string((uint128_t)a.lower , buf_1, sizeof(buf_1));
char *upper = uint128_to_string((uint128_t)a.upper << 64 , buf_2, sizeof(buf_2));
char *result = add_str(upper, lower);
int len = strlen(result)+1;
int test = __copy_to_user(buf, result, len);
if (test) {
printk("The copy from kernel to user is fail.");
return -1;
}
return len;
}
更改結果成功印出正確數值。
Reading from /dev/fibonacci at offset 92, returned the sequence 7540113804746346429.
Reading from /dev/fibonacci at offset 93, returned the sequence 12200160415121876738.
Reading from /dev/fibonacci at offset 94, returned the sequence 19740274219868223167.
Reading from /dev/fibonacci at offset 95, returned the sequence 31940434634990099905.
Reading from /dev/fibonacci at offset 96, returned the sequence 51680708854858323072.
Reading from /dev/fibonacci at offset 97, returned the sequence 83621143489848422977.
Reading from /dev/fibonacci at offset 98, returned the sequence 135301852344706746049.
Reading from /dev/fibonacci at offset 99, returned the sequence 218922995834555169026.
Reading from /dev/fibonacci at offset 100, returned the sequence 354224848179261915075.
使用 ktime
在核心模組中測量執行時間,發現 fib_write
此暫時沒作用,因此在此函式實作並回傳 kernel space 的執行時間。
static ssize_t fib_write(struct file *file,
const char *buf,
size_t size,
loff_t *offset)
{
ktime_t start, kt;
start = ktime_get();
fib_sequence(*offset, buf);
kt = ktime_sub(ktime_get(), start);
return ktime_to_ns(kt);
}
使用 clock_gettime
計算在呼叫 fib_write
時所需的時間,可得到 user space 的執行時間。兩數相減即可得到 system call 的時間。
for (int i = 0; i <= offset; i++) {
struct timespec start, end;
clock_gettime(CLOCK_REALTIME, &start);
long kernel_time = write(fd, buf, 1);
clock_gettime(CLOCK_REALTIME, &end);
long user_time = end.tv_nsec - start.tv_nsec;
printf("%d ", i);
printf("user space execute time: %ld ns,", user_time);
printf("kernel space execute time : %ld ns,", kernel_time);
printf("system call execute time : %ld ns \n", user_time - kernel_time);
}
導入 driver.py 去除 95% 區間之外的值,可以把圖中的尖端消除掉。