# 2023q1 Homework3 (fibdrv)
contributed by < `yeiogy123` >
## 檢查 linux 核心版本與開發環境
```shell
$uname -r
5.15.0-60-generic
$gcc --version
gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
$lscpu
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
Address sizes: 39 bits physical, 48 bits virtual
CPU(s): 4
On-line CPU(s) list: 0-3
Thread(s) per core: 1
Core(s) per socket: 4
Socket(s): 1
NUMA node(s): 1
Vendor ID: GenuineIntel
CPU family: 6
Model: 165
Model name: Intel(R) Core(TM) i7-10870H CPU @ 2.20GHz
Stepping: 2
CPU MHz: 2208.000
BogoMIPS: 4416.00
Hypervisor vendor: KVM
Virtualization type: full
L1d cache: 128 KiB
L1i cache: 128 KiB
L2 cache: 1 MiB
L3 cache: 64 MiB
```
## 開發紀錄
### Fast Doubling
$F(2k) = F(k) * [ 2 * F(k+1) - F(k)]$
$F(2k+1) = F(k+1)^2 + F(k)^2$
### 將 Fast Doubling 實作
因為 $logF(93) \approx 63.4$,
當我們要計算第94個費式數列的數值時,
會造成 overflow ,
因此我們需要研究大數字運算。
在這邊我實作使用 `big_num` 的資料結構
```c
typedef struct _big_num{
unsigned int *num; /*儲存數值*/
unsigned int size; /*配置記憶體的大小*/
int sign; /*正負*/
}big_num;
```
而因為大數字沒有辦法藉由 pirntf 函式輸出,
我們在這邊需要將大數字轉換成 string 來輸出。
```c
/* input : big number which may be larger than 64 bytes
* output : string with pointer
* */
char *big_num_to_string(const big_num *src){
size_t len = (8 * sizeof(int) * src->size) / 3 + 2 + src->sign;
char *s = kmalloc(len, GFP_KERNEL);
char *p = s;
memset(s, '0', len - 1);
s[len - 1] = '\0';
/* src.number[0] contains least significant bits */
for (int i = src->size - 1; i >= 0; i--) {
/* walk through every bit of bn */
for (unsigned int d = 1U << 31; d; d >>= 1) {
/* binary -> decimal string */
int carry = !!(d & src->num[i]);
for (int j = len - 2; j >= 0; j--) {
s[j] += s[j] - '0' + carry;
carry = (s[j] > '9');
if (carry)
s[j] -= 10;
}
}
}
// skip leading zero
while (p[0] == '0' && p[1] != '\0') {
p++;
}
if (src->sign)
*(--p) = '-';
memmove(s, p, strlen(p) + 1);
return s;
}
```
在這邊使用 kmalloc 的原因是因為他能分配連續的記憶體空間,
並且藉由迴圈將每 32 bytes 處理一次,
計算其值相對於 character '0' 的差值,再加上進位,即可得到該位置的數字。
### `big_num_add`
```c
void big_num_add(const big_num *src1, const big_num *src2, big_num *target){
if(src1->sign == src2->sign) { // both positive or negative
big_num_do_add(src1, src2, target);
target->sign = src1->sign;
} else { // different sign
if (src1->sign) // let a > 0, b < 0
SWAP(src1, src2);
int cmp = big_num_cmp(src1, src2);
if (cmp > 0) {
/* |a| > |b| and b < 0, hence c = a - |b| */
big_num_do_sub(src1, src2, target);
target->sign = 0;
} else if (cmp < 0) {
/* |a| < |b| and b < 0, hence c = -(|b| - |a|) */
big_num_do_sub(src2, src1, target);
target->sign = 1;
} else {
/* |a| == |b| */
big_num_resize(target, 1);
target->num[0] = 0;
target->sign = 0;
}
}
}
```
### `big_num_sub`
```c
void big_num_sub(const big_num *src1, const big_num *src2, big_num *target){
big_num tmp = *src1;
tmp.sign ^= 1;
big_num_add(target, &tmp, &*src2);
}
```
因為 $src1 - src2 == src + -(src2)$,
因此我們可以藉由此法重複使用函式。
### `big_num_do_add`
```c
static void big_num_do_add(const big_num *a, const big_num *b, big_num *c)
{
// max digits = max(sizeof(a) + sizeof(b)) + 1
int d = MAX(big_num_msb(a), big_num_msb(b)) + 1;
d = DIV_ROUNDUP(d, 32) + !d;
big_num_resize(c, d); // round up, min size = 1
unsigned long long int carry = 0;
for (int i = 0; i < c->size; i++) {
unsigned int tmp1 = (i < a->size) ? a->num[i] : 0;
unsigned int tmp2 = (i < b->size) ? b->num[i] : 0;
carry += (unsigned long long int) tmp1 + tmp2;
c->num[i] = carry;
carry >>= 32;
}
if (!c->num[c->size - 1] && c->size > 1)
big_num_resize(c, c->size - 1);
}
```
* `MAX(big_num_msb(a), big_num_msb(b)) + 1` 是為了避免算術溢位,所以多加上 1 。
* `ROUNDUP` 的使用是為了向上取整數
* 再利用迴圈相加各個位數
### `big_num_do_sub`
```c
static void big_num_do_sub(const big_num *a, const big_num *b, big_num *c)
{
// max digits = max(sizeof(a) + sizeof(b))
int d = MAX(a->size, b->size);
big_num_resize(c, d);
long long int carry = 0;
for (int i = 0; i < c->size; i++) {
unsigned int tmp1 = (i < a->size) ? a->num[i] : 0;
unsigned int tmp2 = (i < b->size) ? b->num[i] : 0;
carry = (long long int) tmp1 - tmp2 - carry;
if (carry < 0) {
c->num[i] = carry + (1LL << 32);
carry = 1;
} else {
c->num[i] = carry;
carry = 0;
}
}
d = big_num_clz(c) / 32;
if (d == c->size)
--d;
big_num_resize(c, c->size - d);
}
```
* 因為不會有相加溢位的問題,所以不需要加1
* 需要額外處理借位問題
### `big_num_mul`
```c
void big_num_mul(const big_num *src1, const big_num *src2, big_num *target){
int d = big_num_msb(src1) + big_num_msb(src2);
d = DIV_ROUNDUP(d, 32) + !d; // round up, min size = 1
big_num *tmp;
if (target == src1 || target == src2) {
tmp = target; // save c
target = big_num_allocate(d);
} else {
tmp = NULL;
for (int i = 0; i < target->size; i++)
target->num[i] = 0;
big_num_resize(target, d);
}
for (int i = 0; i < src1->size; i++) {
for (int j = 0; j < src2->size; j++) {
unsigned long long int carry = 0;
carry = (unsigned long long int) src1->num[i] * src2->num[j];
big_num_mult_add(target, i + j, carry);
}
}
target->sign = src1->sign ^ src2->sign;
if (tmp) {
big_num_swap(tmp, target); // restore c
big_num_free(target);
}
}
```
### `big_num_left_shift`
```c
void big_num_left_shift(big_num *src, size_t shift){
size_t z = big_num_clz(src);
shift %= 32;
if(!shift) return;
if(shift > z) big_num_resize(src, src->size + 1);
else big_num_resize(src, src->size);
for(int i = src->size - 1; i > 0; i--)
src->num[i] = src->num[i] << shift | src->num[i - 1] >> (32 - shift);
src->num[0] <<= shift;
}
```
### `big_num_clz`
```c
static int big_num_clz(const big_num *src){
int cnt = 0;
for (int i = src->size - 1; i >= 0; i--) {
if (src->num[i]) {
cnt += __builtin_clz(src->num[i]);
return cnt;
} else {
cnt += 32;
}
}
return cnt;
}
```
## 改寫後的斐波那契數的遞迴
### `big_num_fib_fast_doubling`
```c
void big_num_fib_fast_doubling(big_num *dest, unsigned int n){
big_num_resize(dest, 1);
if (n < 2) {
dest->num[0] = n;
return;
}
big_num *f1 = dest, *f2 = big_num_allocate(1);
f1->num[0] = 0;
f2->num[0] = 1;
big_num *t1 = big_num_allocate(1), *t2 = big_num_allocate(1);
for (unsigned int i = 1U << 31; i; i >>= 1) {
/* F(2t) = F(t) * [ 2 * F(t+1) – F(t) ] */
big_num_cpy(t1, f2);
big_num_left_shift(t1, 1);
big_num_sub(t1, f1, t1);
big_num_mul(t1, f1, t1);
/* F(2k+1) = F(k)^2 + F(k+1)^2 */
big_num_mul(f1, f1, f1);
big_num_mul(f2, f2, f2);
big_num_add(t2, f1, f2);
if (n & i) {
big_num_cpy(f1, t2);
big_num_cpy(f2, t1);
big_num_add(f2, t2, f2);
} else {
big_num_cpy(f1, t1);
big_num_cpy(f2, t2);
}
}
big_num_free(f2);
big_num_free(t1);
big_num_free(t2);
}
```
### 時間測量分析