# linux2021: Hao-yu-lin ###### tags: `linux2021` ## 測驗 $\alpha$ ### 1. 舉出 Linux 核心原始程式碼裡頭 bit rotation 的案例並說明 在 [include/linux/prandom.h](https://github.com/torvalds/linux/blob/1dfb0f47aca11350f45f8c04c3b83f0e829adfa9/include/linux/prandom.h)提到 ```c= #if BITS_PER_LONG == 64 /* * The core SipHash round function. Each line can be executed in * parallel given enough CPU resources. */ #define PRND_SIPROUND(v0, v1, v2, v3) ( \ v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ ) #define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) #define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) #elif BITS_PER_LONG == 32 /* * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. * This is weaker, but 32-bit machines are not used for high-traffic * applications, so there is less output for an attacker to analyze. */ #define PRND_SIPROUND(v0, v1, v2, v3) ( \ v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ ) ``` 看起來似乎是針對 v0 v1 v2 v3 四個變數進行一連串的計算 在 [lib/random32.c](https://github.com/torvalds/linux/blob/5bfc75d92efd494db37f5c4c173d3639d4772966/lib/random32.c)找到他的應用 ### 2. x86_64 指令集具備 rotr 和 rotl 指令,上述 C 程式碼經過編譯器最佳化 (例如使用 gcc) 後,能否運用到這二個指令呢? ``` gcc -o3 bitwise bitwise.c objdump -d bitwise > bitwise.s ``` 先用 -o3 最佳編譯方式編譯完成後,使用 objdumpz 反組譯獲得 assembly code 擷取重要的部分 重新編譯後,參考了 [維基百科](https://en.wikipedia.org/wiki/Circular_shift)的資訊, 程式碼更改如下,以 Left Circular shift 為例 1. rotla64 Code ```c= uint64_t rotla64 (uint64_t v, unsigned int c) { const int mask = CHAR_BIT * sizeof(v) - 1; c &= mask; return (v << c) | (v >> (-c & mask)); } ``` ##### Aseembly code ```clike = 0000000000001149 <rotla64>: 1149: f3 0f 1e fa endbr64 114d: 55 push %rbp 114e: 48 89 e5 mov %rsp,%rbp 1151: 48 89 7d e8 mov %rdi,-0x18(%rbp) 1155: 89 75 e4 mov %esi,-0x1c(%rbp) 1158: c7 45 fc 3f 00 00 00 movl $0x3f,-0x4(%rbp) 115f: 8b 45 fc mov -0x4(%rbp),%eax 1162: 21 45 e4 and %eax,-0x1c(%rbp) 1165: 8b 45 e4 mov -0x1c(%rbp),%eax 1168: 48 8b 55 e8 mov -0x18(%rbp),%rdx 116c: 48 89 d6 mov %rdx,%rsi 116f: 89 c1 mov %eax,%ecx 1171: 48 d3 e6 shl %cl,%rsi 1174: 8b 45 e4 mov -0x1c(%rbp),%eax 1177: f7 d8 neg %eax 1179: 89 c2 mov %eax,%edx 117b: 8b 45 fc mov -0x4(%rbp),%eax 117e: 21 d0 and %edx,%eax 1180: 48 8b 55 e8 mov -0x18(%rbp),%rdx 1184: 89 c1 mov %eax,%ecx 1186: 48 d3 ea shr %cl,%rdx 1189: 48 89 d0 mov %rdx,%rax 118c: 48 09 f0 or %rsi,%rax 118f: 5d pop %rbp 1190: c3 retq ``` 2. rotle64 Code ```c= uint64_t rotle64(uint64_t v, uint64_t c){ return (v << c) | (v >> (64 - c)); } ``` ##### Assembly code ```clike= 0000000000001191 <rotle64>: 1191: f3 0f 1e fa endbr64 1195: 55 push %rbp 1196: 48 89 e5 mov %rsp,%rbp 1199: 48 89 7d f8 mov %rdi,-0x8(%rbp) 119d: 48 89 75 f0 mov %rsi,-0x10(%rbp) 11a1: 48 8b 45 f0 mov -0x10(%rbp),%rax 11a5: 89 c2 mov %eax,%edx 11a7: 48 8b 45 f8 mov -0x8(%rbp),%rax 11ab: 89 d1 mov %edx,%ecx 11ad: 48 d3 c0 rol %cl,%rax 11b0: 5d pop %rbp 11b1: c3 retq ``` 3. rotli64 Code ```c= uint64_t rotli64 (uint64_t v, unsigned int c) { const int mask = CHAR_BIT * sizeof(v) - 1; c &= mask; return (v << c) | (v >> (32 - c)); } ``` ##### Assembly code ```clike= 00000000000011b2 <rotli64>: 11b2: f3 0f 1e fa endbr64 11b6: 55 push %rbp 11b7: 48 89 e5 mov %rsp,%rbp 11ba: 48 89 7d e8 mov %rdi,-0x18(%rbp) 11be: 89 75 e4 mov %esi,-0x1c(%rbp) 11c1: c7 45 fc 3f 00 00 00 movl $0x3f,-0x4(%rbp) 11c8: 8b 45 fc mov -0x4(%rbp),%eax 11cb: 21 45 e4 and %eax,-0x1c(%rbp) 11ce: 8b 45 e4 mov -0x1c(%rbp),%eax 11d1: 48 8b 55 e8 mov -0x18(%rbp),%rdx 11d5: 48 89 d6 mov %rdx,%rsi 11d8: 89 c1 mov %eax,%ecx 11da: 48 d3 e6 shl %cl,%rsi 11dd: b8 20 00 00 00 mov $0x20,%eax 11e2: 2b 45 e4 sub -0x1c(%rbp),%eax 11e5: 48 8b 55 e8 mov -0x18(%rbp),%rdx 11e9: 89 c1 mov %eax,%ecx 11eb: 48 d3 ea shr %cl,%rdx 11ee: 48 89 d0 mov %rdx,%rax 11f1: 48 09 f0 or %rsi,%rax 11f4: 5d pop %rbp 11f5: c3 retq ``` 4. rotlo32 Code ```c= uint32_t rotlo32 (uint32_t v, unsigned int c) { const int mask = CHAR_BIT * sizeof(v) - 1; c &= mask; return (v << c) | (v >> (32 - c)); } ``` ##### Assembly code ```clike= 00000000000011b2 <rotlo32>: 11b2: f3 0f 1e fa endbr64 11b6: 55 push %rbp 11b7: 48 89 e5 mov %rsp,%rbp 11ba: 89 7d ec mov %edi,-0x14(%rbp) 11bd: 89 75 e8 mov %esi,-0x18(%rbp) 11c0: c7 45 fc 1f 00 00 00 movl $0x1f,-0x4(%rbp) 11c7: 8b 45 fc mov -0x4(%rbp),%eax 11ca: 21 45 e8 and %eax,-0x18(%rbp) 11cd: 8b 45 e8 mov -0x18(%rbp),%eax 11d0: 8b 55 ec mov -0x14(%rbp),%edx 11d3: 89 c1 mov %eax,%ecx 11d5: d3 c2 rol %cl,%edx 11d7: 89 d0 mov %edx,%eax 11d9: 5d pop %rbp 11da: c3 retq ``` 由這四個例子可以看出,當使用 v >> (bits - c),才會使用到指令 rol,使用 -c & mask時,編譯器會按照程式執行順序按照步驟執行。 ## $\beta$ ### 1.說明上述程式碼的運作原理 - 由參考執行輸出,可知這樣的一個數學關係 \begin{cases} ( sz / alignment ) * alignment + alignment, & \text{if sz % alignment $\ne$ 0} \\ sz & \text{if sz % alignment $=$ 0} \end{cases} 說明程式碼的運作原理: ```cpp= #include <stdint.h> static inline uintptr_t align_up(uintptr_t sz, size_t alignment) { uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { /* power of two? */ return (sz + mask) & ~mask; } return (((sz + mask) / alignment) * alignment); } ``` - if ((alignment & mask) == 0) mask = alignment - 1,以及 if ((alignment & mask) == 0) 由此可知當 alignment 為 2的次方時( $2^0, 2^1, 2^2, 2^3....$) if 等式成立 - (((sz + mask) / alignment) * alignment) alignment 不為 2 的次方時,則執行 (((sz + mask) / alignment) * alignment) 以 alignment = 10 來解釋,即 sz + 9,只有個位數為 0 的十位數不會進位,其餘的皆會進位 十位數會加一,接著計算 / alignment 與 * alignment 來取得答案 - (sz + mask) & ~mask 2 的次方為 alignment時,舉例為 8 ( $2^3$ ),從 LSB 開始數前三個 bits,若不為 000,則第四個 bits 直接 + 1 ,否則直接捨去。 sz + mask 進行,若前三個 bits 不為 0,則 sz + mask 會使得第四個 bits + 1。 取 ~mask 時,即取得以 alignment 為起始點往高位數複製。以從 LSB 的開始數第四個 bits 開始往高位數複製,LSB 數起來前三個直接捨去。 ### 2. 在 linux 核心原始程式碼找出類似 align_up 的程式碼,並舉例說明其用法 在這份 [include/linux/kernel.h](https://github.com/torvalds/linux/blob/master/include/linux/kernel.h) 文件中,看到 kernel 核心文件中有 **#include <linux/align.h>**,進而去搜索 [include/linux/align.h](https://github.com/torvalds/linux/blob/master/include/linux/align.h) ```c= /* @a is a power of 2 value */ #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) ``` 而 __ALIGN_KERNEL 與 __ALIGN_KERNEL_MASK 在 [include/uapi/linux/const.h](https://github.com/torvalds/linux/blob/master/include/uapi/linux/const.h)看到 ```c= #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) ``` 這邊進行的程式碼使用方式,與 $\beta$ 中的程式碼,進行一樣的操作。 而 ALIGN_DOWN(x, a) 則是將 x 先扣掉 a - 1,但之後在 __ALIGN_KERNEL_MASK(x, mask) 中會補加回去,(x - (a-1) + (a-1)) & ~ (a-1) 相當於對 x & ~(a-1) ## $\gamma$ ### 解釋程式碼輸出 - 字元數量的原理 ```c = #include <stdio.h> #include <unistd.h> int main(void) { for (int i = 0; i < NNN; i++) { fork(); printf("-"); } fflush(stdout); return 0; } ``` 輸出 | i | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | |:---------:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:----:|:----:|:-----:|:-----:|:-----:| | printf 數 | 2 | 8 | 24 | 64 | 160 | 384 | 896 | 2048 | 4608 | 10240 | 22528 | 49152 | 當資料在 buffer 內沒有清除 ,則在 fork() 時會連同 buffer 內的資料("-")一同複製,因此最後每一個 process 都會有 NNN 個 "-",因此可以寫成這樣的一個公式:$2^{NNN} \times NNN$ ## $\epsilon$ ```c= #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> #include <unistd.h> #define PAGE_SIZE 4096 /* FIXME: set at runtime */ typedef struct { int cnt; /* actual pool count */ int pal; /* pool array length (2^x ceil of cnt) */ int min_pool, max_pool; /* minimum/maximum pool size */ void **ps; /* pools */ int *sizes; /* chunk size for each pool */ void *hs[1]; /* heads for pools' free lists */ } mpool; static void *get_mmap(long sz) { void *p = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (p == MAP_FAILED) return NULL; return p; } /* Optimized base-2 integer ceiling, from _Hacker's Delight_ * by Henry S. Warren, pg. 48. Called 'clp2' there. * base-2 integer ceiling */ static unsigned int iceil2(unsigned int x) { x = x - 1; x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); return x+1; } /* mmap a new memory pool of TOTAL_SZ bytes, then build an internal * freelist of SZ-byte cells, with the head at (result)[0]. * Returns NULL on error. */ void **mpool_new_pool(unsigned int sz, unsigned int total_sz) { int o = 0; /* o=offset */ void *p = get_mmap(sz > total_sz ? sz : total_sz); if (!p) return NULL; int **pool = (int **) p; assert(pool); assert(sz > sizeof(void *)); void *last = NULL; int lim = (total_sz / sz); for (int i = 0; i < lim; i++) { if (last) assert(last == pool[o]); o = (i * sz) / sizeof(void *); pool[o] = (int *) &pool[o + (sz / sizeof(void *))]; last = pool[o]; } pool[o] = NULL; return p; } /* Add a new pool, resizing the pool array if necessary. */ static int add_pool(mpool *mp, void *p, int sz) { assert(p); assert(sz > 0); if (mp->cnt == mp->pal) { mp->pal *= 2; /* RAM will exhaust before overflow */ void **nps = realloc(mp->ps, mp->pal * sizeof(void **)); void *nsizes = realloc(mp->sizes, mp->pal * sizeof(int *)); if (!nps || !nsizes) return -1; mp->sizes = nsizes; mp->ps = nps; } mp->ps[mp->cnt] = p; mp->sizes[mp->cnt] = sz; mp->cnt++; return 0; } /* Initialize a memory pool for allocations between 2^min2 and 2^max2, * inclusive. Larger allocations will be directly allocated and freed * via mmap / munmap. * Returns NULL on error. */ mpool *mpool_init(int min2, int max2) { int cnt = max2 - min2 + 1; /* pool array count */ int palen = iceil2(cnt); assert(cnt > 0); mpool *mp = malloc(sizeof(mpool) + (cnt - 1) * sizeof(void *)); void *pools = malloc(palen * sizeof(void **)); int *sizes = malloc(palen * sizeof(int)); if (!mp || !pools || !sizes) return NULL; mp->cnt = cnt; mp->ps = pools; mp->pal = palen; mp->sizes = sizes; mp->min_pool = (1 << min2), mp->max_pool = (1 << max2); memset(sizes, 0, palen * sizeof(int)); memset(pools, 0, palen * sizeof(void *)); memset(mp->hs, 0, cnt * sizeof(void *)); return mp; } /* Free a memory pool set. */ void mpool_free(mpool *mp) { assert(mp); for (long i = 0; i < mp->cnt; i++) { void *p = mp->ps[i]; if (!p) continue; long sz = mp->sizes[i]; assert(sz > 0); if (sz < PAGE_SIZE) sz = PAGE_SIZE; if (munmap(mp->ps[i], sz) == -1) fprintf(stderr, "Failed to unmap %ld bytes at %p\n", sz, mp->ps[i]); } free(mp->ps); free(mp); } /* Allocate memory out of the relevant memory pool. * If larger than max_pool, just mmap it. If pool is full, mmap a new one and * link it to the end of the current one. * Returns NULL on error. */ void *mpool_alloc(mpool *mp, int sz) { void **cur; int i, p; assert(mp); if (sz >= mp->max_pool) { cur = get_mmap(sz); /* just mmap it */ if (!cur) return NULL; return cur; } long szceil = 0; for (i = 0, p = mp->min_pool;; i++, p *= 2) { if (p > sz) { szceil = p; break; } } assert(szceil > 0); cur = mp->hs[i]; /* get current head */ if (!cur) { /* lazily allocate and initialize pool */ void **pool = mpool_new_pool(szceil, PAGE_SIZE); if (!pool) return NULL; mp->ps[i] = mp->hs[i] = pool; mp->sizes[i] = szceil; cur = mp->hs[i]; } assert(cur); if (!(*cur)) { /* if at end, attach to a new page */ void **np = mpool_new_pool(szceil, PAGE_SIZE); if (!np) return NULL; *cur = &np[0]; assert(*cur); if (add_pool(mp, np, szceil) < 0) return NULL; } assert(*cur > (void *) PAGE_SIZE); mp->hs[i] = *cur; /* set head to next head */ return cur; } /* Push an individual pointer P back on the freelist for the pool with size * SZ cells. if SZ is > the max pool size, just munmap it. * Return pointer P (SZ bytes in size) to the appropriate pool. */ void mpool_repool(mpool *mp, void *p, int sz) { int i = 0; if (sz > mp->max_pool) { if (munmap(p, sz) == -1) fprintf(stderr, "Failed to unmap %d bytes at %p\n", sz, p); return; } void **ip = (void **) p; *ip = mp->hs[i]; assert(ip); mp->hs[i] = ip; } #define PMAX 11 int main(void) { /* Initialize a new mpool for values 2^4 to 2^PMAX */ mpool *mp = mpool_init(4, PMAX); srandom(getpid() ^ (intptr_t) &main); /* Assume ASLR */ for (int i = 0; i < 5000000; i++) { int sz = random() % 64; /* also alloc some larger chunks */ if (random() % 100 == 0) sz = random() % 10000; if (!sz) sz = 1; /* avoid zero allocations */ int *ip = (int *) mpool_alloc(mp, sz); *ip = 7; /* randomly repool some of them */ if (random() % 10 == 0) /* repool, known size */ mpool_repool(mp, ip, sz); if (i % 10000 == 0 && i > 0) { putchar('.'); if (i % 700000 == 0) putchar('\n'); } } mpool_free(mp); putchar('\n'); return 0; } ``` ### 1. 解釋上述程式碼運作原理 Memory 一次分配一個 Page 時( 執行 mpool_alloc ),加入至大小相同的 Memory Pool中,若分配的大小 > $2^{11}$ 時,會透過 MMAP 調整分配,由 測試資料來看 mpool_repool 似乎是隨機被呼叫的,當呼叫 mpool_repool 時,hs[1] 會被重新指向 free list ##### mpool_repool 在程式執行時 hs 始終指向 0x00000000,唯有當呼叫 mpool_repool時 hs 的才會指向現在 free list ```c= void **ip = (void **) p; *ip = mp->hs[i]; assert(ip); mp->hs[i] = ip; ``` ### 2. 提出效能和功能的改進策略,並予以實作