Try   HackMD

linux2021: Hao-yu-lin

tags: linux2021

測驗
α

1. 舉出 Linux 核心原始程式碼裡頭 bit rotation 的案例並說明

include/linux/prandom.h提到

#if BITS_PER_LONG == 64 /* * The core SipHash round function. Each line can be executed in * parallel given enough CPU resources. */ #define PRND_SIPROUND(v0, v1, v2, v3) ( \ v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ ) #define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) #define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) #elif BITS_PER_LONG == 32 /* * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. * This is weaker, but 32-bit machines are not used for high-traffic * applications, so there is less output for an attacker to analyze. */ #define PRND_SIPROUND(v0, v1, v2, v3) ( \ v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ )

看起來似乎是針對 v0 v1 v2 v3 四個變數進行一連串的計算
lib/random32.c找到他的應用

2. x86_64 指令集具備 rotr 和 rotl 指令,上述 C 程式碼經過編譯器最佳化 (例如使用 gcc) 後,能否運用到這二個指令呢?

gcc -o3 bitwise bitwise.c
objdump -d bitwise > bitwise.s

先用 -o3 最佳編譯方式編譯完成後,使用 objdumpz 反組譯獲得 assembly code 擷取重要的部分

重新編譯後,參考了 維基百科的資訊,
程式碼更改如下,以 Left Circular shift 為例

  1. rotla64 Code
uint64_t rotla64 (uint64_t v, unsigned int c) { const int mask = CHAR_BIT * sizeof(v) - 1; c &= mask; return (v << c) | (v >> (-c & mask)); }
Aseembly code
0000000000001149 <rotla64>:
    1149:	f3 0f 1e fa          	endbr64 
    114d:	55                   	push   %rbp
    114e:	48 89 e5             	mov    %rsp,%rbp
    1151:	48 89 7d e8          	mov    %rdi,-0x18(%rbp)
    1155:	89 75 e4             	mov    %esi,-0x1c(%rbp)
    1158:	c7 45 fc 3f 00 00 00 	movl   $0x3f,-0x4(%rbp)
    115f:	8b 45 fc             	mov    -0x4(%rbp),%eax
    1162:	21 45 e4             	and    %eax,-0x1c(%rbp)
    1165:	8b 45 e4             	mov    -0x1c(%rbp),%eax
    1168:	48 8b 55 e8          	mov    -0x18(%rbp),%rdx
    116c:	48 89 d6             	mov    %rdx,%rsi
    116f:	89 c1                	mov    %eax,%ecx
    1171:	48 d3 e6             	shl    %cl,%rsi
    1174:	8b 45 e4             	mov    -0x1c(%rbp),%eax
    1177:	f7 d8                	neg    %eax
    1179:	89 c2                	mov    %eax,%edx
    117b:	8b 45 fc             	mov    -0x4(%rbp),%eax
    117e:	21 d0                	and    %edx,%eax
    1180:	48 8b 55 e8          	mov    -0x18(%rbp),%rdx
    1184:	89 c1                	mov    %eax,%ecx
    1186:	48 d3 ea             	shr    %cl,%rdx
    1189:	48 89 d0             	mov    %rdx,%rax
    118c:	48 09 f0             	or     %rsi,%rax
    118f:	5d                   	pop    %rbp
    1190:	c3                   	retq   
  1. rotle64 Code
uint64_t rotle64(uint64_t v, uint64_t c){ return (v << c) | (v >> (64 - c)); }
Assembly code
0000000000001191 <rotle64>: 1191: f3 0f 1e fa endbr64 1195: 55 push %rbp 1196: 48 89 e5 mov %rsp,%rbp 1199: 48 89 7d f8 mov %rdi,-0x8(%rbp) 119d: 48 89 75 f0 mov %rsi,-0x10(%rbp) 11a1: 48 8b 45 f0 mov -0x10(%rbp),%rax 11a5: 89 c2 mov %eax,%edx 11a7: 48 8b 45 f8 mov -0x8(%rbp),%rax 11ab: 89 d1 mov %edx,%ecx 11ad: 48 d3 c0 rol %cl,%rax 11b0: 5d pop %rbp 11b1: c3 retq
  1. rotli64 Code
uint64_t rotli64 (uint64_t v, unsigned int c) { const int mask = CHAR_BIT * sizeof(v) - 1; c &= mask; return (v << c) | (v >> (32 - c)); }
Assembly code
00000000000011b2 <rotli64>: 11b2: f3 0f 1e fa endbr64 11b6: 55 push %rbp 11b7: 48 89 e5 mov %rsp,%rbp 11ba: 48 89 7d e8 mov %rdi,-0x18(%rbp) 11be: 89 75 e4 mov %esi,-0x1c(%rbp) 11c1: c7 45 fc 3f 00 00 00 movl $0x3f,-0x4(%rbp) 11c8: 8b 45 fc mov -0x4(%rbp),%eax 11cb: 21 45 e4 and %eax,-0x1c(%rbp) 11ce: 8b 45 e4 mov -0x1c(%rbp),%eax 11d1: 48 8b 55 e8 mov -0x18(%rbp),%rdx 11d5: 48 89 d6 mov %rdx,%rsi 11d8: 89 c1 mov %eax,%ecx 11da: 48 d3 e6 shl %cl,%rsi 11dd: b8 20 00 00 00 mov $0x20,%eax 11e2: 2b 45 e4 sub -0x1c(%rbp),%eax 11e5: 48 8b 55 e8 mov -0x18(%rbp),%rdx 11e9: 89 c1 mov %eax,%ecx 11eb: 48 d3 ea shr %cl,%rdx 11ee: 48 89 d0 mov %rdx,%rax 11f1: 48 09 f0 or %rsi,%rax 11f4: 5d pop %rbp 11f5: c3 retq
  1. rotlo32 Code
uint32_t rotlo32 (uint32_t v, unsigned int c) { const int mask = CHAR_BIT * sizeof(v) - 1; c &= mask; return (v << c) | (v >> (32 - c)); }
Assembly code
00000000000011b2 <rotlo32>: 11b2: f3 0f 1e fa endbr64 11b6: 55 push %rbp 11b7: 48 89 e5 mov %rsp,%rbp 11ba: 89 7d ec mov %edi,-0x14(%rbp) 11bd: 89 75 e8 mov %esi,-0x18(%rbp) 11c0: c7 45 fc 1f 00 00 00 movl $0x1f,-0x4(%rbp) 11c7: 8b 45 fc mov -0x4(%rbp),%eax 11ca: 21 45 e8 and %eax,-0x18(%rbp) 11cd: 8b 45 e8 mov -0x18(%rbp),%eax 11d0: 8b 55 ec mov -0x14(%rbp),%edx 11d3: 89 c1 mov %eax,%ecx 11d5: d3 c2 rol %cl,%edx 11d7: 89 d0 mov %edx,%eax 11d9: 5d pop %rbp 11da: c3 retq

由這四個例子可以看出,當使用 v >> (bits - c),才會使用到指令 rol,使用 -c & mask時,編譯器會按照程式執行順序按照步驟執行。

β

1.說明上述程式碼的運作原理

  • 由參考執行輸出,可知這樣的一個數學關係
    {(sz/alignment)alignment+alignment,if sz % alignment  0szif sz % alignment = 0

說明程式碼的運作原理:

#include <stdint.h> static inline uintptr_t align_up(uintptr_t sz, size_t alignment) { uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { /* power of two? */ return (sz + mask) & ~mask; } return (((sz + mask) / alignment) * alignment); }
  • if ((alignment & mask) == 0)
    mask = alignment - 1,以及 if ((alignment & mask) == 0)
    由此可知當 alignment 為 2的次方時(

    20,21,22,23....) if 等式成立

  • (((sz + mask) / alignment) * alignment)
    alignment 不為 2 的次方時,則執行 (((sz + mask) / alignment) * alignment)
    以 alignment = 10 來解釋,即 sz + 9,只有個位數為 0 的十位數不會進位,其餘的皆會進位 十位數會加一,接著計算 / alignment 與 * alignment 來取得答案

  • (sz + mask) & ~mask
    2 的次方為 alignment時,舉例為 8 (

    23 ),從 LSB 開始數前三個 bits,若不為 000,則第四個 bits 直接 + 1 ,否則直接捨去。
    sz + mask 進行,若前三個 bits 不為 0,則 sz + mask 會使得第四個 bits + 1。
    取 ~mask 時,即取得以 alignment 為起始點往高位數複製。以從 LSB 的開始數第四個 bits 開始往高位數複製,LSB 數起來前三個直接捨去。

2. 在 linux 核心原始程式碼找出類似 align_up 的程式碼,並舉例說明其用法

在這份 include/linux/kernel.h 文件中,看到 kernel 核心文件中有 #include <linux/align.h>,進而去搜索 include/linux/align.h

/* @a is a power of 2 value */ #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask))

而 __ALIGN_KERNEL 與 __ALIGN_KERNEL_MASK 在 include/uapi/linux/const.h看到

#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))

這邊進行的程式碼使用方式,與

β 中的程式碼,進行一樣的操作。
而 ALIGN_DOWN(x, a) 則是將 x 先扣掉 a - 1,但之後在 __ALIGN_KERNEL_MASK(x, mask) 中會補加回去,(x - (a-1) + (a-1)) & ~ (a-1) 相當於對 x & ~(a-1)

γ

解釋程式碼輸出 - 字元數量的原理

#include <stdio.h>
#include <unistd.h>

int main(void)
{
    for (int i = 0; i < NNN; i++) {
        fork();
        printf("-");  
        
    }

    fflush(stdout);
    return 0;
}

輸出

i 1 2 3 4 5 6 7 8 9 10 11 12
printf 數 2 8 24 64 160 384 896 2048 4608 10240 22528 49152

當資料在 buffer 內沒有清除 ,則在 fork() 時會連同 buffer 內的資料("-")一同複製,因此最後每一個 process 都會有 NNN 個 "-",因此可以寫成這樣的一個公式:

2NNN×NNN

ϵ

#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> #include <unistd.h> #define PAGE_SIZE 4096 /* FIXME: set at runtime */ typedef struct { int cnt; /* actual pool count */ int pal; /* pool array length (2^x ceil of cnt) */ int min_pool, max_pool; /* minimum/maximum pool size */ void **ps; /* pools */ int *sizes; /* chunk size for each pool */ void *hs[1]; /* heads for pools' free lists */ } mpool; static void *get_mmap(long sz) { void *p = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (p == MAP_FAILED) return NULL; return p; } /* Optimized base-2 integer ceiling, from _Hacker's Delight_ * by Henry S. Warren, pg. 48. Called 'clp2' there. * base-2 integer ceiling */ static unsigned int iceil2(unsigned int x) { x = x - 1; x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); return x+1; } /* mmap a new memory pool of TOTAL_SZ bytes, then build an internal * freelist of SZ-byte cells, with the head at (result)[0]. * Returns NULL on error. */ void **mpool_new_pool(unsigned int sz, unsigned int total_sz) { int o = 0; /* o=offset */ void *p = get_mmap(sz > total_sz ? sz : total_sz); if (!p) return NULL; int **pool = (int **) p; assert(pool); assert(sz > sizeof(void *)); void *last = NULL; int lim = (total_sz / sz); for (int i = 0; i < lim; i++) { if (last) assert(last == pool[o]); o = (i * sz) / sizeof(void *); pool[o] = (int *) &pool[o + (sz / sizeof(void *))]; last = pool[o]; } pool[o] = NULL; return p; } /* Add a new pool, resizing the pool array if necessary. */ static int add_pool(mpool *mp, void *p, int sz) { assert(p); assert(sz > 0); if (mp->cnt == mp->pal) { mp->pal *= 2; /* RAM will exhaust before overflow */ void **nps = realloc(mp->ps, mp->pal * sizeof(void **)); void *nsizes = realloc(mp->sizes, mp->pal * sizeof(int *)); if (!nps || !nsizes) return -1; mp->sizes = nsizes; mp->ps = nps; } mp->ps[mp->cnt] = p; mp->sizes[mp->cnt] = sz; mp->cnt++; return 0; } /* Initialize a memory pool for allocations between 2^min2 and 2^max2, * inclusive. Larger allocations will be directly allocated and freed * via mmap / munmap. * Returns NULL on error. */ mpool *mpool_init(int min2, int max2) { int cnt = max2 - min2 + 1; /* pool array count */ int palen = iceil2(cnt); assert(cnt > 0); mpool *mp = malloc(sizeof(mpool) + (cnt - 1) * sizeof(void *)); void *pools = malloc(palen * sizeof(void **)); int *sizes = malloc(palen * sizeof(int)); if (!mp || !pools || !sizes) return NULL; mp->cnt = cnt; mp->ps = pools; mp->pal = palen; mp->sizes = sizes; mp->min_pool = (1 << min2), mp->max_pool = (1 << max2); memset(sizes, 0, palen * sizeof(int)); memset(pools, 0, palen * sizeof(void *)); memset(mp->hs, 0, cnt * sizeof(void *)); return mp; } /* Free a memory pool set. */ void mpool_free(mpool *mp) { assert(mp); for (long i = 0; i < mp->cnt; i++) { void *p = mp->ps[i]; if (!p) continue; long sz = mp->sizes[i]; assert(sz > 0); if (sz < PAGE_SIZE) sz = PAGE_SIZE; if (munmap(mp->ps[i], sz) == -1) fprintf(stderr, "Failed to unmap %ld bytes at %p\n", sz, mp->ps[i]); } free(mp->ps); free(mp); } /* Allocate memory out of the relevant memory pool. * If larger than max_pool, just mmap it. If pool is full, mmap a new one and * link it to the end of the current one. * Returns NULL on error. */ void *mpool_alloc(mpool *mp, int sz) { void **cur; int i, p; assert(mp); if (sz >= mp->max_pool) { cur = get_mmap(sz); /* just mmap it */ if (!cur) return NULL; return cur; } long szceil = 0; for (i = 0, p = mp->min_pool;; i++, p *= 2) { if (p > sz) { szceil = p; break; } } assert(szceil > 0); cur = mp->hs[i]; /* get current head */ if (!cur) { /* lazily allocate and initialize pool */ void **pool = mpool_new_pool(szceil, PAGE_SIZE); if (!pool) return NULL; mp->ps[i] = mp->hs[i] = pool; mp->sizes[i] = szceil; cur = mp->hs[i]; } assert(cur); if (!(*cur)) { /* if at end, attach to a new page */ void **np = mpool_new_pool(szceil, PAGE_SIZE); if (!np) return NULL; *cur = &np[0]; assert(*cur); if (add_pool(mp, np, szceil) < 0) return NULL; } assert(*cur > (void *) PAGE_SIZE); mp->hs[i] = *cur; /* set head to next head */ return cur; } /* Push an individual pointer P back on the freelist for the pool with size * SZ cells. if SZ is > the max pool size, just munmap it. * Return pointer P (SZ bytes in size) to the appropriate pool. */ void mpool_repool(mpool *mp, void *p, int sz) { int i = 0; if (sz > mp->max_pool) { if (munmap(p, sz) == -1) fprintf(stderr, "Failed to unmap %d bytes at %p\n", sz, p); return; } void **ip = (void **) p; *ip = mp->hs[i]; assert(ip); mp->hs[i] = ip; } #define PMAX 11 int main(void) { /* Initialize a new mpool for values 2^4 to 2^PMAX */ mpool *mp = mpool_init(4, PMAX); srandom(getpid() ^ (intptr_t) &main); /* Assume ASLR */ for (int i = 0; i < 5000000; i++) { int sz = random() % 64; /* also alloc some larger chunks */ if (random() % 100 == 0) sz = random() % 10000; if (!sz) sz = 1; /* avoid zero allocations */ int *ip = (int *) mpool_alloc(mp, sz); *ip = 7; /* randomly repool some of them */ if (random() % 10 == 0) /* repool, known size */ mpool_repool(mp, ip, sz); if (i % 10000 == 0 && i > 0) { putchar('.'); if (i % 700000 == 0) putchar('\n'); } } mpool_free(mp); putchar('\n'); return 0; }

1. 解釋上述程式碼運作原理

Memory 一次分配一個 Page 時( 執行 mpool_alloc ),加入至大小相同的 Memory Pool中,若分配的大小 >

211 時,會透過 MMAP 調整分配,由 測試資料來看 mpool_repool 似乎是隨機被呼叫的,當呼叫 mpool_repool 時,hs[1] 會被重新指向 free list

mpool_repool

在程式執行時 hs 始終指向 0x00000000,唯有當呼叫 mpool_repool時 hs 的才會指向現在 free list

void **ip = (void **) p; *ip = mp->hs[i]; assert(ip); mp->hs[i] = ip;

2. 提出效能和功能的改進策略,並予以實作