--- title: 2021 年暑期「Linux 核心」課程先決測驗題的測驗 tags: linux kernel quiz --- ## [2021 年暑期「Linux 核心」課程先決測驗題的測驗 心得](https://hackmd.io/@sysprog/linux2021-summer/https%3A%2F%2Fhackmd.io%2F%40sysprog%2Flinux2021-summer-quiz#2021-%E5%B9%B4%E6%9A%91%E6%9C%9F%E3%80%8CLinux-%E6%A0%B8%E5%BF%83%E3%80%8D%E8%AA%B2%E7%A8%8B%E5%85%88%E6%B1%BA%E6%B8%AC%E9%A9%97%E9%A1%8C) ### 測驗 α * 位元旋轉,以下的程式碼要達到下圖的效果 ![](https://i.imgur.com/nDuFqK3.png) * 程式碼(要填入 LLL 和 RRR) ```c= #include <stdint.h> #define __DECLARE_ROTATE(bits, type) \ static inline type rotl##bits(const type v, int c) \ { \ const int mask = (bits) - (1); \ c &= mask; \ \ return (v << c) | (LLL); \ } \ \ static inline type rotr##bits(const type v, int c) \ { \ const int mask = (bits) - (1); \ c &= mask; \ \ return (v >> c) | (RRR); \ } #define DECLARE_ROTATE(bits) __DECLARE_ROTATE(bits, uint##bits##_t) ``` * 使用的案例,可以發現裡面的參數 bits 代表 input 的數有幾個位元 ```c= DECLARE_ROTATE(64); DECLARE_ROTATE(32); DECLARE_ROTATE(16); DECLARE_ROTATE(8); ``` * 解析 * ```const type v``` 意思是輸入的數 * ```int c``` 代表要旋轉幾位 * 以 ```bits``` = ```8```,```v``` = (1010 1110)~2~,```c``` = ```2``` 為例 * 帶入 ```rotl##bits``` = ```rotl8``` * ```mask``` = ```(bits) - (1)``` = (0000 0111)~2~ * ```c &= mask``` => ```c``` = (0000 0010)~2~ & (0000 0111)~2~ = (0000 0010)~2~ * 再來是最後的 ```(v << c) | (LLL)```,可以發現 ```(v << c)``` 就是代表把原本數先位移,後面補零 * ```LLL``` 要做的事就是把位移掉的數取出來即可,也就是 ```v >> (bits - c)``` * 以上的解法會遇到,當 shift 為 0 時,會遇到 C 語言中規範的 undefined behavior > The integer promotions are performed on each of the operands. The type of the result is that of the promoted left operand. If the value of the right operand is negative or is greater than or equal to the width of the promoted left operand, the behavior is undefined. * 所以 ```LLL``` 要改用 ``` v << (-c & mask) ```,至於為甚麼會相等,可以用補數的方面來思考 ![](https://i.imgur.com/uLZNn3b.png) * 觀察上圖,根據二補數的特性,+6: (0110)~2~ 跟 -2: (1110)~2~ 的差別只有最高位元,因此將(1)==+2 取負號== 後再 (2)==利用 mask 移去 signed bit== ,即可得到 +6。 * 從上例來看,也就是```(bits - c)```是```(8 - 2) = +6```,然後轉成 ```(-c & mask)```,以下為推導 1. ```c = +2``` 取負號得到```-2```後再做 2. 用 ```mask = (bits-1)``` 移去 ```signed bits``` 即可得到 ```+6``` * [參考](https://hackmd.io/@sysprog/bitwise-rotation?fbclid=IwAR1RJHmc631t6Bms29L2wRzxEZIkTOqdksJQsc83o7d2E7ktmZzbfFQ4lwo) ### 測驗 β * 以下程式碼可針對給定的 alignment 數值,輸出大於等於 alignment 的記憶體對齊地址: ```c= #include <stdint.h> static inline uintptr_t align_up(uintptr_t sz, size_t alignment) { uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { /* power of two? */ return MMM; } return (((sz + mask) / alignment) * alignment); } ``` * 測試 ```c= align_up(120, 4) = 120 align_up(121, 4) = 124 align_up(122, 4) = 124 align_up(123, 4) = 124 ``` * 解析: 從提示可知 if 為判斷 alignment 是否為 2 的冪次,如果是就可以直接用 mask 來去除```(sz + mask)```除以它之後的餘數。如果 alignment 不是 2 的冪次就只能乖乖用除法乘法來做 ```c= #include <stdio.h> #include <stdint.h> static inline uintptr_t align_up(uintptr_t sz, size_t alignment) { uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { /* power of two? */ return (sz + mask) & (~mask); } return (((sz + mask) / alignment) * alignment); } int main() { printf("%ld\n", align_up(121, 3)); return 0; } ``` ### 測驗 γ * 若要讓 ```./fork | wc -c``` 的輸出為 49152,那上方程式碼的 NNN 應為何值? ```c= #include <stdio.h> #include <unistd.h> int main(void) { for (int i = 0; i < NNN; i++) { fork(); printf("-"); } fflush(stdout); return 0; } ``` * 要先知道[ IO Buffering ](https://medium.com/%E9%96%B1%E8%AE%80-c-c/advance-io-buffering-46daed8e1176) 的概念,簡單來說就是程式在遇到 printf() 時不會馬上輸出到螢幕上,因為要做切換到輸出設備很花時間沒效率,所以通常都會先寫入到 Buffer 之中,但是若遇到以下條件就會馬上輸出到螢幕上: 1. 程式(process)結束 2. 遇到 '\n' 3. 緩衝區滿了 (e.g., 1024 char) * 碰到以上三個條件後都會呼叫 ```fflush(stdout)```,也可以手動呼叫直接印到螢幕上 * 接下來要知道的[觀念](https://blog.licon.tw/operating-system/2020/08/08/process.html)就是,parent 和 child process 會共享 IO Buffer,舉例來說如果 parent 的 IO Buffer 有 "- -",那麼當下 fork 出來的 child 的 IO Buffer 也會有 "- -" * 透過以上觀念可以對題目推出公式,'-' 的數量 = 迴圈數*2^迴圈數^,從而看出 49152 = 12 * 2^12^ ### 測驗 ζ ```c= #include <threads.h> enum { Q_OK, Q_ERROR }; typedef struct { /* Queue node */ void *value; void *next; } node_t; typedef struct { /* Two lock queue */ node_t *first, *last; mtx_t *first_mutex, *last_mutex; } con_queue_t; /* Free the queue struct. It assumes that the queue is depleted, and it will * not manage allocated elements inside of it. */ void con_free(con_queue_t *); #include <stdlib.h> #include <stdio.h> #include <string.h> inline static node_t *_con_node_init(void *value) { node_t *node = malloc(sizeof(node_t)); if (!node) return NULL; node->value = value; node->next = NULL; return node; } /* Allocates and initializes queue. * Returns a pointer to an allocated struct for the synchronized queue or NULL * on failure. */ con_queue_t *con_init() { /* Allocate queue */ con_queue_t *queue = malloc(sizeof(con_queue_t)); if (!queue) return NULL; if ((queue->first_mutex = malloc(sizeof(mtx_t))) == NULL) { free(queue); return NULL; } if ((queue->last_mutex = malloc(sizeof(mtx_t))) == NULL) { free(queue->first_mutex); free(queue); return NULL; } if (mtx_init(queue->first_mutex, mtx_plain) != thrd_success || mtx_init(queue->last_mutex, mtx_plain) != thrd_success) { con_free(queue); return NULL; } int *num = malloc(sizeof(int)); *num = -2; node_t *dummy = _con_node_init(num); if (!dummy) { con_free(queue); return NULL; } queue->first = queue->last = dummy; return queue; } void con_free(con_queue_t *queue) { if (!queue) return; if (!queue->first) free(queue->first); if (queue->first_mutex) { mtx_destroy(queue->first_mutex); free(queue->first_mutex); } if (queue->last_mutex) { mtx_destroy(queue->last_mutex); free(queue->last_mutex); } free(queue); } /* Add element to queue. The client is responsible for freeing elementsput into * the queue afterwards. Returns Q_OK on success or Q_ERROR on failure. */ int con_push(con_queue_t *restrict queue, void *restrict new_element) { /* Prepare new node */ node_t *node = _con_node_init(new_element); if (!node) return Q_ERROR; printf("push value: %d\n", *(int*)node->value); /* Add to queue with lock */ mtx_lock(queue->last_mutex); /* AAA */ queue->last->next = node; queue->last = node; //printf("push value: %d\n", *(int*)queue->last->value); mtx_unlock(queue->last_mutex); return Q_OK; } /* Retrieve element and remove it from the queue. * Returns a pointer to the element previously pushed in or NULL of the queue is * emtpy. */ void *con_pop(con_queue_t *queue) { mtx_lock(queue->first_mutex); node_t *node = queue->first; /* Node to be removed */ node_t *new_header = queue->first->next; /* become the first in the queue */ if (node->value) printf("popped_value: %d\n", *(int*)node->value); /* Queue is empty */ if (!new_header) { mtx_unlock(queue->first_mutex); return NULL; } /* Queue not empty: retrieve data and rewire */ /* BBB */ void *return_value = queue->first->value; /* CCC */ queue->first = new_header; mtx_unlock(queue->first_mutex); /* Free removed node and return */ free(node); return return_value; } #include <assert.h> #include <stdio.h> #define N_PUSH_THREADS 3 #define N_POP_THREADS 3 #define NUM 10 /* This thread writes integers into the queue */ int push_thread(void *queue_ptr) { con_queue_t *queue = (con_queue_t *) queue_ptr; /* Push ints into queue */ for (int i = 0; i < NUM; ++i) { int *pushed_value = malloc(sizeof(int)); *pushed_value = i; if (con_push(queue, pushed_value) != Q_OK) printf("Error pushing element %i\n", i); } thrd_exit(0); } /* This thread reads ints from the queue and frees them */ int pop_thread(void *queue_ptr) { con_queue_t *queue = (con_queue_t *) queue_ptr; /* Read values from queue. Break loop on -1 */ while (1) { int *popped_value = con_pop(queue); if (popped_value) { if (*popped_value == -1) { puts("here"); free(popped_value); break; } free(popped_value); } else if (!popped_value) { break; } } thrd_exit(0); } int main() { thrd_t push_threads[N_PUSH_THREADS], pop_threads[N_POP_THREADS]; con_queue_t *queue = con_init(); for (int i = 0; i < N_PUSH_THREADS; ++i) { if (thrd_create(&push_threads[i], push_thread, queue) != thrd_success) printf("Error creating push thread %i\n", i); } for (int i = 0; i < N_PUSH_THREADS; ++i) { if (thrd_join(push_threads[i], NULL) != thrd_success) continue; } /* Push kill signals */ for (int i = 0; i < N_POP_THREADS; ++i) { int *kill_signal = malloc(sizeof(int)); /* signal pop threads to exit */ *kill_signal = -1; con_push(queue, kill_signal); } for (int i = 0; i < N_POP_THREADS; ++i) { if (thrd_create(&pop_threads[i], pop_thread, queue) != thrd_success) printf("Error creating pop thread %i\n", i); } for (int i = 0; i < N_POP_THREADS; ++i) { if (thrd_join(pop_threads[i], NULL) != thrd_success) continue; } con_free(queue); return 0; } ``` * node 結構: ```... <- node3 <- node2 <- node1``` * queue 結構: * ```first``` 指向最一開始的 node * ```last``` 則是指向最晚進來的 node * ```first_mutex``` 顧名思義就是 first node 的 mutex lock * ```last_mutex``` 同理 * ```node_t *_con_node_init(void *value)```: 初始化 node,並回傳 * ```con_queue_t *con_init()```: 初始化 queue,first 和 last 指向一個 value 為 -2 的 dummy node * 問題: 這邊改 NULL 成 -2,因為最後一個點如果是 NULL 在 ```pop_thread()``` 會有無限迴圈 * ```con_free(con_queue_t *queue)```: 釋放掉 queue * ```int con_push(con_queue_t *restrict queue, void *restrict new_element)```: 透過 ```_con_node_init(new_element)``` 建立新的 node 然後把它放到 node 最尾端( [restrict](https://liquid0118.pixnet.net/blog/post/48494846) 代表這個指標是唯一指向這個 data 的 pointer ),最後再更新 queue 的 last pointer * ```con_pop()``` : 把 first node 移出 queue 結構 * 把 ```return_value``` 設成這個 first node 的 value * 然後把 queue 的 first 只到下一個 node(new_header) * free 掉這個 node 後,再回傳 ```return_value``` * ```int push_thread(void *queue_ptr)```: thread 要做的事,把 value 為 ```0 ~ NUM-1``` 的 node push 到 queue 中 * ```int pop_thread(void *queue_ptr)```: 一直 ```con_pop()``` 並且 free 掉回傳的 value,直到遇到 node value 為 ```-1``` 的點為止 * 如果第一個點 dummy value 是 NULL,原本的程式碼判斷條件會跟最後一點重複導致無限迴圈 * 如果 ```popped_value``` 回傳 NULL,代表 queue 只剩下一個點了,就直接 break * 如果有 4 個 threads,那麼 ```if (*popped_value == -1``` 就會進去 ==3== 次 * main() 要注意的地方就是 push 的 for 迴圈 與 push ```-1``` 之間一定要放 ```thrd_join(push_threads[i], NULL)``` 代表 ```-1``` 一定最後 push,之後 pop 後就 ```con_free(queue)``` * 再來就是 pop 要放在 ```thrd_join(push_threads[i], NULL)``` 後面,才不會發生先 pop 後 push 的情況 * 填空 * AAA: ```queue->last->next = node;``` * BBB: ```void *return_value = queue->first->value;``` * CCC: ```queue->first = new_header;``` * [mtx_init](https://en.cppreference.com/w/c/thread/mtx_init)、[C11 執行緒庫](https://www.delftstack.com/zh-tw/howto/c/c11-thread-example-in-c/)、[遞迴鎖](https://www.796t.com/content/1546841722.html) ### 測驗 ϵ * 由 mmap 系統呼叫來實作 memory pool: ```c= #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> #include <unistd.h> #define PAGE_SIZE 4096 /* FIXME: set at runtime */ typedef struct { int cnt; /* actual pool count */ int pal; /* pool array length (2^x ceil of cnt) */ int min_pool, max_pool; /* minimum/maximum pool size */ void **ps; /* pools */ int *sizes; /* chunk size for each pool */ void *hs[1]; /* heads for pools' free lists */ } mpool; static void *get_mmap(long sz) { void *p = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (p == MAP_FAILED) return NULL; return p; } /* base-2 integer ceiling */ static unsigned int iceil2(unsigned int x) { x = x - 1; x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); return XXX; } /* mmap a new memory pool of TOTAL_SZ bytes, then build an internal * freelist of SZ-byte cells, with the head at (result)[0]. * Returns NULL on error. */ void **mpool_new_pool(unsigned int sz, unsigned int total_sz) { int o = 0; /* o=offset */ void *p = get_mmap(sz > total_sz ? sz : total_sz); if (!p) return NULL; int **pool = (int **) p; assert(pool); assert(sz > sizeof(void *)); void *last = NULL; int lim = (total_sz / sz); for (int i = 0; i < lim; i++) { if (last) assert(last == pool[o]); o = (i * sz) / sizeof(void *); pool[o] = (int *) &pool[o + (sz / sizeof(void *))]; last = pool[o]; } pool[o] = NULL; return p; } /* Add a new pool, resizing the pool array if necessary. */ static int add_pool(mpool *mp, void *p, int sz) { assert(p); assert(sz > 0); if (mp->cnt == mp->pal) { mp->pal *= 2; /* RAM will exhaust before overflow */ void **nps = realloc(mp->ps, mp->pal * sizeof(void **)); void *nsizes = realloc(mp->sizes, mp->pal * sizeof(int *)); if (!nps || !nsizes) return -1; mp->sizes = nsizes; mp->ps = nps; } mp->ps[mp->cnt] = p; mp->sizes[mp->cnt] = sz; mp->cnt++; return 0; } /* Initialize a memory pool for allocations between 2^min2 and 2^max2, * inclusive. Larger allocations will be directly allocated and freed * via mmap / munmap. * Returns NULL on error. */ mpool *mpool_init(int min2, int max2) { int cnt = max2 - min2 + 1; /* pool array count */ int palen = iceil2(cnt); assert(cnt > 0); mpool *mp = malloc(sizeof(mpool) + (cnt - 1) * sizeof(void *)); void *pools = malloc(palen * sizeof(void **)); int *sizes = malloc(palen * sizeof(int)); if (!mp || !pools || !sizes) return NULL; mp->cnt = cnt; mp->ps = pools; mp->pal = palen; mp->sizes = sizes; mp->min_pool = (1 << min2), mp->max_pool = (1 << max2); memset(sizes, 0, palen * sizeof(int)); memset(pools, 0, palen * sizeof(void *)); memset(mp->hs, 0, cnt * sizeof(void *)); return mp; } /* Free a memory pool set. */ void mpool_free(mpool *mp) { assert(mp); for (long i = 0; i < mp->cnt; i++) { void *p = mp->ps[i]; if (!p) continue; long sz = mp->sizes[i]; assert(sz > 0); if (sz < PAGE_SIZE) sz = PAGE_SIZE; if (munmap(mp->ps[i], sz) == -1) fprintf(stderr, "Failed to unmap %ld bytes at %p\n", sz, mp->ps[i]); } free(mp->ps); free(mp); } /* Allocate memory out of the relevant memory pool. * If larger than max_pool, just mmap it. If pool is full, mmap a new one and * link it to the end of the current one. * Returns NULL on error. */ void *mpool_alloc(mpool *mp, int sz) { void **cur; int i, p; assert(mp); if (sz >= mp->max_pool) { cur = get_mmap(sz); /* just mmap it */ if (!cur) return NULL; return cur; } long szceil = 0; for (i = 0, p = mp->min_pool;; i++, p *= 2) { if (p > sz) { szceil = p; break; } } assert(szceil > 0); cur = mp->hs[i]; /* get current head */ if (!cur) { /* lazily allocate and initialize pool */ void **pool = mpool_new_pool(szceil, PAGE_SIZE); if (!pool) return NULL; mp->ps[i] = mp->hs[i] = pool; mp->sizes[i] = szceil; cur = mp->hs[i]; } assert(cur); if (!(*cur)) { /* if at end, attach to a new page */ void **np = mpool_new_pool(szceil, PAGE_SIZE); if (!np) return NULL; *cur = &np[0]; assert(*cur); if (add_pool(mp, np, szceil) < 0) return NULL; } assert(*cur > (void *) PAGE_SIZE); mp->hs[i] = *cur; /* set head to next head */ return cur; } /* Push an individual pointer P back on the freelist for the pool with size * SZ cells. if SZ is > the max pool size, just munmap it. * Return pointer P (SZ bytes in size) to the appropriate pool. */ void mpool_repool(mpool *mp, void *p, int sz) { int i = 0; if (sz > mp->max_pool) { if (munmap(p, sz) == -1) fprintf(stderr, "Failed to unmap %d bytes at %p\n", sz, p); return; } void **ip = (void **) p; YYY; assert(ip); mp->hs[i] = ip; } 對應的測試程式如下: #define PMAX 11 int main(void) { /* Initialize a new mpool for values 2^4 to 2^PMAX */ mpool *mp = mpool_init(4, PMAX); srandom(getpid() ^ (intptr_t) &main); /* Assume ASLR */ for (int i = 0; i < 5000000; i++) { int sz = random() % 64; /* also alloc some larger chunks */ if (random() % 100 == 0) sz = random() % 10000; if (!sz) sz = 1; /* avoid zero allocations */ int *ip = (int *) mpool_alloc(mp, sz); *ip = 7; /* randomly repool some of them */ if (random() % 10 == 0) /* repool, known size */ mpool_repool(mp, ip, sz); if (i % 10000 == 0 && i > 0) { putchar('.'); if (i % 700000 == 0) putchar('\n'); } } mpool_free(mp); putchar('\n'); return 0; } ``` * pool 結構: * ```cnt``` 為 pool 總數 * ```pal``` 為 pool array 長度(ct 取上限到 2^x^) * ```min_pool、max_pool``` 就是字面上的意思,分別代表最大跟最小的 pool size * ```void **ps``` 紀錄每個 pool 的起始位置,包括分配出去的跟未分配的 pools,以便在 ```mpool_free()``` 時方便釋放 * ```*sizes``` 為對應的 pool 的 chunk size * ```void *hs[1]``` 代表每個 pool 的 head(在這個例子中有 ==8== 個 pool,分別是大小 2^4^ ~ 2^11^,可以看成 hs[8][1],分別指向該 pool 還未分配的 chunk 的開頭位址),也就是 free list 結構 * ```void *get_mmap(long sz)``` * 利用 ```mmap()``` 來配置大小為 ```sz``` 的記憶體空間,```fd = -1``` 代表[匿名映射](https://blog.csdn.net/SweeNeil/article/details/83661933) * ```unsigned int iceil2(unsigned int x)``` * 回傳 ```x``` 的上界,以 2^x^ 為單位 * ```void **mpool_new_pool(unsigned int sz, unsigned int total_sz)``` * 利用 ```get_mmap()``` 來請求新的 memory pool * ```sz``` 代表 chunk 大小,```total_sz``` 代表要求的 memory pool 大小 * ```p``` 代表整個 memory pool 的開頭位置,之後按照 ```sz``` 大小來切割 * ```pool[o] = (int *) &pool[o + (sz / sizeof(void *))]``` 這行就是在 assign free list 的值,例如: 這個 chunk 的 head 指向 ==下一個== chunk 的起始位置 * ```pool[o] = NULL``` 最後一個 chunk 的 head 指向 NULL * ```int add_pool(mpool *mp, void *p, int sz)``` * ```mp``` 代表整個管理記憶體的 struct * ```p``` 代表要新加入的 pool * ```sz``` 代表請求的 pool 的 chunk 的大小 * ```if (mp->cnt == mp->pal)``` 意思是如果本來的 cnt 不夠用了(也就是 cnt = pal 時),就把 ```pal``` 乘以 2 倍(因為初始化的時候是用 ```iceil2()```),之後就是用 ```realloc()``` 重新配置新的大小的 ```size``` 跟 ```pool array``` * 最後就是簡單的在最後塞進新的 pool * 為何不用跟新 ```hs[mp->cnt]```: 因為 ```add_pool()``` 唯一會用到的地方就是在 ```mpool_alloc()``` 裡,而呼叫完 ```add_pool()``` 後就會馬上做 ```mp->hs[i] = *cur; /* set head to next head */``` * ```mpool *mpool_init(int min2, int max2)``` * ```mpool``` 初始化,例如: min = 4、max = 11,代表 ```mpool``` 在初始化後會有 ==8== 個 pools,裡面的 chunk 大小依序是 2^4^、2^5^、...、2^11^,每個 pool 大小為設定的 page size(能切幾個代表有幾個有幾個 chunk),然後一開始裡面的值都初始化成 0 * ```mpool *mp = malloc(sizeof(mpool) + (cnt - 1) * sizeof(void *))```,後面的 ```(cnt - 1) * sizeof(void *)``` 是給 ```memset(mp->hs, 0, cnt * sizeof(void *))``` 用的 * ```void mpool_free(mpool *mp)```: 就是 free 掉 mpool * ```void *mpool_alloc(mpool *mp, int sz)``` * 向 mpool 請求 ```sz``` 大小的記憶體,有以下規則 * If larger than max_pool: 如果請求的大小超出 MAX pool 的大小,就直接用 ```get_mmap(sz)``` 給使用者 * lazily allocate & init pool: 如果請求的 pool 是剛經過初始化裡面都是 0(```if (!cur)```),就用一個新的 page 來建一個新的 pool (```mpool_new_pool()```) * if at end, attach to a new page: 如果這個 pool 只剩下最後一個 chunk 可以用了(```if (!(*cur))```),就 new 一個新的 pool 給他(```void **np = mpool_new_pool(szceil, PAGE_SIZE)```),然後接在這個 pool 最後的 chunk 的後面(```*cur = &np[0]```),最後因為有新增 pool,所以要更新一下 mpool 的資訊(```add_pool(mp, np, szceil)```) * ```mp->hs[i] = *cur; /* set head to next head */```,因為最後要 return ```cur``` 這個 chunk,也就代表這個 chunk 已分配,所以 hs[i] 自然也要指到下一個 free chunk * ```for (i = 0, p = mp->min_pool;; i++, p *= 2)```,意思是從 最小的 pool size 開始,找到適合的 size 給 sz (請求的大小) * ```void mpool_repool(mpool *mp, void *p, int sz)``` * 意思是把 p 這個 pointer 插入到 hs[i] 開頭,原本的 hs[i] 變成第二個 * XXX = ```X + 1;``` * YYY = ```*ip = mp->hs[i];``` * 參考 https://github.com/silentbicycle/mpool/blob/master/mpool.h https://blog.csdn.net/ababab12345/article/details/102931841 https://blog.csdn.net/SweeNeil/article/details/83661933 https://hackmd.io/vHx-E01ATw2QUBGKFSjJuQ?view https://hackmd.io/@luswdev/rt-thread-mem-pool#%E5%BB%BA%E7%AB%8B-memory-pool ### 測驗 ζ * 先看 Linux 特有的 ```splice``` 的 Zero-Copy![](https://i.imgur.com/xipTnkG.png) * 適用於在 Linux 中兩個 ```fd``` 之間的資料 Zero-Copy * 過程中的操作有 * 2 次 context switch * 0 次 CPU 複製 * 2 次 DMA 複製 * 流程: * 使用者利用 ```splice()``` 發起 system call,從使用者模式變更到核心模式 * CPU 利用 DMA Controller 將資料從 disk 等裝置複製到 核心空間的 read buffer * CPU 利用核心空間的 read buffer 和 socket buffer 之間建立管線 (PIPE) * CPU 利用 DMA Controller 把資料複製到網路裝置進行資料傳輸 * 從核心模式切回使用者模式,結束執行 * [範例:](https://man7.org/linux/man-pages/man2/splice.2.html),從 ```in_fd``` 移動資料到 ```out_fd``` ```c= splice(fd_in, off_in, fd_out, off_out, len, flags); ``` ```c= int pip[2]; pipe(pip); int ret = splice(in_fd, NULL, pip[1], NULL, 8, SPLICE_F_MOVE); ret = splice(pip[0], NULL, out_fd, NULL, 8, SPLICE_F_MOVE); ``` * [參考](https://hackmd.io/@sysprog/linux2020-zerocopy#splice-%E7%B3%BB%E7%B5%B1%E5%91%BC%E5%8F%AB) * 接著看 ```Poll``` * 與 ```select``` 類似,建立一個 ```fd set```,並且一直走訪查看是否有 事件(event) 發生,例如: [POLLIN](https://man7.org/linux/man-pages/man2/poll.2.html)等 * 範例 ```c= struct pollfd { int fd; // the socket descriptor short events; // bitmap of events we're interested in short revents; // when poll() returns, bitmap of events that occurred }; ``` ```c= struct pollfd ufds[2]; ufds[0].fd = s1; ufds[0].events = POLLIN; // 只檢查一般的資料 ufds[1] = s2; ufds[1].events = POLLIN; // 等待 sockets 上的事件,timeout 時間是 1 秒 rv = poll(ufds, 2, 1000); if (rv == -1) { perror("poll"); // poll() 時發生錯誤 } else if (rv == 0) { printf("Timeout occurred! No data after 3.5 seconds.\n"); } else { // 檢查 s1 上的事件: if (ufds[0].revents & POLLIN) { recv(s1, buf1, sizeof buf1, 0); // 接收一般的資料 } // 檢查 s2 上的事件: if (ufds[1].revents & POLLIN) { recv(s2, buf2, sizeof buf2, 0); } } ``` * [參考](https://beej-zhtw-gitbook.netdpi.net/man_shi_yong_shou_ce/poll) * 了解原程式碼: ```c= #define _GNU_SOURCE #include <arpa/inet.h> #include <errno.h> #include <fcntl.h> #include <netinet/in.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/poll.h> #include <sys/socket.h> #include <unistd.h> static int connect_to(char *ip, int port) { int sockfd = socket(AF_INET, SOCK_STREAM, 0); if (sockfd < 0) { printf("Failed to create socket.\n"); return -1; } struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_port = htons(port); if (inet_pton(AF_INET, ip, &addr.sin_addr) <= 0) { perror("inet_pton"); return -1; } if (connect(sockfd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { printf("Failed to connect.\n"); return -1; } return sockfd; } static void move(int in_fd, int out_fd, int pip[2]) { int ret = splice(in_fd, NULL, pip[1], NULL, 8, SPLICE_F_MOVE); if (ret == -1) { perror("splice(1)"); return; } ret = splice(pip[0], NULL, out_fd, NULL, 8, SPLICE_F_MOVE); if (ret == -1) { perror("splice(1)"); return; } } static void proxy(int cl_fd, int target_fd) { if (target_fd == -1) return; int fds[2]; if (pipe(fds) == -1) { perror("pipe"); return; } struct pollfd polls[2] = { [1] = {III}, [0] = {JJJ}, }; int ret; while ((ret = poll(polls, 2, 1000)) != -1) { if (ret == 0) continue; int from_client = polls[0].revents & POLLIN; if (from_client) move(cl_fd, target_fd, fds); else move(target_fd, cl_fd, fds); } perror("poll"); } #define PORT 1922 int main(int argc, char *argv[]) { if (argc < 3) { fprintf(stderr, "Usage: %s <target IP address> <target port>\n", argv[0]); return -1; } int listenfd = socket(AF_INET, SOCK_STREAM, 0); struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); addr.sin_port = htons(PORT); int optval = 1; setsockopt(listenfd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(optval)); bind(listenfd, (struct sockaddr *) &addr, sizeof(addr)); listen(listenfd, 1); while (1) { int connfd = accept(listenfd, (struct sockaddr *) NULL, NULL); int target_fd = connect_to(argv[1], atoi(argv[2])); if (target_fd >= 0) proxy(connfd, target_fd); close(connfd); } return 0; /* should not reach here */ } ``` * 簡單來說就是一個簡易的 proxy 伺服器,這個代理會代替 localhost 的連線請求(這邊的例子是 port 1922),例如: localhost 想要連到 google,proxy 就會代替它連線,然後把收到的結果回傳給 localhost * ```connect_to``` 負責與 google 建立 socket 連線 * ```move``` 就是在 fd 之間移動資料 * int in_fd: 送來資料的 fd * int out_fd: 要傳給請求資料的 fd * int pip[2]: 因為是要通過 ```splice``` 來傳,所以要建立 ```PIPE``` * ```proxy``` 簡單來說就是在 client_fd 和 target_fd 之間建立 poll,來做 multiplexing I/O * 所以可以猜 ```III```、```JJJ``` 為: ```c= struct pollfd polls[2] = { [1] = {target_fd, POLLIN}, [0] = {cl_fd, POLLIN}, }; ``` * 以 ```epoll``` 改寫程式碼 * 理解 ```epoll```: * 相較於 ```select```、```poll``` 走訪整個 ```fd set``` 來檢查事件,利用 紅黑樹和一個 Linked list,來更快的反應事件。[參考](http://www.gonglin91.com/linux-epoll-epoll%E7%9A%84%E5%8E%9F%E7%90%86%EF%BC%9Bstruct-epoll_event-%E4%B8%BA%E4%BB%80%E4%B9%88%E8%A6%81%E8%BF%99%E6%A0%B7%E8%AE%BE%E8%AE%A1/) * [原理理解1](https://zhuanlan.zhihu.com/p/427512269) * 使用 ```epoll```,基本就三個函式 * ```epollcreate```: 負責創建一個 epoll,用來監控和管理 fd 的池子 * ```epollctl``` 負責 epoll 的 fd 的新增、刪除、修改 * ```epollwait``` 就是負責等待的,讓出 CPU 資源,但是只要有事件進來,會==馬上==會從喚醒(離開這個等待迴圈)。原理是在網路設備收到資料觸發中斷時會順便修改 Linked list,把該 socket fd 放到這個 ready set (Linked list) 裡 * [流程範例](https://man7.org/linux/man-pages/man7/epoll.7.html) * ```epoll_event``` struct ```c= typedef union epoll_data { void *ptr; int fd; __uint32_t u32; __uint64_t u64; } epoll_data_t; struct epoll_event { __uint32_t events; /* Epoll events */ epoll_data_t data; /* User data variable */ }; ``` * ```epoll_create1``` ```c= #define MAX_EVENTS 10 struct epoll_event ev, events[MAX_EVENTS]; epollfd = epoll_create1(0); ``` * ```epollctl```,加入 fd 和 epoll_event 物件 ```c= ev.events = EPOLLIN; ev.data.fd = listen_sock; (epoll_ctl(epollfd, EPOLL_CTL_ADD, listen_sock, &ev) ``` * ```epoll_wait```,```events``` 是一個 陣列,用來存 ```epoll_wait``` 跳出迴圈後的 ```ready set``` ```c= nfds = epoll_wait(epollfd, events, MAX_EVENTS, -1); for (int n = 0; n < nfds; ++n) { if (events[n].data.fd == listen_sock) {...} } ``` * 原程式碼改成 ```epoll``` 後 ```c= #define _GNU_SOURCE #include <arpa/inet.h> #include <errno.h> #include <fcntl.h> #include <netinet/in.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/epoll.h> #include <sys/socket.h> #include <unistd.h> static int connect_to(char *ip, int port) { int sockfd = socket(AF_INET, SOCK_STREAM, 0); if (sockfd < 0) { printf("Failed to create socket.\n"); return -1; } struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_port = htons(port); if (inet_pton(AF_INET, ip, &addr.sin_addr) <= 0) { perror("inet_pton"); return -1; } if (connect(sockfd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { printf("Failed to connect.\n"); return -1; } return sockfd; } static void move(int in_fd, int out_fd, int pip[2]) { int ret = splice(in_fd, NULL, pip[1], NULL, 8, SPLICE_F_MOVE); if (ret == -1) { perror("splice(1)"); return; } ret = splice(pip[0], NULL, out_fd, NULL, 8, SPLICE_F_MOVE); if (ret == -1) { perror("splice(1)"); return; } } #define MAX_EVENTS 10 struct epoll_event ev, events[MAX_EVENTS]; static void proxy(int cl_fd, int target_fd) { if (target_fd == -1) return; int fds[2]; if (pipe(fds) == -1) { perror("pipe"); return; } epollfd = epoll_create1(0); if (epollfd == -1) { perror("epoll_create1"); exit(EXIT_FAILURE); } ev.events = EPOLLIN; ev.data.fd = cl_fd; if (epoll_ctl(epollfd, EPOLL_CTL_ADD, cl_fd, &ev) == -1) { perror("epoll_ctl: cl_fd"); exit(EXIT_FAILURE); } ev.events = EPOLLIN; ev.data.fd = target_fd; if (epoll_ctl(epollfd, EPOLL_CTL_ADD, target_fd, &ev) == -1) { perror("epoll_ctl: target_fd"); exit(EXIT_FAILURE); } int ret; while ((ret = epoll_wait(epollfd, events, MAX_EVENTS, 1000)) != -1) { if (ret == -1) { perror("epoll_wait"); exit(EXIT_FAILURE); } if (ret == 0) continue; for (int i = 0; i < ret; ++i) { int from_client = (events[i].events & POLLIN) && (events[i].data.fd == cl_fd); if (from_client) move(cl_fd, target_fd, fds); else move(target_fd, cl_fd, fds); } } perror("epoll"); } #define PORT 1922 int main(int argc, char *argv[]) { if (argc < 3) { fprintf(stderr, "Usage: %s <target IP address> <target port>\n", argv[0]); return -1; } int listenfd = socket(AF_INET, SOCK_STREAM, 0); struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); addr.sin_port = htons(PORT); int optval = 1; setsockopt(listenfd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(optval)); bind(listenfd, (struct sockaddr *) &addr, sizeof(addr)); listen(listenfd, 1); while (1) { int connfd = accept(listenfd, (struct sockaddr *) NULL, NULL); int target_fd = connect_to(argv[1], atoi(argv[2])); if (target_fd >= 0) proxy(connfd, target_fd); close(connfd); } return 0; /* should not reach here */ } ``` * [參考](https://www.readfog.com/a/1641834490361909248) * [參考](https://hackmd.io/@dNNN2mItS4euNM5VnjPLpQ/linux2021-quiz#%E6%B8%AC%E9%A9%97-%CE%B6)