目的: 檢驗學員對並行和多執行緒程式設計、Linux: 不只挑選任務的排程器,和 C 語言: bitwise 操作的認知
作答表單: 測驗 1-2 (針對 Linux 核心「設計」課程)
作答表單: 測驗 3-4 (針對 Linux 核心「實作」課程)
1
以下嘗試用 lab0 提及的 setjmp 和 longjmp,搭配〈你所不知道的 C 語言: goto 和流程控制篇〉闡述的 coroutine,開發簡易的排程器,以達到〈並行和多執行緒程式設計〉提及任務切換機制。在網路伺服器的應用場景,快速切換網路連線,以縮減服務的延遲。示意:
| Main loop | | Connection 1 | | Connection 2 |
----------------------------------------------------------
|
|
v
*** --> resume --> ***
***
***
*** <-- yield <-- ***
***
*** ------------> resume -------------> ***
***
*** <------------ yield <------------- ***
***
***
***
*** --> resume --> ***
***
給定程式的參考執行輸出如下:
Task 0: n = 70
Task 1: n = 70
Task 2: n = 70
Task 0 fib(0) = 0
Task 1 fib(1) = 1
...
Task 0 fib(68) = 72723460248141
Task 1: resume
Task 1 fib(69) = 117669030460994
Task 2: resume
...
Task 2 68
Task 2: resume
Task 2 69
Task 2: resume
Task 2: complete
對照 FIBONACCI 網站列出的數值
程式碼列表如下: (部分遮蔽)
/* Implementing coroutines with setjmp/longjmp */
#include <setjmp.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "list.h"
struct task {
jmp_buf env;
struct list_head list;
char task_name[10];
int n;
int i;
};
struct arg {
int n;
int i;
char *task_name;
};
static LIST_HEAD(tasklist);
static void (**tasks)(void *);
static struct arg *args;
static int ntasks;
static jmp_buf sched;
static struct task *cur_task;
static void task_add(struct task *task)
{
list_add_tail(&task->list, &tasklist);
}
static void task_switch()
{
if (!list_empty(&tasklist)) {
struct task *t = list_first_entry(&tasklist, struct task, list);
list_del(&t->list);
cur_task = t;
longjmp(t->env, 1);
}
}
void schedule(void)
{
static int i;
setjmp(sched);
while (ntasks-- > 0) {
struct arg arg = args[i];
AAAA;
printf("Never reached\n");
}
task_switch();
}
static long long fib_sequence(long long k)
{
long long f[k + 2];
f[0] = 0, f[1] = 1;
for (int i = 2; i <= k; i++) {
f[i] = f[i - 1] + f[i - 2];
}
return f[k];
}
/* A task yields control n times */
void task0(void *arg)
{
struct task *task = malloc(sizeof(struct task));
strcpy(task->task_name, ((struct arg *) arg)->task_name);
task->n = ((struct arg *) arg)->n;
task->i = ((struct arg *) arg)->i;
INIT_LIST_HEAD(&task->list);
printf("%s: n = %d\n", task->task_name, task->n);
if (setjmp(task->env) == 0) {
task_add(task);
longjmp(sched, 1);
}
task = cur_task;
for (; task->i < task->n; task->i += 2) {
if (setjmp(task->env) == 0) {
long long res = fib_sequence(task->i);
printf("%s fib(%d) = %lld\n", task->task_name, task->i, res);
task_add(task);
BBBB;
}
task = cur_task;
printf("%s: resume\n", task->task_name);
}
printf("%s: complete\n", task->task_name);
longjmp(sched, 1);
}
void task1(void *arg)
{
struct task *task = malloc(sizeof(struct task));
strcpy(task->task_name, ((struct arg *) arg)->task_name);
task->n = ((struct arg *) arg)->n;
task->i = ((struct arg *) arg)->i;
INIT_LIST_HEAD(&task->list);
printf("%s: n = %d\n", task->task_name, task->n);
if (setjmp(task->env) == 0) {
task_add(task);
longjmp(sched, 1);
}
task = cur_task;
for (; task->i < task->n; task->i++) {
if (setjmp(task->env) == 0) {
printf("%s %d\n", task->task_name, task->i);
task_add(task);
CCCC;
}
task = cur_task;
printf("%s: resume\n", task->task_name);
}
printf("%s: complete\n", task->task_name);
longjmp(sched, 1);
}
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
int main(void)
{
void (*registered_task[])(void *) = {task0, task0, task1};
struct arg arg0 = {.n = 70, .i = 0, .task_name = "Task 0"};
struct arg arg1 = {.n = 70, .i = 1, .task_name = "Task 1"};
struct arg arg2 = {.n = 70, .i = 0, .task_name = "Task 2"};
struct arg registered_arg[] = {arg0, arg1, arg2};
tasks = registered_task;
args = registered_arg;
ntasks = ARRAY_SIZE(registered_task);
schedule();
return 0;
}
其中 task0
和 task1
分別計算 Fibonacci 數的偶數和奇數項,list.h
標頭檔來自 lab0-c。請補完程式碼,使程式碼運作符合預期。作答規範:
AAAA
, BBBB
, CCCC
均為表示式,三者應當包含 task
,
和 ;
延伸問題:
2
研讀 A (Bare-Bones) User-Level Thread Library 一文後,延伸測驗 1
,我們可利用 SIGALRM
signal 來實作搶佔式多工,下方的程式建立 3 個排序的任務,原理是藉由 SIGALRM signal 來模擬作業系統的 timer 中斷,設定的時間間隔為 10 ms,於是每 10 ms 觸發模擬的 timer 中斷,帶動排程器,實作 round-robin 排程策略。留意到 trampoline 的使用:
Trampolines (sometimes referred to as indirect jump vectors) are memory locations holding addresses pointing to interrupt service routines, I/O routines, etc. Execution jumps into the trampoline and then immediately jumps out, or bounces, hence the term trampoline.
預期程式執行輸出:
[1] sort: begin
[1] sort: start sorting
[2] sort: begin
[2] sort: start sorting
[3] sort: begin
[3] sort: start sorting
[2] sort: end
[3] sort: end
[1] sort: end
注意後 3 行的順序可能是
請補完程式碼,使其運作符合預期。作答規範:
DDDD
和 EEEE
為函式名稱FFFF
是表示式,應包含 >>
運算子,但不包含小括號 (即 (
和 )
)參考資訊:
trampoline function 的展現
延伸問題:
preempt_disable
, preempt_enable
, local_irq_save
, local_irq_restore
等等coro
的實作,探討其原理3
除法運算的成本往往高於加法和乘法,於是改進的策略被引入。其中一種策略是將除法改為乘法運算,例如
重新檢視
const uint32_t D = 3;
#define M ((uint64_t)(UINT64_C(0xFFFFFFFFFFFFFFFF) / (D) + 1))
/* compute (n mod d) given precomputed M */
uint32_t quickmod(uint32_t n)
{ uint64_t quotient = ((__uint128_t) M * n) >> 64;
return n - quotient * D;
}
其中 __uint128_t
是 GCC extension,用以表示 128 位元的無號數值。
其中 M
無條件捨去的除法運算再補一對應到「取上界」, 故
由 Facebook 公司所維護的 jemalloc 是個高效能的記憶體配置器 (memory allocator,即 malloc 和 free 函式對應的實作),特別在多核多執行緒的環境有優異表現,在其原始程式碼 include/jemalloc/internal/div.h 和 src/div.c 也運用類似的技巧:已知
假設
證明
證明
比較 jemalloc 的快速除法與通用除法運算
O0
到 O3
都做測試測試結果:
-O0
-O1
-O2
-O3
fastdiv (部分程式碼遮蔽) 運用上述技巧,提出較通用除法快的實作。以下是測試程式碼:
#include <assert.h>
#include <stdio.h>
#include "fastdiv.h"
int main(int argc, char **argv)
{
const uint64_t MAX_NUM = 1 << 16;
const uint64_t MAX_DIV = 1 << 14;
for (uint64_t d = 1U; d < MAX_DIV; ++d) {
div_t fd; /* general method */
fastdiv(&fd, d); /* divide-by-d */
const uint64_t mul = fd.mul, add = fd.add, shift = fd.shift;
div_t fd2; /* bounded method optimization */
fastdiv_bounded(&fd2, d, MAX_DIV); /* divide-by-d */
const uint64_t mul2 = fd2.mul, add2 = fd2.add, shift2 = fd2.shift;
assert(mul2 == 1U || add2 == 0U || shift2 == 0U);
assert(add2 == 0U);
for (uint64_t num = 0; num < MAX_NUM; ++num) {
const uint64_t normal = num / d;
const uint64_t general = (num * mul + add) >> shift;
const uint64_t bounded = (num * mul2) >> shift2;
assert(general == normal);
assert(bounded == normal);
}
}
}
請補完程式碼,使 assert
巨集不會在執行時期觸發錯誤並運作符合預期。作答規範:
AAAA
, BBBB
, CCCC
, DDDD
皆為整數(
和 )
),以最精簡的形式書寫延伸問題:
fastdiv
的效率,並試圖解釋其行為4
延伸第 5 週測驗題,我們想開發一套快速的 heap allocator,其記憶體開銷約為 1.67%,並比照第 5 週提到的 TLSF,嘗試建立時間複雜度為
參考執行輸出:
This 268435456 bytes heap requires 824 bytes for its base structure plus 4473928 bytes (1.67%) for bookkeeping.
There are 91 base sizes.
allocated 1048576 times (16 + 32 + 64 + 128 + 16) bytes
freed them all.
Allocated 2048 times 69392 bytes.
Freed them all.
Allocated 16777216 times 16 bytes.
Freed them all.
Allocated 8388608 times 24 bytes.
Freed them all.
Allocated 8388608 times 32 bytes.
Freed them all.
Allocated 5242880 times 48 bytes.
Freed them all.
Allocated 4194304 times 61 bytes.
Freed them all.
Allocated 3145728 times 65 bytes.
Freed them all.
Allocated 3145728 times 79 bytes.
Freed them all.
Allocated 3145728 times 80 bytes.
Freed them all.
Allocated 2097152 times 81 bytes.
Freed them all.
Allocated 32768 times 4368 bytes.
Freed them all.
Allocated 524288 times 345 bytes.
Freed them all.
其原始程式碼可見 heap.c (部分遮蔽)。請補完程式碼,使其運作符合預期。作答規範:
EEEE
, FFFF
皆為整數GGGG
為 hexadecimal literal