---
tags: linux2021
---
# [2021q1](http://wiki.csie.ncku.edu.tw/linux/schedule) 第 16 週測驗題
:::info
目的: 檢驗學員對 Linux scheduler 和 [signal](https://man7.org/linux/man-pages/man7/signal.7.html) 的認知
:::
==[作答表單](https://docs.google.com/forms/d/e/1FAIpQLSf_SwS8hrcUiJnlXLtV011z6dxtDj57Pph-szEKiuxTqXXWDQ/viewform)==
### 測驗 `1`
研讀 [A (Bare-Bones) User-Level Thread Library](https://www.hacks.moe/posts/1/A_BareBones_UserLevel_Thread_Library) 一文後,下方程式碼嘗試給予更進階的 user-level thread (ULT) 實作: (檔名: `task_sched.c`)
```cpp
#define _GNU_SOURCE
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>
#include <unistd.h>
#include "list.h"
static int preempt_count = 0;
static void preempt_disable(void) { DDD; }
static void preempt_enable(void) { EEE; }
static void local_irq_save(sigset_t *sig_set) {
sigset_t block_set;
sigfillset(&block_set);
HHH(&block_set, SIGINT);
sigprocmask(SIG_BLOCK, &block_set, sig_set);
}
static void local_irq_restore(sigset_t *sig_set) {
sigprocmask(SIG_SETMASK, sig_set, NULL);
}
#define task_printf(...) \
({ \
preempt_disable(); \
printf(__VA_ARGS__); \
preempt_enable(); \
})
typedef void(task_callback_t)(void *arg);
struct task_struct {
struct list_head list;
ucontext_t context;
void *stack;
task_callback_t *callback;
void *arg;
bool reap_self;
};
static struct task_struct *task_current, task_main;
static LIST_HEAD(task_reap);
static void task_init(void) {
INIT_LIST_HEAD(&task_main.list);
task_current = &task_main;
}
static struct task_struct *task_alloc(task_callback_t *func, void *arg) {
struct task_struct *task = calloc(1, sizeof(*task));
task->stack = calloc(1, 1 << 20);
task->callback = func;
task->arg = arg;
return task;
}
static void task_destroy(struct task_struct *task) {
list_del(&task->list);
free(task->stack);
free(task);
}
static void task_switch_to(struct task_struct *from, struct task_struct *to) {
task_current = to;
swapcontext(&from->context, &to->context);
}
static void schedule(void) {
sigset_t set;
local_irq_save(&set);
struct task_struct *next_task =
list_first_entry(&task_current->list, struct task_struct, list);
if (next_task) {
if (task_current->reap_self) list_move(&task_current->list, &task_reap);
task_switch_to(task_current, next_task);
}
struct task_struct *task, *tmp;
list_for_each_entry_safe (task, tmp, &task_reap, list) /* clean reaps */
task_destroy(task);
local_irq_restore(&set);
}
union task_ptr {
void *p;
int i[2];
};
static void local_irq_restore_trampoline(struct task_struct *task) {
JJJ(&task->context.uc_sigmask, SIGALRM);
local_irq_restore(&task->context.uc_sigmask);
}
__attribute__((noreturn)) static void task_trampoline(int i0, int i1) {
union task_ptr ptr = {.i = {i0, i1}};
struct task_struct *task = ptr.p;
/* We switch to trampoline with blocked timer. That is safe.
* So the first thing that we have to do is to unblock timer signal.
* Paired with task_add().
*/
local_irq_restore_trampoline(task);
task->callback(task->arg);
task->reap_self = true;
schedule();
__builtin_unreachable(); /* shall not reach here */
}
static void task_add(task_callback_t *func, void *param) {
struct task_struct *task = task_alloc(func, param);
if (getcontext(&task->context) == -1) abort();
task->context.uc_stack.ss_sp = task->stack;
task->context.uc_stack.ss_size = 1 << 20;
task->context.uc_stack.ss_flags = 0;
task->context.uc_link = NULL;
union task_ptr ptr = {.p = task};
makecontext(&task->context, (void (*)(void)) task_trampoline, 2, ptr.i[0],
ptr.i[1]);
/* When we switch to it for the first time, timer signal must be blocked.
* Paired with task_trampoline().
*/
sigaddset(&task->context.uc_sigmask, SIGALRM);
preempt_disable();
FFF(&task->list, &task_main.list);
preempt_enable();
}
static void timer_handler(int signo, siginfo_t *info, ucontext_t *ctx) {
if (preempt_count) /* once preemption is disabled */
return;
/* We can schedule directly from sighandler because Linux kernel cares only
* about proper sigreturn frame in the stack.
*/
schedule();
}
static void timer_init(void) {
struct sigaction sa = {.sa_handler = (void (*)(int)) timer_handler,
.sa_flags = SA_SIGINFO};
sigfillset(&sa.sa_mask);
sigaction(SIGALRM, &sa, NULL);
}
static void timer_create(unsigned int usecs) { ualarm(usecs, usecs); }
static void timer_cancel(void) { ualarm(0, 0); }
static void timer_wait(void) {
sigset_t mask;
sigprocmask(0, NULL, &mask);
GGG(&mask, SIGALRM);
sigsuspend(&mask);
}
static int cmp_u32(const void *a, const void *b, void *arg) {
uint32_t x = *(uint32_t *) a, y = *(uint32_t *) b;
uint32_t diff = x ^ y;
if (!diff) return 0; /* *a == *b */
diff = diff | (diff >> 1);
diff |= diff >> 2;
diff |= diff >> 4;
diff |= diff >> 8;
diff |= diff >> 16;
diff KKK diff >> 1;
return x & diff ? 1 : -1; /* if *a > *b, then 1; if *a < *b, then -1; */
}
static inline uint32_t random_shuffle(uint32_t x) {
/* by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/> */
x ^= x >> 16;
x *= 0x7feb352dUL;
x ^= x >> 15;
x *= 0x846ca68bUL;
x ^= x >> 16;
return x;
}
#define ARR_SIZE 1000000
static void sort(void *arg) {
char *name = arg;
preempt_disable();
uint32_t *arr = malloc(ARR_SIZE * sizeof(uint32_t));
preempt_enable();
task_printf("[%s] %s: begin\n", name, __func__);
uint32_t r = getpid();
for (int i = 0; i < ARR_SIZE; i++) arr[i] = (r = random_shuffle(r));
task_printf("[%s] %s: start sorting\n", name, __func__);
qsort_r(arr, ARR_SIZE, sizeof(uint32_t), cmp_u32, name);
for (int i = 0; i < ARR_SIZE - 1; i++)
if (arr[i] > arr[i + 1]) {
task_printf("[%s] %s: failed: a[%d]=%u, a[%d]=%u\n", name, __func__,
i, arr[i], i + 1, arr[i + 1]);
abort();
}
task_printf("[%s] %s: end\n", name, __func__);
preempt_disable();
free(arr);
preempt_enable();
}
int main() {
timer_init();
task_init();
task_add(sort, "1"), task_add(sort, "2"), task_add(sort, "3");
preempt_disable();
timer_create(10000); /* 10 ms */
while (!list_empty(&task_main.list) || !list_empty(&task_reap)) {
preempt_enable();
timer_wait();
preempt_disable();
}
preempt_enable();
timer_cancel();
return 0;
}
```
其中 `list.h` 取自 [linux-list/include/list.h](https://github.com/sysprog21/linux-list/blob/master/include/list.h)。該 ULT 原理是透過 [SIGALRM signal](https://www.gnu.org/software/libc/manual/html_node/Alarm-Signals.html) 來模擬作業系統的 timer 中斷,設定的時間間隔為 10 ms,於是每 10 ms 觸發模擬的 timer 中斷,帶動 ULT 排程器,實作 round-robin 排程策略。留意到 [trampoline](https://en.wikipedia.org/wiki/Trampoline_(computing)) 的使用:
> Trampolines (sometimes referred to as indirect jump vectors) are memory locations holding addresses pointing to interrupt service routines, I/O routines, etc. Execution jumps into the trampoline and then immediately jumps out, or bounces, hence the term trampoline.
![](https://i.imgur.com/EbCwNDT.png)
> 出處: [Trampoline mirror may push laser pulse through fabric of the Universe](https://arstechnica.com/science/2019/09/trampoline-mirror-may-push-laser-pulse-through-fabric-of-the-universe/)
預期程式執行輸出:
```
[1] sort: begin
[1] sort: start sorting
[2] sort: begin
[2] sort: start sorting
[3] sort: begin
[3] sort: start sorting
[2] sort: end
[3] sort: end
[1] sort: end
```
注意後 3 行的順序可能是 $3 \to 2 \to 1$ 或 $2 \to 3 \to 1$。
請補完程式碼。
參考資訊:
* [Proper Locking Under a Preemptible Kernel: Keeping Kernel Code Preempt-Safe](https://www.kernel.org/doc/Documentation/preempt-locking.txt)
* [What is a trampoline function?](https://stackoverflow.com/questions/189725/what-is-a-trampoline-function)
* [Functional Programming 風格的 C 語言實作](https://hackmd.io/@sysprog/c-functional-programming)
> trampoline function 的展現
==作答區==
DDD = ?
* `(a)` `preempt_count = 0`
* `(b)` `preempt_count = 1`
* `(c)` `preempt_count--`
* `(d)` `preempt_count++`
EEE = ?
* `(a)` `preempt_count = 0`
* `(b)` `preempt_count = 1`
* `(c)` `preempt_count--`
* `(d)` `preempt_count++`
FFF = ?
* `(a)` `list_add`
* `(b)` `list_add_tail`
GGG = ?
* `(a)` `sigaddset`
* `(b)` `sigdelset`
* `(c)` `sigfillset`
HHH = ?
* `(a)` `sigaddset`
* `(b)` `sigdelset`
* `(c)` `sigfillset`
JJJ = ?
* `(a)` `sigaddset`
* `(b)` `sigdelset`
* `(c)` `sigfillset`
KKK = ?
* `(a)` `=`
* `(b)` `&=`
* `(c)` `|=`
* `(d)` `^=`
:::success
延伸問題:
1. 解釋上述程式碼運作原理,以及對應到 Linux 核心原始碼的若干機制,如 `preempt_disable`, `preempt_enable`, `local_irq_save`, `local_irq_restore` 等等
2. 探討上述程式碼的 signal 使用機制,指出其正確性或效率疑慮,並著手改進
:::