# OSC lab5 ### init running queue, waiting queue, stopped_queue ```c #define LIST_HEAD_INIT(name) \ { \ &(name), &(name) \ } list running_queue = LIST_HEAD_INIT(running_queue); list waiting_queue = LIST_HEAD_INIT(waiting_queue); list stopped_queue = LIST_HEAD_INIT(stopped_queue); convert to: list running_queue = { &(running_queue), &(running_queue) } //first as prev, second as next ``` ### Main function ```c void kernel_main(void) { uart_send_string("Hello, world!\n"); mm_init(); thread_init(); // thread_create(&shell); exe_new_prog("syscall.img"); timeout_event_init(); add_timer((timer_callback)thread_schedule, (size_t)0, MS(SCHE_CYCLE)); enable_interrupt(); idle(); } ``` ### thread_init() - create an init thread to serve as the idle thread, always first in the running queue ```c void thread_init() { struct task *init = create_task(); set_thread_ds(init); // init the thread structure } ``` - thread init will be the init_task in running queue and will not be popped out of the queue. `struct task *init = create_task()//initialize task struct` - struct task - struct cpu_context //store reg info - char*kernel_stack, user_stack, user_prog - size_t (unsigned integer) user_prog_size - state_t state - enum description ```c typedef enum { TASK_RUNNING, TASK_WAITING, TASK_STOPPED, TASK_INIT, } state_t; ``` - pid_t pid (pid_t is typedef unsigned long pid_t;) - unsigned need_resched //check if need reschedule - int exitcode - unsigned long timeout - list list (list is doubly linked list) - struct signal* signal - unsigned int sig_num - signal_handler handler(a func ptr: typedef void (*signal_handler)(int); ) - struct list list (doubly linked list) - struct signal_context *sig_context - TrapFrame *trapframe (typedef struct) - unsigned long regs[31] //general purpose of regs x0~x30 - unsigned long sp //sp_el0 - unsigned long pc //elr_el1 - unsigned long pstate //spsr_el1 - char *user_stack ### create_task 1. dynamically allocate (kmalloc) memory to store the task struct 2.
initialize all the members ```c struct task *create_task() { struct task *new_task = kmalloc(sizeof(struct task)); new_task->cpu_context.lr = new_task->cpu_context.sp = new_task->cpu_context.fp = 0; new_task->kernel_stack = NULL; new_task->user_stack = NULL; new_task->user_prog = NULL; new_task->user_prog_size = 0; new_task->state = TASK_INIT; new_task->pid = task_count++; new_task->need_resched = 0; new_task->exitcode = 0; new_task->timeout = get_current_time() + DEFAULT_TIMEOUT; new_task->signal = NULL; new_task->sig_context = NULL; return new_task; } ``` - warning: the state will be kept as init 3. return ptr of new_task (variable name init) - set_thread_ds(init) //init the thread structure - static inline void set_thread_ds(struct task *cur) - write_sysreg(tpidr_el1, cur) //cur is init - tpidr_el1 explanation: - tpidr_el1 is a special register in the ARM64 (AArch64) CPU architecture. It's known as the Thread ID register for Exception Level 1 (EL1). - The tpidr_el1 register doesn't store the thread ID itself, but usually a pointer to a structure that contains thread-related information. For example, the Linux kernel uses it to store a pointer to the current task's task_struct. - When the system switches from one thread to another (a process known as context switching), the operating system needs to save the state of the current thread, including the value of tpidr_el1, and then load the saved state of the next thread to be executed. - Storing a pointer to the current thread's control block in tpidr_el1 allows quick access to important information about the thread. ## Create Thread ### exe_new_prog() ```c void exe_new_prog(char *filename) { void *target_addr; size_t data_size = cpio_load_program(filename, &target_addr); if (data_size == -1) { uart_send_string("!! 
exe_new_prog fail !!\n"); return; } struct task *prog = thread_create(init_user_prog); prog->user_stack = kmalloc(STACK_SIZE); memset(prog->user_stack, 0, STACK_SIZE); prog->user_prog = target_addr; prog->user_prog_size = data_size; return; } ``` - set target_addr ptr - get data_size from cpio_load_program(filename, &target_addr) ```c size_t cpio_load_program(const char *filename, void **target_addr) { char *prog_addr = findFile(filename); if (prog_addr) { cpio_header *header = (cpio_header *)prog_addr; unsigned int pathname_size = hex2dec(header->c_namesize); unsigned int file_size = hex2dec(header->c_filesize); unsigned int headerPathname_size = sizeof(cpio_header) + pathname_size; align(&headerPathname_size, 4); align(&file_size, 4); uart_printf("load the %s\n",prog_addr + sizeof(cpio_header)); char *file_content = prog_addr + headerPathname_size; *target_addr = kcalloc(file_size); memcpy(*target_addr, file_content, file_size); return file_size; } else { uart_send_string("Not found the program\n"); return -1; } } ``` - thread_create will create a new_task with specific func ptr, after context switch "ret" will return to lr which is addr of func ptr ```c struct task *thread_create(void *func) { struct task *new_thread = create_task(); new_thread->kernel_stack = kmalloc(STACK_SIZE); new_thread->state = TASK_RUNNING; new_thread->cpu_context.lr = (unsigned long)func; new_thread->cpu_context.sp = (unsigned long)new_thread->kernel_stack + STACK_SIZE - sizeof(TrapFrame); add_task(new_thread); return new_thread; } static void init_user_prog() { jump_user_prog(current->user_prog, 0, (char *)current->user_stack + STACK_SIZE - 0x10); } ``` - create_task and assign it into new_thread(task ptr) - dynamic allocate stack size for kernel stack - change state to task_running - setting task->cpu_context.lr = address of func - setting task->cpu_context.sp = address of new_thread's kernel_stack + stack_size(since sp is decreasing) - size of trapframe(need trapframe to store cpu 
states) - add_task this new_thread(task ptr) ```c void add_task(struct task *t) { size_t flags = disable_irq(); insert_tail(&running_queue, &t->list); irq_restore(flags); } ``` - flags will store the current DAIF status then disable interrupt - insert tail will make running queue insert current task at tail ```c void insert_tail(list *head, list *v) { v->next = head; v->prev = head->prev; head->prev->next = v; head->prev = v; } ``` - irq_restore will restore the DAIF status saved in flags (so interrupts return to whatever state they were in before disable_irq) - return new_thread address to task ptr prog - prog->user_stack is set to the address of a newly allocated buffer of STACK_SIZE bytes - the whole user stack is zero-filled - set prog->user_prog = target_addr - set user_prog_size as data_size ## Scheduler and Context Switch ### timeout_event_init() ```c void timeout_event_init() { timeout_queue_head = 0; timeout_queue_tail = 0; unsigned long cntkctl_el1; cntkctl_el1 = read_sysreg(CNTKCTL_EL1); cntkctl_el1 |= 1; write_sysreg(CNTKCTL_EL1, cntkctl_el1); } ``` - cntkctl_el1 controls, through bit[0], whether an el0 program (user program) can access the physical counter. - This approach allows a user-space program to read the physical counter directly, without needing to make a system call. ### add_timer((timer_callback)thread_schedule, (size_t)0, MS(SCHE_CYCLE)); - pass callback function "thread_schedule" size_of_argument and MS(SCHE_CYCLE, which is 30LL) `#define MS(n) (n * 1LL)` - steps of add_timer: 1. kmalloc an address for timeout_event 2. assign the arguments into timeout_event 3. disable irq interrupt and extract current flags 4. insert current event into timeout queue 5.
enable irq interrupt by extracted flags - trigger timer_handler and designed a module specifically for scheduler ```c if (cur_event->callback == (timer_callback)&thread_schedule) { set_resched(current_time); add_timer((timer_callback)thread_schedule, (size_t)0, MS(SCHE_CYCLE)); enable_interrupt(); thread_schedule(0); disable_interrupt(); } ``` - set_reschedule: using inline to expand it ```c static inline void set_resched(unsigned long current_time) { if (current_time >= current->timeout) { current->need_resched = 1; } } ``` - enable DAIF interrupt - thread_schedule ```c void thread_schedule(size_t _) { if (!current->need_resched) { return; } unsigned long flags = disable_irq(); struct task *next = pick_next_task(); next->need_resched = 0; irq_restore(flags); switch_task(next); } ``` - pick_next_task(inside thread_schedule) ```c struct task *pick_next_task() { // check if running_queue have other tasks // if there is only current task if (list_empty(&running_queue)) { while (1) { }; } struct task *next_task = list_first_entry(&running_queue, struct task, list); //list_first_entry(ptr, type, member)= list_entry((ptr)->next,type,member) //list_entry=container_of(ptr,type,member) //container_of = // void * __mptr = (void *)ptr; // ((type *)(__mptr - offsetof(type, member))); }) // so it will return the next_task struct start address //unlink next_task's linked list prev and next unlink(&next_task->list); //insert current next task to running queue insert_tail(&running_queue, &next_task->list); return next_task; } ``` - switch_task(next) ```c void switch_task(struct task *next) { if (current == next) //meaning no other task { return; } //x0 will take first arg and x1 second switch_to(&current->cpu_context, &next->cpu_context); } ``` - switch_to ```asm .global switch_to switch_to: mov x9, sp //x0 = cur task_struct address stp x19, x20, [x0, #0x0] stp x21, x22, [x0, #0x10] stp x23, x24, [x0, #0x20] stp x25, x26, [x0, #0x30] stp x27, x28, [x0, #0x40] stp fp, x9, [x0, 
#0x50] str lr, [x0, #0x60] ldp x19, x20, [x1, #0x0] ldp x21, x22, [x1, #0x10] ldp x23, x24, [x1, #0x20] ldp x25, x26, [x1, #0x30] ldp x27, x28, [x1, #0x40] ldp fp, x9, [x1, #0x50] ldr lr, [x1, #0x60] mov sp, x9 msr tpidr_el1, x1 ret ``` - why does switch_to save only the callee-saved registers and not the caller-saved ones? - Ans: switch_to is entered through an ordinary function call, so the calling code has already saved any caller-saved registers it still needs on its own stack before the call. The callee-saved registers (x19~x28, fp, lr) and sp are the ones a callee must preserve, so switch_to stores them in the current task's cpu_context and loads the next task's values from its cpu_context. ### Enable Interrupt `void enable_interrupt() { asm volatile("msr DAIFClr, 0xf"); }` - Clears all four DAIF mask bits (D, A, I, F), which unmasks debug, SError, IRQ and FIQ exceptions ## The Idle Thread ### idle() ```c static void idle(void) { while (1) { thread_kill_zombies(); thread_schedule(0); } } ``` - thread_kill_zombies ```c void thread_kill_zombies() { unsigned long flags = disable_irq(); while (!list_empty(&stopped_queue)) { struct task *victim = list_first_entry(&stopped_queue, struct task, list); unlink(&victim->list); free_task(victim); } irq_restore(flags); } ``` - thread_schedule(0) ```c void thread_schedule(size_t _) { if (!current->need_resched) { return; } unsigned long flags = disable_irq(); struct task *next = pick_next_task(); next->need_resched = 0; irq_restore(flags); switch_task(next); } ``` ## User Process and System Call ### Syscall - before entering syscall_handler - concept: the user program puts the syscall number in x8 and executes svc, which enters **_el1_lower_el_aarch64_sync** through the exception table.
- TrapFrame struct ```c typedef struct { unsigned long regs[31]; // general purpose regs x0~x30 unsigned long sp; // sp_el0 unsigned long pc; // elr_el1 unsigned long pstate; // spsr_el1 }TrapFrame; ``` - An overview of the exception table: ```asm .global el1_vector_base .align 11 el1_vector_base: exception_entry exception_handler exception_entry exception_handler exception_entry exception_handler exception_entry exception_handler exception_entry _el1_curr_el_spx_sync exception_entry _el1_curr_el_spx_irq exception_entry _el1_curr_el_spx_fiq exception_entry _el1_curr_el_spx_serr //this one exception_entry _el1_lower_el_aarch64_sync exception_entry _el1_lower_el_aarch64_irq exception_entry exception_handler exception_entry exception_handler exception_entry exception_handler exception_entry exception_handler exception_entry exception_handler exception_entry exception_handler //macro of exception_entry: //branch to a handler function. .macro exception_entry label .align 7 b \label .endm //enter svc handler //1. before system call, store gp... in trapframe //2. goto system call //3.
exit system call, load reg info from trapframe _el1_lower_el_aarch64_sync: kernel_entry 0 bl lower_sync_handler kernel_exit 0 //macro of kernel_entry //save all gp_regs and exc_regs .macro kernel_entry el //sub 17*16 for pageframe sub sp, sp, 17 * 16 //x0, x1 store at address of sp + 16*0 stp x0, x1, [sp ,16 * 0] stp x2, x3, [sp ,16 * 1] stp x4, x5, [sp ,16 * 2] stp x6, x7, [sp ,16 * 3] stp x8, x9, [sp ,16 * 4] stp x10, x11, [sp ,16 * 5] stp x12, x13, [sp ,16 * 6] stp x14, x15, [sp ,16 * 7] stp x16, x17, [sp ,16 * 8] stp x18, x19, [sp ,16 * 9] stp x20, x21, [sp ,16 * 10] stp x22, x23, [sp ,16 * 11] stp x24, x25, [sp ,16 * 12] stp x26, x27, [sp ,16 * 13] stp x28, x29, [sp ,16 * 14] //if current exception level = 0 .if \el == 0 //store stack pointer in trapframe->sp mrs x0, sp_el0 stp x30, x0, [sp, 16 * 15] .else str x30, [sp, 16 * 15] .endif //store elr and spsr in trapframe->pc and state mrs x0, elr_el1 mrs x1, spsr_el1 stp x0, x1, [sp, 16 * 16] //mov sp address to x0, so c will take it as first arg mov x0, sp .endm // load all gp_regs(general purpose) and exc_regs .macro kernel_exit el ldp x0, x1, [sp, 16 * 16] msr elr_el1, x0 msr spsr_el1, x1 .if \el ==0 ldp x30, x0, [sp, 16 * 15] msr sp_el0, x0 .else ldr x30, [sp, 16 * 15] .endif ldp x28, x29, [sp ,16 * 14] ldp x26, x27, [sp ,16 * 13] ldp x24, x25, [sp ,16 * 12] ldp x22, x23, [sp ,16 * 11] ldp x20, x21, [sp ,16 * 10] ldp x18, x19, [sp ,16 * 9] ldp x16, x17, [sp ,16 * 8] ldp x14, x15, [sp ,16 * 7] ldp x12, x13, [sp ,16 * 6] ldp x10, x11, [sp ,16 * 5] ldp x8, x9, [sp ,16 * 4] ldp x6, x7, [sp ,16 * 3] ldp x4, x5, [sp ,16 * 2] ldp x2, x3, [sp ,16 * 1] ldp x0, x1, [sp ,16 * 0] add sp, sp, 17 * 16 //eret will load previous el and go back eret .endm ``` - enter lower_sync_handler ```c // void lower_sync_handler(TrapFrame *_regs) { unsigned long esr = read_sysreg(esr_el1); // cause of that exception unsigned int ec = ESR_ELx_EC(esr); switch (ec) { //if elr_el1 return 21(10101): case ESR_ELx_EC_SVC64: enable_interrupt(); 
syscall_handler(_regs); disable_interrupt(); break; case ESR_ELx_EC_DABT_LOW: uart_send_string("in Data Abort\n"); break; case ESR_ELx_EC_IABT_LOW: uart_send_string("in Instruction Abort\n"); break; default: return; } } ``` - enter syscall_handler ```c void syscall_handler(TrapFrame *_regs) { //because system call number stored in x8 unsigned int sys_index = _regs->regs[8]; if (sys_index >= NUM_syscalls) { uart_send_string("!!! Invalid system call !!!\n"); return; } (syscall_table[sys_index])(_regs); } //each system call function ptr stored in each syscall_table syscall syscall_table[NUM_syscalls] = { [SYS_GETPID] = &sys_getpid, [SYS_UART_RECV] = &sys_uartrecv, [SYS_UART_WRITE] = &sys_uartwrite, [SYS_EXEC] = &sys_exec, [SYS_FORK] = &sys_fork, [SYS_EXIT] = &sys_exit, [SYS_MBOX] = &sys_mbox_call, [SYS_KILL_PID] = &sys_kill_pid, [SYS_SIGNAL] = &sys_signal, [SYS_SIGKILL] = &sys_sigkill, [SYS_SIGRETURN] = &sys_sigreturn, }; ``` - each system call function - sys_getpid ```c //each returned value will be put in x[0] void sys_getpid(TrapFrame *_regs) { _regs->regs[0] = current->pid; } ``` - sys_uartrecv ```c //size_t uart_read(char buf[], size_t size) ->x0 = buffer, x1 =size //return size of buffer void sys_uartrecv(TrapFrame *_regs) { char *buf = (char *)_regs->regs[0];//get buffer addr int count = _regs->regs[1];//get count num for (int i = 0; i < count; i++)//receive each element to uart { buf[i] = uart_recv(); } _regs->regs[0] = count;//return size of buffer } ``` - sys_uartwrite ```c //size_t uart_write(const char buf[], size_t size) //x0 store buf, x1 store size of buf, and it should return size of buf void sys_uartwrite(TrapFrame *_regs) { char *buf = (char *)_regs->regs[0]; int count = _regs->regs[1]; for (int i = 0; i < count; i++) { uart_send(buf[i]); } _regs->regs[0] = count; } ``` - sys_exec ```c //int exec(const char* name, char *const argv[]) void sys_exec(TrapFrame *_regs) { const char *path = (char *)_regs->regs[0]; //args is also a ptr point to args array. 
const char **args = (const char **)_regs->regs[1]; _regs->regs[0] = do_exec(path, args); } /////////////////////inside sys_exec///////////////////// //current: get current task struct from reading tpidr_el1 #define current get_thread_ds() static inline struct task *get_thread_ds() { return (struct task *)read_sysreg(tpidr_el1); } //do_exec function int do_exec(const char *path, const char *argv[]) { void *target_addr; //assign prog addr to target_addr size_t data_size = cpio_load_program(path, &target_addr); if (data_size == -1) { uart_send_string("!! do_exec fail !!\n"); return -1; } //replace user prog with target_addr(the program want to execute) replace_user_context(target_addr, data_size); jump_user_prog(current->user_prog, current->kernel_stack + STACK_SIZE - sizeof(TrapFrame), current->user_stack + STACK_SIZE - 0x10); return 0; } //replace_user_context static void replace_user_context(void *prog, size_t data_size) { struct task *_task = current; memset(_task->user_stack, 0, STACK_SIZE); kfree(_task->user_prog); _task->user_prog = prog; _task->user_prog_size = data_size; //first convert to char to add 1 as 1 not 1 as one trapframe TrapFrame *trapframe = (TrapFrame *)((char *)_task->kernel_stack + STACK_SIZE - sizeof(TrapFrame)); memset(trapframe, 0, sizeof(TrapFrame)); trapframe->sp = (unsigned long)_task->user_stack + STACK_SIZE - 0x10; } //jump_user_prog void jump_user_prog(void *target_addr, char *kernel_sp, char *user_sp) { asm volatile("mov x0, 0 \n"); //set bit[6:9] as 0, turning off DAIF interrupt mask asm volatile("msr spsr_el1, x0 \n"); // daif=0 //elr_el1 set to the address we want to go back asm volatile("msr elr_el1, %0 \n" ::"r"(target_addr)); //stack pointer el0 asm volatile("msr sp_el0, %0 \n" ::"r"(user_sp)); if (kernel_sp) { asm volatile("mov sp, %0 \n" ::"r"(kernel_sp)); } asm volatile("eret \n"); } /////////////////////inside sys_exec///////////////////// ``` - sys_fork - instruction: Set the parent process’s return value to the child’s id 
and the child process’s return value to 0 to distinguish them ```c void sys_fork(TrapFrame *_regs) { _regs->regs[0] = do_fork(_regs); } /////////////////////inside sys_fork///////////////////// size_t do_fork(TrapFrame *_regs) { struct task *child = fork_context(_regs); add_task(child); return child->pid; } static struct task *fork_context(TrapFrame *_regs) { struct task *child = kmalloc(sizeof(struct task)); unsigned long flags = disable_irq(); *child = *current; // copy the current to child entirely //fork current thread to and give pid based on current task count child->pid = task_count++; irq_restore(flags); child->need_resched = 0; child->user_stack = kmalloc(STACK_SIZE); //cp parent's user_stack to child's user_stack memcpy(child->user_stack, current->user_stack, STACK_SIZE); child->kernel_stack = kmalloc(STACK_SIZE); TrapFrame *trapframe = (TrapFrame *)((unsigned long)child->kernel_stack + STACK_SIZE - sizeof(TrapFrame)); //cp parent's trapframe to child's trapframe memcpy(trapframe, _regs, sizeof(TrapFrame)); child->user_prog = kmalloc(current->user_prog_size); //cp current user_prog to child user_prog memcpy(child->user_prog, current->user_prog, current->user_prog_size); // using x30 (link return register) while function call on AArch64(lr stores return address of func call) //because child's regs[30] will be parent's lr so we need to shift it to child's lr //so we first calculate offset of parent->user_program - regs[30] to get offset of lr //and we add offset of lr to child->user_program and store to regs30 trapframe->regs[30] = (unsigned long)child->user_prog + (_regs->regs[30] - (unsigned long)current->user_prog); //sp also need to shift to child's sp(sp = user_stack + stack_size - pageframe_size) //offset of sp to user_stack = parent's sp(regs->sp) - parent's user_stack + address of child's stack trapframe->sp = (unsigned long)child->user_stack + (_regs->sp - (unsigned long)current->user_stack); //pc is similar to lr, but it stores the addr of next 
instruction trapframe->pc = (unsigned long)child->user_prog + (_regs->pc - (unsigned long)current->user_prog); //pid = 0 trapframe->regs[0] = 0; // child process : return 0 //sp addr = trapframe addr child->cpu_context.sp = (unsigned long)trapframe; //lr will store the restore_regs_eret's addr //why we need restore_regs_eret? -> because switch_to function will use cpu_context directly //but in our child process, it has same cpu_context as parent process, including sp, lr .... //so we will go to restore_regs_eret first to refresh cpu_context child->cpu_context.lr = (unsigned long)restore_regs_eret; return child; /* .global restore_regs_eret restore_regs_eret: ldp x0, x1, [sp, 16 * 16] msr elr_el1, x0 msr spsr_el1, x1 ldp x30, x0, [sp, 16 * 15] msr sp_el0, x0 ldp x28, x29, [sp ,16 * 14] ldp x26, x27, [sp ,16 * 13] ldp x24, x25, [sp ,16 * 12] ldp x22, x23, [sp ,16 * 11] ldp x20, x21, [sp ,16 * 10] ldp x18, x19, [sp ,16 * 9] ldp x16, x17, [sp ,16 * 8] ldp x14, x15, [sp ,16 * 7] ldp x12, x13, [sp ,16 * 6] ldp x10, x11, [sp ,16 * 5] ldp x8, x9, [sp ,16 * 4] ldp x6, x7, [sp ,16 * 3] ldp x4, x5, [sp ,16 * 2] ldp x2, x3, [sp ,16 * 1] ldp x0, x1, [sp ,16 * 0] add sp, sp, 17 * 16 eret */ } /////////////////////inside sys_fork///////////////////// ``` - sys_exit ```c void sys_exit(TrapFrame *_regs) { kill_task(current, _regs->regs[0]); } /////////////////////inside sys_exit///////////////////// void kill_task(struct task *_task, int status) { size_t flags = disable_irq(); _task->state = TASK_STOPPED; _task->need_resched = 1; //unlink itself from running queue unlink(&_task->list); _task->exitcode = status; //insert it to stopped_queue just after head insert_head(&stopped_queue, &_task->list); irq_restore(flags); //switch to other task thread_schedule(0); } /////////////////////inside sys_exit///////////////////// ``` - sys_mbox_call ```c void sys_mbox_call(TrapFrame *_regs) { unsigned int channel = _regs->regs[0]; unsigned int *mailbox = (unsigned int *)_regs->regs[1]; 
mailbox_call(channel, mailbox); } /////////////////////inside sys_mbox_call///////////////////// unsigned int mailbox_call(unsigned char channel, unsigned int *_mailbox) { unsigned int readChannel = (((unsigned int)((unsigned long)_mailbox) & ~0xF) | (channel & 0xF)); while (*MAILBOX_STATUS & MAILBOX_FULL) { } //writes the message address to the mailbox. *MAILBOX_WRITE = readChannel; while (1) { //if not empty, then busy polling while (*MAILBOX_STATUS & MAILBOX_EMPTY) { } if (readChannel == *MAILBOX_READ) { return _mailbox[1] == MAILBOX_RESPONSE; } } return 0; } /////////////////////inside sys_mbox_call///////////////////// ``` - sys_kill_pid ```c void sys_kill_pid(TrapFrame *_regs) { //regs[0] will store killed pid pid_t target = _regs->regs[0]; if (current->pid == target) { kill_task(current, target); return; } struct task *victim = get_task(target); if (victim) { kill_task(victim, 0); } } /////////////////////inside sys_kill_pid///////////////////// struct task *get_task(pid_t target) { struct task *_task; list_for_each_entry(_task, &running_queue, list) { if (_task->pid == target) { return _task; } } return NULL; } #define list_for_each_entry(entry, head, member) \ for (entry = list_entry((head)->next, __typeof__(*entry), member); \ &entry->member != (head); \ entry = list_entry(entry->member.next, __typeof__(*entry), member)) /////////////////////inside sys_kill_pid///////////////////// ``` ### POSIX Signal - simplified steps: 1. **Checking for signals before returning to user mode:** The idea here is that the kernel, which has control over the system, should always check if there are any signals that the process needs to handle before it lets the process go back to executing its normal code. 2. **Handling signals in the correct mode:** If the signal handler is a default one, it can be executed in the kernel mode. But if it's a registered one (provided by the process), it needs to be run in user mode. 3. 
**Saving the original context:** Before running the signal handler, the system should save the current state of the process. This is because the signal handler might cause the process to enter kernel mode again (for example, by making a system call), and when the signal handler is done, the system needs to be able to restore the process to the state it was in before the signal was handled. 4. **Forcing a return to kernel mode:** After the signal handler is done, the process is still in user mode. But the system needs to know that the signal handler is done so it can restore the original context. To achieve this, it can set the return address of the signal handler to a special function, sigreturn(), that will cause the process to enter kernel mode again and let the system know that the signal handling is done. 5. **Allocating a new stack for the signal handler:** When the signal handler is executed, it needs its own stack (the data structure that holds function calls and local variables). The system should allocate this stack, and then deallocate it when the signal handler is done. This stack should also contain the original context of the process and the sigreturn() function, so that when the signal handler is done, the process can return to its original state and enter kernel mode. - sys_signal ```c //we type register, user_process will put arguments in registers(from x0) and put the syscall num in x8. 
void sys_signal(TrapFrame *_regs) { int sig_num = _regs->regs[0]; //functional ptr will be stored at regs[1] signal_handler _hand = (signal_handler)_regs->regs[1]; //for this signal, build a struct to store its properties struct signal *new_signal = kmalloc(sizeof(struct signal)); //store current num and func ptr and initialize linked list new_signal->sig_num = sig_num; new_signal->handler = _hand; new_signal->list.next = new_signal->list.prev = &new_signal->list; //if current task didn't contain signal, then put current signal into it directly if (!current->signal) { current->signal = new_signal; } else { insert_tail(&current->signal->list, &new_signal->list); } } /////////////////////inside sys_signal///////////////////// typedef void (*signal_handler)(int); struct signal { unsigned int sig_num; signal_handler handler; struct list list; }; //default handlers signal_handler signal_table[] = { [0] = &sig_ignore, [1] = &sig_ignore, [2] = &sig_ignore, [3] = &sig_ignore, [4] = &sig_ignore, [5] = &sig_ignore, [6] = &sig_ignore, [7] = &sig_ignore, [8] = &sig_ignore, [SIGKILL] = &sigkill_handler, }; //default sigkill_handler void sigkill_handler(int target) { if (current->pid == target) { kill_task(current, target); return; } struct task *victim = get_task(target); if (victim) { kill_task(victim, 0); } } /////////////////////inside sys_signal///////////////////// ``` - sys_sigkill ```c //sys_sigkill will check if there is a registered handler //do sys_sigkill -> user_process put arguments in [x0...], systemcall num [x8] set as 9 void sys_sigkill(TrapFrame *_regs) { int target = _regs->regs[0]; int SIGNAL = _regs->regs[1]; int is_find = 0; //if current task has signal_struct(after registered), then it will check matched signal num if (current->signal) { struct signal *cur = current->signal; do { //if current signal_num matched with SIGNAL if (cur->sig_num == SIGNAL) { is_find = 1; sig_context_update(_regs, cur->handler); break; } cur = list_entry(cur->list.next, struct 
signal, list); } while (cur != current->signal); } else if (!current->signal && !is_find) { (signal_table[SIGNAL])(target); } } /////////////////////inside sys_sigkill///////////////////// void sig_context_update(TrapFrame *_regs, void (*handler)()) { struct signal_context *sig_context = kmalloc(sizeof(struct signal_context)); sig_context->trapframe = kmalloc(sizeof(TrapFrame)); sig_context->user_stack = kmalloc(STACK_SIZE); //sig_context->trapframe will store the original state of the user_process memcpy(sig_context->trapframe, _regs, sizeof(TrapFrame)); current->sig_context = sig_context; //x30 is link register, after finished handler function it will link to sig_return _regs->regs[30] = (unsigned long)&sig_return; //elr after do the eret at kernel_exit will first goto elr addr _regs->pc = (unsigned long)handler; //exec at user stack _regs->sp = (unsigned long)sig_context->user_stack + STACK_SIZE - 0x10; } struct signal_context { TrapFrame *trapframe; char *user_stack; }; void sig_return(void) { asm volatile( //10 is sys_sigreturn "mov x8, 10\n" "svc 0\n"); } //restore the status of user process void sys_sigreturn(TrapFrame *_regs) { sig_context_restore(_regs); disable_interrupt(); kfree(current->sig_context->trapframe); kfree(current->sig_context->user_stack); kfree(current->sig_context); current->sig_context = NULL; enable_interrupt(); } void sig_context_restore(TrapFrame *_regs) { memcpy(_regs, current->sig_context->trapframe, sizeof(TrapFrame)); } /////////////////////inside sys_sigkill///////////////////// ```