learning
POSIX決定Linux中signal如何實現
Linux kernel對signal的傳遞過程分成了兩階段
Signal generation: 產生signal, 由kernel在目標process的task_struct
更新signal的狀態
Signal delivery: 傳遞signal, kernel將process的control flow交給signal handler
Signal已被產生但還沒傳遞時,稱為pending signal
Note: SIGKILL、SIGSTOP不能被ignore
task_struct
中跟signal相關的data與其structure
sigpending
有兩個,一個是thread共享的,放在signal->shared_pending
中,給kill
這類針對整個process(所有thread)的signal使用;另一個是private的,放在pending
中,給tkill、tgkill這類針對特定thread的signal使用
以下函數都可以產生Signal
send_sig
send_sig_info
force_sig
force_sig_info
sys_kill
sys_tkill
sys_tgkill
以send_sig
為例, function在kernel/signal.c
中,v5.10.5
版本的呼叫流程大概如下圖,而上述的function最終都會呼叫到send_signal
__send_signal
pid_type
來判斷signal要放進signal->shared_pending
還是pending
__sigqueue_alloc
來創建一個signal queue,加到pending->list
中
SIGKILL
(或目標task帶有PF_KTHREAD)時直接跳到out_set
signalfd_notify
用來通知signalfd
有signal來了
sigaddset
把pending->signal
中代表sig
的bit改成1
Note: sigpending->signal是一個64bit的結構,每個bit對應一個signal
complete_signal
,用signal_wake_up
在要接收signal的thread中設置TIF_SIGPENDING
來完成signal的產生
Note: 如果是SIGKILL,所有thread都會被設置TIF_SIGPENDING
/*
* Abridged excerpt ("..." marks elided code): generate signal `sig` for task `t`.
* Selects a pending set from `type`, queues a sigqueue entry on it, sets the
* signal's bit in that set and then calls complete_signal().
*/
static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
enum pid_type type, bool force)
{
...
//1. Pick the pending set: the shared one unless type is PIDTYPE_PID, in which case t's own.
pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
...
//2. Allocate a sigqueue entry; when allocation succeeds, append it to the chosen list.
q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit);
if (q) {
list_add_tail(&q->list, &pending->list);
...
//3. SIGKILL, or a target task with PF_KTHREAD set, jumps straight to out_set.
// NOTE(review): upstream appears to run this check before the allocation in step 2 - confirm against kernel/signal.c.
if ((sig == SIGKILL) || (t->flags & PF_KTHREAD))
goto out_set;
...
out_set:
signalfd_notify(t, sig); // let signalfd know about sig
sigaddset(&pending->signal, sig); // set sig's bit in the chosen pending set
...
complete_signal(sig, t, type);
...
}
從kernel mode切換回user mode時, kernel都會檢查TIF_SIGPENDING
,如果有被set,代表此thread有signal要處理
ret_to_user
負責跳回userspace,跳回前會檢查TIF_SIGPENDING
,若為1就跳到work_pending
do_notify_resume
,會呼叫do_signal
/*
* Ok, we need to do extra processing, enter the slow path.
*/
work_pending:
mov x0, sp // 'regs'
bl do_notify_resume // do_notify_resume is what goes on to call do_signal
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on // enabled while in userspace
#endif
ldr x1, [tsk, #TSK_TI_FLAGS] // reload the thread flags to re-check TIF_SIGPENDING
b finish_ret_to_user
/*
* "slow" syscall return path.
*/
ret_to_user:
disable_daif
gic_prio_kentry_setup tmp=x3
ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK // mask the work bits (this is the TIF_SIGPENDING check)
cbnz x2, work_pending // nonzero result: branch to work_pending
finish_ret_to_user:
enable_step_tsk x1, x2
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
bl stackleak_erase
#endif
kernel_exit 0
do_signal
get_signal
: 從queue將此次處理的signal取出,放進一個ksignal
的struct中
handle_signal
: 負責為user space準備好處理signal需要的環境
/*
* Abridged excerpt ("..." marks elided code): deliver one pending signal.
* Fetches the signal into a local ksignal with get_signal(); when one is
* returned, it may turn the interrupted system call's result into -EINTR and
* then calls handle_signal() and returns.
*/
static void do_signal(struct pt_regs *regs)
{
...
struct ksignal ksig;
...
/*
* Get the signal to deliver. When running under ptrace, at this point
* the debugger may change all of our registers.
*/
if (get_signal(&ksig)) {
/*
* Depending on the signal settings, we may need to revert the
* decision to restart the system call, but skip this if a
* debugger has chosen to restart at a different PC.
*/
/*
* Applies only while pc still equals restart_addr and retval is
* -ERESTARTNOHAND, -ERESTART_RESTARTBLOCK, or -ERESTARTSYS with
* SA_RESTART not set in the handler's flags.
*/
if (regs->pc == restart_addr &&
(retval == -ERESTARTNOHAND ||
retval == -ERESTART_RESTARTBLOCK ||
(retval == -ERESTARTSYS &&
!(ksig.ka.sa.sa_flags & SA_RESTART))))
{
regs->regs[0] = -EINTR;
regs->pc = continue_addr;
}
handle_signal(&ksig, regs);
return;
}
...
}
get_signal
signr
指定為要處理的signal number
dequeue_signal
往下執行會呼叫sigdelset
跟__sigqueue_free
collect_signal
中的sigdelset
會負責將對應的bit清成0
collect_signal
中的__sigqueue_free
負責將signal從queue中移除
recalc_sigpending
檢查是否還有待傳遞的signal,沒有就把TIF_SIGPENDING
清成0
/*
* Abridged excerpt ("..." marks elided code): choose the next signal to
* deliver to the current task and record it in *ksig.
* Returns true when a signal number greater than zero was stored in ksig->sig.
*/
bool get_signal(struct ksignal *ksig)
{
struct sighand_struct *sighand = current->sighand;
struct signal_struct *signal = current->signal;
int signr; // ends up holding the signal number
...
/*
* Signals generated by the execution of an instruction
* need to be delivered before any other pending signals
* so that the instruction pointer in the signal stack
* frame points to the faulting instruction.
*/
signr = dequeue_synchronous_signal(&ksig->info);
/* No synchronous signal: dequeue one, passing the current task's blocked mask. */
if (!signr)
signr = dequeue_signal(current, &current->blocked, &ksig->info);
...
ksig->sig = signr;
return ksig->sig > 0;
}
/* A ksignal holds everything needed to handle one signal. */
struct ksignal {
struct k_sigaction ka; // how this signal is to be handled
kernel_siginfo_t info; // additional information
int sig; // signal number
};
// On Linux, `sigaction` sets how a specific signal is handled.
handle_signal
負責準備處理signal需要的環境
/*
* OK, we're invoking a handler
*
* Builds the signal frame for ksig and passes the combined setup status to
* signal_setup_done().
*/
static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
sigset_t *oldset = sigmask_to_save();
int ret;
/*
* Perform fixup for the pre-signal frame.
*/
rseq_signal_deliver(ksig, regs);
/*
* Set up the stack frame
* (handlers registered with SA_SIGINFO get the rt frame, all others the plain one).
*/
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
ret = setup_rt_frame(ksig, oldset, regs);
else
ret = setup_frame(ksig, oldset, regs);
/*
* Check that the resulting registers are actually sane.
* A failed check is folded into ret as a nonzero value.
*/
ret |= !valid_user_regs(regs);
signal_setup_done(ret, ksig, 0);
}
setup_rt_frame
呼叫get_sigframe
(下面第4行)負責處理1.
的部分
pt_regs
的struct保存user space在進入kernel前的register
但signal handler執行期間若再次進入kernel,原本的pt_regs
就會丟失,因為每次進入kernel space的時候,kernel space的stack都一定是空的,因此不能把user space的context保存在kernel stack上,必須保存在user space的stack上
get_sigframe
規劃
/*
* Build an rt signal frame for ksig and point the registers at the handler.
* Returns 1 when get_sigframe() yields no frame; otherwise returns the
* accumulated error value, which is 0 when every step succeeded.
*/
static int
setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame = get_sigframe(ksig, regs, sizeof(*frame));
int err = 0;
if (!frame)
return 1;
/* Fill in the frame; each step ORs its failure status into err. */
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
err |= __put_user(0, &frame->sig.uc.uc_flags);
err |= __put_user(NULL, &frame->sig.uc.uc_link);
err |= __save_altstack(&frame->sig.uc.uc_stack, regs->ARM_sp);
err |= setup_sigframe(&frame->sig, regs, set);
/* Only once the frame was written successfully is setup_return() called. */
if (err == 0)
err = setup_return(regs, ksig, frame->sig.retcode, frame);
if (err == 0) {
/*
* For realtime signals we must also set the second and third
* arguments for the signal handler.
* -- Peter Maydell <pmaydell@chiark.greenend.org.uk> 2000-12-06
*/
regs->ARM_r1 = (unsigned long)&frame->info;
regs->ARM_r2 = (unsigned long)&frame->sig.uc;
}
return err;
}
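2.3.
則由上面setup_rt_frame中呼叫的setup_return
(以ARM64為例)完成。注意:上面的setup_rt_frame取自32-bit ARM(使用ARM_sp、ARM_r1等欄位),下面的setup_return則是ARM64版本,所以兩邊的參數列並不一致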
setup_sigframe
已經將user space的 context存到user
中,因此這邊可以直接修改pt_regs
來將PC指向signal handler
regs[0]
存signal number,regs[29]
是frame pointer(stack pointer另外由regs->sp設定), regs[30]
是Link Register(存放函數的return address)
sigtramp
指向VDSO的rt_sigreturn
(定義在arch/arm64/kernel/vdso/sigreturn.S
),並在第24行將return address設為sigtramp
,因此signal handler執行完後會接著執行rt_sigreturn
/*
* Rewrite the saved user registers in regs so that execution continues in the
* signal handler described by ka, and so that the handler's return address is
* the signal trampoline.
*/
static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
struct rt_sigframe_user_layout *user, int usig)
{
__sigrestore_t sigtramp;
regs->regs[0] = usig; /* signal number goes in regs[0] */
regs->sp = (unsigned long)user->sigframe; /* stack pointer: the signal frame */
regs->regs[29] = (unsigned long)&user->next_frame->fp; /* regs[29]: address of the frame record's fp slot */
regs->pc = (unsigned long)ka->sa.sa_handler; /* program counter: the handler itself */
/* When BTI is supported, replace the BTYPE field of pstate with PSR_BTYPE_C. */
if (system_supports_bti()) {
regs->pstate &= ~PSR_BTYPE_MASK;
regs->pstate |= PSR_BTYPE_C;
}
/* TCO (Tag Check Override) always cleared for signal handlers */
regs->pstate &= ~PSR_TCO_BIT;
/* Use the caller-supplied restorer with SA_RESTORER, otherwise the vDSO sigtramp symbol. */
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
regs->regs[30] = (unsigned long)sigtramp; /* regs[30]: where the handler returns to */
}
sys_rt_sigreturn
,呼叫restore_sigframe
來恢復user space的執行
/*
* Restore the register state saved in the rt signal frame found at the user
* stack pointer. Returns regs->ARM_r0 on success; on a bad frame it forces
* SIGSEGV and returns 0.
*/
asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a 64-bit boundary,
* then 'sp' should be word aligned here. If it's
* not, then the user is trying to mess with us.
*/
if (regs->ARM_sp & 7)
goto badframe;
frame = (struct rt_sigframe __user *)regs->ARM_sp;
/* Reject the frame unless access_ok() accepts the whole of it. */
if (!access_ok(frame, sizeof (*frame)))
goto badframe;
if (restore_sigframe(regs, &frame->sig))
goto badframe;
if (restore_altstack(&frame->sig.uc.uc_stack))
goto badframe;
return regs->ARM_r0;
/* Any failed check above ends up here. */
badframe:
force_sig(SIGSEGV);
return 0;
}