Linux
ARM
底層
主要想追蹤一下 Linux 是怎麼做 irq handling 的。
本篇追蹤的 Linux 版本為 5.17.1
先找 vector table 的配置
/*
* The following callee saved general purpose registers are used on the
* primary lowlevel boot path:
*
* Register Scope Purpose
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
* x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
* x28 __create_page_tables() callee preserved temp register
* x19/x20 __primary_switch() callee preserved temp registers
* x24 __primary_switch() .. relocate_kernel() current RELR displacement
*/
SYM_CODE_START(primary_entry)
bl preserve_boot_args
bl init_kernel_el // w0=cpu_boot_mode
...
b __primary_switch
SYM_CODE_END(primary_entry)
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
*/
SYM_CODE_START_LOCAL(preserve_boot_args)
mov x21, x0 // x21=FDT
...
b dcache_inval_poc // tail call
SYM_CODE_END(preserve_boot_args)
preserve_boot_args
將 fdt 的位址存放到 x21,並另外做一些事情init_kernel_el
將 Exception Level 從 EL2 降級到 EL1/*
* Starting from EL2 or EL1, configure the CPU to execute at the highest
* reachable EL supported by the kernel in a chosen default state. If dropping
* from EL2 to EL1, configure EL2 before configuring EL1.
*
* Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
* SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
*
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
* booted in EL1 or EL2 respectively.
*/
SYM_FUNC_START(init_kernel_el)
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq init_el2
SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
isb
mov_q x0, INIT_PSTATE_EL1
msr spsr_el1, x0
msr elr_el1, lr
mov w0, #BOOT_CPU_MODE_EL1
eret
SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
mov_q x0, HCR_HOST_NVHE_FLAGS
msr hcr_el2, x0
isb
init_el2_state
/* Hypervisor stub */
adr_l x0, __hyp_stub_vectors
msr vbar_el2, x0
isb
/*
* Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
* making it impossible to start in nVHE mode. Is that
* compliant with the architecture? Absolutely not!
*/
mrs x0, hcr_el2
and x0, x0, #HCR_E2H
cbz x0, 1f
/* Switching to VHE requires a sane SCTLR_EL1 as a start */
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr_s SYS_SCTLR_EL12, x0
/*
* Force an eret into a helper "function", and let it return
* to our original caller... This makes sure that we have
* initialised the basic PSTATE state.
*/
mov x0, #INIT_PSTATE_EL2
msr spsr_el1, x0
adr x0, __cpu_stick_to_vhe
msr elr_el1, x0
eret
1:
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2
eret
__cpu_stick_to_vhe:
mov x0, #HVC_VHE_RESTART
hvc #0
mov x0, #BOOT_CPU_MODE_EL2
ret
SYM_FUNC_END(init_kernel_el)
__hyp_stub_vectors
__cpu_stick_to_vhe
SYM_FUNC_START_LOCAL(__primary_switch)
...
adrp x1, init_pg_dir
bl __enable_mmu
...
ldr x8, =__primary_switched
adrp x0, __PHYS_OFFSET
br x8
SYM_FUNC_END(__primary_switch)
__primary_switched
/*
* The following fragment of code is executed with the MMU enabled.
*
* x0 = __PHYS_OFFSET
*/
SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x4, init_task
init_cpu_task x4, x5, x6
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
stp x29, x30, [sp, #-16]!
mov x29, sp
str_l x21, __fdt_pointer, x5 // Save FDT pointer
ldr_l x4, kimage_vaddr // Save the offset between
sub x4, x4, x0 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings
// Clear BSS
adr_l x0, __bss_start
mov x1, xzr
adr_l x2, __bss_stop
sub x2, x2, x0
bl __pi_memset
dsb ishst // Make zero page visible to PTW
...
mov x0, x21 // pass FDT address in x0
bl early_fdt_map // Try mapping the FDT early
bl init_feature_override // Parse cpu feature overrides
...
bl switch_to_vhe // Prefer VHE if possible
ldp x29, x30, [sp], #16
bl start_kernel
ASM_BUG()
SYM_FUNC_END(__primary_switched)
early_fdt_map
vectors
,這就是我感興趣的部分!/*
* Exception vectors.
*/
.pushsection ".entry.text", "ax"
.align 11
SYM_CODE_START(vectors)
kernel_ventry 1, t, 64, sync // Synchronous EL1t
kernel_ventry 1, t, 64, irq // IRQ EL1t
kernel_ventry 1, t, 64, fiq // FIQ EL1h
kernel_ventry 1, t, 64, error // Error EL1t
kernel_ventry 1, h, 64, sync // Synchronous EL1h
kernel_ventry 1, h, 64, irq // IRQ EL1h
kernel_ventry 1, h, 64, fiq // FIQ EL1h
kernel_ventry 1, h, 64, error // Error EL1h
kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0
kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0
kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0
kernel_ventry 0, t, 64, error // Error 64-bit EL0
kernel_ventry 0, t, 32, sync // Synchronous 32-bit EL0
kernel_ventry 0, t, 32, irq // IRQ 32-bit EL0
kernel_ventry 0, t, 32, fiq // FIQ 32-bit EL0
kernel_ventry 0, t, 32, error // Error 32-bit EL0
SYM_CODE_END(vectors)
而 kernel_ventry
定義如下:
.macro kernel_ventry, el:req, ht:req, regsize:req, label:req
.align 7
.Lventry_start\@:
.if \el == 0
...
.endif
sub sp, sp, #PT_REGS_SIZE
#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
* Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT)
* should always be zero.
*/
add sp, sp, x0 // sp' = sp + x0
sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
tbnz x0, #THREAD_SHIFT, 0f
sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
b el\el\ht\()_\regsize\()_\label
0:
/*
* Either we've just detected an overflow, or we've taken an exception
* while on the overflow stack. Either way, we won't return to
* userspace, and can clobber EL0 registers to free up GPRs.
*/
/* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */
msr tpidr_el0, x0
/* Recover the original x0 value and stash it in tpidrro_el0 */
sub x0, sp, x0
msr tpidrro_el0, x0
/* Switch to the overflow stack */
adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
/*
* Check whether we were already on the overflow stack. This may happen
* after panic() re-enables interrupts.
*/
mrs x0, tpidr_el0 // sp of interrupted context
sub x0, sp, x0 // delta with top of overflow stack
tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range?
b.ne __bad_stack // no? -> bad stack pointer
/* We were already on the overflow stack. Restore sp/x0 and carry on. */
sub sp, sp, x0
mrs x0, tpidrro_el0
#endif
b el\el\ht\()_\regsize\()_\label
.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm
目前重點先關注 IRQ EL1h 的部分,經過展開後會變成 el1h_64_irq
\()
用來區隔 macro 參數名稱,參考手冊後面建立了各 exception handler:
/*
* Early exception handlers
*/
entry_handler 1, t, 64, sync
entry_handler 1, t, 64, irq
entry_handler 1, t, 64, fiq
entry_handler 1, t, 64, error
entry_handler 1, h, 64, sync
entry_handler 1, h, 64, irq
entry_handler 1, h, 64, fiq
entry_handler 1, h, 64, error
entry_handler 0, t, 64, sync
entry_handler 0, t, 64, irq
entry_handler 0, t, 64, fiq
entry_handler 0, t, 64, error
entry_handler 0, t, 32, sync
entry_handler 0, t, 32, irq
entry_handler 0, t, 32, fiq
entry_handler 0, t, 32, error
entry_handler
macro 定義如下:
.macro entry_handler el:req, ht:req, regsize:req, label:req
SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
kernel_entry \el, \regsize
mov x0, sp
bl el\el\ht\()_\regsize\()_\label\()_handler
.if \el == 0
b ret_to_user
.else
b ret_to_kernel
.endif
SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
.endm
以 entry_handler 1, h, 64, irq
來說,其會展開成以下:
SYM_CODE_START_LOCAL(el1h_64_irq)
kernel_entry 1, 64
mov x0, sp
bl el1h_64_irq_handler
b ret_to_kernel
SYM_CODE_END(el1h_64_irq)
arch/arm64/include/asm/exception.h
包含了 el1h_64_irq_handler
的宣告:
asmlinkage void el1t_64_sync_handler(struct pt_regs *regs);
asmlinkage void el1t_64_irq_handler(struct pt_regs *regs);
asmlinkage void el1t_64_fiq_handler(struct pt_regs *regs);
asmlinkage void el1t_64_error_handler(struct pt_regs *regs);
asmlinkage void el1h_64_sync_handler(struct pt_regs *regs);
asmlinkage void el1h_64_irq_handler(struct pt_regs *regs);
asmlinkage void el1h_64_fiq_handler(struct pt_regs *regs);
asmlinkage void el1h_64_error_handler(struct pt_regs *regs);
asmlinkage void el0t_64_sync_handler(struct pt_regs *regs);
asmlinkage void el0t_64_irq_handler(struct pt_regs *regs);
asmlinkage void el0t_64_fiq_handler(struct pt_regs *regs);
asmlinkage void el0t_64_error_handler(struct pt_regs *regs);
asmlinkage void el0t_32_sync_handler(struct pt_regs *regs);
asmlinkage void el0t_32_irq_handler(struct pt_regs *regs);
asmlinkage void el0t_32_fiq_handler(struct pt_regs *regs);
asmlinkage void el0t_32_error_handler(struct pt_regs *regs);
arch/arm64/kernel/entry-common.c
asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs)
{
el1_interrupt(regs, handle_arch_irq);
}
static void noinstr el1_interrupt(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
/* Mask {I, F} bits */
write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
__el1_pnmi(regs, handler);
else
__el1_irq(regs, handler);
}
static __always_inline void __el1_irq(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
enter_from_kernel_mode(regs);
irq_enter_rcu();
do_interrupt_handler(regs, handler);
irq_exit_rcu();
/*
* Note: thread_info::preempt_count includes both thread_info::count
* and thread_info::need_resched, and is not equivalent to
* preempt_count().
*/
if (IS_ENABLED(CONFIG_PREEMPTION) &&
READ_ONCE(current_thread_info()->preempt_count) == 0)
arm64_preempt_schedule_irq();
exit_to_kernel_mode(regs);
}
static void do_interrupt_handler(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
struct pt_regs *old_regs = set_irq_regs(regs);
if (on_thread_stack())
call_on_irq_stack(regs, handler);
else
handler(regs);
set_irq_regs(old_regs);
}
追到底就是呼叫 handler,也就是 handle_arch_irq
static void default_handle_irq(struct pt_regs *regs)
{
panic("IRQ taken without a root IRQ handler\n");
}
void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
{
if (handle_arch_irq != default_handle_irq)
return -EBUSY;
handle_arch_irq = handle_irq;
pr_info("Root IRQ handler: %ps\n", handle_irq);
return 0;
}
handle_arch_irq
是一個全域變數,預設為 default_handle_irq
,kernel 需要在初始化階段呼叫 set_handle_irq
來配置 handle_arch_irq
driver/irqchip/
底下的 interrupt controller drivers 會呼叫 set_handle_irq
driver/irqchip/irq-bcm2836.c
static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node,
struct device_node *parent)
{
intc.base = of_iomap(node, 0);
if (!intc.base) {
panic("%pOF: unable to map local interrupt registers\n", node);
}
bcm2835_init_local_timer_frequency();
intc.domain = irq_domain_add_linear(node, LAST_IRQ + 1,
&bcm2836_arm_irqchip_intc_ops,
NULL);
if (!intc.domain)
panic("%pOF: unable to create IRQ domain\n", node);
irq_domain_update_bus_token(intc.domain, DOMAIN_BUS_WIRED);
bcm2836_arm_irqchip_smp_init();
set_handle_irq(bcm2836_arm_irqchip_handle_irq);
return 0;
}
bcm2836_arm_irqchip_handle_irq
static void
__exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs)
{
int cpu = smp_processor_id();
u32 stat;
stat = readl_relaxed(intc.base + LOCAL_IRQ_PENDING0 + 4 * cpu);
if (stat) {
u32 hwirq = ffs(stat) - 1;
generic_handle_domain_irq(intc.domain, hwirq);
}
}
generic_handle_domain_irq
intc
、hwirq
意義,目前看起來 intc
會從 fdt 取得資訊,hwirq
應該是對應裝置的數字,但不是很確定Windbg 筆記 http://windbg.info/doc/1-common-cmds.html Setting Load symbols 先創好資料夾 C:\MyLocalSymbols File -> Settings -> Debugging settings -> Symbol Path srv*C:\MyLocalSymbols*http://msdl.microsoft.com/download/symbols
Apr 1, 2025Linux Kernel pwn Inspection 取得 symbol address 方便下斷點觀察 # 觀察 printk 輸出 echo /proc/sys/kernel/dmesg_restrict # 觀察是否有權限讀 printk 輸出 dmesg # 拿 kernel module address
May 22, 2023好文: http://courses.cms.caltech.edu/cs124/lectures-wi2016/CS124Lec15.pdf 其他篇筆記: https://hackmd.io/@LJP/BJAb95O1c 在 task_struct 有兩個 member:
Apr 29, 2022其他篇筆記: https://hackmd.io/@LJP/SkrXX8OSc Kernel 發生 signal arch_do_signal_or_restart() https://elixir.bootlin.com/linux/latest/source/arch/x86/kernel/signal.c#L864
Apr 28, 2022or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up