Try   HackMD
tags: Linux ARM 底層

AArch64 EL1 IRQ handling in Linux

主要想追蹤一下 Linux 是怎麼做 irq handling 的。

本篇追蹤的 Linux 版本為 5.17.1

head.S

arch/arm64/kernel/head.S

先找 vector table 的配置

primary_entry

    /*
     * The following callee saved general purpose registers are used on the
     * primary lowlevel boot path:
     *
     *  Register   Scope                      Purpose
     *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
     *  x23        primary_entry() .. start_kernel()        physical misalignment/KASLR offset
     *  x28        __create_page_tables()                   callee preserved temp register
     *  x19/x20    __primary_switch()                       callee preserved temp registers
     *  x24        __primary_switch() .. relocate_kernel()  current RELR displacement
     */
SYM_CODE_START(primary_entry)
    bl    preserve_boot_args
    bl    init_kernel_el            // w0=cpu_boot_mode
    ...
    b    __primary_switch
SYM_CODE_END(primary_entry)

/*
 * Preserve the arguments passed by the bootloader in x0 .. x3
 */
SYM_CODE_START_LOCAL(preserve_boot_args)
    mov    x21, x0           // x21=FDT

    ...
    b    dcache_inval_poc    // tail call
SYM_CODE_END(preserve_boot_args)

  • preserve_boot_args 將 fdt 的位址存放到 x21,並另外做一些事情
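  • init_kernel_el 會先讀 CurrentEL:若開機時位於 EL2,就走 init_el2,設定好 EL2 後再 eret 回呼叫者(一般情況下即降級到 EL1);若本來就在 EL1,則走 init_el1。w0 會回傳 BOOT_CPU_MODE_EL1 或 BOOT_CPU_MODE_EL2,表示開機時所在的 EL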

init_kernel_el

/*
 * Starting from EL2 or EL1, configure the CPU to execute at the highest
 * reachable EL supported by the kernel in a chosen default state. If dropping
 * from EL2 to EL1, configure EL2 before configuring EL1.
 *
 * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
 * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
 *
 * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
 * booted in EL1 or EL2 respectively.
 *
 * Control flow summary:
 *   - CurrentEL == EL2            -> init_el2
 *   - otherwise (fall through)    -> init_el1
 *   - init_el2, HCR_EL2.E2H == 0  -> local label 1:, eret to lr
 *   - init_el2, HCR_EL2.E2H != 0  -> eret to __cpu_stick_to_vhe, which
 *                                    then returns to the original caller
 */
SYM_FUNC_START(init_kernel_el)
    // Dispatch on the exception level we were entered at.
    mrs    x0, CurrentEL
    cmp    x0, #CurrentEL_EL2
    b.eq    init_el2

    // Not at EL2: fall through into the EL1 path.
SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
    mov_q    x0, INIT_SCTLR_EL1_MMU_OFF
    msr    sctlr_el1, x0
    isb
    // eret target is the caller's return address (lr), with PSTATE taken
    // from INIT_PSTATE_EL1; w0 carries the return value.
    mov_q    x0, INIT_PSTATE_EL1
    msr    spsr_el1, x0
    msr    elr_el1, lr
    mov    w0, #BOOT_CPU_MODE_EL1
    eret

SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
    mov_q    x0, HCR_HOST_NVHE_FLAGS
    msr    hcr_el2, x0
    isb

    init_el2_state

    /* Hypervisor stub */
    adr_l    x0, __hyp_stub_vectors
    msr    vbar_el2, x0
    isb

    /*
     * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
     * making it impossible to start in nVHE mode. Is that
     * compliant with the architecture? Absolutely not!
     */
    // Read HCR_EL2 back: if E2H is clear, skip the VHE path and go to 1:.
    mrs    x0, hcr_el2
    and    x0, x0, #HCR_E2H
    cbz    x0, 1f

    /* Switching to VHE requires a sane SCTLR_EL1 as a start */
    mov_q    x0, INIT_SCTLR_EL1_MMU_OFF
    msr_s    SYS_SCTLR_EL12, x0

    /*
     * Force an eret into a helper "function", and let it return
     * to our original caller... This makes sure that we have
     * initialised the basic PSTATE state.
     */
    mov    x0, #INIT_PSTATE_EL2
    msr    spsr_el1, x0
    adr    x0, __cpu_stick_to_vhe
    msr    elr_el1, x0
    eret

    // E2H reads as zero: program SCTLR_EL1, then eret to the caller's
    // return address (lr) with w0 = BOOT_CPU_MODE_EL2.
1:
    mov_q    x0, INIT_SCTLR_EL1_MMU_OFF
    msr    sctlr_el1, x0

    msr    elr_el2, lr
    mov    w0, #BOOT_CPU_MODE_EL2
    eret

    // Only reached through the eret on the E2H-set path above: issue
    // hvc #0 with x0 = HVC_VHE_RESTART, then return BOOT_CPU_MODE_EL2 in x0.
__cpu_stick_to_vhe:
    mov    x0, #HVC_VHE_RESTART
    hvc    #0
    mov    x0, #BOOT_CPU_MODE_EL2
    ret
SYM_FUNC_END(init_kernel_el)
  • 設置 EL2 vector table 為 __hyp_stub_vectors
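  • 在 init_el2 中,只有當 HCR_EL2.E2H 讀回來不為 0(也就是無法以 nVHE 模式啟動)時,才會 eret 到 __cpu_stick_to_vhe,由它以 hvc #0 發出 HVC_VHE_RESTART 後再 ret 回原本的呼叫者;否則會走 1: 分支,設定 sctlr_el1 後直接 eret 回呼叫者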

__primary_switch

SYM_FUNC_START_LOCAL(__primary_switch)
    ...
    adrp   x1, init_pg_dir
    bl     __enable_mmu
    ...
    ldr   x8, =__primary_switched
    adrp  x0, __PHYS_OFFSET
    br    x8
SYM_FUNC_END(__primary_switch)
  • 啟用 MMU 之後,以 ldr x8, =__primary_switched 載入位址,再用 br x8 跳轉到 __primary_switched(是跳轉而不是 bl 呼叫)

__primary_switched

/*
 * The following fragment of code is executed with the MMU enabled.
 *
 *   x0 = __PHYS_OFFSET
 */
SYM_FUNC_START_LOCAL(__primary_switched)
    adr_l    x4, init_task
    init_cpu_task x4, x5, x6

    adr_l  x8, vectors            // load VBAR_EL1 with virtual
    msr    vbar_el1, x8           // vector table address
    isb

    stp    x29, x30, [sp, #-16]!
    mov    x29, sp

    str_l  x21, __fdt_pointer, x5  // Save FDT pointer

    ldr_l  x4, kimage_vaddr        // Save the offset between
    sub    x4, x4, x0              // the kernel virtual and
    str_l  x4, kimage_voffset, x5  // physical mappings

    // Clear BSS
    adr_l  x0, __bss_start
    mov    x1, xzr
    adr_l  x2, __bss_stop
    sub    x2, x2, x0
    bl     __pi_memset
    dsb    ishst                  // Make zero page visible to PTW

    ...
    mov    x0, x21                // pass FDT address in x0
    bl     early_fdt_map          // Try mapping the FDT early
    bl     init_feature_override  // Parse cpu feature overrides
    ...
    bl     switch_to_vhe          // Prefer VHE if possible
    ldp    x29, x30, [sp], #16
    bl     start_kernel
    ASM_BUG()
SYM_FUNC_END(__primary_switched)
  • 可以看到 clear bss,傳遞 fdt address 給 early_fdt_map
  • 設置 EL1 vector table 為 vectors,這就是我感興趣的部分!

entry.S

arch/arm64/kernel/entry.S

vectors

/*
 * Exception vectors.
 *
 * Sixteen kernel_ventry slots in four groups of four. Each group holds, in
 * order, the sync / irq / fiq / error entries for one source context:
 * EL1t, EL1h, 64-bit EL0, 32-bit EL0.
 */
    .pushsection ".entry.text", "ax"

    .align    11
SYM_CODE_START(vectors)
    kernel_ventry    1, t, 64, sync       // Synchronous EL1t
    kernel_ventry    1, t, 64, irq        // IRQ EL1t
    kernel_ventry    1, t, 64, fiq        // FIQ EL1t
    kernel_ventry    1, t, 64, error      // Error EL1t

    kernel_ventry    1, h, 64, sync       // Synchronous EL1h
    kernel_ventry    1, h, 64, irq        // IRQ EL1h
    kernel_ventry    1, h, 64, fiq        // FIQ EL1h
    kernel_ventry    1, h, 64, error      // Error EL1h

    kernel_ventry    0, t, 64, sync       // Synchronous 64-bit EL0
    kernel_ventry    0, t, 64, irq        // IRQ 64-bit EL0
    kernel_ventry    0, t, 64, fiq        // FIQ 64-bit EL0
    kernel_ventry    0, t, 64, error      // Error 64-bit EL0

    kernel_ventry    0, t, 32, sync       // Synchronous 32-bit EL0
    kernel_ventry    0, t, 32, irq        // IRQ 32-bit EL0
    kernel_ventry    0, t, 32, fiq        // FIQ 32-bit EL0
    kernel_ventry    0, t, 32, error      // Error 32-bit EL0
SYM_CODE_END(vectors)

kernel_ventry 定義如下:

    .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
    .align 7
.Lventry_start\@:
    .if    \el == 0
    ...
    .endif

    sub    sp, sp, #PT_REGS_SIZE
#ifdef CONFIG_VMAP_STACK
    /*
     * Test whether the SP has overflowed, without corrupting a GPR.
     * Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT)
     * should always be zero.
     */
    add    sp, sp, x0            // sp' = sp + x0
    sub    x0, sp, x0            // x0' = sp' - x0 = (sp + x0) - x0 = sp
    tbnz    x0, #THREAD_SHIFT, 0f
    sub    x0, sp, x0            // x0'' = sp' - x0' = (sp + x0) - sp = x0
    sub    sp, sp, x0            // sp'' = sp' - x0 = (sp + x0) - x0 = sp
    b    el\el\ht\()_\regsize\()_\label

0:
    /*
     * Either we've just detected an overflow, or we've taken an exception
     * while on the overflow stack. Either way, we won't return to
     * userspace, and can clobber EL0 registers to free up GPRs.
     */

    /* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */
    msr    tpidr_el0, x0

    /* Recover the original x0 value and stash it in tpidrro_el0 */
    sub    x0, sp, x0
    msr    tpidrro_el0, x0

    /* Switch to the overflow stack */
    adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0

    /*
     * Check whether we were already on the overflow stack. This may happen
     * after panic() re-enables interrupts.
     */
    mrs    x0, tpidr_el0            // sp of interrupted context
    sub    x0, sp, x0            // delta with top of overflow stack
    tst    x0, #~(OVERFLOW_STACK_SIZE - 1)    // within range?
    b.ne    __bad_stack            // no? -> bad stack pointer

    /* We were already on the overflow stack. Restore sp/x0 and carry on. */
    sub    sp, sp, x0
    mrs    x0, tpidrro_el0
#endif
    b    el\el\ht\()_\regsize\()_\label
.org .Lventry_start\@ + 128    // Did we overflow the ventry slot?
    .endm

目前重點先關注 IRQ EL1h 的部分,經過展開後會變成 el1h_64_irq

後面建立了各 exception handler:

/*
 * Early exception handlers
 *
 * One entry_handler instance per vector slot, using the same
 * (el, ht, regsize, label) tuples as the kernel_ventry lines in the
 * vectors table. Each instance defines the el<el><ht>_<regsize>_<label>
 * symbol that the matching vector slot branches to.
 */
    entry_handler    1, t, 64, sync
    entry_handler    1, t, 64, irq
    entry_handler    1, t, 64, fiq
    entry_handler    1, t, 64, error

    entry_handler    1, h, 64, sync
    entry_handler    1, h, 64, irq
    entry_handler    1, h, 64, fiq
    entry_handler    1, h, 64, error

    entry_handler    0, t, 64, sync
    entry_handler    0, t, 64, irq
    entry_handler    0, t, 64, fiq
    entry_handler    0, t, 64, error

    entry_handler    0, t, 32, sync
    entry_handler    0, t, 32, irq
    entry_handler    0, t, 32, fiq
    entry_handler    0, t, 32, error

entry_handler macro 定義如下:

    /*
     * entry_handler el, ht, regsize, label
     *
     * Defines the local code symbol el<el><ht>_<regsize>_<label>. The body
     * runs kernel_entry with (el, regsize), passes the current sp in x0 to
     * el<el><ht>_<regsize>_<label>_handler, and then branches to
     * ret_to_user when el == 0 or to ret_to_kernel otherwise.
     */
    .macro entry_handler el:req, ht:req, regsize:req, label:req
SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
    kernel_entry \el, \regsize
    // x0 = sp is the first argument of the handler called below
    mov    x0, sp
    bl     el\el\ht\()_\regsize\()_\label\()_handler
    .if \el == 0
    b      ret_to_user
    .else
    b      ret_to_kernel
    .endif
SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
    .endm

以 entry_handler 1, h, 64, irq 來說,其會展開成以下:

/*
 * Result of expanding the entry_handler macro with el=1, ht=h, regsize=64,
 * label=irq: run kernel_entry, call el1h_64_irq_handler with x0 = sp, then
 * branch to ret_to_kernel (the el != 0 arm of the macro).
 */
SYM_CODE_START_LOCAL(el1h_64_irq)
    kernel_entry 1, 64
    mov    x0, sp
    bl     el1h_64_irq_handler
    b      ret_to_kernel
SYM_CODE_END(el1h_64_irq)

exception.h

arch/arm64/include/asm/exception.h

包含了 el1h_64_irq_handler 的宣告:

/*
 * C entry points named el<el><ht>_<regsize>_<label>_handler, one per
 * exception vector slot. Each takes a single struct pt_regs pointer.
 */

/* EL1t */
asmlinkage void el1t_64_sync_handler(struct pt_regs *regs);
asmlinkage void el1t_64_irq_handler(struct pt_regs *regs);
asmlinkage void el1t_64_fiq_handler(struct pt_regs *regs);
asmlinkage void el1t_64_error_handler(struct pt_regs *regs);

/* EL1h */
asmlinkage void el1h_64_sync_handler(struct pt_regs *regs);
asmlinkage void el1h_64_irq_handler(struct pt_regs *regs);
asmlinkage void el1h_64_fiq_handler(struct pt_regs *regs);
asmlinkage void el1h_64_error_handler(struct pt_regs *regs);

/* 64-bit EL0 */
asmlinkage void el0t_64_sync_handler(struct pt_regs *regs);
asmlinkage void el0t_64_irq_handler(struct pt_regs *regs);
asmlinkage void el0t_64_fiq_handler(struct pt_regs *regs);
asmlinkage void el0t_64_error_handler(struct pt_regs *regs);

/* 32-bit EL0 */
asmlinkage void el0t_32_sync_handler(struct pt_regs *regs);
asmlinkage void el0t_32_irq_handler(struct pt_regs *regs);
asmlinkage void el0t_32_fiq_handler(struct pt_regs *regs);
asmlinkage void el0t_32_error_handler(struct pt_regs *regs);

entry-common.c

arch/arm64/kernel/entry-common.c

el1h_64_irq_handler

/*
 * C handler for the "IRQ EL1h" vector: forwards @regs to el1_interrupt()
 * with the root IRQ handler pointer handle_arch_irq as the handler.
 */
asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs)
{
    el1_interrupt(regs, handle_arch_irq);
}

el1_interrupt

/*
 * Interrupt entry used by el1h_64_irq_handler().
 *
 * @regs:    register state handed over by the assembly entry code
 * @handler: function that will actually service the interrupt
 *
 * Writes DAIF_PROCCTX_NOIRQ to DAIF, then dispatches to __el1_pnmi() when
 * CONFIG_ARM64_PSEUDO_NMI is enabled and interrupts_enabled(regs) is false,
 * and to __el1_irq() in every other case.
 */
static void noinstr el1_interrupt(struct pt_regs *regs,
                  void (*handler)(struct pt_regs *))
{
    /* Mask {I, F} bits */
    write_sysreg(DAIF_PROCCTX_NOIRQ, daif);

    if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
        __el1_pnmi(regs, handler);
    else
        __el1_irq(regs, handler);
}

__el1_irq

/*
 * Regular (non pseudo-NMI) IRQ path chosen by el1_interrupt().
 *
 * Sequence: enter_from_kernel_mode() -> irq_enter_rcu() ->
 * do_interrupt_handler(regs, handler) -> irq_exit_rcu() -> optional
 * arm64_preempt_schedule_irq() -> exit_to_kernel_mode().
 */
static __always_inline void __el1_irq(struct pt_regs *regs,
                                      void (*handler)(struct pt_regs *))
{
    enter_from_kernel_mode(regs);

    irq_enter_rcu();
    do_interrupt_handler(regs, handler);
    irq_exit_rcu();

    /*
     * Note: thread_info::preempt_count includes both thread_info::count
     * and thread_info::need_resched, and is not equivalent to
     * preempt_count().
     */
    /*
     * Only with CONFIG_PREEMPTION: call arm64_preempt_schedule_irq() when
     * the combined preempt_count field described above reads as zero.
     */
    if (IS_ENABLED(CONFIG_PREEMPTION) &&
        READ_ONCE(current_thread_info()->preempt_count) == 0)
        arm64_preempt_schedule_irq();

    exit_to_kernel_mode(regs);
}

do_interrupt_handler

/*
 * Invoke @handler on @regs.
 *
 * @regs is installed with set_irq_regs() for the duration of the call and
 * the previous value is put back afterwards. When on_thread_stack() is
 * true the handler is run through call_on_irq_stack() (by its name, on
 * the IRQ stack); otherwise it is called directly.
 */
static void do_interrupt_handler(struct pt_regs *regs,
                                 void (*handler)(struct pt_regs *))
{
    /* Remember the previous irq regs so they can be restored below. */
    struct pt_regs *old_regs = set_irq_regs(regs);

    if (on_thread_stack())
        call_on_irq_stack(regs, handler);
    else
        handler(regs);

    set_irq_regs(old_regs);
}

追到底就是呼叫 handler,也就是 handle_arch_irq

irq.c

arch/arm64/kernel/irq.c

handle_arch_irq

/*
 * Placeholder root IRQ handler: panics, because an IRQ arrived before any
 * real root handler was installed through set_handle_irq().
 */
static void default_handle_irq(struct pt_regs *regs)
{
    panic("IRQ taken without a root IRQ handler\n");
}

/*
 * Root IRQ handler pointer. Starts out as default_handle_irq and is
 * replaced by set_handle_irq().
 */
void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;

/*
 * Install @handle_irq as the root IRQ handler (handle_arch_irq).
 *
 * Returns -EBUSY without changing anything if handle_arch_irq no longer
 * points at default_handle_irq, i.e. a handler was already installed.
 * Otherwise stores @handle_irq, logs it, and returns 0.
 */
int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
{
    if (handle_arch_irq != default_handle_irq)
        return -EBUSY;

    handle_arch_irq = handle_irq;
    pr_info("Root IRQ handler: %ps\n", handle_irq);
    return 0;
}
  • 可以看到 handle_arch_irq 是一個全域變數,預設為 default_handle_irq,kernel 需要在初始化階段呼叫 set_handle_irq 來配置 handle_arch_irq
  • drivers/irqchip/ 底下的 interrupt controller drivers 會呼叫 set_handle_irq
  • 這邊追蹤 drivers/irqchip/irq-bcm2836.c

irq-bcm2836.c

drivers/irqchip/irq-bcm2836.c

bcm2836_arm_irqchip_l1_intc_of_init

/*
 * Init routine for the bcm2836 local interrupt controller.
 *
 * @node:   device node of the controller; used for the register mapping,
 *          the IRQ domain and the panic messages
 * @parent: not used in this function
 *
 * Steps: map register resource 0 of @node into intc.base, initialise the
 * local timer frequency, create a linear IRQ domain with LAST_IRQ + 1
 * entries, tag the domain as DOMAIN_BUS_WIRED, run the SMP init helper, and
 * register bcm2836_arm_irqchip_handle_irq as the root IRQ handler.
 *
 * Panics if the mapping or the domain creation fails; otherwise returns 0.
 */
static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node,
                                                      struct device_node *parent)
{
    intc.base = of_iomap(node, 0);
    if (!intc.base) {
        panic("%pOF: unable to map local interrupt registers\n", node);
    }

    bcm2835_init_local_timer_frequency();

    intc.domain = irq_domain_add_linear(node, LAST_IRQ + 1,
                        &bcm2836_arm_irqchip_intc_ops,
                        NULL);
    if (!intc.domain)
        panic("%pOF: unable to create IRQ domain\n", node);

    irq_domain_update_bus_token(intc.domain, DOMAIN_BUS_WIRED);

    bcm2836_arm_irqchip_smp_init();

    /*
     * NOTE(review): set_handle_irq() returns -EBUSY when a root handler is
     * already installed; that return value is not checked here.
     */
    set_handle_irq(bcm2836_arm_irqchip_handle_irq);
    return 0;
}
  • 這邊註冊 irq handler 為 bcm2836_arm_irqchip_handle_irq

bcm2836_arm_irqchip_handle_irq

/*
 * Root IRQ handler registered by bcm2836_arm_irqchip_l1_intc_of_init().
 *
 * Reads the pending word for the current CPU, located at
 * intc.base + LOCAL_IRQ_PENDING0 + 4 * cpu. If any bit is set, the index of
 * the lowest set bit (ffs(stat) - 1) is used as the hwirq number and passed
 * to generic_handle_domain_irq() together with intc.domain. At most one
 * hwirq is dispatched per invocation.
 */
static void
__exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs)
{
    int cpu = smp_processor_id();
    u32 stat;

    stat = readl_relaxed(intc.base + LOCAL_IRQ_PENDING0 + 4 * cpu);
    if (stat) {
        /* ffs() is 1-based, so subtract 1 to get the bit index. */
        u32 hwirq = ffs(stat) - 1;

        generic_handle_domain_irq(intc.domain, hwirq);
    }
}
  • 最終呼叫 generic_handle_domain_irq
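  • TODO: 搞懂 intc 與 hwirq 的意義。目前看起來 intc 會從 fdt 取得資訊(intc.base 是用 of_iomap(node, 0) 映射出來的);從上面的程式碼來看,hwirq 是該 CPU 的 pending register 中最低位被設起來的 bit 編號(ffs(stat) - 1),推測對應到某個中斷來源,但還不是很確定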