Try   HackMD

4. Virtual Memory

我在開發 day4、day5 的時候是使用 qemu5,那時候都可以動,後來開發 day6 的時候更新成了 qemu8,day4、day5 就不能動了,我有把 day6 修好,前面的我就不修了,請參考 day6 的 source code。

把 Kernel 映射到虛擬位置

爲了方便實作,我們把 Kernel 的虛擬位置直接映射到相同的實體位置,我們在此直接將 0x80000000 映射到 0x80000000

修改 mem.c,使 malloc 變成 4k align

這步驟很關鍵,因爲我們之後創建 page table 會使用 malloc,page table 必須是 4k align 的。

void page_init() { _num_pages = HEAP_SIZE / (PAGE_SIZE + sizeof(struct Page)) - 1; // -1 for align _alloc_start = (void*)((((uint64_t)HEAP_START + _num_pages * sizeof(struct Page) + 4095) / 4096) * 4096); // ceil to 4k _alloc_end = _alloc_start + (PAGE_SIZE * _num_pages); struct Page *page = (struct Page *)HEAP_START; for (uint32_t i = 0; i < _num_pages; i++) { page->flag = 0; page->ptr = _alloc_start + i * PAGE_SIZE; page++; } lock_init(&lock); }

新增 s_boot.h(注意:後面的 os.c 是以 `#include "s_mode.h"` 引入這個檔案,實際檔名請與 include 的名稱保持一致)

依照 RISC-V 特權規格,satp 所設定的位址轉換只在 s mode/u mode 下生效,m mode 一律直接使用實體位址,因此我們要切入 s mode:

static inline void write_mstatus(uint64_t value) { asm volatile("csrw mstatus, %0" : : "r"(value)); } static inline void write_mepc(uint64_t value) { asm volatile("csrw mepc, %0" : : "r"(value)); } static inline uint64_t read_mstatus() { uint64_t value; asm volatile("csrr %0, mstatus" : "=r"(value)); return value; } void switch_to_s_mode(void (*s_mode_entry)(void)) { // Set the mepc register to the S-mode entry point write_mepc((uint64_t)s_mode_entry); // Set the MPP field in the mstatus register to S-mode uint64_t mstatus = read_mstatus(); mstatus &= ~MSTATUS_MPP_MASK; mstatus |= MSTATUS_MPP_S; write_mstatus(mstatus); asm volatile("mret"); }

新增 vm.h

#pragma once #include "riscv.h" #include "mem.h" #include "uart.h" #include "plic.h" #define PAGE_SIZE 4096 #define PT_ENTRY_COUNT 512 #define PGSHIFT 12 #define PGROUNDUP(sz) (((sz)+PAGE_SIZE-1) & ~(PAGE_SIZE-1)) #define PGROUNDDOWN(a) (((a)) & ~(PAGE_SIZE-1)) #define PTE_V (1 << 0) #define PTE_R (1 << 1) #define PTE_W (1 << 2) #define PTE_X (1 << 3) #define PTE_U (1 << 4) #define PA2PTE(pa) ((((uint64_t)pa) >> 12) << 10) #define PTE2PA(pte) (((pte) >> 10) << 12) #define PTE_FLAGS(pte) ((pte) & 0x3FF) // extract the three 9-bit page table indices from a virtual address. #define PXMASK 0x1FF // 9 bits #define PXSHIFT(level) (PGSHIFT+(9*(level))) #define PX(level, va) ((((uint64_t) (va)) >> PXSHIFT(level)) & PXMASK) #define MAXVA (1L << (9 + 9 + 9 + 12 - 1)) #define SATP_SV39 (8L << 60) #define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64_t)pagetable) >> 12)) typedef uint64_t pte_t; typedef uint64_t pde_t; typedef uint64_t *pagetable_t; static inline void w_satp(uint64_t x) { asm volatile("csrw satp, %0" : : "r" (x)); } static inline void sfence_vma() { // the zero, zero means flush all TLB entries. asm volatile("sfence.vma zero, zero"); } void map_kernel(); void vm_init();

新增 vm.c

#include "vm.h"
#include "string.h"

/* Physical range occupied by the kernel and the virtual base it is mapped at. */
#define KERNEL_START_ADDR 0x80000000ll
#define KERNEL_END_ADDR 0x88000000ll
#define VIRTUAL_BASE 0x80000000ll

/* Root (level-2) page table of the kernel address space; 0 until kvm_init(). */
pagetable_t root_pt;

/*
 * Return a pointer to the level-0 PTE for `va` inside `pagetable`.
 * Missing intermediate tables are allocated and zeroed on the way down.
 * Returns 0 if `va` is not below MAXVA or an allocation fails.
 *
 * Table addresses taken from PTEs are dereferenced directly, so this must
 * run while those physical addresses are directly accessible.
 */
pte_t *walk(pagetable_t pagetable, uint64_t va)
{
    if (va >= MAXVA) {
        return 0;
    }

    for (int level = 2; level > 0; level--) {
        pte_t *pte = &pagetable[PX(level, va)];
        if (*pte & PTE_V) {
            pagetable = (pagetable_t)PTE2PA(*pte);
        } else {
            /* Page tables must be page-aligned; malloc is relied on for that. */
            pagetable = malloc(PAGE_SIZE);
            if (pagetable == 0) {
                return 0;
            }
            memset(pagetable, 0, PAGE_SIZE);
            *pte = PA2PTE(pagetable) | PTE_V;
        }
    }
    return &pagetable[PX(0, va)];
}

/*
 * Map the page containing `va` to the page containing `pa` with permission
 * bits `perm` (PTE_V is added automatically).
 * Returns 0 on success, -1 if no PTE could be obtained, -2 if the page is
 * already mapped.
 */
int map_page(pagetable_t pagetable, uint64_t va, uint64_t pa, int perm)
{
    va = PGROUNDDOWN(va);
    pa = PGROUNDDOWN(pa);

    pte_t *pte = walk(pagetable, va);
    if (pte == 0) {
        return -1;
    }
    if (*pte & PTE_V) {
        return -2;
    }
    *pte = PA2PTE(pa) | perm | PTE_V;
    return 0;
}

/*
 * Map every page touched by [va, va + size) to consecutive physical pages
 * starting at `pa`.
 * Every page in the range is attempted even if an earlier one fails.
 * Returns 0 if all pages were mapped (or size is 0), otherwise the first
 * non-zero code returned by map_page().
 */
int map_pages(pagetable_t pagetable, uint64_t va, uint64_t pa, uint64_t size, int perm)
{
    if (size == 0) {
        return 0;
    }

    uint64_t a = PGROUNDDOWN(va);
    uint64_t last = PGROUNDDOWN(va + size - 1);
    int status = 0;

    for (; a <= last; a += PAGE_SIZE, pa += PAGE_SIZE) {
        int rc = map_page(pagetable, a, pa, perm);
        if (rc != 0 && status == 0) {
            status = rc;
        }
    }
    return status;
}

/* Make a direct-map page table for the kernel. */
void kernel_mapping(pagetable_t pt)
{
    /* uart registers */
    map_page(pt, UART, UART, PTE_R | PTE_W);
    /* virtio mmio disk interface */
    map_page(pt, VIRTIO0, VIRTIO0, PTE_R | PTE_W);
    /* PLIC */
    map_pages(pt, PLIC_BASE, PLIC_BASE, 0x400000, PTE_R | PTE_W);
    /*
     * Kernel image and RAM: virtual VIRTUAL_BASE -> physical
     * KERNEL_START_ADDR. Both constants are currently equal, so this is an
     * identity mapping.
     */
    map_pages(pt, VIRTUAL_BASE, KERNEL_START_ADDR,
              KERNEL_END_ADDR - KERNEL_START_ADDR, PTE_W | PTE_R | PTE_X);
}

/*
 * Allocate the root page table and fill in the kernel mappings.
 * On allocation failure root_pt is left as 0.
 */
void kvm_init(void)
{
    root_pt = malloc(PAGE_SIZE);
    if (root_pt == 0) {
        return;
    }
    memset(root_pt, 0, PAGE_SIZE);
    kernel_mapping(root_pt);
}

/*
 * Build the kernel page table and install it in satp.
 * If the root table could not be allocated, satp is left untouched so that
 * a null root is never loaded.
 */
void vm_init()
{
    kvm_init();
    if (root_pt == 0) {
        return;
    }
    /* wait for any previous writes to the page table memory to finish. */
    sfence_vma();
    w_satp(MAKE_SATP(root_pt));
    /* flush stale entries from the TLB. */
    sfence_vma();
}

這個部分很難,我們來稍微看一下上面的 Macro 定義和 walk 函式:

#define PA2PTE(pa) ((((uint64_t)pa) >> 12) << 10) #define PTE2PA(pte) (((pte) >> 10) << 12) #define PXMASK 0x1FF // 9 bits #define PX(level, va) ((((uint64_t) (va)) >> PXSHIFT(level)) & PXMASK) pte_t* walk(pagetable_t pagetable, uint64_t va) { if(va >= MAXVA) return 0; for(int level = 2; level > 0; level--) { pte_t *pte = &pagetable[PX(level, va)]; if(*pte & PTE_V) { pagetable = (pagetable_t)PTE2PA(*pte); } else { if((pagetable = (pde_t*)malloc(PAGE_SIZE)) == 0) return 0; _clear(pagetable, PAGE_SIZE); *pte = PA2PTE(pagetable) | PTE_V; } } return &pagetable[PX(0, va)]; }

它的功能很簡單:沿著三層 page table 往下走,找到 va 對應的最底層(level 0)PTE,然後回傳指向該 PTE 的指標;途中若某一層的子表還不存在,就配置一個清零的新頁並掛上去。va 超出 MAXVA 或配置失敗時回傳 0。

PX(i, va) 這個 Macro 的意思就是,在第 i 層的情況下,va 應該屬於哪個子表/Entry?

PTE2PA 和 PA2PTE 在做的事情,基本上就是將 PTE 的格式轉換成 address 以及反向。

修改 os.c

#include "printf.h" #include "mem.h" #include "time.h" #include "trap.h" #include "plic.h" #include "vm.h" #include "s_mode.h" #include "usys.h" void delay(int count) { count *= 50000; while (count--) { asm volatile ("nop"); } } void kernel_main() { printf("Entered S Mode!\n"); sys_free(sys_malloc(8964 * 2)); while(1) { printf("OS Loop\n"); delay(10000); }; } void boot() { uart_init(); page_init(); trap_init(); timer_init(); plic_init(); vm_init(); } int os_main() { boot(); switch_to_s_mode(kernel_main); return 0; }

目錄結構

參考資料

  1. ChatGPT
  2. xv6-riscv
  3. mythili-cs347_autumn2016