6.1810
本次作業實作 Copy-on-Write Fork,
使用 RISC-V PTE 中保留給軟體使用的 RSW bit(bit 8)來標記 page 是否為 COW page
# kernel/riscv.h
#define PTE_COW (1L << 8) // copy-on-write marker, kept in an RSW (reserved-for-software) PTE bit
fork() 透過 uvmcopy() 建立 child 的 pagetable。COW 版本不再複製實體 page,
而是讓 child 的 PTE 指向與 parent 相同的實體 page,並把 parent 與 child 雙方原本可寫的 PTE 清除 write bit、設上 COW bit,同時將該 page 的 refcount 加一,
這樣一來,不論是 parent 還是 child 寫入 COW page,都會觸發 store page fault,到那時才真正複製 page
# kernel/vm.c
// Copy-on-write version of uvmcopy(): share the parent's memory with the
// child instead of copying it.
//
// For every page in [0, sz) of `old`:
//   - a writable PTE in `old` loses PTE_W and gains PTE_COW,
//   - `new` gets a mapping of the same physical page with the same flags,
//   - the page's reference count is incremented for the new mapping.
//
// Returns 0 on success, -1 if a mapping could not be installed.
// On failure every mapping already installed in `new` is removed again
// and the reference it held is dropped.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");

    // Downgrade writable pages so the first store by either side faults.
    if(*pte & PTE_W)
      *pte = (*pte & ~PTE_W) | PTE_COW;

    pa = PTE2PA(*pte);
    flags = PTE_FLAGS(*pte);
    if(mappages(new, i, PGSIZE, pa, flags) != 0)
      goto err;

    // Count the reference only once the child mapping really exists, so
    // the error path never has to undo a count for an unmapped page.
    refcount_inc(pa);
  }
  return 0;

 err:
  // Pages 0 .. i/PGSIZE-1 are mapped in `new`. Unmapping with do_free=1
  // hands each one to kfree(), which only drops the child's reference
  // because the parent still holds its own.
  //
  // The parent's PTEs are deliberately left read-only/COW: restoring
  // PTE_W blindly would be wrong for pages that were already shared
  // before this call, and the parent's next store fault goes through
  // get_new_page(), which simply re-enables PTE_W for a sole owner.
  //
  // NOTE(review): the page-table pages of `new` are not freed here; this
  // assumes the caller tears `new` down on failure, as stock xv6 fork()
  // does via freeproc() -- confirm against the caller.
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}
接著處理 usertrap():當 trap 的原因是 store page fault(scause == 15)時,表示程式寫入了不可寫的 page,可能是 COW page,此時呼叫 get_new_page() 為它準備一份可寫的 page;若 get_new_page() 失敗就終止該 process
# kernel/trap.c
usertrap(void)
intr_on();
syscall();
} else if (r_scause() == 15) {
// Store Page fault
if (get_new_page(p->pagetable, r_stval()) != 0)
setkilled(p);
} else if((which_dev = devintr()) != 0){
// ok
} else {
get_new_page() 首先檢查 pte 的屬性,接著檢查 page 的使用情況,如果只有一人在用,那就不需要取得新的 page。
如果取得新的 page 失敗,要將 refcount 加回去,避免回收 child process 時共享的 page 也被收回
// Resolve a store to a copy-on-write page: make the page containing user
// virtual address va privately writable in pagetable.
//
// If this mapping is the only reference to the physical page, the PTE is
// simply switched from PTE_COW back to PTE_W. Otherwise a fresh page is
// allocated, the old contents are copied into it, and the PTE is pointed
// at the copy.
//
// Returns 0 on success. Returns -1, leaving the PTE untouched, when va is
// a kernel address, is not mapped, is not a COW page, or no memory is
// available.
int
get_new_page(pagetable_t pagetable, uint64 va)
{
  pte_t *pte;
  pte_t writable;
  uint64 old;
  char *new;

  // Never operate on kernel addresses on behalf of a user fault.
  if (va >= KERNBASE)
    return -1;
  va = PGROUNDDOWN(va);

  // va comes straight from a user fault (stval), so a missing or invalid
  // PTE is a user error: report failure instead of panicking the kernel.
  if ((pte = walk(pagetable, va, 0)) == 0)
    return -1;
  if ((*pte & PTE_V) == 0)
    return -1;
  if ((*pte & PTE_COW) == 0)
    return -1;

  // Compute the writable form now, but only store it once we know the
  // operation succeeds; on failure the mapping must stay read-only/COW so
  // a surviving process cannot write into a page it still shares.
  writable = (*pte | PTE_W) & ~PTE_COW;
  old = PTE2PA(*pte);

  // refcount_dec() returns the count before the decrement and leaves a
  // count of 1 unchanged, so 1 means we are the sole owner.
  if (refcount_dec(old) == 1) {
    *pte = writable;
    return 0;
  }

  // NOTE(review): our reference to `old` is already dropped here, so
  // another owner calling kfree() before the copy below could release
  // the page. Closing that window needs the copy to happen before the
  // decrement.
  if ((new = (char*)kalloc()) == 0) {
    // Take the reference back: the PTE still points at the shared page.
    refcount_inc(old);
    return -1;
  }

  memmove(new, (void*)old, PGSIZE);
  *pte = PA2PTE((uint64)new) | PTE_FLAGS(writable);
  return 0;
}
kfree()時,根據 refcount 判斷需不需要真的收回 page
void
@@ -51,6 +73,9 @@ kfree(void *pa)
if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
panic("kfree");
if (refcount_dec((uint64)pa) > 1)
return;
// Fill with junk to catch dangling refs.
memset(pa, 1, PGSIZE);
@@ -76,7 +101,46 @@ kalloc(void)
kmem.freelist = r->next;
release(&kmem.lock);
- if(r)
if(r) {
memset((char*)r, 5, PGSIZE); // fill with junk
refcount_set((uint64)r, 1);
}
return (void*)r;
}
新增一個 array 存放 refcount
# kernel/kalloc.c
struct {
struct run *freelist;
} kmem;
+struct { // reference counts for physical pages, shared by refcount_inc/set/dec
+ struct spinlock lock; // held by refcount_inc/set/dec while they access array[]
+ uint *array; // one counter per page, indexed by (pa - end) >> 12
+ uint size; // number of entries in array; set by kinit()
+} pg_refcount;
+
+// Start every entry of the reference-count table at 1.
+void
+initrefcount()
+{
+  uint idx = 0;
+
+  while (idx < pg_refcount.size) {
+    pg_refcount.array[idx] = 1;
+    idx++;
+  }
+}
+
// Set up the allocator lock and the page reference-count table, then hand
// the remaining physical memory to the free list.
void
kinit()
{
initlock(&kmem.lock, "kmem");
- freerange(end, (void*)PHYSTOP);
+ initlock(&pg_refcount.lock, "pg_refcount");
+
+ uint64 range = ((PHYSTOP - (uint64)end) >> 12) * sizeof(*pg_refcount.array); // bytes for one counter per 4096-byte page between end and PHYSTOP
+ range = PGROUNDUP(range); // the table takes a whole number of pages
+ uint64 size = range / sizeof(*pg_refcount.array); // counters that fit in the rounded-up table
+ pg_refcount.array = (uint*)(PHYSTOP - range); // table occupies the top `range` bytes below PHYSTOP
+ pg_refcount.size = size;
+
+ initrefcount(); // every counter starts at 1 before freerange() runs
+ freerange(end, (void*)(PHYSTOP-range)); // free only the memory below the table
}
+// Add one reference to the physical page at pa.
+void
+refcount_inc(uint64 pa)
+{
+  uint64 slot = (pa - (uint64)end) >> 12; // counter index for this page
+
+  acquire(&pg_refcount.lock);
+  pg_refcount.array[slot] += 1;
+  release(&pg_refcount.lock);
+}
+
+// Overwrite the reference count of the physical page at pa with val.
+void
+refcount_set(uint64 pa, uint val)
+{
+  uint64 slot = (pa - (uint64)end) >> 12; // counter index for this page
+
+  acquire(&pg_refcount.lock);
+  *(pg_refcount.array + slot) = val;
+  release(&pg_refcount.lock);
+}
+
+// Drop one reference from the physical page at pa and return the count
+// as it was before the call. A count of 1 is reported but left unchanged,
+// so the stored count never falls below 1.
+uint
+refcount_dec(uint64 pa)
+{
+  uint64 slot = (pa - (uint64)end) >> 12; // counter index for this page
+  uint before;
+
+  acquire(&pg_refcount.lock);
+  before = pg_refcount.array[slot];
+  if (before != 1)
+    pg_refcount.array[slot] = before - 1;
+  release(&pg_refcount.lock);
+
+  return before;
+}
最後 copyout 時也要判斷有無 cow page 的情況
// Copy from kernel to user.
// Copy len bytes from src to virtual address dstva in a given page table.
// Return 0 on success, -1 on error.
@@ -358,6 +412,11 @@ copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
+ pte_t *pte = walk(pagetable, va0, 0);
+ if ((*pte & PTE_COW) && get_new_page(pagetable, va0) != 0)
+ return -1;
+ // pa is updated.
+ pa0 = PTE2PA(*pte);
n = PGSIZE - (dstva - va0);
if(n > len)
n = len;