# 5. VIRTIO 與 Filesystem ## VIRTIO ### 新增 ds.h ```c= #pragma once #include "riscv.h" #define BLOCK_SIZE 1024 struct buf { int valid; // has data been read from disk? int disk; // does disk "own" buf? int successful; // is the reading successful uint32_t dev; uint32_t blockno; lock_t lock; uint32_t refcnt; struct buf *prev; // LRU cache list struct buf *next; unsigned char data[1024]; }; ``` ### 新增 virtio.h ```c= #pragma once #include "printf.h" #include "riscv.h" #include "lock.h" #include "mem.h" #include "ds.h" #include <stddef.h> #include <stdint.h> #define VIRTIO0 0x10001000 #define VIRTIO_MMIO_MAGIC_VALUE 0x000 // 0x74726976 #define VIRTIO_MMIO_VERSION 0x004 // version; should be 2 #define VIRTIO_MMIO_DEVICE_ID 0x008 // device type; 1 is net, 2 is disk #define VIRTIO_MMIO_VENDOR_ID 0x00c // 0x554d4551 #define VIRTIO_MMIO_DEVICE_FEATURES 0x010 #define VIRTIO_MMIO_DRIVER_FEATURES 0x020 #define VIRTIO_MMIO_QUEUE_SEL 0x030 // select queue, write-only #define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 // max size of current queue, read-only #define VIRTIO_MMIO_QUEUE_NUM 0x038 // size of current queue, write-only #define VIRTIO_MMIO_QUEUE_READY 0x044 // ready bit #define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 // write-only #define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only #define VIRTIO_MMIO_INTERRUPT_ACK 0x064 // write-only #define VIRTIO_MMIO_STATUS 0x070 // read/write #define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 // physical address for descriptor table, write-only #define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 #define VIRTIO_MMIO_DRIVER_DESC_LOW 0x090 // physical address for available ring, write-only #define VIRTIO_MMIO_DRIVER_DESC_HIGH 0x094 #define VIRTIO_MMIO_DEVICE_DESC_LOW 0x0a0 // physical address for used ring, write-only #define VIRTIO_MMIO_DEVICE_DESC_HIGH 0x0a4 // status register bits, from qemu virtio_config.h #define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 #define VIRTIO_CONFIG_S_DRIVER 2 #define VIRTIO_CONFIG_S_DRIVER_OK 4 #define VIRTIO_CONFIG_S_FEATURES_OK 8 // device feature bits #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ #define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ #define VIRTIO_F_ANY_LAYOUT 27 #define VIRTIO_RING_F_INDIRECT_DESC 28 #define VIRTIO_RING_F_EVENT_IDX 29 // this many virtio descriptors. // must be a power of two. #define NUM 8 // a single descriptor, from the spec. struct virtq_desc { uint64_t addr; uint32_t len; uint16_t flags; uint16_t next; }; #define VRING_DESC_F_NEXT 1 // chained with another descriptor #define VRING_DESC_F_WRITE 2 // device writes (vs read) // the (entire) avail ring, from the spec. struct virtq_avail { uint16_t flags; // always zero uint16_t idx; // driver will write ring[idx] next uint16_t ring[NUM]; // descriptor numbers of chain heads uint16_t unused; }; // one entry in the "used" ring, with which the // device tells the driver about completed requests. struct virtq_used_elem { uint32_t id; // index of start of completed descriptor chain uint32_t len; }; struct virtq_used { uint16_t flags; // always zero uint16_t idx; // device increments when it adds a ring[] entry struct virtq_used_elem ring[NUM]; }; // these are specific to virtio block devices, e.g. disks, // described in Section 5.2 of the spec. #define VIRTIO_BLK_T_IN 0 // read the disk #define VIRTIO_BLK_T_OUT 1 // write the disk // the format of the first descriptor in a disk request. // to be followed by two more descriptors containing // the block, and a one-byte status. struct virtio_blk_req { uint32_t type; // VIRTIO_BLK_T_IN or ..._OUT uint32_t reserved; uint64_t sector; }; void virtio_disk_rw(struct buf *b, int write); void virtio_disk_init(void); ``` ### 新增 virtio.c ```c= #include "printf.h" #include "riscv.h" #include "lock.h" #include "virtio.h" #include "mem.h" // the address of virtio mmio register r. #define R(r) ((volatile uint32_t *)(VIRTIO0 + (r))) #define BSIZE 512 static struct disk { // a set (not a ring) of DMA descriptors, with which the // driver tells the device where to read and write individual // disk operations. there are NUM descriptors. // most commands consist of a "chain" (a linked list) of a couple of // these descriptors. struct virtq_desc *desc; // a ring in which the driver writes descriptor numbers // that the driver would like the device to process. it only // includes the head descriptor of each chain. the ring has // NUM elements. struct virtq_avail *avail; // a ring in which the device writes descriptor numbers that // the device has finished processing (just the head of each chain). // there are NUM used ring entries. struct virtq_used *used; // our own book-keeping. char free[NUM]; // is a descriptor free? uint16_t used_idx; // we've looked this far in used[2..NUM]. // track info about in-flight operations, // for use when completion interrupt arrives. // indexed by first descriptor index of chain. struct { struct buf *b; char status; } info[NUM]; // disk command headers. // one-for-one with descriptors, for convenience. struct virtio_blk_req ops[NUM]; lock_t vdisk_lock; } disk; void virtio_disk_init(void) { uint32_t status = 0; lock_init(&disk.vdisk_lock); if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 || *R(VIRTIO_MMIO_VERSION) != 2 || *R(VIRTIO_MMIO_DEVICE_ID) != 2 || *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){ panic("could not find virtio disk"); } // reset device *R(VIRTIO_MMIO_STATUS) = status; // set ACKNOWLEDGE status bit status |= VIRTIO_CONFIG_S_ACKNOWLEDGE; *R(VIRTIO_MMIO_STATUS) = status; // set DRIVER status bit status |= VIRTIO_CONFIG_S_DRIVER; *R(VIRTIO_MMIO_STATUS) = status; // negotiate features uint64_t features = *R(VIRTIO_MMIO_DEVICE_FEATURES); features &= ~(1 << VIRTIO_BLK_F_RO); features &= ~(1 << VIRTIO_BLK_F_SCSI); features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE); features &= ~(1 << VIRTIO_BLK_F_MQ); features &= ~(1 << VIRTIO_F_ANY_LAYOUT); features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); *R(VIRTIO_MMIO_DRIVER_FEATURES) = features; // tell device that feature negotiation is complete. status |= VIRTIO_CONFIG_S_FEATURES_OK; *R(VIRTIO_MMIO_STATUS) = status; // re-read status to ensure FEATURES_OK is set. status = *R(VIRTIO_MMIO_STATUS); if(!(status & VIRTIO_CONFIG_S_FEATURES_OK)) panic("virtio disk FEATURES_OK unset"); // initialize queue 0. *R(VIRTIO_MMIO_QUEUE_SEL) = 0; // ensure queue 0 is not in use. if(*R(VIRTIO_MMIO_QUEUE_READY)) panic("virtio disk should not be ready"); // check maximum queue size. uint32_t max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX); if(max == 0) panic("virtio disk has no queue 0"); if(max < NUM) panic("virtio disk max queue too short"); // allocate and zero queue memory. disk.desc = malloc(4096); disk.avail = malloc(4096); disk.used = malloc(4096); if(!disk.desc || !disk.avail || !disk.used) panic("virtio disk malloc(4096)"); _clear(disk.desc, PAGE_SIZE); _clear(disk.avail, PAGE_SIZE); _clear(disk.used, PAGE_SIZE); // set queue size. *R(VIRTIO_MMIO_QUEUE_NUM) = NUM; // write physical addresses. *R(VIRTIO_MMIO_QUEUE_DESC_LOW) = (uint64_t)disk.desc; *R(VIRTIO_MMIO_QUEUE_DESC_HIGH) = (uint64_t)disk.desc >> 32; *R(VIRTIO_MMIO_DRIVER_DESC_LOW) = (uint64_t)disk.avail; *R(VIRTIO_MMIO_DRIVER_DESC_HIGH) = (uint64_t)disk.avail >> 32; *R(VIRTIO_MMIO_DEVICE_DESC_LOW) = (uint64_t)disk.used; *R(VIRTIO_MMIO_DEVICE_DESC_HIGH) = (uint64_t)disk.used >> 32; // queue is ready. *R(VIRTIO_MMIO_QUEUE_READY) = 0x1; // all NUM descriptors start out unused. for(int i = 0; i < NUM; i++) disk.free[i] = 1; // tell device we're completely ready. status |= VIRTIO_CONFIG_S_DRIVER_OK; *R(VIRTIO_MMIO_STATUS) = status; // plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ. } // find a free descriptor, mark it non-free, return its index. static int alloc_desc() { for(int i = 0; i < NUM; i++){ if(disk.free[i]){ disk.free[i] = 0; return i; } } return -1; } // mark a descriptor as free. static void free_desc(int i) { if(i >= NUM) panic("free_desc 1"); if(disk.free[i]) panic("free_desc 2"); disk.desc[i].addr = 0; disk.desc[i].len = 0; disk.desc[i].flags = 0; disk.desc[i].next = 0; disk.free[i] = 1; /* wakeup(&disk.free[0]); */ } // free a chain of descriptors. static void free_chain(int i) { while(1){ int flag = disk.desc[i].flags; int nxt = disk.desc[i].next; free_desc(i); if(flag & VRING_DESC_F_NEXT) i = nxt; else break; } } // allocate three descriptors (they need not be contiguous). // disk transfers always use three descriptors. static int alloc3_desc(int *idx) { for(int i = 0; i < 3; i++){ idx[i] = alloc_desc(); if(idx[i] < 0){ for(int j = 0; j < i; j++) free_desc(idx[j]); return -1; } } return 0; } void virtio_disk_rw(struct buf *b, int write) { uint64_t sector = b->blockno * (BLOCK_SIZE / 512); lock_acquire(&disk.vdisk_lock); // the spec's Section 5.2 says that legacy block operations use // three descriptors: one for type/reserved/sector, one for the // data, one for a 1-byte status result. // allocate the three descriptors. int idx[3]; while(1){ if(alloc3_desc(idx) == 0) { break; } /* sleep(&disk.free[0], &disk.vdisk_lock); */ } // format the three descriptors. // qemu's virtio-blk.c reads them. struct virtio_blk_req *buf0 = &disk.ops[idx[0]]; if(write) buf0->type = VIRTIO_BLK_T_OUT; // write the disk else buf0->type = VIRTIO_BLK_T_IN; // read the disk buf0->reserved = 0; buf0->sector = sector; disk.desc[idx[0]].addr = (uint64_t) buf0; disk.desc[idx[0]].len = sizeof(struct virtio_blk_req); disk.desc[idx[0]].flags = VRING_DESC_F_NEXT; disk.desc[idx[0]].next = idx[1]; disk.desc[idx[1]].addr = (uint64_t) b->data; disk.desc[idx[1]].len = BLOCK_SIZE; if(write) disk.desc[idx[1]].flags = 0; // device reads b->data else disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT; disk.desc[idx[1]].next = idx[2]; disk.info[idx[0]].status = 0xff; // device writes 0 on success disk.desc[idx[2]].addr = (uint64_t) &disk.info[idx[0]].status; disk.desc[idx[2]].len = 1; disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status disk.desc[idx[2]].next = 0; // record struct buf for virtio_disk_intr(). b->disk = 1; disk.info[idx[0]].b = b; // tell the device the first index in our chain of descriptors. disk.avail->ring[disk.avail->idx % NUM] = idx[0]; __sync_synchronize(); // tell the device another avail ring entry is available. disk.avail->idx += 1; // not % NUM ... __sync_synchronize(); *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number // Wait for virtio_disk_intr() to say request has finished. while(b->disk == 1) { /* sleep(b, &disk.vdisk_lock); */ // busy waiting } disk.info[idx[0]].b = 0; free_chain(idx[0]); lock_free(&disk.vdisk_lock); } void virtio_disk_intr() { /* lock_acquire(&disk.vdisk_lock); */ // the device won't raise another interrupt until we tell it // we've seen this interrupt, which the following line does. // this may race with the device writing new entries to // the "used" ring, in which case we may process the new // completion entries in this interrupt, and have nothing to do // in the next interrupt, which is harmless. *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3; __sync_synchronize(); // the device increments disk.used->idx when it // adds an entry to the used ring. while(disk.used_idx != disk.used->idx){ __sync_synchronize(); int id = disk.used->ring[disk.used_idx % NUM].id; struct buf *b = disk.info[id].b; if(disk.info[id].status != 0) { printf("virtio_disk_intr status failed.\n"); b->successful = 0; } else { b->successful = 1; } b->disk = 0; // disk is done with buf /* wakeup(b); */ disk.used_idx += 1; } /* lock_free(&disk.vdisk_lock); */ } ``` ### 修改 plic.h 追加一條: ```c= #define VIRTIO_IRQ 1 ``` ### 修改 plic.c,注冊 virio 的 interrupt ```c= void plic_init() { int hart = r_tp(); // The "0" is reserved, and the lowest priority is "1". *(uint32_t *)PLIC_PRIORITY(UART0_IRQ) = 1; /* Enable UART0 */ *(uint32_t *)PLIC_MENABLE(hart) |= (1 << UART0_IRQ); *(uint32_t *)PLIC_MENABLE(hart) |= (1 << VIRTIO_IRQ); *(uint32_t *)(PLIC_BASE + UART0_IRQ*4) = 1; *(uint32_t *)(PLIC_BASE + VIRTIO_IRQ*4) = 1; /* Set priority threshold for UART0. */ *(uint32_t *)PLIC_MTHRESHOLD(hart) = 0; } ``` ### 修改 os.c,測試我們的 virtio ```c= #include "printf.h" #include "mem.h" #include "time.h" #include "trap.h" #include "plic.h" #include "vm.h" #include "virtio.h" #include "s_mode.h" #include "string.h" #include "usys.h" #define BSIZE 512 void test_virtio_disk(void) { const int test_blockno = 100; char data[BSIZE]; struct buf b; b.blockno = test_blockno; printf("Reading data from VIRTIO.\n"); virtio_disk_rw(&b, 0); memcpy(data, b.data, BSIZE); printf("Original data at block %d: %s\n", test_blockno, data); strcpy(b.data, "Hello, Virtio Disk!"); virtio_disk_rw(&b, 1); memset(b.data, 0, BSIZE); virtio_disk_rw(&b, 0); printf("New data at block %d: %s\n", test_blockno, b.data); printf("Disk test succeeded!\n"); } void delay(int count) { count *= 50000; while (count--) { asm volatile ("nop"); } } void kernel_main() { printf("Entered S Mode!\n"); test_virtio_disk(); while(1) { printf("OS Loop\n"); delay(10000); }; } void boot() { uart_init(); page_init(); trap_init(); timer_init(); plic_init(); vm_init(); virtio_disk_init(); } int os_main() { boot(); switch_to_s_mode(kernel_main); return 0; } ``` ### 測試 先創造一個 2M 的檔案當作硬碟: ``` dd if=/dev/zero of=disk.img bs=1M count=2 ``` 修改我們的 Makefile: ``` run: all qemu-system-riscv64 -m 128M -M virt -serial stdio -display none -bios none -kernel kernel.img -global virtio-mmio.force-legacy=false -drive file=disk.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 ``` ![](https://hackmd.io/_uploads/Sk2wwlqb6.png) 來看看 `disk.img`: ![](https://hackmd.io/_uploads/Byz5Deq-p.png) 如果我們寫一個超出硬碟 size 的位置: ![](https://hackmd.io/_uploads/B1mB2hqZp.png) ## Block IO ### bio.h ```c= #include "lock.h" #include "string.h" #include "mem.h" #include "virtio.h" #include "ds.h" int bread(uint32_t dev, uint32_t blockno, char * buffer); int bwrite(uint32_t dev, uint32_t blockno, const char *data); ``` ### bio.c ```c= #include "bio.h" int bread(uint32_t dev, uint32_t blockno, char * buffer) { struct buf *b = malloc(sizeof(struct buf)); b->dev = dev; b->blockno = blockno; virtio_disk_rw(b, 0); if(b->successful == 0) return 0; memcpy(buffer, (void *)b->data, BLOCK_SIZE); free(b); return 1; } int bwrite(uint32_t dev, uint32_t blockno, const char *data) { struct buf *b = malloc(sizeof(struct buf)); b->dev = dev; b->blockno = blockno; memcpy(b->data, data, BLOCK_SIZE); virtio_disk_rw(b, 1); int result = b->successful; free(b); return result; } ``` ### os.c ```c= void test_bio(void) { const int test_blockno = 1; printf("Writing \"Hello World!\" to disk.\n"); char data[1024] = "Hello World!"; int w_status = bwrite(0, 1, data); if(w_status) { printf("writing successful.\n"); } memset(data, 0, BLOCK_SIZE); int r_status = bread(0, 1, &data[0]); if(r_status) { printf("reading successful.\n"); printf("Read data: %s\n", data); } } void kernel_main() { printf("Entered S Mode!\n"); test_bio(); while(1) { printf("OS Loop\n"); delay(10000); }; } ``` ## Filesystem 由於修改的 code 太多,這邊僅節錄比較重要的部分。 :::warning 這是我本人隨便寫的陽春 file system,不具備很多功能。 ::: ### 新增 fs.h ```c= #include "riscv.h" #include "bio.h" #include "string.h" #define MAX_FILENAME_SIZE 256 #define DATA_SIZE (BLOCK_SIZE - sizeof(int8_t) - 3 * sizeof(int64_t)) #define MAX_BLOCK 2048 typedef struct { /* 0 -> not initialized * 1 -> initialized * */ int8_t flag; uint64_t root_directory_block; // Block number of the root directory. unsigned char reserved[1024]; // Make sure the struct is larger than 1 block } fs_metadata_t; typedef struct { /* 0 -> nothing here * 1 -> there is an inode here * */ uint8_t flag; uint8_t isDirectory; // Is this inode indicating a directory? char name[MAX_FILENAME_SIZE]; // Name of the file. uint64_t parent; // parent inode uint64_t starting_block; // Starting block of the file. uint64_t file_size; // Size of the file in bytes. uint64_t creation_time; // File creation time. uint64_t modification_time; // Last modification time. uint64_t access_time; // Last access time. uint64_t addr; // The location of this inode(unit: block) unsigned char reserved[1024]; // Make sure the struct is larger than 1 block } inode_t; typedef struct { /* 0 -> nothing here * 1 -> it is referring an inode here * 2 -> it is the end of the list, nothing here. * */ uint8_t flag; uint64_t addr; uint64_t inode_block; uint64_t next; unsigned char reserved[1024]; // Make sure the struct is larger than 1 block } ilist_t; typedef struct { int8_t flag; unsigned char data[DATA_SIZE]; uint64_t addr; uint64_t prev; uint64_t next; } block_t; inline size_t min(size_t a, size_t b) { return a < b ? a : b; } uint64_t dalloc(); void dfree(uint64_t addr); void format(); void dump_fs(); void dump_directory(inode_t inode, int ident); void dump_name(inode_t inode, int ident); int fs_init(); int fs_create(const char *path, const char *filename); int fs_write(const char *path, const char *data, size_t size); size_t fs_read(const char *path, char *buffer, size_t size); int fs_mkdir(const char *path, const char *dirname); int fs_rm(const char *path); int fs_rmdir(const char *path); ``` ### 新增 fs.c ```c= #include "fs.h" uint64_t dalloc() { block_t block; for(uint64_t i = 1; i < MAX_BLOCK; i++) { bread(0, i, &block); if(block.flag == 0) { memset(&block, 0, BLOCK_SIZE); block.flag = 1; bwrite(0, i, &block); return i; } } return MAX_BLOCK; } void dfree(uint64_t addr) { block_t block; memset(&block, 0, BLOCK_SIZE); bwrite(0, addr, &block); } void format() { char zero_block[BLOCK_SIZE] = ""; for(uint64_t i = 0; i < MAX_BLOCK; i++) { bwrite(0, i, &zero_block); } } void dump_fs() { fs_metadata_t superblock; bread(0, 0, &superblock); inode_t root; bread(0, superblock.root_directory_block, &root); dump_directory(root, 0); } void dump_directory(inode_t inode, int ident) { ilist_t ilist; bread(0, inode.starting_block, &ilist); dump_name(inode, ident); while(ilist.flag != 2) { inode_t subInode; bread(0, ilist.inode_block, &subInode); if(subInode.isDirectory) dump_directory(subInode, ident + 2); else dump_name(subInode, ident + 2); bread(0, ilist.next, &ilist); } } void dump_name(inode_t inode, int ident) { for(int i = 0; i < ident * 2; i++) printf(" "); if(inode.name[0] == 0) printf("/\n"); else printf("%s\n", inode.name); } inode_t* find_inode(const char *path) { inode_t* res = malloc(sizeof(inode_t)); fs_metadata_t superblock; bread(0, 0, &superblock); superblock.flag = 1; if (strcmp(path, "/") == 0) { bread(0, superblock.root_directory_block, res); return res; } inode_t currentDirectory, nextDirectory; bread(0, superblock.root_directory_block, &currentDirectory); char *temp_path = strdup(path); char *token = strtok(temp_path, "/"); while (token != NULL) { ilist_t ilist; bread(0, currentDirectory.starting_block, &ilist); while (ilist.flag != 2) { bread(0, ilist.inode_block, &nextDirectory); /* printf("%d %d %d, token = %s, name = %s, %d\n", ilist.flag, ilist.addr, ilist.next, token, nextDirectory.name, strcmp(nextDirectory.name, token)); */ if (nextDirectory.flag && strcmp(nextDirectory.name, token) == 0) { break; } bread(0, ilist.next, &ilist); } if(ilist.flag == 2) return NULL; token = strtok(NULL, "/"); if (token) { currentDirectory = nextDirectory; } } free(temp_path); memcpy(res, &nextDirectory, sizeof(inode_t)); return res; } int fs_init() { fs_metadata_t superblock; bread(0, 0, &superblock); if(superblock.flag == 1) return -1; // init metadata uint64_t rootBlock = dalloc(); memset(&superblock, 0, sizeof(fs_metadata_t)); superblock.flag = 1; superblock.root_directory_block = rootBlock; if(bwrite(0, 0, &superblock)) return -2; // init root directory inode uint64_t rootList = dalloc(); inode_t rootDirectory; memset(&rootDirectory, 0, sizeof(inode_t)); rootDirectory.flag = 1; strcpy(rootDirectory.name, ""); rootDirectory.parent = rootBlock; rootDirectory.starting_block = rootList; rootDirectory.isDirectory = 1; rootDirectory.addr = rootBlock; if(bwrite(0, rootBlock, &rootDirectory)) return -2; // init root directory list node ilist_t rootDirectoryList; memset(&rootDirectoryList, 0, sizeof(ilist_t)); rootDirectoryList.flag = 2; // mark the list is empty rootDirectoryList.addr = rootList; if(bwrite(0, rootList, &rootDirectoryList)) return -2; return 0; } int fs_create(const char *path, const char *filename) { // Step 1: Find the target directory using the provided path. inode_t* parentDir = find_inode(path); if (parentDir == NULL || parentDir->flag == 0 || parentDir->isDirectory == 0) { return -1; // Error: Invalid directory or path not found. } // Step 2: Check if a file with the same name already exists in the directory. ilist_t ilist; bread(0, parentDir->starting_block, &ilist); while (ilist.flag != 2) { inode_t inode; bread(0, ilist.inode_block, &inode); if (inode.flag && !inode.isDirectory && strcmp(inode.name, filename) == 0) { return -2; // Error: File with the same name already exists. } bread(0, ilist.next, &ilist); } // Step 3: Allocate an inode and a data block for the new file. uint64_t newInodeBlock = dalloc(); uint64_t newDataBlock = dalloc(); if (newInodeBlock == MAX_BLOCK || newDataBlock == MAX_BLOCK) { return -3; // Error: No space left on the device. } // Step 4: Write the metadata for the new file into the allocated inode. inode_t newFile; memset(&newFile, 0, sizeof(inode_t)); newFile.flag = 1; newFile.addr = newInodeBlock; strncpy(newFile.name, filename, MAX_FILENAME_SIZE); newFile.parent = parentDir->addr; newFile.starting_block = newDataBlock; if (bwrite(0, newInodeBlock, &newFile)) { return -4; // Error: Failed to write the new inode to disk. } // Step 5: Update the current block. block_t newBlock; memset(&newBlock, 0, sizeof(block_t)); newBlock.flag = 1; newBlock.addr = newDataBlock; newBlock.next = 0; newBlock.prev = 0; memset(newBlock.data, 0, DATA_SIZE); if (bwrite(0, newDataBlock, &newBlock)) { return -4; // Error: Failed to write the new inode to disk. } // Step 6: Update the parent directory to include a reference to the new file. ilist_t newIlist; memset(&newIlist, 0, sizeof(ilist_t)); newIlist.flag = 2; bread(0, parentDir->starting_block, &ilist); while (ilist.flag != 2) { bread(0, ilist.next, &ilist); } newIlist.flag = 2; newIlist.addr = dalloc(); ilist.flag = 1; ilist.inode_block = newInodeBlock; ilist.next = newIlist.addr; if (bwrite(0, ilist.addr, &ilist)) { return -5; // Error: Failed to update the parent directory. } if (bwrite(0, newIlist.addr, &newIlist)) { return -6; // Error: Failed to update the parent directory. } free(parentDir); return 0; } int fs_write(const char *path, const char *data, size_t size) { // Step 1: Find the target file using the provided path and filename. inode_t* targetFile = find_inode(path); if (targetFile == NULL || !targetFile->flag || targetFile->isDirectory) { return -1; // Error: Invalid file or file not found. } // Step 2: Write the data to the target file's data block. block_t block; bread(0, targetFile->starting_block, &block); size_t remain_size = size; size_t offset = 0; while (remain_size) { size_t copy_size = remain_size < DATA_SIZE ? remain_size : DATA_SIZE; remain_size -= copy_size; /* printf("write %d %d %d, rm_size %d, cp_size %d\n", block.flag, block.addr, block.next, remain_size, copy_size); */ memcpy(block.data, data + offset, copy_size); if(remain_size) { if(block.next == 0) { block_t newBlock; newBlock.flag = 1; newBlock.addr = dalloc(); newBlock.prev = block.addr; newBlock.next = 0; block.next = newBlock.addr; bwrite(0, block.addr, &block); block = newBlock; /* printf("%s", buffer); */ } else { bwrite(0, block.addr, &block); bread(0, block.next, &block); } } else { bwrite(0, block.addr, &block); } offset += copy_size; } // Step 3: Update the file size in the inode (assuming you have a file size field in your inode structure). targetFile->file_size = size; // Uncomment this line if you have a size field in your inode structure. bwrite(0, targetFile->addr, targetFile); // Save the updated inode to disk. free(targetFile); // Step 4: Return success. return 0; } size_t fs_read(const char *path, char *buffer, size_t size) { // Step 1: Find the target file using the provided path. inode_t* targetFile = find_inode(path); if (targetFile == NULL || !targetFile->flag || targetFile->isDirectory) { return -1; // Error: Invalid file or file not found. } // Step 2: Check if the requested size is more than the file's size. // If yes, update the size to be read to the file's size. if (size > targetFile->file_size) { size = targetFile->file_size; } // Step 3: Read the data from the target file's data blocks. block_t block; bread(0, targetFile->starting_block, &block); size_t total_read = 0; while (size > 0) { /* printf("%d %d %d\n", block.flag, block.addr, block.next); */ size_t bytes_to_read = size < DATA_SIZE ? size : DATA_SIZE; /* printf("%lld, writing %lld\n", (long long)size, (long long)bytes_to_read); */ memcpy(buffer + total_read, block.data, bytes_to_read); total_read += bytes_to_read; size -= bytes_to_read; // If there's more to read, move on to the next block. if (size) { if (block.next == 0) { // No more blocks to read. break; } bread(0, block.next, &block); } } free(targetFile); // Step 4: Return the total number of bytes read. return total_read; } int fs_mkdir(const char *path, const char *dirname) { // Step 1: Find the target directory using the provided path. inode_t* parentDir = find_inode(path); if (path == NULL || parentDir->flag == 0 || parentDir->isDirectory == 0) { return -1; // Error: Invalid directory or path not found. } // Step 2: Check if a directory with the same name already exists in the directory. ilist_t ilist; bread(0, parentDir->starting_block, &ilist); while (ilist.flag != 2) { inode_t inode; bread(0, ilist.inode_block, &inode); if (inode.flag && inode.isDirectory && strcmp(inode.name, dirname) == 0) { return -2; // Error: Directory with the same name already exists. } bread(0, ilist.next, &ilist); } // Step 3: Allocate an inode and a data block for the new directory. uint64_t newInodeBlock = dalloc(); uint64_t newListBlock = dalloc(); if (newInodeBlock == MAX_BLOCK || newListBlock == MAX_BLOCK) { return -3; // Error: No space left on the device. } // Step 4: Write the metadata for the new directory into the allocated inode. inode_t newDir; memset(&newDir, 0, sizeof(inode_t)); newDir.flag = 1; newDir.addr = newInodeBlock; strncpy(newDir.name, dirname, MAX_FILENAME_SIZE); newDir.parent = parentDir->addr; newDir.starting_block = newListBlock; newDir.isDirectory = 1; if (bwrite(0, newInodeBlock, &newDir)) { return -4; // Error: Failed to write the new inode to disk. } // Step 5: Initialize the directory list node for the new directory as empty. ilist_t newDirList; memset(&newDirList, 0, sizeof(ilist_t)); newDirList.flag = 2; // mark the list as empty newDirList.addr = newListBlock; if (bwrite(0, newListBlock, &newDirList)) { return -5; // Error: Failed to write the new directory list to disk. } // Step 6: Update the parent directory to include a reference to the new directory. ilist_t newIlist; memset(&newIlist, 0, sizeof(ilist_t)); newIlist.flag = 2; bread(0, parentDir->starting_block, &ilist); while (ilist.flag != 2) { bread(0, ilist.next, &ilist); } newIlist.flag = 2; newIlist.addr = dalloc(); ilist.flag = 1; ilist.inode_block = newInodeBlock; ilist.next = newIlist.addr; if (bwrite(0, ilist.addr, &ilist)) { return -6; // Error: Failed to update the parent directory. } if (bwrite(0, newIlist.addr, &newIlist)) { return -7; // Error: Failed to update the parent directory. } free(parentDir); return 0; } int fs_rm(const char *path) { // Step 1: Find the target file using the provided path. inode_t* targetFile = find_inode(path); if (targetFile == NULL || !targetFile->flag || targetFile->isDirectory) { free(targetFile); return -1; // Error: Invalid file or file not found. } // Step 2: Clear the file's data blocks. block_t block; bread(0, targetFile->starting_block, &block); while (block.flag) { uint64_t next = block.next; dfree(block.addr); if (next == 0) break; bread(0, next, &block); } // Step 3: Remove the inode reference from the parent directory list. inode_t parentDir; bread(0, targetFile->parent, &parentDir); ilist_t ilist, prev_ilist; memset(&prev_ilist, 0, sizeof(ilist_t)); bread(0, parentDir.starting_block, &ilist); while (ilist.flag != 2) { if (ilist.inode_block == targetFile->addr) { if (prev_ilist.flag) { prev_ilist.next = ilist.next; bwrite(0, prev_ilist.addr, &prev_ilist); } else { parentDir.starting_block = ilist.next; bwrite(0, parentDir.addr, &parentDir); } dfree(ilist.addr); break; } prev_ilist = ilist; bread(0, ilist.next, &ilist); } // Step 4: Free the target file's inode block. dfree(targetFile->addr); free(targetFile); return 0; } int fs_rmdir(const char *path) { // Step 1: Find the target directory using the provided path. inode_t* targetDir = find_inode(path); if (targetDir == NULL || !targetDir->flag || !targetDir->isDirectory) { free(targetDir); return -1; // Error: Invalid directory or directory not found. } // Step 2: Ensure the directory is empty. ilist_t ilist; bread(0, targetDir->starting_block, &ilist); if (ilist.flag != 2) { free(targetDir); return -2; // Error: Directory is not empty. } // Step 3: Remove the inode reference from the parent directory list. inode_t parentDir; bread(0, targetDir->parent, &parentDir); ilist_t prev_ilist; memset(&prev_ilist, 0, sizeof(ilist_t)); bread(0, parentDir.starting_block, &ilist); while (ilist.flag != 2) { if (ilist.inode_block == targetDir->addr) { if (prev_ilist.flag) { prev_ilist.next = ilist.next; bwrite(0, prev_ilist.addr, &prev_ilist); } else { parentDir.starting_block = ilist.next; bwrite(0, parentDir.addr, &parentDir); } dfree(ilist.addr); break; } prev_ilist = ilist; bread(0, ilist.next, &ilist); } // Step 4: Free the target directory's inode block. dfree(targetDir->addr); free(targetDir); return 0; } ``` ### 自定義 file system 簡單介紹 基礎結構: 1. inode_t:inode 是文件系統中存儲文件或目錄的元數據(metadata)的結構。這包括名稱、大小、數據所在的實際塊的位置等。 2. ilist_t:這是一個鏈接節點列表,用於將多個 inode 連接在一起,用在目錄中。 3. block_t:這代表了存儲在磁盤上的一個塊。它包含了一個 flag 用於標記塊是否被使用,以及數據本身。 4. fs_metadata_t:這是文件系統的 superblock,存儲文件系統的基本信息,如根目錄的位置。 函式: 1. dalloc 通過搜索第一個未使用的塊來動態分配一個新的塊。 2. dfree 將一個塊設置為未使用的狀態。 3. fs_init 初始化文件系統。如果文件系統尚未初始化,則此函數會設置 superblock 和創建根目錄。 4. find_inode 基於給定的路徑找到對應的 inode 5. fs_create 在指定的路徑創建一個新文件。此函數將檢查所需名稱的文件是否已存在,如果不存在,則將分配新的 inode 和數據塊並更新相應的目錄結構。 6. fs_mkdir 在指定的路徑創建一個新目錄。操作過程與 fs_create 類似,但結果是創建目錄而不是文件。 7. fs_write 將數據寫入指定文件。如果文件當前的塊不足以容納所有數據,此函數將分配更多的塊。 8. fs_read 從指定文件讀取數據。 ### os.c ```c= void test_fs() { dump_fs(); printf("creating directory test1_dir %d\n", fs_mkdir("/", "test1_dir")); printf("creating directory test2_dir %d\n", fs_mkdir("/", "test2_dir")); printf("creating directory test1_dir/test3_dir %d\n", fs_mkdir("/test1_dir", "test3_dir")); dump_fs(); printf("creating file test1 %d\n", fs_create("/test2_dir", "test1")); printf("creating file test2 %d\n", fs_create("/test2_dir", "test2")); printf("creating file test3 %d\n", fs_create("/test2_dir/", "test3")); char data[] = "Hello World!"; printf("writing to test2_dir/test2: "); printf("%d\n", fs_write("/test2_dir/test2", data, sizeof(data))); char res[2048]; memset(res, 0, 2048); printf("READ RESULT %d; ", fs_read("/test2_dir/test2", res, 2048)); printf("DATA: %s\n", res); dump_fs(); printf("remove file test2 %d\n", fs_rm("/test2_dir/test2")); dump_fs(); printf("remove directory test3_dir %d\n", fs_rmdir("/test1_dir/test3_dir/")); dump_fs(); printf("remove a non-existing file %d\n", fs_rm("/test2_dir/test4")); printf("remove a non-existing directory %d\n", fs_rmdir("/test3_dir/")); printf("remove all files and directories\n"); fs_rm("/test2_dir/test1"); fs_rm("/test2_dir/test3"); fs_rmdir("/test1_dir"); fs_rmdir("/test2_dir"); fs_rmdir("/test3_dir"); dump_fs(); } void kernel_main() { printf("Entered S Mode!\n"); test_fs(); while(1) { printf("OS Loop\n"); delay(10000); }; } ``` ## mkfs 調整了很多東西,參考 5-4-make-file-system,僅節錄重要片段。 ```c= #include "fs.h" #include <stdio.h> #include <string.h> #include <stdlib.h> #include <stdint.h> #define FILE_NAME "disk.img" #define MAX_SIZE 2048 int bwrite(int dev, uint64_t no, void *buffer) { FILE *fp = fopen(FILE_NAME, "rb+"); if (!fp) { perror("Error opening file"); return -1; } fseek(fp, no * BLOCK_SIZE, SEEK_SET); printf("writing to %ld, length = %d\n", no * BLOCK_SIZE, BLOCK_SIZE); size_t writeSize = fwrite((char*)buffer, 1, BLOCK_SIZE, fp); fclose(fp); if (writeSize != BLOCK_SIZE) { perror("Error writing block"); return -1; } return 0; // 成功 } int bread(int dev, uint64_t no, void *buffer) { FILE *fp = fopen(FILE_NAME, "rb"); if (!fp) { perror("Error opening file"); return -1; } fseek(fp, no * BLOCK_SIZE, SEEK_SET); size_t readSize = fread((char*)buffer, 1, BLOCK_SIZE, fp); fclose(fp); if (readSize != BLOCK_SIZE) { perror("Error reading block"); return -1; } return 0; } uint64_t dalloc() { char buffer[BLOCK_SIZE]; block_t temp; memset(&temp, 1, sizeof(block_t)); uint64_t addr = 1; while (bread(0, addr, buffer) == 0) { int allZero = 1; for (int32_t i = 0; i < BLOCK_SIZE; i++) { if (buffer[i] != 0) { allZero = 0; break; } } if (allZero) { printf("alloc %ld\n", addr); printf("%d\n", bwrite(0, addr, &temp)); return addr; } addr++; } return MAX_SIZE; } void dfree(uint64_t addr) { FILE *fp = fopen(FILE_NAME, "r+b"); if (!fp) { perror("Error opening file"); return; } char buffer[BLOCK_SIZE]; memset(buffer, 0, BLOCK_SIZE); fseek(fp, addr * BLOCK_SIZE, SEEK_SET); fwrite(buffer, 1, BLOCK_SIZE, fp); fclose(fp); } void addFile(const char *filename) { FILE *file = fopen(filename, "rb"); if (!file) { perror("Error opening file"); return; } // Determine the file size fseek(file, 0, SEEK_END); size_t size = ftell(file); fseek(file, 0, SEEK_SET); // Allocate memory for the file content char *buffer = (char *)malloc(size); if (!buffer) { perror("Memory allocation failed"); fclose(file); return; } // Read file into the buffer size_t bytesRead = fread(buffer, 1, size, file); fclose(file); if (bytesRead != size) { perror("Error reading the file"); free(buffer); return; } /* printf("->%d\n", fs_create("/", "test")); */ fs_create("/", filename); char path[257] = "/"; strcpy(path + 1, filename); printf("Add %s\n", filename); fs_write(path, buffer, size); free(buffer); } int main(int argc, char *argv[]) { if(fs_init() == 0) { for (int i = 1; i < argc; i++) { addFile(argv[i]); } dump_fs(); } else { printf("not empty disk.\n"); } return 0; } ``` 使用方式:`mkfs/mkfs A B C D。 ## 參考 1. ChatGPT 2. [xv6-riscv](https://github.com/mit-pdos/xv6-riscv/tree/riscv) 3. [mini-riscv-os Docs](https://github.com/cccriscv/mini-riscv-os/blob/70443b0ab3bf186c47b42127a0ffe31ad417fb91/doc/tw/08-BlockDeviceDriver.md) 4. [mythili-cs347_autumn2016](https://www.cse.iitb.ac.in/~mythili/teaching/cs347_autumn2016/)