# 5. VIRTIO 與 Filesystem
## VIRTIO
### 新增 ds.h
```c=
#pragma once
#include "riscv.h"
#define BLOCK_SIZE 1024
struct buf {
int valid; // has data been read from disk?
int disk; // does disk "own" buf?
int successful; // is the reading successful
uint32_t dev;
uint32_t blockno;
lock_t lock;
uint32_t refcnt;
struct buf *prev; // LRU cache list
struct buf *next;
unsigned char data[1024];
};
```
### 新增 virtio.h
```c=
#pragma once
#include "printf.h"
#include "riscv.h"
#include "lock.h"
#include "mem.h"
#include "ds.h"
#include <stddef.h>
#include <stdint.h>
#define VIRTIO0 0x10001000
#define VIRTIO_MMIO_MAGIC_VALUE 0x000 // 0x74726976
#define VIRTIO_MMIO_VERSION 0x004 // version; should be 2
#define VIRTIO_MMIO_DEVICE_ID 0x008 // device type; 1 is net, 2 is disk
#define VIRTIO_MMIO_VENDOR_ID 0x00c // 0x554d4551
#define VIRTIO_MMIO_DEVICE_FEATURES 0x010
#define VIRTIO_MMIO_DRIVER_FEATURES 0x020
#define VIRTIO_MMIO_QUEUE_SEL 0x030 // select queue, write-only
#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 // max size of current queue, read-only
#define VIRTIO_MMIO_QUEUE_NUM 0x038 // size of current queue, write-only
#define VIRTIO_MMIO_QUEUE_READY 0x044 // ready bit
#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 // write-only
#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only
#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 // write-only
#define VIRTIO_MMIO_STATUS 0x070 // read/write
#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 // physical address for descriptor table, write-only
#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084
#define VIRTIO_MMIO_DRIVER_DESC_LOW 0x090 // physical address for available ring, write-only
#define VIRTIO_MMIO_DRIVER_DESC_HIGH 0x094
#define VIRTIO_MMIO_DEVICE_DESC_LOW 0x0a0 // physical address for used ring, write-only
#define VIRTIO_MMIO_DEVICE_DESC_HIGH 0x0a4
// status register bits, from qemu virtio_config.h
#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
#define VIRTIO_CONFIG_S_DRIVER 2
#define VIRTIO_CONFIG_S_DRIVER_OK 4
#define VIRTIO_CONFIG_S_FEATURES_OK 8
// device feature bits
#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */
#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */
#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */
#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
#define VIRTIO_F_ANY_LAYOUT 27
#define VIRTIO_RING_F_INDIRECT_DESC 28
#define VIRTIO_RING_F_EVENT_IDX 29
// this many virtio descriptors.
// must be a power of two.
#define NUM 8
// a single descriptor, from the spec.
struct virtq_desc {
uint64_t addr;
uint32_t len;
uint16_t flags;
uint16_t next;
};
#define VRING_DESC_F_NEXT 1 // chained with another descriptor
#define VRING_DESC_F_WRITE 2 // device writes (vs read)
// the (entire) avail ring, from the spec.
struct virtq_avail {
uint16_t flags; // always zero
uint16_t idx; // driver will write ring[idx] next
uint16_t ring[NUM]; // descriptor numbers of chain heads
uint16_t unused;
};
// one entry in the "used" ring, with which the
// device tells the driver about completed requests.
struct virtq_used_elem {
uint32_t id; // index of start of completed descriptor chain
uint32_t len;
};
struct virtq_used {
uint16_t flags; // always zero
uint16_t idx; // device increments when it adds a ring[] entry
struct virtq_used_elem ring[NUM];
};
// these are specific to virtio block devices, e.g. disks,
// described in Section 5.2 of the spec.
#define VIRTIO_BLK_T_IN 0 // read the disk
#define VIRTIO_BLK_T_OUT 1 // write the disk
// the format of the first descriptor in a disk request.
// to be followed by two more descriptors containing
// the block, and a one-byte status.
struct virtio_blk_req {
uint32_t type; // VIRTIO_BLK_T_IN or ..._OUT
uint32_t reserved;
uint64_t sector;
};
void virtio_disk_rw(struct buf *b, int write);
void virtio_disk_init(void);
```
### 新增 virtio.c
```c=
#include "printf.h"
#include "riscv.h"
#include "lock.h"
#include "virtio.h"
#include "mem.h"
// the address of virtio mmio register r.
#define R(r) ((volatile uint32_t *)(VIRTIO0 + (r)))
#define BSIZE 512
static struct disk {
// a set (not a ring) of DMA descriptors, with which the
// driver tells the device where to read and write individual
// disk operations. there are NUM descriptors.
// most commands consist of a "chain" (a linked list) of a couple of
// these descriptors.
struct virtq_desc *desc;
// a ring in which the driver writes descriptor numbers
// that the driver would like the device to process. it only
// includes the head descriptor of each chain. the ring has
// NUM elements.
struct virtq_avail *avail;
// a ring in which the device writes descriptor numbers that
// the device has finished processing (just the head of each chain).
// there are NUM used ring entries.
struct virtq_used *used;
// our own book-keeping.
char free[NUM]; // is a descriptor free?
uint16_t used_idx; // we've looked this far in used[2..NUM].
// track info about in-flight operations,
// for use when completion interrupt arrives.
// indexed by first descriptor index of chain.
struct {
struct buf *b;
char status;
} info[NUM];
// disk command headers.
// one-for-one with descriptors, for convenience.
struct virtio_blk_req ops[NUM];
lock_t vdisk_lock;
} disk;
void virtio_disk_init(void) {
uint32_t status = 0;
lock_init(&disk.vdisk_lock);
if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
*R(VIRTIO_MMIO_VERSION) != 2 ||
*R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
*R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
panic("could not find virtio disk");
}
// reset device
*R(VIRTIO_MMIO_STATUS) = status;
// set ACKNOWLEDGE status bit
status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
*R(VIRTIO_MMIO_STATUS) = status;
// set DRIVER status bit
status |= VIRTIO_CONFIG_S_DRIVER;
*R(VIRTIO_MMIO_STATUS) = status;
// negotiate features
uint64_t features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
features &= ~(1 << VIRTIO_BLK_F_RO);
features &= ~(1 << VIRTIO_BLK_F_SCSI);
features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
features &= ~(1 << VIRTIO_BLK_F_MQ);
features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
*R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
// tell device that feature negotiation is complete.
status |= VIRTIO_CONFIG_S_FEATURES_OK;
*R(VIRTIO_MMIO_STATUS) = status;
// re-read status to ensure FEATURES_OK is set.
status = *R(VIRTIO_MMIO_STATUS);
if(!(status & VIRTIO_CONFIG_S_FEATURES_OK))
panic("virtio disk FEATURES_OK unset");
// initialize queue 0.
*R(VIRTIO_MMIO_QUEUE_SEL) = 0;
// ensure queue 0 is not in use.
if(*R(VIRTIO_MMIO_QUEUE_READY))
panic("virtio disk should not be ready");
// check maximum queue size.
uint32_t max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
if(max == 0)
panic("virtio disk has no queue 0");
if(max < NUM)
panic("virtio disk max queue too short");
// allocate and zero queue memory.
disk.desc = malloc(4096);
disk.avail = malloc(4096);
disk.used = malloc(4096);
if(!disk.desc || !disk.avail || !disk.used)
panic("virtio disk malloc(4096)");
_clear(disk.desc, PAGE_SIZE);
_clear(disk.avail, PAGE_SIZE);
_clear(disk.used, PAGE_SIZE);
// set queue size.
*R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
// write physical addresses.
*R(VIRTIO_MMIO_QUEUE_DESC_LOW) = (uint64_t)disk.desc;
*R(VIRTIO_MMIO_QUEUE_DESC_HIGH) = (uint64_t)disk.desc >> 32;
*R(VIRTIO_MMIO_DRIVER_DESC_LOW) = (uint64_t)disk.avail;
*R(VIRTIO_MMIO_DRIVER_DESC_HIGH) = (uint64_t)disk.avail >> 32;
*R(VIRTIO_MMIO_DEVICE_DESC_LOW) = (uint64_t)disk.used;
*R(VIRTIO_MMIO_DEVICE_DESC_HIGH) = (uint64_t)disk.used >> 32;
// queue is ready.
*R(VIRTIO_MMIO_QUEUE_READY) = 0x1;
// all NUM descriptors start out unused.
for(int i = 0; i < NUM; i++)
disk.free[i] = 1;
// tell device we're completely ready.
status |= VIRTIO_CONFIG_S_DRIVER_OK;
*R(VIRTIO_MMIO_STATUS) = status;
// plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ.
}
// find a free descriptor, mark it non-free, return its index.
static int alloc_desc() {
for(int i = 0; i < NUM; i++){
if(disk.free[i]){
disk.free[i] = 0;
return i;
}
}
return -1;
}
// mark a descriptor as free.
static void free_desc(int i) {
if(i >= NUM)
panic("free_desc 1");
if(disk.free[i])
panic("free_desc 2");
disk.desc[i].addr = 0;
disk.desc[i].len = 0;
disk.desc[i].flags = 0;
disk.desc[i].next = 0;
disk.free[i] = 1;
/* wakeup(&disk.free[0]); */
}
// free a chain of descriptors.
static void free_chain(int i) {
while(1){
int flag = disk.desc[i].flags;
int nxt = disk.desc[i].next;
free_desc(i);
if(flag & VRING_DESC_F_NEXT)
i = nxt;
else
break;
}
}
// allocate three descriptors (they need not be contiguous).
// disk transfers always use three descriptors.
static int alloc3_desc(int *idx) {
for(int i = 0; i < 3; i++){
idx[i] = alloc_desc();
if(idx[i] < 0){
for(int j = 0; j < i; j++)
free_desc(idx[j]);
return -1;
}
}
return 0;
}
void virtio_disk_rw(struct buf *b, int write) {
uint64_t sector = b->blockno * (BLOCK_SIZE / 512);
lock_acquire(&disk.vdisk_lock);
// the spec's Section 5.2 says that legacy block operations use
// three descriptors: one for type/reserved/sector, one for the
// data, one for a 1-byte status result.
// allocate the three descriptors.
int idx[3];
while(1){
if(alloc3_desc(idx) == 0) {
break;
}
/* sleep(&disk.free[0], &disk.vdisk_lock); */
}
// format the three descriptors.
// qemu's virtio-blk.c reads them.
struct virtio_blk_req *buf0 = &disk.ops[idx[0]];
if(write)
buf0->type = VIRTIO_BLK_T_OUT; // write the disk
else
buf0->type = VIRTIO_BLK_T_IN; // read the disk
buf0->reserved = 0;
buf0->sector = sector;
disk.desc[idx[0]].addr = (uint64_t) buf0;
disk.desc[idx[0]].len = sizeof(struct virtio_blk_req);
disk.desc[idx[0]].flags = VRING_DESC_F_NEXT;
disk.desc[idx[0]].next = idx[1];
disk.desc[idx[1]].addr = (uint64_t) b->data;
disk.desc[idx[1]].len = BLOCK_SIZE;
if(write)
disk.desc[idx[1]].flags = 0; // device reads b->data
else
disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
disk.desc[idx[1]].next = idx[2];
disk.info[idx[0]].status = 0xff; // device writes 0 on success
disk.desc[idx[2]].addr = (uint64_t) &disk.info[idx[0]].status;
disk.desc[idx[2]].len = 1;
disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
disk.desc[idx[2]].next = 0;
// record struct buf for virtio_disk_intr().
b->disk = 1;
disk.info[idx[0]].b = b;
// tell the device the first index in our chain of descriptors.
disk.avail->ring[disk.avail->idx % NUM] = idx[0];
__sync_synchronize();
// tell the device another avail ring entry is available.
disk.avail->idx += 1; // not % NUM ...
__sync_synchronize();
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
// Wait for virtio_disk_intr() to say request has finished.
while(b->disk == 1) {
/* sleep(b, &disk.vdisk_lock); */
// busy waiting
}
disk.info[idx[0]].b = 0;
free_chain(idx[0]);
lock_free(&disk.vdisk_lock);
}
void virtio_disk_intr() {
/* lock_acquire(&disk.vdisk_lock); */
// the device won't raise another interrupt until we tell it
// we've seen this interrupt, which the following line does.
// this may race with the device writing new entries to
// the "used" ring, in which case we may process the new
// completion entries in this interrupt, and have nothing to do
// in the next interrupt, which is harmless.
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
__sync_synchronize();
// the device increments disk.used->idx when it
// adds an entry to the used ring.
while(disk.used_idx != disk.used->idx){
__sync_synchronize();
int id = disk.used->ring[disk.used_idx % NUM].id;
struct buf *b = disk.info[id].b;
if(disk.info[id].status != 0) {
printf("virtio_disk_intr status failed.\n");
b->successful = 0;
} else {
b->successful = 1;
}
b->disk = 0; // disk is done with buf
/* wakeup(b); */
disk.used_idx += 1;
}
/* lock_free(&disk.vdisk_lock); */
}
```
### 修改 plic.h
追加一條:
```c=
#define VIRTIO_IRQ 1
```
### 修改 plic.c,注冊 virio 的 interrupt
```c=
void plic_init() {
int hart = r_tp();
// The "0" is reserved, and the lowest priority is "1".
*(uint32_t *)PLIC_PRIORITY(UART0_IRQ) = 1;
/* Enable UART0 */
*(uint32_t *)PLIC_MENABLE(hart) |= (1 << UART0_IRQ);
*(uint32_t *)PLIC_MENABLE(hart) |= (1 << VIRTIO_IRQ);
*(uint32_t *)(PLIC_BASE + UART0_IRQ*4) = 1;
*(uint32_t *)(PLIC_BASE + VIRTIO_IRQ*4) = 1;
/* Set priority threshold for UART0. */
*(uint32_t *)PLIC_MTHRESHOLD(hart) = 0;
}
```
### 修改 os.c,測試我們的 virtio
```c=
#include "printf.h"
#include "mem.h"
#include "time.h"
#include "trap.h"
#include "plic.h"
#include "vm.h"
#include "virtio.h"
#include "s_mode.h"
#include "string.h"
#include "usys.h"
#define BSIZE 512
void test_virtio_disk(void) {
const int test_blockno = 100;
char data[BSIZE];
struct buf b;
b.blockno = test_blockno;
printf("Reading data from VIRTIO.\n");
virtio_disk_rw(&b, 0);
memcpy(data, b.data, BSIZE);
printf("Original data at block %d: %s\n", test_blockno, data);
strcpy(b.data, "Hello, Virtio Disk!");
virtio_disk_rw(&b, 1);
memset(b.data, 0, BSIZE);
virtio_disk_rw(&b, 0);
printf("New data at block %d: %s\n", test_blockno, b.data);
printf("Disk test succeeded!\n");
}
void delay(int count) {
count *= 50000;
while (count--) {
asm volatile ("nop");
}
}
void kernel_main() {
printf("Entered S Mode!\n");
test_virtio_disk();
while(1) {
printf("OS Loop\n");
delay(10000);
};
}
void boot() {
uart_init();
page_init();
trap_init();
timer_init();
plic_init();
vm_init();
virtio_disk_init();
}
int os_main() {
boot();
switch_to_s_mode(kernel_main);
return 0;
}
```
### 測試
先創造一個 2M 的檔案當作硬碟:
```
dd if=/dev/zero of=disk.img bs=1M count=2
```
修改我們的 Makefile:
```
run: all
qemu-system-riscv64 -m 128M -M virt -serial stdio -display none -bios none -kernel kernel.img -global virtio-mmio.force-legacy=false -drive file=disk.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
```

來看看 `disk.img`:

如果我們寫一個超出硬碟 size 的位置:

## Block IO
### bio.h
```c=
#include "lock.h"
#include "string.h"
#include "mem.h"
#include "virtio.h"
#include "ds.h"
int bread(uint32_t dev, uint32_t blockno, char * buffer);
int bwrite(uint32_t dev, uint32_t blockno, const char *data);
```
### bio.c
```c=
#include "bio.h"
int bread(uint32_t dev, uint32_t blockno, char * buffer) {
struct buf *b = malloc(sizeof(struct buf));
b->dev = dev;
b->blockno = blockno;
virtio_disk_rw(b, 0);
if(b->successful == 0) return 0;
memcpy(buffer, (void *)b->data, BLOCK_SIZE);
free(b);
return 1;
}
int bwrite(uint32_t dev, uint32_t blockno, const char *data) {
struct buf *b = malloc(sizeof(struct buf));
b->dev = dev;
b->blockno = blockno;
memcpy(b->data, data, BLOCK_SIZE);
virtio_disk_rw(b, 1);
int result = b->successful;
free(b);
return result;
}
```
### os.c
```c=
void test_bio(void) {
const int test_blockno = 1;
printf("Writing \"Hello World!\" to disk.\n");
char data[1024] = "Hello World!";
int w_status = bwrite(0, 1, data);
if(w_status) {
printf("writing successful.\n");
}
memset(data, 0, BLOCK_SIZE);
int r_status = bread(0, 1, &data[0]);
if(r_status) {
printf("reading successful.\n");
printf("Read data: %s\n", data);
}
}
void kernel_main() {
printf("Entered S Mode!\n");
test_bio();
while(1) {
printf("OS Loop\n");
delay(10000);
};
}
```
## Filesystem
由於修改的 code 太多,這邊僅節錄比較重要的部分。
:::warning
這是我本人隨便寫的陽春 file system,不具備很多功能。
:::
### 新增 fs.h
```c=
#include "riscv.h"
#include "bio.h"
#include "string.h"
#define MAX_FILENAME_SIZE 256
#define DATA_SIZE (BLOCK_SIZE - sizeof(int8_t) - 3 * sizeof(int64_t))
#define MAX_BLOCK 2048
typedef struct {
/* 0 -> not initialized
* 1 -> initialized
* */
int8_t flag;
uint64_t root_directory_block; // Block number of the root directory.
unsigned char reserved[1024]; // Make sure the struct is larger than 1 block
} fs_metadata_t;
typedef struct {
/* 0 -> nothing here
* 1 -> there is an inode here
* */
uint8_t flag;
uint8_t isDirectory; // Is this inode indicating a directory?
char name[MAX_FILENAME_SIZE]; // Name of the file.
uint64_t parent; // parent inode
uint64_t starting_block; // Starting block of the file.
uint64_t file_size; // Size of the file in bytes.
uint64_t creation_time; // File creation time.
uint64_t modification_time; // Last modification time.
uint64_t access_time; // Last access time.
uint64_t addr; // The location of this inode(unit: block)
unsigned char reserved[1024]; // Make sure the struct is larger than 1 block
} inode_t;
typedef struct {
/* 0 -> nothing here
* 1 -> it is referring an inode here
* 2 -> it is the end of the list, nothing here.
* */
uint8_t flag;
uint64_t addr;
uint64_t inode_block;
uint64_t next;
unsigned char reserved[1024]; // Make sure the struct is larger than 1 block
} ilist_t;
typedef struct {
int8_t flag;
unsigned char data[DATA_SIZE];
uint64_t addr;
uint64_t prev;
uint64_t next;
} block_t;
inline size_t min(size_t a, size_t b) {
return a < b ? a : b;
}
uint64_t dalloc();
void dfree(uint64_t addr);
void format();
void dump_fs();
void dump_directory(inode_t inode, int ident);
void dump_name(inode_t inode, int ident);
int fs_init();
int fs_create(const char *path, const char *filename);
int fs_write(const char *path, const char *data, size_t size);
size_t fs_read(const char *path, char *buffer, size_t size);
int fs_mkdir(const char *path, const char *dirname);
int fs_rm(const char *path);
int fs_rmdir(const char *path);
```
### 新增 fs.c
```c=
#include "fs.h"
uint64_t dalloc() {
block_t block;
for(uint64_t i = 1; i < MAX_BLOCK; i++) {
bread(0, i, &block);
if(block.flag == 0) {
memset(&block, 0, BLOCK_SIZE);
block.flag = 1;
bwrite(0, i, &block);
return i;
}
}
return MAX_BLOCK;
}
void dfree(uint64_t addr) {
block_t block;
memset(&block, 0, BLOCK_SIZE);
bwrite(0, addr, &block);
}
void format() {
char zero_block[BLOCK_SIZE] = "";
for(uint64_t i = 0; i < MAX_BLOCK; i++) {
bwrite(0, i, &zero_block);
}
}
void dump_fs() {
fs_metadata_t superblock;
bread(0, 0, &superblock);
inode_t root;
bread(0, superblock.root_directory_block, &root);
dump_directory(root, 0);
}
void dump_directory(inode_t inode, int ident) {
ilist_t ilist;
bread(0, inode.starting_block, &ilist);
dump_name(inode, ident);
while(ilist.flag != 2) {
inode_t subInode;
bread(0, ilist.inode_block, &subInode);
if(subInode.isDirectory) dump_directory(subInode, ident + 2);
else dump_name(subInode, ident + 2);
bread(0, ilist.next, &ilist);
}
}
void dump_name(inode_t inode, int ident) {
for(int i = 0; i < ident * 2; i++) printf(" ");
if(inode.name[0] == 0)
printf("/\n");
else
printf("%s\n", inode.name);
}
inode_t* find_inode(const char *path) {
inode_t* res = malloc(sizeof(inode_t));
fs_metadata_t superblock;
bread(0, 0, &superblock);
superblock.flag = 1;
if (strcmp(path, "/") == 0) {
bread(0, superblock.root_directory_block, res);
return res;
}
inode_t currentDirectory, nextDirectory;
bread(0, superblock.root_directory_block, ¤tDirectory);
char *temp_path = strdup(path);
char *token = strtok(temp_path, "/");
while (token != NULL) {
ilist_t ilist;
bread(0, currentDirectory.starting_block, &ilist);
while (ilist.flag != 2) {
bread(0, ilist.inode_block, &nextDirectory);
/* printf("%d %d %d, token = %s, name = %s, %d\n", ilist.flag, ilist.addr, ilist.next, token, nextDirectory.name, strcmp(nextDirectory.name, token)); */
if (nextDirectory.flag && strcmp(nextDirectory.name, token) == 0) {
break;
}
bread(0, ilist.next, &ilist);
}
if(ilist.flag == 2)
return NULL;
token = strtok(NULL, "/");
if (token) {
currentDirectory = nextDirectory;
}
}
free(temp_path);
memcpy(res, &nextDirectory, sizeof(inode_t));
return res;
}
int fs_init() {
fs_metadata_t superblock;
bread(0, 0, &superblock);
if(superblock.flag == 1) return -1;
// init metadata
uint64_t rootBlock = dalloc();
memset(&superblock, 0, sizeof(fs_metadata_t));
superblock.flag = 1;
superblock.root_directory_block = rootBlock;
if(bwrite(0, 0, &superblock))
return -2;
// init root directory inode
uint64_t rootList = dalloc();
inode_t rootDirectory;
memset(&rootDirectory, 0, sizeof(inode_t));
rootDirectory.flag = 1;
strcpy(rootDirectory.name, "");
rootDirectory.parent = rootBlock;
rootDirectory.starting_block = rootList;
rootDirectory.isDirectory = 1;
rootDirectory.addr = rootBlock;
if(bwrite(0, rootBlock, &rootDirectory))
return -2;
// init root directory list node
ilist_t rootDirectoryList;
memset(&rootDirectoryList, 0, sizeof(ilist_t));
rootDirectoryList.flag = 2; // mark the list is empty
rootDirectoryList.addr = rootList;
if(bwrite(0, rootList, &rootDirectoryList))
return -2;
return 0;
}
int fs_create(const char *path, const char *filename) {
// Step 1: Find the target directory using the provided path.
inode_t* parentDir = find_inode(path);
if (parentDir == NULL || parentDir->flag == 0 || parentDir->isDirectory == 0) {
return -1; // Error: Invalid directory or path not found.
}
// Step 2: Check if a file with the same name already exists in the directory.
ilist_t ilist;
bread(0, parentDir->starting_block, &ilist);
while (ilist.flag != 2) {
inode_t inode;
bread(0, ilist.inode_block, &inode);
if (inode.flag && !inode.isDirectory && strcmp(inode.name, filename) == 0) {
return -2; // Error: File with the same name already exists.
}
bread(0, ilist.next, &ilist);
}
// Step 3: Allocate an inode and a data block for the new file.
uint64_t newInodeBlock = dalloc();
uint64_t newDataBlock = dalloc();
if (newInodeBlock == MAX_BLOCK || newDataBlock == MAX_BLOCK) {
return -3; // Error: No space left on the device.
}
// Step 4: Write the metadata for the new file into the allocated inode.
inode_t newFile;
memset(&newFile, 0, sizeof(inode_t));
newFile.flag = 1;
newFile.addr = newInodeBlock;
strncpy(newFile.name, filename, MAX_FILENAME_SIZE);
newFile.parent = parentDir->addr;
newFile.starting_block = newDataBlock;
if (bwrite(0, newInodeBlock, &newFile)) {
return -4; // Error: Failed to write the new inode to disk.
}
// Step 5: Update the current block.
block_t newBlock;
memset(&newBlock, 0, sizeof(block_t));
newBlock.flag = 1;
newBlock.addr = newDataBlock;
newBlock.next = 0;
newBlock.prev = 0;
memset(newBlock.data, 0, DATA_SIZE);
if (bwrite(0, newDataBlock, &newBlock)) {
return -4; // Error: Failed to write the new inode to disk.
}
// Step 6: Update the parent directory to include a reference to the new file.
ilist_t newIlist;
memset(&newIlist, 0, sizeof(ilist_t));
newIlist.flag = 2;
bread(0, parentDir->starting_block, &ilist);
while (ilist.flag != 2) {
bread(0, ilist.next, &ilist);
}
newIlist.flag = 2;
newIlist.addr = dalloc();
ilist.flag = 1;
ilist.inode_block = newInodeBlock;
ilist.next = newIlist.addr;
if (bwrite(0, ilist.addr, &ilist)) {
return -5; // Error: Failed to update the parent directory.
}
if (bwrite(0, newIlist.addr, &newIlist)) {
return -6; // Error: Failed to update the parent directory.
}
free(parentDir);
return 0;
}
int fs_write(const char *path, const char *data, size_t size) {
// Step 1: Find the target file using the provided path and filename.
inode_t* targetFile = find_inode(path);
if (targetFile == NULL || !targetFile->flag || targetFile->isDirectory) {
return -1; // Error: Invalid file or file not found.
}
// Step 2: Write the data to the target file's data block.
block_t block;
bread(0, targetFile->starting_block, &block);
size_t remain_size = size;
size_t offset = 0;
while (remain_size) {
size_t copy_size = remain_size < DATA_SIZE ? remain_size : DATA_SIZE;
remain_size -= copy_size;
/* printf("write %d %d %d, rm_size %d, cp_size %d\n", block.flag, block.addr, block.next, remain_size, copy_size); */
memcpy(block.data, data + offset, copy_size);
if(remain_size) {
if(block.next == 0) {
block_t newBlock;
newBlock.flag = 1;
newBlock.addr = dalloc();
newBlock.prev = block.addr;
newBlock.next = 0;
block.next = newBlock.addr;
bwrite(0, block.addr, &block);
block = newBlock;
/* printf("%s", buffer); */
} else {
bwrite(0, block.addr, &block);
bread(0, block.next, &block);
}
} else {
bwrite(0, block.addr, &block);
}
offset += copy_size;
}
// Step 3: Update the file size in the inode (assuming you have a file size field in your inode structure).
targetFile->file_size = size; // Uncomment this line if you have a size field in your inode structure.
bwrite(0, targetFile->addr, targetFile); // Save the updated inode to disk.
free(targetFile);
// Step 4: Return success.
return 0;
}
size_t fs_read(const char *path, char *buffer, size_t size) {
// Step 1: Find the target file using the provided path.
inode_t* targetFile = find_inode(path);
if (targetFile == NULL || !targetFile->flag || targetFile->isDirectory) {
return -1; // Error: Invalid file or file not found.
}
// Step 2: Check if the requested size is more than the file's size.
// If yes, update the size to be read to the file's size.
if (size > targetFile->file_size) {
size = targetFile->file_size;
}
// Step 3: Read the data from the target file's data blocks.
block_t block;
bread(0, targetFile->starting_block, &block);
size_t total_read = 0;
while (size > 0) {
/* printf("%d %d %d\n", block.flag, block.addr, block.next); */
size_t bytes_to_read = size < DATA_SIZE ? size : DATA_SIZE;
/* printf("%lld, writing %lld\n", (long long)size, (long long)bytes_to_read); */
memcpy(buffer + total_read, block.data, bytes_to_read);
total_read += bytes_to_read;
size -= bytes_to_read;
// If there's more to read, move on to the next block.
if (size) {
if (block.next == 0) {
// No more blocks to read.
break;
}
bread(0, block.next, &block);
}
}
free(targetFile);
// Step 4: Return the total number of bytes read.
return total_read;
}
int fs_mkdir(const char *path, const char *dirname) {
// Step 1: Find the target directory using the provided path.
inode_t* parentDir = find_inode(path);
if (path == NULL || parentDir->flag == 0 || parentDir->isDirectory == 0) {
return -1; // Error: Invalid directory or path not found.
}
// Step 2: Check if a directory with the same name already exists in the directory.
ilist_t ilist;
bread(0, parentDir->starting_block, &ilist);
while (ilist.flag != 2) {
inode_t inode;
bread(0, ilist.inode_block, &inode);
if (inode.flag && inode.isDirectory && strcmp(inode.name, dirname) == 0) {
return -2; // Error: Directory with the same name already exists.
}
bread(0, ilist.next, &ilist);
}
// Step 3: Allocate an inode and a data block for the new directory.
uint64_t newInodeBlock = dalloc();
uint64_t newListBlock = dalloc();
if (newInodeBlock == MAX_BLOCK || newListBlock == MAX_BLOCK) {
return -3; // Error: No space left on the device.
}
// Step 4: Write the metadata for the new directory into the allocated inode.
inode_t newDir;
memset(&newDir, 0, sizeof(inode_t));
newDir.flag = 1;
newDir.addr = newInodeBlock;
strncpy(newDir.name, dirname, MAX_FILENAME_SIZE);
newDir.parent = parentDir->addr;
newDir.starting_block = newListBlock;
newDir.isDirectory = 1;
if (bwrite(0, newInodeBlock, &newDir)) {
return -4; // Error: Failed to write the new inode to disk.
}
// Step 5: Initialize the directory list node for the new directory as empty.
ilist_t newDirList;
memset(&newDirList, 0, sizeof(ilist_t));
newDirList.flag = 2; // mark the list as empty
newDirList.addr = newListBlock;
if (bwrite(0, newListBlock, &newDirList)) {
return -5; // Error: Failed to write the new directory list to disk.
}
// Step 6: Update the parent directory to include a reference to the new directory.
ilist_t newIlist;
memset(&newIlist, 0, sizeof(ilist_t));
newIlist.flag = 2;
bread(0, parentDir->starting_block, &ilist);
while (ilist.flag != 2) {
bread(0, ilist.next, &ilist);
}
newIlist.flag = 2;
newIlist.addr = dalloc();
ilist.flag = 1;
ilist.inode_block = newInodeBlock;
ilist.next = newIlist.addr;
if (bwrite(0, ilist.addr, &ilist)) {
return -6; // Error: Failed to update the parent directory.
}
if (bwrite(0, newIlist.addr, &newIlist)) {
return -7; // Error: Failed to update the parent directory.
}
free(parentDir);
return 0;
}
int fs_rm(const char *path) {
// Step 1: Find the target file using the provided path.
inode_t* targetFile = find_inode(path);
if (targetFile == NULL || !targetFile->flag || targetFile->isDirectory) {
free(targetFile);
return -1; // Error: Invalid file or file not found.
}
// Step 2: Clear the file's data blocks.
block_t block;
bread(0, targetFile->starting_block, &block);
while (block.flag) {
uint64_t next = block.next;
dfree(block.addr);
if (next == 0) break;
bread(0, next, &block);
}
// Step 3: Remove the inode reference from the parent directory list.
inode_t parentDir;
bread(0, targetFile->parent, &parentDir);
ilist_t ilist, prev_ilist;
memset(&prev_ilist, 0, sizeof(ilist_t));
bread(0, parentDir.starting_block, &ilist);
while (ilist.flag != 2) {
if (ilist.inode_block == targetFile->addr) {
if (prev_ilist.flag) {
prev_ilist.next = ilist.next;
bwrite(0, prev_ilist.addr, &prev_ilist);
} else {
parentDir.starting_block = ilist.next;
bwrite(0, parentDir.addr, &parentDir);
}
dfree(ilist.addr);
break;
}
prev_ilist = ilist;
bread(0, ilist.next, &ilist);
}
// Step 4: Free the target file's inode block.
dfree(targetFile->addr);
free(targetFile);
return 0;
}
int fs_rmdir(const char *path) {
// Step 1: Find the target directory using the provided path.
inode_t* targetDir = find_inode(path);
if (targetDir == NULL || !targetDir->flag || !targetDir->isDirectory) {
free(targetDir);
return -1; // Error: Invalid directory or directory not found.
}
// Step 2: Ensure the directory is empty.
ilist_t ilist;
bread(0, targetDir->starting_block, &ilist);
if (ilist.flag != 2) {
free(targetDir);
return -2; // Error: Directory is not empty.
}
// Step 3: Remove the inode reference from the parent directory list.
inode_t parentDir;
bread(0, targetDir->parent, &parentDir);
ilist_t prev_ilist;
memset(&prev_ilist, 0, sizeof(ilist_t));
bread(0, parentDir.starting_block, &ilist);
while (ilist.flag != 2) {
if (ilist.inode_block == targetDir->addr) {
if (prev_ilist.flag) {
prev_ilist.next = ilist.next;
bwrite(0, prev_ilist.addr, &prev_ilist);
} else {
parentDir.starting_block = ilist.next;
bwrite(0, parentDir.addr, &parentDir);
}
dfree(ilist.addr);
break;
}
prev_ilist = ilist;
bread(0, ilist.next, &ilist);
}
// Step 4: Free the target directory's inode block.
dfree(targetDir->addr);
free(targetDir);
return 0;
}
```
### 自定義 file system 簡單介紹
基礎結構:
1. inode_t:inode 是文件系統中存儲文件或目錄的元數據(metadata)的結構。這包括名稱、大小、數據所在的實際塊的位置等。
2. ilist_t:這是一個鏈接節點列表,用於將多個 inode 連接在一起,用在目錄中。
3. block_t:這代表了存儲在磁盤上的一個塊。它包含了一個 flag 用於標記塊是否被使用,以及數據本身。
4. fs_metadata_t:這是文件系統的 superblock,存儲文件系統的基本信息,如根目錄的位置。
函式:
1. dalloc 通過搜索第一個未使用的塊來動態分配一個新的塊。
2. dfree 將一個塊設置為未使用的狀態。
3. fs_init 初始化文件系統。如果文件系統尚未初始化,則此函數會設置 superblock 和創建根目錄。
4. find_inode 基於給定的路徑找到對應的 inode
5. fs_create 在指定的路徑創建一個新文件。此函數將檢查所需名稱的文件是否已存在,如果不存在,則將分配新的 inode 和數據塊並更新相應的目錄結構。
6. fs_mkdir 在指定的路徑創建一個新目錄。操作過程與 fs_create 類似,但結果是創建目錄而不是文件。
7. fs_write 將數據寫入指定文件。如果文件當前的塊不足以容納所有數據,此函數將分配更多的塊。
8. fs_read 從指定文件讀取數據。
### os.c
```c=
void test_fs() {
dump_fs();
printf("creating directory test1_dir %d\n", fs_mkdir("/", "test1_dir"));
printf("creating directory test2_dir %d\n", fs_mkdir("/", "test2_dir"));
printf("creating directory test1_dir/test3_dir %d\n", fs_mkdir("/test1_dir", "test3_dir"));
dump_fs();
printf("creating file test1 %d\n", fs_create("/test2_dir", "test1"));
printf("creating file test2 %d\n", fs_create("/test2_dir", "test2"));
printf("creating file test3 %d\n", fs_create("/test2_dir/", "test3"));
char data[] = "Hello World!";
printf("writing to test2_dir/test2: ");
printf("%d\n", fs_write("/test2_dir/test2", data, sizeof(data)));
char res[2048];
memset(res, 0, 2048);
printf("READ RESULT %d; ", fs_read("/test2_dir/test2", res, 2048));
printf("DATA: %s\n", res);
dump_fs();
printf("remove file test2 %d\n", fs_rm("/test2_dir/test2"));
dump_fs();
printf("remove directory test3_dir %d\n", fs_rmdir("/test1_dir/test3_dir/"));
dump_fs();
printf("remove a non-existing file %d\n", fs_rm("/test2_dir/test4"));
printf("remove a non-existing directory %d\n", fs_rmdir("/test3_dir/"));
printf("remove all files and directories\n");
fs_rm("/test2_dir/test1");
fs_rm("/test2_dir/test3");
fs_rmdir("/test1_dir");
fs_rmdir("/test2_dir");
fs_rmdir("/test3_dir");
dump_fs();
}
void kernel_main() {
printf("Entered S Mode!\n");
test_fs();
while(1) {
printf("OS Loop\n");
delay(10000);
};
}
```
## mkfs
調整了很多東西,參考 5-4-make-file-system,僅節錄重要片段。
```c=
#include "fs.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#define FILE_NAME "disk.img"
#define MAX_SIZE 2048
int bwrite(int dev, uint64_t no, void *buffer) {
FILE *fp = fopen(FILE_NAME, "rb+");
if (!fp) {
perror("Error opening file");
return -1;
}
fseek(fp, no * BLOCK_SIZE, SEEK_SET);
printf("writing to %ld, length = %d\n", no * BLOCK_SIZE, BLOCK_SIZE);
size_t writeSize = fwrite((char*)buffer, 1, BLOCK_SIZE, fp);
fclose(fp);
if (writeSize != BLOCK_SIZE) {
perror("Error writing block");
return -1;
}
return 0; // 成功
}
int bread(int dev, uint64_t no, void *buffer) {
FILE *fp = fopen(FILE_NAME, "rb");
if (!fp) {
perror("Error opening file");
return -1;
}
fseek(fp, no * BLOCK_SIZE, SEEK_SET);
size_t readSize = fread((char*)buffer, 1, BLOCK_SIZE, fp);
fclose(fp);
if (readSize != BLOCK_SIZE) {
perror("Error reading block");
return -1;
}
return 0;
}
uint64_t dalloc() {
char buffer[BLOCK_SIZE];
block_t temp;
memset(&temp, 1, sizeof(block_t));
uint64_t addr = 1;
while (bread(0, addr, buffer) == 0) {
int allZero = 1;
for (int32_t i = 0; i < BLOCK_SIZE; i++) {
if (buffer[i] != 0) {
allZero = 0;
break;
}
}
if (allZero) {
printf("alloc %ld\n", addr);
printf("%d\n", bwrite(0, addr, &temp));
return addr;
}
addr++;
}
return MAX_SIZE;
}
void dfree(uint64_t addr) {
FILE *fp = fopen(FILE_NAME, "r+b");
if (!fp) {
perror("Error opening file");
return;
}
char buffer[BLOCK_SIZE];
memset(buffer, 0, BLOCK_SIZE);
fseek(fp, addr * BLOCK_SIZE, SEEK_SET);
fwrite(buffer, 1, BLOCK_SIZE, fp);
fclose(fp);
}
void addFile(const char *filename) {
FILE *file = fopen(filename, "rb");
if (!file) {
perror("Error opening file");
return;
}
// Determine the file size
fseek(file, 0, SEEK_END);
size_t size = ftell(file);
fseek(file, 0, SEEK_SET);
// Allocate memory for the file content
char *buffer = (char *)malloc(size);
if (!buffer) {
perror("Memory allocation failed");
fclose(file);
return;
}
// Read file into the buffer
size_t bytesRead = fread(buffer, 1, size, file);
fclose(file);
if (bytesRead != size) {
perror("Error reading the file");
free(buffer);
return;
}
/* printf("->%d\n", fs_create("/", "test")); */
fs_create("/", filename);
char path[257] = "/";
strcpy(path + 1, filename);
printf("Add %s\n", filename);
fs_write(path, buffer, size);
free(buffer);
}
int main(int argc, char *argv[]) {
if(fs_init() == 0) {
for (int i = 1; i < argc; i++) {
addFile(argv[i]);
}
dump_fs();
} else {
printf("not empty disk.\n");
}
return 0;
}
```
使用方式:`mkfs/mkfs A B C D。
## 參考
1. ChatGPT
2. [xv6-riscv](https://github.com/mit-pdos/xv6-riscv/tree/riscv)
3. [mini-riscv-os Docs](https://github.com/cccriscv/mini-riscv-os/blob/70443b0ab3bf186c47b42127a0ffe31ad417fb91/doc/tw/08-BlockDeviceDriver.md)
4. [mythili-cs347_autumn2016](https://www.cse.iitb.ac.in/~mythili/teaching/cs347_autumn2016/)