# Build RISC-V Instruction Set Simulator from scratch > 黃詠筑, 劉欣宜 [GitHub](https://github.com/ychu0406/riscv_simulator) ## Mission Task: Develop a RISC-V instruction set simulator from scratch - Based on the foundation from [Problem A](https://hackmd.io/@sysprog/arch2024-quiz3-sol#Problem-A), gradually expand it to support ``RV32I``, ``RV32M``, and ``RV32C``, and pass the tests from [riscv-tests](https://github.com/riscv-software-src/riscv-tests). Additionally, select at least five RISC-V programs from Quiz 1 to Quiz 6, rewrite them, and ensure they work correctly on the developed RISC-V instruction set simulator. ## RV32I, RV32M, and RV32C **1. RV32I:** This is the basic version of the RISC-V instruction set, supporting 32-bit operations. It includes the core instructions of the RISC-V instruction set, such as arithmetic operations, logical operations, and control flow. RV32I is the foundation of the RISC-V architecture, and other extensions are built upon it. **2. RV32M:** This is an extended version of RV32I, adding support for multiplication and division instructions. The RV32M extension provides M instructions that allow the processor to perform more efficient multiplication (MUL) and division (DIV) operations. **3. RV32C:** This is another extension of RV32I that provides a compressed instruction set. The C extension adds smaller, more compact instructions to reduce program size and improve performance, especially in embedded systems and resource-constrained devices. These instructions are usually simplified versions of RV32I instructions and help save storage space while enhancing execution efficiency. ## Quiz3 Problem A :::danger Summarize the program, addressing its purpose and behavior. ::: This code primarily covers the basic instruction set of ``RV32I`` and implements support for instructions like ADDI, LUI, AUIPC, etc. It does not implement the multiplication and division instructions of ``RV32M``, nor does it include the compressed instruction set of ``RV32C``. To extend it to ``RV32M`` or ``RV32C``, the corresponding instruction handling logic would need to be added. ```c= #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define fatal(args...) \ do { \ printf("Fatal: "); \ printf(args); \ exit(-1); \ } while (0) enum { /* I-type integer computation or integer register-immediate */ INTEGER_COMP_RI = 0x13, LUI = 0x37 AUIPC = 0x17 /* ECALL and EBREAK */ ECALL = 0x73, }; /* FUNCT3 for INTEGER_COMPI_RI */ enum { ADDI = 0x0, }; enum { OPCODE_MASK = 0x7F, REG_ADDR_MASK = 0x1F }; enum { RD_SHIFT = 7, RS1_SHIFT = 15, RS2_SHIFT = 20, FUNCT3_SHIFT = 12, FUNCT7_SHIFT = 25 }; enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F }; enum { RAM_SIZE = 1204 * 1024 * 2, /* 2 MiB */ RAM_BASE = 0x0, }; typedef struct { uint8_t *mem; } ram_t; typedef struct { uint32_t regs[32]; size_t pc; ram_t *ram; } cpu_t; typedef struct { uint8_t opcode; uint8_t rd, rs1, rs2, funct3, funct7; } insn_t; ram_t *ram_new(uint8_t *code, size_t len) { ram_t *ram = malloc(sizeof(ram_t)); ram->mem = malloc(RAM_SIZE); memcpy(ram->mem, code, len); return ram; } void ram_free(ram_t *mem) { free(mem->mem); free(mem); } static inline void check_addr(int32_t addr) { size_t index = addr - RAM_BASE; if (index >= RAM_SIZE) fatal("out of memory.\n"); } static uint32_t ram_load8(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return (uint32_t) mem->mem[index]; } static uint32_t ram_load16(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8)); } static uint32_t ram_load32(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) | ((uint32_t) mem->mem[index + 2] << 16) | ((uint32_t) mem->mem[index + 3] << 24)); } uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size) { check_addr(addr); uint32_t r = 0; switch (size) { case 8: r = ram_load8(mem, addr); break; case 16: r = ram_load16(mem, addr); break; case 32: r = ram_load32(mem, addr); break; default: fatal("RAM: invalid load size(%d)\n", size); } return r; } cpu_t *cpu_new(uint8_t *code, size_t len) { cpu_t *cpu = malloc(sizeof(cpu_t)); memset(cpu->regs, 0, sizeof(cpu->regs)); cpu->regs[0] = 0; /* Stack pointer */ cpu->regs[2] = RAM_BASE + RAM_SIZE; cpu->pc = RAM_BASE; cpu->ram = ram_new(code, len); return cpu; } void cpu_free(cpu_t *cpu) { ram_free(cpu->ram); free(cpu); } uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size) { return ram_load(cpu->ram, addr, size); } uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); } void cpu_decode(uint32_t raw, insn_t *inst) { uint8_t opcode = raw & OPCODE_MASK; uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK; uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK; uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK; uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK; uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK; inst->opcode = opcode; inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2; inst->funct3 = funct3; inst->funct7 = funct7; } static inline int32_t i_imm(uint32_t raw) { /* imm[11:0] = inst[31:20] */ return ((int32_t) raw) >> 20; } static inline int32_t u_imm(uint32_t raw) { /* imm[20:0] = inst[31:12] */ return ((int32_t) raw & 0xFFFFF000); } enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 }; enum { STDOUT = 1, STDERR = 2 }; void ecall_handler(cpu_t *cpu) { uint32_t syscall_nr = cpu->regs[17]; switch (syscall_nr) { case SYSCALL_WRITE: { uint32_t fd = cpu->regs[10]; uint32_t addr = cpu->regs[11]; uint32_t count = cpu->regs[12]; FILE *stream; switch (fd) { case STDOUT: stream = stdout; break; case STDERR: stream = stderr; break; default: fatal("invalid file-descriptor %d.\n", fd); } for (uint32_t i = 0; i < count; i++) fprintf(stream, "%c", cpu_load(cpu, addr + i, 8)); break; } case SYSCALL_EXIT: { int32_t exit_code = cpu->regs[10]; exit(exit_code); break; } default: fatal("unkown syscall number %d.\n", syscall_nr); } } static insn_t inst; void cpu_execute(cpu_t *cpu, uint32_t raw) { cpu_decode(raw, &inst); cpu->regs[0] = 0; switch (inst.opcode) { case INTEGER_COMP_RI: { int32_t imm = i_imm(raw); switch (inst.funct3) { case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break; default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI"); } break; } case LUI: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = imm; break; } case AUIPC: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm; break; } case ECALL: if (raw == 0x100073) break; /* EBREAK */ ecall_handler(cpu); break; default: fatal("Illegal Instruction 0x%x\n", inst.opcode); } } int main(int argc, char *argv[]) { if (argc < 2) { printf("Usage: %s [filename]\n", argv[0]); exit(-1); } const char *filename = argv[1]; FILE *file = fopen(filename, "rb"); if (!file) { printf("Failed to open %s\n", filename); exit(-1); } fseek(file, 0L, SEEK_END); size_t code_size = ftell(file); rewind(file); uint8_t *code = malloc(code_size); fread(code, sizeof(uint8_t), code_size, file); fclose(file); cpu_t *cpu = cpu_new(code, code_size); while (1) { uint32_t raw = cpu_fetch(cpu); cpu->pc += 4; if (cpu->pc > code_size) break; cpu_execute(cpu, raw); if (!cpu->pc) break; } cpu_free(cpu); return 0; } ``` 1. Add support for ``RV32M`` extension instructions: * ``RV32M`` provides multiplication and division instructions, such as ``MUL``, ``MULH``, ``DIV``, ``DIVU``, ``REM``, and ``REMU``. * These instructions need to be handled during the decode and execution stages. 2. Add support for ``RV32C`` extension instructions: * ``RV32C`` is a compressed instruction set that includes 16-bit instructions. These instructions need to be handled separately and identified as compressed instructions during the decode stage. * ``RV32C`` instructions typically use fewer bits for encoding and can be aligned to 16 bits, so additional checks and processing are required. ```c= #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define fatal(args...) \ do { \ printf("Fatal: "); \ printf(args); \ exit(-1); \ } while (0) enum { /* I-type integer computation or integer register-immediate */ INTEGER_COMP_RI = 0x13, LUI = 0x37, AUIPC = 0x17, ECALL = 0x73, /* RV32M extension */ MUL = 0x33, // Multiply MULH = 0x33, // Multiply high DIV = 0x33, // Divide DIVU = 0x33, // Divide unsigned REM = 0x33, // Remainder REMU = 0x33, // Remainder unsigned /* RV32C extension */ C_ADDI4SPN = 0x10, // Compressed ADDI4SPN C_LW = 0x00, // Compressed Load Word C_SW = 0x01, // Compressed Store Word }; /* FUNCT3 for INTEGER_COMP_RI */ enum { ADDI = 0x0, MUL_FUNCT3 = 0x0, // For RV32M MUL DIV_FUNCT3 = 0x4, // For RV32M DIV REM_FUNCT3 = 0x5, // For RV32M REM }; /* RV32M MUL */ enum { MUL_FUNCT7 = 0x1, MULH_FUNCT7 = 0x2, }; /* FUNCT3 mask */ enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F }; enum { RAM_SIZE = 1204 * 1024 * 2, /* 2 MiB */ RAM_BASE = 0x0, }; typedef struct { uint8_t *mem; } ram_t; typedef struct { uint32_t regs[32]; size_t pc; ram_t *ram; } cpu_t; typedef struct { uint8_t opcode; uint8_t rd, rs1, rs2, funct3, funct7; } insn_t; ram_t *ram_new(uint8_t *code, size_t len) { ram_t *ram = malloc(sizeof(ram_t)); ram->mem = malloc(RAM_SIZE); memcpy(ram->mem, code, len); return ram; } void ram_free(ram_t *mem) { free(mem->mem); free(mem); } static inline void check_addr(int32_t addr) { size_t index = addr - RAM_BASE; if (index >= RAM_SIZE) fatal("out of memory.\n"); } static uint32_t ram_load8(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return (uint32_t) mem->mem[index]; } static uint32_t ram_load16(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8)); } static uint32_t ram_load32(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) | ((uint32_t) mem->mem[index + 2] << 16) | ((uint32_t) mem->mem[index + 3] << 24)); } uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size) { check_addr(addr); uint32_t r = 0; switch (size) { case 8: r = ram_load8(mem, addr); break; case 16: r = ram_load16(mem, addr); break; case 32: r = ram_load32(mem, addr); break; default: fatal("RAM: invalid load size(%d)\n", size); } return r; } cpu_t *cpu_new(uint8_t *code, size_t len) { cpu_t *cpu = malloc(sizeof(cpu_t)); memset(cpu->regs, 0, sizeof(cpu->regs)); cpu->regs[0] = 0; /* Stack pointer */ cpu->regs[2] = RAM_BASE + RAM_SIZE; cpu->pc = RAM_BASE; cpu->ram = ram_new(code, len); return cpu; } void cpu_free(cpu_t *cpu) { ram_free(cpu->ram); free(cpu); } uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size) { return ram_load(cpu->ram, addr, size); } uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); } void cpu_decode(uint32_t raw, insn_t *inst) { uint8_t opcode = raw & OPCODE_MASK; uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK; uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK; uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK; uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK; uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK; inst->opcode = opcode; inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2; inst->funct3 = funct3; inst->funct7 = funct7; } static inline int32_t i_imm(uint32_t raw) { /* imm[11:0] = inst[31:20] */ return ((int32_t) raw) >> 20; } static inline int32_t u_imm(uint32_t raw) { /* imm[20:0] = inst[31:12] */ return ((int32_t) raw & 0xFFFFF000); } enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 }; enum { STDOUT = 1, STDERR = 2 }; void ecall_handler(cpu_t *cpu) { uint32_t syscall_nr = cpu->regs[17]; switch (syscall_nr) { case SYSCALL_WRITE: { uint32_t fd = cpu->regs[10]; uint32_t addr = cpu->regs[11]; uint32_t count = cpu->regs[12]; FILE *stream; switch (fd) { case STDOUT: stream = stdout; break; case STDERR: stream = stderr; break; default: fatal("invalid file-descriptor %d.\n", fd); } for (uint32_t i = 0; i < count; i++) fprintf(stream, "%c", cpu_load(cpu, addr + i, 8)); break; } case SYSCALL_EXIT: { int32_t exit_code = cpu->regs[10]; exit(exit_code); break; } default: fatal("unkown syscall number %d.\n", syscall_nr); } } static insn_t inst; void cpu_execute(cpu_t *cpu, uint32_t raw) { cpu_decode(raw, &inst); cpu->regs[0] = 0; switch (inst.opcode) { case INTEGER_COMP_RI: { int32_t imm = i_imm(raw); switch (inst.funct3) { case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break; default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI"); } break; } case MUL: switch (inst.funct7) { case MUL_FUNCT7: cpu->regs[inst.rd] = cpu->regs[inst.rs1] * cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT7 for MUL"); } break; case DIV: switch (inst.funct3) { case DIV_FUNCT3: cpu->regs[inst.rd] = cpu->regs[inst.rs1] / cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT3 for DIV"); } break; case REM: switch (inst.funct3) { case REM_FUNCT3: cpu->regs[inst.rd] = cpu->regs[inst.rs1] % cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT3 for REM"); } break; case MULH: switch (inst.funct7) { case MULH_FUNCT7: cpu->regs[inst.rd] = (int32_t) cpu->regs[inst.rs1] * (int32_t) cpu->regs[inst.rs2] >> 32; break; default: fatal("Unknown FUNCT7 for MULH"); } break; case DIVU: switch (inst.funct3) { case 0x4: cpu->regs[inst.rd] = (uint32_t) cpu->regs[inst.rs1] / (uint32_t) cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT3 for DIVU"); } break; case REMU: switch (inst.funct3) { case 0x5: cpu->regs[inst.rd] = (uint32_t) cpu->regs[inst.rs1] % (uint32_t) cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT3 for REMU"); } break; case LUI: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = imm; break; } case AUIPC: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm; break; } case ECALL: if (raw == 0x100073) break; /* EBREAK */ ecall_handler(cpu); break; default: fatal("Illegal Instruction 0x%x\n", inst.opcode); } } int main(int argc, char *argv[]) { if (argc < 2) { printf("Usage: %s [filename]\n", argv[0]); exit(-1); } const char *filename = argv[1]; FILE *file = fopen(filename, "rb"); if (!file) { printf("Failed to open %s\n", filename); exit(-1); } fseek(file, 0L, SEEK_END); size_t code_size = ftell(file); rewind(file); uint8_t *code = malloc(code_size); fread(code, sizeof(uint8_t), code_size, file); fclose(file); cpu_t *cpu = cpu_new(code, code_size); while (1) { uint32_t raw = cpu_fetch(cpu); cpu->pc += 4; if (cpu->pc > code_size) break; cpu_execute(cpu, raw); if (!cpu->pc) break; } cpu_free(cpu); return 0; } ``` ### Makefile ```md= CC = gcc CFLAGS = -g -Wall SRC = filename.c OBJ = filename.o EXEC = riscv_simulator all: $(EXEC) $(EXEC): $(OBJ) $(CC) $(OBJ) -o $(EXEC) $(OBJ): $(SRC) $(CC) $(CFLAGS) -c $(SRC) clean: rm -f $(OBJ) $(EXEC) ``` - `make` ### Build_and_run.sh ```sh= if [ -z "$1" ]; then echo "Usage: $0 <source_file.s>" exit 1 fi SRC_FILE=$1 BASENAME=$(basename "$SRC_FILE" .s) OBJ_FILE="${BASENAME}.o" ELF_FILE="${BASENAME}.elf" riscv64-unknown-elf-as -o $OBJ_FILE $SRC_FILE riscv64-unknown-elf-ld -o $ELF_FILE $OBJ_FILE ./riscv_simulator $ELF_FILE ``` - `chmod +x build_and_run.sh` - `./build_and_run.sh filename.s` ### riscv.c ```c= #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define fatal(args...) \ do { \ printf("Fatal: "); \ printf(args); \ exit(-1); \ } while (0) enum { /* I-type integer computation or integer register-immediate */ INTEGER_COMP_RI = 0x13, LUI = 0x37, AUIPC = 0x17, ECALL = 0x73, }; enum { ADDI = 0x0, /* RV32M opcodes */ MUL = 0x0, MULH = 0x1, DIV = 0x4, DIVU = 0x5, REM = 0x6, REMU = 0x7, }; /* FUNCT3 for INTEGER_COMP_RI */ enum { OPCODE_MASK = 0x7F, REG_ADDR_MASK = 0x1F }; enum { RD_SHIFT = 7, RS1_SHIFT = 15, RS2_SHIFT = 20, FUNCT3_SHIFT = 12, FUNCT7_SHIFT = 25 }; enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F }; enum { RAM_SIZE = 1204 * 1024 * 2, RAM_BASE = 0x0 }; typedef struct { uint8_t *mem; } ram_t; typedef struct { uint32_t regs[32]; size_t pc; ram_t *ram; } cpu_t; typedef struct { uint8_t opcode; uint8_t rd, rs1, rs2, funct3, funct7; } insn_t; ram_t *ram_new(uint8_t *code, size_t len) { ram_t *ram = malloc(sizeof(ram_t)); ram->mem = malloc(RAM_SIZE); memcpy(ram->mem, code, len); return ram; } void ram_free(ram_t *mem) { free(mem->mem); free(mem); } static inline void check_addr(int32_t addr) { size_t index = addr - RAM_BASE; if (index >= RAM_SIZE) fatal("out of memory.\n"); } static uint32_t ram_load8(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return (uint32_t) mem->mem[index]; } static uint32_t ram_load16(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8)); } static uint32_t ram_load32(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) | ((uint32_t) mem->mem[index + 2] << 16) | ((uint32_t) mem->mem[index + 3] << 24)); } uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size) { check_addr(addr); uint32_t r = 0; switch (size) { case 8: r = ram_load8(mem, addr); break; case 16: r = ram_load16(mem, addr); break; case 32: r = ram_load32(mem, addr); break; default: fatal("RAM: invalid load size(%d)\n", size); } return r; } cpu_t *cpu_new(uint8_t *code, size_t len) { cpu_t *cpu = malloc(sizeof(cpu_t)); memset(cpu->regs, 0, sizeof(cpu->regs)); cpu->regs[0] = 0; // x0 is always zero cpu->regs[2] = RAM_BASE + RAM_SIZE; // Stack pointer cpu->pc = RAM_BASE; cpu->ram = ram_new(code, len); return cpu; } void cpu_free(cpu_t *cpu) { ram_free(cpu->ram); free(cpu); } uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size) { return ram_load(cpu->ram, addr, size); } uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); } void cpu_decode(uint32_t raw, insn_t *inst) { uint8_t opcode = raw & OPCODE_MASK; uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK; uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK; uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK; uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK; uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK; inst->opcode = opcode; inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2; inst->funct3 = funct3; inst->funct7 = funct7; } static inline int32_t i_imm(uint32_t raw) { return ((int32_t) raw) >> 20; } static inline int32_t u_imm(uint32_t raw) { return ((int32_t) raw & 0xFFFFF000); } enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 }; enum { STDOUT = 1, STDERR = 2 }; void ecall_handler(cpu_t *cpu) { uint32_t syscall_nr = cpu->regs[17]; switch (syscall_nr) { case SYSCALL_WRITE: { uint32_t fd = cpu->regs[10]; uint32_t addr = cpu->regs[11]; uint32_t count = cpu->regs[12]; FILE *stream; switch (fd) { case STDOUT: stream = stdout; break; case STDERR: stream = stderr; break; default: fatal("invalid file-descriptor %d.\n", fd); } for (uint32_t i = 0; i < count; i++) { fprintf(stream, "%c", cpu_load(cpu, addr + i, 8)); } break; } case SYSCALL_EXIT: { int32_t exit_code = cpu->regs[10]; exit(exit_code); break; } default: fatal("unkown syscall number %d.\n", syscall_nr); } } static insn_t inst; void cpu_execute(cpu_t *cpu, uint32_t raw) { cpu_decode(raw, &inst); cpu->regs[0] = 0; // x0 is always zero switch (inst.opcode) { case INTEGER_COMP_RI: { int32_t imm = i_imm(raw); switch (inst.funct3) { case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break; default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI"); } break; } case LUI: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = imm; break; } case AUIPC: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm; break; } // RV32M 指令集 (乘法與除法) case 0x33: { // RV32M 是 0x33 opcode switch (inst.funct3) { case 0x0: // MUL if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = (int32_t)cpu->regs[inst.rs1] * (int32_t)cpu->regs[inst.rs2]; } break; case 0x1: // MULH if (inst.funct7 == 0x01) { int64_t result = (int64_t)(int32_t)cpu->regs[inst.rs1] * (int32_t)cpu->regs[inst.rs2]; cpu->regs[inst.rd] = (result >> 32) & 0xFFFFFFFF; } break; case 0x4: // DIV if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = cpu->regs[inst.rs1] / cpu->regs[inst.rs2]; } break; case 0x5: // DIVU if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = (unsigned int)cpu->regs[inst.rs1] / (unsigned int)cpu->regs[inst.rs2]; } break; case 0x6: // REM if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = cpu->regs[inst.rs1] % cpu->regs[inst.rs2]; } break; case 0x7: // REMU if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = (unsigned int)cpu->regs[inst.rs1] % (unsigned int)cpu->regs[inst.rs2]; } break; default: fatal("Unknown funct3 for RV32M instruction\n"); } break; } // RV32C 指令集 case 0x1: { // set RV32C指令的opcode是0x1 switch (inst.funct3) { case 0x0: // C.ADDI cpu->regs[inst.rd] = cpu->regs[inst.rs1] + i_imm(raw); break; // 其他 RV32C 指令 default: fatal("Unknown FUNCT3 for RV32C instruction\n"); } break; } case ECALL: if (raw == 0x100073) break; /* EBREAK */ ecall_handler(cpu); break; default: fatal("Illegal Instruction 0x%x\n", inst.opcode); } } int main(int argc, char *argv[]) { if (argc < 2) { printf("Usage: %s [filename]\n", argv[0]); exit(-1); } const char *filename = argv[1]; FILE *file = fopen(filename, "rb"); if (!file) { printf("Failed to open %s\n", filename); exit(-1); } fseek(file, 0L, SEEK_END); size_t code_size = ftell(file); rewind(file); uint8_t *code = malloc(code_size); fread(code, sizeof(uint8_t), code_size, file); fclose(file); cpu_t *cpu = cpu_new(code, code_size); while (1) { uint32_t raw = cpu_fetch(cpu); cpu->pc += 4; if (cpu->pc > code_size) break; cpu_execute(cpu, raw); if (!cpu->pc) break; } cpu_free(cpu); return 0; } ``` ## The test from riscv-tests ### Quiz2-ProblemA ```s= .text la a0, multiplier # Load multiplier address lw a1, 0(a0) # Load multiplier value la a2, multiplicand # Load multiplicand address lw a3, 0(a2) # Load multiplicand value li t0, 0 # Initialize accumulator li t1, 32 # Set bit counter # Check for negative values bltz a1, handle_negative1 # If multiplier negative j shift_and_add_loop # Skip to main loop bltz a3, handle_negative2 # If multiplicand negative j shift_and_add_loop # Continue to main loop handle_negative1: neg a1, a1 # Make multiplier positive handle_negative2: neg a3, a3 # Make multiplicand positive shift_and_add_loop: beqz t1, end_shift_and_add # Exit if bit count is zero andi t2, a1, 1 # Check least significant bit beqz t2, skip_add # Skip add if bit is 0 add t0, t0, a3 # Add to accumulator skip_add: srai a1, a1, 1 # Right shift multiplier slli a3, a3, 1 # Left shift multiplicand addi t1, t1, -1 # Decrease bit counter j shift_and_add_loop # Repeat loop end_shift_and_add: la a4, result # Load result address sw t0, 0(a4) # Store final result ``` - This program performs multiplication of two integers using the shift-and-add algorithm. - Inputs 1. multiplier: The first integer (loaded into register a1). 2. multiplicand: The second integer (loaded into register a3). 3. result: The memory location where the product of multiplier and multiplicand will be stored. - Algorithm - The program multiplies the multiplier and multiplicand using bitwise shifts and addition (similar to how binary multiplication works). - Handles negative numbers by converting them to positive (if necessary). - The least significant bit (LSB) of the multiplier is checked: - If the LSB is 1, the multiplicand is added to the accumulator (t0). - Then, the multiplier is shifted right, and the multiplicand is shifted left. - This continues until the multiplier has been completely shifted out (bit count reaches 0). - The final result is stored in the result memory location. - Example Simulation - Input Values (in memory): - multiplier = 13 (binary: 1101) - multiplicand = 7 (binary: 0111) - Result - The final product is stored in result → 91. ### Quiz2-ProblemB ```s= .data md: .string "Move Disk " # String for move message from: .string " from '" # String for source pole to: .string "' to '" # String for destination pole newline: .string "'\n" # String for newline src: .string "A" # Source pole (A) aux: .string "B" # Auxiliary pole (B) dst: .string "C" # Destination pole (C) n: .word 3 # Number of disks .text .globl main main: lw a1, n # Load the number of disks (n) into register a1 la t0, src # Load the address of the source pole (A) into t0 la t1, dst # Load the address of the destination pole (C) into t1 la t2, aux # Load the address of the auxiliary pole (B) into t2 lbu a2, 0(t0) # Load the first character of source pole into a2 lbu a3, 0(t2) # Load the first character of auxiliary pole into a3 lbu a4, 0(t1) # Load the first character of destination pole into a4 jal x1, hanoi # Call the hanoi function (jump and link) li a7, 10 # Load system call number for program exit into a7 ecall # Make the system call to exit the program hanoi: addi sp, sp, -20 # Allocate 20 bytes of space on the stack sw x1, 0(sp) # Save the return address on the stack sw a1, 4(sp) # Save the number of disks (a1) on the stack sw a2, 8(sp) # Save the source pole (a2) on the stack sw a3, 12(sp) # Save the auxiliary pole (a3) on the stack sw a4, 16(sp) # Save the destination pole (a4) on the stack addi t0, x0, 1 # Load 1 into t0 (used for comparison) beq a1, t0, return # If there's only 1 disk, jump to return lw a1, 4(sp) # Load the number of disks from the stack addi a1, a1, -1 # Decrement the number of disks (a1) lbu a2, 8(sp) # Load source pole from the stack lbu a3, 16(sp) # Load destination pole from the stack lbu a4, 12(sp) # Load auxiliary pole from the stack jal x1, hanoi # Recursive call to hanoi lw a1, 4(sp) # Load the number of disks from the stack lbu a2, 8(sp) # Load source pole from the stack lbu a3, 16(sp) # Load destination pole from the stack jal x1, print # Call the print function to display the move lw a1, 4(sp) # Load the number of disks from the stack addi a1, a1, -1 # Decrement the number of disks (a1) lbu a2, 12(sp) # Load auxiliary pole from the stack lbu a3, 8(sp) # Load source pole from the stack lbu a4, 16(sp) # Load destination pole from the stack jal x1, hanoi # Recursive call to hanoi lw x1, 0(sp) # Load the return address from the stack addi sp, sp, 20 # Deallocate space on the stack jalr x0, x1, 0 # Return to the caller return: lw a1, 4(sp) # Load the number of disks from the stack lbu a2, 8(sp) # Load source pole from the stack lbu a3, 16(sp) # Load destination pole from the stack jal x1, print # Call the print function to display the move lw x1, 0(sp) # Load the return address from the stack addi sp, sp, 20 # Deallocate space on the stack jalr x0, x1, 0 # Return to the caller print: la a0, md # Load the address of "Move Disk " into a0 li a7, 4 # Load system call number for printing a string into a7 ecall # Make the system call to print the string addi a0, a1, 0 # Load the disk number (a1) into a0 li a7, 1 # Load system call number for printing an integer into a7 ecall # Make the system call to print the integer la a0, from # Load the address of " from '" into a0 li a7, 4 # Load system call number for printing a string into a7 ecall # Make the system call to print the string addi a0, a2, 0 # Load the source pole into a0 li a7, 11 # Load system call number for printing a character into a7 ecall # Make the system call to print the character la a0, to # Load the address of "' to '" into a0 li a7, 4 # Load system call number for printing a string into a7 ecall # Make the system call to print the string addi a0, a3, 0 # Load the destination pole into a0 li a7, 11 # Load system call number for printing a character into a7 ecall # Make the system call to print the character la a0, newline # Load the address of "'\n" into a0 li a7, 4 # Load system call number for printing a string into a7 ecall # Make the system call to print the string jalr x0, x1, 0 # Return to the caller ``` 1. Initialization: - The main function initializes the number of disks (n = 3) and the pole labels (A, B, and C). - It calls the hanoi function with these values. 2. Recursive Execution: - The hanoi function recursively solves the Tower of Hanoi problem. - When n = 1, it directly prints the move using the print function. - For n > 1, it breaks the problem into smaller subproblems: - Move n-1 disks from the source pole (A) to the auxiliary pole (B). - Move the largest disk from the source pole (A) to the destination pole (C). - Move the n-1 disks from the auxiliary pole (B) to the destination pole (C). 3. Result ``` Move Disk 1 from 'A' to 'C' Move Disk 2 from 'A' to 'B' Move Disk 1 from 'C' to 'B' Move Disk 3 from 'A' to 'C' Move Disk 1 from 'B' to 'A' Move Disk 2 from 'B' to 'C' Move Disk 1 from 'A' to 'C' ``` ### Quiz2-ProblemD ```s= .data # Define the data section with two numbers to multiply num1: .word 13 num2: .word 7 result: .word 0 # Placeholder for the final result .text # Begin the main code in the text section la x1, result # Load the address of the result variable into register x1 lw t0, num1 # Load the first number (num1) into register t0 lw t1, num2 # Load the second number (num2) into register t1 li t2, 0 # Initialize the result (t2) to 0 loop: # Check if the least significant bit of t0 (num1) is 1 (i.e., if the number is odd) andi t3, t0, 1 beq t3, x0, skip_add # If the bit is 0 (even), skip the addition # If the number is odd, add the value in t1 (num2) to the result in t2 add t2, t2, t1 skip_add: # Perform a right shift on t0 (num1), effectively dividing it by 2 srli t0, t0, 1 # Perform a left shift on t1 (num2), effectively multiplying it by 2 slli t1, t1, 1 # If t0 (num1) is not zero, repeat the loop bnez t0, loop # Store the final result in the memory location pointed by x1 sw t2, 0(x1) ``` - This program implements an integer multiplication using the “Shift and Add Algorithm.” It multiplies two integers (num1 and num2) and stores the result in result. - Execution Steps 1. Data Initialization - num1 is set to 13 (binary: 0b1101). - num2 is set to 7. - result is initialized to 0. 2. Algorithm Logic - The program uses the “Shift and Add Algorithm” to perform multiplication by iterating through each bit of num1. - During each loop: - If the least significant bit (LSB) of num1 is 1, the value of num2 is added to result (t2). - num1 is right-shifted (srli), effectively dividing it by 2. - num2 is left-shifted (slli), effectively multiplying it by 2. - The loop continues until num1 becomes 0. - The final result is stored in the memory location pointed to by result. - After all bits are processed, the result (91) is stored in memory at the address of result. - Result:`91` ### Quiz4-ProblemH ```s= # Convert signed int to float # float i2f(s32 num); # INPUT: A0 = integer number # OUTPUT: A0 = float number (IEEE 754 single-precision) i2f: # Prepare result sign in A1 srli a1, a0, 31 # A1 = (A0 >> 31), extract sign bit slli a1, a1, 31 # A1 = A1 << 31, position sign bit at bit 31 # Get absolute value of the number beq a1, x0, positive # If sign bit is zero, number is positive sub a0, x0, a0 # A0 = -A0 (negate to get absolute value) positive: # Check if number is zero beq a0, x0, zero_result # If A0 == 0, result is zero # Store sign and absolute value add a2, x0, a1 # A2 = A1 (store sign) add a1, x0, a0 # A1 = A0 (store absolute value) # Count leading zeros in A0 (result stored in T0) # Initialize T0 (leading zero count) to 0 add t0, x0, x0 # T0 = 0 clz_loop: sll t1, a1, 1 # Shift A1 left by 1 bit addi t0, t0, 1 # T0 = T0 + 1 (increment count) mv a1, t1 # Update A1 with shifted value bgez a1, clz_loop # Loop while A1 >= 0 (MSB is 0) # Adjust for possible overflow in shift li t1, 32 # Load 32 into t1 blt t0, t1, clz_done # If t0 < 32, proceed to clz_done li a0, 32 # If t0 >= 32, leading zeros = 32 j normalize clz_done: # A0 now contains the number of leading zeros (stored in T0) add a0, x0, t0 # Move T0 to A0 normalize: # Normalize the number (shift left by leading zeros) sub t1, x0, a0 # t1 = -A0 sll a1, a1, t1 # A1 = A1 << (32 - leading zeros) # Prepare result exponent # Exponent = 158 - leading_zeros li t1, 158 sub a0, t1, a0 # A0 = 158 - leading zeros # Rounding: Add 0x80 to A1 (rounding bit at position 7) addi a1, a1, 128 # A1 = A1 + 0x80 # Check for overflow after rounding bgez a1, rounding_ok # If A1 >= 0, no overflow addi a0, a0, 1 # Exponent increment due to rounding overflow j adjust_mantissa rounding_ok: # Check if lowest 8 bits are zero (for rounding to even) andi a3, a1, 0xFF # A3 = A1 & 0xFF (lowest 8 bits) beq a3, x0, round_even # If lowest 8 bits are zero, round to even adjust_mantissa: # Remove leading hidden bit '1' slli a1, a1, 1 # A1 = A1 << 1 (remove hidden '1') j compose_result round_even: # Ensure even mantissa (clear least significant bit) srli a1, a1, 9 # A1 = A1 >> 9 slli a1, a1, 10 # A1 = A1 << 10 (remove hidden '1') j compose_result compose_result: # Align mantissa and exponent srli a1, a1, 9 # A1 = A1 >> 9 (align mantissa to bits 0..22) slli a0, a0, 23 # A0 = A0 << 23 (align exponent to bits 23..30), H10 or a0, a0, a1 # A0 = exponent | mantissa or a0, a0, a2 # A0 = A0 | sign bit (bit 31) jalr x0, ra, 0 # Return from function zero_result: # Return zero (signed zero with correct sign) or a0, x0, a2 # A0 = sign bit (A2) jalr x0, ra, 0 # Return from function ``` - Result - `0x42280000` ### Quiz5-ProblemD ```s= # Takes input N (a0), returns its log base 2 in a0 logint: addi sp, sp, -4 sw t0, 0(sp) add t0, a0, zero# k = N add a0, zero, zero# i = 0 logloop: beq t0, zero, logloop_end # Exit if k == 0 srai t0, t0, 1 # k >>= 1 addi a0, a0, 1 # i++ j logloop logloop_end: addi a0, a0, -1 # Return i - 1 lw t0, 0(sp) addi sp, sp, 4 jr ra # Takes inputs N(a0) and n(a1), reverses the number in binary reverse: addi sp, sp, -28 sw ra, 0(sp) sw s0, 4(sp) sw s1, 8(sp) sw s2, 12(sp) sw s3, 16(sp) sw s4, 20(sp) sw s5, 24(sp) call logint# Now a0 has log2(N) addi s0, zero, 1 # j = 1 add s1, zero, zero # p = 0 forloop_reverse: bgt s0, a0, forloop_reverse_end sub s2, a0, s0# s2 = a0 - s0 add is3, zero, 1 sll s3, s3, s2 and s3, a1, s3 beq s3, zero, elses3 # If not, skip ifs3: addi s4, s0, -1 # s4 = j - 1 addi s5, zero, 1 sll s5, s5, s4 or s1, s1, s5 elses3: addi s0, s0, 1 j forloop_reverse forloop_reverse_end: add a0, s1, zero # Return p lw ra, 0(sp) lw s0, 4(sp) lw s1, 8(sp) lw s2, 12(sp) lw s3, 16(sp) lw s4, 20(sp) lw s5, 24(sp) addi sp, sp, 28 jr ra ``` **1. Corrected Execution of logint** - The purpose of logint is to calculate the integer base-2 logarithm (log2) of N. - Initial value: a0 = 42 - Execution steps: ``` Iteration 1: t0 = 21, a0 = 1 Iteration 2: t0 = 10, a0 = 2 Iteration 3: t0 = 5, a0 = 3 Iteration 4: t0 = 2, a0 = 4 Iteration 5: t0 = 1, a0 = 5 Iteration 6: t0 = 0, exit loop. ``` - After exiting the loop, addi a0, a0, -1 is executed. Final result: - a0 = 5 - 1 = 4 - Thus, the result of logint is a0 = 4. **2. Corrected Execution of reverse** - The purpose of reverse is to reverse the binary representation of a number. - Initial values: - a0 = 4 (result from log2(N)) - a1 = 42 (binary representation: 101010) - Execution process: 1. Confirmed a0 = 4, s0 = 1, s1 = 0. 2. Begin reversing the binary number 101010: ``` Bit 1: s0 = 1, extract bit 5 (most significant bit), which is 1 → reverse to the least significant bit, s1 = 0b000001. Bit 2: s0 = 2, extract bit 4, which is 0 → skip. Bit 3: s0 = 3, extract bit 3, which is 1 → reverse to the 2nd least significant bit, s1 = 0b000101. Bit 4: s0 = 4, extract bit 2, which is 0 → skip. Bit 5: s0 = 5, extract bit 1, which is 1 → reverse to the 4th least significant bit, s1 = 0b010101 (decimal value: 21). ``` - Final result: - a0 = 11. - Thus, the result of reverse is a0 = 11. - Conclusion Result - logint result: a0 = 4 - reverse result: a0 = 11 ## References https://github.com/riscv/riscv-isa-manual/releases https://acsa.ustc.edu.cn/ics/download/riscv/RISC-V-Reader-Chinese-v2p1.pdf https://blog.csdn.net/qq_38798111/article/details/129718824 https://github.com/riscv-collab/riscv-gnu-toolchain https://github.com/riscv-software-src/riscv-tests https://hackmd.io/@sysprog/arch2024-quiz2-sol https://hackmd.io/@sysprog/arch2024-quiz4-sol https://hackmd.io/@sysprog/arch2024-quiz5-sol