Try   HackMD

Build RISC-V Instruction Set Simulator from scratch

黃詠筑, 劉欣宜

GitHub

Mission

Task: Develop a RISC-V instruction set simulator from scratch - Based on the foundation from Problem A, gradually expand it to support RV32I, RV32M, and RV32C, and pass the tests from riscv-tests. Additionally, select at least five RISC-V programs from Quiz 1 to Quiz 6, rewrite them, and ensure they work correctly on the developed RISC-V instruction set simulator.

RV32I, RV32M, and RV32C

1. RV32I: This is the basic version of the RISC-V instruction set, supporting 32-bit operations. It includes the core instructions of the RISC-V instruction set, such as arithmetic operations, logical operations, and control flow. RV32I is the foundation of the RISC-V architecture, and other extensions are built upon it.

2. RV32M: This is an extended version of RV32I, adding support for multiplication and division instructions. The RV32M extension provides M instructions that allow the processor to perform more efficient multiplication (MUL) and division (DIV) operations.

3. RV32C: This is another extension of RV32I that provides a compressed instruction set. The C extension adds smaller, more compact instructions to reduce program size and improve performance, especially in embedded systems and resource-constrained devices. These instructions are usually simplified versions of RV32I instructions and help save storage space while enhancing execution efficiency.

Quiz3 Problem A

Summarize the program, addressing its purpose and behavior.

This code primarily covers the basic instruction set of RV32I and implements support for instructions like ADDI, LUI, AUIPC, etc. It does not implement the multiplication and division instructions of RV32M, nor does it include the compressed instruction set of RV32C. To extend it to RV32M or RV32C, the corresponding instruction handling logic would need to be added.

#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define fatal(args...) \ do { \ printf("Fatal: "); \ printf(args); \ exit(-1); \ } while (0) enum { /* I-type integer computation or integer register-immediate */ INTEGER_COMP_RI = 0x13, LUI = 0x37 AUIPC = 0x17 /* ECALL and EBREAK */ ECALL = 0x73, }; /* FUNCT3 for INTEGER_COMPI_RI */ enum { ADDI = 0x0, }; enum { OPCODE_MASK = 0x7F, REG_ADDR_MASK = 0x1F }; enum { RD_SHIFT = 7, RS1_SHIFT = 15, RS2_SHIFT = 20, FUNCT3_SHIFT = 12, FUNCT7_SHIFT = 25 }; enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F }; enum { RAM_SIZE = 1204 * 1024 * 2, /* 2 MiB */ RAM_BASE = 0x0, }; typedef struct { uint8_t *mem; } ram_t; typedef struct { uint32_t regs[32]; size_t pc; ram_t *ram; } cpu_t; typedef struct { uint8_t opcode; uint8_t rd, rs1, rs2, funct3, funct7; } insn_t; ram_t *ram_new(uint8_t *code, size_t len) { ram_t *ram = malloc(sizeof(ram_t)); ram->mem = malloc(RAM_SIZE); memcpy(ram->mem, code, len); return ram; } void ram_free(ram_t *mem) { free(mem->mem); free(mem); } static inline void check_addr(int32_t addr) { size_t index = addr - RAM_BASE; if (index >= RAM_SIZE) fatal("out of memory.\n"); } static uint32_t ram_load8(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return (uint32_t) mem->mem[index]; } static uint32_t ram_load16(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8)); } static uint32_t ram_load32(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) | ((uint32_t) mem->mem[index + 2] << 16) | ((uint32_t) mem->mem[index + 3] << 24)); } uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size) { check_addr(addr); uint32_t r = 0; switch (size) { case 8: r = ram_load8(mem, addr); break; case 16: r = ram_load16(mem, addr); break; case 32: r = ram_load32(mem, addr); break; default: fatal("RAM: invalid load size(%d)\n", size); } return r; } cpu_t *cpu_new(uint8_t *code, size_t len) { cpu_t *cpu = malloc(sizeof(cpu_t)); memset(cpu->regs, 0, sizeof(cpu->regs)); cpu->regs[0] = 0; /* Stack pointer */ cpu->regs[2] = RAM_BASE + RAM_SIZE; cpu->pc = RAM_BASE; cpu->ram = ram_new(code, len); return cpu; } void cpu_free(cpu_t *cpu) { ram_free(cpu->ram); free(cpu); } uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size) { return ram_load(cpu->ram, addr, size); } uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); } void cpu_decode(uint32_t raw, insn_t *inst) { uint8_t opcode = raw & OPCODE_MASK; uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK; uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK; uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK; uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK; uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK; inst->opcode = opcode; inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2; inst->funct3 = funct3; inst->funct7 = funct7; } static inline int32_t i_imm(uint32_t raw) { /* imm[11:0] = inst[31:20] */ return ((int32_t) raw) >> 20; } static inline int32_t u_imm(uint32_t raw) { /* imm[20:0] = inst[31:12] */ return ((int32_t) raw & 0xFFFFF000); } enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 }; enum { STDOUT = 1, STDERR = 2 }; void ecall_handler(cpu_t *cpu) { uint32_t syscall_nr = cpu->regs[17]; switch (syscall_nr) { case SYSCALL_WRITE: { uint32_t fd = cpu->regs[10]; uint32_t addr = cpu->regs[11]; uint32_t count = cpu->regs[12]; FILE *stream; switch (fd) { case STDOUT: stream = stdout; break; case STDERR: stream = stderr; break; default: fatal("invalid file-descriptor %d.\n", fd); } for (uint32_t i = 0; i < count; i++) fprintf(stream, "%c", cpu_load(cpu, addr + i, 8)); break; } case SYSCALL_EXIT: { int32_t exit_code = cpu->regs[10]; exit(exit_code); break; } default: fatal("unkown syscall number %d.\n", syscall_nr); } } static insn_t inst; void cpu_execute(cpu_t *cpu, uint32_t raw) { cpu_decode(raw, &inst); cpu->regs[0] = 0; switch (inst.opcode) { case INTEGER_COMP_RI: { int32_t imm = i_imm(raw); switch (inst.funct3) { case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break; default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI"); } break; } case LUI: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = imm; break; } case AUIPC: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm; break; } case ECALL: if (raw == 0x100073) break; /* EBREAK */ ecall_handler(cpu); break; default: fatal("Illegal Instruction 0x%x\n", inst.opcode); } } int main(int argc, char *argv[]) { if (argc < 2) { printf("Usage: %s [filename]\n", argv[0]); exit(-1); } const char *filename = argv[1]; FILE *file = fopen(filename, "rb"); if (!file) { printf("Failed to open %s\n", filename); exit(-1); } fseek(file, 0L, SEEK_END); size_t code_size = ftell(file); rewind(file); uint8_t *code = malloc(code_size); fread(code, sizeof(uint8_t), code_size, file); fclose(file); cpu_t *cpu = cpu_new(code, code_size); while (1) { uint32_t raw = cpu_fetch(cpu); cpu->pc += 4; if (cpu->pc > code_size) break; cpu_execute(cpu, raw); if (!cpu->pc) break; } cpu_free(cpu); return 0; }
  1. Add support for RV32M extension instructions:
  • RV32M provides multiplication and division instructions, such as MUL, MULH, DIV, DIVU, REM, and REMU.
  • These instructions need to be handled during the decode and execution stages.
  1. Add support for RV32C extension instructions:
  • RV32C is a compressed instruction set that includes 16-bit instructions. These instructions need to be handled separately and identified as compressed instructions during the decode stage.
  • RV32C instructions typically use fewer bits for encoding and can be aligned to 16 bits, so additional checks and processing are required.
#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define fatal(args...) \ do { \ printf("Fatal: "); \ printf(args); \ exit(-1); \ } while (0) enum { /* I-type integer computation or integer register-immediate */ INTEGER_COMP_RI = 0x13, LUI = 0x37, AUIPC = 0x17, ECALL = 0x73, /* RV32M extension */ MUL = 0x33, // Multiply MULH = 0x33, // Multiply high DIV = 0x33, // Divide DIVU = 0x33, // Divide unsigned REM = 0x33, // Remainder REMU = 0x33, // Remainder unsigned /* RV32C extension */ C_ADDI4SPN = 0x10, // Compressed ADDI4SPN C_LW = 0x00, // Compressed Load Word C_SW = 0x01, // Compressed Store Word }; /* FUNCT3 for INTEGER_COMP_RI */ enum { ADDI = 0x0, MUL_FUNCT3 = 0x0, // For RV32M MUL DIV_FUNCT3 = 0x4, // For RV32M DIV REM_FUNCT3 = 0x5, // For RV32M REM }; /* RV32M MUL */ enum { MUL_FUNCT7 = 0x1, MULH_FUNCT7 = 0x2, }; /* FUNCT3 mask */ enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F }; enum { RAM_SIZE = 1204 * 1024 * 2, /* 2 MiB */ RAM_BASE = 0x0, }; typedef struct { uint8_t *mem; } ram_t; typedef struct { uint32_t regs[32]; size_t pc; ram_t *ram; } cpu_t; typedef struct { uint8_t opcode; uint8_t rd, rs1, rs2, funct3, funct7; } insn_t; ram_t *ram_new(uint8_t *code, size_t len) { ram_t *ram = malloc(sizeof(ram_t)); ram->mem = malloc(RAM_SIZE); memcpy(ram->mem, code, len); return ram; } void ram_free(ram_t *mem) { free(mem->mem); free(mem); } static inline void check_addr(int32_t addr) { size_t index = addr - RAM_BASE; if (index >= RAM_SIZE) fatal("out of memory.\n"); } static uint32_t ram_load8(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return (uint32_t) mem->mem[index]; } static uint32_t ram_load16(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8)); } static uint32_t ram_load32(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) | ((uint32_t) mem->mem[index + 2] << 16) | ((uint32_t) mem->mem[index + 3] << 24)); } uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size) { check_addr(addr); uint32_t r = 0; switch (size) { case 8: r = ram_load8(mem, addr); break; case 16: r = ram_load16(mem, addr); break; case 32: r = ram_load32(mem, addr); break; default: fatal("RAM: invalid load size(%d)\n", size); } return r; } cpu_t *cpu_new(uint8_t *code, size_t len) { cpu_t *cpu = malloc(sizeof(cpu_t)); memset(cpu->regs, 0, sizeof(cpu->regs)); cpu->regs[0] = 0; /* Stack pointer */ cpu->regs[2] = RAM_BASE + RAM_SIZE; cpu->pc = RAM_BASE; cpu->ram = ram_new(code, len); return cpu; } void cpu_free(cpu_t *cpu) { ram_free(cpu->ram); free(cpu); } uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size) { return ram_load(cpu->ram, addr, size); } uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); } void cpu_decode(uint32_t raw, insn_t *inst) { uint8_t opcode = raw & OPCODE_MASK; uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK; uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK; uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK; uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK; uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK; inst->opcode = opcode; inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2; inst->funct3 = funct3; inst->funct7 = funct7; } static inline int32_t i_imm(uint32_t raw) { /* imm[11:0] = inst[31:20] */ return ((int32_t) raw) >> 20; } static inline int32_t u_imm(uint32_t raw) { /* imm[20:0] = inst[31:12] */ return ((int32_t) raw & 0xFFFFF000); } enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 }; enum { STDOUT = 1, STDERR = 2 }; void ecall_handler(cpu_t *cpu) { uint32_t syscall_nr = cpu->regs[17]; switch (syscall_nr) { case SYSCALL_WRITE: { uint32_t fd = cpu->regs[10]; uint32_t addr = cpu->regs[11]; uint32_t count = cpu->regs[12]; FILE *stream; switch (fd) { case STDOUT: stream = stdout; break; case STDERR: stream = stderr; break; default: fatal("invalid file-descriptor %d.\n", fd); } for (uint32_t i = 0; i < count; i++) fprintf(stream, "%c", cpu_load(cpu, addr + i, 8)); break; } case SYSCALL_EXIT: { int32_t exit_code = cpu->regs[10]; exit(exit_code); break; } default: fatal("unkown syscall number %d.\n", syscall_nr); } } static insn_t inst; void cpu_execute(cpu_t *cpu, uint32_t raw) { cpu_decode(raw, &inst); cpu->regs[0] = 0; switch (inst.opcode) { case INTEGER_COMP_RI: { int32_t imm = i_imm(raw); switch (inst.funct3) { case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break; default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI"); } break; } case MUL: switch (inst.funct7) { case MUL_FUNCT7: cpu->regs[inst.rd] = cpu->regs[inst.rs1] * cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT7 for MUL"); } break; case DIV: switch (inst.funct3) { case DIV_FUNCT3: cpu->regs[inst.rd] = cpu->regs[inst.rs1] / cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT3 for DIV"); } break; case REM: switch (inst.funct3) { case REM_FUNCT3: cpu->regs[inst.rd] = cpu->regs[inst.rs1] % cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT3 for REM"); } break; case MULH: switch (inst.funct7) { case MULH_FUNCT7: cpu->regs[inst.rd] = (int32_t) cpu->regs[inst.rs1] * (int32_t) cpu->regs[inst.rs2] >> 32; break; default: fatal("Unknown FUNCT7 for MULH"); } break; case DIVU: switch (inst.funct3) { case 0x4: cpu->regs[inst.rd] = (uint32_t) cpu->regs[inst.rs1] / (uint32_t) cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT3 for DIVU"); } break; case REMU: switch (inst.funct3) { case 0x5: cpu->regs[inst.rd] = (uint32_t) cpu->regs[inst.rs1] % (uint32_t) cpu->regs[inst.rs2]; break; default: fatal("Unknown FUNCT3 for REMU"); } break; case LUI: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = imm; break; } case AUIPC: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm; break; } case ECALL: if (raw == 0x100073) break; /* EBREAK */ ecall_handler(cpu); break; default: fatal("Illegal Instruction 0x%x\n", inst.opcode); } } int main(int argc, char *argv[]) { if (argc < 2) { printf("Usage: %s [filename]\n", argv[0]); exit(-1); } const char *filename = argv[1]; FILE *file = fopen(filename, "rb"); if (!file) { printf("Failed to open %s\n", filename); exit(-1); } fseek(file, 0L, SEEK_END); size_t code_size = ftell(file); rewind(file); uint8_t *code = malloc(code_size); fread(code, sizeof(uint8_t), code_size, file); fclose(file); cpu_t *cpu = cpu_new(code, code_size); while (1) { uint32_t raw = cpu_fetch(cpu); cpu->pc += 4; if (cpu->pc > code_size) break; cpu_execute(cpu, raw); if (!cpu->pc) break; } cpu_free(cpu); return 0; }

Makefile

CC = gcc CFLAGS = -g -Wall SRC = filename.c OBJ = filename.o EXEC = riscv_simulator all: $(EXEC) $(EXEC): $(OBJ) $(CC) $(OBJ) -o $(EXEC) $(OBJ): $(SRC) $(CC) $(CFLAGS) -c $(SRC) clean: rm -f $(OBJ) $(EXEC)
  • make

Build_and_run.sh

if [ -z "$1" ]; then echo "Usage: $0 <source_file.s>" exit 1 fi SRC_FILE=$1 BASENAME=$(basename "$SRC_FILE" .s) OBJ_FILE="${BASENAME}.o" ELF_FILE="${BASENAME}.elf" riscv64-unknown-elf-as -o $OBJ_FILE $SRC_FILE riscv64-unknown-elf-ld -o $ELF_FILE $OBJ_FILE ./riscv_simulator $ELF_FILE
  • chmod +x build_and_run.sh
  • ./build_and_run.sh filename.s

riscv.c

#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define fatal(args...) \ do { \ printf("Fatal: "); \ printf(args); \ exit(-1); \ } while (0) enum { /* I-type integer computation or integer register-immediate */ INTEGER_COMP_RI = 0x13, LUI = 0x37, AUIPC = 0x17, ECALL = 0x73, }; enum { ADDI = 0x0, /* RV32M opcodes */ MUL = 0x0, MULH = 0x1, DIV = 0x4, DIVU = 0x5, REM = 0x6, REMU = 0x7, }; /* FUNCT3 for INTEGER_COMP_RI */ enum { OPCODE_MASK = 0x7F, REG_ADDR_MASK = 0x1F }; enum { RD_SHIFT = 7, RS1_SHIFT = 15, RS2_SHIFT = 20, FUNCT3_SHIFT = 12, FUNCT7_SHIFT = 25 }; enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F }; enum { RAM_SIZE = 1204 * 1024 * 2, RAM_BASE = 0x0 }; typedef struct { uint8_t *mem; } ram_t; typedef struct { uint32_t regs[32]; size_t pc; ram_t *ram; } cpu_t; typedef struct { uint8_t opcode; uint8_t rd, rs1, rs2, funct3, funct7; } insn_t; ram_t *ram_new(uint8_t *code, size_t len) { ram_t *ram = malloc(sizeof(ram_t)); ram->mem = malloc(RAM_SIZE); memcpy(ram->mem, code, len); return ram; } void ram_free(ram_t *mem) { free(mem->mem); free(mem); } static inline void check_addr(int32_t addr) { size_t index = addr - RAM_BASE; if (index >= RAM_SIZE) fatal("out of memory.\n"); } static uint32_t ram_load8(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return (uint32_t) mem->mem[index]; } static uint32_t ram_load16(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8)); } static uint32_t ram_load32(ram_t *mem, uint32_t addr) { size_t index = addr - RAM_BASE; return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) | ((uint32_t) mem->mem[index + 2] << 16) | ((uint32_t) mem->mem[index + 3] << 24)); } uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size) { check_addr(addr); uint32_t r = 0; switch (size) { case 8: r = ram_load8(mem, addr); break; case 16: r = ram_load16(mem, addr); break; case 32: r = ram_load32(mem, addr); break; default: fatal("RAM: invalid load size(%d)\n", size); } return r; } cpu_t *cpu_new(uint8_t *code, size_t len) { cpu_t *cpu = malloc(sizeof(cpu_t)); memset(cpu->regs, 0, sizeof(cpu->regs)); cpu->regs[0] = 0; // x0 is always zero cpu->regs[2] = RAM_BASE + RAM_SIZE; // Stack pointer cpu->pc = RAM_BASE; cpu->ram = ram_new(code, len); return cpu; } void cpu_free(cpu_t *cpu) { ram_free(cpu->ram); free(cpu); } uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size) { return ram_load(cpu->ram, addr, size); } uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); } void cpu_decode(uint32_t raw, insn_t *inst) { uint8_t opcode = raw & OPCODE_MASK; uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK; uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK; uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK; uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK; uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK; inst->opcode = opcode; inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2; inst->funct3 = funct3; inst->funct7 = funct7; } static inline int32_t i_imm(uint32_t raw) { return ((int32_t) raw) >> 20; } static inline int32_t u_imm(uint32_t raw) { return ((int32_t) raw & 0xFFFFF000); } enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 }; enum { STDOUT = 1, STDERR = 2 }; void ecall_handler(cpu_t *cpu) { uint32_t syscall_nr = cpu->regs[17]; switch (syscall_nr) { case SYSCALL_WRITE: { uint32_t fd = cpu->regs[10]; uint32_t addr = cpu->regs[11]; uint32_t count = cpu->regs[12]; FILE *stream; switch (fd) { case STDOUT: stream = stdout; break; case STDERR: stream = stderr; break; default: fatal("invalid file-descriptor %d.\n", fd); } for (uint32_t i = 0; i < count; i++) { fprintf(stream, "%c", cpu_load(cpu, addr + i, 8)); } break; } case SYSCALL_EXIT: { int32_t exit_code = cpu->regs[10]; exit(exit_code); break; } default: fatal("unkown syscall number %d.\n", syscall_nr); } } static insn_t inst; void cpu_execute(cpu_t *cpu, uint32_t raw) { cpu_decode(raw, &inst); cpu->regs[0] = 0; // x0 is always zero switch (inst.opcode) { case INTEGER_COMP_RI: { int32_t imm = i_imm(raw); switch (inst.funct3) { case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break; default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI"); } break; } case LUI: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = imm; break; } case AUIPC: { int32_t imm = u_imm(raw); cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm; break; } // RV32M 指令集 (乘法與除法) case 0x33: { // RV32M 是 0x33 opcode switch (inst.funct3) { case 0x0: // MUL if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = (int32_t)cpu->regs[inst.rs1] * (int32_t)cpu->regs[inst.rs2]; } break; case 0x1: // MULH if (inst.funct7 == 0x01) { int64_t result = (int64_t)(int32_t)cpu->regs[inst.rs1] * (int32_t)cpu->regs[inst.rs2]; cpu->regs[inst.rd] = (result >> 32) & 0xFFFFFFFF; } break; case 0x4: // DIV if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = cpu->regs[inst.rs1] / cpu->regs[inst.rs2]; } break; case 0x5: // DIVU if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = (unsigned int)cpu->regs[inst.rs1] / (unsigned int)cpu->regs[inst.rs2]; } break; case 0x6: // REM if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = cpu->regs[inst.rs1] % cpu->regs[inst.rs2]; } break; case 0x7: // REMU if (inst.funct7 == 0x01) { cpu->regs[inst.rd] = (unsigned int)cpu->regs[inst.rs1] % (unsigned int)cpu->regs[inst.rs2]; } break; default: fatal("Unknown funct3 for RV32M instruction\n"); } break; } // RV32C 指令集 case 0x1: { // set RV32C指令的opcode是0x1 switch (inst.funct3) { case 0x0: // C.ADDI cpu->regs[inst.rd] = cpu->regs[inst.rs1] + i_imm(raw); break; // 其他 RV32C 指令 default: fatal("Unknown FUNCT3 for RV32C instruction\n"); } break; } case ECALL: if (raw == 0x100073) break; /* EBREAK */ ecall_handler(cpu); break; default: fatal("Illegal Instruction 0x%x\n", inst.opcode); } } int main(int argc, char *argv[]) { if (argc < 2) { printf("Usage: %s [filename]\n", argv[0]); exit(-1); } const char *filename = argv[1]; FILE *file = fopen(filename, "rb"); if (!file) { printf("Failed to open %s\n", filename); exit(-1); } fseek(file, 0L, SEEK_END); size_t code_size = ftell(file); rewind(file); uint8_t *code = malloc(code_size); fread(code, sizeof(uint8_t), code_size, file); fclose(file); cpu_t *cpu = cpu_new(code, code_size); while (1) { uint32_t raw = cpu_fetch(cpu); cpu->pc += 4; if (cpu->pc > code_size) break; cpu_execute(cpu, raw); if (!cpu->pc) break; } cpu_free(cpu); return 0; }

The test from riscv-tests

Quiz2-ProblemA

.text la a0, multiplier # Load multiplier address lw a1, 0(a0) # Load multiplier value la a2, multiplicand # Load multiplicand address lw a3, 0(a2) # Load multiplicand value li t0, 0 # Initialize accumulator li t1, 32 # Set bit counter # Check for negative values bltz a1, handle_negative1 # If multiplier negative j shift_and_add_loop # Skip to main loop bltz a3, handle_negative2 # If multiplicand negative j shift_and_add_loop # Continue to main loop handle_negative1: neg a1, a1 # Make multiplier positive handle_negative2: neg a3, a3 # Make multiplicand positive shift_and_add_loop: beqz t1, end_shift_and_add # Exit if bit count is zero andi t2, a1, 1 # Check least significant bit beqz t2, skip_add # Skip add if bit is 0 add t0, t0, a3 # Add to accumulator skip_add: srai a1, a1, 1 # Right shift multiplier slli a3, a3, 1 # Left shift multiplicand addi t1, t1, -1 # Decrease bit counter j shift_and_add_loop # Repeat loop end_shift_and_add: la a4, result # Load result address sw t0, 0(a4) # Store final result
  • This program performs multiplication of two integers using the shift-and-add algorithm.

  • Inputs

    1. multiplier: The first integer (loaded into register a1).
    2. multiplicand: The second integer (loaded into register a3).
    3. result: The memory location where the product of multiplier and multiplicand will be stored.
  • Algorithm

    • The program multiplies the multiplier and multiplicand using bitwise shifts and addition (similar to how binary multiplication works).
    • Handles negative numbers by converting them to positive (if necessary).
    • The least significant bit (LSB) of the multiplier is checked:
    • If the LSB is 1, the multiplicand is added to the accumulator (t0).
    • Then, the multiplier is shifted right, and the multiplicand is shifted left.
    • This continues until the multiplier has been completely shifted out (bit count reaches 0).
  • The final result is stored in the result memory location.

  • Example Simulation

    • Input Values (in memory):
      • multiplier = 13 (binary: 1101)
      • multiplicand = 7 (binary: 0111)
    • Result
      • The final product is stored in result → 91.

Quiz2-ProblemB

.data md: .string "Move Disk " # String for move message from: .string " from '" # String for source pole to: .string "' to '" # String for destination pole newline: .string "'\n" # String for newline src: .string "A" # Source pole (A) aux: .string "B" # Auxiliary pole (B) dst: .string "C" # Destination pole (C) n: .word 3 # Number of disks .text .globl main main: lw a1, n # Load the number of disks (n) into register a1 la t0, src # Load the address of the source pole (A) into t0 la t1, dst # Load the address of the destination pole (C) into t1 la t2, aux # Load the address of the auxiliary pole (B) into t2 lbu a2, 0(t0) # Load the first character of source pole into a2 lbu a3, 0(t2) # Load the first character of auxiliary pole into a3 lbu a4, 0(t1) # Load the first character of destination pole into a4 jal x1, hanoi # Call the hanoi function (jump and link) li a7, 10 # Load system call number for program exit into a7 ecall # Make the system call to exit the program hanoi: addi sp, sp, -20 # Allocate 20 bytes of space on the stack sw x1, 0(sp) # Save the return address on the stack sw a1, 4(sp) # Save the number of disks (a1) on the stack sw a2, 8(sp) # Save the source pole (a2) on the stack sw a3, 12(sp) # Save the auxiliary pole (a3) on the stack sw a4, 16(sp) # Save the destination pole (a4) on the stack addi t0, x0, 1 # Load 1 into t0 (used for comparison) beq a1, t0, return # If there's only 1 disk, jump to return lw a1, 4(sp) # Load the number of disks from the stack addi a1, a1, -1 # Decrement the number of disks (a1) lbu a2, 8(sp) # Load source pole from the stack lbu a3, 16(sp) # Load destination pole from the stack lbu a4, 12(sp) # Load auxiliary pole from the stack jal x1, hanoi # Recursive call to hanoi lw a1, 4(sp) # Load the number of disks from the stack lbu a2, 8(sp) # Load source pole from the stack lbu a3, 16(sp) # Load destination pole from the stack jal x1, print # Call the print function to display the move lw a1, 4(sp) # Load the number of disks from the stack addi a1, a1, -1 # Decrement the number of disks (a1) lbu a2, 12(sp) # Load auxiliary pole from the stack lbu a3, 8(sp) # Load source pole from the stack lbu a4, 16(sp) # Load destination pole from the stack jal x1, hanoi # Recursive call to hanoi lw x1, 0(sp) # Load the return address from the stack addi sp, sp, 20 # Deallocate space on the stack jalr x0, x1, 0 # Return to the caller return: lw a1, 4(sp) # Load the number of disks from the stack lbu a2, 8(sp) # Load source pole from the stack lbu a3, 16(sp) # Load destination pole from the stack jal x1, print # Call the print function to display the move lw x1, 0(sp) # Load the return address from the stack addi sp, sp, 20 # Deallocate space on the stack jalr x0, x1, 0 # Return to the caller print: la a0, md # Load the address of "Move Disk " into a0 li a7, 4 # Load system call number for printing a string into a7 ecall # Make the system call to print the string addi a0, a1, 0 # Load the disk number (a1) into a0 li a7, 1 # Load system call number for printing an integer into a7 ecall # Make the system call to print the integer la a0, from # Load the address of " from '" into a0 li a7, 4 # Load system call number for printing a string into a7 ecall # Make the system call to print the string addi a0, a2, 0 # Load the source pole into a0 li a7, 11 # Load system call number for printing a character into a7 ecall # Make the system call to print the character la a0, to # Load the address of "' to '" into a0 li a7, 4 # Load system call number for printing a string into a7 ecall # Make the system call to print the string addi a0, a3, 0 # Load the destination pole into a0 li a7, 11 # Load system call number for printing a character into a7 ecall # Make the system call to print the character la a0, newline # Load the address of "'\n" into a0 li a7, 4 # Load system call number for printing a string into a7 ecall # Make the system call to print the string jalr x0, x1, 0 # Return to the caller
  1. Initialization:
  • The main function initializes the number of disks (n = 3) and the pole labels (A, B, and C).
  • It calls the hanoi function with these values.
  1. Recursive Execution:
  • The hanoi function recursively solves the Tower of Hanoi problem.
  • When n = 1, it directly prints the move using the print function.
  • For n > 1, it breaks the problem into smaller subproblems:
  • Move n-1 disks from the source pole (A) to the auxiliary pole (B).
  • Move the largest disk from the source pole (A) to the destination pole ©.
  • Move the n-1 disks from the auxiliary pole (B) to the destination pole ©.
  1. Result
Move Disk 1 from 'A' to 'C'
Move Disk 2 from 'A' to 'B'
Move Disk 1 from 'C' to 'B'
Move Disk 3 from 'A' to 'C'
Move Disk 1 from 'B' to 'A'
Move Disk 2 from 'B' to 'C'
Move Disk 1 from 'A' to 'C'

Quiz2-ProblemD

.data # Define the data section with two numbers to multiply num1: .word 13 num2: .word 7 result: .word 0 # Placeholder for the final result .text # Begin the main code in the text section la x1, result # Load the address of the result variable into register x1 lw t0, num1 # Load the first number (num1) into register t0 lw t1, num2 # Load the second number (num2) into register t1 li t2, 0 # Initialize the result (t2) to 0 loop: # Check if the least significant bit of t0 (num1) is 1 (i.e., if the number is odd) andi t3, t0, 1 beq t3, x0, skip_add # If the bit is 0 (even), skip the addition # If the number is odd, add the value in t1 (num2) to the result in t2 add t2, t2, t1 skip_add: # Perform a right shift on t0 (num1), effectively dividing it by 2 srli t0, t0, 1 # Perform a left shift on t1 (num2), effectively multiplying it by 2 slli t1, t1, 1 # If t0 (num1) is not zero, repeat the loop bnez t0, loop # Store the final result in the memory location pointed by x1 sw t2, 0(x1)
  • This program implements an integer multiplication using the “Shift and Add Algorithm.” It multiplies two integers (num1 and num2) and stores the result in result.
  • Execution Steps
    1. Data Initialization
    • num1 is set to 13 (binary: 0b1101).
    • num2 is set to 7.
    • result is initialized to 0.
    1. Algorithm Logic
    • The program uses the “Shift and Add Algorithm” to perform multiplication by iterating through each bit of num1.
    • During each loop:
    • If the least significant bit (LSB) of num1 is 1, the value of num2 is added to result (t2).
    • num1 is right-shifted (srli), effectively dividing it by 2.
    • num2 is left-shifted (slli), effectively multiplying it by 2.
    • The loop continues until num1 becomes 0.
    • The final result is stored in the memory location pointed to by result.
  • After all bits are processed, the result (91) is stored in memory at the address of result.
  • Result:91

Quiz4-ProblemH

# Convert signed int to float # float i2f(s32 num); # INPUT: A0 = integer number # OUTPUT: A0 = float number (IEEE 754 single-precision) i2f: # Prepare result sign in A1 srli a1, a0, 31 # A1 = (A0 >> 31), extract sign bit slli a1, a1, 31 # A1 = A1 << 31, position sign bit at bit 31 # Get absolute value of the number beq a1, x0, positive # If sign bit is zero, number is positive sub a0, x0, a0 # A0 = -A0 (negate to get absolute value) positive: # Check if number is zero beq a0, x0, zero_result # If A0 == 0, result is zero # Store sign and absolute value add a2, x0, a1 # A2 = A1 (store sign) add a1, x0, a0 # A1 = A0 (store absolute value) # Count leading zeros in A0 (result stored in T0) # Initialize T0 (leading zero count) to 0 add t0, x0, x0 # T0 = 0 clz_loop: sll t1, a1, 1 # Shift A1 left by 1 bit addi t0, t0, 1 # T0 = T0 + 1 (increment count) mv a1, t1 # Update A1 with shifted value bgez a1, clz_loop # Loop while A1 >= 0 (MSB is 0) # Adjust for possible overflow in shift li t1, 32 # Load 32 into t1 blt t0, t1, clz_done # If t0 < 32, proceed to clz_done li a0, 32 # If t0 >= 32, leading zeros = 32 j normalize clz_done: # A0 now contains the number of leading zeros (stored in T0) add a0, x0, t0 # Move T0 to A0 normalize: # Normalize the number (shift left by leading zeros) sub t1, x0, a0 # t1 = -A0 sll a1, a1, t1 # A1 = A1 << (32 - leading zeros) # Prepare result exponent # Exponent = 158 - leading_zeros li t1, 158 sub a0, t1, a0 # A0 = 158 - leading zeros # Rounding: Add 0x80 to A1 (rounding bit at position 7) addi a1, a1, 128 # A1 = A1 + 0x80 # Check for overflow after rounding bgez a1, rounding_ok # If A1 >= 0, no overflow addi a0, a0, 1 # Exponent increment due to rounding overflow j adjust_mantissa rounding_ok: # Check if lowest 8 bits are zero (for rounding to even) andi a3, a1, 0xFF # A3 = A1 & 0xFF (lowest 8 bits) beq a3, x0, round_even # If lowest 8 bits are zero, round to even adjust_mantissa: # Remove leading hidden bit '1' slli a1, a1, 1 # A1 = A1 << 1 (remove hidden '1') j compose_result round_even: # Ensure even mantissa (clear least significant bit) srli a1, a1, 9 # A1 = A1 >> 9 slli a1, a1, 10 # A1 = A1 << 10 (remove hidden '1') j compose_result compose_result: # Align mantissa and exponent srli a1, a1, 9 # A1 = A1 >> 9 (align mantissa to bits 0..22) slli a0, a0, 23 # A0 = A0 << 23 (align exponent to bits 23..30), H10 or a0, a0, a1 # A0 = exponent | mantissa or a0, a0, a2 # A0 = A0 | sign bit (bit 31) jalr x0, ra, 0 # Return from function zero_result: # Return zero (signed zero with correct sign) or a0, x0, a2 # A0 = sign bit (A2) jalr x0, ra, 0 # Return from function
  • Result
    • 0x42280000

Quiz5-ProblemD

# Takes input N (a0), returns its log base 2 in a0 logint: addi sp, sp, -4 sw t0, 0(sp) add t0, a0, zero# k = N add a0, zero, zero# i = 0 logloop: beq t0, zero, logloop_end # Exit if k == 0 srai t0, t0, 1 # k >>= 1 addi a0, a0, 1 # i++ j logloop logloop_end: addi a0, a0, -1 # Return i - 1 lw t0, 0(sp) addi sp, sp, 4 jr ra # Takes inputs N(a0) and n(a1), reverses the number in binary reverse: addi sp, sp, -28 sw ra, 0(sp) sw s0, 4(sp) sw s1, 8(sp) sw s2, 12(sp) sw s3, 16(sp) sw s4, 20(sp) sw s5, 24(sp) call logint# Now a0 has log2(N) addi s0, zero, 1 # j = 1 add s1, zero, zero # p = 0 forloop_reverse: bgt s0, a0, forloop_reverse_end sub s2, a0, s0# s2 = a0 - s0 add is3, zero, 1 sll s3, s3, s2 and s3, a1, s3 beq s3, zero, elses3 # If not, skip ifs3: addi s4, s0, -1 # s4 = j - 1 addi s5, zero, 1 sll s5, s5, s4 or s1, s1, s5 elses3: addi s0, s0, 1 j forloop_reverse forloop_reverse_end: add a0, s1, zero # Return p lw ra, 0(sp) lw s0, 4(sp) lw s1, 8(sp) lw s2, 12(sp) lw s3, 16(sp) lw s4, 20(sp) lw s5, 24(sp) addi sp, sp, 28 jr ra

1. Corrected Execution of logint

  • The purpose of logint is to calculate the integer base-2 logarithm (log2) of N.
    • Initial value: a0 = 42
    • Execution steps:
      ​​​​​​​​Iteration 1: t0 = 21, a0 = 1
      ​​​​​​​​Iteration 2: t0 = 10, a0 = 2
      ​​​​​​​​Iteration 3: t0 = 5,  a0 = 3
      ​​​​​​​​Iteration 4: t0 = 2,  a0 = 4
      ​​​​​​​​Iteration 5: t0 = 1,  a0 = 5
      ​​​​​​​​Iteration 6: t0 = 0, exit loop.
      
  • After exiting the loop, addi a0, a0, -1 is executed. Final result:
    • a0 = 5 - 1 = 4
  • Thus, the result of logint is a0 = 4.

2. Corrected Execution of reverse

  • The purpose of reverse is to reverse the binary representation of a number.

    • Initial values:
    • a0 = 4 (result from log2(N))
    • a1 = 42 (binary representation: 101010)
  • Execution process:

    1. Confirmed a0 = 4, s0 = 1, s1 = 0.
    2. Begin reversing the binary number 101010:
Bit 1: s0 = 1, extract bit 5 (most significant bit), which is 1 → reverse to the least significant bit, s1 = 0b000001.
Bit 2: s0 = 2, extract bit 4, which is 0 → skip.
Bit 3: s0 = 3, extract bit 3, which is 1 → reverse to the 2nd least significant bit, s1 = 0b000101.
Bit 4: s0 = 4, extract bit 2, which is 0 → skip.
Bit 5: s0 = 5, extract bit 1, which is 1 → reverse to the 4th least significant bit, s1 = 0b010101 (decimal value: 21).
  • Final result:
    • a0 = 11.
  • Thus, the result of reverse is a0 = 11.
  • Conclusion Result
    • logint result: a0 = 4
    • reverse result: a0 = 11

References

https://github.com/riscv/riscv-isa-manual/releases
https://acsa.ustc.edu.cn/ics/download/riscv/RISC-V-Reader-Chinese-v2p1.pdf
https://blog.csdn.net/qq_38798111/article/details/129718824
https://github.com/riscv-collab/riscv-gnu-toolchain
https://github.com/riscv-software-src/riscv-tests
https://hackmd.io/@sysprog/arch2024-quiz2-sol
https://hackmd.io/@sysprog/arch2024-quiz4-sol
https://hackmd.io/@sysprog/arch2024-quiz5-sol