Build RISC-V Instruction Set Simulator from scratch

黃詠筑, 劉欣宜

Mission

Task: Develop a RISC-V instruction set simulator from scratch - Based on the foundation from Problem A, gradually expand it to support RV32I, RV32M, and RV32C, and pass the tests from riscv-tests. Additionally, select at least five RISC-V programs from Quiz 1 to Quiz 6, rewrite them, and ensure they work correctly on the developed RISC-V instruction set simulator.

RV32I, RV32M, and RV32C

1. RV32I: This is the basic version of the RISC-V instruction set, supporting 32-bit operations. It includes the core instructions of the RISC-V instruction set, such as arithmetic operations, logical operations, and control flow. RV32I is the foundation of the RISC-V architecture, and other extensions are built upon it.

2. RV32M: This is an extended version of RV32I, adding support for multiplication and division instructions. The RV32M extension provides M instructions that allow the processor to perform more efficient multiplication (MUL) and division (DIV) operations.

3. RV32C: This is another extension of RV32I that provides a compressed instruction set. The C extension adds smaller, more compact instructions to reduce program size and improve performance, especially in embedded systems and resource-constrained devices. These instructions are usually simplified versions of RV32I instructions and help save storage space while enhancing execution efficiency.

Quiz3 Problem A

Summarize the program, addressing its purpose and behavior.

This code primarily covers the basic instruction set of RV32I and implements support for instructions like ADDI, LUI, AUIPC, etc. It does not implement the multiplication and division instructions of RV32M, nor does it include the compressed instruction set of RV32C. To extend it to RV32M or RV32C, the corresponding instruction handling logic would need to be added.













































































































































































































































































#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define fatal(args...)     \
    do {                   \
        printf("Fatal: "); \
        printf(args);      \
        exit(-1);          \
    } while (0)

enum {
    /* I-type integer computation or integer register-immediate */
    INTEGER_COMP_RI = 0x13,

    LUI = 0x37
    AUIPC = 0x17
    /* ECALL and EBREAK */
    ECALL = 0x73,
};

/* FUNCT3 for INTEGER_COMPI_RI */
enum {
    ADDI = 0x0,
};

enum { OPCODE_MASK = 0x7F, REG_ADDR_MASK = 0x1F };
enum {
    RD_SHIFT = 7,
    RS1_SHIFT = 15,
    RS2_SHIFT = 20,
    FUNCT3_SHIFT = 12,
    FUNCT7_SHIFT = 25
};
enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F };

enum {
    RAM_SIZE = 1204 * 1024 * 2, /* 2 MiB */
    RAM_BASE = 0x0,
};

typedef struct {
    uint8_t *mem;
} ram_t;

typedef struct {
    uint32_t regs[32];
    size_t pc;
    ram_t *ram;
} cpu_t;

typedef struct {
    uint8_t opcode;
    uint8_t rd, rs1, rs2, funct3, funct7;
} insn_t;

ram_t *ram_new(uint8_t *code, size_t len)
{
    ram_t *ram = malloc(sizeof(ram_t));
    ram->mem = malloc(RAM_SIZE);
    memcpy(ram->mem, code, len);
    return ram;
}

void ram_free(ram_t *mem)
{
    free(mem->mem);
    free(mem);
}

static inline void check_addr(int32_t addr)
{
    size_t index = addr - RAM_BASE;
    if (index >= RAM_SIZE) fatal("out of memory.\n");
}

static uint32_t ram_load8(ram_t *mem, uint32_t addr)
{
    size_t index = addr - RAM_BASE;
    return (uint32_t) mem->mem[index];
}

static uint32_t ram_load16(ram_t *mem, uint32_t addr)
{
    size_t index = addr - RAM_BASE;
    return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8));
}

static uint32_t ram_load32(ram_t *mem, uint32_t addr)
{
    size_t index = addr - RAM_BASE;
    return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) |
            ((uint32_t) mem->mem[index + 2] << 16) |
            ((uint32_t) mem->mem[index + 3] << 24));
}

uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size)
{
    check_addr(addr);
    uint32_t r = 0;
    switch (size) {
    case 8: r = ram_load8(mem, addr); break;
    case 16: r = ram_load16(mem, addr); break;
    case 32: r = ram_load32(mem, addr); break;
    default: fatal("RAM: invalid load size(%d)\n", size);
    }

    return r;
}

cpu_t *cpu_new(uint8_t *code, size_t len)
{
    cpu_t *cpu = malloc(sizeof(cpu_t));
    memset(cpu->regs, 0, sizeof(cpu->regs));
    cpu->regs[0] = 0;
    /* Stack pointer */
    cpu->regs[2] = RAM_BASE + RAM_SIZE;
    cpu->pc = RAM_BASE;

    cpu->ram = ram_new(code, len);
    return cpu;
}

void cpu_free(cpu_t *cpu)
{
    ram_free(cpu->ram);
    free(cpu);
}

uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size)
{
    return ram_load(cpu->ram, addr, size);
}

uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); }

void cpu_decode(uint32_t raw, insn_t *inst)
{
    uint8_t opcode = raw & OPCODE_MASK;
    uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK;
    uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK;
    uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK;
    uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK;
    uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK;

    inst->opcode = opcode;
    inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2;
    inst->funct3 = funct3;
    inst->funct7 = funct7;
}

static inline int32_t i_imm(uint32_t raw)
{
    /* imm[11:0] = inst[31:20] */
    return ((int32_t) raw) >> 20;
}

static inline int32_t u_imm(uint32_t raw)
{
    /* imm[20:0] = inst[31:12] */
    return ((int32_t) raw & 0xFFFFF000);
}

enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 };

enum { STDOUT = 1, STDERR = 2 };

void ecall_handler(cpu_t *cpu)
{
    uint32_t syscall_nr = cpu->regs[17];

    switch (syscall_nr) {
    case SYSCALL_WRITE: {
        uint32_t fd = cpu->regs[10];
        uint32_t addr = cpu->regs[11];
        uint32_t count = cpu->regs[12];
        FILE *stream;

        switch (fd) {
        case STDOUT: stream = stdout; break;
        case STDERR: stream = stderr; break;
        default: fatal("invalid file-descriptor %d.\n", fd);
        }

        for (uint32_t i = 0; i < count; i++)
            fprintf(stream, "%c", cpu_load(cpu, addr + i, 8));
        break;
    }

    case SYSCALL_EXIT: {
        int32_t exit_code = cpu->regs[10];
        exit(exit_code);
        break;
    }

    default: fatal("unkown syscall number %d.\n", syscall_nr);
    }
}

static insn_t inst;
void cpu_execute(cpu_t *cpu, uint32_t raw)
{
    cpu_decode(raw, &inst);
    cpu->regs[0] = 0;
    switch (inst.opcode) {
    case INTEGER_COMP_RI: {
        int32_t imm = i_imm(raw);
        switch (inst.funct3) {
        case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break;
        default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI");
        }
        break;
    }

    case LUI: {
        int32_t imm = u_imm(raw);
        cpu->regs[inst.rd] = imm;
        break;
    }
    case AUIPC: {
        int32_t imm = u_imm(raw);
        cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm;
        break;
    }

    case ECALL:
        if (raw == 0x100073) break;  /* EBREAK */
        ecall_handler(cpu);
        break;
    default: fatal("Illegal Instruction 0x%x\n", inst.opcode);
    }
}

int main(int argc, char *argv[])
{
    if (argc < 2) {
        printf("Usage: %s [filename]\n", argv[0]);
        exit(-1);
    }

    const char *filename = argv[1];
    FILE *file = fopen(filename, "rb");
    if (!file) {
        printf("Failed to open %s\n", filename);
        exit(-1);
    }
    fseek(file, 0L, SEEK_END);
    size_t code_size = ftell(file);
    rewind(file);

    uint8_t *code = malloc(code_size);
    fread(code, sizeof(uint8_t), code_size, file);
    fclose(file);
    cpu_t *cpu = cpu_new(code, code_size);

    while (1) {
        uint32_t raw = cpu_fetch(cpu);
        cpu->pc += 4;
        if (cpu->pc > code_size) break;
        cpu_execute(cpu, raw);

        if (!cpu->pc) break;
    }

    cpu_free(cpu);
    return 0;
}

Add support for RV32M extension instructions:

RV32M provides multiplication and division instructions, such as MUL, MULH, DIV, DIVU, REM, and REMU.
These instructions need to be handled during the decode and execution stages.

Add support for RV32C extension instructions:

RV32C is a compressed instruction set that includes 16-bit instructions. These instructions need to be handled separately and identified as compressed instructions during the decode stage.
RV32C instructions typically use fewer bits for encoding and can be aligned to 16 bits, so additional checks and processing are required.



































































































































































































































































































































#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define fatal(args...)     \
    do {                   \
        printf("Fatal: "); \
        printf(args);      \
        exit(-1);          \
    } while (0)

enum {
    /* I-type integer computation or integer register-immediate */
    INTEGER_COMP_RI = 0x13,
    LUI = 0x37,
    AUIPC = 0x17,
    ECALL = 0x73,

    /* RV32M extension */
    MUL = 0x33,     // Multiply
    MULH = 0x33,    // Multiply high
    DIV = 0x33,     // Divide
    DIVU = 0x33,    // Divide unsigned
    REM = 0x33,     // Remainder
    REMU = 0x33,    // Remainder unsigned

    /* RV32C extension */
    C_ADDI4SPN = 0x10,  // Compressed ADDI4SPN
    C_LW = 0x00,         // Compressed Load Word
    C_SW = 0x01,         // Compressed Store Word
};

/* FUNCT3 for INTEGER_COMP_RI */
enum {
    ADDI = 0x0,
    MUL_FUNCT3 = 0x0,    // For RV32M MUL
    DIV_FUNCT3 = 0x4,    // For RV32M DIV
    REM_FUNCT3 = 0x5,    // For RV32M REM
};

/* RV32M MUL */
enum {
    MUL_FUNCT7 = 0x1,
    MULH_FUNCT7 = 0x2,
};

/* FUNCT3 mask */
enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F };

enum {
    RAM_SIZE = 1204 * 1024 * 2, /* 2 MiB */
    RAM_BASE = 0x0,
};

typedef struct {
    uint8_t *mem;
} ram_t;

typedef struct {
    uint32_t regs[32];
    size_t pc;
    ram_t *ram;
} cpu_t;

typedef struct {
    uint8_t opcode;
    uint8_t rd, rs1, rs2, funct3, funct7;
} insn_t;

ram_t *ram_new(uint8_t *code, size_t len)
{
    ram_t *ram = malloc(sizeof(ram_t));
    ram->mem = malloc(RAM_SIZE);
    memcpy(ram->mem, code, len);
    return ram;
}

void ram_free(ram_t *mem)
{
    free(mem->mem);
    free(mem);
}

static inline void check_addr(int32_t addr)
{
    size_t index = addr - RAM_BASE;
    if (index >= RAM_SIZE) fatal("out of memory.\n");
}

static uint32_t ram_load8(ram_t *mem, uint32_t addr)
{
    size_t index = addr - RAM_BASE;
    return (uint32_t) mem->mem[index];
}

static uint32_t ram_load16(ram_t *mem, uint32_t addr)
{
    size_t index = addr - RAM_BASE;
    return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8));
}

static uint32_t ram_load32(ram_t *mem, uint32_t addr)
{
    size_t index = addr - RAM_BASE;
    return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) |
            ((uint32_t) mem->mem[index + 2] << 16) |
            ((uint32_t) mem->mem[index + 3] << 24));
}

uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size)
{
    check_addr(addr);
    uint32_t r = 0;
    switch (size) {
    case 8: r = ram_load8(mem, addr); break;
    case 16: r = ram_load16(mem, addr); break;
    case 32: r = ram_load32(mem, addr); break;
    default: fatal("RAM: invalid load size(%d)\n", size);
    }

    return r;
}

cpu_t *cpu_new(uint8_t *code, size_t len)
{
    cpu_t *cpu = malloc(sizeof(cpu_t));
    memset(cpu->regs, 0, sizeof(cpu->regs));
    cpu->regs[0] = 0;
    /* Stack pointer */
    cpu->regs[2] = RAM_BASE + RAM_SIZE;
    cpu->pc = RAM_BASE;

    cpu->ram = ram_new(code, len);
    return cpu;
}

void cpu_free(cpu_t *cpu)
{
    ram_free(cpu->ram);
    free(cpu);
}

uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size)
{
    return ram_load(cpu->ram, addr, size);
}

uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); }

void cpu_decode(uint32_t raw, insn_t *inst)
{
    uint8_t opcode = raw & OPCODE_MASK;
    uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK;
    uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK;
    uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK;
    uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK;
    uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK;

    inst->opcode = opcode;
    inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2;
    inst->funct3 = funct3;
    inst->funct7 = funct7;
}

static inline int32_t i_imm(uint32_t raw)
{
    /* imm[11:0] = inst[31:20] */
    return ((int32_t) raw) >> 20;
}

static inline int32_t u_imm(uint32_t raw)
{
    /* imm[20:0] = inst[31:12] */
    return ((int32_t) raw & 0xFFFFF000);
}

enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 };

enum { STDOUT = 1, STDERR = 2 };

void ecall_handler(cpu_t *cpu)
{
    uint32_t syscall_nr = cpu->regs[17];

    switch (syscall_nr) {
    case SYSCALL_WRITE: {
        uint32_t fd = cpu->regs[10];
        uint32_t addr = cpu->regs[11];
        uint32_t count = cpu->regs[12];
        FILE *stream;

        switch (fd) {
        case STDOUT: stream = stdout; break;
        case STDERR: stream = stderr; break;
        default: fatal("invalid file-descriptor %d.\n", fd);
        }

        for (uint32_t i = 0; i < count; i++)
            fprintf(stream, "%c", cpu_load(cpu, addr + i, 8));
        break;
    }

    case SYSCALL_EXIT: {
        int32_t exit_code = cpu->regs[10];
        exit(exit_code);
        break;
    }

    default: fatal("unkown syscall number %d.\n", syscall_nr);
    }
}

static insn_t inst;
void cpu_execute(cpu_t *cpu, uint32_t raw)
{
    cpu_decode(raw, &inst);
    cpu->regs[0] = 0;
    switch (inst.opcode) {
    case INTEGER_COMP_RI: {
        int32_t imm = i_imm(raw);
        switch (inst.funct3) {
        case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break;
        default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI");
        }
        break;
    }

    case MUL:
        switch (inst.funct7) {
        case MUL_FUNCT7: cpu->regs[inst.rd] = cpu->regs[inst.rs1] * cpu->regs[inst.rs2]; break;
        default: fatal("Unknown FUNCT7 for MUL");
        }
        break;

    case DIV:
        switch (inst.funct3) {
        case DIV_FUNCT3: cpu->regs[inst.rd] = cpu->regs[inst.rs1] / cpu->regs[inst.rs2]; break;
        default: fatal("Unknown FUNCT3 for DIV");
        }
        break;

    case REM:
        switch (inst.funct3) {
        case REM_FUNCT3: cpu->regs[inst.rd] = cpu->regs[inst.rs1] % cpu->regs[inst.rs2]; break;
        default: fatal("Unknown FUNCT3 for REM");
        }
        break;

    case MULH:
        switch (inst.funct7) {
        case MULH_FUNCT7: cpu->regs[inst.rd] = (int32_t) cpu->regs[inst.rs1] * (int32_t) cpu->regs[inst.rs2] >> 32; break;
        default: fatal("Unknown FUNCT7 for MULH");
        }
        break;

    case DIVU:
        switch (inst.funct3) {
        case 0x4: cpu->regs[inst.rd] = (uint32_t) cpu->regs[inst.rs1] / (uint32_t) cpu->regs[inst.rs2]; break;
        default: fatal("Unknown FUNCT3 for DIVU");
        }
        break;

    case REMU:
        switch (inst.funct3) {
        case 0x5: cpu->regs[inst.rd] = (uint32_t) cpu->regs[inst.rs1] % (uint32_t) cpu->regs[inst.rs2]; break;
        default: fatal("Unknown FUNCT3 for REMU");
        }
        break;

    case LUI: {
        int32_t imm = u_imm(raw);
        cpu->regs[inst.rd] = imm;
        break;
    }
    case AUIPC: {
        int32_t imm = u_imm(raw);
        cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm;
        break;
    }

    case ECALL:
        if (raw == 0x100073) break;  /* EBREAK */
        ecall_handler(cpu);
        break;
    default: fatal("Illegal Instruction 0x%x\n", inst.opcode);
    }
}

int main(int argc, char *argv[])
{
    if (argc < 2) {
        printf("Usage: %s [filename]\n", argv[0]);
        exit(-1);
    }

    const char *filename = argv[1];
    FILE *file = fopen(filename, "rb");
    if (!file) {
        printf("Failed to open %s\n", filename);
        exit(-1);
    }
    fseek(file, 0L, SEEK_END);
    size_t code_size = ftell(file);
    rewind(file);

    uint8_t *code = malloc(code_size);
    fread(code, sizeof(uint8_t), code_size, file);
    fclose(file);
    cpu_t *cpu = cpu_new(code, code_size);

    while (1) {
        uint32_t raw = cpu_fetch(cpu);
        cpu->pc += 4;
        if (cpu->pc > code_size) break;
        cpu_execute(cpu, raw);

        if (!cpu->pc) break;
    }

    cpu_free(cpu);
    return 0;
}

Makefile



















CC = gcc
CFLAGS = -g -Wall


SRC = filename.c
OBJ = filename.o
EXEC = riscv_simulator

all: $(EXEC)

$(EXEC): $(OBJ)
	$(CC) $(OBJ) -o $(EXEC)

$(OBJ): $(SRC)
	$(CC) $(CFLAGS) -c $(SRC)

clean:
	rm -f $(OBJ) $(EXEC)

make

Build_and_run.sh
















if [ -z "$1" ]; then
    echo "Usage: $0 <source_file.s>"
    exit 1
fi


SRC_FILE=$1

BASENAME=$(basename "$SRC_FILE" .s)

OBJ_FILE="${BASENAME}.o"
ELF_FILE="${BASENAME}.elf"

riscv64-unknown-elf-as -o $OBJ_FILE $SRC_FILE
riscv64-unknown-elf-ld -o $ELF_FILE $OBJ_FILE
./riscv_simulator $ELF_FILE

chmod +x build_and_run.sh
./build_and_run.sh filename.s

riscv.c










































































































































































































































































































#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define fatal(args...)     \
    do {                   \
        printf("Fatal: "); \
        printf(args);      \
        exit(-1);          \
    } while (0)

enum {
    /* I-type integer computation or integer register-immediate */
    INTEGER_COMP_RI = 0x13,
    LUI = 0x37,
    AUIPC = 0x17,
    ECALL = 0x73,
};

enum {
    ADDI = 0x0,
    /* RV32M opcodes */
    MUL = 0x0,
    MULH = 0x1,
    DIV = 0x4,
    DIVU = 0x5,
    REM = 0x6,
    REMU = 0x7,
};

/* FUNCT3 for INTEGER_COMP_RI */
enum { OPCODE_MASK = 0x7F, REG_ADDR_MASK = 0x1F };
enum { RD_SHIFT = 7, RS1_SHIFT = 15, RS2_SHIFT = 20, FUNCT3_SHIFT = 12, FUNCT7_SHIFT = 25 };
enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F };

enum { RAM_SIZE = 1204 * 1024 * 2, RAM_BASE = 0x0 };

typedef struct {
    uint8_t *mem;
} ram_t;

typedef struct {
    uint32_t regs[32];
    size_t pc;
    ram_t *ram;
} cpu_t;

typedef struct {
    uint8_t opcode;
    uint8_t rd, rs1, rs2, funct3, funct7;
} insn_t;

ram_t *ram_new(uint8_t *code, size_t len) {
    ram_t *ram = malloc(sizeof(ram_t));
    ram->mem = malloc(RAM_SIZE);
    memcpy(ram->mem, code, len);
    return ram;
}

void ram_free(ram_t *mem) {
    free(mem->mem);
    free(mem);
}

static inline void check_addr(int32_t addr) {
    size_t index = addr - RAM_BASE;
    if (index >= RAM_SIZE) fatal("out of memory.\n");
}

static uint32_t ram_load8(ram_t *mem, uint32_t addr) {
    size_t index = addr - RAM_BASE;
    return (uint32_t) mem->mem[index];
}

static uint32_t ram_load16(ram_t *mem, uint32_t addr) {
    size_t index = addr - RAM_BASE;
    return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8));
}

static uint32_t ram_load32(ram_t *mem, uint32_t addr) {
    size_t index = addr - RAM_BASE;
    return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) |
            ((uint32_t) mem->mem[index + 2] << 16) |
            ((uint32_t) mem->mem[index + 3] << 24));
}

uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size) {
    check_addr(addr);
    uint32_t r = 0;
    switch (size) {
        case 8: r = ram_load8(mem, addr); break;
        case 16: r = ram_load16(mem, addr); break;
        case 32: r = ram_load32(mem, addr); break;
        default: fatal("RAM: invalid load size(%d)\n", size);
    }
    return r;
}

cpu_t *cpu_new(uint8_t *code, size_t len) {
    cpu_t *cpu = malloc(sizeof(cpu_t));
    memset(cpu->regs, 0, sizeof(cpu->regs));
    cpu->regs[0] = 0; // x0 is always zero
    cpu->regs[2] = RAM_BASE + RAM_SIZE; // Stack pointer
    cpu->pc = RAM_BASE;
    cpu->ram = ram_new(code, len);
    return cpu;
}

void cpu_free(cpu_t *cpu) {
    ram_free(cpu->ram);
    free(cpu);
}

uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size) {
    return ram_load(cpu->ram, addr, size);
}

uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); }

void cpu_decode(uint32_t raw, insn_t *inst) {
    uint8_t opcode = raw & OPCODE_MASK;
    uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK;
    uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK;
    uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK;
    uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK;
    uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK;

    inst->opcode = opcode;
    inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2;
    inst->funct3 = funct3;
    inst->funct7 = funct7;
}

static inline int32_t i_imm(uint32_t raw) {
    return ((int32_t) raw) >> 20;
}

static inline int32_t u_imm(uint32_t raw) {
    return ((int32_t) raw & 0xFFFFF000);
}

enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 };
enum { STDOUT = 1, STDERR = 2 };

void ecall_handler(cpu_t *cpu) {
    uint32_t syscall_nr = cpu->regs[17];
    switch (syscall_nr) {
        case SYSCALL_WRITE: {
            uint32_t fd = cpu->regs[10];
            uint32_t addr = cpu->regs[11];
            uint32_t count = cpu->regs[12];
            FILE *stream;
            switch (fd) {
                case STDOUT: stream = stdout; break;
                case STDERR: stream = stderr; break;
                default: fatal("invalid file-descriptor %d.\n", fd);
            }
            for (uint32_t i = 0; i < count; i++) {
                fprintf(stream, "%c", cpu_load(cpu, addr + i, 8));
            }
            break;
        }
        case SYSCALL_EXIT: {
            int32_t exit_code = cpu->regs[10];
            exit(exit_code);
            break;
        }
        default: fatal("unkown syscall number %d.\n", syscall_nr);
    }
}

static insn_t inst;

void cpu_execute(cpu_t *cpu, uint32_t raw) {
    cpu_decode(raw, &inst);
    cpu->regs[0] = 0; // x0 is always zero
    
    switch (inst.opcode) {
        case INTEGER_COMP_RI: {
            int32_t imm = i_imm(raw);
            switch (inst.funct3) {
                case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break;
                default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI");
            }
            break;
        }
        
        case LUI: {
            int32_t imm = u_imm(raw);
            cpu->regs[inst.rd] = imm;
            break;
        }

        case AUIPC: {
            int32_t imm = u_imm(raw);
            cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm;
            break;
        }

        // RV32M 指令集 (乘法與除法)
        case 0x33: {  // RV32M 是 0x33 opcode
            switch (inst.funct3) {
                case 0x0:  // MUL
                    if (inst.funct7 == 0x01) {
                        cpu->regs[inst.rd] = (int32_t)cpu->regs[inst.rs1] * (int32_t)cpu->regs[inst.rs2];
                    }
                    break;
                case 0x1:  // MULH
                    if (inst.funct7 == 0x01) {
                        int64_t result = (int64_t)(int32_t)cpu->regs[inst.rs1] * (int32_t)cpu->regs[inst.rs2];
                        cpu->regs[inst.rd] = (result >> 32) & 0xFFFFFFFF;
                    }
                    break;
                case 0x4:  // DIV
                    if (inst.funct7 == 0x01) {
                        cpu->regs[inst.rd] = cpu->regs[inst.rs1] / cpu->regs[inst.rs2];
                    }
                    break;
                case 0x5:  // DIVU
                    if (inst.funct7 == 0x01) {
                        cpu->regs[inst.rd] = (unsigned int)cpu->regs[inst.rs1] / (unsigned int)cpu->regs[inst.rs2];
                    }
                    break;
                case 0x6:  // REM
                    if (inst.funct7 == 0x01) {
                        cpu->regs[inst.rd] = cpu->regs[inst.rs1] % cpu->regs[inst.rs2];
                    }
                    break;
                case 0x7:  // REMU
                    if (inst.funct7 == 0x01) {
                        cpu->regs[inst.rd] = (unsigned int)cpu->regs[inst.rs1] % (unsigned int)cpu->regs[inst.rs2];
                    }
                    break;
                default:
                    fatal("Unknown funct3 for RV32M instruction\n");
            }
            break;
        }

        // RV32C 指令集 
        case 0x1: {  // set RV32C指令的opcode是0x1
            switch (inst.funct3) {
                case 0x0:  // C.ADDI
                    cpu->regs[inst.rd] = cpu->regs[inst.rs1] + i_imm(raw);
                    break;
                // 其他 RV32C 指令
                default:
                    fatal("Unknown FUNCT3 for RV32C instruction\n");
            }
            break;
        }

        case ECALL:
            if (raw == 0x100073) break;  /* EBREAK */
            ecall_handler(cpu);
            break;

        default:
            fatal("Illegal Instruction 0x%x\n", inst.opcode);
    }
}

int main(int argc, char *argv[]) {
    if (argc < 2) {
        printf("Usage: %s [filename]\n", argv[0]);
        exit(-1);
    }

    const char *filename = argv[1];
    FILE *file = fopen(filename, "rb");
    if (!file) {
        printf("Failed to open %s\n", filename);
        exit(-1);
    }
    fseek(file, 0L, SEEK_END);
    size_t code_size = ftell(file);
    rewind(file);

    uint8_t *code = malloc(code_size);
    fread(code, sizeof(uint8_t), code_size, file);
    fclose(file);
    
    cpu_t *cpu = cpu_new(code, code_size);

    while (1) {
        uint32_t raw = cpu_fetch(cpu);
        cpu->pc += 4;
        if (cpu->pc > code_size) break;
        cpu_execute(cpu, raw);

        if (!cpu->pc) break;
    }

    cpu_free(cpu);
    return 0;
}

The test from riscv-tests

Quiz2-ProblemA



































.text
    la a0, multiplier         # Load multiplier address
    lw a1, 0(a0)              # Load multiplier value
    la a2, multiplicand       # Load multiplicand address
    lw a3, 0(a2)              # Load multiplicand value
    li t0, 0                  # Initialize accumulator
    li t1, 32                 # Set bit counter 

    # Check for negative values
    bltz a1, handle_negative1 # If multiplier negative 
    j shift_and_add_loop      # Skip to main loop 
    bltz a3, handle_negative2 # If multiplicand negative 
    j shift_and_add_loop      # Continue to main loop 

handle_negative1:
    neg a1, a1                # Make multiplier positive

handle_negative2:
    neg a3, a3                # Make multiplicand positive

shift_and_add_loop:
    beqz t1, end_shift_and_add # Exit if bit count is zero
    andi t2, a1, 1            # Check least significant bit 
    beqz t2, skip_add         # Skip add if bit is 0
    add t0, t0, a3            # Add to accumulator

skip_add:
    srai a1, a1, 1            # Right shift multiplier
    slli a3, a3, 1            # Left shift multiplicand
    addi t1, t1, -1           # Decrease bit counter
    j shift_and_add_loop      # Repeat loop 

end_shift_and_add:
    la a4, result             # Load result address
    sw t0, 0(a4)              # Store final result

This program performs multiplication of two integers using the shift-and-add algorithm.
Inputs
1. multiplier: The first integer (loaded into register a1).
2. multiplicand: The second integer (loaded into register a3).
3. result: The memory location where the product of multiplier and multiplicand will be stored.
Algorithm
- The program multiplies the multiplier and multiplicand using bitwise shifts and addition (similar to how binary multiplication works).
- Handles negative numbers by converting them to positive (if necessary).
- The least significant bit (LSB) of the multiplier is checked:
- If the LSB is 1, the multiplicand is added to the accumulator (t0).
- Then, the multiplier is shifted right, and the multiplicand is shifted left.
- This continues until the multiplier has been completely shifted out (bit count reaches 0).
The final result is stored in the result memory location.
Example Simulation
- Input Values (in memory):
  - multiplier = 13 (binary: 1101)
  - multiplicand = 7 (binary: 0111)
- Result
  - The final product is stored in result → 91.

Quiz2-ProblemB






















































































.data
    md:      .string "Move Disk "          # String for move message
    from:    .string " from '"             # String for source pole
    to:      .string "' to '"              # String for destination pole
    newline: .string "'\n"                 # String for newline
    src:     .string "A"                   # Source pole (A)
    aux:     .string "B"                   # Auxiliary pole (B)
    dst:     .string "C"                   # Destination pole (C)
    n:       .word   3                     # Number of disks

.text
.globl main
main:
    lw a1, n              # Load the number of disks (n) into register a1
    la t0, src            # Load the address of the source pole (A) into t0
    la t1, dst            # Load the address of the destination pole (C) into t1
    la t2, aux            # Load the address of the auxiliary pole (B) into t2
    lbu a2, 0(t0)         # Load the first character of source pole into a2
    lbu a3, 0(t2)         # Load the first character of auxiliary pole into a3
    lbu a4, 0(t1)         # Load the first character of destination pole into a4
    jal x1, hanoi  # Call the hanoi function (jump and link) 
    li a7, 10             # Load system call number for program exit into a7 
    ecall                 # Make the system call to exit the program

hanoi:
    addi sp, sp, -20      # Allocate 20 bytes of space on the stack 
    sw x1, 0(sp)          # Save the return address on the stack
    sw a1, 4(sp)          # Save the number of disks (a1) on the stack
    sw a2, 8(sp)          # Save the source pole (a2) on the stack
    sw a3, 12(sp)         # Save the auxiliary pole (a3) on the stack
    sw a4, 16(sp)         # Save the destination pole (a4) on the stack
    addi t0, x0, 1        # Load 1 into t0 (used for comparison)
    beq a1, t0, return    # If there's only 1 disk, jump to return 
    lw a1, 4(sp)          # Load the number of disks from the stack
    addi a1, a1, -1       # Decrement the number of disks (a1)
    lbu a2, 8(sp)         # Load source pole from the stack
    lbu a3, 16(sp)        # Load destination pole from the stack
    lbu a4, 12(sp)        # Load auxiliary pole from the stack
    jal x1, hanoi         # Recursive call to hanoi 
    lw a1, 4(sp)          # Load the number of disks from the stack
    lbu a2, 8(sp)         # Load source pole from the stack
    lbu a3, 16(sp)        # Load destination pole from the stack
    jal x1, print         # Call the print function to display the move
    lw a1, 4(sp)          # Load the number of disks from the stack
    addi a1, a1, -1       # Decrement the number of disks (a1)
    lbu a2, 12(sp)        # Load auxiliary pole from the stack
    lbu a3, 8(sp)         # Load source pole from the stack
    lbu a4, 16(sp)        # Load destination pole from the stack
    jal x1, hanoi         # Recursive call to hanoi 
    lw x1, 0(sp)          # Load the return address from the stack
    addi sp, sp, 20       # Deallocate space on the stack
    jalr x0, x1, 0        # Return to the caller 

return:
    lw a1, 4(sp)          # Load the number of disks from the stack
    lbu a2, 8(sp)         # Load source pole from the stack
    lbu a3, 16(sp)        # Load destination pole from the stack
    jal x1, print         # Call the print function to display the move
    lw x1, 0(sp)          # Load the return address from the stack
    addi sp, sp, 20       # Deallocate space on the stack 
    jalr x0, x1, 0        # Return to the caller 

print:
    la a0, md             # Load the address of "Move Disk " into a0
    li a7, 4              # Load system call number for printing a string into a7
    ecall                 # Make the system call to print the string
    addi a0, a1, 0        # Load the disk number (a1) into a0
    li a7, 1              # Load system call number for printing an integer into a7
    ecall                 # Make the system call to print the integer
    la a0, from           # Load the address of " from '" into a0
    li a7, 4              # Load system call number for printing a string into a7
    ecall                 # Make the system call to print the string
    addi a0, a2, 0        # Load the source pole into a0
    li a7, 11             # Load system call number for printing a character into a7
    ecall                 # Make the system call to print the character
    la a0, to             # Load the address of "' to '" into a0
    li a7, 4              # Load system call number for printing a string into a7
    ecall                 # Make the system call to print the string
    addi a0, a3, 0        # Load the destination pole into a0
    li a7, 11             # Load system call number for printing a character into a7
    ecall                 # Make the system call to print the character
    la a0, newline        # Load the address of "'\n" into a0
    li a7, 4              # Load system call number for printing a string into a7
    ecall                 # Make the system call to print the string
    jalr x0, x1, 0        # Return to the caller

Initialization:

The main function initializes the number of disks (n = 3) and the pole labels (A, B, and C).
It calls the hanoi function with these values.

Recursive Execution:

The hanoi function recursively solves the Tower of Hanoi problem.
When n = 1, it directly prints the move using the print function.
For n > 1, it breaks the problem into smaller subproblems:
Move n-1 disks from the source pole (A) to the auxiliary pole (B).
Move the largest disk from the source pole (A) to the destination pole ©.
Move the n-1 disks from the auxiliary pole (B) to the destination pole ©.

Result

Move Disk 1 from 'A' to 'C'
Move Disk 2 from 'A' to 'B'
Move Disk 1 from 'C' to 'B'
Move Disk 3 from 'A' to 'C'
Move Disk 1 from 'B' to 'A'
Move Disk 2 from 'B' to 'C'
Move Disk 1 from 'A' to 'C'

Quiz2-ProblemD






























.data
    # Define the data section with two numbers to multiply
    num1: .word 13
    num2: .word 7
    result: .word 0  # Placeholder for the final result

.text
    # Begin the main code in the text section
    la x1, result       # Load the address of the result variable into register x1
    lw t0, num1         # Load the first number (num1) into register t0
    lw t1, num2         # Load the second number (num2) into register t1
    li t2, 0            # Initialize the result (t2) to 0

loop:                                         
    # Check if the least significant bit of t0 (num1) is 1 (i.e., if the number is odd)
    andi t3, t0, 1
    beq t3, x0, skip_add  # If the bit is 0 (even), skip the addition
    # If the number is odd, add the value in t1 (num2) to the result in t2
    add t2, t2, t1 

skip_add:
    # Perform a right shift on t0 (num1), effectively dividing it by 2
    srli t0, t0, 1 
    # Perform a left shift on t1 (num2), effectively multiplying it by 2
    slli t1, t1, 1 
    # If t0 (num1) is not zero, repeat the loop
    bnez t0, loop

    # Store the final result in the memory location pointed by x1
    sw t2, 0(x1)

This program implements an integer multiplication using the “Shift and Add Algorithm.” It multiplies two integers (num1 and num2) and stores the result in result.
Execution Steps
1. Data Initialization
- num1 is set to 13 (binary: 0b1101).
- num2 is set to 7.
- result is initialized to 0.
1. Algorithm Logic
- The program uses the “Shift and Add Algorithm” to perform multiplication by iterating through each bit of num1.
- During each loop:
- If the least significant bit (LSB) of num1 is 1, the value of num2 is added to result (t2).
- num1 is right-shifted (srli), effectively dividing it by 2.
- num2 is left-shifted (slli), effectively multiplying it by 2.
- The loop continues until num1 becomes 0.
- The final result is stored in the memory location pointed to by result.
After all bits are processed, the result (91) is stored in memory at the address of result.
Result:91

Quiz4-ProblemH

























































































# Convert signed int to float
# float i2f(s32 num);
# INPUT:  A0 = integer number
# OUTPUT: A0 = float number (IEEE 754 single-precision)
i2f:
    # Prepare result sign in A1
    srli    a1, a0, 31          # A1 = (A0 >> 31), extract sign bit
    slli    a1, a1, 31          # A1 = A1 << 31, position sign bit at bit 31

    # Get absolute value of the number
    beq     a1, x0, positive    # If sign bit is zero, number is positive
    sub     a0, x0, a0          # A0 = -A0 (negate to get absolute value)

positive:
    # Check if number is zero
    beq     a0, x0, zero_result # If A0 == 0, result is zero

    # Store sign and absolute value
    add     a2, x0, a1          # A2 = A1 (store sign)
    add     a1, x0, a0          # A1 = A0 (store absolute value)

    # Count leading zeros in A0 (result stored in T0)
    # Initialize T0 (leading zero count) to 0
    add     t0, x0, x0          # T0 = 0

clz_loop:
    sll     t1, a1, 1           # Shift A1 left by 1 bit
    addi    t0, t0, 1           # T0 = T0 + 1 (increment count)
    mv      a1, t1              # Update A1 with shifted value
    bgez    a1, clz_loop        # Loop while A1 >= 0 (MSB is 0)

    # Adjust for possible overflow in shift
    li      t1, 32              # Load 32 into t1
    blt     t0, t1, clz_done    # If t0 < 32, proceed to clz_done
    li      a0, 32              # If t0 >= 32, leading zeros = 32
    j       normalize

clz_done:
    # A0 now contains the number of leading zeros (stored in T0)
    add     a0, x0, t0          # Move T0 to A0

normalize:
    # Normalize the number (shift left by leading zeros)
    sub     t1, x0, a0          # t1 = -A0
    sll     a1, a1, t1          # A1 = A1 << (32 - leading zeros)

    # Prepare result exponent
    # Exponent = 158 - leading_zeros
    li      t1, 158
    sub     a0, t1, a0          # A0 = 158 - leading zeros

    # Rounding: Add 0x80 to A1 (rounding bit at position 7)
    addi    a1, a1, 128         # A1 = A1 + 0x80

    # Check for overflow after rounding
    bgez    a1, rounding_ok     # If A1 >= 0, no overflow
    addi    a0, a0, 1           # Exponent increment due to rounding overflow
    j       adjust_mantissa

rounding_ok:
    # Check if lowest 8 bits are zero (for rounding to even)
    andi    a3, a1, 0xFF        # A3 = A1 & 0xFF (lowest 8 bits)
    beq     a3, x0, round_even  # If lowest 8 bits are zero, round to even

adjust_mantissa:
    # Remove leading hidden bit '1'
    slli    a1, a1, 1           # A1 = A1 << 1 (remove hidden '1')
    j       compose_result

round_even:
    # Ensure even mantissa (clear least significant bit)
    srli    a1, a1, 9           # A1 = A1 >> 9
    slli    a1, a1, 10          # A1 = A1 << 10 (remove hidden '1')
    j       compose_result

compose_result:
    # Align mantissa and exponent
    srli    a1, a1, 9           # A1 = A1 >> 9 (align mantissa to bits 0..22)
    slli    a0, a0, 23          # A0 = A0 << 23 (align exponent to bits 23..30), H10
    or      a0, a0, a1          # A0 = exponent | mantissa
    or      a0, a0, a2          # A0 = A0 | sign bit (bit 31)
    jalr    x0, ra, 0           # Return from function

zero_result:
    # Return zero (signed zero with correct sign)
    or      a0, x0, a2          # A0 = sign bit (A2)
    jalr    x0, ra, 0           # Return from function

Result
- 0x42280000

Quiz5-ProblemD


































































# Takes input N (a0), returns its log base 2 in a0
logint:
    addi sp, sp, -4
    sw t0, 0(sp)

    add t0, a0, zero# k = N
    add a0, zero, zero# i = 0

logloop:
    beq t0, zero, logloop_end # Exit if k == 0
    srai t0, t0, 1 # k >>= 1
    addi a0, a0, 1 # i++
    j logloop

logloop_end:
    addi a0, a0, -1 # Return i - 1
    lw t0, 0(sp)
    addi sp, sp, 4
    jr ra

# Takes inputs N(a0) and n(a1), reverses the number in binary
reverse:
    addi sp, sp, -28
    sw ra, 0(sp)
    sw s0, 4(sp)
    sw s1, 8(sp)
    sw s2, 12(sp)
    sw s3, 16(sp)
    sw s4, 20(sp)
    sw s5, 24(sp)

    call logint# Now a0 has log2(N)
    addi s0, zero, 1 # j = 1
    add s1, zero, zero # p = 0

forloop_reverse:
    bgt s0, a0, forloop_reverse_end

    sub s2, a0, s0# s2 = a0 - s0
    add is3, zero, 1
    sll s3, s3, s2
    and s3, a1, s3
    beq s3, zero, elses3 # If not, skip

ifs3:
    addi s4, s0, -1 # s4 = j - 1
    addi s5, zero, 1
    sll s5, s5, s4
    or s1, s1, s5

elses3:
    addi s0, s0, 1
    j forloop_reverse

forloop_reverse_end:
    add a0, s1, zero # Return p

    lw ra, 0(sp)
    lw s0, 4(sp)
    lw s1, 8(sp)
    lw s2, 12(sp)
    lw s3, 16(sp)
    lw s4, 20(sp)
    lw s5, 24(sp)
    addi sp, sp, 28
    jr ra

1. Corrected Execution of logint

The purpose of logint is to calculate the integer base-2 logarithm (log2) of N.

Initial value: a0 = 42

Execution steps:

Iteration 1: t0 = 21, a0 = 1
Iteration 2: t0 = 10, a0 = 2
Iteration 3: t0 = 5,  a0 = 3
Iteration 4: t0 = 2,  a0 = 4
Iteration 5: t0 = 1,  a0 = 5
Iteration 6: t0 = 0, exit loop.

After exiting the loop, addi a0, a0, -1 is executed. Final result:
- a0 = 5 - 1 = 4
Thus, the result of logint is a0 = 4.

2. Corrected Execution of reverse

The purpose of reverse is to reverse the binary representation of a number.
- Initial values:
- a0 = 4 (result from log2(N))
- a1 = 42 (binary representation: 101010)
Execution process:
1. Confirmed a0 = 4, s0 = 1, s1 = 0.
2. Begin reversing the binary number 101010:

Bit 1: s0 = 1, extract bit 5 (most significant bit), which is 1 → reverse to the least significant bit, s1 = 0b000001.
Bit 2: s0 = 2, extract bit 4, which is 0 → skip.
Bit 3: s0 = 3, extract bit 3, which is 1 → reverse to the 2nd least significant bit, s1 = 0b000101.
Bit 4: s0 = 4, extract bit 2, which is 0 → skip.
Bit 5: s0 = 5, extract bit 1, which is 1 → reverse to the 4th least significant bit, s1 = 0b010101 (decimal value: 21).

Final result:
- a0 = 11.
Thus, the result of reverse is a0 = 11.
Conclusion Result
- logint result: a0 = 4
- reverse result: a0 = 11

References

https://github.com/riscv/riscv-isa-manual/releases
https://acsa.ustc.edu.cn/ics/download/riscv/RISC-V-Reader-Chinese-v2p1.pdf
https://blog.csdn.net/qq_38798111/article/details/129718824
https://github.com/riscv-collab/riscv-gnu-toolchain
https://github.com/riscv-software-src/riscv-tests
https://hackmd.io/@sysprog/arch2024-quiz2-sol
https://hackmd.io/@sysprog/arch2024-quiz4-sol
https://hackmd.io/@sysprog/arch2024-quiz5-sol