黃詠筑, 劉欣宜
Task: Develop a RISC-V instruction set simulator from scratch - Based on the foundation from Problem A, gradually expand it to support RV32I
, RV32M
, and RV32C
, and pass the tests from riscv-tests. Additionally, select at least five RISC-V programs from Quiz 1 to Quiz 6, rewrite them, and ensure they work correctly on the developed RISC-V instruction set simulator.
1. RV32I: This is the basic version of the RISC-V instruction set, supporting 32-bit operations. It includes the core instructions of the RISC-V instruction set, such as arithmetic operations, logical operations, and control flow. RV32I is the foundation of the RISC-V architecture, and other extensions are built upon it.
2. RV32M: This is an extended version of RV32I, adding support for multiplication and division instructions. The RV32M extension provides M instructions that allow the processor to perform more efficient multiplication (MUL) and division (DIV) operations.
3. RV32C: This is another extension of RV32I that provides a compressed instruction set. The C extension adds smaller, more compact instructions to reduce program size and improve performance, especially in embedded systems and resource-constrained devices. These instructions are usually simplified versions of RV32I instructions and help save storage space while enhancing execution efficiency.
Summarize the program, addressing its purpose and behavior.
This code primarily covers the basic instruction set of RV32I
and implements support for instructions like ADDI, LUI, AUIPC, etc. It does not implement the multiplication and division instructions of RV32M
, nor does it include the compressed instruction set of RV32C
. To extend it to RV32M
or RV32C
, the corresponding instruction handling logic would need to be added.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define fatal(args...) \
do { \
printf("Fatal: "); \
printf(args); \
exit(-1); \
} while (0)
enum {
/* I-type integer computation or integer register-immediate */
INTEGER_COMP_RI = 0x13,
LUI = 0x37
AUIPC = 0x17
/* ECALL and EBREAK */
ECALL = 0x73,
};
/* FUNCT3 for INTEGER_COMPI_RI */
enum {
ADDI = 0x0,
};
enum { OPCODE_MASK = 0x7F, REG_ADDR_MASK = 0x1F };
enum {
RD_SHIFT = 7,
RS1_SHIFT = 15,
RS2_SHIFT = 20,
FUNCT3_SHIFT = 12,
FUNCT7_SHIFT = 25
};
enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F };
enum {
RAM_SIZE = 1204 * 1024 * 2, /* 2 MiB */
RAM_BASE = 0x0,
};
typedef struct {
uint8_t *mem;
} ram_t;
typedef struct {
uint32_t regs[32];
size_t pc;
ram_t *ram;
} cpu_t;
typedef struct {
uint8_t opcode;
uint8_t rd, rs1, rs2, funct3, funct7;
} insn_t;
ram_t *ram_new(uint8_t *code, size_t len)
{
ram_t *ram = malloc(sizeof(ram_t));
ram->mem = malloc(RAM_SIZE);
memcpy(ram->mem, code, len);
return ram;
}
void ram_free(ram_t *mem)
{
free(mem->mem);
free(mem);
}
static inline void check_addr(int32_t addr)
{
size_t index = addr - RAM_BASE;
if (index >= RAM_SIZE) fatal("out of memory.\n");
}
static uint32_t ram_load8(ram_t *mem, uint32_t addr)
{
size_t index = addr - RAM_BASE;
return (uint32_t) mem->mem[index];
}
static uint32_t ram_load16(ram_t *mem, uint32_t addr)
{
size_t index = addr - RAM_BASE;
return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8));
}
static uint32_t ram_load32(ram_t *mem, uint32_t addr)
{
size_t index = addr - RAM_BASE;
return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) |
((uint32_t) mem->mem[index + 2] << 16) |
((uint32_t) mem->mem[index + 3] << 24));
}
uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size)
{
check_addr(addr);
uint32_t r = 0;
switch (size) {
case 8: r = ram_load8(mem, addr); break;
case 16: r = ram_load16(mem, addr); break;
case 32: r = ram_load32(mem, addr); break;
default: fatal("RAM: invalid load size(%d)\n", size);
}
return r;
}
cpu_t *cpu_new(uint8_t *code, size_t len)
{
cpu_t *cpu = malloc(sizeof(cpu_t));
memset(cpu->regs, 0, sizeof(cpu->regs));
cpu->regs[0] = 0;
/* Stack pointer */
cpu->regs[2] = RAM_BASE + RAM_SIZE;
cpu->pc = RAM_BASE;
cpu->ram = ram_new(code, len);
return cpu;
}
void cpu_free(cpu_t *cpu)
{
ram_free(cpu->ram);
free(cpu);
}
uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size)
{
return ram_load(cpu->ram, addr, size);
}
uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); }
void cpu_decode(uint32_t raw, insn_t *inst)
{
uint8_t opcode = raw & OPCODE_MASK;
uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK;
uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK;
uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK;
uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK;
uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK;
inst->opcode = opcode;
inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2;
inst->funct3 = funct3;
inst->funct7 = funct7;
}
static inline int32_t i_imm(uint32_t raw)
{
/* imm[11:0] = inst[31:20] */
return ((int32_t) raw) >> 20;
}
static inline int32_t u_imm(uint32_t raw)
{
/* imm[20:0] = inst[31:12] */
return ((int32_t) raw & 0xFFFFF000);
}
enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 };
enum { STDOUT = 1, STDERR = 2 };
void ecall_handler(cpu_t *cpu)
{
uint32_t syscall_nr = cpu->regs[17];
switch (syscall_nr) {
case SYSCALL_WRITE: {
uint32_t fd = cpu->regs[10];
uint32_t addr = cpu->regs[11];
uint32_t count = cpu->regs[12];
FILE *stream;
switch (fd) {
case STDOUT: stream = stdout; break;
case STDERR: stream = stderr; break;
default: fatal("invalid file-descriptor %d.\n", fd);
}
for (uint32_t i = 0; i < count; i++)
fprintf(stream, "%c", cpu_load(cpu, addr + i, 8));
break;
}
case SYSCALL_EXIT: {
int32_t exit_code = cpu->regs[10];
exit(exit_code);
break;
}
default: fatal("unkown syscall number %d.\n", syscall_nr);
}
}
static insn_t inst;
void cpu_execute(cpu_t *cpu, uint32_t raw)
{
cpu_decode(raw, &inst);
cpu->regs[0] = 0;
switch (inst.opcode) {
case INTEGER_COMP_RI: {
int32_t imm = i_imm(raw);
switch (inst.funct3) {
case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break;
default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI");
}
break;
}
case LUI: {
int32_t imm = u_imm(raw);
cpu->regs[inst.rd] = imm;
break;
}
case AUIPC: {
int32_t imm = u_imm(raw);
cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm;
break;
}
case ECALL:
if (raw == 0x100073) break; /* EBREAK */
ecall_handler(cpu);
break;
default: fatal("Illegal Instruction 0x%x\n", inst.opcode);
}
}
int main(int argc, char *argv[])
{
if (argc < 2) {
printf("Usage: %s [filename]\n", argv[0]);
exit(-1);
}
const char *filename = argv[1];
FILE *file = fopen(filename, "rb");
if (!file) {
printf("Failed to open %s\n", filename);
exit(-1);
}
fseek(file, 0L, SEEK_END);
size_t code_size = ftell(file);
rewind(file);
uint8_t *code = malloc(code_size);
fread(code, sizeof(uint8_t), code_size, file);
fclose(file);
cpu_t *cpu = cpu_new(code, code_size);
while (1) {
uint32_t raw = cpu_fetch(cpu);
cpu->pc += 4;
if (cpu->pc > code_size) break;
cpu_execute(cpu, raw);
if (!cpu->pc) break;
}
cpu_free(cpu);
return 0;
}
RV32M
extension instructions:RV32M
provides multiplication and division instructions, such as MUL
, MULH
, DIV
, DIVU
, REM
, and REMU
.RV32C
extension instructions:RV32C
is a compressed instruction set that includes 16-bit instructions. These instructions need to be handled separately and identified as compressed instructions during the decode stage.RV32C
instructions typically use fewer bits for encoding and can be aligned to 16 bits, so additional checks and processing are required.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define fatal(args...) \
do { \
printf("Fatal: "); \
printf(args); \
exit(-1); \
} while (0)
enum {
/* I-type integer computation or integer register-immediate */
INTEGER_COMP_RI = 0x13,
LUI = 0x37,
AUIPC = 0x17,
ECALL = 0x73,
/* RV32M extension */
MUL = 0x33, // Multiply
MULH = 0x33, // Multiply high
DIV = 0x33, // Divide
DIVU = 0x33, // Divide unsigned
REM = 0x33, // Remainder
REMU = 0x33, // Remainder unsigned
/* RV32C extension */
C_ADDI4SPN = 0x10, // Compressed ADDI4SPN
C_LW = 0x00, // Compressed Load Word
C_SW = 0x01, // Compressed Store Word
};
/* FUNCT3 for INTEGER_COMP_RI */
enum {
ADDI = 0x0,
MUL_FUNCT3 = 0x0, // For RV32M MUL
DIV_FUNCT3 = 0x4, // For RV32M DIV
REM_FUNCT3 = 0x5, // For RV32M REM
};
/* RV32M MUL */
enum {
MUL_FUNCT7 = 0x1,
MULH_FUNCT7 = 0x2,
};
/* FUNCT3 mask */
enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F };
enum {
RAM_SIZE = 1204 * 1024 * 2, /* 2 MiB */
RAM_BASE = 0x0,
};
typedef struct {
uint8_t *mem;
} ram_t;
typedef struct {
uint32_t regs[32];
size_t pc;
ram_t *ram;
} cpu_t;
typedef struct {
uint8_t opcode;
uint8_t rd, rs1, rs2, funct3, funct7;
} insn_t;
ram_t *ram_new(uint8_t *code, size_t len)
{
ram_t *ram = malloc(sizeof(ram_t));
ram->mem = malloc(RAM_SIZE);
memcpy(ram->mem, code, len);
return ram;
}
void ram_free(ram_t *mem)
{
free(mem->mem);
free(mem);
}
static inline void check_addr(int32_t addr)
{
size_t index = addr - RAM_BASE;
if (index >= RAM_SIZE) fatal("out of memory.\n");
}
static uint32_t ram_load8(ram_t *mem, uint32_t addr)
{
size_t index = addr - RAM_BASE;
return (uint32_t) mem->mem[index];
}
static uint32_t ram_load16(ram_t *mem, uint32_t addr)
{
size_t index = addr - RAM_BASE;
return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8));
}
static uint32_t ram_load32(ram_t *mem, uint32_t addr)
{
size_t index = addr - RAM_BASE;
return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) |
((uint32_t) mem->mem[index + 2] << 16) |
((uint32_t) mem->mem[index + 3] << 24));
}
uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size)
{
check_addr(addr);
uint32_t r = 0;
switch (size) {
case 8: r = ram_load8(mem, addr); break;
case 16: r = ram_load16(mem, addr); break;
case 32: r = ram_load32(mem, addr); break;
default: fatal("RAM: invalid load size(%d)\n", size);
}
return r;
}
cpu_t *cpu_new(uint8_t *code, size_t len)
{
cpu_t *cpu = malloc(sizeof(cpu_t));
memset(cpu->regs, 0, sizeof(cpu->regs));
cpu->regs[0] = 0;
/* Stack pointer */
cpu->regs[2] = RAM_BASE + RAM_SIZE;
cpu->pc = RAM_BASE;
cpu->ram = ram_new(code, len);
return cpu;
}
void cpu_free(cpu_t *cpu)
{
ram_free(cpu->ram);
free(cpu);
}
uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size)
{
return ram_load(cpu->ram, addr, size);
}
uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); }
void cpu_decode(uint32_t raw, insn_t *inst)
{
uint8_t opcode = raw & OPCODE_MASK;
uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK;
uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK;
uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK;
uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK;
uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK;
inst->opcode = opcode;
inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2;
inst->funct3 = funct3;
inst->funct7 = funct7;
}
static inline int32_t i_imm(uint32_t raw)
{
/* imm[11:0] = inst[31:20] */
return ((int32_t) raw) >> 20;
}
static inline int32_t u_imm(uint32_t raw)
{
/* imm[20:0] = inst[31:12] */
return ((int32_t) raw & 0xFFFFF000);
}
enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 };
enum { STDOUT = 1, STDERR = 2 };
void ecall_handler(cpu_t *cpu)
{
uint32_t syscall_nr = cpu->regs[17];
switch (syscall_nr) {
case SYSCALL_WRITE: {
uint32_t fd = cpu->regs[10];
uint32_t addr = cpu->regs[11];
uint32_t count = cpu->regs[12];
FILE *stream;
switch (fd) {
case STDOUT: stream = stdout; break;
case STDERR: stream = stderr; break;
default: fatal("invalid file-descriptor %d.\n", fd);
}
for (uint32_t i = 0; i < count; i++)
fprintf(stream, "%c", cpu_load(cpu, addr + i, 8));
break;
}
case SYSCALL_EXIT: {
int32_t exit_code = cpu->regs[10];
exit(exit_code);
break;
}
default: fatal("unkown syscall number %d.\n", syscall_nr);
}
}
static insn_t inst;
void cpu_execute(cpu_t *cpu, uint32_t raw)
{
cpu_decode(raw, &inst);
cpu->regs[0] = 0;
switch (inst.opcode) {
case INTEGER_COMP_RI: {
int32_t imm = i_imm(raw);
switch (inst.funct3) {
case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break;
default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI");
}
break;
}
case MUL:
switch (inst.funct7) {
case MUL_FUNCT7: cpu->regs[inst.rd] = cpu->regs[inst.rs1] * cpu->regs[inst.rs2]; break;
default: fatal("Unknown FUNCT7 for MUL");
}
break;
case DIV:
switch (inst.funct3) {
case DIV_FUNCT3: cpu->regs[inst.rd] = cpu->regs[inst.rs1] / cpu->regs[inst.rs2]; break;
default: fatal("Unknown FUNCT3 for DIV");
}
break;
case REM:
switch (inst.funct3) {
case REM_FUNCT3: cpu->regs[inst.rd] = cpu->regs[inst.rs1] % cpu->regs[inst.rs2]; break;
default: fatal("Unknown FUNCT3 for REM");
}
break;
case MULH:
switch (inst.funct7) {
case MULH_FUNCT7: cpu->regs[inst.rd] = (int32_t) cpu->regs[inst.rs1] * (int32_t) cpu->regs[inst.rs2] >> 32; break;
default: fatal("Unknown FUNCT7 for MULH");
}
break;
case DIVU:
switch (inst.funct3) {
case 0x4: cpu->regs[inst.rd] = (uint32_t) cpu->regs[inst.rs1] / (uint32_t) cpu->regs[inst.rs2]; break;
default: fatal("Unknown FUNCT3 for DIVU");
}
break;
case REMU:
switch (inst.funct3) {
case 0x5: cpu->regs[inst.rd] = (uint32_t) cpu->regs[inst.rs1] % (uint32_t) cpu->regs[inst.rs2]; break;
default: fatal("Unknown FUNCT3 for REMU");
}
break;
case LUI: {
int32_t imm = u_imm(raw);
cpu->regs[inst.rd] = imm;
break;
}
case AUIPC: {
int32_t imm = u_imm(raw);
cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm;
break;
}
case ECALL:
if (raw == 0x100073) break; /* EBREAK */
ecall_handler(cpu);
break;
default: fatal("Illegal Instruction 0x%x\n", inst.opcode);
}
}
int main(int argc, char *argv[])
{
if (argc < 2) {
printf("Usage: %s [filename]\n", argv[0]);
exit(-1);
}
const char *filename = argv[1];
FILE *file = fopen(filename, "rb");
if (!file) {
printf("Failed to open %s\n", filename);
exit(-1);
}
fseek(file, 0L, SEEK_END);
size_t code_size = ftell(file);
rewind(file);
uint8_t *code = malloc(code_size);
fread(code, sizeof(uint8_t), code_size, file);
fclose(file);
cpu_t *cpu = cpu_new(code, code_size);
while (1) {
uint32_t raw = cpu_fetch(cpu);
cpu->pc += 4;
if (cpu->pc > code_size) break;
cpu_execute(cpu, raw);
if (!cpu->pc) break;
}
cpu_free(cpu);
return 0;
}
CC = gcc
CFLAGS = -g -Wall
SRC = filename.c
OBJ = filename.o
EXEC = riscv_simulator
all: $(EXEC)
$(EXEC): $(OBJ)
$(CC) $(OBJ) -o $(EXEC)
$(OBJ): $(SRC)
$(CC) $(CFLAGS) -c $(SRC)
clean:
rm -f $(OBJ) $(EXEC)
make
if [ -z "$1" ]; then
echo "Usage: $0 <source_file.s>"
exit 1
fi
SRC_FILE=$1
BASENAME=$(basename "$SRC_FILE" .s)
OBJ_FILE="${BASENAME}.o"
ELF_FILE="${BASENAME}.elf"
riscv64-unknown-elf-as -o $OBJ_FILE $SRC_FILE
riscv64-unknown-elf-ld -o $ELF_FILE $OBJ_FILE
./riscv_simulator $ELF_FILE
chmod +x build_and_run.sh
./build_and_run.sh filename.s
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define fatal(args...) \
do { \
printf("Fatal: "); \
printf(args); \
exit(-1); \
} while (0)
enum {
/* I-type integer computation or integer register-immediate */
INTEGER_COMP_RI = 0x13,
LUI = 0x37,
AUIPC = 0x17,
ECALL = 0x73,
};
enum {
ADDI = 0x0,
/* RV32M opcodes */
MUL = 0x0,
MULH = 0x1,
DIV = 0x4,
DIVU = 0x5,
REM = 0x6,
REMU = 0x7,
};
/* FUNCT3 for INTEGER_COMP_RI */
enum { OPCODE_MASK = 0x7F, REG_ADDR_MASK = 0x1F };
enum { RD_SHIFT = 7, RS1_SHIFT = 15, RS2_SHIFT = 20, FUNCT3_SHIFT = 12, FUNCT7_SHIFT = 25 };
enum { FUNCT3_MASK = 0x7, FUNCT7_MASK = 0x7F };
enum { RAM_SIZE = 1204 * 1024 * 2, RAM_BASE = 0x0 };
typedef struct {
uint8_t *mem;
} ram_t;
typedef struct {
uint32_t regs[32];
size_t pc;
ram_t *ram;
} cpu_t;
typedef struct {
uint8_t opcode;
uint8_t rd, rs1, rs2, funct3, funct7;
} insn_t;
ram_t *ram_new(uint8_t *code, size_t len) {
ram_t *ram = malloc(sizeof(ram_t));
ram->mem = malloc(RAM_SIZE);
memcpy(ram->mem, code, len);
return ram;
}
void ram_free(ram_t *mem) {
free(mem->mem);
free(mem);
}
static inline void check_addr(int32_t addr) {
size_t index = addr - RAM_BASE;
if (index >= RAM_SIZE) fatal("out of memory.\n");
}
static uint32_t ram_load8(ram_t *mem, uint32_t addr) {
size_t index = addr - RAM_BASE;
return (uint32_t) mem->mem[index];
}
static uint32_t ram_load16(ram_t *mem, uint32_t addr) {
size_t index = addr - RAM_BASE;
return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8));
}
static uint32_t ram_load32(ram_t *mem, uint32_t addr) {
size_t index = addr - RAM_BASE;
return ((uint32_t) mem->mem[index] | ((uint32_t) mem->mem[index + 1] << 8) |
((uint32_t) mem->mem[index + 2] << 16) |
((uint32_t) mem->mem[index + 3] << 24));
}
uint32_t ram_load(ram_t *mem, uint32_t addr, uint8_t size) {
check_addr(addr);
uint32_t r = 0;
switch (size) {
case 8: r = ram_load8(mem, addr); break;
case 16: r = ram_load16(mem, addr); break;
case 32: r = ram_load32(mem, addr); break;
default: fatal("RAM: invalid load size(%d)\n", size);
}
return r;
}
cpu_t *cpu_new(uint8_t *code, size_t len) {
cpu_t *cpu = malloc(sizeof(cpu_t));
memset(cpu->regs, 0, sizeof(cpu->regs));
cpu->regs[0] = 0; // x0 is always zero
cpu->regs[2] = RAM_BASE + RAM_SIZE; // Stack pointer
cpu->pc = RAM_BASE;
cpu->ram = ram_new(code, len);
return cpu;
}
void cpu_free(cpu_t *cpu) {
ram_free(cpu->ram);
free(cpu);
}
uint32_t cpu_load(cpu_t *cpu, uint32_t addr, uint8_t size) {
return ram_load(cpu->ram, addr, size);
}
uint32_t cpu_fetch(cpu_t *cpu) { return cpu_load(cpu, cpu->pc, 32); }
void cpu_decode(uint32_t raw, insn_t *inst) {
uint8_t opcode = raw & OPCODE_MASK;
uint8_t rd = (raw >> RD_SHIFT) & REG_ADDR_MASK;
uint8_t rs1 = (raw >> RS1_SHIFT) & REG_ADDR_MASK;
uint8_t rs2 = (raw >> RS2_SHIFT) & REG_ADDR_MASK;
uint8_t funct3 = (raw >> FUNCT3_SHIFT) & FUNCT3_MASK;
uint8_t funct7 = (raw >> FUNCT7_SHIFT) & FUNCT7_MASK;
inst->opcode = opcode;
inst->rd = rd, inst->rs1 = rs1, inst->rs2 = rs2;
inst->funct3 = funct3;
inst->funct7 = funct7;
}
static inline int32_t i_imm(uint32_t raw) {
return ((int32_t) raw) >> 20;
}
static inline int32_t u_imm(uint32_t raw) {
return ((int32_t) raw & 0xFFFFF000);
}
enum { SYSCALL_WRITE = 64, SYSCALL_EXIT = 93 };
enum { STDOUT = 1, STDERR = 2 };
void ecall_handler(cpu_t *cpu) {
uint32_t syscall_nr = cpu->regs[17];
switch (syscall_nr) {
case SYSCALL_WRITE: {
uint32_t fd = cpu->regs[10];
uint32_t addr = cpu->regs[11];
uint32_t count = cpu->regs[12];
FILE *stream;
switch (fd) {
case STDOUT: stream = stdout; break;
case STDERR: stream = stderr; break;
default: fatal("invalid file-descriptor %d.\n", fd);
}
for (uint32_t i = 0; i < count; i++) {
fprintf(stream, "%c", cpu_load(cpu, addr + i, 8));
}
break;
}
case SYSCALL_EXIT: {
int32_t exit_code = cpu->regs[10];
exit(exit_code);
break;
}
default: fatal("unkown syscall number %d.\n", syscall_nr);
}
}
static insn_t inst;
void cpu_execute(cpu_t *cpu, uint32_t raw) {
cpu_decode(raw, &inst);
cpu->regs[0] = 0; // x0 is always zero
switch (inst.opcode) {
case INTEGER_COMP_RI: {
int32_t imm = i_imm(raw);
switch (inst.funct3) {
case ADDI: cpu->regs[inst.rd] = cpu->regs[inst.rs1] + imm; break;
default: fatal("Unknown FUNCT3 for INTEGER_COMP_RI");
}
break;
}
case LUI: {
int32_t imm = u_imm(raw);
cpu->regs[inst.rd] = imm;
break;
}
case AUIPC: {
int32_t imm = u_imm(raw);
cpu->regs[inst.rd] = (cpu->pc + (-4)) + imm;
break;
}
// RV32M 指令集 (乘法與除法)
case 0x33: { // RV32M 是 0x33 opcode
switch (inst.funct3) {
case 0x0: // MUL
if (inst.funct7 == 0x01) {
cpu->regs[inst.rd] = (int32_t)cpu->regs[inst.rs1] * (int32_t)cpu->regs[inst.rs2];
}
break;
case 0x1: // MULH
if (inst.funct7 == 0x01) {
int64_t result = (int64_t)(int32_t)cpu->regs[inst.rs1] * (int32_t)cpu->regs[inst.rs2];
cpu->regs[inst.rd] = (result >> 32) & 0xFFFFFFFF;
}
break;
case 0x4: // DIV
if (inst.funct7 == 0x01) {
cpu->regs[inst.rd] = cpu->regs[inst.rs1] / cpu->regs[inst.rs2];
}
break;
case 0x5: // DIVU
if (inst.funct7 == 0x01) {
cpu->regs[inst.rd] = (unsigned int)cpu->regs[inst.rs1] / (unsigned int)cpu->regs[inst.rs2];
}
break;
case 0x6: // REM
if (inst.funct7 == 0x01) {
cpu->regs[inst.rd] = cpu->regs[inst.rs1] % cpu->regs[inst.rs2];
}
break;
case 0x7: // REMU
if (inst.funct7 == 0x01) {
cpu->regs[inst.rd] = (unsigned int)cpu->regs[inst.rs1] % (unsigned int)cpu->regs[inst.rs2];
}
break;
default:
fatal("Unknown funct3 for RV32M instruction\n");
}
break;
}
// RV32C 指令集
case 0x1: { // set RV32C指令的opcode是0x1
switch (inst.funct3) {
case 0x0: // C.ADDI
cpu->regs[inst.rd] = cpu->regs[inst.rs1] + i_imm(raw);
break;
// 其他 RV32C 指令
default:
fatal("Unknown FUNCT3 for RV32C instruction\n");
}
break;
}
case ECALL:
if (raw == 0x100073) break; /* EBREAK */
ecall_handler(cpu);
break;
default:
fatal("Illegal Instruction 0x%x\n", inst.opcode);
}
}
int main(int argc, char *argv[]) {
if (argc < 2) {
printf("Usage: %s [filename]\n", argv[0]);
exit(-1);
}
const char *filename = argv[1];
FILE *file = fopen(filename, "rb");
if (!file) {
printf("Failed to open %s\n", filename);
exit(-1);
}
fseek(file, 0L, SEEK_END);
size_t code_size = ftell(file);
rewind(file);
uint8_t *code = malloc(code_size);
fread(code, sizeof(uint8_t), code_size, file);
fclose(file);
cpu_t *cpu = cpu_new(code, code_size);
while (1) {
uint32_t raw = cpu_fetch(cpu);
cpu->pc += 4;
if (cpu->pc > code_size) break;
cpu_execute(cpu, raw);
if (!cpu->pc) break;
}
cpu_free(cpu);
return 0;
}
.text
la a0, multiplier # Load multiplier address
lw a1, 0(a0) # Load multiplier value
la a2, multiplicand # Load multiplicand address
lw a3, 0(a2) # Load multiplicand value
li t0, 0 # Initialize accumulator
li t1, 32 # Set bit counter
# Check for negative values
bltz a1, handle_negative1 # If multiplier negative
j shift_and_add_loop # Skip to main loop
bltz a3, handle_negative2 # If multiplicand negative
j shift_and_add_loop # Continue to main loop
handle_negative1:
neg a1, a1 # Make multiplier positive
handle_negative2:
neg a3, a3 # Make multiplicand positive
shift_and_add_loop:
beqz t1, end_shift_and_add # Exit if bit count is zero
andi t2, a1, 1 # Check least significant bit
beqz t2, skip_add # Skip add if bit is 0
add t0, t0, a3 # Add to accumulator
skip_add:
srai a1, a1, 1 # Right shift multiplier
slli a3, a3, 1 # Left shift multiplicand
addi t1, t1, -1 # Decrease bit counter
j shift_and_add_loop # Repeat loop
end_shift_and_add:
la a4, result # Load result address
sw t0, 0(a4) # Store final result
This program performs multiplication of two integers using the shift-and-add algorithm.
Inputs
Algorithm
The final result is stored in the result memory location.
Example Simulation
.data
md: .string "Move Disk " # String for move message
from: .string " from '" # String for source pole
to: .string "' to '" # String for destination pole
newline: .string "'\n" # String for newline
src: .string "A" # Source pole (A)
aux: .string "B" # Auxiliary pole (B)
dst: .string "C" # Destination pole (C)
n: .word 3 # Number of disks
.text
.globl main
main:
lw a1, n # Load the number of disks (n) into register a1
la t0, src # Load the address of the source pole (A) into t0
la t1, dst # Load the address of the destination pole (C) into t1
la t2, aux # Load the address of the auxiliary pole (B) into t2
lbu a2, 0(t0) # Load the first character of source pole into a2
lbu a3, 0(t2) # Load the first character of auxiliary pole into a3
lbu a4, 0(t1) # Load the first character of destination pole into a4
jal x1, hanoi # Call the hanoi function (jump and link)
li a7, 10 # Load system call number for program exit into a7
ecall # Make the system call to exit the program
hanoi:
addi sp, sp, -20 # Allocate 20 bytes of space on the stack
sw x1, 0(sp) # Save the return address on the stack
sw a1, 4(sp) # Save the number of disks (a1) on the stack
sw a2, 8(sp) # Save the source pole (a2) on the stack
sw a3, 12(sp) # Save the auxiliary pole (a3) on the stack
sw a4, 16(sp) # Save the destination pole (a4) on the stack
addi t0, x0, 1 # Load 1 into t0 (used for comparison)
beq a1, t0, return # If there's only 1 disk, jump to return
lw a1, 4(sp) # Load the number of disks from the stack
addi a1, a1, -1 # Decrement the number of disks (a1)
lbu a2, 8(sp) # Load source pole from the stack
lbu a3, 16(sp) # Load destination pole from the stack
lbu a4, 12(sp) # Load auxiliary pole from the stack
jal x1, hanoi # Recursive call to hanoi
lw a1, 4(sp) # Load the number of disks from the stack
lbu a2, 8(sp) # Load source pole from the stack
lbu a3, 16(sp) # Load destination pole from the stack
jal x1, print # Call the print function to display the move
lw a1, 4(sp) # Load the number of disks from the stack
addi a1, a1, -1 # Decrement the number of disks (a1)
lbu a2, 12(sp) # Load auxiliary pole from the stack
lbu a3, 8(sp) # Load source pole from the stack
lbu a4, 16(sp) # Load destination pole from the stack
jal x1, hanoi # Recursive call to hanoi
lw x1, 0(sp) # Load the return address from the stack
addi sp, sp, 20 # Deallocate space on the stack
jalr x0, x1, 0 # Return to the caller
return:
lw a1, 4(sp) # Load the number of disks from the stack
lbu a2, 8(sp) # Load source pole from the stack
lbu a3, 16(sp) # Load destination pole from the stack
jal x1, print # Call the print function to display the move
lw x1, 0(sp) # Load the return address from the stack
addi sp, sp, 20 # Deallocate space on the stack
jalr x0, x1, 0 # Return to the caller
print:
la a0, md # Load the address of "Move Disk " into a0
li a7, 4 # Load system call number for printing a string into a7
ecall # Make the system call to print the string
addi a0, a1, 0 # Load the disk number (a1) into a0
li a7, 1 # Load system call number for printing an integer into a7
ecall # Make the system call to print the integer
la a0, from # Load the address of " from '" into a0
li a7, 4 # Load system call number for printing a string into a7
ecall # Make the system call to print the string
addi a0, a2, 0 # Load the source pole into a0
li a7, 11 # Load system call number for printing a character into a7
ecall # Make the system call to print the character
la a0, to # Load the address of "' to '" into a0
li a7, 4 # Load system call number for printing a string into a7
ecall # Make the system call to print the string
addi a0, a3, 0 # Load the destination pole into a0
li a7, 11 # Load system call number for printing a character into a7
ecall # Make the system call to print the character
la a0, newline # Load the address of "'\n" into a0
li a7, 4 # Load system call number for printing a string into a7
ecall # Make the system call to print the string
jalr x0, x1, 0 # Return to the caller
Move Disk 1 from 'A' to 'C'
Move Disk 2 from 'A' to 'B'
Move Disk 1 from 'C' to 'B'
Move Disk 3 from 'A' to 'C'
Move Disk 1 from 'B' to 'A'
Move Disk 2 from 'B' to 'C'
Move Disk 1 from 'A' to 'C'
.data
# Define the data section with two numbers to multiply
num1: .word 13
num2: .word 7
result: .word 0 # Placeholder for the final result
.text
# Begin the main code in the text section
la x1, result # Load the address of the result variable into register x1
lw t0, num1 # Load the first number (num1) into register t0
lw t1, num2 # Load the second number (num2) into register t1
li t2, 0 # Initialize the result (t2) to 0
loop:
# Check if the least significant bit of t0 (num1) is 1 (i.e., if the number is odd)
andi t3, t0, 1
beq t3, x0, skip_add # If the bit is 0 (even), skip the addition
# If the number is odd, add the value in t1 (num2) to the result in t2
add t2, t2, t1
skip_add:
# Perform a right shift on t0 (num1), effectively dividing it by 2
srli t0, t0, 1
# Perform a left shift on t1 (num2), effectively multiplying it by 2
slli t1, t1, 1
# If t0 (num1) is not zero, repeat the loop
bnez t0, loop
# Store the final result in the memory location pointed by x1
sw t2, 0(x1)
91
# Convert signed int to float
# float i2f(s32 num);
# INPUT: A0 = integer number
# OUTPUT: A0 = float number (IEEE 754 single-precision)
i2f:
# Prepare result sign in A1
srli a1, a0, 31 # A1 = (A0 >> 31), extract sign bit
slli a1, a1, 31 # A1 = A1 << 31, position sign bit at bit 31
# Get absolute value of the number
beq a1, x0, positive # If sign bit is zero, number is positive
sub a0, x0, a0 # A0 = -A0 (negate to get absolute value)
positive:
# Check if number is zero
beq a0, x0, zero_result # If A0 == 0, result is zero
# Store sign and absolute value
add a2, x0, a1 # A2 = A1 (store sign)
add a1, x0, a0 # A1 = A0 (store absolute value)
# Count leading zeros in A0 (result stored in T0)
# Initialize T0 (leading zero count) to 0
add t0, x0, x0 # T0 = 0
clz_loop:
sll t1, a1, 1 # Shift A1 left by 1 bit
addi t0, t0, 1 # T0 = T0 + 1 (increment count)
mv a1, t1 # Update A1 with shifted value
bgez a1, clz_loop # Loop while A1 >= 0 (MSB is 0)
# Adjust for possible overflow in shift
li t1, 32 # Load 32 into t1
blt t0, t1, clz_done # If t0 < 32, proceed to clz_done
li a0, 32 # If t0 >= 32, leading zeros = 32
j normalize
clz_done:
# A0 now contains the number of leading zeros (stored in T0)
add a0, x0, t0 # Move T0 to A0
normalize:
# Normalize the number (shift left by leading zeros)
sub t1, x0, a0 # t1 = -A0
sll a1, a1, t1 # A1 = A1 << (32 - leading zeros)
# Prepare result exponent
# Exponent = 158 - leading_zeros
li t1, 158
sub a0, t1, a0 # A0 = 158 - leading zeros
# Rounding: Add 0x80 to A1 (rounding bit at position 7)
addi a1, a1, 128 # A1 = A1 + 0x80
# Check for overflow after rounding
bgez a1, rounding_ok # If A1 >= 0, no overflow
addi a0, a0, 1 # Exponent increment due to rounding overflow
j adjust_mantissa
rounding_ok:
# Check if lowest 8 bits are zero (for rounding to even)
andi a3, a1, 0xFF # A3 = A1 & 0xFF (lowest 8 bits)
beq a3, x0, round_even # If lowest 8 bits are zero, round to even
adjust_mantissa:
# Remove leading hidden bit '1'
slli a1, a1, 1 # A1 = A1 << 1 (remove hidden '1')
j compose_result
round_even:
# Ensure even mantissa (clear least significant bit)
srli a1, a1, 9 # A1 = A1 >> 9
slli a1, a1, 10 # A1 = A1 << 10 (remove hidden '1')
j compose_result
compose_result:
# Align mantissa and exponent
srli a1, a1, 9 # A1 = A1 >> 9 (align mantissa to bits 0..22)
slli a0, a0, 23 # A0 = A0 << 23 (align exponent to bits 23..30), H10
or a0, a0, a1 # A0 = exponent | mantissa
or a0, a0, a2 # A0 = A0 | sign bit (bit 31)
jalr x0, ra, 0 # Return from function
zero_result:
# Return zero (signed zero with correct sign)
or a0, x0, a2 # A0 = sign bit (A2)
jalr x0, ra, 0 # Return from function
0x42280000
# Takes input N (a0), returns its log base 2 in a0
logint:
addi sp, sp, -4
sw t0, 0(sp)
add t0, a0, zero# k = N
add a0, zero, zero# i = 0
logloop:
beq t0, zero, logloop_end # Exit if k == 0
srai t0, t0, 1 # k >>= 1
addi a0, a0, 1 # i++
j logloop
logloop_end:
addi a0, a0, -1 # Return i - 1
lw t0, 0(sp)
addi sp, sp, 4
jr ra
# Takes inputs N(a0) and n(a1), reverses the number in binary
reverse:
addi sp, sp, -28
sw ra, 0(sp)
sw s0, 4(sp)
sw s1, 8(sp)
sw s2, 12(sp)
sw s3, 16(sp)
sw s4, 20(sp)
sw s5, 24(sp)
call logint# Now a0 has log2(N)
addi s0, zero, 1 # j = 1
add s1, zero, zero # p = 0
forloop_reverse:
bgt s0, a0, forloop_reverse_end
sub s2, a0, s0# s2 = a0 - s0
add is3, zero, 1
sll s3, s3, s2
and s3, a1, s3
beq s3, zero, elses3 # If not, skip
ifs3:
addi s4, s0, -1 # s4 = j - 1
addi s5, zero, 1
sll s5, s5, s4
or s1, s1, s5
elses3:
addi s0, s0, 1
j forloop_reverse
forloop_reverse_end:
add a0, s1, zero # Return p
lw ra, 0(sp)
lw s0, 4(sp)
lw s1, 8(sp)
lw s2, 12(sp)
lw s3, 16(sp)
lw s4, 20(sp)
lw s5, 24(sp)
addi sp, sp, 28
jr ra
1. Corrected Execution of logint
Iteration 1: t0 = 21, a0 = 1
Iteration 2: t0 = 10, a0 = 2
Iteration 3: t0 = 5, a0 = 3
Iteration 4: t0 = 2, a0 = 4
Iteration 5: t0 = 1, a0 = 5
Iteration 6: t0 = 0, exit loop.
2. Corrected Execution of reverse
The purpose of reverse is to reverse the binary representation of a number.
Execution process:
Bit 1: s0 = 1, extract bit 5 (most significant bit), which is 1 → reverse to the least significant bit, s1 = 0b000001.
Bit 2: s0 = 2, extract bit 4, which is 0 → skip.
Bit 3: s0 = 3, extract bit 3, which is 1 → reverse to the 2nd least significant bit, s1 = 0b000101.
Bit 4: s0 = 4, extract bit 2, which is 0 → skip.
Bit 5: s0 = 5, extract bit 1, which is 1 → reverse to the 4th least significant bit, s1 = 0b010101 (decimal value: 21).
https://github.com/riscv/riscv-isa-manual/releases
https://acsa.ustc.edu.cn/ics/download/riscv/RISC-V-Reader-Chinese-v2p1.pdf
https://blog.csdn.net/qq_38798111/article/details/129718824
https://github.com/riscv-collab/riscv-gnu-toolchain
https://github.com/riscv-software-src/riscv-tests
https://hackmd.io/@sysprog/arch2024-quiz2-sol
https://hackmd.io/@sysprog/arch2024-quiz4-sol
https://hackmd.io/@sysprog/arch2024-quiz5-sol