https://michaeljclark.github.io/isa.html
https://github.com/x213212/riscv_emulator
# riscv_emulator
研讀 RISC-V 架構的指令,並找了一個模擬器來觀察:實際執行 RISC-V 指令時,內部會經過哪些細節。
# create riscv 架構的 binary
```makefile=
# Build a flat RV64I binary from test.c:
#   1. compile test.c to assembly (test.s),
#   2. assemble/link it with .text placed at address 0x0 and no standard library,
#   3. strip the ELF container so only the raw bytes remain (test.bin).
# NOTE: every recipe line must start with a literal TAB, otherwise make
# stops with "missing separator".
.PHONY: clean

test.bin: test.c
	/root/riscv-toolchain/bin/riscv64-unknown-elf-gcc -S test.c
	/root/riscv-toolchain/bin/riscv64-unknown-elf-gcc -Wl,-Ttext=0x0 -nostdlib -march=rv64i -mabi=lp64 -o test test.s
	/root/riscv-toolchain/bin/riscv64-unknown-elf-objcopy -O binary test test.bin

# Remove every generated artefact.
clean:
	rm -f test
	rm -f test.bin
	rm -f test.s
```
```c=
// Forward declaration: fact() is defined after main().
int fact(int n);
// Entry point of the sample program that is compiled into test.bin.
int main() {
int a = 10;
// The call to fact() is currently disabled.
/*return fact(a);*/
// Returns 10 - 11, i.e. -1.
return a-11;
}
/*
 * Recursive factorial.
 *
 * n: value whose factorial is requested.
 *
 * Returns n! for n >= 1 and 1 for n <= 0. The result is an int, so it
 * overflows for large n.
 */
int fact(int n) {
    /* Stop at n <= 1 rather than only n == 1, so that 0 or a negative
     * argument can no longer recurse without end. */
    if (n <= 1)
        return 1;
    return n * fact(n - 1);
}
```
編譯後會產生一個起始位址為 0 的 binary(`-Wl,-Ttext=0x0`)。`-nostdlib` 跟之前的文章一樣,表示不連結任何標準函式庫。
# read_file
把編譯過的 binary 讀入記憶體中
```c=
/*
 * Load a raw binary image from disk into the beginning of emulated DRAM.
 *
 * cpu:      emulator state; the image is copied to cpu->bus.dram.mem[0..].
 * filename: path of the flat binary to load.
 *
 * On any failure (open, size query, image larger than DRAM, allocation,
 * short read) a message is written to stderr and DRAM is left untouched.
 */
void read_file(CPU *cpu, char *filename)
{
    uint8_t *buffer = NULL;
    unsigned long fileLen;
    long endPos;

    // Open file
    FILE *file = fopen(filename, "rb");
    if (!file)
    {
        fprintf(stderr, "Unable to open file %s", filename);
        return;                     // nothing to clean up yet
    }

    // Get file length; ftell() reports failure as a negative value
    if (fseek(file, 0, SEEK_END) != 0)
    {
        fprintf(stderr, "Unable to determine size of file %s\n", filename);
        goto cleanup;
    }
    endPos = ftell(file);
    if (endPos < 0 || fseek(file, 0, SEEK_SET) != 0)
    {
        fprintf(stderr, "Unable to determine size of file %s\n", filename);
        goto cleanup;
    }
    fileLen = (unsigned long) endPos;

    // Refuse images that would overflow the DRAM array
    if (fileLen > sizeof cpu->bus.dram.mem)
    {
        fprintf(stderr, "File %s (%lu bytes) does not fit in DRAM (%zu bytes)\n",
                filename, fileLen, sizeof cpu->bus.dram.mem);
        goto cleanup;
    }

    // An empty file has nothing to copy
    if (fileLen == 0)
        goto cleanup;

    // Allocate memory
    buffer = malloc(fileLen);
    if (!buffer)
    {
        fprintf(stderr, "Memory error!");
        goto cleanup;
    }

    // Read file contents into buffer; a short read means a truncated image
    if (fread(buffer, 1, fileLen, file) != fileLen)
    {
        fprintf(stderr, "Unable to read file %s\n", filename);
        goto cleanup;
    }

    // copy the bin executable to dram
    memcpy(cpu->bus.dram.mem, buffer, fileLen);

cleanup:
    free(buffer);                   // free(NULL) is a no-op
    fclose(file);
}
```
# start
```c=
int main(int argc, char *argv[])
{
if (argc != 2)
{
printf("Usage: rvemu <filename>\n");
exit(1);
}
// Initialize cpu, registers and program counter
struct CPU cpu;
cpu_init(&cpu);
// Read input file
read_file(&cpu, argv[1]);
// cpu loop
while (1)
{
// fetch
uint32_t inst = cpu_fetch(&cpu);
// Increment the program counter
// printf("next pc ->%x\n", cpu.pc);
printf("next -> %#.8lx ", cpu.pc ); // DEBUG
cpu.pc += 4;
// execute
if (!cpu_execute(&cpu, inst))
break;
dump_registers(&cpu);
if (cpu.pc == 0)
break;
}
/*dump_registers(&cpu);*/
return 0;
}
```
# init
進行初始化
```c=
// Declare the CPU state and let cpu_init() set x0, the stack pointer and pc.
struct CPU cpu;
cpu_init(&cpu);
// Read input file: the binary named on the command line is copied into DRAM.
read_file(&cpu, argv[1]);
```
這邊可以看到 CPU 的 x0 register 始終為 0,
`cpu->pc` 則從 `DRAM_BASE`(0x80000000)開始:
```c
#define DRAM_BASE 0x80000000
```
```c=
/*
 * Put the CPU into its start-up state.
 *
 * Only three fields are written: the program counter, the stack pointer
 * (x2) and the zero register (x0). Everything else is left as the caller
 * provided it.
 */
void cpu_init(CPU *cpu) {
    /* Execution starts at the first byte of DRAM. */
    cpu->pc = DRAM_BASE;
    /* The stack pointer starts just past the end of DRAM. */
    cpu->regs[2] = DRAM_BASE + DRAM_SIZE;
    /* x0 is hardwired to zero. */
    cpu->regs[0] = 0;
}
```
# start fetch
之後進入循環開始讀指令
```
// cpu loop
while (1)
{
// fetch
uint32_t inst = cpu_fetch(&cpu);
// Increment the program counter
// printf("next pc ->%x\n", cpu.pc);
printf("next -> %#.8lx ", cpu.pc ); // DEBUG
// NOTE(review): this unconditional break leaves the loop right after the
// first fetch, so none of the statements below it ever run. The full main()
// listing has no break at this point - it looks like a debugging leftover
// (or a deliberate "fetch only" demo); confirm before copying this excerpt.
break;
cpu.pc += 4;
// execute
if (!cpu_execute(&cpu, inst))
break;
dump_registers(&cpu);
// A program counter of 0 also ends the loop.
if (cpu.pc == 0)
break;
}
/*dump_registers(&cpu);*/
```
`cpu_fetch` 會經由 bus 去存取記憶體位址,這邊跟 gb 模擬器差不多;在小型的嵌入式系統中,記憶體還可以再切成更多區塊來運用。
```c=
/*
 * Fetch the instruction at the current program counter.
 *
 * Asks the bus for 32 bits at cpu->pc and returns them as the instruction
 * word. The program counter itself is not modified here.
 */
uint32_t cpu_fetch(CPU *cpu) {
    return (uint32_t) bus_load(&cpu->bus, cpu->pc, 32);
}
```
```c=
/*
 * Read `size` bits at address `addr` on behalf of the CPU.
 * The request is handed unchanged to the bus; its result is returned.
 */
uint64_t cpu_load(CPU* cpu, uint64_t addr, uint64_t size) {
    uint64_t loaded = bus_load(&cpu->bus, addr, size);
    return loaded;
}
/*
 * Write the low `size` bits of `value` to address `addr` on behalf of the
 * CPU. The request is handed unchanged to the bus.
 */
void cpu_store(CPU* cpu, uint64_t addr, uint64_t size, uint64_t value) {
    bus_store(&cpu->bus, addr, size, value);
    return;
}
```
```c=
/*
 * Route a load request to DRAM, the only device this bus forwards to.
 * Returns whatever dram_load() returns for (addr, size).
 */
uint64_t bus_load(BUS* bus, uint64_t addr, uint64_t size) {
    uint64_t loaded = dram_load(&bus->dram, addr, size);
    return loaded;
}
/*
 * Route a store request to DRAM, the only device this bus forwards to.
 */
void bus_store(BUS* bus, uint64_t addr, uint64_t size, uint64_t value) {
    dram_store(&bus->dram, addr, size, value);
    return;
}
```
可以看到細節:依照下方 `DRAM_SIZE` 的定義(`1024*1024*1` bytes),mem 的大小是 1 MiB,而不是 100 MB 或 4 MB。
```c
#define DRAM_SIZE 1024*1024*1
typedef struct DRAM {
uint8_t mem[DRAM_SIZE]; // Dram memory of DRAM_SIZE
} DRAM;
```
```c=
/* Read one byte from DRAM; `addr` is a bus address at or above DRAM_BASE. */
uint64_t dram_load_8(DRAM* dram, uint64_t addr){
    const uint64_t offset = addr - DRAM_BASE;   /* index into mem[] */
    return dram->mem[offset];
}
/* Read a 16-bit little-endian value (lowest address = least significant byte). */
uint64_t dram_load_16(DRAM* dram, uint64_t addr){
    const uint8_t *bytes = &dram->mem[addr - DRAM_BASE];
    uint64_t value = 0;
    /* Walk from the most significant byte down so each shift makes room. */
    for (int i = 1; i >= 0; i--)
        value = (value << 8) | bytes[i];
    return value;
}
/* Read a 32-bit little-endian value, zero-extended to 64 bits. */
uint64_t dram_load_32(DRAM* dram, uint64_t addr){
    const uint8_t *bytes = &dram->mem[addr - DRAM_BASE];
    uint64_t value = 0;
    /* Walk from the most significant byte down so each shift makes room. */
    for (int i = 3; i >= 0; i--)
        value = (value << 8) | bytes[i];
    return value;
}
/* Read a 64-bit little-endian value. */
uint64_t dram_load_64(DRAM* dram, uint64_t addr){
    const uint8_t *bytes = &dram->mem[addr - DRAM_BASE];
    uint64_t value = 0;
    /* Walk from the most significant byte down so each shift makes room. */
    for (int i = 7; i >= 0; i--)
        value = (value << 8) | bytes[i];
    return value;
}
/*
 * Dispatch a DRAM read by access width.
 *
 * size: width in bits; 8, 16, 32 and 64 are handled.
 * Returns the loaded value, or 1 for any other width.
 */
uint64_t dram_load(DRAM* dram, uint64_t addr, uint64_t size) {
    if (size == 8)
        return dram_load_8(dram, addr);
    if (size == 16)
        return dram_load_16(dram, addr);
    if (size == 32)
        return dram_load_32(dram, addr);
    if (size == 64)
        return dram_load_64(dram, addr);
    /* Unsupported width. */
    return 1;
}
/* Write the lowest byte of `value` to DRAM at bus address `addr`. */
void dram_store_8(DRAM* dram, uint64_t addr, uint64_t value) {
    const uint64_t offset = addr - DRAM_BASE;   /* index into mem[] */
    dram->mem[offset] = (uint8_t) value;
}
/* Write the low 16 bits of `value` in little-endian byte order. */
void dram_store_16(DRAM* dram, uint64_t addr, uint64_t value) {
    uint8_t *dst = &dram->mem[addr - DRAM_BASE];
    for (int i = 0; i < 2; i++)
        dst[i] = (uint8_t) (value >> (8 * i));  /* byte i of value */
}
/* Write the low 32 bits of `value` in little-endian byte order. */
void dram_store_32(DRAM* dram, uint64_t addr, uint64_t value) {
    uint8_t *dst = &dram->mem[addr - DRAM_BASE];
    for (int i = 0; i < 4; i++)
        dst[i] = (uint8_t) (value >> (8 * i));  /* byte i of value */
}
/* Write all 64 bits of `value` in little-endian byte order. */
void dram_store_64(DRAM* dram, uint64_t addr, uint64_t value) {
    uint8_t *dst = &dram->mem[addr - DRAM_BASE];
    for (int i = 0; i < 8; i++)
        dst[i] = (uint8_t) (value >> (8 * i));  /* byte i of value */
}
/*
 * Dispatch a DRAM write by access width.
 *
 * size: width in bits; 8, 16, 32 and 64 are handled, any other width is
 *       silently ignored.
 */
void dram_store(DRAM* dram, uint64_t addr, uint64_t size, uint64_t value) {
    if (size == 8)
        dram_store_8(dram, addr, value);
    else if (size == 16)
        dram_store_16(dram, addr, value);
    else if (size == 32)
        dram_store_32(dram, addr, value);
    else if (size == 64)
        dram_store_64(dram, addr, value);
}
```
從細節可以看到:傳入的 addr 會先減去 `DRAM_BASE`,換算成 `mem` 陣列的索引之後再進行存取。
讀取時則以 little-endian 的順序,用位移與 OR 把各個 byte 組合起來,再以 `uint64_t` 型態回傳給呼叫端。
再重新看這個 function:
```c=
/*
 * Fetch the instruction at the current program counter.
 *
 * Asks the bus for 32 bits at cpu->pc and returns them as the instruction
 * word. The program counter itself is not modified here.
 */
uint32_t cpu_fetch(CPU *cpu) {
    return (uint32_t) bus_load(&cpu->bus, cpu->pc, 32);
}
```
每次讀取 4 個 bytes(32 bits),接著 `pc += 4`,
這樣每次就可以取得一條指令。
到這裡可以看到 pc 每次都會 +4,直到 `cpu_execute` 回傳 0(遇到 opcode 0x00 或無法解碼的指令)或 pc 變成 0,才會跳出迴圈。
```c=
// Print the address of the instruction that was just fetched.
printf("next -> %#.8lx ", cpu.pc ); // DEBUG
// Advance pc before executing, so handlers see the next instruction's address.
cpu.pc += 4;
// execute; a return value of 0 ends the loop
if (!cpu_execute(&cpu, inst))
break;
```
# execute
執行一條指令
`cpu_execute` 這邊就要查詢 RISC-V 的規格書。
指令最低的 7 個 bits(`inst & 0x7f`)就是 opcode,根據 opcode 可以對指令做出第一層分類,例如 JAL、B_TYPE、S_TYPE 等等;
接著 funct3、funct7 又可以再分一層,最後才會找到最終的指令並執行,例如 exec_BEQ、exec_JAL。
```c=
/*
 * Decode one 32-bit instruction and run the matching exec_* handler.
 *
 * The opcode (bits 6..0) selects the instruction group; funct3 (bits 14..12)
 * and funct7 (bits 31..25) select the instruction inside the group.
 * The caller has already advanced cpu->pc by 4, which is why the debug
 * print shows cpu->pc - 4 as the address of this instruction.
 *
 * Returns 1 to keep running. Returns 0 when the opcode is 0x00, when the
 * opcode is unknown, or when a group that reports errors (I_TYPE, R_TYPE,
 * CSR, AMO_W) cannot decode funct3/funct7. Other groups silently ignore
 * unknown sub-codes.
 *
 * NOTE(review): exec_SUB and exec_SRAIW are assumed to be defined next to
 * exec_ADD and exec_SRLIW - confirm against the exec_* implementations.
 */
int cpu_execute(CPU *cpu, uint32_t inst) {
    int opcode = inst & 0x7f;           // opcode in bits 6..0
    int funct3 = (inst >> 12) & 0x7;    // funct3 in bits 14..12
    int funct7 = (inst >> 25) & 0x7f;   // funct7 in bits 31..25

    cpu->regs[0] = 0;                   // x0 hardwired to 0 at each cycle

    printf("%s\n%#.8lx -> Inst: %#.8x <OpCode: %#.2x, funct3:%#x, funct7:%#x> %s",
            ANSI_YELLOW, cpu->pc-4, inst, opcode, funct3, funct7, ANSI_RESET);

    switch (opcode) {
        case LUI:   exec_LUI(cpu, inst); break;
        case AUIPC: exec_AUIPC(cpu, inst); break;

        case JAL:   exec_JAL(cpu, inst); break;
        case JALR:  exec_JALR(cpu, inst); break;

        case B_TYPE:
            switch (funct3) {
                case BEQ:   exec_BEQ(cpu, inst); break;
                case BNE:   exec_BNE(cpu, inst); break;
                case BLT:   exec_BLT(cpu, inst); break;
                case BGE:   exec_BGE(cpu, inst); break;
                case BLTU:  exec_BLTU(cpu, inst); break;
                case BGEU:  exec_BGEU(cpu, inst); break;
                default: ;
            } break;

        case LOAD:
            switch (funct3) {
                case LB  :  exec_LB(cpu, inst); break;
                case LH  :  exec_LH(cpu, inst); break;
                case LW  :  exec_LW(cpu, inst); break;
                case LD  :  exec_LD(cpu, inst); break;
                case LBU :  exec_LBU(cpu, inst); break;
                case LHU :  exec_LHU(cpu, inst); break;
                case LWU :  exec_LWU(cpu, inst); break;
                default: ;
            } break;

        case S_TYPE:
            switch (funct3) {
                case SB  :  exec_SB(cpu, inst); break;
                case SH  :  exec_SH(cpu, inst); break;
                case SW  :  exec_SW(cpu, inst); break;
                case SD  :  exec_SD(cpu, inst); break;
                default: ;
            } break;

        case I_TYPE:
            switch (funct3) {
                case ADDI:  exec_ADDI(cpu, inst); break;
                case SLLI:  exec_SLLI(cpu, inst); break;
                case SLTI:  exec_SLTI(cpu, inst); break;
                case SLTIU: exec_SLTIU(cpu, inst); break;
                case XORI:  exec_XORI(cpu, inst); break;
                case SRI:
                    // On RV64 the lowest funct7 bit (inst bit 25) is
                    // shamt[5], not part of the function code; mask it off
                    // so shifts by 32..63 still select SRLI/SRAI.
                    switch (funct7 & 0x7e) {
                        case SRLI:  exec_SRLI(cpu, inst); break;
                        case SRAI:  exec_SRAI(cpu, inst); break;
                        default: ;
                    } break;
                case ORI:   exec_ORI(cpu, inst); break;
                case ANDI:  exec_ANDI(cpu, inst); break;
                default:
                    fprintf(stderr,
                            "[-] ERROR-> opcode:0x%x, funct3:0x%x, funct7:0x%x\n"
                            , opcode, funct3, funct7);
                    return 0;
            } break;

        case R_TYPE:
            switch (funct3) {
                case ADDSUB:
                    switch (funct7) {
                        // Each case needs its own break: without it ADD ran
                        // the handler twice, and SUB used to call exec_ADD.
                        case ADD: exec_ADD(cpu, inst); break;
                        case SUB: exec_SUB(cpu, inst); break;
                        default: ;
                    } break;
                case SLL:  exec_SLL(cpu, inst); break;
                case SLT:  exec_SLT(cpu, inst); break;
                case SLTU: exec_SLTU(cpu, inst); break;
                case XOR:  exec_XOR(cpu, inst); break;
                case SR:
                    switch (funct7) {
                        case SRL:  exec_SRL(cpu, inst); break;
                        case SRA:  exec_SRA(cpu, inst); break;
                        default: ;
                    } break;    // do not fall through into OR
                case OR:   exec_OR(cpu, inst); break;
                case AND:  exec_AND(cpu, inst); break;
                default:
                    fprintf(stderr,
                            "[-] ERROR-> opcode:0x%x, funct3:0x%x, funct7:0x%x\n"
                            , opcode, funct3, funct7);
                    return 0;
            } break;

        case FENCE: exec_FENCE(cpu, inst); break;

        case I_TYPE_64:
            switch (funct3) {
                case ADDIW: exec_ADDIW(cpu, inst); break;
                case SLLIW: exec_SLLIW(cpu, inst); break;
                case SRIW :
                    switch (funct7) {
                        case SRLIW: exec_SRLIW(cpu, inst); break;
                        // Arithmetic shift needs its own handler.
                        case SRAIW: exec_SRAIW(cpu, inst); break;
                    } break;
            } break;

        case R_TYPE_64:
            switch (funct3) {
                case ADDSUB:
                    switch (funct7) {
                        case ADDW:  exec_ADDW(cpu, inst); break;
                        case SUBW:  exec_SUBW(cpu, inst); break;
                        case MULW:  exec_MULW(cpu, inst); break;
                    } break;
                case DIVW:  exec_DIVW(cpu, inst); break;
                case SLLW:  exec_SLLW(cpu, inst); break;
                case SRW:
                    switch (funct7) {
                        case SRLW:  exec_SRLW(cpu, inst); break;
                        case SRAW:  exec_SRAW(cpu, inst); break;
                        case DIVUW: exec_DIVUW(cpu, inst); break;
                    } break;
                case REMW:  exec_REMW(cpu, inst); break;
                case REMUW: exec_REMUW(cpu, inst); break;
                default: ;
            } break;

        case CSR:
            switch (funct3) {
                case ECALLBREAK:    exec_ECALLBREAK(cpu, inst); break;
                case CSRRW  :  exec_CSRRW(cpu, inst); break;
                case CSRRS  :  exec_CSRRS(cpu, inst); break;
                case CSRRC  :  exec_CSRRC(cpu, inst); break;
                case CSRRWI :  exec_CSRRWI(cpu, inst); break;
                case CSRRSI :  exec_CSRRSI(cpu, inst); break;
                case CSRRCI :  exec_CSRRCI(cpu, inst); break;
                default:
                    fprintf(stderr,
                            "[-] ERROR-> opcode:0x%x, funct3:0x%x, funct7:0x%x\n"
                            , opcode, funct3, funct7);
                    return 0;
            } break;

        case AMO_W:
            switch (funct7 >> 2) { // since, funct[1:0] = aq, rl
                case LR_W      :  exec_LR_W(cpu, inst); break;
                case SC_W      :  exec_SC_W(cpu, inst); break;
                case AMOSWAP_W :  exec_AMOSWAP_W(cpu, inst); break;
                case AMOADD_W  :  exec_AMOADD_W(cpu, inst); break;
                case AMOXOR_W  :  exec_AMOXOR_W(cpu, inst); break;
                case AMOAND_W  :  exec_AMOAND_W(cpu, inst); break;
                case AMOOR_W   :  exec_AMOOR_W(cpu, inst); break;
                case AMOMIN_W  :  exec_AMOMIN_W(cpu, inst); break;
                case AMOMAX_W  :  exec_AMOMAX_W(cpu, inst); break;
                case AMOMINU_W :  exec_AMOMINU_W(cpu, inst); break;
                case AMOMAXU_W :  exec_AMOMAXU_W(cpu, inst); break;
                default:
                    fprintf(stderr,
                            "[-] ERROR-> opcode:0x%x, funct3:0x%x, funct7:0x%x\n"
                            , opcode, funct3, funct7);
                    return 0;
            } break;

        // An all-zero opcode stops the emulator without an error message.
        case 0x00:
            return 0;

        default:
            // The third value printed is funct7; the label used to say funct3.
            fprintf(stderr,
                    "[-] ERROR-> opcode:0x%x, funct3:0x%x, funct7:0x%x\n"
                    , opcode, funct3, funct7);
            return 0;
    }
    return 1;
}
```
指令進來之後,常常需要依照指令格式(type)取出某些特定的 bit 區間;下面這些 decoder 函式負責擷取對應的欄位後回傳。
```c=
//=====================================================================================
// Instruction Decoder Functions
//=====================================================================================
/* Destination register index: instruction bits 11..7. */
uint64_t rd(uint32_t inst) {
    const uint32_t field = (inst & 0x00000f80u) >> 7;
    return field;
}
/* First source register index: instruction bits 19..15. */
uint64_t rs1(uint32_t inst) {
    const uint32_t field = (inst & 0x000f8000u) >> 15;
    return field;
}
/* Second source register index: instruction bits 24..20. */
uint64_t rs2(uint32_t inst) {
    const uint32_t field = (inst & 0x01f00000u) >> 20;
    return field;
}
/*
 * I-type immediate: imm[11:0] = inst[31:20], sign-extended to 64 bits.
 */
uint64_t imm_I(uint32_t inst) {
    uint64_t imm = inst >> 20;              /* raw 12-bit field */
    if (imm & 0x800)                        /* imm[11] is the sign bit */
        imm |= ~(uint64_t) 0xfff;
    return imm;
}
/*
 * S-type immediate: imm[11:5] = inst[31:25], imm[4:0] = inst[11:7],
 * sign-extended to 64 bits.
 */
uint64_t imm_S(uint32_t inst) {
    const uint64_t upper = (inst >> 25) & 0x7f;     /* imm[11:5] */
    const uint64_t lower = (inst >> 7) & 0x1f;      /* imm[4:0]  */
    uint64_t imm = (upper << 5) | lower;
    if (imm & 0x800)                                /* imm[11] is the sign bit */
        imm |= ~(uint64_t) 0xfff;
    return imm;
}
/*
 * B-type immediate: imm[12|10:5|4:1|11] = inst[31|30:25|11:8|7],
 * sign-extended to 64 bits. Bit 0 of the result is always 0.
 */
uint64_t imm_B(uint32_t inst) {
    uint64_t imm = ((uint64_t) ((inst >> 31) & 0x1)  << 12)    /* imm[12]   */
                 | ((uint64_t) ((inst >> 7)  & 0x1)  << 11)    /* imm[11]   */
                 | ((uint64_t) ((inst >> 25) & 0x3f) << 5)     /* imm[10:5] */
                 | ((uint64_t) ((inst >> 8)  & 0xf)  << 1);    /* imm[4:1]  */
    if (imm & 0x1000)                                          /* sign bit  */
        imm |= ~(uint64_t) 0x1fff;
    return imm;
}
/*
 * U-type immediate: imm[31:12] = inst[31:12], low 12 bits zero,
 * sign-extended from bit 31 to 64 bits.
 */
uint64_t imm_U(uint32_t inst) {
    uint64_t imm = inst & 0xfffff000u;
    if (imm & 0x80000000u)                  /* bit 31 is the sign bit */
        imm |= 0xffffffff00000000ULL;
    return imm;
}
/*
 * J-type immediate: imm[20|10:1|11|19:12] = inst[31|30:21|20|19:12],
 * sign-extended to 64 bits. Bit 0 of the result is always 0.
 */
uint64_t imm_J(uint32_t inst) {
    uint64_t imm = ((uint64_t) ((inst >> 31) & 0x1)   << 20)   /* imm[20]    */
                 | (uint64_t) (inst & 0xff000u)                /* imm[19:12] */
                 | ((uint64_t) ((inst >> 20) & 0x1)   << 11)   /* imm[11]    */
                 | ((uint64_t) ((inst >> 21) & 0x3ff) << 1);   /* imm[10:1]  */
    if (imm & 0x100000)                                        /* sign bit   */
        imm |= ~(uint64_t) 0x1fffff;
    return imm;
}
/*
 * Shift amount of an immediate shift instruction.
 *
 * On RV64 the shift amount is the low 6 bits of the I-immediate,
 * shamt[5:0] = inst[25:20], so shifts by 32..63 are representable.
 * The previous 5-bit mask (0x1f) dropped shamt[5] and turned such shifts
 * into shifts by 0..31. The 32-bit "W" shifts only use shamt[4:0]; their
 * valid encodings have inst[25] = 0, so they decode to the same value.
 */
uint32_t shamt(uint32_t inst) {
    return (inst >> 20) & 0x3f;
}
/* CSR address: csr[11:0] = inst[31:20], zero-extended (never sign-extended). */
uint64_t csr(uint32_t inst) {
    const uint32_t address = (inst >> 20) & 0xfffu;
    return address;
}
```
# jump
jump 的時候,以 exec_JAL 來說:
`cpu->regs[rd(inst)] = cpu->pc;`
會把目前的 pc 存進 rd。因為主迴圈在執行前已經先做了 `pc += 4`,這個值其實是 JAL 下一條指令的位址,也就是返回位址。
實際上跳躍的時候:
`cpu->pc = cpu->pc + (int64_t) imm - 4;`
這邊的 -4 是把 pc 還原成 JAL 指令本身的位址,再加上立即數完成跳躍。
a=a+1
jump main <== store address
a=a+1 pc-4 ,addree + imm address
jump main <== store address
其他文章有提到 imm 可能需要再左移一位;以這個模擬器來說,`imm_J` 解碼時已經把各個 bit 放到 imm[20:1] 的位置(bit 0 固定為 0),所以這邊不需要再額外左移。
```c=
/*
 * JAL: jump and link.
 *
 * The main loop has already advanced cpu->pc by 4, so on entry cpu->pc is
 * the address of the instruction after this JAL.
 *   - rd receives that address (the return address).
 *   - pc becomes (address of this JAL) + J-immediate; the "- 4" undoes the
 *     pre-increment.
 * A misaligned target is fatal: a message is printed and the process exits
 * with a non-zero status (it used to exit with 0, which reports success).
 */
void exec_JAL(CPU* cpu, uint32_t inst) {
    uint64_t imm = imm_J(inst);
    cpu->regs[rd(inst)] = cpu->pc;              // link: return address
    cpu->pc = cpu->pc + (int64_t) imm - 4;      // jump relative to the JAL itself
    print_op("jal\n");
    if (ADDR_MISALIGNED(cpu->pc)) {
        fprintf(stderr, "JAL pc address misalligned");
        exit(1);
    }
}
```
exec_JALR 常常搭配 jal 使用:有跳過去就要跳回來。也可以看到
`uint64_t tmp = cpu->pc;` 會先保存目前的 pc(主迴圈已經先 +4,所以是下一條指令的位址),
`cpu->pc = (cpu->regs[rs1(inst)] + (int64_t) imm) & 0xfffffffe;`
讀取 register 的值再加上立即數,並清除最低位元之後進行跳躍。
最後 `cpu->regs[rd(inst)] = tmp;` 再把剛才保存的返回位址寫入 rd。
```c=
/*
 * JALR: jump and link register.
 *
 * The main loop has already advanced cpu->pc by 4, so on entry cpu->pc is
 * the return address.
 *   - pc becomes (rs1 + I-immediate) with bit 0 cleared.
 *   - rd receives the return address; it is saved in a temporary first
 *     because rd and rs1 may be the same register.
 * A misaligned target is fatal: a message is printed and the process exits
 * with a non-zero status.
 */
void exec_JALR(CPU* cpu, uint32_t inst) {
    uint64_t imm = imm_I(inst);
    uint64_t tmp = cpu->pc;
    // Clear only bit 0. The old mask 0xfffffffe is a 32-bit constant that is
    // zero-extended, so it also wiped bits 63..32 of the target address.
    cpu->pc = (cpu->regs[rs1(inst)] + (int64_t) imm) & ~(uint64_t) 1;
    cpu->regs[rd(inst)] = tmp;
    print_op("jalr\n");
    if (ADDR_MISALIGNED(cpu->pc)) {
        fprintf(stderr, "JALR pc address misalligned");
        exit(1);
    }
}
```
# beq 、、、
這邊會比對兩個 register 的值;條件成立時,一樣先把 pc 減 4 回到分支指令本身的位址,再加上立即數完成跳躍。
a=a+1 pc-4 ,addree + imm address
if(rs1==rs2 )jump main <== store address
```c=
/*
 * BEQ: branch when rs1 and rs2 hold equal values.
 *
 * When taken, pc becomes (pc - 4) + B-immediate, i.e. the offset is applied
 * to the address of the branch itself (pc was incremented before execute).
 * When not taken, pc is left as it is.
 */
void exec_BEQ(CPU* cpu, uint32_t inst) {
    const int64_t lhs = (int64_t) cpu->regs[rs1(inst)];
    const int64_t rhs = (int64_t) cpu->regs[rs2(inst)];
    if (lhs == rhs) {
        const uint64_t offset = imm_B(inst);
        cpu->pc = cpu->pc + (int64_t) offset - 4;
    }
    print_op("beq\n");
}
/*
 * BNE: branch when rs1 and rs2 hold different values.
 *
 * When taken, pc becomes (pc - 4) + B-immediate, i.e. the offset is applied
 * to the address of the branch itself (pc was incremented before execute).
 * When not taken, pc is left as it is.
 */
void exec_BNE(CPU* cpu, uint32_t inst) {
    const int64_t lhs = (int64_t) cpu->regs[rs1(inst)];
    const int64_t rhs = (int64_t) cpu->regs[rs2(inst)];
    if (lhs != rhs) {
        const uint64_t offset = imm_B(inst);
        cpu->pc = cpu->pc + (int64_t) offset - 4;
    }
    print_op("bne\n");
}
```
後面就是一些左移右移,原子操作的東西,要快速學習risc v asm可以看這邊的範例
https://github.com/x213212/riscv-operating-system-mooc/tree/main/code/asm
裡面有配合gdb可以進行debug,作者最終想在這個模擬器上運行一個linux
```c=
// Forward declaration: fact() is defined after main().
int fact(int n);
// Entry point of the sample program that is compiled into test.bin.
int main() {
int a = 10;
// The call to fact() is currently disabled.
/*return fact(a);*/
// Returns 10 - 11, i.e. -1.
return a-11;
}
/*
 * Recursive factorial.
 *
 * n: value whose factorial is requested.
 *
 * Returns n! for n >= 1 and 1 for n <= 0. The result is an int, so it
 * overflows for large n.
 */
int fact(int n) {
    /* Stop at n <= 1 rather than only n == 1, so that 0 or a negative
     * argument can no longer recurse without end. */
    if (n <= 1)
        return 1;
    return n * fact(n - 1);
}
```
如果要在這個模擬器上載入 OS,OS 裡面的一些 function(例如 print)被呼叫時,我們的模擬器就要解析這段 asm,然後把它對接到外部(host)系統的 printf;這樣就可以從模擬器再去呼叫 glibc 的函式,進一步把內容顯示到 terminal。
想研究有沒有虛擬指令(pseudo-instruction)時,也可以先編譯 tests 裡面的測試檔案,再查看產生的 .s 檔案。
![](https://i.imgur.com/MPwB0MC.png)
例如
`sext.w rd, rs` 會被等效成 `addiw rd, rs, 0`
```asm
# As emitted by the compiler: an addiw followed by the sext.w pseudo-instruction.
addiw a5,a5,-12
sext.w a5,a5
```
![](https://i.imgur.com/Z64rQ8w.png)
也就是
```asm
# Same sequence with sext.w written as its equivalent addiw with immediate 0.
addiw a5,a5,-12
addiw a5,a5,0
```
運行模擬器可以看到實際指令結果
![](https://i.imgur.com/Z21yuDK.png)