contributed by <weishiuan>
Building on sammer1077's contributions, the objective is to ensure rv32emu-next successfully passes the riscv-arch-test. This involves:
The RISC-V architecture test ensures a CPU model meets the RISC-V specification:
After compiling and running the tests, find the ELF files in the riscv-arch-test/work/C directories. Use riscv-none-embed-objdump -D
on an ELF file to disassemble it and get more information.
The compliance test checks specific instructions for functional and decode correctness: results must be correct, x0 must never be modified, and every encoding must be decoded properly. Any difference between the test signature and the reference signature leads to test failure.
Compressed instructions are shorter in special cases:
The C extension is compatible with all other standard instructions: compressed instructions can be freely intermixed with 32-bit instructions, so an instruction may start on any 16-bit boundary.
Failed tests:
These tests are not supported by the computed-goto implementation.
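For any instruction fetched by rv32emu-next, the lowest 2 bits tell whether it is a standard 32-bit instruction (both bits set) or a 16-bit compressed instruction (anything else):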
// Main interpreter loop: keep executing until the cycle counter reaches
// cycles_target or rv->halt is set.
while (rv->csr_cycle < cycles_target && !rv->halt) {
// Fetch the next instruction
inst = rv->io.mem_ifetch(rv, rv->PC);
// Both low bits set means a 32-bit instruction; every other pattern is
// handled as a 16-bit compressed instruction.
if ((inst & 3) == 3) {
// Standard uncompressed instruction
// The handler is selected by the INST_6_2 field of the instruction word.
uint32_t index = (inst & INST_6_2) >> 2;
TABLE_TYPE op = jump_table[index];
assert(op);
// Record the length so handlers can advance the PC by the right amount.
rv->inst_len = INST_32;
// Dispatch and execute the opcode; a false return leaves the loop
if (!op(rv, inst))
break;
// Increment the cycles csr
rv->csr_cycle++;
} else {
// Compressed instruction
// The handler index combines the FR_C_15_13 field (shifted down by 11)
// with the FR_C_1_0 field.
// NOTE(review): presumably these are funct3 (bits 15:13) and the 2-bit
// opcode (bits 1:0), giving a 5-bit index - confirm against the masks.
const uint16_t c_index = ((inst & FR_C_15_13) >> 11) | (inst & FR_C_1_0);
const c_opcode_t op = c_opcodes[c_index];
assert(op);
rv->inst_len = INST_16;
// Dispatch and execute the compressed opcode; a false return leaves the loop
if (!op(rv, inst))
break;
// Increment the cycles csr
rv->csr_cycle++;
}
}
After running the compliance test, check and record each failed instruction. Go to the work directories in riscv-arch-test
and find the corresponding .elf and .diff files.
The .diff file shows the differences between the test signature and the reference signature.
The .elf file can be disassembled with riscv-none-embed-objdump -D to see the instructions of the test code.
Run ./rv32emu --trace FailInstructionTest.elf to trace the runtime information. Check each line, especially the instruction under test. Manually calculate the expected result and compare it with the printed message. If they differ, use the discrepancy to locate the faulty code.
Note that some instruction tests will use another instruction; ensure correctness in other instruction usage.
// rd == 2 selects C.ADDI16SP, any other non-zero rd selects C.LUI, and
// rd == 0 is left as a HINT.
if (rd == 2) {
// C.ADDI16SP
// Gather the scattered immediate bits:
//   inst[12] -> imm[9], inst[6] -> imm[4], inst[5] -> imm[6],
//   inst[4:3] -> imm[8:7], inst[2] -> imm[5]
uint32_t tmp = (inst & 0x1000) >> 3 | (inst & 0x40) >> 2 | (inst & 0x20) << 1 |
(inst & 0x18) << 4 | (inst & 0x4) << 3;
// Sign-extend from bit 9.
const uint32_t imm = (tmp & 0x200) ? (0xfffffc00 | tmp) : tmp;
if (imm != 0) {
rv->X[rd] += imm;
} else {
// Handle reserved parts (add-on)
// A zero immediate currently leaves the register untouched.
}
} else if (rd != 0) {
// C.LUI
// Immediate bits: inst[12] -> imm[17], inst[6:2] -> imm[16:12]
uint32_t tmp = (inst & 0x1000) << 5 | (inst & 0x7c) << 10;
// Sign-extend from bit 17.
const int32_t imm = (tmp & 0x20000) ? (0xfffc0000 | tmp) : tmp;
if (imm != 0) {
rv->X[rd] = imm;
} else {
// Handle reserved parts
// A zero immediate currently leaves the register untouched.
}
} else {
// HINTS
// rd == 0: no register is written.
}
rv->PC += rv->inst_len;
return true;
}
c.jalr expands to jalr x1, 0(rs1). Special part in the test code:
inst_8:
auipc ra, 0x0
addi ra, ra, 16 # 80000240 <inst_8+0x10>
jalr ra
xori ra, ra, 2
j 80000244 # Jump to inst_8+0x14
xori ra, ra, 3
auipc a1, 0x0
addi a1, a1, -20 # 80000230 <inst_8>
andi a1, a1, -4
sub ra, ra, a1
sw ra, 32(a0)
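The original implementation saves the next PC to ra first and then jumps to the value read from register rs1. When rs1 is ra itself (as in the jalr ra above), ra has already been overwritten by the time it is read, so the jump goes to the wrong address: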
// C.JALR
// The link value is the address of the following instruction (PC + 2).
const uint32_t next_pc = rv->PC + 2;
// x1 is overwritten here, before X[rs1] is read on the next line. When
// rs1 is 1 the jump target therefore becomes next_pc rather than the value
// the register held before the instruction.
rv->X[1] = next_pc;
rv->PC = rv->X[rs1];
// An odd target address is reported as a misaligned instruction fetch.
if (rv->PC & 1) {
rv_except_inst_misaligned(rv, rv->PC);
return false;
}
// Branch is possible
return false;
The fix first copies the value of register rs1 into a variable (rs_value), then saves the next PC to ra, and finally jumps to rs_value:
// C.JALR
// Read the jump target first so that writing the link register below
// cannot clobber it when rs1 is the link register itself.
const int32_t rs_value = rv->X[rs1];
// The link value is the address of the instruction after this one.
const uint32_t next_pc = rv->PC + rv->inst_len;
rv->X[rv_reg_ra] = next_pc;
rv->PC = rs_value;
// An odd target address is reported as a misaligned instruction fetch.
if (rv->PC & 0x1) {
rv_except_inst_misaligned(rv, rv->PC);
return false;
}
// Branch is possible
return false;
In the rv32emu-next project, computed goto is applied after every instruction handler through a unified DISPATCH step. Although the dispatch code is replicated at each handler rather than shared, defining it as a macro keeps the program easy to maintain. The TARGET macro defines a label op_xxxx for each instruction handler,
so that every handler ends with its own jump instead of sharing a single one.
/* Emit the label op_<instr>, run that instruction through EXEC(), and then
 * DISPATCH() to the next instruction. */
#define TARGET(instr) \
op_##instr : EXEC(instr); \
DISPATCH();
This approach significantly improves the speed of rv32emu-next by enhancing the branch prediction of the program itself, rather than the RISC-V model's branch prediction.
To integrate computed goto with RV32C, several components are prepared:
A table storing pointers to functions is created to facilitate efficient dispatching.
/*
 * Handler table for the RV32C (compressed) instructions.
 *
 * A compressed instruction is selected by its 3-bit funct3 field together
 * with its 2-bit opcode, which gives 8 x 4 = 32 slots. Rows are indexed by
 * funct3 and columns by opcode, so the flat position of a slot is
 * (funct3 << 2) | opcode.
 *
 * The table needs eight rows: with only four, the handlers for funct3
 * values 100..111 are excess initializers and never make it into the table.
 * Rows 0..3 keep their previous positions.
 *
 * The last column (opcode 11) is always `unimp`, because an instruction
 * whose two low bits are both set is a standard uncompressed instruction.
 */
typedef struct {
    TABLE_TYPE_RVC entries[8][4];
} JumpTableRVC;

JumpTableRVC jump_table_rvc = {
    .entries = {
        /* opcode:   00           01          10          11 */
        {OP(caddi4spn), OP(caddi), OP(cslli), OP(unimp)},   /* funct3 000 */
        {OP(cfld), OP(cjal), OP(cfldsp), OP(unimp)},        /* funct3 001 */
        {OP(clw), OP(cli), OP(clwsp), OP(unimp)},           /* funct3 010 */
        {OP(cflw), OP(clui), OP(cflwsp), OP(unimp)},        /* funct3 011 */
        {OP(unimp), OP(cmisc_alu), OP(ccr), OP(unimp)},     /* funct3 100 */
        {OP(cfsd), OP(cj), OP(cfsdsp), OP(unimp)},          /* funct3 101 */
        {OP(csw), OP(cbeqz), OP(cswsp), OP(unimp)},         /* funct3 110 */
        {OP(cfsw), OP(cbnez), OP(cfswsp), OP(unimp)},       /* funct3 111 */
    },
};
This component checks whether the CPU has halted or if the trace cycle meets the target. It then fetches the instruction and determines whether it is compressed or uncompressed, adjusting the instruction length accordingly.
/*
 * DISPATCH: decide whether to keep running and hand the next instruction to
 * its handler.
 *
 * Jumps to the `quit` label once csr_cycle has reached cycles_target or
 * rv->halt is set. Otherwise it fetches the word at rv->PC. If both low bits
 * are set the word is treated as a 32-bit instruction; if not, it is treated
 * as a 16-bit compressed instruction and the upper half of the fetched word
 * is masked off. rv->inst_len is set to match before the handler is invoked.
 *
 * NOTE(review): `index` and `c_index` are computed but not referenced again
 * inside this macro as shown - confirm whether the handle_*_instruction
 * helpers are meant to receive them.
 */
#define DISPATCH() \
{ \
if (rv->csr_cycle >= cycles_target || rv->halt) \
goto quit; \
/* fetch the next instruction */ \
inst = rv->io.mem_ifetch(rv, rv->PC); \
/* standard uncompressed instruction */ \
if ((inst & 3) == 3) { \
uint32_t index = (inst & INST_6_2) >> 2; \
rv->inst_len = INST_32; \
handle_uncompressed_instruction(rv, inst, jump_table); \
} else { \
/* Compressed Extension Instruction */ \
inst &= 0x0000FFFF; \
int16_t c_index = (inst & FC_FUNC3) >> 11 | (inst & FC_OPCODE); \
rv->inst_len = INST_16; \
handle_compressed_instruction(rv, inst, jump_table_rvc); \
} \
}
The EXEC component remains unchanged. It dispatches the opcode and increments the cycle counter.
/*
 * EXEC: execute one instruction via execute_opcode(). If the handler returns
 * false, control jumps to the `quit` label without counting the cycle;
 * otherwise the cycle CSR is incremented.
 */
#define EXEC(instr) \
do { \
if (!execute_opcode(rv, instr, inst)) \
goto quit; \
rv->csr_cycle++; \
} while (0)
This part ensures instruction fetching after handling the instruction.
/*
 * Handlers for the compressed instructions, compiled only when ENABLE_RV32C
 * is defined.
 *
 * NOTE(review): each line expands EXEC() only, which neither defines an
 * op_<instr> label nor dispatches to the next instruction. The TARGET()
 * macro does both - confirm whether TARGET(...) was intended here.
 */
#ifdef ENABLE_RV32C
EXEC(caddi4spn);
EXEC(caddi);
EXEC(cslli);
EXEC(cjal);
EXEC(clw);
EXEC(cli);
EXEC(clwsp);
EXEC(clui);
EXEC(cmisc_alu);
EXEC(ccr);
EXEC(cj);
EXEC(csw);
EXEC(cbeqz);
EXEC(cswsp);
EXEC(cbnez);
#endif
The project is currently in a pull request stage. Extensive modifications are planned in the commit messages based on the reviewer's advice. The team is committed to delivering the best results by the end of the project.
The above are outdated. Did you figure out how to simulate the RISC-V instructions along with the extensions yet?
This project focuses on integrating RV32C, contributed by ccs100203 and Uduru0522, with computed goto in rv32emu-next. RV32C, the standard extension for compressed instructions, shortens the encoding of instructions that meet specific conditions. Compliance test considerations include ensuring correct instruction functionality and checking for any "reserved" or "HINT" decode results. The computed-goto technique improves branch prediction accuracy by giving each instruction handler its own independently predicted jump.