# SRV32
[TOC]

## IF stage
### module 1 : Immediate compute
```verilog=
// Decode the 32-bit immediate of the fetched instruction, selected by opcode.
always @* begin
    case (inst[`OPCODE])
        OP_AUIPC : imm = {inst[31:12], 12'd0};                                        // U-type
        OP_LUI   : imm = {inst[31:12], 12'd0};                                        // U-type
        OP_JAL   : imm = {{12{inst[31]}}, inst[19:12], inst[20], inst[30:21], 1'b0};  // J-type
        OP_JALR  : imm = {{20{inst[31]}}, inst[31:20]};                               // I-type
        OP_BRANCH: imm = {{20{inst[31]}}, inst[7], inst[30:25], inst[11:8], 1'b0};    // B-type
        OP_LOAD  : imm = {{20{inst[31]}}, inst[31:20]};                               // I-type
        OP_STORE : imm = {{20{inst[31]}}, inst[31:25], inst[11:7]};                   // S-type
        // I-type: the two shift funct3 values use only the 5-bit field inst[24:20]
        // (zero-extended); every other funct3 sign-extends inst[31:20].
        OP_ARITHI: imm = (inst[`FUNC3] == OP_SLL || inst[`FUNC3] == OP_SR) ?
                         {27'h0, inst[24:20]} :
                         {{20{inst[31]}}, inst[31:20]};
        OP_ARITHR: imm = 'd0;                                                         // R-type
        OP_FENCE : imm = 'd0;
        // Zero-extended inst[31:20]; the EXE/CSR logic later reads ex_imm[11:0]
        // as the CSR address and ex_imm[1:0] as the system-call selector.
        OP_SYSTEM: imm = {20'h0, inst[31:20]};
        default  : imm = 'd0;
    endcase
end
```
### module 2 : controller for exe(include csr)
```verilog=
// IF -> EXE pipeline register: decodes the fetched instruction into the
// control signals used by the EXE stage. Holds its value while if_stall is set.
always @(posedge clk or negedge resetb) begin
    if (!resetb) begin
        ex_imm        <= 32'h0;
        ex_imm_sel    <= 1'b0;
        ex_src1_sel   <= 5'h0;
        ex_src2_sel   <= 5'h0;
        ex_dst_sel    <= 5'h0;
        ex_alu_op     <= 3'h0;
        ex_subtype    <= 1'b0;
        ex_memwr      <= 1'b0;
        ex_alu        <= 1'b0;
        ex_csr        <= 1'b0;
        ex_csr_wr     <= 1'b0;
        ex_lui        <= 1'b0;
        ex_auipc      <= 1'b0;
        ex_jal        <= 1'b0;
        ex_jalr       <= 1'b0;
        ex_branch     <= 1'b0;
        ex_system     <= 1'b0;
        ex_system_op  <= 1'b0;
        ex_pc         <= RESETVEC;
        ex_illegal    <= 1'b0;
        `ifdef RV32M_ENABLED
        ex_mul        <= 1'b0;
        `endif // RV32M_ENABLED
    end else if (!if_stall) begin
        ex_imm        <= imm;
        // The immediate is selected for JALR, loads and ALU-immediate ops.
        ex_imm_sel    <= (inst[`OPCODE] == OP_JALR  ) ||
                         (inst[`OPCODE] == OP_LOAD  ) ||
                         (inst[`OPCODE] == OP_ARITHI);
        ex_src1_sel   <= inst[`RS1];
        ex_src2_sel   <= inst[`RS2];
        ex_dst_sel    <= inst[`RD];
        ex_alu_op     <= inst[`FUNC3];
        // The subtype bit is masked for ARITHI with funct3 == OP_ADD (ADDI),
        // where that instruction bit belongs to the immediate.
        ex_subtype    <= inst[`SUBTYPE] &&
                         !(inst[`OPCODE] == OP_ARITHI && inst[`FUNC3] == OP_ADD);
        ex_memwr      <= inst[`OPCODE] == OP_STORE;
        ex_alu        <= (inst[`OPCODE] == OP_ARITHI) ||
                         ((inst[`OPCODE] == OP_ARITHR) &&
                          (inst[`FUNC7] == 'h00 || inst[`FUNC7] == 'h20));

        // csr
        ex_csr        <= (inst[`OPCODE] == OP_SYSTEM) &&
                         (inst[`FUNC3] != OP_ECALL);
        // CSRRS and CSRRC, if rs1==0, then the instruction
        // will not write to the CSR at all
        ex_csr_wr     <= (inst[`OPCODE] == OP_SYSTEM) &&
                         (inst[`FUNC3] != OP_ECALL) &&
                         !(inst[`FUNC3] != OP_CSRRW &&
                           inst[`FUNC3] != OP_CSRRWI &&
                           inst[`RS1] == 5'h0);

        ex_lui        <= inst[`OPCODE] == OP_LUI;
        ex_auipc      <= inst[`OPCODE] == OP_AUIPC;
        ex_jal        <= inst[`OPCODE] == OP_JAL;
        ex_jalr       <= inst[`OPCODE] == OP_JALR;
        ex_branch     <= inst[`OPCODE] == OP_BRANCH;
        ex_system     <= (inst[`OPCODE] == OP_SYSTEM) &&
                         (inst[`FUNC3] == 3'b000);
        ex_system_op  <= inst[`OPCODE] == OP_SYSTEM;
        ex_pc         <= if_pc;

        // Illegal = not one of the opcode / funct3 / funct7 combinations below.
        ex_illegal    <= !((inst[`OPCODE] == OP_AUIPC )||
                           (inst[`OPCODE] == OP_LUI   )||
                           (inst[`OPCODE] == OP_JAL   )||
                           (inst[`OPCODE] == OP_JALR  )||
                           (inst[`OPCODE] == OP_BRANCH)||
                           ((inst[`OPCODE] == OP_LOAD ) &&
                            ((inst[`FUNC3] == OP_LB) ||
                             (inst[`FUNC3] == OP_LH) ||
                             (inst[`FUNC3] == OP_LW) ||
                             (inst[`FUNC3] == OP_LBU) ||
                             (inst[`FUNC3] == OP_LHU))) ||
                           ((inst[`OPCODE] == OP_STORE) &&
                            ((inst[`FUNC3] == OP_SB) ||
                             (inst[`FUNC3] == OP_SH) ||
                             (inst[`FUNC3] == OP_SW))) ||
                           (inst[`OPCODE] == OP_ARITHI)||
                           ((inst[`OPCODE] == OP_ARITHR) &&              // R-type
                            (inst[`FUNC7] == 'h00 || inst[`FUNC7] == 'h20)) ||
                           `ifdef RV32M_ENABLED
                           ((inst[`OPCODE] == OP_ARITHR) &&
                            (inst[`FUNC7] == 'h01)) ||                   // multiply
                           `endif // RV32M_ENABLED
                           (inst[`OPCODE] == OP_FENCE )||
                           (inst[`OPCODE] == OP_SYSTEM));

        `ifdef RV32M_ENABLED
        // Same FUNC7 literal as the RV32M term of ex_illegal above.
        ex_mul        <= (inst[`OPCODE] == OP_ARITHR) &&
                         (inst[`FUNC7] == 'h01);
        `endif // RV32M_ENABLED
    end
end
```
### module 3 : ex_mem2reg(for load)
```verilog=
// Load-in-flight flag: set when the fetched instruction is a load, cleared
// once the flag is set and dmem_rvalid is seen. Setting has priority over
// clearing. Unlike the other IF -> EXE registers, this one is not gated by
// if_stall.
always @(posedge clk or negedge resetb) begin
    if (!resetb)
        ex_mem2reg <= 1'b0;
    else if (inst[`OPCODE] == OP_LOAD)
        ex_mem2reg <= 1'b1;
    else if (ex_mem2reg && dmem_rvalid)
        ex_mem2reg <= 1'b0;
end
```
### module 4 : instruction to EXE stage
```verilog=
// Raw instruction word carried into the EXE stage (written to csr_mtval on
// an illegal instruction). Resets to NOP and holds while if_stall is set.
always @(posedge clk or negedge resetb) begin
    if (!resetb) begin
        ex_insn <= NOP;
    end else if (!if_stall) begin
        ex_insn <= inst;
    end
end
```
## EXE stage
### module 1 : determine the next_pc
```verilog=
// Compute next_pc and whether control flow is redirected (branch_taken).
// Defaults are assigned first so every path drives all three outputs.
always @* begin
    branch_taken  = !ex_flush;
    next_pc       = fetch_pc + `IF_NEXT_PC;
    ex_ill_branch = 1'b0;

    case (1'b1) // first asserted item wins
        ex_jal   : next_pc = ex_pc + ex_imm;
        ex_jalr  : next_pc = alu_op1 + ex_imm;
        ex_branch: begin
            // result_subs / result_subu are 33-bit subtraction results computed
            // elsewhere; bit 32 is used here as the "less than" indication.
            case (ex_alu_op)
                OP_BEQ : begin
                    next_pc = (result_subs[32: 0] == 'd0) ?
                              ex_pc + ex_imm : fetch_pc + `IF_NEXT_PC;
                    if (result_subs[32: 0] != 'd0) branch_taken = 1'b0;
                end
                OP_BNE : begin
                    next_pc = (result_subs[32: 0] != 'd0) ?
                              ex_pc + ex_imm : fetch_pc + `IF_NEXT_PC;
                    if (result_subs[32: 0] == 'd0) branch_taken = 1'b0;
                end
                OP_BLT : begin
                    next_pc = result_subs[32] ?
                              ex_pc + ex_imm : fetch_pc + `IF_NEXT_PC;
                    if (!result_subs[32]) branch_taken = 1'b0;
                end
                OP_BGE : begin
                    next_pc = !result_subs[32] ?
                              ex_pc + ex_imm : fetch_pc + `IF_NEXT_PC;
                    if (result_subs[32]) branch_taken = 1'b0;
                end
                OP_BLTU: begin
                    next_pc = result_subu[32] ?
                              ex_pc + ex_imm : fetch_pc + `IF_NEXT_PC;
                    if (!result_subu[32]) branch_taken = 1'b0;
                end
                OP_BGEU: begin
                    next_pc = !result_subu[32] ?
                              ex_pc + ex_imm : fetch_pc + `IF_NEXT_PC;
                    if (result_subu[32]) branch_taken = 1'b0;
                end
                default: begin
                    // Unknown branch funct3: hold the PC and flag it.
                    next_pc       = fetch_pc;
                    ex_ill_branch = 1'b1;
                end
            endcase
        end
        default  : begin
            next_pc      = fetch_pc + `IF_NEXT_PC;
            branch_taken = 1'b0;
        end
    endcase
end
```
### module 2 : for multiplication and division
```verilog=
`ifdef RV32M_ENABLED
wire    [63: 0] result_mul;
wire    [63: 0] result_mulsu;
wire    [63: 0] result_mulu;
wire    [31: 0] result_div;
wire    [31: 0] result_divu;
wire    [31: 0] result_rem;
wire    [31: 0] result_remu;

// Operands are widened to 64 bits by hand (sign- or zero-extended) before
// multiplying, so the upper product half is available for the MULH variants.
assign result_mul[63: 0]   = $signed  ({{32{alu_op1[31]}}, alu_op1[31: 0]}) *
                             $signed  ({{32{alu_op2[31]}}, alu_op2[31: 0]});
assign result_mulu[63: 0]  = $unsigned({{32{1'b0}}, alu_op1[31: 0]}) *
                             $unsigned({{32{1'b0}}, alu_op2[31: 0]});
assign result_mulsu[63: 0] = $signed  ({{32{alu_op1[31]}}, alu_op1[31: 0]}) *
                             $unsigned({{32{1'b0}}, alu_op2[31: 0]});

// The result of divided by zero and (-MAX / -1) cannot be represented in
// twos complement. Assign the value to pass RISC-V compliance test.
assign result_div[31: 0]   = (alu_op2 == 32'h00000000) ? 32'hffffffff :
                             ((alu_op1 == 32'h80000000) &&
                              (alu_op2 == 32'hffffffff)) ? 32'h80000000 :
                             $signed  ($signed  (alu_op1) / $signed  (alu_op2));
assign result_divu[31: 0]  = (alu_op2 == 32'h00000000) ? 32'hffffffff :
                             $unsigned($unsigned(alu_op1) / $unsigned(alu_op2));
assign result_rem[31: 0]   = (alu_op2 == 32'h00000000) ? alu_op1 :
                             ((alu_op1 == 32'h80000000) &&
                              (alu_op2 == 32'hffffffff)) ? 32'h00000000 :
                             $signed  ($signed  (alu_op1) % $signed  (alu_op2));
assign result_remu[31: 0]  = (alu_op2 == 32'h00000000) ? alu_op1 :
                             $unsigned($unsigned(alu_op1) % $unsigned(alu_op2));
`endif // RV32M_ENABLED
```
### module 3 : compute the ex_result
```verilog=
// Select the EXE-stage result. The case items are one-bit decode flags;
// the first asserted flag in the listed order wins.
always @* begin
    case (1'b1)
        ex_memwr : ex_result = alu_op2;
        ex_jal   : ex_result = ex_pc + `EX_NEXT_PC;
        ex_jalr  : ex_result = ex_pc + `EX_NEXT_PC;
        ex_lui   : ex_result = ex_imm;
        ex_auipc : ex_result = ex_pc + ex_imm;
        ex_csr   : ex_result = ex_csr_read;
        `ifdef RV32M_ENABLED
        ex_mul   :
            case (ex_alu_op)
                OP_MUL  : ex_result = result_mul  [31: 0];
                OP_MULH : ex_result = result_mul  [63:32];
                OP_MULSU: ex_result = result_mulsu[63:32];
                OP_MULU : ex_result = result_mulu [63:32];
                OP_DIV  : ex_result = result_div  [31: 0];
                OP_DIVU : ex_result = result_divu [31: 0];
                OP_REM  : ex_result = result_rem  [31: 0];
                // OP_REMU
                default : ex_result = result_remu [31: 0];
            endcase
        `endif // RV32M_ENABLED
        ex_alu   :
            case (ex_alu_op)
                OP_ADD : if (ex_subtype == 1'b0)
                             ex_result = alu_op1 + alu_op2;
                         else
                             ex_result = alu_op1 - alu_op2;
                // In RISC-V ISA spec, only shift amount
                // held in lower 5 bits of register
                OP_SLL : ex_result = alu_op1 << alu_op2[4:0];
                OP_SLT : ex_result = result_subs[32] ? 'd1 : 'd0;
                OP_SLTU: ex_result = result_subu[32] ? 'd1 : 'd0;
                OP_XOR : ex_result = alu_op1 ^ alu_op2;
                OP_SR  : if (ex_subtype == 1'b0)
                             // Logical shift: '>>' always zero-fills, so the result
                             // does not depend on how alu_op1 is declared.
                             ex_result = alu_op1 >> alu_op2[4:0];
                         else
                             // Arithmetic shift: '>>>' sign-fills because the left
                             // operand is cast to signed.
                             ex_result = $signed(alu_op1) >>> alu_op2[4:0];
                OP_OR  : ex_result = alu_op1 | alu_op2;
                // OP_AND
                default: ex_result = alu_op1 & alu_op2;
            endcase
        default  : begin
            ex_result = 32'h0;
        end
    endcase
end
```
### module 4 : determine the fetch_pc
- ex_flush
- ex_trap
- fetch_pc
```verilog=
// Fetch address register. Priority: ex_flush (advance sequentially),
// then ex_trap (jump to ex_trap_pc), otherwise next_pc with bit 0 cleared.
// Holds while ex_stall is set.
always @(posedge clk or negedge resetb) begin
    if (!resetb) begin
        fetch_pc <= RESETVEC;
    end else if (!ex_stall) begin
        fetch_pc <= (ex_flush) ? (fetch_pc + `EX_NEXT_PC) :
                    (ex_trap)  ? (ex_trap_pc) :
                                 {next_pc[31:1], 1'b0};
    end
end
```
### module 5 : determine signals for write back stage
```verilog=
// EXE -> WB pipeline register. Holds while ex_stall is set.
always @(posedge clk or negedge resetb) begin
    if (!resetb) begin
        wb_result     <= 32'h0;
        wb_alu2reg    <= 1'b0;
        wb_dst_sel    <= 5'h0;
        wb_branch     <= 1'b0;
        wb_branch_nxt <= 1'b0;
        wb_mem2reg    <= 1'b0;
        wb_raddr      <= 2'h0;
        wb_alu_op     <= 3'h0;
    end else if (!ex_stall) begin
        wb_result     <= ex_result;
        // Asserted for every instruction class listed here; a load is
        // excluded when it raised an alignment exception.
        wb_alu2reg    <= ex_alu || ex_lui || ex_auipc || ex_jal || ex_jalr ||
                         ex_csr ||
                         `ifdef RV32M_ENABLED
                         ex_mul ||
                         `endif
                         (ex_mem2reg && !ex_ld_align_excp);
        wb_dst_sel    <= ex_dst_sel;
        wb_branch     <= branch_taken || ex_trap;
        wb_branch_nxt <= wb_branch;
        wb_mem2reg    <= ex_mem2reg;
        // Low address bits and funct3 are kept so the WB stage can pick
        // and extend the loaded byte / halfword.
        wb_raddr      <= dmem_raddr[1:0];
        wb_alu_op     <= ex_alu_op;
    end
end
```
### module 6 : wb_memwr for store instruction
```verilog=
// Store-in-flight flag: set by a store that is neither flushed nor
// misaligned, cleared once the flag is set and dmem_wvalid is seen.
// Setting has priority over clearing.
always @(posedge clk or negedge resetb) begin
    if (!resetb)
        wb_memwr <= 1'b0;
    else if (ex_memwr && !ex_flush && !ex_st_align_excp)
        wb_memwr <= 1'b1;
    else if (wb_memwr && dmem_wvalid)
        wb_memwr <= 1'b0;
end
```
### module 7 :
```verilog=
// Store address / data / byte-strobe registers. The store data is replicated
// across all lanes and the strobe picks the lane(s) from the low address bits.
always @(posedge clk or negedge resetb) begin
    if (!resetb) begin
        wb_waddr <= 32'h0;
        wb_wstrb <= 4'h0;
        wb_wdata <= 32'h0;
    end else if (!ex_stall && ex_memwr) begin
        wb_waddr <= ex_memaddr;
        case (ex_alu_op)
            OP_SB: begin
                wb_wdata <= {4{alu_op2[7:0]}};
                case (ex_memaddr[1:0])
                    2'b00  : wb_wstrb <= 4'b0001;
                    2'b01  : wb_wstrb <= 4'b0010;
                    2'b10  : wb_wstrb <= 4'b0100;
                    default: wb_wstrb <= 4'b1000;
                endcase
            end
            OP_SH: begin
                wb_wdata <= {2{alu_op2[15:0]}};
                wb_wstrb <= ex_memaddr[1] ? 4'b1100 : 4'b0011;
            end
            OP_SW: begin
                wb_wdata <= alu_op2;
                wb_wstrb <= 4'hf;
            end
            default: begin
                wb_wdata <= 32'h0;
                wb_wstrb <= 4'hf;
            end
        endcase
    end
end
```
## WB stage
### module 1
```verilog=
assign imem_addr  = fetch_pc;
assign imem_ready = !stall_r && !wb_stall;
// Stall while a store or a load is still waiting for the data memory.
assign wb_stall   = stall_r || (wb_memwr && !dmem_wvalid) ||
                    (wb_mem2reg && !dmem_rresp);
assign wb_flush   = wb_nop || wb_nop_more;
```
### module 2
```verilog=
// wb_nop follows wb_branch by one update and wb_nop_more follows wb_nop by
// one more; wb_flush (above) is their OR. Not updated while ex_stall is set
// or a store is still pending.
always @(posedge clk or negedge resetb) begin
    if (!resetb) begin
        wb_nop      <= 1'b0;
        wb_nop_more <= 1'b0;
    end else if (!ex_stall && !(wb_memwr && !dmem_wvalid)) begin
        wb_nop      <= wb_branch;
        wb_nop_more <= wb_nop;
    end
end
```
### module 3
```verilog=
// Load data formatter: pick the addressed byte / halfword out of dmem_rdata
// and sign- or zero-extend it according to the load funct3 (wb_alu_op).
// The inner byte selects end in 'default' (lane 3), like the store strobe
// decoder, so wb_rdata is assigned on every path of this combinational block.
always @* begin
    case (wb_alu_op)
        OP_LB  : begin
            case (wb_raddr[1:0])
                2'b00  : wb_rdata[31: 0] = {{24{dmem_rdata[ 7]}}, dmem_rdata[ 7: 0]};
                2'b01  : wb_rdata[31: 0] = {{24{dmem_rdata[15]}}, dmem_rdata[15: 8]};
                2'b10  : wb_rdata[31: 0] = {{24{dmem_rdata[23]}}, dmem_rdata[23:16]};
                default: wb_rdata[31: 0] = {{24{dmem_rdata[31]}}, dmem_rdata[31:24]}; // 2'b11
            endcase
        end
        OP_LH  : begin
            wb_rdata = (wb_raddr[1]) ?
                       {{16{dmem_rdata[31]}}, dmem_rdata[31:16]} :
                       {{16{dmem_rdata[15]}}, dmem_rdata[15: 0]};
        end
        OP_LW  : begin
            wb_rdata = dmem_rdata;
        end
        OP_LBU : begin
            case (wb_raddr[1:0])
                2'b00  : wb_rdata[31: 0] = {24'h0, dmem_rdata[ 7: 0]};
                2'b01  : wb_rdata[31: 0] = {24'h0, dmem_rdata[15: 8]};
                2'b10  : wb_rdata[31: 0] = {24'h0, dmem_rdata[23:16]};
                default: wb_rdata[31: 0] = {24'h0, dmem_rdata[31:24]};               // 2'b11
            endcase
        end
        OP_LHU : begin
            wb_rdata = (wb_raddr[1]) ?
                       {16'h0, dmem_rdata[31:16]} :
                       {16'h0, dmem_rdata[15: 0]};
        end
        default: begin
            wb_rdata = 32'h0;
        end
    endcase
end
```
## Trap CSR
### module 1
```verilog=
// A trap is taken for any exception / interrupt / system call in the EXE
// stage, unless that stage is being flushed.
assign ex_trap     = (ex_inst_ill_excp || ex_inst_align_excp ||
                      ex_ld_align_excp || ex_st_align_excp ||
                      ex_timer_irq || ex_sw_irq || ex_interrupt ||
                      ex_systemcall) && !ex_flush;

// Trap target: csr_mepc for the return case, otherwise the mtvec base,
// plus 4 * ex_mcause[3:0] when csr_mtvec[0] is set.
// NOTE(review): the offset is added for whatever ex_mcause holds, i.e. also
// for synchronous exceptions. The RISC-V privileged spec vectors only
// interrupts in vectored mode -- confirm this is intended.
assign ex_trap_pc  = (ex_systemcall && ex_imm[1:0] == 2'b10) ? // mret
                     csr_mepc :
                     csr_mtvec[0] ?
                     {csr_mtvec[31:2], 2'b00} + {26'h0, ex_mcause[3:0], 2'b00} :
                     {csr_mtvec[31:2], 2'b00};

// CSR write operand: the zero-extended 5-bit rs1 field when funct3[2] is set
// (immediate forms), otherwise the rs1 register value.
assign ex_csr_data = ex_alu_op[2] ? {27'h0, ex_src1_sel[4:0]} : reg_rdata1;

// PC to resume at after an interrupt, expressed as bits [31:1] (so +2 here
// is +4 bytes): the redirect target when the current instruction changes
// control flow, otherwise the instruction after ex_pc.
assign ex_ret_pc   = (ex_jal || ex_jalr || (ex_branch && branch_taken)) ?
                     next_pc[31: 1] :
                     ex_pc[31: 1] + 31'd2;
```
### module 2
```verilog=
// Cause code of the trap being taken. The first asserted source in the
// listed order wins; 0 when none is asserted.
always @* begin
    ex_mcause = 32'h0;
    case (1'b1)
        ex_inst_ill_excp  : ex_mcause = TRAP_INST_ILL;
        ex_inst_align_excp: ex_mcause = TRAP_INST_ALIGN;
        ex_ld_align_excp  : ex_mcause = TRAP_LD_ALIGN;
        ex_st_align_excp  : ex_mcause = TRAP_ST_ALIGN;
        ex_timer_irq      : ex_mcause = INT_MTIME;
        ex_sw_irq         : ex_mcause = INT_MSI;
        ex_interrupt      : ex_mcause = INT_MEI;
        ex_systemcall     : begin
            case (ex_imm[1:0])
                2'b00  : ex_mcause = TRAP_ECALL;
                2'b01  : ex_mcause = TRAP_BREAK;
                2'b10  : ex_mcause = csr_mcause; // uret, sret, mret
                default: begin
                    `ifndef SYNTHESIS
                    $display("Illegal system call at PC 0x%08x\n", ex_pc);
                    `endif
                    ex_mcause = TRAP_INST_ILL;
                end
            endcase
        end
    endcase
end
```
### module 3
```verilog=
// Trap-related CSR state (mcause, mepc, mtval, mstatus, mip). Updated only
// when the EXE stage is neither stalled nor flushed. The first asserted
// source in the listed order wins; note that ex_csr_wr is listed before the
// alignment exceptions and the interrupts.
always @(posedge clk or negedge resetb) begin
    if (!resetb) begin
        csr_mcause  <= 32'h0;
        csr_mepc    <= 32'h0;
        csr_mtval   <= 32'h0;
        csr_mstatus <= 32'h0;
        csr_mip     <= 32'h0;
    end else if (!ex_stall && !ex_flush) begin
        case (1'b1)
            ex_inst_ill_excp  : begin
                csr_mcause        <= TRAP_INST_ILL;
                csr_mepc          <= {ex_pc[31: 1], 1'b0};
                csr_mtval         <= ex_insn;
                csr_mstatus[MPIE] <= csr_mstatus[MIE];
                csr_mstatus[MIE]  <= 1'b0;
                csr_mip           <= csr_mip;
            end
            // Explicit CSR instruction. ex_alu_op[1:0] picks the operation:
            // bit 1 clear -> write, otherwise bit 0 clear -> set bits,
            // otherwise clear bits.
            ex_csr_wr         : begin
                case (ex_imm[11: 0])
                    CSR_MEPC   : begin
                        csr_mepc    <= !ex_alu_op[1] ? ex_csr_data :                  // CSRRW
                                       !ex_alu_op[0] ? (csr_mepc | ex_csr_data) :     // CSRRS
                                                       (csr_mepc & ~ex_csr_data);     // CSRRC
                    end
                    CSR_MCAUSE : begin
                        csr_mcause  <= !ex_alu_op[1] ? ex_csr_data :                  // CSRRW
                                       !ex_alu_op[0] ? (csr_mcause | ex_csr_data) :   // CSRRS
                                                       (csr_mcause & ~ex_csr_data);   // CSRRC
                    end
                    CSR_MTVAL  : begin
                        csr_mtval   <= !ex_alu_op[1] ? ex_csr_data :                  // CSRRW
                                       !ex_alu_op[0] ? (csr_mtval | ex_csr_data) :    // CSRRS
                                                       (csr_mtval & ~ex_csr_data);    // CSRRC
                    end
                    CSR_MSTATUS: begin
                        csr_mstatus <= !ex_alu_op[1] ? ex_csr_data :                  // CSRRW
                                       !ex_alu_op[0] ? (csr_mstatus | ex_csr_data) :  // CSRRS
                                                       (csr_mstatus & ~ex_csr_data);  // CSRRC
                    end
                    CSR_MIP    : begin
                        csr_mip     <= !ex_alu_op[1] ? ex_csr_data :                  // CSRRW
                                       !ex_alu_op[0] ? (csr_mip | ex_csr_data) :      // CSRRS
                                                       (csr_mip & ~ex_csr_data);      // CSRRC
                    end
                    default    : ;
                endcase
            end
            ex_inst_align_excp: begin
                csr_mcause        <= TRAP_INST_ALIGN;
                csr_mepc          <= {ex_pc[31: 1], 1'b0};
                csr_mtval         <= {next_pc[31: 1], 1'b0};
                csr_mstatus[MPIE] <= csr_mstatus[MIE];
                csr_mstatus[MIE]  <= 1'b0;
                csr_mip           <= csr_mip;
            end
            ex_ld_align_excp  : begin
                csr_mcause        <= TRAP_LD_ALIGN;
                csr_mepc          <= {ex_pc[31: 1], 1'b0};
                csr_mtval         <= ex_memaddr;
                csr_mstatus[MPIE] <= csr_mstatus[MIE];
                csr_mstatus[MIE]  <= 1'b0;
                csr_mip           <= csr_mip;
            end
            ex_st_align_excp  : begin
                csr_mcause        <= TRAP_ST_ALIGN;
                csr_mepc          <= {ex_pc[31: 1], 1'b0};
                csr_mtval         <= ex_memaddr;
                csr_mstatus[MPIE] <= csr_mstatus[MIE];
                csr_mstatus[MIE]  <= 1'b0;
                csr_mip           <= csr_mip;
            end
            // Interrupts save ex_ret_pc (the resume address) rather than ex_pc.
            ex_timer_irq      : begin
                csr_mcause        <= INT_MTIME;
                csr_mepc          <= {ex_ret_pc[31: 1], 1'b0};
                csr_mtval         <= 32'd0; // FIXME
                csr_mstatus[MPIE] <= csr_mstatus[MIE];
                csr_mstatus[MIE]  <= 1'b0;
                csr_mip[MTIP]     <= 1'b1;
            end
            ex_sw_irq         : begin
                csr_mcause        <= INT_MSI;
                csr_mepc          <= {ex_ret_pc[31: 1], 1'b0};
                csr_mtval         <= 32'd0; // FIXME
                csr_mstatus[MPIE] <= csr_mstatus[MIE];
                csr_mstatus[MIE]  <= 1'b0;
                csr_mip[MSIP]     <= 1'b1;
            end
            ex_interrupt      : begin
                csr_mcause        <= INT_MEI;
                csr_mepc          <= {ex_ret_pc[31: 1], 1'b0};
                csr_mtval         <= 32'd0; // FIXME
                csr_mstatus[MPIE] <= csr_mstatus[MIE];
                csr_mstatus[MIE]  <= 1'b0;
                csr_mip[MEIP]     <= 1'b1;
            end
            ex_systemcall     : begin
                case (ex_imm[1:0])
                    2'b00  : begin // ECALL
                        csr_mcause        <= TRAP_ECALL;
                        csr_mepc          <= {ex_pc[31: 1], 1'b0};
                        csr_mtval         <= 32'd0; // FIXME
                        csr_mstatus[MPIE] <= csr_mstatus[MIE];
                        csr_mstatus[MIE]  <= 1'b0;
                        csr_mip           <= csr_mip;
                    end
                    2'b01  : begin // EBREAK
                        csr_mcause        <= TRAP_BREAK;
                        csr_mepc          <= {ex_pc[31: 1], 1'b0};
                        csr_mtval         <= 32'd0; // FIXME
                        csr_mstatus[MPIE] <= csr_mstatus[MIE];
                        csr_mstatus[MIE]  <= 1'b0;
                        csr_mip           <= csr_mip;
                    end
                    2'b10  : begin // URET, SRET, MRET
                        // NOTE(review): the return path overwrites csr_mepc and
                        // csr_mtval and leaves csr_mstatus[MPIE] untouched; the
                        // original FIXMEs are kept -- check against the
                        // privileged spec before relying on this.
                        csr_mcause        <= csr_mcause;           // FIXME
                        csr_mepc          <= {ex_pc[31: 1], 1'b0}; // FIXME
                        csr_mtval         <= 32'd0;                // FIXME
                        csr_mstatus[MIE]  <= csr_mstatus[MPIE];
                        csr_mip           <= csr_mip;
                    end
                    default: begin
                        csr_mcause        <= TRAP_INST_ILL;
                        csr_mepc          <= {ex_pc[31: 1], 1'b0};
                        csr_mtval         <= ex_insn;
                        csr_mstatus       <= csr_mstatus;
                        csr_mip           <= csr_mip;
                    end
                endcase
            end
        endcase
    end
end
```