# Final Project: VexRiscv contributed by <dck9661> ## GenSmallest ### Hardware This is a configurable small CPU with instruction Fetch, decode stage, Register file, execution Unit, memory instruction (LB/LH/SB/SW..), MUX, Hazard Unit, Branch Unit (JAL/BEQ/BLT/....). Corresponding Table : | IBusSimplePlugin | DBusSimplePlugin | RegFilePlugin | | -------- | -------- | -------- | | PC + instruction fetch | memory instructions + memory bus | register file | | IntAluPlugin | SrcPlugin | LightShifterPlugin | | -------- | -------- | -------- | | execution Unit | provide MUX Units | Implements SLL/SRL/SRA instructions | | HazardSimplePlugin | BranchPlugin| | -------- | -------- | | hazard detect unit | implement all branch/jump instructions | ```scala= package vexriscv.demo import vexriscv.plugin._ import vexriscv.{plugin, VexRiscv, VexRiscvConfig} import spinal.core._ /** * Created by spinalvm on 15.06.17. */ object GenSmallest extends App{ def cpu() = new VexRiscv( config = VexRiscvConfig( plugins = List( new IBusSimplePlugin( resetVector = 0x80000000l, cmdForkOnSecondStage = false, cmdForkPersistence = false, prediction = NONE, catchAccessFault = false, compressedGen = false ), new DBusSimplePlugin( catchAddressMisaligned = false, catchAccessFault = false ), new CsrPlugin(CsrPluginConfig.smallest), new DecoderSimplePlugin( catchIllegalInstruction = false ), new RegFilePlugin( regFileReadyKind = plugin.SYNC, zeroBoot = false ), new IntAluPlugin, new SrcPlugin( separatedAddSub = false, executeInsertion = false ), new LightShifterPlugin, new HazardSimplePlugin( bypassExecute = false, bypassMemory = false, bypassWriteBack = false, bypassWriteBackBuffer = false, pessimisticUseSrc = false, pessimisticWriteRegFile = false, pessimisticAddressMatch = false ), new BranchPlugin( earlyBranch = false, catchAddressMisaligned = false ), new YamlPlugin("cpu0.yaml") ) ) ) SpinalVerilog(cpu()) } ``` ### Software testbench * ISA tests from [RISC-V test](https://github.com/riscv/riscv-tests/tree/master/isa) * Dhrystone benchmark from [RiscvSocSoftware](https://github.com/SpinalHDL/VexRiscvSocSoftware) https://en.wikipedia.org/wiki/Dhrystone ### Validation 1. run the commands to generate the RTL code as VexRiscv.v 2. run the testbench need the verilator simulator ```cpp= sbt "runMain vexriscv.demo.GenSmallest" cd src/test/cpp/regression make clean run IBUS=SIMPLE DBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no ``` ```cpp g++ main.o verilated.o verilated_dpi.o VVexRiscv__ALL.a -pthread -o VVexRiscv -lm -lstdc++ make[1]: Leaving directory '/home/jacky/class/jserv/final/VexRiscv/src/test/cpp/regression/obj_dir' ./obj_dir/VVexRiscv BOOT Dhrystone Benchmark, Version 2.1 (Language: C) Program compiled without 'register' attribute Please give the number of runs through the benchmark: Execution starts, 200 runs through Dhrystone Execution ends Final values of the variables used in the benchmark: Int_Glob: 5 should be: 5 Bool_Glob: 1 should be: 1 Ch_1_Glob: A should be: A Ch_2_Glob: B should be: B Arr_1_Glob[8]: 7 should be: 7 Arr_2_Glob[8][7]: 210 should be: Number_Of_Runs + 10 Ptr_Glob-> Ptr_Comp: -2147458812 should be: (implementation-dependent) Discr: 0 should be: 0 Enum_Comp: 2 should be: 2 Int_Comp: 17 should be: 17 Str_Comp: DHRYSTONE PROGRAM, SOME STRING should be: DHRYSTONE PROGRAM, SOME STRING Next_Ptr_Glob-> Ptr_Comp: -2147458812 should be: (implementation-dependent), same as above Discr: 0 should be: 0 Enum_Comp: 1 should be: 1 Int_Comp: 18 should be: 18 Str_Comp: DHRYSTONE PROGRAM, SOME STRING should be: DHRYSTONE PROGRAM, SOME STRING Int_1_Loc: 5 should be: 5 Int_2_Loc: 13 should be: 13 Int_3_Loc: 7 should be: 7 Enum_Loc: 1 should be: 1 Str_1_Loc: DHRYSTONE PROGRAM, 1'ST STRING should be: DHRYSTONE PROGRAM, 1'ST STRING Str_2_Loc: DHRYSTONE PROGRAM, 2'ND STRING should be: DHRYSTONE PROGRAM, 2'ND STRING Clock cycles=255836 DMIPS per Mhz: 0.44 SUCCESS dhrystoneO3_Stall **************************************************************** Had simulate 1383185 clock cycles in 0.69031 s (2003.71 Khz) REGRESSION SUCCESS 792/792 **************************************************************** ``` ### Analyze When we make run, it will use verilator to transform the VexRiscv.v to VexRiscv.cpp and VexRiscv.h. Next step it will compile VexRiscv.cpp and main.cpp to make a execution file called VVexRiscv. ```cpp= run: compile ./obj_dir/VVexRiscv verilate: ../../../../VexRiscv.v rm -f VexRiscv.v*.bin cp ../../../../VexRiscv.v*.bin . | true verilator -cc ../../../../VexRiscv.v -O3 -CFLAGS -std=c++11 -LDFLAGS -pthread ${ADDCFLAGS} --gdbbt ${VERILATOR_ARGS} -Wno-UNOPTFLAT -Wno-WIDTH --x-assign unique --exe main.cpp compile: verilate make -j${THREAD_COUNT} -C obj_dir/ -f VVexRiscv.mk VVexRiscv ``` main.cpp just like a testbench which will load program(.hex for example) to VexRiscv and give initial port to VexRiscv like reset also we can get some profile from main.cpp because it can get some information from VexRiscv software cpu. ```cpp= for(const string &name : riscvTestMain){ redo(REDO,RiscvTest(name).run();) } class RiscvTest : public WorkspaceRegression{ public: RiscvTest(string name) : WorkspaceRegression(name) { loadHex("../../resources/hex/" + name + ".hex"); bootAt(0x800000bcu); . . . } Workspace* run(uint64_t timeout = 5000){ if(timeout == 0) timeout = 0x7FFFFFFFFFFFFFFF; currentTime = 4; // init trace dump #ifdef TRACE Verilated::traceEverOn(true); tfp = new VerilatedVcdC; top->trace(tfp, 99); tfp->open((string(name)+ ".vcd").c_str()); #endif // Reset top->clk = 0; top->reset = 0; top->eval(); currentTime = 3; for(SimElement* simElement : simElements) simElement->onReset(); top->reset = 1; top->eval(); top->clk = 1; top->eval(); top->clk = 0; top->eval(); . . . ``` ## Briey Soc ### hardware This is a 5 stage pipelined CPU with following feature: * Instruction Cache * Single cycle Barrel shifter * Single cycle MUL, 34 cycle DIV * Interruption support * Dynamic branch prediction * Debug port AXI4 bus has many advantages like : * A flexible topology * High bandwidth potential * Potential out of order request completion * Easy methods to meets clocks timings * Standard used by many IP * An hand-shaking methodology that fit with SpinalHDL Stream. All peripherals implement an APB3 bus to be interfaced: * Very simple bus (no burst) * Use very few resources * Standard used by many IP ![](https://i.imgur.com/iljjcoI.png) UART peripheral modul: Because SpinalHDL is a little hard to understand, I found a Uart example from [nandland](https://www.nandland.com/vhdl/modules/module-uart-serial-port-rs232.html) to make me realize how uart work. ![](https://i.imgur.com/6Mk7jCC.png) Verilog Receiver (uart_rx.v): ```cpp= // File Downloaded from http://www.nandland.com ////////////////////////////////////////////////////////////////////// // This file contains the UART Receiver. This receiver is able to // receive 8 bits of serial data, one start bit, one stop bit, // and no parity bit. When receive is complete o_rx_dv will be // driven high for one clock cycle. // // Set Parameter CLKS_PER_BIT as follows: // CLKS_PER_BIT = (Frequency of i_Clock)/(Frequency of UART) // Example: 10 MHz Clock, 115200 baud UART // (10000000)/(115200) = 87 module uart_rx #(parameter CLKS_PER_BIT) ( input i_Clock, input i_Rx_Serial, output o_Rx_DV, output [7:0] o_Rx_Byte ); parameter s_IDLE = 3'b000; parameter s_RX_START_BIT = 3'b001; parameter s_RX_DATA_BITS = 3'b010; parameter s_RX_STOP_BIT = 3'b011; parameter s_CLEANUP = 3'b100; reg r_Rx_Data_R = 1'b1; reg r_Rx_Data = 1'b1; reg [7:0] r_Clock_Count = 0; reg [2:0] r_Bit_Index = 0; //8 bits total reg [7:0] r_Rx_Byte = 0; reg r_Rx_DV = 0; reg [2:0] r_SM_Main = 0; // Purpose: Double-register the incoming data. // This allows it to be used in the UART RX Clock Domain. // (It removes problems caused by metastability) always @(posedge i_Clock) begin r_Rx_Data_R <= i_Rx_Serial; r_Rx_Data <= r_Rx_Data_R; end // Purpose: Control RX state machine always @(posedge i_Clock) begin case (r_SM_Main) s_IDLE : begin r_Rx_DV <= 1'b0; r_Clock_Count <= 0; r_Bit_Index <= 0; if (r_Rx_Data == 1'b0) // Start bit detected r_SM_Main <= s_RX_START_BIT; else r_SM_Main <= s_IDLE; end // Check middle of start bit to make sure it's still low s_RX_START_BIT : begin if (r_Clock_Count == (CLKS_PER_BIT-1)/2) begin if (r_Rx_Data == 1'b0) begin r_Clock_Count <= 0; // reset counter, found the middle r_SM_Main <= s_RX_DATA_BITS; end else r_SM_Main <= s_IDLE; end else begin r_Clock_Count <= r_Clock_Count + 1; r_SM_Main <= s_RX_START_BIT; end end // case: s_RX_START_BIT // Wait CLKS_PER_BIT-1 clock cycles to sample serial data s_RX_DATA_BITS : begin if (r_Clock_Count < CLKS_PER_BIT-1) begin r_Clock_Count <= r_Clock_Count + 1; r_SM_Main <= s_RX_DATA_BITS; end else begin r_Clock_Count <= 0; r_Rx_Byte[r_Bit_Index] <= r_Rx_Data; // Check if we have received all bits if (r_Bit_Index < 7) begin r_Bit_Index <= r_Bit_Index + 1; r_SM_Main <= s_RX_DATA_BITS; end else begin r_Bit_Index <= 0; r_SM_Main <= s_RX_STOP_BIT; end end end // case: s_RX_DATA_BITS // Receive Stop bit. Stop bit = 1 s_RX_STOP_BIT : begin // Wait CLKS_PER_BIT-1 clock cycles for Stop bit to finish if (r_Clock_Count < CLKS_PER_BIT-1) begin r_Clock_Count <= r_Clock_Count + 1; r_SM_Main <= s_RX_STOP_BIT; end else begin r_Rx_DV <= 1'b1; r_Clock_Count <= 0; r_SM_Main <= s_CLEANUP; end end // case: s_RX_STOP_BIT // Stay here 1 clock s_CLEANUP : begin r_SM_Main <= s_IDLE; r_Rx_DV <= 1'b0; end default : r_SM_Main <= s_IDLE; endcase end assign o_Rx_DV = r_Rx_DV; assign o_Rx_Byte = r_Rx_Byte; endmodule // uart_rx ``` Verilog Transmitter (uart_tx.v): ```cpp= ////////////////////////////////////////////////////////////////////// // File Downloaded from http://www.nandland.com ////////////////////////////////////////////////////////////////////// // This file contains the UART Transmitter. This transmitter is able // to transmit 8 bits of serial data, one start bit, one stop bit, // and no parity bit. When transmit is complete o_Tx_done will be // driven high for one clock cycle. // // Set Parameter CLKS_PER_BIT as follows: // CLKS_PER_BIT = (Frequency of i_Clock)/(Frequency of UART) // Example: 10 MHz Clock, 115200 baud UART // (10000000)/(115200) = 87 module uart_tx #(parameter CLKS_PER_BIT) ( input i_Clock, input i_Tx_DV, input [7:0] i_Tx_Byte, output o_Tx_Active, output reg o_Tx_Serial, output o_Tx_Done ); parameter s_IDLE = 3'b000; parameter s_TX_START_BIT = 3'b001; parameter s_TX_DATA_BITS = 3'b010; parameter s_TX_STOP_BIT = 3'b011; parameter s_CLEANUP = 3'b100; reg [2:0] r_SM_Main = 0; reg [7:0] r_Clock_Count = 0; reg [2:0] r_Bit_Index = 0; reg [7:0] r_Tx_Data = 0; reg r_Tx_Done = 0; reg r_Tx_Active = 0; always @(posedge i_Clock) begin case (r_SM_Main) s_IDLE : begin o_Tx_Serial <= 1'b1; // Drive Line High for Idle r_Tx_Done <= 1'b0; r_Clock_Count <= 0; r_Bit_Index <= 0; if (i_Tx_DV == 1'b1) begin r_Tx_Active <= 1'b1; r_Tx_Data <= i_Tx_Byte; r_SM_Main <= s_TX_START_BIT; end else r_SM_Main <= s_IDLE; end // case: s_IDLE // Send out Start Bit. Start bit = 0 s_TX_START_BIT : begin o_Tx_Serial <= 1'b0; // Wait CLKS_PER_BIT-1 clock cycles for start bit to finish if (r_Clock_Count < CLKS_PER_BIT-1) begin r_Clock_Count <= r_Clock_Count + 1; r_SM_Main <= s_TX_START_BIT; end else begin r_Clock_Count <= 0; r_SM_Main <= s_TX_DATA_BITS; end end // case: s_TX_START_BIT // Wait CLKS_PER_BIT-1 clock cycles for data bits to finish s_TX_DATA_BITS : begin o_Tx_Serial <= r_Tx_Data[r_Bit_Index]; if (r_Clock_Count < CLKS_PER_BIT-1) begin r_Clock_Count <= r_Clock_Count + 1; r_SM_Main <= s_TX_DATA_BITS; end else begin r_Clock_Count <= 0; // Check if we have sent out all bits if (r_Bit_Index < 7) begin r_Bit_Index <= r_Bit_Index + 1; r_SM_Main <= s_TX_DATA_BITS; end else begin r_Bit_Index <= 0; r_SM_Main <= s_TX_STOP_BIT; end end end // case: s_TX_DATA_BITS // Send out Stop bit. Stop bit = 1 s_TX_STOP_BIT : begin o_Tx_Serial <= 1'b1; // Wait CLKS_PER_BIT-1 clock cycles for Stop bit to finish if (r_Clock_Count < CLKS_PER_BIT-1) begin r_Clock_Count <= r_Clock_Count + 1; r_SM_Main <= s_TX_STOP_BIT; end else begin r_Tx_Done <= 1'b1; r_Clock_Count <= 0; r_SM_Main <= s_CLEANUP; r_Tx_Active <= 1'b0; end end // case: s_Tx_STOP_BIT // Stay here 1 clock s_CLEANUP : begin r_Tx_Done <= 1'b1; r_SM_Main <= s_IDLE; end default : r_SM_Main <= s_IDLE; endcase end assign o_Tx_Active = r_Tx_Active; assign o_Tx_Done = r_Tx_Done; endmodule ``` Verilog Testbench (uart_tb.v): ```cpp= ////////////////////////////////////////////////////////////////////// // File Downloaded from http://www.nandland.com ////////////////////////////////////////////////////////////////////// // This testbench will exercise both the UART Tx and Rx. // It sends out byte 0xAB over the transmitter // It then exercises the receive by receiving byte 0x3F `timescale 1ns/10ps `include "uart_tx.v" `include "uart_rx.v" module uart_tb (); // Testbench uses a 10 MHz clock // Want to interface to 115200 baud UART // 10000000 / 115200 = 87 Clocks Per Bit. parameter c_CLOCK_PERIOD_NS = 100; parameter c_CLKS_PER_BIT = 87; parameter c_BIT_PERIOD = 8600; reg r_Clock = 0; reg r_Tx_DV = 0; wire w_Tx_Done; reg [7:0] r_Tx_Byte = 0; reg r_Rx_Serial = 1; wire [7:0] w_Rx_Byte; // Takes in input byte and serializes it task UART_WRITE_BYTE; input [7:0] i_Data; integer ii; begin // Send Start Bit r_Rx_Serial <= 1'b0; #(c_BIT_PERIOD); #1000; // Send Data Byte for (ii=0; ii<8; ii=ii+1) begin r_Rx_Serial <= i_Data[ii]; #(c_BIT_PERIOD); end // Send Stop Bit r_Rx_Serial <= 1'b1; #(c_BIT_PERIOD); end endtask // UART_WRITE_BYTE uart_rx #(.CLKS_PER_BIT(c_CLKS_PER_BIT)) UART_RX_INST (.i_Clock(r_Clock), .i_Rx_Serial(r_Rx_Serial), .o_Rx_DV(), .o_Rx_Byte(w_Rx_Byte) ); uart_tx #(.CLKS_PER_BIT(c_CLKS_PER_BIT)) UART_TX_INST (.i_Clock(r_Clock), .i_Tx_DV(r_Tx_DV), .i_Tx_Byte(r_Tx_Byte), .o_Tx_Active(), .o_Tx_Serial(), .o_Tx_Done(w_Tx_Done) ); always #(c_CLOCK_PERIOD_NS/2) r_Clock <= !r_Clock; // Main Testing: initial begin // Tell UART to send a command (exercise Tx) @(posedge r_Clock); @(posedge r_Clock); r_Tx_DV <= 1'b1; r_Tx_Byte <= 8'hAB; @(posedge r_Clock); r_Tx_DV <= 1'b0; @(posedge w_Tx_Done); // Send a command to the UART (exercise Rx) @(posedge r_Clock); UART_WRITE_BYTE(8'h3F); @(posedge r_Clock); // Check that the correct command was received if (w_Rx_Byte == 8'h3F) $display("Test Passed - Correct Byte Received"); else $display("Test Failed - Incorrect Byte Received"); end endmodule ``` ### software benchmark There are four [testbench](https://github.com/SpinalHDL/VexRiscvSocSoftware/tree/master/projects/briey) provided by briey software: * Dhrystone * timers * uart * vga ### validation: Generate the Briey SoC Hardware ```cpp= sbt "runMain vexriscv.demo.Briey" ``` go in src/test/cpp/briey and run the simulation: ```cpp= make clean run ``` It will show below picture and wait for connection with OpenOCD? ![](https://i.imgur.com/lsZC9AP.png) :::info ### OpenOCD The Open On-Chip Debugger (OpenOCD) aims to provide debugging, in-system programming and boundary-scan testing for embedded target devices. Ref: [OpenOCD User’s Guide](http://openocd.org/doc/html/index.html#Top) ### OpenOCD and GDB OpenOCD complies with the remote gdbserver protocol and, as such, can be used to debug remote targets. Setting up GDB to work with OpenOCD can involve several components: Ref: [21 GDB and OpenOCD](http://openocd.org/doc/html/GDB-and-OpenOCD.html) ### Using OpenOCD `TCL` language is used in config file of OpenOCD, we can add some commands which we need in the openocd.cfg file and run openOCD with the config file. ![](https://i.imgur.com/KsuYa1Z.png) ::: To connect OpenOCD (https://github.com/SpinalHDL/openocd_riscv) to the simulation : ```cpp= src/openocd -f tcl/interface/jtag_tcp.cfg -c "set BRIEY_CPU0_YAML /home/spinalvm/Spinal/VexRiscv/cpu0.yaml" -f tcl/target/briey.cfg ``` ![](https://i.imgur.com/5RkVteY.png) Use the https://github.com/SpinalHDL/openocd_riscv tool to create a GDB server connected to the target (the simulated CPU) ```cpp= riscv64-unknown-elf-gdb ../VexRiscv/VexRiscvSocSoftware/projects/briey/dhrystone/build/dhrystone.elf ``` ![](https://i.imgur.com/8wdrPsd.png) Then we can see the CPU start to run and show the Dhrystone testbench result. ![](https://i.imgur.com/j9uWT0r.png) This is uart testbench result. It print 0-9A-Z in infinite loops. ![](https://i.imgur.com/MAuhVTo.png) This is VGA testbench it show the RGB square in VGA GUI interface. ![](https://i.imgur.com/K9hzWHt.png) ### analyze This is verilator's testbench. It provide clk,reset,sdram config. Take uartRx for example, it write some FSM to handle the data from CPU to testbench and testbench to CPU. If we want run the uart testbench on CPU, we need uart hardware module and initial input signal for uart port and initial clk,initial rst. In this example, This main.cpp and Briey.cpp which is generated by Briey.v with verilator use g++ to compile to a exucution file. When we exucute this filed, it will go to boot and wait for gdb to put a riscv elf program in to memory and start to test. ```cpp= #include "VBriey.h" #include "VBriey_Briey.h" //#include "VBriey_Axi4VgaCtrl.h" //#include "VBriey_VgaCtrl.h" #ifdef REF #include "VBriey_RiscvCore.h" #endif #include "verilated.h" #include "verilated_vcd_c.h" #include <stdio.h> #include <iostream> #include <stdlib.h> #include <stdint.h> #include <cstring> #include <string.h> #include <iostream> #include <fstream> #include <vector> #include <iomanip> #include <time.h> #include <unistd.h> #include "VBriey_VexRiscv.h" #include "../common/framework.h" #include "../common/jtag.h" #include "../common/uart.h" class SdramConfig{ public: uint32_t byteCount; uint32_t bankCount; uint32_t rowSize; uint32_t colSize; SdramConfig(uint32_t byteCount, uint32_t bankCount, uint32_t rowSize, uint32_t colSize){ this->byteCount = byteCount; this->bankCount = bankCount; this->rowSize = rowSize; this->colSize = colSize; } }; class SdramIo{ public: CData *BA; CData *DQM; CData *CASn; CData *CKE; CData *CSn; CData *RASn; CData *WEn; SData *ADDR; CData *DQ_read; CData *DQ_write; CData *DQ_writeEnable; }; class Sdram : public SimElement{ public: SdramConfig *config; SdramIo *io; uint32_t CAS; uint32_t burstLength; class Bank{ public: uint8_t *data; SdramConfig *config; bool opened; uint32_t openedRow; void init(SdramConfig *config){ this->config = config; data = new uint8_t[config->rowSize * config->colSize * config->byteCount]; opened = false; } virtual ~Bank(){ delete data; } void activate(uint32_t row){ if(opened) cout << "SDRAM error open unclosed bank" << endl; openedRow = row; opened = true; } void precharge(){ opened = false; } void write(uint32_t column, CData byteId, CData data){ if(!opened) cout << "SDRAM : write in closed bank" << endl; uint32_t addr = byteId + (column + openedRow * config->colSize) * config->byteCount; //printf("SDRAM : Write A=%08x D=%02x\n",addr,data); this->data[addr] = data; } CData read(uint32_t column, CData byteId){ if(!opened) cout << "SDRAM : write in closed bank" << endl; uint32_t addr = byteId + (column + openedRow * config->colSize) * config->byteCount; //printf("SDRAM : Read A=%08x D=%02x\n",addr,data[addr]); return data[addr]; } }; Bank* banks; CData * readShifter; Sdram(SdramConfig *config,SdramIo* io){ this->config = config; this->io = io; banks = new Bank[config->bankCount]; for(uint32_t bankId = 0;bankId < config->bankCount;bankId++) banks[bankId].init(config); readShifter = new CData[config->byteCount*3]; } virtual ~Sdram(){ delete banks; delete readShifter; } uint8_t ckeLast = 0; virtual void postCycle(){ if(CAS >= 2 && CAS <=3){ for(uint32_t byteId = 0;byteId != config->byteCount;byteId++){ io->DQ_read[byteId] = readShifter[byteId + (CAS-1)*config->byteCount]; } for(uint32_t latency = CAS-1;latency != 0;latency--){ //missing CKE for(uint32_t byteId = 0;byteId != config->byteCount;byteId++){ readShifter[byteId+latency*config->byteCount] = readShifter[byteId+(latency-1)*config->byteCount]; } } } } virtual void preCycle(){ if(!*io->CSn && ckeLast){ uint32_t code = ((*io->RASn) << 2) | ((*io->CASn) << 1) | ((*io->WEn) << 0); switch(code){ case 0: //Mode register set if(*io->BA == 0 && (*io->ADDR & 0x400) == 0){ CAS = ((*io->ADDR) >> 4) & 0x7; burstLength = ((*io->ADDR) >> 0) & 0x7; if((*io->ADDR & 0x388) != 0) cout << "SDRAM : ???" << endl; printf("SDRAM : MODE REGISTER DEFINITION CAS=%d burstLength=%d\n",CAS,burstLength); } break; case 2: //Bank precharge if((*io->ADDR & 0x400) != 0){ //all for(uint32_t bankId = 0;bankId < config->bankCount;bankId++) banks[bankId].precharge(); } else { //single banks[*io->BA].precharge(); } break; case 3: //Bank activate banks[*io->BA].activate(*io->ADDR & 0x7FF); break; case 4: //Write if((*io->ADDR & 0x400) != 0) cout << "SDRAM : Write autoprecharge not supported" << endl; if(*io->DQ_writeEnable == 0) cout << "SDRAM : Write Wrong DQ direction" << endl; for(uint32_t byteId = 0;byteId < config->byteCount;byteId++){ if(((*io->DQM >> byteId) & 1) == 0) banks[*io->BA].write(*io->ADDR, byteId ,io->DQ_write[byteId]); } break; case 5: //Read if((*io->ADDR & 0x400) != 0) cout << "SDRAM : READ autoprecharge not supported" << endl; if(*io->DQ_writeEnable != 0) cout << "SDRAM : READ Wrong DQ direction" << endl; //if(*io->DQM != config->byteCount-1) //cout << "SDRAM : READ wrong DQM" << endl; for(uint32_t byteId = 0;byteId < config->byteCount;byteId++){ readShifter[byteId] = banks[*io->BA].read(*io->ADDR, byteId); } break; case 1: // Self refresh break; case 7: // NOP break; default: cout << "SDRAM : unknown code" << endl; break; } } ckeLast = *io->CKE; } }; class VexRiscvTracer : public SimElement{ public: VBriey_VexRiscv *cpu; ofstream instructionTraces; ofstream regTraces; VexRiscvTracer(VBriey_VexRiscv *cpu){ this->cpu = cpu; #ifdef TRACE_INSTRUCTION instructionTraces.open ("instructionTrace.log"); #endif #ifdef TRACE_REG regTraces.open ("regTraces.log"); #endif } virtual void preCycle(){ #ifdef TRACE_INSTRUCTION if(cpu->writeBack_arbitration_isFiring){ instructionTraces << hex << setw(8) << cpu->writeBack_INSTRUCTION << endl; } #endif #ifdef TRACE_REG if(cpu->writeBack_RegFilePlugin_regFileWrite_valid == 1 && cpu->writeBack_RegFilePlugin_regFileWrite_payload_address != 0){ regTraces << " PC " << hex << setw(8) << cpu->writeBack_PC << " : reg[" << dec << setw(2) << (uint32_t)cpu->writeBack_RegFilePlugin_regFileWrite_payload_address << "] = " << hex << setw(8) << cpu->writeBack_RegFilePlugin_regFileWrite_payload_data << endl; } #endif } }; #include <SDL2/SDL.h> #include <assert.h> #include <stdint.h> #include <stdlib.h> class BrieyWorkspace : public Workspace<VBriey>{ public: BrieyWorkspace() : Workspace("Briey"){ ClockDomain *axiClk = new ClockDomain(&top->io_axiClk,NULL,20000,100000); ClockDomain *vgaClk = new ClockDomain(&top->io_vgaClk,NULL,40000,100000); AsyncReset *asyncReset = new AsyncReset(&top->io_asyncReset,50000); Jtag *jtag = new Jtag(&top->io_jtag_tms,&top->io_jtag_tdi,&top->io_jtag_tdo,&top->io_jtag_tck,80000); UartRx *uartRx = new UartRx(&top->io_uart_txd,1.0e12/115200); timeProcesses.push_back(axiClk); timeProcesses.push_back(vgaClk); timeProcesses.push_back(asyncReset); timeProcesses.push_back(jtag); timeProcesses.push_back(uartRx); SdramConfig *sdramConfig = new SdramConfig( 2, //byteCount 4, //bankCount 1 << 13, //rowSize 1 << 10 //colSize ); SdramIo *sdramIo = new SdramIo(); sdramIo->BA = &top->io_sdram_BA ; sdramIo->DQM = &top->io_sdram_DQM ; sdramIo->CASn = &top->io_sdram_CASn ; sdramIo->CKE = &top->io_sdram_CKE ; sdramIo->CSn = &top->io_sdram_CSn ; sdramIo->RASn = &top->io_sdram_RASn ; sdramIo->WEn = &top->io_sdram_WEn ; sdramIo->ADDR = &top->io_sdram_ADDR ; sdramIo->DQ_read = (CData*)&top->io_sdram_DQ_read ; sdramIo->DQ_write = (CData*)&top->io_sdram_DQ_write ; sdramIo->DQ_writeEnable = &top->io_sdram_DQ_writeEnable; Sdram *sdram = new Sdram(sdramConfig, sdramIo); axiClk->add(sdram); #ifdef TRACE //speedFactor = 100e-6; //cout << "Simulation caped to " << timeToSec << " of real time"<< endl; #endif axiClk->add(new VexRiscvTracer(top->Briey->axi_core_cpu)); #ifdef VGA Vga *vga = new Vga(top,640,480); vgaClk->add(vga); #endif top->io_coreInterrupt = 0; } /*bool trigged = false; uint32_t frameStartCounter = 0; virtual void dump(uint64_t i){ if(!trigged) { if(top->Briey->axi_vgaCtrl->vga_ctrl->io_frameStart) { frameStartCounter++; if(frameStartCounter < 3*32) cout << "**\n" << endl; } if(top->Briey->axi_vgaCtrl->vga_ctrl->io_error && frameStartCounter > 3*32) trigged = true; } if(trigged)Workspace::dump(i); }*/ }; struct timespec timer_start(){ struct timespec start_time; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time); return start_time; } long timer_end(struct timespec start_time){ struct timespec end_time; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_time); uint64_t diffInNanos = end_time.tv_sec*1e9 + end_time.tv_nsec - start_time.tv_sec*1e9 - start_time.tv_nsec; return diffInNanos; } int main(int argc, char **argv, char **env) { Verilated::randReset(2); Verilated::commandArgs(argc, argv); printf("BOOT\n"); timespec startedAt = timer_start(); BrieyWorkspace().run(1e9); uint64_t duration = timer_end(startedAt); cout << endl << "****************************************************************" << endl; cout << "Had simulate " << workspaceCycles << " clock cycles in " << duration*1e-9 << " s (" << workspaceCycles / (duration*1e-9) << " Khz)" << endl; cout << "****************************************************************" << endl << endl; exit(0); } ``` ```cpp= class UartRx : public TimeProcess{ public: CData *rx; uint32_t uartTimeRate; UartRx(CData *rx, uint32_t uartTimeRate){ this->rx = rx; this->uartTimeRate = uartTimeRate; schedule(uartTimeRate); } enum State {START, DATA, STOP}; State state = START; char data; uint32_t counter; virtual void tick(){ switch(state){ case START: if(*rx == 0){ state = DATA; counter = 0; data = 0; schedule(uartTimeRate*5/4); } else { schedule(uartTimeRate/4); } break; case DATA: data |= (*rx) << counter++; if(counter == 8){ state = STOP; } schedule(uartTimeRate); break; case STOP: if(*rx){ cout << data << flush; } else { cout << "UART RX FRAME ERROR at " << time << endl; } schedule(uartTimeRate/4); state = START; break; } } }; ``` Next we can observe the appication code which need to compile with riscv-gcc then it can run on the riscv CPU. It use uart_write this function to write data into specific address then testbench can get finally data from this address. In the hardware uart module, it will use this address to distinguish uart from other module. ```cpp= #define UART ((Uart_Reg*)(0xF0010000)) ``` ```cpp= #include <stdio.h> #include <string.h> #include <stdint.h> #include <stdlib.h> #include <briey.h> void print(char *str){ while(*str){ uart_write(UART,*(str++)); } } int main() { Uart_Config uartConfig; uartConfig.dataLength = 8; uartConfig.parity = NONE; uartConfig.stop = ONE; uartConfig.clockDivider = 50000000/8/115200-1; uart_applyConfig(UART,&uartConfig); print("Hello !\n"); while(1){ for(uint32_t idx = '0';idx <= '9';idx++){ uart_write(UART, idx); } for(uint32_t idx = 'a';idx <= 'z';idx++){ uart_write(UART, idx); } for(uint32_t idx = 'A';idx <= 'Z';idx++){ uart_write(UART, idx); } } } void irqCallback(){ } ```