黃若綾, 蔡雅彤, 林靖婷
This CPU is designed as an auxiliary processor in FPGA designs and ASICs. It supports a wide range of configurations for flexibility in performance, size, and feature set. Example configurations include:
PicoRV32 comes in three core variations:
- picorv32: Simple native memory interface.
- picorv32_axi: AXI4-Lite Master interface for compatibility with AXI-based systems.
- picorv32_wb: Wishbone Master interface.

Additional modules include an AXI4 adapter and PCPI cores for implementing custom instructions.
Module | Description |
---|---|
picorv32 |
The PicoRV32 CPU |
picorv32_axi |
CPU with AXI4-Lite interface |
picorv32_axi_adapter |
Adapter from PicoRV32 Memory Interface to AXI4-Lite |
picorv32_wb |
CPU with Wishbone Master interface |
picorv32_pcpi_mul |
PCPI core implementing the MUL[H[SU\|U]] instructions |
picorv32_pcpi_fast_mul |
Single-cycle multiplier version of pcpi_mul |
picorv32_pcpi_div |
PCPI core implementing DIV[U]/REM[U] instructions |
- firmware/: Simple test firmware for IRQ handling and PCPI cores.
- tests/: Instruction-level tests from riscv-tests.
- dhrystone/: Dhrystone benchmark.
- picosoc/: Example SoC using PicoRV32.
- scripts/: Synthesis and hardware configuration scripts.

git clone https://github.com/YosysHQ/picorv32.git
cd ~/picorv32
make download-tools
make -j8 build-tools
sudo apt-get install autoconf automake autotools-dev curl python3 libmpc-dev libmpfr-dev libgmp-dev gawk build-essential bison flex texinfo gperf libtool patchutils bc zlib1g-dev libexpat-dev ninja-build
The RISC-V GNU toolchain and libraries will be installed in /opt/riscv32i:
sudo mkdir /opt/riscv32i
sudo chown $USER /opt/riscv32i
git clone https://github.com/riscv/riscv-gnu-toolchain riscv-gnu-toolchain-rv32i
cd riscv-gnu-toolchain-rv32i
git submodule update --init --recursive
mkdir build; cd build
../configure --with-arch=rv32i --prefix=/opt/riscv32i
make -j8
Run `make test_vcd` in the picorv32 folder. The result is:
iverilog -o testbench.vvp -DCOMPRESSED_ISA testbench.v picorv32.v
chmod -x testbench.vvp
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc -o firmware/start.o firmware/start.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/irq.o firmware/irq.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/print.o firmware/print.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/hello.o firmware/hello.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/sieve.o firmware/sieve.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/multest.o firmware/multest.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/stats.o firmware/stats.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im -o tests/addi.o -DTEST_FUNC_NAME=addi \
-DTEST_FUNC_TXT='"addi"' -DTEST_FUNC_RET=addi_ret tests/addi.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im -o tests/add.o -DTEST_FUNC_NAME=add \
-DTEST_FUNC_TXT='"add"' -DTEST_FUNC_RET=add_ret tests/add.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im -o tests/andi.o -DTEST_FUNC_NAME=andi \
-DTEST_FUNC_TXT='"andi"' -DTEST_FUNC_RET=andi_ret tests/andi.S
...
-DTEST_FUNC_TXT='"xor"' -DTEST_FUNC_RET=xor_ret tests/xor.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -Os -mabi=ilp32 -march=rv32imc -ffreestanding -nostdlib -o firmware/firmware.elf \
-Wl,--build-id=none,-Bstatic,-T,firmware/sections.lds,-Map,firmware/firmware.map,--strip-debug \
firmware/start.o firmware/irq.o firmware/print.o firmware/hello.o firmware/sieve.o firmware/multest.o firmware/stats.o tests/addi.o tests/add.o tests/andi.o tests/and.o tests/auipc.o tests/beq.o tests/bge.o tests/bgeu.o tests/blt.o tests/bltu.o tests/bne.o tests/div.o tests/divu.o tests/jalr.o tests/jal.o tests/j.o tests/lb.o tests/lbu.o tests/lh.o tests/lhu.o tests/lui.o tests/lw.o tests/mulh.o tests/mulhsu.o tests/mulhu.o tests/mul.o tests/ori.o tests/or.o tests/rem.o tests/remu.o tests/sb.o tests/sh.o tests/simple.o tests/slli.o tests/sll.o tests/slti.o tests/slt.o tests/srai.o tests/sra.o tests/srli.o tests/srl.o tests/sub.o tests/sw.o tests/xori.o tests/xor.o -lgcc
/opt/riscv32i/lib/gcc/riscv32-unknown-elf/14.2.0/../../../../riscv32-unknown-elf/bin/ld: warning: firmware/firmware.elf has a LOAD segment with RWX permissions
chmod -x firmware/firmware.elf
/opt/riscv32i/bin/riscv32-unknown-elf-objcopy -O binary firmware/firmware.elf firmware/firmware.bin
chmod -x firmware/firmware.bin
python3 firmware/makehex.py firmware/firmware.bin 32768 > firmware/firmware.hex
vvp -N testbench.vvp +vcd +trace +noerror
VCD info: dumpfile testbench.vcd opened for output.
hello world
lui..OK
auipc..OK
j..OK
jal..OK
jalr..OK
beq..OK
bne..OK
blt..OK
bge..OK
bltu..OK
bgeu..OK
lb..OK
lh..OK
lw..OK
lbu..OK
lhu..OK
sb..OK
sh..OK
sw..OK
addi..OK
slti..OK
xori..OK
ori..OK
andi..OK
slli..OK
srli..OK
srai..OK
add..OK
sub..OK
sll..OK
slt..OK
xor..OK
srl..OK
sra..OK
or..OK
and..OK
mulh..OK
mulhsu..OK
mulhu..OK
mul..OK
div..OK
divu..OK
rem..OK
remu..OK
simple..OK
1st prime is 2.
2nd prime is 3.
...
30th prime is 113.
31st prime is 127.
checksum: 1772A48F OK
input [FFFFFFFF] 80000000 [FFFFFFFF] FFFFFFFF
hard mul 80000000 00000000 80000000 7FFFFFFF
soft mul 80000000 00000000 80000000 7FFFFFFF OK
hard div 80000000 00000000 00000000 80000000
soft div 80000000 00000000 00000000 80000000 OK
input [00000000] 00000000 [00000000] 00000000
hard mul 00000000 00000000 00000000 00000000
...
hard div FFFFFFFF 00000000 1B9D5F9C 38BAA671
soft div FFFFFFFF 00000000 1B9D5F9C 38BAA671 OK
Cycle counter ......... 484187
Instruction counter ... 105596
CPI: 4.58
DONE
------------------------------------------------------------
EBREAK instruction at 0x0000072A
pc 0000072D x8 00000000 x16 1B639DFB x24 00000000
x1 000006FC x9 00000000 x17 1B639DFB x25 00000000
x2 00020000 x10 20000000 x18 00000000 x26 00000000
x3 DEADBEEF x11 075BCD15 x19 00003A94 x27 00000000
x4 DEADBEEF x12 0000004F x20 00000000 x28 38BAA671
x5 0000108E x13 0000004E x21 00000000 x29 00000001
x6 00000000 x14 00000045 x22 00000000 x30 00000000
x7 00000000 x15 0000000A x23 00000000 x31 00000000
------------------------------------------------------------
Number of fast external IRQs counted: 60
Number of slow external IRQs counted: 7
Number of timer IRQs counted: 22
Finished writing testbench.trace.
TRAP after 526389 clock cycles
ALL TESTS PASSED.
Run gtkwave testbench.vcd
to check the wave file
The bit-manipulation (B) extension is intended to provide some combination of code size reduction, performance improvement, and energy reduction. Its instructions are grouped by function into four extensions: Zba, Zbb, Zbc, and Zbs.
The detail could be found in the document.
Extension | Operation |
---|---|
Zba | Address generation instructions |
Zbb | Basic bit-manipulation |
Zbc | Carry-less multiplication |
Zbs | Single-bit instructions |
Zba extension for RV32 includes the following instructions:
Mnemonic | Instruction | Type |
---|---|---|
sh1add rd, rs1, rs2 | Shift left by 1 and add | R-type |
sh2add rd, rs1, rs2 | Shift left by 2 and add | R-type |
sh3add rd, rs1, rs2 | Shift left by 3 and add | R-type |
Zbb extension for RV32 includes the following instructions:
Mnemonic | Instruction | Type |
---|---|---|
andn rd, rs1, rs2 | AND with inverted operand | R-type |
orn rd, rs1, rs2 | OR with inverted operand | R-type |
xnor rd, rs1, rs2 | Exclusive NOR | R-type |
max rd, rs1, rs2 | Maximum | R-type |
maxu rd, rs1, rs2 | Unsigned maximum | R-type |
min rd, rs1, rs2 | Minimum | R-type |
minu rd, rs1, rs2 | Unsigned minimum | R-type |
rol rd, rs1, rs2 | Rotate left (Register) | R-type |
ror rd, rs1, rs2 | Rotate right (Register) | R-type |
clz rd, rs | Count leading zero bits | I-type |
ctz rd, rs | Count trailing zero bits | I-type |
cpop rd, rs | Count set bits | I-type |
sext.b rd, rs | Sign-extend byte | I-type |
sext.h rd, rs | Sign-extend halfword | I-type |
zext.h rd, rs | Zero-extend halfword | I-type |
rori rd, rs1, shamt | Rotate right (Immediate) | I-type |
orc.b rd, rs | Bitwise OR-Combine, byte granule | I-type |
rev8 rd, rs | Byte-reverse register | I-type |
Zbc extension for RV32 includes the following instructions:
Mnemonic | Instruction | Type |
---|---|---|
clmul rd, rs1, rs2 | Carry-less multiply (low-part) | R-type |
clmulh rd, rs1, rs2 | Carry-less multiply (high-part) | R-type |
clmulr rd, rs1, rs2 | Carry-less multiply (reversed) | R-type |
Zbs extension for RV32 includes the following instructions:
Mnemonic | Instruction | Type |
---|---|---|
bclr rd, rs1, rs2 | Single-Bit Clear (Register) | R-type |
bext rd, rs1, rs2 | Single-Bit Extract (Register) | R-type |
binv rd, rs1, rs2 | Single-Bit Invert (Register) | R-type |
bset rd, rs1, rs2 | Single-Bit Set (Register) | R-type |
bclri rd, rs1, imm | Single-Bit Clear (Immediate) | I-type |
bexti rd, rs1, imm | Single-Bit Extract (Immediate) | I-type |
binvi rd, rs1, imm | Single-Bit Invert (Immediate) | I-type |
bseti rd, rs1, imm | Single-Bit Set (Immediate) | I-type |
Download the official test suite from riscv-tests. In the isa folder, we can find the test cases for the Zba, Zbb, Zbc, and Zbs extensions (rv32uzba, rv32uzbb, rv32uzbc, rv32uzbs). Copy the assembly files into picorv32/tests/ (some files need to be adapted from their 64-bit versions).
Run `make test_vcd` to check whether the added instructions operate correctly.
iverilog -o testbench.vvp -DCOMPRESSED_ISA testbench.v picorv32.v
chmod -x testbench.vvp
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zbb -o firmware/start.o firmware/start.S
...
lui..OK
auipc..OK
...
remu..OK
+sh1add..OK
+sh2add..OK
+sh3add..OK
simple..OK
...
1st prime is 2.
2nd prime is 3.
...
31st prime is 127.
...
Cycle counter ......... 467924
Instruction counter ... 102217
CPI: 4.57
DONE
------------------------------------------------------------
EBREAK instruction at 0x0000074E
pc 00000751 x8 00000000 x16 F98C5E4E x24 00000000
x1 00000720 x9 00000000 x17 1B639DFB x25 00000000
x2 00020000 x10 20000000 x18 00000000 x26 00000000
x3 DEADBEEF x11 075BCD15 x19 00003A7C x27 00000000
x4 DEADBEEF x12 0000004F x20 00000000 x28 1B639DFB
x5 0000107C x13 0000004E x21 00000000 x29 38BAA670
x6 1B639DFB x14 00000045 x22 00000000 x30 00000000
x7 00000000 x15 0000000A x23 00000000 x31 00000000
------------------------------------------------------------
Number of fast external IRQs counted: 58
Number of slow external IRQs counted: 7
Number of timer IRQs counted: 25
Finished writing testbench.trace.
TRAP after 509379 clock cycles
ALL TESTS PASSED.
iverilog -o testbench.vvp -DCOMPRESSED_ISA testbench.v picorv32.v
chmod -x testbench.vvp
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zbb -o firmware/start.o firmware/start.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic_zbb -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/irq.o firmware/irq.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic_zbb -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/print.o firmware/print.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic_zbb -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/hello.o firmware/hello.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic_zbb -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/sieve.o firmware/sieve.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic_zbb -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/multest.o firmware/multest.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32ic_zbb -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/stats.o firmware/stats.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zbb -o tests/addi.o -DTEST_FUNC_NAME=addi \
-DTEST_FUNC_TXT='"addi"' -DTEST_FUNC_RET=addi_ret tests/addi.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zbb -o tests/add.o -DTEST_FUNC_NAME=add \
-DTEST_FUNC_TXT='"add"' -DTEST_FUNC_RET=add_ret tests/add.S
...
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zbb -o tests/zext_h.o -DTEST_FUNC_NAME=zext_h \
-DTEST_FUNC_TXT='"zext_h"' -DTEST_FUNC_RET=zext_h_ret tests/zext_h.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -Os -mabi=ilp32 -march=rv32imc -ffreestanding -nostdlib -o firmware/firmware.elf \
-Wl,--build-id=none,-Bstatic,-T,firmware/sections.lds,-Map,firmware/firmware.map,--strip-debug \
firmware/start.o firmware/irq.o firmware/print.o firmware/hello.o firmware/sieve.o firmware/multest.o firmware/stats.o tests/addi.o tests/add.o tests/andi.o tests/andn.o tests/and.o tests/auipc.o tests/beq.o tests/bge.o tests/bgeu.o tests/blt.o tests/bltu.o tests/bne.o tests/clz.o tests/cpop.o tests/ctz.o tests/div.o tests/divu.o tests/jalr.o tests/jal.o tests/j.o tests/lb.o tests/lbu.o tests/lh.o tests/lhu.o tests/lui.o tests/lw.o tests/max.o tests/maxu.o tests/min.o tests/minu.o tests/mulh.o tests/mulhsu.o tests/mulhu.o tests/mul.o tests/orc_b.o tests/ori.o tests/orn.o tests/or.o tests/rem.o tests/remu.o tests/rev8.o tests/rol.o tests/rori.o tests/ror.o tests/sb.o tests/sext_b.o tests/sext_h.o tests/sh.o tests/simple.o tests/slli.o tests/sll.o tests/slti.o tests/slt.o tests/srai.o tests/sra.o tests/srli.o tests/srl.o tests/sub.o tests/sw.o tests/xnor.o tests/xori.o tests/xor.o tests/zext_h.o -lgcc
/opt/riscv32i/lib/gcc/riscv32-unknown-elf/14.2.0/../../../../riscv32-unknown-elf/bin/ld: warning: firmware/firmware.elf has a LOAD segment with RWX permissions
chmod -x firmware/firmware.elf
/opt/riscv32i/bin/riscv32-unknown-elf-objcopy -O binary firmware/firmware.elf firmware/firmware.bin
chmod -x firmware/firmware.bin
python3 firmware/makehex.py firmware/firmware.bin 32768 > firmware/firmware.hex
vvp -N testbench.vvp +vcd +trace +noerror
VCD info: dumpfile testbench.vcd opened for output.
hello world
lui..OK
auipc..OK
...
simple..OK
+andn..OK
+orn..OK
+xnor..OK
+max..OK
+maxu..OK
+min..OK
+minu..OK
+rol..OK
+ror..OK
+clz..OK
+ctz..OK
+cpop..OK
+sext_b..OK
+sext_h..OK
+zext_h..OK
+rori..OK
+orc_b..OK
+rev8..OK
1st prime is 2.
2nd prime is 3.
...
31st prime is 127.
checksum: 1772A48F OK
input [FFFFFFFF] 80000000 [FFFFFFFF] FFFFFFFF
hard mul 80000000 00000000 80000000 7FFFFFFF
soft mul 80000000 00000000 80000000 7FFFFFFF OK
...
hard div FFFFFFFF 00000000 1B9D5F9C 38BAA671
soft div FFFFFFFF 00000000 1B9D5F9C 38BAA671 OK
Cycle counter ......... 512131
Instruction counter ... 113483
CPI: 4.51
DONE
------------------------------------------------------------
EBREAK instruction at 0x00000802
pc 00000805 x8 00000000 x16 1B639DFB x24 00000000
x1 000007D4 x9 00000000 x17 1B639DFB x25 00000000
x2 00020000 x10 20000000 x18 00000000 x26 00000000
x3 DEADBEEF x11 075BCD15 x19 0000484C x27 00000000
x4 DEADBEEF x12 0000004F x20 00000000 x28 38BAA671
x5 0000108E x13 0000004E x21 00000000 x29 00000001
x6 00000000 x14 00000045 x22 00000000 x30 00000000
x7 00000000 x15 0000000A x23 00000000 x31 00000000
------------------------------------------------------------
Number of fast external IRQs counted: 64
Number of slow external IRQs counted: 8
Number of timer IRQs counted: 32
Finished writing testbench.trace.
TRAP after 555053 clock cycles
ALL TESTS PASSED.
iverilog -o testbench.vvp -DCOMPRESSED_ISA testbench.v picorv32.v
chmod -x testbench.vvp
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zba_zbc_zbs -o firmware/start.o firmware/start.S
...
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zba_zbc_zbs -o tests/addi.o -DTEST_FUNC_NAME=addi \
-DTEST_FUNC_TXT='"addi"' -DTEST_FUNC_RET=addi_ret tests/addi.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zba_zbc_zbs -o tests/add.o -DTEST_FUNC_NAME=add \
-DTEST_FUNC_TXT='"add"' -DTEST_FUNC_RET=add_ret tests/add.S
...
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zba_zbc_zbs -o tests/clmulh.o -DTEST_FUNC_NAME=clmulh \
-DTEST_FUNC_TXT='"clmulh"' -DTEST_FUNC_RET=clmulh_ret tests/clmulh.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zba_zbc_zbs -o tests/clmulr.o -DTEST_FUNC_NAME=clmulr \
-DTEST_FUNC_TXT='"clmulr"' -DTEST_FUNC_RET=clmulr_ret tests/clmulr.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zba_zbc_zbs -o tests/clmul.o -DTEST_FUNC_NAME=clmul \
-DTEST_FUNC_TXT='"clmul"' -DTEST_FUNC_RET=clmul_ret tests/clmul.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zba_zbc_zbs -o tests/div.o -DTEST_FUNC_NAME=div \
...
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zba_zbc_zbs -o tests/xor.o -DTEST_FUNC_NAME=xor \
-DTEST_FUNC_TXT='"xor"' -DTEST_FUNC_RET=xor_ret tests/xor.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -Os -mabi=ilp32 -march=rv32imc_zba_zbc_zbs -ffreestanding -nostdlib -o firmware/firmware.elf \
-Wl,--build-id=none,-Bstatic,-T,firmware/sections.lds,-Map,firmware/firmware.map,--strip-debug \
firmware/start.o firmware/irq.o firmware/print.o firmware/hello.o firmware/sieve.o firmware/multest.o firmware/stats.o tests/addi.o tests/add.o tests/andi.o tests/and.o tests/auipc.o tests/bclri.o tests/bclr.o tests/beq.o tests/bexti.o tests/bext.o tests/bge.o tests/bgeu.o tests/binvi.o tests/binv.o tests/blt.o tests/bltu.o tests/bne.o tests/bseti.o tests/bset.o tests/clmulh.o tests/clmulr.o tests/clmul.o tests/div.o tests/divu.o tests/jalr.o tests/jal.o tests/j.o tests/lb.o tests/lbu.o tests/lh.o tests/lhu.o tests/lui.o tests/lw.o tests/mulh.o tests/mulhsu.o tests/mulhu.o tests/mul.o tests/ori.o tests/or.o tests/rem.o tests/remu.o tests/sb.o tests/sh1add.o tests/sh2add.o tests/sh3add.o tests/sh.o tests/simple.o tests/slli.o tests/sll.o tests/slti.o tests/slt.o tests/srai.o tests/sra.o tests/srli.o tests/srl.o tests/sub.o tests/sw.o tests/xori.o tests/xor.o -lgcc
chmod -x firmware/firmware.elf
/opt/riscv32i/bin/riscv32-unknown-elf-objcopy -O binary firmware/firmware.elf firmware/firmware.bin
chmod -x firmware/firmware.bin
python3 firmware/makehex.py firmware/firmware.bin 32768 > firmware/firmware.hex
vvp -N testbench.vvp +vcd +trace +noerror
VCD info: dumpfile testbench.vcd opened for output.
hello world
lui..OK
auipc..OK
...
binv..OK
binvi..OK
bext..OK
bexti..OK
bset..OK
bseti..OK
+clmul..OK
+clmulh..OK
+clmulr..OK
simple..OK
1st prime is 2.
2nd prime is 3.
...
30th prime is 113.
31st prime is 127.
checksum: 1772A48F OK
input [FFFFFFFF] 80000000 [FFFFFFFF] FFFFFFFF
hard mul 80000000 00000000 80000000 7FFFFFFF
soft mul 80000000 00000000 80000000 7FFFFFFF OK
hard div 80000000 00000000 00000000 80000000
soft div 80000000 00000000 00000000 80000000 OK
...
hard div FFFFFFFF 00000000 1B9D5F9C 38BAA671
soft div FFFFFFFF 00000000 1B9D5F9C 38BAA671 OK
Cycle counter ......... 488750
Instruction counter ... 108076
CPI: 4.52
DONE
------------------------------------------------------------
EBREAK instruction at 0x000007D2
pc 000007D5 x8 00000000 x16 F98C5E4E x24 00000000
x1 000007A4 x9 00000000 x17 1B639DFB x25 00000000
x2 00020000 x10 20000000 x18 00000000 x26 00000000
x3 DEADBEEF x11 075BCD15 x19 000070F0 x27 00000000
x4 DEADBEEF x12 0000004F x20 00000000 x28 1B639DFB
x5 0000107C x13 0000004E x21 00000000 x29 38BAA670
x6 1B639DFB x14 00000045 x22 00000000 x30 00000000
x7 00000000 x15 0000000A x23 00000000 x31 00000000
------------------------------------------------------------
Number of fast external IRQs counted: 61
Number of slow external IRQs counted: 7
Number of timer IRQs counted: 33
Finished writing testbench.trace.
TRAP after 530878 clock cycles
ALL TESTS PASSED.
iverilog -o testbench.vvp -DCOMPRESSED_ISA testbench.v picorv32.v
chmod -x testbench.vvp
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zbs -o firmware/start.o firmware/start.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zbs -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/irq.o firmware/irq.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zbs -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/print.o firmware/print.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zbs -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/hello.o firmware/hello.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zbs -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/sieve.o firmware/sieve.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zbs -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/multest.o firmware/multest.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32imc_zbs -Os --std=c99 -Werror -Wall -Wextra -Wshadow -Wundef -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wredundant-decls -Wstrict-prototypes -Wmissing-prototypes -pedantic -ffreestanding -nostdlib -o firmware/stats.o firmware/stats.c
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zbs -o tests/addi.o -DTEST_FUNC_NAME=addi \
-DTEST_FUNC_TXT='"addi"' -DTEST_FUNC_RET=addi_ret tests/addi.S
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -c -mabi=ilp32 -march=rv32im_zbs -o tests/add.o -DTEST_FUNC_NAME=add \
-DTEST_FUNC_TXT='"add"' -DTEST_FUNC_RET=add_ret tests/add.S
...
/opt/riscv32i/bin/riscv32-unknown-elf-gcc -Os -mabi=ilp32 -march=rv32imc_zbs -ffreestanding -nostdlib -o firmware/firmware.elf \
-Wl,--build-id=none,-Bstatic,-T,firmware/sections.lds,-Map,firmware/firmware.map,--strip-debug \
firmware/start.o firmware/irq.o firmware/print.o firmware/hello.o firmware/sieve.o firmware/multest.o firmware/stats.o tests/addi.o tests/add.o tests/andi.o tests/and.o tests/auipc.o tests/bclri.o tests/bclr.o tests/beq.o tests/bexti.o tests/bext.o tests/bge.o tests/bgeu.o tests/binvi.o tests/binv.o tests/blt.o tests/bltu.o tests/bne.o tests/bseti.o tests/bset.o tests/div.o tests/divu.o tests/jalr.o tests/jal.o tests/j.o tests/lb.o tests/lbu.o tests/lh.o tests/lhu.o tests/lui.o tests/lw.o tests/mulh.o tests/mulhsu.o tests/mulhu.o tests/mul.o tests/ori.o tests/or.o tests/rem.o tests/remu.o tests/sb.o tests/sh.o tests/simple.o tests/slli.o tests/sll.o tests/slti.o tests/slt.o tests/srai.o tests/sra.o tests/srli.o tests/srl.o tests/sub.o tests/sw.o tests/xori.o tests/xor.o -lgcc
/opt/riscv32i/lib/gcc/riscv32-unknown-elf/14.2.0/../../../../riscv32-unknown-elf/bin/ld: warning: firmware/firmware.elf has a LOAD segment with RWX permissions
chmod -x firmware/firmware.elf
/opt/riscv32i/bin/riscv32-unknown-elf-objcopy -O binary firmware/firmware.elf firmware/firmware.bin
chmod -x firmware/firmware.bin
python3 firmware/makehex.py firmware/firmware.bin 32768 > firmware/firmware.hex
vvp -N testbench.vvp +vcd +trace +noerror
VCD info: dumpfile testbench.vcd opened for output.
hello world
lui..OK
auipc..OK
...
simple..OK
+bclr..OK
+bext..OK
+binv..OK
+bset..OK
+bclri..OK
+bexti..OK
+binvi..OK
+bseti..OK
1st prime is 2.
2nd prime is 3.
...
31st prime is 127.
checksum: 1772A48F OK
input [FFFFFFFF] 80000000 [FFFFFFFF] FFFFFFFF
hard mul 80000000 00000000 80000000 7FFFFFFF
soft mul 80000000 00000000 80000000 7FFFFFFF OK
...
soft mul 1B639DFB F98C5E4E 324704BF 324704BF OK
hard div FFFFFFFF 00000000 1B9D5F9C 38BAA671
soft div FFFFFFFF 00000000 1B9D5F9C 38BAA671 OK
Cycle counter ......... 400810
Instruction counter .... 82641
CPI: 4.85
DONE
------------------------------------------------------------
EBREAK instruction at 0x0000078A
pc 0000078D x8 00000000 x16 00000000 x24 00000000
x1 0000075C x9 00000000 x17 00000000 x25 00000000
x2 00020000 x10 20000000 x18 00000000 x26 00000000
x3 DEADBEEF x11 075BCD15 x19 00006098 x27 00000000
x4 DEADBEEF x12 0000004F x20 00000000 x28 00000019
x5 0000E3B0 x13 0000004E x21 00000000 x29 00000000
x6 FF00FF00 x14 00000045 x22 00000000 x30 00000000
x7 00000000 x15 0000000A x23 00000000 x31 00000000
------------------------------------------------------------
Number of fast external IRQs counted: 49
Number of slow external IRQs counted: 6
Number of timer IRQs counted: 26
Finished writing testbench.trace.
TRAP after 434128 clock cycles
ALL TESTS PASSED.
We performed validation on the CLZ instruction in the B extension and compared it with the CLZ implementation in RV32I.
CLZ C implement
Below is the optimized CLZ function in C code:
/*
 * Count the leading zero bits of a 32-bit value.
 *
 * The search narrows in five steps (16, 8, 4, 2, 1 bits). Whenever the top
 * part under inspection is empty, that many zeros are added to the count and
 * x is shifted left so the next step examines the remaining bits. Each step
 * is computed arithmetically from a 0/1 comparison result.
 *
 * Returns 0..31 for a non-zero x, and 32 when x is 0.
 */
static inline int clz32(uint32_t x) {
    int zeros = 0;
    int step;

    step = (x < 0x00010000u) << 4;  /* 16 when bits 31..16 are all clear */
    zeros += step;
    x <<= step;

    step = (x < 0x01000000u) << 3;  /* 8 when bits 31..24 are all clear */
    zeros += step;
    x <<= step;

    step = (x < 0x10000000u) << 2;  /* 4 when bits 31..28 are all clear */
    zeros += step;
    x <<= step;

    step = (x < 0x40000000u) << 1;  /* 2 when bits 31..30 are all clear */
    zeros += step;
    x <<= step;

    zeros += (x < 0x80000000u);     /* 1 when bit 31 is clear */

    /*
     * Only zero bits were ever shifted out, so x is still 0 here exactly
     * when the input was 0; the steps above counted 31 in that case.
     */
    if (x == 0) {
        zeros = 32;
    }
    return zeros;
}
CLZ assembly
The corresponding RV32I assembly for the CLZ C code is shown below:
# clz32: count the leading zero bits of a 32-bit value (base RV32I only).
#   in:  a0 = x
#   out: a0 = number of leading zero bits, 0..32 (32 when x == 0)
# s0 holds the running count n, s1 the current bit mask, t2 is scratch.
# s0 and s1 are saved on the stack and restored before returning.
clz32:
    bnez a0, clz32_nonzero    # x == 0: the search below would stop at 31,
    li   a0, 32               # but the C version returns 32 for zero
    ret
clz32_nonzero:
    addi sp, sp, -16          # Allocate stack space
    sw   s0, 0(sp)            # Save s0
    sw   s1, 4(sp)            # Save s1
    li   s0, 0                # Initialize n = 0

    li   s1, 0xFFFF0000       # Check if the upper 16 bits are 0
    and  t2, a0, s1
    bnez t2, skip_16          # If the upper 16 bits are not 0, skip
    addi s0, s0, 16           # n += 16
    slli a0, a0, 16           # x <<= 16
skip_16:
    li   s1, 0xFF000000       # Check if the upper 8 bits are 0
    and  t2, a0, s1
    bnez t2, skip_8
    addi s0, s0, 8            # n += 8
    slli a0, a0, 8            # x <<= 8
skip_8:
    li   s1, 0xF0000000       # Check if the upper 4 bits are 0
    and  t2, a0, s1
    bnez t2, skip_4
    addi s0, s0, 4            # n += 4
    slli a0, a0, 4            # x <<= 4
skip_4:
    li   s1, 0xC0000000       # Check if the upper 2 bits are 0
    and  t2, a0, s1
    bnez t2, skip_2
    addi s0, s0, 2            # n += 2
    slli a0, a0, 2            # x <<= 2
skip_2:
    li   s1, 0x80000000       # Check if the highest bit is 0
    and  t2, a0, s1
    bnez t2, end_clz
    addi s0, s0, 1            # n += 1
end_clz:
    mv   a0, s0               # Return n
    lw   s0, 0(sp)            # Restore s0
    lw   s1, 4(sp)            # Restore s1
    addi sp, sp, 16           # Reclaim stack space
    ret
Validation result
We implemented clztest.c to perform the test and compare the cycle counts of the two implementations.
/* clztest.c in picorv32/firmware */
#include "firmware.h"
/*
 * Software count-leading-zeros for a 32-bit value.
 *
 * Five narrowing steps (16, 8, 4, 2, 1): each step tests whether the top
 * bits still under consideration are all clear, adds the step width to the
 * count if so, and shifts x left by the same amount. The step width is
 * derived arithmetically from the 0/1 comparison result.
 *
 * Returns the number of leading zero bits (0..31), or 32 when x is 0.
 */
int clz32(uint32_t x) {
    int count = 0;
    int shift;

    /* Bits 31..16 empty? */
    shift = ((x >> 16) == 0) << 4;
    count += shift;
    x <<= shift;

    /* Bits 31..24 empty? */
    shift = ((x >> 24) == 0) << 3;
    count += shift;
    x <<= shift;

    /* Bits 31..28 empty? */
    shift = ((x >> 28) == 0) << 2;
    count += shift;
    x <<= shift;

    /* Bits 31..30 empty? */
    shift = ((x >> 30) == 0) << 1;
    count += shift;
    x <<= shift;

    /* Bit 31 clear? */
    count += ((x >> 31) == 0);

    /* A zero input is still zero after all the shifts; report 32 for it. */
    return (x != 0) ? count : 32;
}
/*
 * Run the software clz32() once on `input`, bracketed by two rdcycle reads,
 * then print the result and the difference between the two cycle readings.
 *
 * The result is passed to print_hex with a digit argument of 2
 * (presumably two hex digits, e.g. 32 would show as "20" - TODO confirm
 * against print_hex in firmware.h).
 */
void clz_software_test(uint32_t input) {
    unsigned int cycles_before, cycles_after;
    uint32_t leading_zeros;

    /* Sample the cycle counter immediately around the call being measured. */
    __asm__ volatile ("rdcycle %0" : "=r"(cycles_before));
    leading_zeros = clz32(input);
    __asm__ volatile ("rdcycle %0" : "=r"(cycles_after));

    print_str("Software CLZ: ");
    print_hex(leading_zeros, 2);
    print_str("\n");

    print_str("Cycles: ");
    print_dec(cycles_after - cycles_before);
    print_str("\n");
}
/*
 * Run hard_clz() once on `input`, bracketed by two rdcycle reads, then print
 * the result and the difference between the two cycle readings.
 *
 * hard_clz is defined outside this snippet; presumably it wraps the Zbb
 * `clz` instruction - TODO confirm.
 */
void clz_hardware_test(uint32_t input) {
    unsigned int cycles_before, cycles_after;
    uint32_t leading_zeros;

    /* Sample the cycle counter immediately around the call being measured. */
    __asm__ volatile ("rdcycle %0" : "=r"(cycles_before));
    leading_zeros = hard_clz(input);
    __asm__ volatile ("rdcycle %0" : "=r"(cycles_after));

    print_str("Hardware CLZ: ");
    print_hex(leading_zeros, 2);
    print_str("\n");

    print_str("Cycles: ");
    print_dec(cycles_after - cycles_before);
    print_str("\n");
}
/*
 * Driver for the CLZ comparison: for each fixed test value, print the input
 * and then run the software and hardware measurements on it, in that order.
 */
void clztest(void) {
    uint32_t test_data[3] = {0xFFFFFFFF, 0x00000000, 0x0EAB1234};
    const uint32_t *const last = test_data + 3;
    const uint32_t *cur;

    for (cur = test_data; cur != last; cur++) {
        uint32_t value = *cur;

        print_str("Input: ");
        print_hex(value, 8);
        print_str("\n");

        clz_software_test(value);
        clz_hardware_test(value);

        print_str("\n");
    }
}
And below is the validation result, which shows the cycles of 5 clz examples.
Input: FFFFFFFF
RV32i implemented CLZ: 0 Cycles: 116
RV32i_zbb CLZ: 0 Cycles: 20
Input: 00000000
RV32i implemented CLZ: 32 Cycles: 101
RV32i_zbb CLZ: 32 Cycles: 20
Input: 0EAB1234
RV32i implemented CLZ: 4 Cycles: 117
RV32i_zbb CLZ: 4 Cycles: 20
Input: 000ABCDE
RV32i implemented CLZ: 12 Cycles: 119
RV32i_zbb CLZ: 12 Cycles: 20
Input: 000000AA
RV32i implemented CLZ: 24 Cycles: 122
RV32i_zbb CLZ: 24 Cycles: 20
DONE