# Assignment2: RISC-V Toolchain ###### tags: `riscv` ## Count Leading Zero ```contributed by 鄭惟 ``` (https://hackmd.io/@WeiCheng14159/rkUifs2Hw) :::spoiler **Assembly Code** ```assembly= .data input: .word 0x0000000f one: .word 0x80000000 str1: .string "clz value of " str2: .string " is " .text main: lw a0, input # Load input from static data jal ra, clz # Jump-and-link to the 'clz' label # Print the result to console mv a1, a0 lw a0, input jal ra, printResult # Exit program li a7, 10 ecall clz: # t0 = one # t1 = cnt = 32 # t2 = res # a0 = i lw t0, one li t1, 32 li t2, 0 _beg: bne t1, zero, cnt _ret: mv a0, t2 ret cnt: addi t1,t1,-1 and t3, a0, t0 # i & one bne t3, zero, _ret addi t2, t2, 1 srli t0, t0, 1 j _beg # --- printResult --- # a0: input # a1: result printResult: mv t0, a0 mv t1, a1 la a0, str1 li a7, 4 ecall mv a0, t0 li a7, 1 ecall la a0, str2 li a7, 4 ecall mv a0, t1 li a7, 1 ecall ret ``` ::: :::spoiler **C code** ```c= /* Count the leading zero bits of i: test bits from bit 31 downward with a one-bit mask and return how many were clear before the first set bit (32 when i is 0). */ unsigned int clz(unsigned int i) { unsigned int one = 0x80000000; unsigned int res = 0; for (int cnt = 0; cnt < 32; cnt++) { if ((i & one) == 0) res++; else return res; /* first set bit reached */ one = one >> 1; } return res; } ``` ::: ### Rewrite assembly programs into C implementation ```c= /* Count the leading zero bits of i: test bits from bit 31 downward with a one-bit mask and return how many were clear before the first set bit (32 when i is 0). */ unsigned int clz(unsigned int i) { unsigned int one = 0x80000000; unsigned int res = 0; for (int cnt = 0; cnt < 32; cnt++) { if ((i & one) == 0) res++; else return res; /* first set bit reached */ one = one >> 1; } return res; } /* Program entry point (the build uses -nostdlib, so there is no main). Prints "clz value of <input> is <result>" by storing one byte at a time through tx. */ void _start() { /* Presumably the emulator's console output port: the Run output below shows the bytes stored here. */ volatile char* tx = (volatile char*) 0x40002000; const char *str1 = "clz value of "; const char *str2 = " is "; /* Text form of the argument, printed as-is; it must be kept in sync with the constant passed to clz(). */ const char *input = "0x0fffffff"; unsigned int *p, res = clz(0x0fffffff); while (*str1) { *tx = *str1; str1++; } while (*input) { *tx = *input; input++; } while (*str2) { *tx = *str2; str2++; } /* Emit the result as a single ASCII character; this is a correct decimal digit only while res is in 0..9. */ p = &res; *tx = *p + '0'; } ``` --- ### Result **without optimization** Run ``` $ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -nostdlib clz.c -o clz $ ./emu-rv32i clz clz value of 0x0fffffff is 4 >>> Execution time: 43216 ns >>> Instruction count: 407 
(IPS=9417808) >>> Jumps: 45 (11.06%) - 11 forwards, 34 backwards >>> Branching T=33 (82.50%) F=7 (17.50%) ``` :::spoiler Objdump ``` $ riscv-none-embed-objdump -d clz clz: file format elf32-littleriscv Disassembly of section .text: 00010054 <clz>: 10054: fd010113 addi sp,sp,-48 10058: 02812623 sw s0,44(sp) 1005c: 03010413 addi s0,sp,48 10060: fca42e23 sw a0,-36(s0) 10064: 800007b7 lui a5,0x80000 10068: fef42623 sw a5,-20(s0) 1006c: fe042423 sw zero,-24(s0) 10070: fe042223 sw zero,-28(s0) 10074: 0440006f j 100b8 <clz+0x64> 10078: fdc42703 lw a4,-36(s0) 1007c: fec42783 lw a5,-20(s0) 10080: 00f777b3 and a5,a4,a5 10084: 00079a63 bnez a5,10098 <clz+0x44> 10088: fe842783 lw a5,-24(s0) 1008c: 00178793 addi a5,a5,1 # 80000001 <__global_pointer$+0x7ffee5fa> 10090: fef42423 sw a5,-24(s0) 10094: 00c0006f j 100a0 <clz+0x4c> 10098: fe842783 lw a5,-24(s0) 1009c: 02c0006f j 100c8 <clz+0x74> 100a0: fec42783 lw a5,-20(s0) 100a4: 0017d793 srli a5,a5,0x1 100a8: fef42623 sw a5,-20(s0) 100ac: fe442783 lw a5,-28(s0) 100b0: 00178793 addi a5,a5,1 100b4: fef42223 sw a5,-28(s0) 100b8: fe442703 lw a4,-28(s0) 100bc: 01f00793 li a5,31 100c0: fae7dce3 bge a5,a4,10078 <clz+0x24> 100c4: fe842783 lw a5,-24(s0) 100c8: 00078513 mv a0,a5 100cc: 02c12403 lw s0,44(sp) 100d0: 03010113 addi sp,sp,48 100d4: 00008067 ret 000100d8 <_start>: 100d8: fd010113 addi sp,sp,-48 100dc: 02112623 sw ra,44(sp) 100e0: 02812423 sw s0,40(sp) 100e4: 03010413 addi s0,sp,48 100e8: 400027b7 lui a5,0x40002 100ec: fef42023 sw a5,-32(s0) 100f0: 000107b7 lui a5,0x10 100f4: 1e478793 addi a5,a5,484 # 101e4 <_start+0x10c> 100f8: fef42623 sw a5,-20(s0) 100fc: 000107b7 lui a5,0x10 10100: 1f478793 addi a5,a5,500 # 101f4 <_start+0x11c> 10104: fef42423 sw a5,-24(s0) 10108: 000107b7 lui a5,0x10 1010c: 1fc78793 addi a5,a5,508 # 101fc <_start+0x124> 10110: fef42223 sw a5,-28(s0) 10114: 100007b7 lui a5,0x10000 10118: fff78513 addi a0,a5,-1 # fffffff <__global_pointer$+0xffee5f8> 1011c: f39ff0ef jal ra,10054 <clz> 10120: 00050793 mv a5,a0 
10124: fcf42c23 sw a5,-40(s0) 10128: 0200006f j 10148 <_start+0x70> 1012c: fec42783 lw a5,-20(s0) 10130: 0007c703 lbu a4,0(a5) 10134: fe042783 lw a5,-32(s0) 10138: 00e78023 sb a4,0(a5) 1013c: fec42783 lw a5,-20(s0) 10140: 00178793 addi a5,a5,1 10144: fef42623 sw a5,-20(s0) 10148: fec42783 lw a5,-20(s0) 1014c: 0007c783 lbu a5,0(a5) 10150: fc079ee3 bnez a5,1012c <_start+0x54> 10154: 0200006f j 10174 <_start+0x9c> 10158: fe442783 lw a5,-28(s0) 1015c: 0007c703 lbu a4,0(a5) 10160: fe042783 lw a5,-32(s0) 10164: 00e78023 sb a4,0(a5) 10168: fe442783 lw a5,-28(s0) 1016c: 00178793 addi a5,a5,1 10170: fef42223 sw a5,-28(s0) 10174: fe442783 lw a5,-28(s0) 10178: 0007c783 lbu a5,0(a5) 1017c: fc079ee3 bnez a5,10158 <_start+0x80> 10180: 0200006f j 101a0 <_start+0xc8> 10184: fe842783 lw a5,-24(s0) 10188: 0007c703 lbu a4,0(a5) 1018c: fe042783 lw a5,-32(s0) 10190: 00e78023 sb a4,0(a5) 10194: fe842783 lw a5,-24(s0) 10198: 00178793 addi a5,a5,1 1019c: fef42423 sw a5,-24(s0) 101a0: fe842783 lw a5,-24(s0) 101a4: 0007c783 lbu a5,0(a5) 101a8: fc079ee3 bnez a5,10184 <_start+0xac> 101ac: fd840793 addi a5,s0,-40 101b0: fcf42e23 sw a5,-36(s0) 101b4: fdc42783 lw a5,-36(s0) 101b8: 0007a783 lw a5,0(a5) 101bc: 0ff7f793 andi a5,a5,255 101c0: 03078793 addi a5,a5,48 101c4: 0ff7f713 andi a4,a5,255 101c8: fe042783 lw a5,-32(s0) 101cc: 00e78023 sb a4,0(a5) 101d0: 00000013 nop 101d4: 02c12083 lw ra,44(sp) 101d8: 02812403 lw s0,40(sp) 101dc: 03010113 addi sp,sp,48 101e0: 00008067 ret ``` ::: Instruction Statistics ``` Instructions Stat: LUI = 6 JAL = 10 JALR = 2 BNE = 35 BGE = 5 LW = 145 LBU = 57 SB = 28 SW = 52 ADDI = 55 ANDI = 2 SRLI = 4 AND = 5 LI* = 6 Five Most Frequent: 1) LW = 145 (35.63%) 2) LBU = 57 (14.00%) 3) ADDI = 55 (13.51%) 4) SW = 52 (12.78%) 5) BNE = 35 (8.60%) ``` Readelf ``` $ riscv-none-embed-readelf -h clz ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC 
(Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x100d8 Start of program headers: 52 (bytes into file) Start of section headers: 944 (bytes into file) Flags: 0x0 Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 1 Size of section headers: 40 (bytes) Number of section headers: 7 Section header string table index: 6 ``` Size ``` $ riscv-none-embed-size clz text data bss dec hex filename 435 0 0 435 1b3 clz ``` --- **with O3 optimization** Run ``` $ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib clz.c -o clz $ ./emu-rv32i clz clz value of 0x0fffffff is 4 >>> Execution time: 17247 ns >>> Instruction count: 150 (IPS=8697164) >>> Jumps: 28 (18.67%) - 0 forwards, 28 backwards >>> Branching T=27 (77.14%) F=8 (22.86%) ``` :::spoiler Objdump ``` clz: file format elf32-littleriscv Disassembly of section .text: 00010054 <clz>: 10054: 00000793 li a5,0 10058: 80000737 lui a4,0x80000 1005c: 02000613 li a2,32 10060: 0100006f j 10070 <clz+0x1c> 10064: 00178793 addi a5,a5,1 10068: 00175713 srli a4,a4,0x1 1006c: 00c78663 beq a5,a2,10078 <clz+0x24> 10070: 00e576b3 and a3,a0,a4 10074: fe0688e3 beqz a3,10064 <clz+0x10> 10078: 00078513 mv a0,a5 1007c: 00008067 ret 00010080 <_start>: 10080: 10000637 lui a2,0x10000 10084: 00000793 li a5,0 10088: 80000737 lui a4,0x80000 1008c: 02000593 li a1,32 10090: fff60613 addi a2,a2,-1 # fffffff <__global_pointer$+0xffee6c3> 10094: 00175713 srli a4,a4,0x1 10098: 00178793 addi a5,a5,1 1009c: 00c776b3 and a3,a4,a2 100a0: 00b78463 beq a5,a1,100a8 <_start+0x28> 100a4: fe0688e3 beqz a3,10094 <_start+0x14> 100a8: 00010737 lui a4,0x10 100ac: 11870713 addi a4,a4,280 # 10118 <_start+0x98> 100b0: 06300693 li a3,99 100b4: 40002637 lui a2,0x40002 100b8: 00d60023 sb a3,0(a2) # 40002000 <__global_pointer$+0x3fff06c4> 100bc: 00170713 addi a4,a4,1 100c0: 00074683 lbu a3,0(a4) 100c4: fe069ae3 bnez a3,100b8 <_start+0x38> 100c8: 00010737 lui a4,0x10 100cc: 12870713 addi a4,a4,296 # 10128 
<_start+0xa8> 100d0: 03000693 li a3,48 100d4: 40002637 lui a2,0x40002 100d8: 00d60023 sb a3,0(a2) # 40002000 <__global_pointer$+0x3fff06c4> 100dc: 00170713 addi a4,a4,1 100e0: 00074683 lbu a3,0(a4) 100e4: fe069ae3 bnez a3,100d8 <_start+0x58> 100e8: 00010737 lui a4,0x10 100ec: 13470713 addi a4,a4,308 # 10134 <_start+0xb4> 100f0: 02000693 li a3,32 100f4: 40002637 lui a2,0x40002 100f8: 00d60023 sb a3,0(a2) # 40002000 <__global_pointer$+0x3fff06c4> 100fc: 00170713 addi a4,a4,1 10100: 00074683 lbu a3,0(a4) 10104: fe069ae3 bnez a3,100f8 <_start+0x78> 10108: 03078793 addi a5,a5,48 1010c: 0ff7f793 andi a5,a5,255 10110: 00f60023 sb a5,0(a2) 10114: 00008067 ret ``` ::: Instruction Statistics ``` Instructions Stat: LUI = 8 JALR = 1 BEQ = 8 BNE = 27 LBU = 27 SB = 28 ADDI = 41 ANDI = 1 SRLI = 4 AND = 4 LI* = 5 Five Most Frequent: 1) ADDI = 41 (27.33%) 2) SB = 28 (18.67%) 3) BNE = 27 (18.00%) 4) LBU = 27 (18.00%) 5) LUI = 8 (5.33%) ``` Readelf ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x10080 Start of program headers: 52 (bytes into file) Start of section headers: 740 (bytes into file) Flags: 0x0 Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 1 Size of section headers: 40 (bytes) Number of section headers: 7 Section header string table index: 6 ``` Size ``` text data bss dec hex filename 232 0 0 232 e8 clz ``` --- **with Os optimization** Run ``` $ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -Os -nostdlib clz.c -o clz $ ./emu-rv32i clz clz value of 0x0fffffff is 4 >>> Execution time: 17117 ns >>> Instruction count: 190 (IPS=11100075) >>> Jumps: 62 (32.63%) - 29 forwards, 33 backwards >>> Branching T=32 (82.05%) F=7 (17.95%) ``` :::spoiler Objdump ``` clz: file format elf32-littleriscv Disassembly of section .text: 
00010054 <clz>: 10054: 00000793 li a5,0 10058: 80000737 lui a4,0x80000 1005c: 02000693 li a3,32 10060: 00e57633 and a2,a0,a4 10064: 00061863 bnez a2,10074 <clz+0x20> 10068: 00178793 addi a5,a5,1 1006c: 00175713 srli a4,a4,0x1 10070: fed798e3 bne a5,a3,10060 <clz+0xc> 10074: 00078513 mv a0,a5 10078: 00008067 ret 0001007c <_start>: 1007c: 10000537 lui a0,0x10000 10080: ff010113 addi sp,sp,-16 10084: fff50513 addi a0,a0,-1 # fffffff <__global_pointer$+0xffee6cf> 10088: 00112623 sw ra,12(sp) 1008c: fc9ff0ef jal ra,10054 <clz> 10090: 000107b7 lui a5,0x10 10094: 10c78793 addi a5,a5,268 # 1010c <_start+0x90> 10098: 400026b7 lui a3,0x40002 1009c: 0007c703 lbu a4,0(a5) 100a0: 04071463 bnez a4,100e8 <_start+0x6c> 100a4: 000107b7 lui a5,0x10 100a8: 11c78793 addi a5,a5,284 # 1011c <_start+0xa0> 100ac: 400026b7 lui a3,0x40002 100b0: 0007c703 lbu a4,0(a5) 100b4: 04071063 bnez a4,100f4 <_start+0x78> 100b8: 000107b7 lui a5,0x10 100bc: 12878793 addi a5,a5,296 # 10128 <_start+0xac> 100c0: 400026b7 lui a3,0x40002 100c4: 0007c703 lbu a4,0(a5) 100c8: 02071c63 bnez a4,10100 <_start+0x84> 100cc: 03050513 addi a0,a0,48 100d0: 0ff57513 andi a0,a0,255 100d4: 400027b7 lui a5,0x40002 100d8: 00a78023 sb a0,0(a5) # 40002000 <__global_pointer$+0x3fff06d0> 100dc: 00c12083 lw ra,12(sp) 100e0: 01010113 addi sp,sp,16 100e4: 00008067 ret 100e8: 00e68023 sb a4,0(a3) # 40002000 <__global_pointer$+0x3fff06d0> 100ec: 00178793 addi a5,a5,1 100f0: fadff06f j 1009c <_start+0x20> 100f4: 00e68023 sb a4,0(a3) 100f8: 00178793 addi a5,a5,1 100fc: fb5ff06f j 100b0 <_start+0x34> 10100: 00e68023 sb a4,0(a3) 10104: 00178793 addi a5,a5,1 10108: fbdff06f j 100c4 <_start+0x48> ``` ::: Instruction Statistics ``` Instructions Stat: LUI = 9 JAL = 28 JALR = 2 BNE = 39 LW = 1 LBU = 30 SB = 28 SW = 1 ADDI = 41 ANDI = 1 SRLI = 4 AND = 5 LI* = 2 Five Most Frequent: 1) ADDI = 41 (21.58%) 2) BNE = 39 (20.53%) 3) LBU = 30 (15.79%) 4) JAL = 28 (14.74%) 5) SB = 28 (14.74%) ``` Readelf ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 
00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x1007c Start of program headers: 52 (bytes into file) Start of section headers: 728 (bytes into file) Flags: 0x0 Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 1 Size of section headers: 40 (bytes) Number of section headers: 7 Section header string table index: 6 ``` Size ``` text data bss dec hex filename 220 0 0 220 dc clz ``` --- | | O0 | O3 | Os | | ----------------- | ----------- | ----------- | ----------- | | Execution time | 43216 ns | 17247 ns | 17117 ns | | Instruction count | 407 | 150 | 190 | | Jumps | 45 (11.06%) | 28 (18.67%) | 62 (32.63%) | | Jumps forwards | 11 | 0 | 29 | | Jumps backwards | 34 | 28 | 33 | | Branching True | 33 (82.50%) | 27 (77.14%) | 32 (82.05%) | | Branching False | 7 (17.50%) | 8 (22.86%) | 7 (17.95%) | ---