Try   HackMD

Assignment2: RISC-V Toolchain

tags: riscv

Count Leading Zero

contributed by 鄭惟 (https://hackmd.io/@WeiCheng14159/rkUifs2Hw)

Assembly Code
# Count Leading Zero: counts how many zero bits precede the first set bit
# of the 32-bit word stored at 'input', then prints the input and the count.
.data
input: .word 0x0000000f         # value whose leading zeros are counted
one: .word 0x80000000           # mask with only bit 31 set
str1: .string "clz value of "
str2: .string " is "

.text
main:
    lw a0, input                # Load input from static data
    jal ra, clz                 # Jump-and-link to the 'clz' label

    # Print the result to console
    mv a1, a0                   # a1 = count returned by clz
    lw a0, input                # a0 = original input value
    jal ra, printResult

    # Exit program
    li a7, 10
    ecall

# --- clz ---
# a0: input value i; on return a0 holds the number of leading zero bits
clz:
    # t0 = one
    # t1 = cnt = 32
    # t2 = res
    # a0 = i
    lw t0, one
    li t1, 32
    li t2, 0
_beg:
    bne t1, zero, cnt           # bits left to test -> run the loop body
_ret:
    mv a0, t2                   # return res (32 when no bit of i is set)
    ret
cnt:
    addi t1,t1,-1               # one fewer bit position left to test
    and t3, a0, t0              # i & one
    bne t3, zero, _ret          # current bit is set: stop counting
    addi t2, t2, 1              # current bit is zero: res++
    srli t0, t0, 1              # move the mask to the next lower bit
    j _beg

# --- printResult ---
# a0: input
# a1: result
# Prints str1, the input, str2 and the result, in that order.
# NOTE(review): the ecall numbers (4, 1, and 10 in main) are presumably the
# simulator's print-string, print-integer and exit services - confirm against
# the simulator in use.
printResult:
    mv t0, a0                   # keep input: a0 is reused as the ecall argument
    mv t1, a1                   # keep result in t1 for the final print
    la a0, str1
    li a7, 4
    ecall
    mv a0, t0
    li a7, 1
    ecall
    la a0, str2
    li a7, 4
    ecall
    mv a0, t1
    li a7, 1
    ecall
    ret
C code
/*
 * Count the leading zero bits of a 32-bit value.
 *
 * Bits are probed from bit 31 downwards with a single-bit mask. The count
 * stops at the first set bit, or after all 32 positions have been tested.
 *
 * i: value to examine.
 * Returns the number of zero bits above the highest set bit (0..32);
 * 32 is returned when no bit in the low 32 bits of i is set.
 */
unsigned int clz(unsigned int i)
{
    unsigned int mask = 0x80000000; /* probe bit, starting at bit 31 */
    unsigned int zeros = 0;
    int remaining = 32;             /* bit positions still to test */

    while (remaining-- > 0) {
        if ((i & mask) != 0)
            break;                  /* first set bit found */
        zeros++;
        mask >>= 1;
    }
    return zeros;
}

Rewrite the assembly program as a C implementation

/*
 * Count the leading zero bits of a 32-bit value.
 *
 * Probes bit 31 downwards with a single-bit mask and returns how many zero
 * bits were seen before the first set bit (32 if none of the 32 bits is set).
 */
unsigned int clz(unsigned int i)
{
    unsigned int one = 0x80000000; /* mask, starts at bit 31 */
    unsigned int res = 0;          /* leading zeros counted so far */
    for (int cnt = 0; cnt < 32; cnt++) {
        if ((i & one) == 0)
            res++;
        else
            return res;            /* first set bit reached */
        one = one >> 1;
    }
    return res;
}

/*
 * Bare-metal entry point (the program is built with -nostdlib, so there is
 * no C runtime and no main).
 *
 * Computes clz(0x0fffffff) and emits "clz value of 0x0fffffff is <digit>"
 * one byte at a time through the pointer tx.
 */
void _start()
{
    /* Every character is stored to this fixed address.
     * NOTE(review): presumably the emulator's console output port - confirm. */
    volatile char* tx = (volatile char*) 0x40002000;
    const char *str1 = "clz value of ";
    const char *str2 = " is ";
    /* Hand-written text form of the constant passed to clz below; the two
     * must be kept in sync manually. */
    const char *input = "0x0fffffff";
    unsigned int *p, res = clz(0x0fffffff);

    /* Emit each string byte by byte up to (not including) its terminator. */
    while (*str1) {
        *tx = *str1;
        str1++;
    }
    while (*input) {
        *tx = *input;
        input++;
    }
    while (*str2) {
        *tx = *str2;
        str2++;
    }

    p = &res;
    /* Adding '0' turns the value into its ASCII digit. This only yields a
     * digit for results 0..9; a larger count (clz can return up to 32)
     * would be emitted as some other character. */
    *tx = *p + '0';
}

Result

without optimization
Run

$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -nostdlib clz.c -o clz
$ ./emu-rv32i clz 
clz value of 0x0fffffff is 4
>>> Execution time: 43216 ns
>>> Instruction count: 407 (IPS=9417808)
>>> Jumps: 45 (11.06%) - 11 forwards, 34 backwards
>>> Branching T=33 (82.50%) F=7 (17.50%)
Objdump
$ riscv-none-embed-objdump -d clz

clz:     file format elf32-littleriscv


Disassembly of section .text:

00010054 <clz>:
   10054:	fd010113          	addi	sp,sp,-48
   10058:	02812623          	sw	s0,44(sp)
   1005c:	03010413          	addi	s0,sp,48
   10060:	fca42e23          	sw	a0,-36(s0)
   10064:	800007b7          	lui	a5,0x80000
   10068:	fef42623          	sw	a5,-20(s0)
   1006c:	fe042423          	sw	zero,-24(s0)
   10070:	fe042223          	sw	zero,-28(s0)
   10074:	0440006f          	j	100b8 <clz+0x64>
   10078:	fdc42703          	lw	a4,-36(s0)
   1007c:	fec42783          	lw	a5,-20(s0)
   10080:	00f777b3          	and	a5,a4,a5
   10084:	00079a63          	bnez	a5,10098 <clz+0x44>
   10088:	fe842783          	lw	a5,-24(s0)
   1008c:	00178793          	addi	a5,a5,1 # 80000001 <__global_pointer$+0x7ffee5fa>
   10090:	fef42423          	sw	a5,-24(s0)
   10094:	00c0006f          	j	100a0 <clz+0x4c>
   10098:	fe842783          	lw	a5,-24(s0)
   1009c:	02c0006f          	j	100c8 <clz+0x74>
   100a0:	fec42783          	lw	a5,-20(s0)
   100a4:	0017d793          	srli	a5,a5,0x1
   100a8:	fef42623          	sw	a5,-20(s0)
   100ac:	fe442783          	lw	a5,-28(s0)
   100b0:	00178793          	addi	a5,a5,1
   100b4:	fef42223          	sw	a5,-28(s0)
   100b8:	fe442703          	lw	a4,-28(s0)
   100bc:	01f00793          	li	a5,31
   100c0:	fae7dce3          	bge	a5,a4,10078 <clz+0x24>
   100c4:	fe842783          	lw	a5,-24(s0)
   100c8:	00078513          	mv	a0,a5
   100cc:	02c12403          	lw	s0,44(sp)
   100d0:	03010113          	addi	sp,sp,48
   100d4:	00008067          	ret

000100d8 <_start>:
   100d8:	fd010113          	addi	sp,sp,-48
   100dc:	02112623          	sw	ra,44(sp)
   100e0:	02812423          	sw	s0,40(sp)
   100e4:	03010413          	addi	s0,sp,48
   100e8:	400027b7          	lui	a5,0x40002
   100ec:	fef42023          	sw	a5,-32(s0)
   100f0:	000107b7          	lui	a5,0x10
   100f4:	1e478793          	addi	a5,a5,484 # 101e4 <_start+0x10c>
   100f8:	fef42623          	sw	a5,-20(s0)
   100fc:	000107b7          	lui	a5,0x10
   10100:	1f478793          	addi	a5,a5,500 # 101f4 <_start+0x11c>
   10104:	fef42423          	sw	a5,-24(s0)
   10108:	000107b7          	lui	a5,0x10
   1010c:	1fc78793          	addi	a5,a5,508 # 101fc <_start+0x124>
   10110:	fef42223          	sw	a5,-28(s0)
   10114:	100007b7          	lui	a5,0x10000
   10118:	fff78513          	addi	a0,a5,-1 # fffffff <__global_pointer$+0xffee5f8>
   1011c:	f39ff0ef          	jal	ra,10054 <clz>
   10120:	00050793          	mv	a5,a0
   10124:	fcf42c23          	sw	a5,-40(s0)
   10128:	0200006f          	j	10148 <_start+0x70>
   1012c:	fec42783          	lw	a5,-20(s0)
   10130:	0007c703          	lbu	a4,0(a5)
   10134:	fe042783          	lw	a5,-32(s0)
   10138:	00e78023          	sb	a4,0(a5)
   1013c:	fec42783          	lw	a5,-20(s0)
   10140:	00178793          	addi	a5,a5,1
   10144:	fef42623          	sw	a5,-20(s0)
   10148:	fec42783          	lw	a5,-20(s0)
   1014c:	0007c783          	lbu	a5,0(a5)
   10150:	fc079ee3          	bnez	a5,1012c <_start+0x54>
   10154:	0200006f          	j	10174 <_start+0x9c>
   10158:	fe442783          	lw	a5,-28(s0)
   1015c:	0007c703          	lbu	a4,0(a5)
   10160:	fe042783          	lw	a5,-32(s0)
   10164:	00e78023          	sb	a4,0(a5)
   10168:	fe442783          	lw	a5,-28(s0)
   1016c:	00178793          	addi	a5,a5,1
   10170:	fef42223          	sw	a5,-28(s0)
   10174:	fe442783          	lw	a5,-28(s0)
   10178:	0007c783          	lbu	a5,0(a5)
   1017c:	fc079ee3          	bnez	a5,10158 <_start+0x80>
   10180:	0200006f          	j	101a0 <_start+0xc8>
   10184:	fe842783          	lw	a5,-24(s0)
   10188:	0007c703          	lbu	a4,0(a5)
   1018c:	fe042783          	lw	a5,-32(s0)
   10190:	00e78023          	sb	a4,0(a5)
   10194:	fe842783          	lw	a5,-24(s0)
   10198:	00178793          	addi	a5,a5,1
   1019c:	fef42423          	sw	a5,-24(s0)
   101a0:	fe842783          	lw	a5,-24(s0)
   101a4:	0007c783          	lbu	a5,0(a5)
   101a8:	fc079ee3          	bnez	a5,10184 <_start+0xac>
   101ac:	fd840793          	addi	a5,s0,-40
   101b0:	fcf42e23          	sw	a5,-36(s0)
   101b4:	fdc42783          	lw	a5,-36(s0)
   101b8:	0007a783          	lw	a5,0(a5)
   101bc:	0ff7f793          	andi	a5,a5,255
   101c0:	03078793          	addi	a5,a5,48
   101c4:	0ff7f713          	andi	a4,a5,255
   101c8:	fe042783          	lw	a5,-32(s0)
   101cc:	00e78023          	sb	a4,0(a5)
   101d0:	00000013          	nop
   101d4:	02c12083          	lw	ra,44(sp)
   101d8:	02812403          	lw	s0,40(sp)
   101dc:	03010113          	addi	sp,sp,48
   101e0:	00008067          	ret

Instruction Statistics

Instructions Stat:
LUI	= 6
JAL	= 10
JALR	= 2
BNE	= 35
BGE	= 5
LW	= 145
LBU	= 57
SB	= 28
SW	= 52
ADDI	= 55
ANDI	= 2
SRLI	= 4
AND	= 5
LI*	= 6

Five Most Frequent:
1) LW	= 145 (35.63%)
2) LBU	= 57 (14.00%)
3) ADDI	= 55 (13.51%)
4) SW	= 52 (12.78%)
5) BNE	= 35 (8.60%)

Readelf

$ riscv-none-embed-readelf -h clz
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x100d8
  Start of program headers:          52 (bytes into file)
  Start of section headers:          944 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         1
  Size of section headers:           40 (bytes)
  Number of section headers:         7
  Section header string table index: 6

Size

$ riscv-none-embed-size clz
   text	   data	    bss	    dec	    hex	filename
    435	      0	      0	    435	    1b3	clz

with O3 optimization

Run

$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib clz.c -o clz
$ ./emu-rv32i clz 
clz value of 0x0fffffff is 4
>>> Execution time: 17247 ns
>>> Instruction count: 150 (IPS=8697164)
>>> Jumps: 28 (18.67%) - 0 forwards, 28 backwards
>>> Branching T=27 (77.14%) F=8 (22.86%)
Objdump
clz:     file format elf32-littleriscv


Disassembly of section .text:

00010054 <clz>:
   10054:	00000793          	li	a5,0
   10058:	80000737          	lui	a4,0x80000
   1005c:	02000613          	li	a2,32
   10060:	0100006f          	j	10070 <clz+0x1c>
   10064:	00178793          	addi	a5,a5,1
   10068:	00175713          	srli	a4,a4,0x1
   1006c:	00c78663          	beq	a5,a2,10078 <clz+0x24>
   10070:	00e576b3          	and	a3,a0,a4
   10074:	fe0688e3          	beqz	a3,10064 <clz+0x10>
   10078:	00078513          	mv	a0,a5
   1007c:	00008067          	ret

00010080 <_start>:
   10080:	10000637          	lui	a2,0x10000
   10084:	00000793          	li	a5,0
   10088:	80000737          	lui	a4,0x80000
   1008c:	02000593          	li	a1,32
   10090:	fff60613          	addi	a2,a2,-1 # fffffff <__global_pointer$+0xffee6c3>
   10094:	00175713          	srli	a4,a4,0x1
   10098:	00178793          	addi	a5,a5,1
   1009c:	00c776b3          	and	a3,a4,a2
   100a0:	00b78463          	beq	a5,a1,100a8 <_start+0x28>
   100a4:	fe0688e3          	beqz	a3,10094 <_start+0x14>
   100a8:	00010737          	lui	a4,0x10
   100ac:	11870713          	addi	a4,a4,280 # 10118 <_start+0x98>
   100b0:	06300693          	li	a3,99
   100b4:	40002637          	lui	a2,0x40002
   100b8:	00d60023          	sb	a3,0(a2) # 40002000 <__global_pointer$+0x3fff06c4>
   100bc:	00170713          	addi	a4,a4,1
   100c0:	00074683          	lbu	a3,0(a4)
   100c4:	fe069ae3          	bnez	a3,100b8 <_start+0x38>
   100c8:	00010737          	lui	a4,0x10
   100cc:	12870713          	addi	a4,a4,296 # 10128 <_start+0xa8>
   100d0:	03000693          	li	a3,48
   100d4:	40002637          	lui	a2,0x40002
   100d8:	00d60023          	sb	a3,0(a2) # 40002000 <__global_pointer$+0x3fff06c4>
   100dc:	00170713          	addi	a4,a4,1
   100e0:	00074683          	lbu	a3,0(a4)
   100e4:	fe069ae3          	bnez	a3,100d8 <_start+0x58>
   100e8:	00010737          	lui	a4,0x10
   100ec:	13470713          	addi	a4,a4,308 # 10134 <_start+0xb4>
   100f0:	02000693          	li	a3,32
   100f4:	40002637          	lui	a2,0x40002
   100f8:	00d60023          	sb	a3,0(a2) # 40002000 <__global_pointer$+0x3fff06c4>
   100fc:	00170713          	addi	a4,a4,1
   10100:	00074683          	lbu	a3,0(a4)
   10104:	fe069ae3          	bnez	a3,100f8 <_start+0x78>
   10108:	03078793          	addi	a5,a5,48
   1010c:	0ff7f793          	andi	a5,a5,255
   10110:	00f60023          	sb	a5,0(a2)
   10114:	00008067          	ret

Instruction Statistics

Instructions Stat:
LUI	= 8
JALR	= 1
BEQ	= 8
BNE	= 27
LBU	= 27
SB	= 28
ADDI	= 41
ANDI	= 1
SRLI	= 4
AND	= 4
LI*	= 5

Five Most Frequent:
1) ADDI	= 41 (27.33%)
2) SB	= 28 (18.67%)
3) BNE	= 27 (18.00%)
4) LBU	= 27 (18.00%)
5) LUI	= 8 (5.33%)

Readelf

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x10080
  Start of program headers:          52 (bytes into file)
  Start of section headers:          740 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         1
  Size of section headers:           40 (bytes)
  Number of section headers:         7
  Section header string table index: 6

Size

   text	   data	    bss	    dec	    hex	filename
    232	      0	      0	    232	     e8	clz

with Os optimization

Run

$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -Os -nostdlib clz.c -o clz
$ ./emu-rv32i clz 
clz value of 0x0fffffff is 4
>>> Execution time: 17117 ns
>>> Instruction count: 190 (IPS=11100075)
>>> Jumps: 62 (32.63%) - 29 forwards, 33 backwards
>>> Branching T=32 (82.05%) F=7 (17.95%)
Objdump
clz:     file format elf32-littleriscv


Disassembly of section .text:

00010054 <clz>:
   10054:	00000793          	li	a5,0
   10058:	80000737          	lui	a4,0x80000
   1005c:	02000693          	li	a3,32
   10060:	00e57633          	and	a2,a0,a4
   10064:	00061863          	bnez	a2,10074 <clz+0x20>
   10068:	00178793          	addi	a5,a5,1
   1006c:	00175713          	srli	a4,a4,0x1
   10070:	fed798e3          	bne	a5,a3,10060 <clz+0xc>
   10074:	00078513          	mv	a0,a5
   10078:	00008067          	ret

0001007c <_start>:
   1007c:	10000537          	lui	a0,0x10000
   10080:	ff010113          	addi	sp,sp,-16
   10084:	fff50513          	addi	a0,a0,-1 # fffffff <__global_pointer$+0xffee6cf>
   10088:	00112623          	sw	ra,12(sp)
   1008c:	fc9ff0ef          	jal	ra,10054 <clz>
   10090:	000107b7          	lui	a5,0x10
   10094:	10c78793          	addi	a5,a5,268 # 1010c <_start+0x90>
   10098:	400026b7          	lui	a3,0x40002
   1009c:	0007c703          	lbu	a4,0(a5)
   100a0:	04071463          	bnez	a4,100e8 <_start+0x6c>
   100a4:	000107b7          	lui	a5,0x10
   100a8:	11c78793          	addi	a5,a5,284 # 1011c <_start+0xa0>
   100ac:	400026b7          	lui	a3,0x40002
   100b0:	0007c703          	lbu	a4,0(a5)
   100b4:	04071063          	bnez	a4,100f4 <_start+0x78>
   100b8:	000107b7          	lui	a5,0x10
   100bc:	12878793          	addi	a5,a5,296 # 10128 <_start+0xac>
   100c0:	400026b7          	lui	a3,0x40002
   100c4:	0007c703          	lbu	a4,0(a5)
   100c8:	02071c63          	bnez	a4,10100 <_start+0x84>
   100cc:	03050513          	addi	a0,a0,48
   100d0:	0ff57513          	andi	a0,a0,255
   100d4:	400027b7          	lui	a5,0x40002
   100d8:	00a78023          	sb	a0,0(a5) # 40002000 <__global_pointer$+0x3fff06d0>
   100dc:	00c12083          	lw	ra,12(sp)
   100e0:	01010113          	addi	sp,sp,16
   100e4:	00008067          	ret
   100e8:	00e68023          	sb	a4,0(a3) # 40002000 <__global_pointer$+0x3fff06d0>
   100ec:	00178793          	addi	a5,a5,1
   100f0:	fadff06f          	j	1009c <_start+0x20>
   100f4:	00e68023          	sb	a4,0(a3)
   100f8:	00178793          	addi	a5,a5,1
   100fc:	fb5ff06f          	j	100b0 <_start+0x34>
   10100:	00e68023          	sb	a4,0(a3)
   10104:	00178793          	addi	a5,a5,1
   10108:	fbdff06f          	j	100c4 <_start+0x48>

Instruction Statistics

Instructions Stat:
LUI	= 9
JAL	= 28
JALR	= 2
BNE	= 39
LW	= 1
LBU	= 30
SB	= 28
SW	= 1
ADDI	= 41
ANDI	= 1
SRLI	= 4
AND	= 5
LI*	= 2

Five Most Frequent:
1) ADDI	= 41 (21.58%)
2) BNE	= 39 (20.53%)
3) LBU	= 30 (15.79%)
4) JAL	= 28 (14.74%)
5) SB	= 28 (14.74%)

Readelf

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1007c
  Start of program headers:          52 (bytes into file)
  Start of section headers:          728 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         1
  Size of section headers:           40 (bytes)
  Number of section headers:         7
  Section header string table index: 6

Size

   text	   data	    bss	    dec	    hex	filename
    220	      0	      0	    220	     dc	clz

| Metric | O0 | O3 | Os |
| --- | --- | --- | --- |
| Execution time | 43216 ns | 17247 ns | 17117 ns |
| Instruction count | 407 | 150 | 190 |
| Jumps | 45 (11.06%) | 28 (18.67%) | 62 (32.63%) |
| Jumps forwards | 11 | 0 | 29 |
| Jumps backwards | 34 | 28 | 33 |
| Branching True | 33 (82.50%) | 27 (77.14%) | 32 (82.05%) |
| Branching False | 7 (17.50%) | 8 (22.86%) | 7 (17.95%) |