Try   HackMD

Assignment2: RISC-V Toolchain

tags: riscv

Bubble Sort

contributed by 王傑世 (https://hackmd.io/4-oWQOprRnCLu3ZeJy31kA?view)

Assembly code
.data arr: .word 2, 3, 7, 4, 1 .text main: la s0, arr mv t3, s0 addi s1, s1, 4 addi t0, zero, -1 jal ra, bbsort mv t0, zero addi s1, s1, 1 mv s0, t3 j print bbsort: addi sp, sp, -12 sw ra, 8(sp) sw s1, 4(sp) sw s0, 0(sp) oloop: addi t0, t0, 1 mv t1, zero sub t2, s1, t0 blt t0, s1, iloop addi sp, sp, 12 jr ra iloop: mv s0, t3 bge t1, t2, oloop slli t4, t1, 2 add s0, s0, t4 lw a2, 0(s0) lw a3, 4(s0) addi t1, t1, 1 blt a2, a3, swap j iloop swap: sw a2, 4(s0) sw a3, 0(s0) j iloop print: bge t0, s1, end lw a0, 0(s0) li a7, 1 ecall addi t0, t0, 1 addi s0, s0, 4 j print end:
C code
void BBSort(int *arr, int size) { for (int i = 0; i < size; ++i) for (int j = 0; j < size - i; ++j) if (arr[j] < arr[j + 1]) { int temp = arr[j]; arr[j] = arr[j + 1]; arr[j + 1] = temp; } } int main() { int arr[] = {2, 3, 7, 4, 1}; int size = 5; BBSort(arr, size - 1); for (int i = 0; i < size; ++i) { printf("%d\n", arr[i]); } return 0; }

Rewrite assembly programs into C implementation

void BBSort(int *arr, int size) { for (int i = 0; i < size; ++i) for (int j = 0; j < size - i; ++j) if (arr[j] < arr[j + 1]) { int temp = arr[j]; arr[j] = arr[j + 1]; arr[j + 1] = temp; } } void _start() { volatile char* tx = (volatile char*) 0x40002000; int *p, size = 5, arr[5]; arr[0] = 2; arr[1] = 3; arr[2] = 7; arr[3] = 4; arr[4] = 1; BBSort(arr, size - 1); p = arr; while (size--) { *tx = *p++ + '0'; } }
  • Undefined reference to memcpy when using Os optimization
void _start() { volatile char* tx = (volatile char*) 0x40002000; int *p; int arr[] = {2, 3, 7, 4, 1}, size = 5; BBSort(arr, size - 1); p = arr; while (size--) { *tx = *p++ + '0'; } }
$riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -Os -nostdlib bbsort.c -o bbsort

/home/dcmc/riscv-none-embed-gcc/8.2.0-3.1/bin/../lib/gcc/riscv-none-embed/8.2.0/../../../../riscv-none-embed/bin/ld: /tmp/ccsfBVO3.o: in function `_start':
bbsort.c:(.text+0x58): undefined reference to `memcpy'
collect2: error: ld returned 1 exit status

Result

without optimization
Run

$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -nostdlib bbsort.c -o bbsort_pure
$ ./emu-rv32i bbsort_pure
74321
>>> Execution time: 20031 ns
>>> Instruction count: 488 (IPS=24362238)
>>> Jumps: 33 (6.76%) - 12 forwards, 21 backwards
>>> Branching T=24 (68.57%) F=11 (31.43%)
Objdump
$ riscv-none-embed-objdump -d bbsort_pure
bbsort_pure:     file format elf32-littleriscv

Disassembly of section .text:

00010054 <BBSort>:
   10054:	fd010113          	addi	sp,sp,-48
   10058:	02812623          	sw	s0,44(sp)
   1005c:	03010413          	addi	s0,sp,48
   10060:	fca42e23          	sw	a0,-36(s0)
   10064:	fcb42c23          	sw	a1,-40(s0)
   10068:	fe042623          	sw	zero,-20(s0)
   1006c:	0c80006f          	j	10134 <BBSort+0xe0>
   10070:	fe042423          	sw	zero,-24(s0)
   10074:	0a00006f          	j	10114 <BBSort+0xc0>
   10078:	fe842783          	lw	a5,-24(s0)
   1007c:	00279793          	slli	a5,a5,0x2
   10080:	fdc42703          	lw	a4,-36(s0)
   10084:	00f707b3          	add	a5,a4,a5
   10088:	0007a703          	lw	a4,0(a5)
   1008c:	fe842783          	lw	a5,-24(s0)
   10090:	00178793          	addi	a5,a5,1
   10094:	00279793          	slli	a5,a5,0x2
   10098:	fdc42683          	lw	a3,-36(s0)
   1009c:	00f687b3          	add	a5,a3,a5
   100a0:	0007a783          	lw	a5,0(a5)
   100a4:	06f75263          	bge	a4,a5,10108 <BBSort+0xb4>
   100a8:	fe842783          	lw	a5,-24(s0)
   100ac:	00279793          	slli	a5,a5,0x2
   100b0:	fdc42703          	lw	a4,-36(s0)
   100b4:	00f707b3          	add	a5,a4,a5
   100b8:	0007a783          	lw	a5,0(a5)
   100bc:	fef42223          	sw	a5,-28(s0)
   100c0:	fe842783          	lw	a5,-24(s0)
   100c4:	00178793          	addi	a5,a5,1
   100c8:	00279793          	slli	a5,a5,0x2
   100cc:	fdc42703          	lw	a4,-36(s0)
   100d0:	00f70733          	add	a4,a4,a5
   100d4:	fe842783          	lw	a5,-24(s0)
   100d8:	00279793          	slli	a5,a5,0x2
   100dc:	fdc42683          	lw	a3,-36(s0)
   100e0:	00f687b3          	add	a5,a3,a5
   100e4:	00072703          	lw	a4,0(a4)
   100e8:	00e7a023          	sw	a4,0(a5)
   100ec:	fe842783          	lw	a5,-24(s0)
   100f0:	00178793          	addi	a5,a5,1
   100f4:	00279793          	slli	a5,a5,0x2
   100f8:	fdc42703          	lw	a4,-36(s0)
   100fc:	00f707b3          	add	a5,a4,a5
   10100:	fe442703          	lw	a4,-28(s0)
   10104:	00e7a023          	sw	a4,0(a5)
   10108:	fe842783          	lw	a5,-24(s0)
   1010c:	00178793          	addi	a5,a5,1
   10110:	fef42423          	sw	a5,-24(s0)
   10114:	fd842703          	lw	a4,-40(s0)
   10118:	fec42783          	lw	a5,-20(s0)
   1011c:	40f707b3          	sub	a5,a4,a5
   10120:	fe842703          	lw	a4,-24(s0)
   10124:	f4f74ae3          	blt	a4,a5,10078 <BBSort+0x24>
   10128:	fec42783          	lw	a5,-20(s0)
   1012c:	00178793          	addi	a5,a5,1
   10130:	fef42623          	sw	a5,-20(s0)
   10134:	fec42703          	lw	a4,-20(s0)
   10138:	fd842783          	lw	a5,-40(s0)
   1013c:	f2f74ae3          	blt	a4,a5,10070 <BBSort+0x1c>
   10140:	00000013          	nop
   10144:	02c12403          	lw	s0,44(sp)
   10148:	03010113          	addi	sp,sp,48
   1014c:	00008067          	ret

00010150 <_start>:
   10150:	fd010113          	addi	sp,sp,-48
   10154:	02112623          	sw	ra,44(sp)
   10158:	02812423          	sw	s0,40(sp)
   1015c:	03010413          	addi	s0,sp,48
   10160:	400027b7          	lui	a5,0x40002
   10164:	fef42223          	sw	a5,-28(s0)
   10168:	00500793          	li	a5,5
   1016c:	fef42423          	sw	a5,-24(s0)
   10170:	00200793          	li	a5,2
   10174:	fcf42823          	sw	a5,-48(s0)
   10178:	00300793          	li	a5,3
   1017c:	fcf42a23          	sw	a5,-44(s0)
   10180:	00700793          	li	a5,7
   10184:	fcf42c23          	sw	a5,-40(s0)
   10188:	00400793          	li	a5,4
   1018c:	fcf42e23          	sw	a5,-36(s0)
   10190:	00100793          	li	a5,1
   10194:	fef42023          	sw	a5,-32(s0)
   10198:	fe842783          	lw	a5,-24(s0)
   1019c:	fff78713          	addi	a4,a5,-1 # 40001fff <__global_pointer$+0x3fff05fb>
   101a0:	fd040793          	addi	a5,s0,-48
   101a4:	00070593          	mv	a1,a4
   101a8:	00078513          	mv	a0,a5
   101ac:	ea9ff0ef          	jal	ra,10054 <BBSort>
   101b0:	fd040793          	addi	a5,s0,-48
   101b4:	fef42623          	sw	a5,-20(s0)
   101b8:	0280006f          	j	101e0 <_start+0x90>
   101bc:	fec42783          	lw	a5,-20(s0)
   101c0:	00478713          	addi	a4,a5,4
   101c4:	fee42623          	sw	a4,-20(s0)
   101c8:	0007a783          	lw	a5,0(a5)
   101cc:	0ff7f793          	andi	a5,a5,255
   101d0:	03078793          	addi	a5,a5,48
   101d4:	0ff7f713          	andi	a4,a5,255
   101d8:	fe442783          	lw	a5,-28(s0)
   101dc:	00e78023          	sb	a4,0(a5)
   101e0:	fe842783          	lw	a5,-24(s0)
   101e4:	fff78713          	addi	a4,a5,-1
   101e8:	fee42423          	sw	a4,-24(s0)
   101ec:	fc0798e3          	bnez	a5,101bc <_start+0x6c>
   101f0:	00000013          	nop
   101f4:	02c12083          	lw	ra,44(sp)
   101f8:	02812403          	lw	s0,40(sp)
   101fc:	03010113          	addi	sp,sp,48
   10200:	00008067          	ret

Instruction State

Instructions Stat:
LUI	= 1
JAL	= 7
JALR	= 2
BNE	= 6
BLT	= 19
BGE	= 10
LW	= 206
SB	= 5
SW	= 58
ADDI	= 69
ANDI	= 10
SLLI	= 40
ADD	= 40
SUB	= 14
LI*	= 8

Five Most Frequent:
1) LW	= 206 (42.21%)
2) ADDI	= 69 (14.14%)
3) SW	= 58 (11.89%)
4) SLLI	= 40 (8.20%)
5) ADD	= 40 (8.20%)

Readelf

$ riscv-none-embed-readelf -h bbsort_pure 
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x10150
  Start of program headers:          52 (bytes into file)
  Start of section headers:          920 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         1
  Size of section headers:           40 (bytes)
  Number of section headers:         6
  Section header string table index: 5

Size

$ riscv-none-embed-size bbsort_pure 
   text	   data	    bss	    dec	    hex	filename
    432	      0	      0	    432	    1b0	bbsort_pure


with O3 optimization
Run

$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib bbsort.c -o bbsort_O3
$ ./emu-rv32i bbsort_O3
74321
>>> Execution time: 7310 ns
>>> Instruction count: 50 (IPS=6839945)
>>> Jumps: 4 (8.00%) - 3 forwards, 1 backwards
>>> Branching T=3 (50.00%) F=3 (50.00%)
Objdump
$ riscv-none-embed-objdump -d bbsort_O3

bbsort_O3:     file format elf32-littleriscv

Disassembly of section .text:

00010054 <BBSort>:
   10054:	02b05a63          	blez	a1,10088 <BBSort+0x34>
   10058:	00259613          	slli	a2,a1,0x2
   1005c:	00c50633          	add	a2,a0,a2
   10060:	00050793          	mv	a5,a0
   10064:	0007a703          	lw	a4,0(a5)
   10068:	0047a683          	lw	a3,4(a5)
   1006c:	00d75663          	bge	a4,a3,10078 <BBSort+0x24>
   10070:	00d7a023          	sw	a3,0(a5)
   10074:	00e7a223          	sw	a4,4(a5)
   10078:	00478793          	addi	a5,a5,4
   1007c:	fec794e3          	bne	a5,a2,10064 <BBSort+0x10>
   10080:	ffc60613          	addi	a2,a2,-4
   10084:	fcc51ee3          	bne	a0,a2,10060 <BBSort+0xc>
   10088:	00008067          	ret

0001008c <_start>:
   1008c:	fe010113          	addi	sp,sp,-32
   10090:	00100713          	li	a4,1
   10094:	00e12e23          	sw	a4,28(sp)
   10098:	00400713          	li	a4,4
   1009c:	00700793          	li	a5,7
   100a0:	00e12a23          	sw	a4,20(sp)
   100a4:	00200713          	li	a4,2
   100a8:	00e12c23          	sw	a4,24(sp)
   100ac:	00f12823          	sw	a5,16(sp)
   100b0:	00300713          	li	a4,3
   100b4:	00400693          	li	a3,4
   100b8:	00200613          	li	a2,2
   100bc:	00f75a63          	bge	a4,a5,100d0 <_start+0x44>
   100c0:	00070593          	mv	a1,a4
   100c4:	00e12823          	sw	a4,16(sp)
   100c8:	00078713          	mv	a4,a5
   100cc:	00058793          	mv	a5,a1
   100d0:	00d7dc63          	bge	a5,a3,100e8 <_start+0x5c>
   100d4:	00068593          	mv	a1,a3
   100d8:	00d12823          	sw	a3,16(sp)
   100dc:	00f12a23          	sw	a5,20(sp)
   100e0:	00078693          	mv	a3,a5
   100e4:	00058793          	mv	a5,a1
   100e8:	08c6c663          	blt	a3,a2,10174 <_start+0xe8>
   100ec:	00f75a63          	bge	a4,a5,10100 <_start+0x74>
   100f0:	00070613          	mv	a2,a4
   100f4:	00e12823          	sw	a4,16(sp)
   100f8:	00078713          	mv	a4,a5
   100fc:	00060793          	mv	a5,a2
   10100:	00d7d863          	bge	a5,a3,10110 <_start+0x84>
   10104:	00f12a23          	sw	a5,20(sp)
   10108:	00d12823          	sw	a3,16(sp)
   1010c:	00068793          	mv	a5,a3
   10110:	00f75663          	bge	a4,a5,1011c <_start+0x90>
   10114:	00e12823          	sw	a4,16(sp)
   10118:	00078713          	mv	a4,a5
   1011c:	03070713          	addi	a4,a4,48
   10120:	400027b7          	lui	a5,0x40002
   10124:	0ff77713          	andi	a4,a4,255
   10128:	00e78023          	sb	a4,0(a5) # 40002000 <__global_pointer$+0x3fff067c>
   1012c:	01012703          	lw	a4,16(sp)
   10130:	03070713          	addi	a4,a4,48
   10134:	0ff77713          	andi	a4,a4,255
   10138:	00e78023          	sb	a4,0(a5)
   1013c:	01412703          	lw	a4,20(sp)
   10140:	03070713          	addi	a4,a4,48
   10144:	0ff77713          	andi	a4,a4,255
   10148:	00e78023          	sb	a4,0(a5)
   1014c:	01812703          	lw	a4,24(sp)
   10150:	03070713          	addi	a4,a4,48
   10154:	0ff77713          	andi	a4,a4,255
   10158:	00e78023          	sb	a4,0(a5)
   1015c:	01c12703          	lw	a4,28(sp)
   10160:	03070713          	addi	a4,a4,48
   10164:	0ff77713          	andi	a4,a4,255
   10168:	00e78023          	sb	a4,0(a5)
   1016c:	02010113          	addi	sp,sp,32
   10170:	00008067          	ret
   10174:	00d12c23          	sw	a3,24(sp)
   10178:	00c12a23          	sw	a2,20(sp)
   1017c:	00060693          	mv	a3,a2
   10180:	f6dff06f          	j	100ec <_start+0x60>

Instruction State

Instructions Stat:
LUI	= 1
JALR	= 1
BLT	= 1
BGE	= 5
LW	= 4
SB	= 5
SW	= 7
ADDI	= 20
ANDI	= 5
LI*	= 7

Five Most Frequent:
1) ADDI	= 20 (40.00%)
2) SW	= 7 (14.00%)
3) LI*	= 7 (14.00%)
4) BGE	= 5 (10.00%)
5) SB	= 5 (10.00%)

Readelf

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1008c
  Start of program headers:          52 (bytes into file)
  Start of section headers:          792 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         1
  Size of section headers:           40 (bytes)
  Number of section headers:         6
  Section header string table index: 5

Size

   text	   data	    bss	    dec	    hex	filename
    304	      0	      0	    304	    130	bbsort_O3

with Os optimization
Run

$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib bbsort.c -o bbsort_Os
$ ./emu-rv32i bbsort_Os
74321
>>> Execution time: 9482 ns
>>> Instruction count: 160 (IPS=16874077)
>>> Jumps: 26 (16.25%) - 10 forwards, 16 backwards
>>> Branching T=19 (63.33%) F=11 (36.67%)

Objdump
$ riscv-none-embed-objdump -d bbsort_Os

bbsort_Os:     file format elf32-littleriscv


Disassembly of section .text:

00010054 <BBSort>:
   10054:	00058713          	mv	a4,a1
   10058:	40e587b3          	sub	a5,a1,a4
   1005c:	00b7c463          	blt	a5,a1,10064 <BBSort+0x10>
   10060:	00008067          	ret
   10064:	00050793          	mv	a5,a0
   10068:	00000693          	li	a3,0
   1006c:	0007a603          	lw	a2,0(a5)
   10070:	0047a803          	lw	a6,4(a5)
   10074:	01065663          	bge	a2,a6,10080 <BBSort+0x2c>
   10078:	0107a023          	sw	a6,0(a5)
   1007c:	00c7a223          	sw	a2,4(a5)
   10080:	00168693          	addi	a3,a3,1
   10084:	00478793          	addi	a5,a5,4
   10088:	fee6c2e3          	blt	a3,a4,1006c <BBSort+0x18>
   1008c:	fff70713          	addi	a4,a4,-1
   10090:	fc9ff06f          	j	10058 <BBSort+0x4>

00010094 <_start>:
   10094:	fd010113          	addi	sp,sp,-48
   10098:	00200793          	li	a5,2
   1009c:	00f12623          	sw	a5,12(sp)
   100a0:	00300793          	li	a5,3
   100a4:	00f12823          	sw	a5,16(sp)
   100a8:	00700793          	li	a5,7
   100ac:	00f12a23          	sw	a5,20(sp)
   100b0:	00400793          	li	a5,4
   100b4:	00f12c23          	sw	a5,24(sp)
   100b8:	00400593          	li	a1,4
   100bc:	00100793          	li	a5,1
   100c0:	00c10513          	addi	a0,sp,12
   100c4:	02112623          	sw	ra,44(sp)
   100c8:	00f12e23          	sw	a5,28(sp)
   100cc:	f89ff0ef          	jal	ra,10054 <BBSort>
   100d0:	00000713          	li	a4,0
   100d4:	40002637          	lui	a2,0x40002
   100d8:	01400693          	li	a3,20
   100dc:	00c10793          	addi	a5,sp,12
   100e0:	00e787b3          	add	a5,a5,a4
   100e4:	0007a783          	lw	a5,0(a5)
   100e8:	00470713          	addi	a4,a4,4
   100ec:	03078793          	addi	a5,a5,48
   100f0:	0ff7f793          	andi	a5,a5,255
   100f4:	00f60023          	sb	a5,0(a2) # 40002000 <__global_pointer$+0x3fff06f8>
   100f8:	fed712e3          	bne	a4,a3,100dc <_start+0x48>
   100fc:	02c12083          	lw	ra,44(sp)
   10100:	03010113          	addi	sp,sp,48
   10104:	00008067          	ret

Instruction State

Instructions Stat:
LUI	= 1
JAL	= 5
JALR	= 2
BNE	= 5
BLT	= 15
BGE	= 10
LW	= 26
SB	= 5
SW	= 16
ADDI	= 59
ANDI	= 5
ADD	= 5
SUB	= 5
LI*	= 12

Five Most Frequent:
1) ADDI	= 59 (36.88%)
2) LW	= 26 (16.25%)
3) SW	= 16 (10.00%)
4) BLT	= 15 (9.38%)
5) LI*	= 12 (7.50%)

Readelf

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x10094
  Start of program headers:          52 (bytes into file)
  Start of section headers:          668 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         1
  Size of section headers:           40 (bytes)
  Number of section headers:         6
  Section header string table index: 5

Size

   text	   data	    bss	    dec	    hex	filename
    180	      0	      0	    180	     b4	bbsort_Os

O0 O3 Os
Execution time 20031 ns 7310 ns 9482 ns
Instruction count 488 50 160
Jumps 33 (6.76%) 4 (8.00%) 26 (16.25%)
Jumps forwards 12 3 10
Jumps backwards 21 1 16
Branching True 24 (68.57%) 3 (50.00%) 19 (63.33%)
Branching False 11 (31.43%) 3 (50.00%) 11 (36.67%)