# Assignment2: RISC-V Toolchain ###### tags: `riscv` ## Bubble Sort ``` contributed by 王傑世 ``` (https://hackmd.io/4-oWQOprRnCLu3ZeJy31kA?view) :::spoiler **Assembly code** ```assembly= .data arr: .word 2, 3, 7, 4, 1 .text main: la s0, arr mv t3, s0 addi s1, s1, 4 addi t0, zero, -1 jal ra, bbsort mv t0, zero addi s1, s1, 1 mv s0, t3 j print bbsort: addi sp, sp, -12 sw ra, 8(sp) sw s1, 4(sp) sw s0, 0(sp) oloop: addi t0, t0, 1 mv t1, zero sub t2, s1, t0 blt t0, s1, iloop addi sp, sp, 12 jr ra iloop: mv s0, t3 bge t1, t2, oloop slli t4, t1, 2 add s0, s0, t4 lw a2, 0(s0) lw a3, 4(s0) addi t1, t1, 1 blt a2, a3, swap j iloop swap: sw a2, 4(s0) sw a3, 0(s0) j iloop print: bge t0, s1, end lw a0, 0(s0) li a7, 1 ecall addi t0, t0, 1 addi s0, s0, 4 j print end: ``` ::: :::spoiler **C code** ```c= void BBSort(int *arr, int size) { for (int i = 0; i < size; ++i) for (int j = 0; j < size - i; ++j) if (arr[j] < arr[j + 1]) { int temp = arr[j]; arr[j] = arr[j + 1]; arr[j + 1] = temp; } } int main() { int arr[] = {2, 3, 7, 4, 1}; int size = 5; BBSort(arr, size - 1); for (int i = 0; i < size; ++i) { printf("%d\n", arr[i]); } return 0; } ``` ::: ### Rewrite assembly programs into C implementation ```c= void BBSort(int *arr, int size) { for (int i = 0; i < size; ++i) for (int j = 0; j < size - i; ++j) if (arr[j] < arr[j + 1]) { int temp = arr[j]; arr[j] = arr[j + 1]; arr[j + 1] = temp; } } void _start() { volatile char* tx = (volatile char*) 0x40002000; int *p, size = 5, arr[5]; arr[0] = 2; arr[1] = 3; arr[2] = 7; arr[3] = 4; arr[4] = 1; BBSort(arr, size - 1); p = arr; while (size--) { *tx = *p++ + '0'; } } ``` - Undefined reference to `memcpy` when using Os optimization ```c= void _start() { volatile char* tx = (volatile char*) 0x40002000; int *p; int arr[] = {2, 3, 7, 4, 1}, size = 5; BBSort(arr, size - 1); p = arr; while (size--) { *tx = *p++ + '0'; } } ``` ``` $riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -Os -nostdlib bbsort.c -o bbsort /home/dcmc/riscv-none-embed-gcc/8.2.0-3.1/bin/../lib/gcc/riscv-none-embed/8.2.0/../../../../riscv-none-embed/bin/ld: /tmp/ccsfBVO3.o: in function `_start': bbsort.c:(.text+0x58): undefined reference to `memcpy' collect2: error: ld returned 1 exit status ``` --- ### Result **without optimization** Run ``` $ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -nostdlib bbsort.c -o bbsort_pure $ ./emu-rv32i bbsort_pure 74321 >>> Execution time: 20031 ns >>> Instruction count: 488 (IPS=24362238) >>> Jumps: 33 (6.76%) - 12 forwards, 21 backwards >>> Branching T=24 (68.57%) F=11 (31.43%) ``` :::spoiler Objdump ``` $ riscv-none-embed-objdump -d bbsort_pure bbsort_pure: file format elf32-littleriscv Disassembly of section .text: 00010054 <BBSort>: 10054: fd010113 addi sp,sp,-48 10058: 02812623 sw s0,44(sp) 1005c: 03010413 addi s0,sp,48 10060: fca42e23 sw a0,-36(s0) 10064: fcb42c23 sw a1,-40(s0) 10068: fe042623 sw zero,-20(s0) 1006c: 0c80006f j 10134 <BBSort+0xe0> 10070: fe042423 sw zero,-24(s0) 10074: 0a00006f j 10114 <BBSort+0xc0> 10078: fe842783 lw a5,-24(s0) 1007c: 00279793 slli a5,a5,0x2 10080: fdc42703 lw a4,-36(s0) 10084: 00f707b3 add a5,a4,a5 10088: 0007a703 lw a4,0(a5) 1008c: fe842783 lw a5,-24(s0) 10090: 00178793 addi a5,a5,1 10094: 00279793 slli a5,a5,0x2 10098: fdc42683 lw a3,-36(s0) 1009c: 00f687b3 add a5,a3,a5 100a0: 0007a783 lw a5,0(a5) 100a4: 06f75263 bge a4,a5,10108 <BBSort+0xb4> 100a8: fe842783 lw a5,-24(s0) 100ac: 00279793 slli a5,a5,0x2 100b0: fdc42703 lw a4,-36(s0) 100b4: 00f707b3 add a5,a4,a5 100b8: 0007a783 lw a5,0(a5) 100bc: fef42223 sw a5,-28(s0) 100c0: fe842783 lw a5,-24(s0) 100c4: 00178793 addi a5,a5,1 100c8: 00279793 slli a5,a5,0x2 100cc: fdc42703 lw a4,-36(s0) 100d0: 00f70733 add a4,a4,a5 100d4: fe842783 lw a5,-24(s0) 100d8: 00279793 slli a5,a5,0x2 100dc: fdc42683 lw a3,-36(s0) 100e0: 00f687b3 add a5,a3,a5 100e4: 00072703 lw a4,0(a4) 100e8: 00e7a023 sw a4,0(a5) 100ec: fe842783 lw a5,-24(s0) 100f0: 00178793 addi a5,a5,1 100f4: 00279793 slli a5,a5,0x2 100f8: fdc42703 lw a4,-36(s0) 100fc: 00f707b3 add a5,a4,a5 10100: fe442703 lw a4,-28(s0) 10104: 00e7a023 sw a4,0(a5) 10108: fe842783 lw a5,-24(s0) 1010c: 00178793 addi a5,a5,1 10110: fef42423 sw a5,-24(s0) 10114: fd842703 lw a4,-40(s0) 10118: fec42783 lw a5,-20(s0) 1011c: 40f707b3 sub a5,a4,a5 10120: fe842703 lw a4,-24(s0) 10124: f4f74ae3 blt a4,a5,10078 <BBSort+0x24> 10128: fec42783 lw a5,-20(s0) 1012c: 00178793 addi a5,a5,1 10130: fef42623 sw a5,-20(s0) 10134: fec42703 lw a4,-20(s0) 10138: fd842783 lw a5,-40(s0) 1013c: f2f74ae3 blt a4,a5,10070 <BBSort+0x1c> 10140: 00000013 nop 10144: 02c12403 lw s0,44(sp) 10148: 03010113 addi sp,sp,48 1014c: 00008067 ret 00010150 <_start>: 10150: fd010113 addi sp,sp,-48 10154: 02112623 sw ra,44(sp) 10158: 02812423 sw s0,40(sp) 1015c: 03010413 addi s0,sp,48 10160: 400027b7 lui a5,0x40002 10164: fef42223 sw a5,-28(s0) 10168: 00500793 li a5,5 1016c: fef42423 sw a5,-24(s0) 10170: 00200793 li a5,2 10174: fcf42823 sw a5,-48(s0) 10178: 00300793 li a5,3 1017c: fcf42a23 sw a5,-44(s0) 10180: 00700793 li a5,7 10184: fcf42c23 sw a5,-40(s0) 10188: 00400793 li a5,4 1018c: fcf42e23 sw a5,-36(s0) 10190: 00100793 li a5,1 10194: fef42023 sw a5,-32(s0) 10198: fe842783 lw a5,-24(s0) 1019c: fff78713 addi a4,a5,-1 # 40001fff <__global_pointer$+0x3fff05fb> 101a0: fd040793 addi a5,s0,-48 101a4: 00070593 mv a1,a4 101a8: 00078513 mv a0,a5 101ac: ea9ff0ef jal ra,10054 <BBSort> 101b0: fd040793 addi a5,s0,-48 101b4: fef42623 sw a5,-20(s0) 101b8: 0280006f j 101e0 <_start+0x90> 101bc: fec42783 lw a5,-20(s0) 101c0: 00478713 addi a4,a5,4 101c4: fee42623 sw a4,-20(s0) 101c8: 0007a783 lw a5,0(a5) 101cc: 0ff7f793 andi a5,a5,255 101d0: 03078793 addi a5,a5,48 101d4: 0ff7f713 andi a4,a5,255 101d8: fe442783 lw a5,-28(s0) 101dc: 00e78023 sb a4,0(a5) 101e0: fe842783 lw a5,-24(s0) 101e4: fff78713 addi a4,a5,-1 101e8: fee42423 sw a4,-24(s0) 101ec: fc0798e3 bnez a5,101bc <_start+0x6c> 101f0: 00000013 nop 101f4: 02c12083 lw ra,44(sp) 101f8: 02812403 lw s0,40(sp) 101fc: 03010113 addi sp,sp,48 10200: 00008067 ret ``` ::: Instruction State ``` Instructions Stat: LUI = 1 JAL = 7 JALR = 2 BNE = 6 BLT = 19 BGE = 10 LW = 206 SB = 5 SW = 58 ADDI = 69 ANDI = 10 SLLI = 40 ADD = 40 SUB = 14 LI* = 8 Five Most Frequent: 1) LW = 206 (42.21%) 2) ADDI = 69 (14.14%) 3) SW = 58 (11.89%) 4) SLLI = 40 (8.20%) 5) ADD = 40 (8.20%) ``` Readelf ``` $ riscv-none-embed-readelf -h bbsort_pure ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x10150 Start of program headers: 52 (bytes into file) Start of section headers: 920 (bytes into file) Flags: 0x0 Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 1 Size of section headers: 40 (bytes) Number of section headers: 6 Section header string table index: 5 ``` Size ``` $ riscv-none-embed-size bbsort_pure text data bss dec hex filename 432 0 0 432 1b0 bbsort_pure ``` --- **with O3 optimization** Run ``` $ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib bbsort.c -o bbsort_O3 $ ./emu-rv32i bbsort_O3 74321 >>> Execution time: 7310 ns >>> Instruction count: 50 (IPS=6839945) >>> Jumps: 4 (8.00%) - 3 forwards, 1 backwards >>> Branching T=3 (50.00%) F=3 (50.00%) ``` :::spoiler Objdump ``` $ riscv-none-embed-objdump -d bbsort_O3 bbsort_O3: file format elf32-littleriscv Disassembly of section .text: 00010054 <BBSort>: 10054: 02b05a63 blez a1,10088 <BBSort+0x34> 10058: 00259613 slli a2,a1,0x2 1005c: 00c50633 add a2,a0,a2 10060: 00050793 mv a5,a0 10064: 0007a703 lw a4,0(a5) 10068: 0047a683 lw a3,4(a5) 1006c: 00d75663 bge a4,a3,10078 <BBSort+0x24> 10070: 00d7a023 sw a3,0(a5) 10074: 00e7a223 sw a4,4(a5) 10078: 00478793 addi a5,a5,4 1007c: fec794e3 bne a5,a2,10064 <BBSort+0x10> 10080: ffc60613 addi a2,a2,-4 10084: fcc51ee3 bne a0,a2,10060 <BBSort+0xc> 10088: 00008067 ret 0001008c <_start>: 1008c: fe010113 addi sp,sp,-32 10090: 00100713 li a4,1 10094: 00e12e23 sw a4,28(sp) 10098: 00400713 li a4,4 1009c: 00700793 li a5,7 100a0: 00e12a23 sw a4,20(sp) 100a4: 00200713 li a4,2 100a8: 00e12c23 sw a4,24(sp) 100ac: 00f12823 sw a5,16(sp) 100b0: 00300713 li a4,3 100b4: 00400693 li a3,4 100b8: 00200613 li a2,2 100bc: 00f75a63 bge a4,a5,100d0 <_start+0x44> 100c0: 00070593 mv a1,a4 100c4: 00e12823 sw a4,16(sp) 100c8: 00078713 mv a4,a5 100cc: 00058793 mv a5,a1 100d0: 00d7dc63 bge a5,a3,100e8 <_start+0x5c> 100d4: 00068593 mv a1,a3 100d8: 00d12823 sw a3,16(sp) 100dc: 00f12a23 sw a5,20(sp) 100e0: 00078693 mv a3,a5 100e4: 00058793 mv a5,a1 100e8: 08c6c663 blt a3,a2,10174 <_start+0xe8> 100ec: 00f75a63 bge a4,a5,10100 <_start+0x74> 100f0: 00070613 mv a2,a4 100f4: 00e12823 sw a4,16(sp) 100f8: 00078713 mv a4,a5 100fc: 00060793 mv a5,a2 10100: 00d7d863 bge a5,a3,10110 <_start+0x84> 10104: 00f12a23 sw a5,20(sp) 10108: 00d12823 sw a3,16(sp) 1010c: 00068793 mv a5,a3 10110: 00f75663 bge a4,a5,1011c <_start+0x90> 10114: 00e12823 sw a4,16(sp) 10118: 00078713 mv a4,a5 1011c: 03070713 addi a4,a4,48 10120: 400027b7 lui a5,0x40002 10124: 0ff77713 andi a4,a4,255 10128: 00e78023 sb a4,0(a5) # 40002000 <__global_pointer$+0x3fff067c> 1012c: 01012703 lw a4,16(sp) 10130: 03070713 addi a4,a4,48 10134: 0ff77713 andi a4,a4,255 10138: 00e78023 sb a4,0(a5) 1013c: 01412703 lw a4,20(sp) 10140: 03070713 addi a4,a4,48 10144: 0ff77713 andi a4,a4,255 10148: 00e78023 sb a4,0(a5) 1014c: 01812703 lw a4,24(sp) 10150: 03070713 addi a4,a4,48 10154: 0ff77713 andi a4,a4,255 10158: 00e78023 sb a4,0(a5) 1015c: 01c12703 lw a4,28(sp) 10160: 03070713 addi a4,a4,48 10164: 0ff77713 andi a4,a4,255 10168: 00e78023 sb a4,0(a5) 1016c: 02010113 addi sp,sp,32 10170: 00008067 ret 10174: 00d12c23 sw a3,24(sp) 10178: 00c12a23 sw a2,20(sp) 1017c: 00060693 mv a3,a2 10180: f6dff06f j 100ec <_start+0x60> ``` ::: Instruction State ``` Instructions Stat: LUI = 1 JALR = 1 BLT = 1 BGE = 5 LW = 4 SB = 5 SW = 7 ADDI = 20 ANDI = 5 LI* = 7 Five Most Frequent: 1) ADDI = 20 (40.00%) 2) SW = 7 (14.00%) 3) LI* = 7 (14.00%) 4) BGE = 5 (10.00%) 5) SB = 5 (10.00%) ``` Readelf ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x1008c Start of program headers: 52 (bytes into file) Start of section headers: 792 (bytes into file) Flags: 0x0 Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 1 Size of section headers: 40 (bytes) Number of section headers: 6 Section header string table index: 5 ``` Size ``` text data bss dec hex filename 304 0 0 304 130 bbsort_O3 ``` --- **with Os optimization** Run ``` $ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib bbsort.c -o bbsort_Os $ ./emu-rv32i bbsort_Os 74321 >>> Execution time: 9482 ns >>> Instruction count: 160 (IPS=16874077) >>> Jumps: 26 (16.25%) - 10 forwards, 16 backwards >>> Branching T=19 (63.33%) F=11 (36.67%) ``` :::spoiler Objdump ``` $ riscv-none-embed-objdump -d bbsort_Os bbsort_Os: file format elf32-littleriscv Disassembly of section .text: 00010054 <BBSort>: 10054: 00058713 mv a4,a1 10058: 40e587b3 sub a5,a1,a4 1005c: 00b7c463 blt a5,a1,10064 <BBSort+0x10> 10060: 00008067 ret 10064: 00050793 mv a5,a0 10068: 00000693 li a3,0 1006c: 0007a603 lw a2,0(a5) 10070: 0047a803 lw a6,4(a5) 10074: 01065663 bge a2,a6,10080 <BBSort+0x2c> 10078: 0107a023 sw a6,0(a5) 1007c: 00c7a223 sw a2,4(a5) 10080: 00168693 addi a3,a3,1 10084: 00478793 addi a5,a5,4 10088: fee6c2e3 blt a3,a4,1006c <BBSort+0x18> 1008c: fff70713 addi a4,a4,-1 10090: fc9ff06f j 10058 <BBSort+0x4> 00010094 <_start>: 10094: fd010113 addi sp,sp,-48 10098: 00200793 li a5,2 1009c: 00f12623 sw a5,12(sp) 100a0: 00300793 li a5,3 100a4: 00f12823 sw a5,16(sp) 100a8: 00700793 li a5,7 100ac: 00f12a23 sw a5,20(sp) 100b0: 00400793 li a5,4 100b4: 00f12c23 sw a5,24(sp) 100b8: 00400593 li a1,4 100bc: 00100793 li a5,1 100c0: 00c10513 addi a0,sp,12 100c4: 02112623 sw ra,44(sp) 100c8: 00f12e23 sw a5,28(sp) 100cc: f89ff0ef jal ra,10054 <BBSort> 100d0: 00000713 li a4,0 100d4: 40002637 lui a2,0x40002 100d8: 01400693 li a3,20 100dc: 00c10793 addi a5,sp,12 100e0: 00e787b3 add a5,a5,a4 100e4: 0007a783 lw a5,0(a5) 100e8: 00470713 addi a4,a4,4 100ec: 03078793 addi a5,a5,48 100f0: 0ff7f793 andi a5,a5,255 100f4: 00f60023 sb a5,0(a2) # 40002000 <__global_pointer$+0x3fff06f8> 100f8: fed712e3 bne a4,a3,100dc <_start+0x48> 100fc: 02c12083 lw ra,44(sp) 10100: 03010113 addi sp,sp,48 10104: 00008067 ret ``` ::: Instruction State ``` Instructions Stat: LUI = 1 JAL = 5 JALR = 2 BNE = 5 BLT = 15 BGE = 10 LW = 26 SB = 5 SW = 16 ADDI = 59 ANDI = 5 ADD = 5 SUB = 5 LI* = 12 Five Most Frequent: 1) ADDI = 59 (36.88%) 2) LW = 26 (16.25%) 3) SW = 16 (10.00%) 4) BLT = 15 (9.38%) 5) LI* = 12 (7.50%) ``` Readelf ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x10094 Start of program headers: 52 (bytes into file) Start of section headers: 668 (bytes into file) Flags: 0x0 Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 1 Size of section headers: 40 (bytes) Number of section headers: 6 Section header string table index: 5 ``` Size ``` text data bss dec hex filename 180 0 0 180 b4 bbsort_Os ``` --- | | O0 | O3 | Os | | ----------------- | ----------- | ----------- | ----------- | | Execution time | 20031 ns | 7310 ns | 9482 ns | | Instruction count | 488 | 50 | 160 | | Jumps | 33 (6.76%) | 4 (8.00%) | 26 (16.25%) | | Jumps forwards | 12 | 3 | 10 | | Jumps backwards | 21 | 1 | 16 | | Branching True | 24 (68.57%) | 3 (50.00%) | 19 (63.33%) | | Branching False | 11 (31.43%) | 3 (50.00%) | 11 (36.67%) | ---