--- tags: CA2020 --- # Assignment2: RISC-V Toolchain-1 contributed by < `kevin30292` > ## Insertion Sort The following assembly program was implemented by `施丞宥`. ```cpp .data arr: .word 2, 3, 7, 4, 1 str1: .string "Sorted array = " .text main: la s0, arr addi t0, x0, 5 addi t1, x0 ,0 jal ra, Loopi # Print the result to console jal ra, print # Exit program li a7, 10 ecall Loopi: addi t1, t1, 1 slli t4, t1, 2 add s1, s0, t4 lw t5, 0(s1) add t3, t5, x0 addi t2, t1, -1 blt t1, t0, Loopj jr ra Loopj: slli t4, t2, 2 add s1, s0, t4 lw t6, 0(s1) blt t2, x0, Loopi bge t3, t6, Loopi sw t6, 4(s1) sw t3, 0(s1) addi t2, t2, -1 j Loopj print: la a0, str1 li a7, 4 ecall lw t0, 0(s0) mv a0, t0 li a7, 1 ecall lw t0, 4(s0) mv a0, t0 li a7, 1 ecall lw t0, 8(s0) mv a0, t0 li a7, 1 ecall lw t0, 12(s0) mv a0, t0 li a7, 1 ecall lw t0, 16(s0) mv a0, t0 li a7, 1 ecall ret ``` ## Rewrite into C implementation `insertion.c` ```c= #include <stdio.h> int _start() { int arr[5] = {2, 3, 7, 4, 1}; int t1 = 0; for(t1 = 1; t1 < 5; t1++) { int t3 = arr[t1]; int t2 = t1 - 1; while(t1 < 5) { int t6 = arr[t2]; if(t2 < 0) { break; } else if(t3 >= t6) { break; } else { arr[t2+1] = t6; arr[t2] = t3; t2--; } } } volatile char *tx = (volatile char *)0x40002000; const char *result1 = "Sorted array: "; while (*result1) { *tx = *result1; result1++; } for(int i=0; i<5; i++) { *tx = (char)(arr[i] + '0'); *tx = ' '; } return 0; } ``` ## Execute with `rv32emu` ``` $ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib -o insertion insertion.c ``` ![](https://i.imgur.com/FJoxB2J.png) ## ELF files generated by C compiler ![](https://i.imgur.com/dnSK6eZ.png) ``` 00010054 <_start>: 10054: 000107b7 lui a5,0x10 10058: 1c478793 addi a5,a5,452 # 101c4 <_start+0x170> 1005c: 0007a683 lw a3,0(a5) 10060: 0047a703 lw a4,4(a5) 10064: 0087a583 lw a1,8(a5) 10068: 00c7a603 lw a2,12(a5) 1006c: 0107a783 lw a5,16(a5) 10070: fe010113 addi sp,sp,-32 10074: 00d12623 sw a3,12(sp) 10078: 00e12823 sw a4,16(sp) 1007c: 00b12a23 sw a1,20(sp) 10080: 00c12c23 sw a2,24(sp) 10084: 00f12e23 sw a5,28(sp) 10088: 00d75663 bge a4,a3,10094 <_start+0x40> 1008c: 00d12823 sw a3,16(sp) 10090: 00e12623 sw a4,12(sp) 10094: 01412703 lw a4,20(sp) 10098: 01012783 lw a5,16(sp) 1009c: 02f75063 bge a4,a5,100bc <_start+0x68> 100a0: 00c12683 lw a3,12(sp) 100a4: 00f12a23 sw a5,20(sp) 100a8: 00e12823 sw a4,16(sp) 100ac: 00d75663 bge a4,a3,100b8 <_start+0x64> 100b0: 00e12623 sw a4,12(sp) 100b4: 00d12823 sw a3,16(sp) 100b8: 00078713 mv a4,a5 100bc: 01812783 lw a5,24(sp) 100c0: 02e7d863 bge a5,a4,100f0 <_start+0x9c> 100c4: 01012683 lw a3,16(sp) 100c8: 00e12c23 sw a4,24(sp) 100cc: 00f12a23 sw a5,20(sp) 100d0: 00d7de63 bge a5,a3,100ec <_start+0x98> 100d4: 00c12603 lw a2,12(sp) 100d8: 00d12a23 sw a3,20(sp) 100dc: 00f12823 sw a5,16(sp) 100e0: 00c7d663 bge a5,a2,100ec <_start+0x98> 100e4: 00f12623 sw a5,12(sp) 100e8: 00c12823 sw a2,16(sp) 100ec: 00070793 mv a5,a4 100f0: 01c12703 lw a4,28(sp) 100f4: 02f75e63 bge a4,a5,10130 <_start+0xdc> 100f8: 01412683 lw a3,20(sp) 100fc: 00f12e23 sw a5,28(sp) 10100: 00e12c23 sw a4,24(sp) 10104: 02d75663 bge a4,a3,10130 <_start+0xdc> 10108: 01012783 lw a5,16(sp) 1010c: 00d12c23 sw a3,24(sp) 10110: 00e12a23 sw a4,20(sp) 10114: 00f75e63 bge a4,a5,10130 <_start+0xdc> 10118: 00c12683 lw a3,12(sp) 1011c: 00f12a23 sw a5,20(sp) 10120: 00e12823 sw a4,16(sp) 10124: 00d75663 bge a4,a3,10130 <_start+0xdc> 10128: 00d12823 sw a3,16(sp) 1012c: 00e12623 sw a4,12(sp) 10130: 000107b7 lui a5,0x10 10134: 1d878793 addi a5,a5,472 # 101d8 <_start+0x184> 10138: 05300713 li a4,83 1013c: 400026b7 lui a3,0x40002 10140: 00e68023 sb a4,0(a3) # 40002000 <__global_pointer$+0x3fff0618> 10144: 00178793 addi a5,a5,1 10148: 0007c703 lbu a4,0(a5) 1014c: fe071ae3 bnez a4,10140 <_start+0xec> 10150: 00c12703 lw a4,12(sp) 10154: 02000793 li a5,32 10158: 00000513 li a0,0 1015c: 03070713 addi a4,a4,48 10160: 0ff77713 andi a4,a4,255 10164: 00e68023 sb a4,0(a3) 10168: 00f68023 sb a5,0(a3) 1016c: 01012703 lw a4,16(sp) 10170: 03070713 addi a4,a4,48 10174: 0ff77713 andi a4,a4,255 10178: 00e68023 sb a4,0(a3) 1017c: 00f68023 sb a5,0(a3) 10180: 01412703 lw a4,20(sp) 10184: 03070713 addi a4,a4,48 10188: 0ff77713 andi a4,a4,255 1018c: 00e68023 sb a4,0(a3) 10190: 00f68023 sb a5,0(a3) 10194: 01812703 lw a4,24(sp) 10198: 03070713 addi a4,a4,48 1019c: 0ff77713 andi a4,a4,255 101a0: 00e68023 sb a4,0(a3) 101a4: 00f68023 sb a5,0(a3) 101a8: 01c12703 lw a4,28(sp) 101ac: 03070713 addi a4,a4,48 101b0: 0ff77713 andi a4,a4,255 101b4: 00e68023 sb a4,0(a3) 101b8: 00f68023 sb a5,0(a3) 101bc: 02010113 addi sp,sp,32 101c0: 00008067 ret ``` #### Compare the assembly 1. The ELF files generated by C compiler didn't branch back. ->It separate the for-loop and let it execute continuous. 2. It's use stack to store the array temporary. ->It doesn't need to access memory so many times.