# Lab2: RISC-V `RV32I[MA]` emulator with ELF support
###### tags: `RISC-V`
* Chosen program: the [Insertion sort](https://hackmd.io/PdOkIOH1Qt-EK3O_P9a8uA?view) assembly program by [施丞宥](https://github.com/charley871103/Lab1-RV32I-Simulator)
```cpp=
# Insertion sort of the 5-word array `arr`, followed by printing the result.
# Register roles as used below:
#   s0 = base address of arr      t0 = n (element count)
#   t1 = i (outer index)          t2 = j (inner index)
#   t3 = temp (value being inserted)
#   t4 = byte offset, s1 = element address, t5/t6 = loaded elements
.data
arr: .word 2, 3, 7, 4, 1
str1: .string "Sorted array = "
.text
main:
la s0, arr # s0 = &arr[0]
addi t0, x0, 5 # n = 5
addi t1, x0 ,0 # i = 0 (Loopi increments first, so sorting starts at i = 1)
jal ra, Loopi # run the sort; control returns here through `jr ra` in Loopi
# Print the result to console
jal ra, print
# Exit program
li a7, 10
ecall
# Outer loop: advance i, fetch temp = data[i], then either enter Loopj or return.
Loopi:
addi t1, t1, 1 # i++
slli t4, t1, 2 # t4 = i * 4, byte offset of data[i]
add s1, s0, t4 # s1 = &data[i]
# NOTE(review): this load runs before the i < n test below, so when i == n
# it reads the word just past the end of arr.
lw t5, 0(s1) # t5 = data[i]
add t3, t5, x0 # temp = data[i]
addi t2, t1, -1 # j = i - 1
blt t1, t0, Loopj # if (i < n) run the inner loop
jr ra # i >= n: sorting finished, return to the caller
# Inner loop: shift elements greater than temp one slot to the right.
Loopj:
slli t4, t2, 2 # t4 = j * 4, byte offset of data[j]
add s1, s0, t4 # s1 = &data[j]
# NOTE(review): this load runs before the j < 0 test below, so when j == -1
# it reads the word just before arr.
lw t6, 0(s1) # t6 = data[j]
blt t2, x0, Loopi # if (j < 0) leave Loopj and continue with the next i
bge t3, t6, Loopi # if (temp >= data[j]) leave Loopj and continue with the next i
sw t6, 4(s1) # data[j+1] = data[j]
sw t3, 0(s1) # data[j] = temp (stored on every shift, so temp is already in place on exit)
addi t2, t2, -1 # j--
j Loopj
# Print str1 followed by the five array words, one ecall per value.
# The a7 values select the ecall service; 4 and 1 are presumably the simulator's
# print-string and print-integer services - confirm against the target environment.
# NOTE(review): no separator is emitted between the five values.
# t0 is reused as scratch here, overwriting n; the sort has already finished.
print:
la a0, str1
li a7, 4
ecall
lw t0, 0(s0) # arr[0]
mv a0, t0
li a7, 1
ecall
lw t0, 4(s0) # arr[1]
mv a0, t0
li a7, 1
ecall
lw t0, 8(s0) # arr[2]
mv a0, t0
li a7, 1
ecall
lw t0, 12(s0) # arr[3]
mv a0, t0
li a7, 1
ecall
lw t0, 16(s0) # arr[4]
mv a0, t0
li a7, 1
ecall
ret
```
* **Insertion sort** C program by 施丞宥
```cpp=
#include <stdio.h>
/*
 * Sorts a fixed five-element array with insertion sort and prints it as
 * "Sorted array = " followed by each value and a trailing space, then a
 * newline.
 *
 * Returns 0. `int main(void)` is the standard hosted signature; the previous
 * `void main(void)` is not portable.
 */
int main(void)
{
    int data[5] = {2, 3, 7, 4, 1};
    int n = 5;
    int i, j, temp;

    for (i = 1; i < n; i++) {
        temp = data[i];
        /* Shift every element larger than temp one slot to the right. */
        for (j = i - 1; j >= 0 && data[j] > temp; j--) {
            data[j + 1] = data[j];
        }
        /* j + 1 is now the free slot where temp belongs. */
        data[j + 1] = temp;
    }

    printf("Sorted array = ");
    for (i = 0; i < n; i++) {
        printf("%d ", data[i]);
    }
    printf("\n");
    return 0;
}
```
## Rewrite assembly to C
* **Insertion sort** by myself
```cpp=
/*
riscv64-unknown-elf-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib test1.c -o test1
*/
#include<stdio.h>
/*
 * Freestanding entry point (the build command above uses -nostdlib, so there
 * is no C runtime and no printf).
 *
 * Sorts a fixed five-element array with insertion sort, then writes
 * "Sorted array: " and the five values, each followed by a space, one byte at
 * a time to the address 0x40002000.
 *
 * Returns 0.
 */
int _start()
{
int data[5] = {2,3,7,4,1};
int n = 5;
int i, j, temp;
for (i = 1; i < n; i++)
{
temp = data[i];
/* Walk left from i - 1, shifting larger elements one slot to the right. */
for (j = i - 1; j >= 0; j--)
{
if(temp < data[j])
data[j+1] = data[j];
else
break;
}
/* j + 1 is the free slot left by the shifts (or i itself if nothing moved). */
data[j+1] = temp;
}
/* Output register: presumably the emulator's memory-mapped character output;
   confirm against emu-rv32i. volatile keeps every store from being elided. */
volatile char *tx = (volatile char *)0x40002000;
const char *result = "Sorted array: ";
/* Emit the label one character per store until the terminating NUL. */
while (*result)
{
*tx = *result;
result++;
}
/* NOTE(review): this `i` shadows the outer `i`, and the bound is a literal 5
   rather than `n`. */
for (int i = 0; i < 5; i++)
{
/* Adding '0' yields a digit character only for values 0..9. */
*tx = (char)((data[i]+'0') & 0x000000ff);
*tx = ' ';
}
return 0;
}
```
Disassemble the executable built from the C program above with objdump:
```shell
$ riscv-none-embed-objdump -d test1
```
**Compiled with `-O3` (optimize for speed)**
```cpp=
Disassembly of section .text:
00010074 <_start>:
10074: 000117b7 lui a5,0x11
10078: 23478793 addi a5,a5,564 # 11234 <__DATA_BEGIN__>
1007c: 0047a683 lw a3,4(a5)
10080: 0007a703 lw a4,0(a5)
10084: 14e6d863 bge a3,a4,101d4 <_start+0x160>
10088: 00e7a223 sw a4,4(a5)
1008c: 00000713 li a4,0
10090: 00271713 slli a4,a4,0x2
10094: 00e78733 add a4,a5,a4
10098: 00d72023 sw a3,0(a4)
1009c: 0087a683 lw a3,8(a5)
100a0: 0047a703 lw a4,4(a5)
100a4: 12e6dc63 bge a3,a4,101dc <_start+0x168>
100a8: 0007a603 lw a2,0(a5)
100ac: 00e7a423 sw a4,8(a5)
100b0: 14c6da63 bge a3,a2,10204 <_start+0x190>
100b4: 00c7a223 sw a2,4(a5)
100b8: 00000713 li a4,0
100bc: 00271713 slli a4,a4,0x2
100c0: 00e78733 add a4,a5,a4
100c4: 00d72023 sw a3,0(a4)
100c8: 00c7a683 lw a3,12(a5)
100cc: 0087a703 lw a4,8(a5)
100d0: 10e6da63 bge a3,a4,101e4 <_start+0x170>
100d4: 0047a603 lw a2,4(a5)
100d8: 00e7a623 sw a4,12(a5)
100dc: 10c6dc63 bge a3,a2,101f4 <_start+0x180>
100e0: 0007a703 lw a4,0(a5)
100e4: 00c7a423 sw a2,8(a5)
100e8: 12e6d263 bge a3,a4,1020c <_start+0x198>
100ec: 00e7a223 sw a4,4(a5)
100f0: 00000713 li a4,0
100f4: 00271713 slli a4,a4,0x2
100f8: 00e78733 add a4,a5,a4
100fc: 00d72023 sw a3,0(a4)
10100: 0107a683 lw a3,16(a5)
10104: 00c7a703 lw a4,12(a5)
10108: 0ee6d263 bge a3,a4,101ec <_start+0x178>
1010c: 0087a603 lw a2,8(a5)
10110: 00e7a823 sw a4,16(a5)
10114: 0ec6d463 bge a3,a2,101fc <_start+0x188>
10118: 0047a703 lw a4,4(a5)
1011c: 00c7a623 sw a2,12(a5)
10120: 0ee6da63 bge a3,a4,10214 <_start+0x1a0>
10124: 0007a603 lw a2,0(a5)
10128: 00e7a423 sw a4,8(a5)
1012c: 0ec6d863 bge a3,a2,1021c <_start+0x1a8>
10130: 00c7a223 sw a2,4(a5)
10134: 00000713 li a4,0
10138: 00271713 slli a4,a4,0x2
1013c: 00e78733 add a4,a5,a4
10140: 00d72023 sw a3,0(a4)
10144: 00010737 lui a4,0x10
10148: 22470713 addi a4,a4,548 # 10224 <_start+0x1b0>
1014c: 05300693 li a3,83
10150: 40002637 lui a2,0x40002
10154: 00d60023 sb a3,0(a2) # 40002000 <__global_pointer$+0x3fff05cc>
10158: 00170713 addi a4,a4,1
1015c: 00074683 lbu a3,0(a4)
10160: fe069ae3 bnez a3,10154 <_start+0xe0>
10164: 0007a703 lw a4,0(a5)
10168: 0047a503 lw a0,4(a5)
1016c: 0087a583 lw a1,8(a5)
10170: 03070713 addi a4,a4,48
10174: 0ff77713 andi a4,a4,255
10178: 00e60023 sb a4,0(a2)
1017c: 03050513 addi a0,a0,48
10180: 02000713 li a4,32
10184: 00e60023 sb a4,0(a2)
10188: 0ff57513 andi a0,a0,255
1018c: 00c7a683 lw a3,12(a5)
10190: 00a60023 sb a0,0(a2)
10194: 00e60023 sb a4,0(a2)
10198: 03058593 addi a1,a1,48
1019c: 0107a783 lw a5,16(a5)
101a0: 0ff5f593 andi a1,a1,255
101a4: 00b60023 sb a1,0(a2)
101a8: 03068693 addi a3,a3,48
101ac: 00e60023 sb a4,0(a2)
101b0: 0ff6f693 andi a3,a3,255
101b4: 00d60023 sb a3,0(a2)
101b8: 03078793 addi a5,a5,48
101bc: 00e60023 sb a4,0(a2)
101c0: 0ff7f793 andi a5,a5,255
101c4: 00f60023 sb a5,0(a2)
101c8: 00e60023 sb a4,0(a2)
101cc: 00000513 li a0,0
101d0: 00008067 ret
101d4: 00100713 li a4,1
101d8: eb9ff06f j 10090 <_start+0x1c>
101dc: 00200713 li a4,2
101e0: eddff06f j 100bc <_start+0x48>
101e4: 00300713 li a4,3
101e8: f0dff06f j 100f4 <_start+0x80>
101ec: 00400713 li a4,4
101f0: f49ff06f j 10138 <_start+0xc4>
101f4: 00200713 li a4,2
101f8: efdff06f j 100f4 <_start+0x80>
101fc: 00300713 li a4,3
10200: f39ff06f j 10138 <_start+0xc4>
10204: 00100713 li a4,1
10208: eb5ff06f j 100bc <_start+0x48>
1020c: 00100713 li a4,1
10210: ee5ff06f j 100f4 <_start+0x80>
10214: 00200713 li a4,2
10218: f21ff06f j 10138 <_start+0xc4>
1021c: 00100713 li a4,1
10220: f19ff06f j 10138 <_start+0xc4>
```
Result of `make check`:
```
./emu-rv32i test1
Sorted array: 1 2 3 4 7
>>> Execution time: 279953 ns
>>> Instruction count: 135 (IPS=482223)
>>> Jumps: 20 (14.81%) - 3 forwards, 17 backwards
>>> Branching T=16 (72.73%) F=6 (27.27%)
```
**Compiled with `-Os` (optimize for size)**
```
Disassembly of section .text:
00010074 <_start>:
10074: 000117b7 lui a5,0x11
10078: 13078693 addi a3,a5,304 # 11130 <__DATA_BEGIN__>
1007c: 00468693 addi a3,a3,4
10080: 00000613 li a2,0
10084: 13078793 addi a5,a5,304
10088: fff00313 li t1,-1
1008c: 00400893 li a7,4
10090: 0006a503 lw a0,0(a3)
10094: 00060713 mv a4,a2
10098: 00068593 mv a1,a3
1009c: ffc5a803 lw a6,-4(a1)
100a0: 01055a63 bge a0,a6,100b4 <_start+0x40>
100a4: 0105a023 sw a6,0(a1)
100a8: fff70713 addi a4,a4,-1
100ac: ffc58593 addi a1,a1,-4
100b0: fe6716e3 bne a4,t1,1009c <_start+0x28>
100b4: 00170713 addi a4,a4,1
100b8: 00271713 slli a4,a4,0x2
100bc: 00e78733 add a4,a5,a4
100c0: 00a72023 sw a0,0(a4)
100c4: 00160613 addi a2,a2,1
100c8: 00468693 addi a3,a3,4
100cc: fd1612e3 bne a2,a7,10090 <_start+0x1c>
100d0: 00010737 lui a4,0x10
100d4: 12070713 addi a4,a4,288 # 10120 <_start+0xac>
100d8: 40002637 lui a2,0x40002
100dc: 00074683 lbu a3,0(a4)
100e0: 02069a63 bnez a3,10114 <_start+0xa0>
100e4: 01478613 addi a2,a5,20
100e8: 400026b7 lui a3,0x40002
100ec: 02000593 li a1,32
100f0: 0007a703 lw a4,0(a5)
100f4: 00478793 addi a5,a5,4
100f8: 03070713 addi a4,a4,48
100fc: 0ff77713 andi a4,a4,255
10100: 00e68023 sb a4,0(a3) # 40002000 <__global_pointer$+0x3fff06d0>
10104: 00b68023 sb a1,0(a3)
10108: fec794e3 bne a5,a2,100f0 <_start+0x7c>
1010c: 00000513 li a0,0
10110: 00008067 ret
10114: 00d60023 sb a3,0(a2) # 40002000 <__global_pointer$+0x3fff06d0>
10118: 00170713 addi a4,a4,1
1011c: fc1ff06f j 100dc <_start+0x68>
```
Result of `make check`:
```
./emu-rv32i test1
Sorted array: 1 2 3 4 7
>>> Execution time: 382698 ns
>>> Instruction count: 199 (IPS=519992)
>>> Jumps: 43 (21.61%) - 17 forwards, 26 backwards
>>> Branching T=28 (75.68%) F=9 (24.32%)
```
### Compare between O3 and Os
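With `-O3`, fewer instructions are executed than with `-Os` (135 vs. 199).
With `-Os`, many more jumps and conditional branches are executed (43 jumps vs. 20; 37 branches vs. 22). I think this is because `-Os` keeps the loops rolled up to save code size, so every iteration goes back to memory and re-tests the loop condition, whereas `-O3` unrolls the sorting loops into straight-line code.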
ELF header of the executable:
```shell
$ riscv-none-embed-readelf -h test1
```
Output:
```cpp=
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x10054
Start of program headers: 52 (bytes into file)
Start of section headers: 1012 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 1
Size of section headers: 40 (bytes)
Number of section headers: 7
Section header string table index: 6
```
Section sizes of the executable:
```shell
$ riscv-none-embed-size test1
```
Output:
```
text data bss dec hex filename
524 0 0 524 20c test1
```