owned this note
owned this note
Published
Linked with GitHub
# Assignment2 RISC-V Toolchain
###### tags: `Computer Architecture`
## Insertion sort
The following assembly program was implemented by 施丞宥同學. You can get more detail imformation by clicking the link **[here](https://hackmd.io/PdOkIOH1Qt-EK3O_P9a8uA?view)**.
```clike=
.data
arr: .word 2, 3, 7, 4, 1
str1: .string "Sorted array = "
.text
main:
la s0, arr
addi t0, x0, 5 # initial n
addi t1, x0 ,0 # initial i
jal ra, Loopi
# Print the result to console
jal ra, print
# Exit program
li a7, 10
ecall
Loopi:
addi t1, t1, 1 # i++
slli t4, t1, 2 # get the address of data[i]
add s1, s0, t4
lw t5, 0(s1) # t5=data[i]
add t3, t5, x0 # temp=data[i]
addi t2, t1, -1 # j=i-1
blt t1, t0, Loopj # if(i<n) jump
jr ra
Loopj:
slli t4, t2, 2 # get the address of data[j]
add s1, s0, t4
lw t6, 0(s1) # t6=data[j]
blt t2, x0, Loopi # if(j<0) leave Loopj
bge t3, t6, Loopi # if(temp>=data[j]) leave Loopj
sw t6, 4(s1) # data[j+1] = data[j]
sw t3, 0(s1) # data[j] = temp
addi t2, t2, -1 # j--
j Loopj
print:
la a0, str1
li a7, 4
ecall
lw t0, 0(s0)
mv a0, t0
li a7, 1
ecall
lw t0, 4(s0)
mv a0, t0
li a7, 1
ecall
lw t0, 8(s0)
mv a0, t0
li a7, 1
ecall
lw t0, 12(s0)
mv a0, t0
li a7, 1
ecall
lw t0, 16(s0)
mv a0, t0
li a7, 1
ecall
ret
```
## Translate the assembly programs into C manually
```clike=
#define decrement -1
void insert_sort(int*, int);
void shift_element(int*, int*);
char int_to_char(int);
int _start()
{
int[] arr = {2, 3, 7, 4, 1};
int i, n = 5;
const char* str3 = "\n"
const char* str2 = "Unsorted array = ";
const char* str1 = "Sorted array = ";
volatile char *tx = (volatile char *)0x40002000;
while(*str2) {
*tx = *str2;
str2++;
}
for (i = 0; i < 5; i++)
{
*tx = int_to_char(*(arr+i));
}
while(*str3) {
*tx = *str3;
str3++;
}
while(*str1) {
*tx = *str1;
str1++;
}
insert_sort(arr, n);
for (i = 0; i < 5; i++)
{
*tx = int_to_char(*(arr+i));
}
return 0;
}
void shift_element(int* arr, int* i) {
/*An example of the function shift_element.
Assume the array is { 3, 10, 23, 7} the pointer i point to 7.
arr i
↓ ↓
3, 10, 23, 7
↑________」insert
This function shift 10 and 23 to right to make room for 7.
*/
int ival;
for (ival = *i; i > arr && *(i + decrement) > ival; i = i + decrement) {
*i = *(i + decrement);
}
*i = ival;
}
void insert_sort(int* arr, int arr_size) {
int* i, * last = arr + arr_size;
for (i = arr + 1; i < last; i++)
if (*i < *(i + decrement))
shift_element(arr, i);
}
char int_to_char(int num){
char str;
switch(num) {
case 0:
str='0';
break;
case 1:
str='1';
break;
case 2:
str='2';
break;
case 3:
str='3';
break;
case 4:
str='4';
break;
case 5:
str='5';
break;
case 6:
str='6';
break;
case 7:
str='7';
break;
case 8:
str='8';
break;
case 9:
str='9';
break;
default:
str='-';
break;
}
return str;
}
```
## Execute assembly programs on rv32emu emulator
Declaring the compile instruction with -O3 (optimized for speed), the generated assmebly code is like:






What a gigantic monster! :shocked_face_with_exploding_head::shocked_face_with_exploding_head::shocked_face_with_exploding_head:
And the result of execution is correct.

## Compare the assembly listing between handwritten and compiler optimized one
The instruction count of compiler-generated assembly code is obviously much more than the original one.
## The statistics of execution flow
After compiler-generated assmebly code was executed on emu-rv32i emulator, we can saw the summary on the terminal:

The size of ELF file:

Summary of execution:

## Optimize the generated assembly program
After compilaton, there are so many branch and jump in the <_start> section.
The multiple for loops and while loops results in this condition.
```clike=
while(*str2) {
*tx = *str2;
str2++;
}
for (i = 0; i < 5; i++)
{
*tx = int_to_char(*(arr+i));
}
while(*str3) {
*tx = *str3;
str3++;
}
while(*str1) {
*tx = *str1;
str1++;
}
insert_sort(arr, n);
for (i = 0; i < 5; i++)
{
*tx = int_to_char(*(arr+i));
}
```
How about trying to merge these loops into one loop to reduce the number of cycles?
```clike=
/*The total number of characters to print on screen is 40*/
int y = 0;
int k = 0;
for(i = 0; i < 40; i++){
if(i<15){
*tx = *str2;
str2++;
}else if (i<20){
*tx = int_to_char(*(arr+y));
y++;
}else if (i<22){
*tx = *str3;
str3++;
if(i==34){
insert_sort(arr, n);
}
}else if (i<35){
*tx = *str1;
str1++;
}else {
*tx = int_to_char(*(arr+k));
k++;
}
}
```
It looks like the execution time is even longer... :face_with_raised_eyebrow:

Branch and Jump instructions are more than the original one.
Conclusion: Using if-else statment to replace for loop or while loop is not necessary.