# Assignment2: GNU Toolchain
Contributed by [JengDeChang](https://github.com/00853029/Computer_Architecture/tree/main/hw2)
### Question Selection
#### Question
I chose the question from [林晉宇-Image scaling with Bilinear interpolation by float32 multiplication](https://hackmd.io/@linyu0425/SJHkb8lWT)
#### Motivation
The reason I want to work on this classmate's project is because it has a lot in common with my first assignment, and I want to learn from the ideas of other classmates through this assignment.
#### perfcounter/main.c
- Place the main code between `get_cycles()` to calculate the number of cycles.
```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#define IN_N 2
#define OUT_N 5
extern uint64_t get_cycles();
extern uint64_t get_instret();
/*
* Taken from the Sparkle-suite which is a collection of lightweight symmetric
* cryptographic algorithms currently in the final round of the NIST
* standardization effort.
* See https://sparkle-lwc.github.io/
*/
//extern void sparkle_asm(unsigned int *state, unsigned int ns);
#define WORDS 12
#define ROUNDS 7
uint32_t count_leading_zeros(uint32_t x) {
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
/* count ones (population count) */
x -= ((x >> 1) & 0x55555555);
x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
x = ((x >> 4) + x) & 0x0f0f0f0f;
x += (x >> 8);
x += (x >> 16);
return (32 - (x & 0x7f));
}
int32_t getbit(int32_t value, int n)
{
return (value >> n) & 1;
}
/* int32 multiply */
int32_t imul32(int32_t a, int32_t b)
{
int32_t r = 0;
while(1) {
if((b & 1) != 0) {
r = r + a;
}
b = b >> 1;
if(b == 0x0) break;
r = r >> 1;
}
return r;
}
/* float32 multiply */
float fmul32(float a, float b)
{
/* TODO: Special values like NaN and INF */
int32_t ia = *(int32_t *) &a, ib = *(int32_t *) &b;
/* sign */
int sa = ia >> 31;
int sb = ib >> 31;
/* mantissa */
int32_t ma = (ia & 0x7FFFFF) | 0x800000;
int32_t mb = (ib & 0x7FFFFF) | 0x800000;
/* exponent */
int32_t ea = ((ia >> 23) & 0xFF);
int32_t eb = ((ib >> 23) & 0xFF);
/* 'r' = result */
int32_t mrtmp = imul32(ma, mb);
int mshift = getbit(mrtmp, 24);
int32_t mr = mrtmp >> mshift;
int32_t ertmp = ea + eb - 127;
int32_t er;
if(mshift) er = ertmp + 1;
else er = ertmp;
int sr = sa ^ sb;
int32_t r = (sr << 31) | ((er & 0xFF) << 23) | (mr & 0x7FFFFF);
return *(float *) &r;
}
float fadd32(float a, float b) {
//printf("%f , %f ",a,b);
int32_t ia = *(int32_t *)&a, ib = *(int32_t *)&b;
int32_t temp;
if ((ia & 0x7fffffff) < (ib & 0x7fffffff)){
temp = ia;
ia = ib;
ib = temp;
}
/* sign */
int sa = ia >> 31;
int sb = ib >> 31;
/* mantissa */
int32_t ma = ia & 0x7fffff | 0x800000;
int32_t mb = ib & 0x7fffff | 0x800000;
/* exponent */
int32_t ea = (ia >> 23) & 0xff;
int32_t eb = (ib >> 23) & 0xff;
int32_t align = (ea - eb > 24) ? 24 : (ea - eb);
mb >>= align;
if (sa | sb) ma -= mb;
else ma += mb;
int32_t clz = count_leading_zeros(ma);
int32_t shift = 0;
if (clz <= 8) {
shift = 8 - clz;
ma >>= shift;
ea += shift;
}
else {
shift = clz - 8;
ma <<= shift;
ea -= shift;
}
int32_t r = ia & 0x80000000 | ea << 23 | ma & 0x7fffff;
return *(float *) &r;
}
int main(void)
{
unsigned int state[WORDS] = {0};
/* measure cycles */
uint64_t instret = get_instret();
uint64_t oldcount = get_cycles();
//--------------------------------------------------------------
float im_2[2][2] = {{0.95478,0.64721},
{0.823257,0.22245}};
float im_5[5][5] = {
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0}
};
im_5[0][0] = im_2[0][0];
im_5[0][OUT_N-1] = im_2[0][IN_N-1];
im_5[OUT_N-1][0] = im_2[IN_N-1][0];
im_5[OUT_N-1][OUT_N-1] = im_2[IN_N-1][IN_N-1];
for(int i=1;i<4;i++){
im_5[0][i] = fadd32 (fmul32(im_5[0][0] , (float)(OUT_N - 1 - i) / (float)(OUT_N - 1)) , fmul32(im_5[0][OUT_N-1] , (float)(i) / (float)(OUT_N-1)));
im_5[OUT_N-1][i] = fadd32 (fmul32(im_5[OUT_N-1][0] , (float)(OUT_N - 1 - i) /(float) (OUT_N-1)) , fmul32(im_5[OUT_N-1][OUT_N-1] , (float)(i) / (float)(OUT_N-1)));
}
for(int i=1;i<OUT_N-1;i++){
for(int j=0;j<OUT_N;j++){
im_5[i][j] = fadd32 (fmul32(im_5[0][j] , (float)(OUT_N - 1 - i) / (float)(OUT_N - 1)) , fmul32(im_5[OUT_N-1][j] , (float)(i) / (float)(OUT_N - 1)));
}
}
for(int i=0;i<OUT_N;i++){
for(int j=0;j<OUT_N;j++){
printf("%f ",im_5[i][j]);
}
printf("\n");
}
//----------------------------------------------------------------------
uint64_t cyclecount = get_cycles() - oldcount;
printf("cycle count: %u\n", (unsigned int) cyclecount);
printf("instret: %x\n", (unsigned) (instret & 0xffffffff));
memset(state, 0, WORDS * sizeof(uint32_t));
return 0;
}
```
---
## Compile and Execute
### -O1 Optimization
#### Assembly code
:::spoiler main function:
```c
000103b0 <main>:
103b0: f4010113 add sp,sp,-192
103b4: 0a112e23 sw ra,188(sp)
103b8: 0a812c23 sw s0,184(sp)
103bc: 0a912a23 sw s1,180(sp)
103c0: 0b212823 sw s2,176(sp)
103c4: 0b312623 sw s3,172(sp)
103c8: 0b412423 sw s4,168(sp)
103cc: 0b512223 sw s5,164(sp)
103d0: 0b612023 sw s6,160(sp)
103d4: 09712e23 sw s7,156(sp)
103d8: 09812c23 sw s8,152(sp)
103dc: 09912a23 sw s9,148(sp)
103e0: 09a12823 sw s10,144(sp)
103e4: 09b12623 sw s11,140(sp)
103e8: d75ff0ef jal 1015c <get_instret>
103ec: 00a12623 sw a0,12(sp)
103f0: d59ff0ef jal 10148 <get_cycles>
103f4: 00050d93 mv s11,a0
103f8: 05c00613 li a2,92
103fc: 00000593 li a1,0
10400: 02010513 add a0,sp,32
10404: 5a7000ef jal 111aa <memset>
10408: f301a783 lw a5,-208(gp) # 1e958 <__SDATA_BEGIN__+0x68>
1040c: 00f12e23 sw a5,28(sp)
10410: f341a783 lw a5,-204(gp) # 1e95c <__SDATA_BEGIN__+0x6c>
10414: 02f12623 sw a5,44(sp)
10418: f381a783 lw a5,-200(gp) # 1e960 <__SDATA_BEGIN__+0x70>
1041c: 06f12623 sw a5,108(sp)
10420: f3c1a783 lw a5,-196(gp) # 1e964 <__SDATA_BEGIN__+0x74>
10424: 06f12e23 sw a5,124(sp)
10428: 01c10413 add s0,sp,28
1042c: 00040a13 mv s4,s0
10430: 00100493 li s1,1
10434: 00400b13 li s6,4
10438: f401ad03 lw s10,-192(gp) # 1e968 <__SDATA_BEGIN__+0x78>
1043c: 000d0c93 mv s9,s10
10440: f341ac03 lw s8,-204(gp) # 1e95c <__SDATA_BEGIN__+0x6c>
10444: f3c1ab83 lw s7,-196(gp) # 1e964 <__SDATA_BEGIN__+0x74>
10448: 409b0533 sub a0,s6,s1
1044c: 49c000ef jal 108e8 <__floatsisf>
10450: 000d0593 mv a1,s10
10454: 1b0000ef jal 10604 <__mulsf3>
10458: 00050993 mv s3,a0
1045c: 00050593 mv a1,a0
10460: 01c12503 lw a0,28(sp)
10464: dcdff0ef jal 10230 <fmul32>
10468: 00050a93 mv s5,a0
1046c: 00048513 mv a0,s1
10470: 478000ef jal 108e8 <__floatsisf>
10474: 000c8593 mv a1,s9
10478: 18c000ef jal 10604 <__mulsf3>
1047c: 00050913 mv s2,a0
10480: 00050593 mv a1,a0
10484: 000c0513 mv a0,s8
10488: da9ff0ef jal 10230 <fmul32>
1048c: 00050593 mv a1,a0
10490: 000a8513 mv a0,s5
10494: e35ff0ef jal 102c8 <fadd32>
10498: 00aa2223 sw a0,4(s4)
1049c: 00098593 mv a1,s3
104a0: 06c12503 lw a0,108(sp)
104a4: d8dff0ef jal 10230 <fmul32>
104a8: 00050993 mv s3,a0
104ac: 00090593 mv a1,s2
104b0: 000b8513 mv a0,s7
104b4: d7dff0ef jal 10230 <fmul32>
104b8: 00050593 mv a1,a0
104bc: 00098513 mv a0,s3
104c0: e09ff0ef jal 102c8 <fadd32>
104c4: 04aa2a23 sw a0,84(s4)
104c8: 00148493 add s1,s1,1
104cc: 004a0a13 add s4,s4,4
104d0: f7649ce3 bne s1,s6,10448 <main+0x98>
104d4: 01400a93 li s5,20
104d8: 00100993 li s3,1
104dc: 00400c13 li s8,4
104e0: f401ac83 lw s9,-192(gp) # 1e968 <__SDATA_BEGIN__+0x78>
104e4: 01440b93 add s7,s0,20
104e8: 000c8b13 mv s6,s9
104ec: 413c0533 sub a0,s8,s3
104f0: 3f8000ef jal 108e8 <__floatsisf>
104f4: 000c8593 mv a1,s9
104f8: 10c000ef jal 10604 <__mulsf3>
104fc: 00050a13 mv s4,a0
10500: 00040493 mv s1,s0
10504: 000a0593 mv a1,s4
10508: 0004a503 lw a0,0(s1)
1050c: d25ff0ef jal 10230 <fmul32>
10510: 00050913 mv s2,a0
10514: 00098513 mv a0,s3
10518: 3d0000ef jal 108e8 <__floatsisf>
1051c: 000b0593 mv a1,s6
10520: 0e4000ef jal 10604 <__mulsf3>
10524: 00050593 mv a1,a0
10528: 0504a503 lw a0,80(s1)
1052c: d05ff0ef jal 10230 <fmul32>
10530: 00050593 mv a1,a0
10534: 00090513 mv a0,s2
10538: d91ff0ef jal 102c8 <fadd32>
1053c: 015487b3 add a5,s1,s5
10540: 00a7a023 sw a0,0(a5) # 80000000 <__BSS_END__+0x7ffe1084>
10544: 00448493 add s1,s1,4
10548: fb749ee3 bne s1,s7,10504 <main+0x154>
1054c: 00198993 add s3,s3,1
10550: 014a8a93 add s5,s5,20
10554: f9899ce3 bne s3,s8,104ec <main+0x13c>
10558: 06440a93 add s5,s0,100
1055c: 0001ca37 lui s4,0x1c
10560: 00500993 li s3,5
10564: 00040913 mv s2,s0
10568: 00000493 li s1,0
1056c: 00092503 lw a0,0(s2)
10570: 43a000ef jal 109aa <__extendsfdf2>
10574: 00050613 mv a2,a0
10578: 00058693 mv a3,a1
1057c: 430a0513 add a0,s4,1072 # 1c430 <__trunctfdf2+0x2ae>
10580: 11b000ef jal 10e9a <printf>
10584: 00148493 add s1,s1,1
10588: 00490913 add s2,s2,4
1058c: ff3490e3 bne s1,s3,1056c <main+0x1bc>
10590: 00a00513 li a0,10
10594: 139000ef jal 10ecc <putchar>
10598: 01440413 add s0,s0,20
1059c: fc8a94e3 bne s5,s0,10564 <main+0x1b4>
105a0: ba9ff0ef jal 10148 <get_cycles>
105a4: 41b505b3 sub a1,a0,s11
105a8: 0001c537 lui a0,0x1c
105ac: 43450513 add a0,a0,1076 # 1c434 <__trunctfdf2+0x2b2>
105b0: 0eb000ef jal 10e9a <printf>
105b4: 00c12583 lw a1,12(sp)
105b8: 0001c537 lui a0,0x1c
105bc: 44850513 add a0,a0,1096 # 1c448 <__trunctfdf2+0x2c6>
105c0: 0db000ef jal 10e9a <printf>
105c4: 00000513 li a0,0
105c8: 0bc12083 lw ra,188(sp)
105cc: 0b812403 lw s0,184(sp)
105d0: 0b412483 lw s1,180(sp)
105d4: 0b012903 lw s2,176(sp)
105d8: 0ac12983 lw s3,172(sp)
105dc: 0a812a03 lw s4,168(sp)
105e0: 0a412a83 lw s5,164(sp)
105e4: 0a012b03 lw s6,160(sp)
105e8: 09c12b83 lw s7,156(sp)
105ec: 09812c03 lw s8,152(sp)
105f0: 09412c83 lw s9,148(sp)
105f4: 09012d03 lw s10,144(sp)
105f8: 08c12d83 lw s11,140(sp)
105fc: 0c010113 add sp,sp,192
10600: 00008067 ret
```
:::
:::spoiler fmul function:
```c
00010230 <fmul32>:
10230: ff010113 add sp,sp,-16
10234: 00112623 sw ra,12(sp)
10238: 00812423 sw s0,8(sp)
1023c: 00912223 sw s1,4(sp)
10240: 00050493 mv s1,a0
10244: 00058413 mv s0,a1
10248: 00800537 lui a0,0x800
1024c: fff50793 add a5,a0,-1 # 7fffff <__BSS_END__+0x7e1083>
10250: 00b7f5b3 and a1,a5,a1
10254: 0097f7b3 and a5,a5,s1
10258: 00a5e5b3 or a1,a1,a0
1025c: 00a7e533 or a0,a5,a0
10260: fa5ff0ef jal 10204 <imul32>
10264: 41855793 sra a5,a0,0x18
10268: 0017f793 and a5,a5,1
1026c: 40f55733 sra a4,a0,a5
10270: 4174d613 sra a2,s1,0x17
10274: 0ff67613 zext.b a2,a2
10278: 41745693 sra a3,s0,0x17
1027c: 0ff6f693 zext.b a3,a3
10280: 00c787b3 add a5,a5,a2
10284: 00d787b3 add a5,a5,a3
10288: f8178793 add a5,a5,-127
1028c: 00944533 xor a0,s0,s1
10290: 800006b7 lui a3,0x80000
10294: 00d57533 and a0,a0,a3
10298: 00971713 sll a4,a4,0x9
1029c: 00975713 srl a4,a4,0x9
102a0: 00e56533 or a0,a0,a4
102a4: 01779793 sll a5,a5,0x17
102a8: 7f800737 lui a4,0x7f800
102ac: 00e7f7b3 and a5,a5,a4
102b0: 00f56533 or a0,a0,a5
102b4: 00c12083 lw ra,12(sp)
102b8: 00812403 lw s0,8(sp)
102bc: 00412483 lw s1,4(sp)
102c0: 01010113 add sp,sp,16
102c4: 00008067 ret
```
:::
:::spoiler fadd32 :
```c
000102c8 <fadd32>:
102c8: ff010113 add sp,sp,-16
102cc: 00112623 sw ra,12(sp)
102d0: 00812423 sw s0,8(sp)
102d4: 00912223 sw s1,4(sp)
102d8: 01212023 sw s2,0(sp)
102dc: 00050693 mv a3,a0
102e0: 00058913 mv s2,a1
102e4: 800007b7 lui a5,0x80000
102e8: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe1083>
102ec: 00a7f733 and a4,a5,a0
102f0: 00b7f7b3 and a5,a5,a1
102f4: 00f74663 blt a4,a5,10300 <fadd32+0x38>
102f8: 00050913 mv s2,a0
102fc: 00058693 mv a3,a1
10300: 00800737 lui a4,0x800
10304: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e1083>
10308: 00f97633 and a2,s2,a5
1030c: 00e66633 or a2,a2,a4
10310: 00f6f7b3 and a5,a3,a5
10314: 00e7e7b3 or a5,a5,a4
10318: 41795493 sra s1,s2,0x17
1031c: 0ff4f493 zext.b s1,s1
10320: 4176d713 sra a4,a3,0x17
10324: 0ff77713 zext.b a4,a4
10328: 40e48733 sub a4,s1,a4
1032c: 01800593 li a1,24
10330: 00e5d463 bge a1,a4,10338 <fadd32+0x70>
10334: 01800713 li a4,24
10338: 40e7d7b3 sra a5,a5,a4
1033c: 00d966b3 or a3,s2,a3
10340: 00f60433 add s0,a2,a5
10344: 0406ca63 bltz a3,10398 <fadd32+0xd0>
10348: 00040513 mv a0,s0
1034c: e25ff0ef jal 10170 <count_leading_zeros>
10350: 00800793 li a5,8
10354: 04a7c663 blt a5,a0,103a0 <fadd32+0xd8>
10358: 40a787b3 sub a5,a5,a0
1035c: 40f45433 sra s0,s0,a5
10360: 00f484b3 add s1,s1,a5
10364: 00941413 sll s0,s0,0x9
10368: 00945413 srl s0,s0,0x9
1036c: 01749493 sll s1,s1,0x17
10370: 00946433 or s0,s0,s1
10374: 800007b7 lui a5,0x80000
10378: 00f97533 and a0,s2,a5
1037c: 00a46533 or a0,s0,a0
10380: 00c12083 lw ra,12(sp)
10384: 00812403 lw s0,8(sp)
10388: 00412483 lw s1,4(sp)
1038c: 00012903 lw s2,0(sp)
10390: 01010113 add sp,sp,16
10394: 00008067 ret
10398: 40f60433 sub s0,a2,a5
1039c: fadff06f j 10348 <fadd32+0x80>
103a0: ff850513 add a0,a0,-8
103a4: 00a41433 sll s0,s0,a0
103a8: 40a484b3 sub s1,s1,a0
103ac: fb9ff06f j 10364 <fadd32+0x9c>
```
:::
#### ELF size
```
text data bss dec hex filename
53628 1896 1528 57052 dedc perfcount.elf
```
#### ELF header
```
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x100c4
Start of program headers: 52 (bytes into file)
Start of section headers: 70512 (bytes into file)
Flags: 0x1, RVC, soft-float ABI
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
```
#### Execute
```clike
0.954780 0.877887 0.800995 0.724102 0.647210
0.921899 0.826679 0.731460 0.636240 0.541020
0.889018 0.775471 0.661924 0.548377 0.434830
0.856138 0.724263 0.592389 0.460514 0.328640
0.823257 0.673055 0.522853 0.372652 0.222450
cycle count: 153162
instret: 2de
inferior exit code 0
```
<s>
:::danger
Avoid using screenshots that solely contain plain text. Here are the reasons why:
1. Text-based content is more efficiently searchable than having to browse through images iteratively.
2. The rendering engine of HackMD can consistently generate well-structured layouts with annotated text instead of relying on arbitrary pictures.
3. It provides a more accessible and user-friendly experience for individuals with visual impairments.
:notes: jserv
:::
</s>
### -O2 Optimized Assembly Code
#### Assembly code
:::spoiler main function:
```c
000100b0 <main>:
100b0: f4010113 add sp,sp,-192
100b4: 0a112e23 sw ra,188(sp)
100b8: 0a812c23 sw s0,184(sp)
100bc: 0a912a23 sw s1,180(sp)
100c0: 0b412423 sw s4,168(sp)
100c4: 0b512223 sw s5,164(sp)
100c8: 0b612023 sw s6,160(sp)
100cc: 09712e23 sw s7,156(sp)
100d0: 09812c23 sw s8,152(sp)
100d4: 09912a23 sw s9,148(sp)
100d8: 09a12823 sw s10,144(sp)
100dc: 0b212823 sw s2,176(sp)
100e0: 0b312623 sw s3,172(sp)
100e4: 09b12623 sw s11,140(sp)
100e8: 2c4000ef jal 103ac <get_instret>
100ec: 00a12423 sw a0,8(sp)
100f0: 2a8000ef jal 10398 <get_cycles>
100f4: 00050b13 mv s6,a0
100f8: 05c00613 li a2,92
100fc: 00000593 li a1,0
10100: 02010513 add a0,sp,32
10104: 01612623 sw s6,12(sp)
10108: 08e010ef jal 11196 <memset>
1010c: f301ad03 lw s10,-208(gp) # 1e940 <__SDATA_BEGIN__+0x68>
10110: f341ac83 lw s9,-204(gp) # 1e944 <__SDATA_BEGIN__+0x6c>
10114: f381ac03 lw s8,-200(gp) # 1e948 <__SDATA_BEGIN__+0x70>
10118: f3c1ab83 lw s7,-196(gp) # 1e94c <__SDATA_BEGIN__+0x74>
1011c: f401aa03 lw s4,-192(gp) # 1e950 <__SDATA_BEGIN__+0x78>
10120: 01c10493 add s1,sp,28
10124: 01a12e23 sw s10,28(sp)
10128: 03912623 sw s9,44(sp)
1012c: 07812623 sw s8,108(sp)
10130: 07712e23 sw s7,124(sp)
10134: 00048b13 mv s6,s1
10138: 00100413 li s0,1
1013c: 00400a93 li s5,4
10140: 408a8533 sub a0,s5,s0
10144: 790000ef jal 108d4 <__floatsisf>
10148: 000a0593 mv a1,s4
1014c: 4a4000ef jal 105f0 <__mulsf3>
10150: 00050593 mv a1,a0
10154: 00050d93 mv s11,a0
10158: 000d0513 mv a0,s10
1015c: 320000ef jal 1047c <fmul32>
10160: 00050993 mv s3,a0
10164: 00040513 mv a0,s0
10168: 76c000ef jal 108d4 <__floatsisf>
1016c: 000a0593 mv a1,s4
10170: 480000ef jal 105f0 <__mulsf3>
10174: 00050913 mv s2,a0
10178: 00050593 mv a1,a0
1017c: 000c8513 mv a0,s9
10180: 2fc000ef jal 1047c <fmul32>
10184: 00050593 mv a1,a0
10188: 00098513 mv a0,s3
1018c: 37c000ef jal 10508 <fadd32>
10190: 00ab2223 sw a0,4(s6)
10194: 000d8593 mv a1,s11
10198: 000c0513 mv a0,s8
1019c: 2e0000ef jal 1047c <fmul32>
101a0: 00050793 mv a5,a0
101a4: 00090593 mv a1,s2
101a8: 000b8513 mv a0,s7
101ac: 00078913 mv s2,a5
101b0: 2cc000ef jal 1047c <fmul32>
101b4: 00050593 mv a1,a0
101b8: 00090513 mv a0,s2
101bc: 34c000ef jal 10508 <fadd32>
101c0: 04ab2a23 sw a0,84(s6)
101c4: 00140413 add s0,s0,1
101c8: 004b0b13 add s6,s6,4
101cc: f7541ae3 bne s0,s5,10140 <main+0x90>
101d0: f401ac83 lw s9,-192(gp) # 1e950 <__SDATA_BEGIN__+0x78>
101d4: 01400c13 li s8,20
101d8: 00100a13 li s4,1
101dc: 01448993 add s3,s1,20
101e0: 00400d13 li s10,4
101e4: 414d0533 sub a0,s10,s4
101e8: 6ec000ef jal 108d4 <__floatsisf>
101ec: 000c8593 mv a1,s9
101f0: 400000ef jal 105f0 <__mulsf3>
101f4: 00050b93 mv s7,a0
101f8: 00048413 mv s0,s1
101fc: 00042503 lw a0,0(s0)
10200: 000b8593 mv a1,s7
10204: 278000ef jal 1047c <fmul32>
10208: 00050913 mv s2,a0
1020c: 000a0513 mv a0,s4
10210: 6c4000ef jal 108d4 <__floatsisf>
10214: 000c8593 mv a1,s9
10218: 3d8000ef jal 105f0 <__mulsf3>
1021c: 00050593 mv a1,a0
10220: 05042503 lw a0,80(s0)
10224: 258000ef jal 1047c <fmul32>
10228: 00050593 mv a1,a0
1022c: 00090513 mv a0,s2
10230: 2d8000ef jal 10508 <fadd32>
10234: 018407b3 add a5,s0,s8
10238: 00a7a023 sw a0,0(a5)
1023c: 00440413 add s0,s0,4
10240: fb341ee3 bne s0,s3,101fc <main+0x14c>
10244: 001a0a13 add s4,s4,1
10248: 014c0c13 add s8,s8,20
1024c: f9aa1ce3 bne s4,s10,101e4 <main+0x134>
10250: 06448b93 add s7,s1,100
10254: 0001ca37 lui s4,0x1c
10258: 00500913 li s2,5
1025c: 00000413 li s0,0
10260: 0004a503 lw a0,0(s1)
10264: 00140413 add s0,s0,1
10268: 00448493 add s1,s1,4
1026c: 72a000ef jal 10996 <__extendsfdf2>
10270: 00050613 mv a2,a0
10274: 00058693 mv a3,a1
10278: 418a0513 add a0,s4,1048 # 1c418 <__trunctfdf2+0x2aa>
1027c: 40b000ef jal 10e86 <printf>
10280: ff2410e3 bne s0,s2,10260 <main+0x1b0>
10284: 00a00513 li a0,10
10288: 431000ef jal 10eb8 <putchar>
1028c: 00098493 mv s1,s3
10290: 013b8663 beq s7,s3,1029c <main+0x1ec>
10294: 01498993 add s3,s3,20
10298: fc5ff06f j 1025c <main+0x1ac>
1029c: 0fc000ef jal 10398 <get_cycles>
102a0: 00c12783 lw a5,12(sp)
102a4: 40f505b3 sub a1,a0,a5
102a8: 0001c537 lui a0,0x1c
102ac: 41c50513 add a0,a0,1052 # 1c41c <__trunctfdf2+0x2ae>
102b0: 3d7000ef jal 10e86 <printf>
102b4: 00812583 lw a1,8(sp)
102b8: 0001c537 lui a0,0x1c
102bc: 43050513 add a0,a0,1072 # 1c430 <__trunctfdf2+0x2c2>
102c0: 3c7000ef jal 10e86 <printf>
102c4: 0bc12083 lw ra,188(sp)
102c8: 0b812403 lw s0,184(sp)
102cc: 0b412483 lw s1,180(sp)
102d0: 0b012903 lw s2,176(sp)
102d4: 0ac12983 lw s3,172(sp)
102d8: 0a812a03 lw s4,168(sp)
102dc: 0a412a83 lw s5,164(sp)
102e0: 0a012b03 lw s6,160(sp)
102e4: 09c12b83 lw s7,156(sp)
102e8: 09812c03 lw s8,152(sp)
102ec: 09412c83 lw s9,148(sp)
102f0: 09012d03 lw s10,144(sp)
102f4: 08c12d83 lw s11,140(sp)
102f8: 00000513 li a0,0
102fc: 0c010113 add sp,sp,192
10300: 00008067 ret
```
:::
:::spoiler fmul function:
```c
0001047c <fmul32>:
1047c: 00800737 lui a4,0x800
10480: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e109b>
10484: 00a7f633 and a2,a5,a0
10488: 41755693 sra a3,a0,0x17
1048c: 00b7f7b3 and a5,a5,a1
10490: 4175d893 sra a7,a1,0x17
10494: 00e66633 or a2,a2,a4
10498: 00e7e7b3 or a5,a5,a4
1049c: 0ff6f813 zext.b a6,a3
104a0: 0ff8f893 zext.b a7,a7
104a4: 00000713 li a4,0
104a8: 0017f693 and a3,a5,1
104ac: 4017d793 sra a5,a5,0x1
104b0: 00068463 beqz a3,104b8 <fmul32+0x3c>
104b4: 00c70733 add a4,a4,a2
104b8: 00078663 beqz a5,104c4 <fmul32+0x48>
104bc: 40175713 sra a4,a4,0x1
104c0: fe9ff06f j 104a8 <fmul32+0x2c>
104c4: 41875613 sra a2,a4,0x18
104c8: 011806b3 add a3,a6,a7
104cc: 00c037b3 snez a5,a2
104d0: 40c75733 sra a4,a4,a2
104d4: 00d787b3 add a5,a5,a3
104d8: f8178793 add a5,a5,-127
104dc: 00a5c5b3 xor a1,a1,a0
104e0: 800006b7 lui a3,0x80000
104e4: 00971713 sll a4,a4,0x9
104e8: 00d5f5b3 and a1,a1,a3
104ec: 00975713 srl a4,a4,0x9
104f0: 01779513 sll a0,a5,0x17
104f4: 7f8007b7 lui a5,0x7f800
104f8: 00e5e5b3 or a1,a1,a4
104fc: 00f57533 and a0,a0,a5
10500: 00a5e533 or a0,a1,a0
10504: 00008067 ret
```
:::
:::spoiler fadd32 function:
```c
00010508 <fadd32>:
10508: 800007b7 lui a5,0x80000
1050c: ff010113 add sp,sp,-16
10510: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe109b>
10514: 00a7f733 and a4,a5,a0
10518: 00112623 sw ra,12(sp)
1051c: 00812423 sw s0,8(sp)
10520: 00912223 sw s1,4(sp)
10524: 01212023 sw s2,0(sp)
10528: 00b7f7b3 and a5,a5,a1
1052c: 0af74463 blt a4,a5,105d4 <fadd32+0xcc>
10530: 00050913 mv s2,a0
10534: 00058693 mv a3,a1
10538: 008005b7 lui a1,0x800
1053c: 41795493 sra s1,s2,0x17
10540: 4176d793 sra a5,a3,0x17
10544: fff58713 add a4,a1,-1 # 7fffff <__BSS_END__+0x7e109b>
10548: 0ff4f493 zext.b s1,s1
1054c: 0ff7f793 zext.b a5,a5
10550: 00e97633 and a2,s2,a4
10554: 40f487b3 sub a5,s1,a5
10558: 00e6f733 and a4,a3,a4
1055c: 01800513 li a0,24
10560: 00b66633 or a2,a2,a1
10564: 00b76733 or a4,a4,a1
10568: 00f55463 bge a0,a5,10570 <fadd32+0x68>
1056c: 01800793 li a5,24
10570: 40f75733 sra a4,a4,a5
10574: 00d966b3 or a3,s2,a3
10578: 00e60433 add s0,a2,a4
1057c: 0006d463 bgez a3,10584 <fadd32+0x7c>
10580: 40e60433 sub s0,a2,a4
10584: 00040513 mv a0,s0
10588: e39ff0ef jal 103c0 <count_leading_zeros>
1058c: 00800793 li a5,8
10590: 04a7c863 blt a5,a0,105e0 <fadd32+0xd8>
10594: 40a787b3 sub a5,a5,a0
10598: 40f45433 sra s0,s0,a5
1059c: 00f484b3 add s1,s1,a5
105a0: 00941413 sll s0,s0,0x9
105a4: 01749493 sll s1,s1,0x17
105a8: 00945413 srl s0,s0,0x9
105ac: 800007b7 lui a5,0x80000
105b0: 00946433 or s0,s0,s1
105b4: 00f97533 and a0,s2,a5
105b8: 00c12083 lw ra,12(sp)
105bc: 00a46533 or a0,s0,a0
105c0: 00812403 lw s0,8(sp)
105c4: 00412483 lw s1,4(sp)
105c8: 00012903 lw s2,0(sp)
105cc: 01010113 add sp,sp,16
105d0: 00008067 ret
105d4: 00050693 mv a3,a0
105d8: 00058913 mv s2,a1
105dc: f5dff06f j 10538 <fadd32+0x30>
105e0: ff850513 add a0,a0,-8
105e4: 00a41433 sll s0,s0,a0
105e8: 40a484b3 sub s1,s1,a0
105ec: fb5ff06f j 105a0 <fadd32+0x98>
```
:::
#### ELF size
```
text data bss dec hex filename
53608 1896 1528 57032 dec8 perfcount.elf
```
#### ELF header
```
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x10316
Start of program headers: 52 (bytes into file)
Start of section headers: 70504 (bytes into file)
Flags: 0x1, RVC, soft-float ABI
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
```
#### Execute
```clike
0.954780 0.877887 0.800995 0.724102 0.647210
0.921899 0.826679 0.731460 0.636240 0.541020
0.889018 0.775471 0.661924 0.548377 0.434830
0.856138 0.724263 0.592389 0.460514 0.328640
0.823257 0.673055 0.522853 0.372652 0.222450
cycle count: 153485
instret: 2d4
inferior exit code 0
```
### -O3 Optimized Assembly Code
#### Assembly code
:::spoiler main function:
```c
000100b0 <main>:
100b0: f1010113 add sp,sp,-240
100b4: 0e112623 sw ra,236(sp)
100b8: 0e912223 sw s1,228(sp)
100bc: 0f212023 sw s2,224(sp)
100c0: 0d312e23 sw s3,220(sp)
100c4: 0d412c23 sw s4,216(sp)
100c8: 0d512a23 sw s5,212(sp)
100cc: 0d612823 sw s6,208(sp)
100d0: 0d712623 sw s7,204(sp)
100d4: 0d812423 sw s8,200(sp)
100d8: 0d912223 sw s9,196(sp)
100dc: 0da12023 sw s10,192(sp)
100e0: 0bb12e23 sw s11,188(sp)
100e4: 0e812423 sw s0,232(sp)
100e8: 5c8000ef jal 106b0 <get_instret>
100ec: 00050493 mv s1,a0
100f0: 5ac000ef jal 1069c <get_cycles>
100f4: 00050913 mv s2,a0
100f8: 05c00613 li a2,92
100fc: 00000593 li a1,0
10100: 05010513 add a0,sp,80
10104: 406010ef jal 1150a <memset>
10108: f3c1a783 lw a5,-196(gp) # 1e74c <__SDATA_BEGIN__+0x74>
1010c: f301a503 lw a0,-208(gp) # 1e740 <__SDATA_BEGIN__+0x68>
10110: f341a603 lw a2,-204(gp) # 1e744 <__SDATA_BEGIN__+0x6c>
10114: f381a683 lw a3,-200(gp) # 1e748 <__SDATA_BEGIN__+0x70>
10118: 3f7475b7 lui a1,0x3f747
1011c: 3f25b0b7 lui ra,0x3f25b
10120: 3f52c3b7 lui t2,0x3f52c
10124: 3e63d2b7 lui t0,0x3e63d
10128: 04c10713 add a4,sp,76
1012c: 00f47cb7 lui s9,0xf47
10130: 00a5bd37 lui s10,0xa5b
10134: 00d2cdb7 lui s11,0xd2c
10138: 00e3d9b7 lui s3,0xe3d
1013c: 0af12623 sw a5,172(sp)
10140: c7658593 add a1,a1,-906 # 3f746c76 <__BSS_END__+0x3f727f12>
10144: 008007b7 lui a5,0x800
10148: f8e08093 add ra,ra,-114 # 3f25af8e <__BSS_END__+0x3f23c22a>
1014c: 0f938393 add t2,t2,249 # 3f52c0f9 <__BSS_END__+0x3f50d395>
10150: 9ef28293 add t0,t0,-1553 # 3e63c9ef <__BSS_END__+0x3e61dc8b>
10154: 04a12623 sw a0,76(sp)
10158: 04c12e23 sw a2,92(sp)
1015c: 08d12e23 sw a3,156(sp)
10160: 00070c13 mv s8,a4
10164: 00100a93 li s5,1
10168: fff78a13 add s4,a5,-1 # 7fffff <__BSS_END__+0x7e129b>
1016c: c76c8c93 add s9,s9,-906 # f46c76 <__BSS_END__+0xf27f12>
10170: 80000bb7 lui s7,0x80000
10174: 7f800b37 lui s6,0x7f800
10178: f8ed0d13 add s10,s10,-114 # a5af8e <__BSS_END__+0xa3c22a>
1017c: 0f9d8d93 add s11,s11,249 # d2c0f9 <__BSS_END__+0xd0d395>
10180: 9ef98993 add s3,s3,-1553 # e3c9ef <__BSS_END__+0xe1dc8b>
10184: 02e12a23 sw a4,52(sp)
10188: 02b12223 sw a1,36(sp)
1018c: 02112423 sw ra,40(sp)
10190: 02712623 sw t2,44(sp)
10194: 02512823 sw t0,48(sp)
10198: 02912c23 sw s1,56(sp)
1019c: 03212e23 sw s2,60(sp)
101a0: 00400793 li a5,4
101a4: 41578533 sub a0,a5,s5
101a8: 2a1000ef jal 10c48 <__floatsisf>
101ac: f401a583 lw a1,-192(gp) # 1e750 <__SDATA_BEGIN__+0x78>
101b0: 7b4000ef jal 10964 <__mulsf3>
101b4: 00800737 lui a4,0x800
101b8: 00aa77b3 and a5,s4,a0
101bc: 00e7e4b3 or s1,a5,a4
101c0: 41755893 sra a7,a0,0x17
101c4: 00050813 mv a6,a0
101c8: 0ff8f893 zext.b a7,a7
101cc: 00048693 mv a3,s1
101d0: 00000713 li a4,0
101d4: 0016f793 and a5,a3,1
101d8: 4016d693 sra a3,a3,0x1
101dc: 00078463 beqz a5,101e4 <main+0x134>
101e0: 01970733 add a4,a4,s9
101e4: 00068663 beqz a3,101f0 <main+0x140>
101e8: 40175713 sra a4,a4,0x1
101ec: fe9ff06f j 101d4 <main+0x124>
101f0: fff88793 add a5,a7,-1
101f4: 41875593 sra a1,a4,0x18
101f8: 02f12023 sw a5,32(sp)
101fc: 40b75733 sra a4,a4,a1
10200: 00088613 mv a2,a7
10204: 00059463 bnez a1,1020c <main+0x15c>
10208: 00078613 mv a2,a5
1020c: 02412783 lw a5,36(sp)
10210: 01477733 and a4,a4,s4
10214: 01761613 sll a2,a2,0x17
10218: 0107c433 xor s0,a5,a6
1021c: 01747433 and s0,s0,s7
10220: 00e46433 or s0,s0,a4
10224: 01667633 and a2,a2,s6
10228: 00c467b3 or a5,s0,a2
1022c: 000a8513 mv a0,s5
10230: 00d12c23 sw a3,24(sp)
10234: 01112a23 sw a7,20(sp)
10238: 01012823 sw a6,16(sp)
1023c: 00f12623 sw a5,12(sp)
10240: 209000ef jal 10c48 <__floatsisf>
10244: f401a583 lw a1,-192(gp) # 1e750 <__SDATA_BEGIN__+0x78>
10248: 71c000ef jal 10964 <__mulsf3>
1024c: 00aa7633 and a2,s4,a0
10250: 008007b7 lui a5,0x800
10254: 01412883 lw a7,20(sp)
10258: 01812683 lw a3,24(sp)
1025c: 01012803 lw a6,16(sp)
10260: 00f66633 or a2,a2,a5
10264: 41755e13 sra t3,a0,0x17
10268: 00050913 mv s2,a0
1026c: 0ffe7413 zext.b s0,t3
10270: 00060713 mv a4,a2
10274: 00177793 and a5,a4,1
10278: 40175713 sra a4,a4,0x1
1027c: 00078463 beqz a5,10284 <main+0x1d4>
10280: 01a686b3 add a3,a3,s10
10284: 00070663 beqz a4,10290 <main+0x1e0>
10288: 4016d693 sra a3,a3,0x1
1028c: fe9ff06f j 10274 <main+0x1c4>
10290: 02812783 lw a5,40(sp)
10294: 4186d593 sra a1,a3,0x18
10298: 0015b513 seqz a0,a1
1029c: 40b6d6b3 sra a3,a3,a1
102a0: 40a40533 sub a0,s0,a0
102a4: 0127c5b3 xor a1,a5,s2
102a8: 0146f6b3 and a3,a3,s4
102ac: 0175f5b3 and a1,a1,s7
102b0: 01751513 sll a0,a0,0x17
102b4: 00d5e5b3 or a1,a1,a3
102b8: 01657533 and a0,a0,s6
102bc: 00a5e5b3 or a1,a1,a0
102c0: 00c12503 lw a0,12(sp)
102c4: 00c12e23 sw a2,28(sp)
102c8: 00e12c23 sw a4,24(sp)
102cc: 01112a23 sw a7,20(sp)
102d0: 01012823 sw a6,16(sp)
102d4: 538000ef jal 1080c <fadd32>
102d8: 01c12603 lw a2,28(sp)
102dc: 01812703 lw a4,24(sp)
102e0: 01412883 lw a7,20(sp)
102e4: 01012803 lw a6,16(sp)
102e8: 00ac2223 sw a0,4(s8)
102ec: 0014f793 and a5,s1,1
102f0: 4014d493 sra s1,s1,0x1
102f4: 00078463 beqz a5,102fc <main+0x24c>
102f8: 01b70733 add a4,a4,s11
102fc: 00048663 beqz s1,10308 <main+0x258>
10300: 40175713 sra a4,a4,0x1
10304: fe9ff06f j 102ec <main+0x23c>
10308: 41875693 sra a3,a4,0x18
1030c: 40d75733 sra a4,a4,a3
10310: 00069463 bnez a3,10318 <main+0x268>
10314: 02012883 lw a7,32(sp)
10318: 02c12783 lw a5,44(sp)
1031c: 01477733 and a4,a4,s4
10320: 01789893 sll a7,a7,0x17
10324: 0107c533 xor a0,a5,a6
10328: 01757533 and a0,a0,s7
1032c: 00e56533 or a0,a0,a4
10330: 0168f8b3 and a7,a7,s6
10334: 01156533 or a0,a0,a7
10338: 00167793 and a5,a2,1
1033c: 40165613 sra a2,a2,0x1
10340: 00078463 beqz a5,10348 <main+0x298>
10344: 013484b3 add s1,s1,s3
10348: 00060663 beqz a2,10354 <main+0x2a4>
1034c: 4014d493 sra s1,s1,0x1
10350: fe9ff06f j 10338 <main+0x288>
10354: 4184d693 sra a3,s1,0x18
10358: 00d03733 snez a4,a3
1035c: 40d4d7b3 sra a5,s1,a3
10360: 03012683 lw a3,48(sp)
10364: 00870733 add a4,a4,s0
10368: ffd70713 add a4,a4,-3 # 7ffffd <__BSS_END__+0x7e1299>
1036c: 0126c333 xor t1,a3,s2
10370: 01737333 and t1,t1,s7
10374: 01771713 sll a4,a4,0x17
10378: 0147f7b3 and a5,a5,s4
1037c: 00f367b3 or a5,t1,a5
10380: 016775b3 and a1,a4,s6
10384: 00b7e5b3 or a1,a5,a1
10388: 484000ef jal 1080c <fadd32>
1038c: 04ac2a23 sw a0,84(s8)
10390: 001a8a93 add s5,s5,1
10394: 00400793 li a5,4
10398: 004c0c13 add s8,s8,4
1039c: e0fa92e3 bne s5,a5,101a0 <main+0xf0>
103a0: 03412703 lw a4,52(sp)
103a4: f401a783 lw a5,-192(gp) # 1e750 <__SDATA_BEGIN__+0x78>
103a8: 03812483 lw s1,56(sp)
103ac: 03c12903 lw s2,60(sp)
103b0: 00800437 lui s0,0x800
103b4: 00f12c23 sw a5,24(sp)
103b8: 01470993 add s3,a4,20
103bc: 00100793 li a5,1
103c0: 01400d13 li s10,20
103c4: 00f12823 sw a5,16(sp)
103c8: fff40d93 add s11,s0,-1 # 7fffff <__BSS_END__+0x7e129b>
103cc: 80000ab7 lui s5,0x80000
103d0: 7f800a37 lui s4,0x7f800
103d4: 00e12a23 sw a4,20(sp)
103d8: 00912e23 sw s1,28(sp)
103dc: 03212023 sw s2,32(sp)
103e0: 01312623 sw s3,12(sp)
103e4: 01012483 lw s1,16(sp)
103e8: 00400793 li a5,4
103ec: 40978533 sub a0,a5,s1
103f0: 059000ef jal 10c48 <__floatsisf>
103f4: 01812983 lw s3,24(sp)
103f8: 00098593 mv a1,s3
103fc: 568000ef jal 10964 <__mulsf3>
10400: 00adfcb3 and s9,s11,a0
10404: 41755c13 sra s8,a0,0x17
10408: 00050913 mv s2,a0
1040c: 00048513 mv a0,s1
10410: 039000ef jal 10c48 <__floatsisf>
10414: 00098593 mv a1,s3
10418: 54c000ef jal 10964 <__mulsf3>
1041c: 01412983 lw s3,20(sp)
10420: 00adfbb3 and s7,s11,a0
10424: 41755b13 sra s6,a0,0x17
10428: 008cecb3 or s9,s9,s0
1042c: 0ffc7c13 zext.b s8,s8
10430: 00050493 mv s1,a0
10434: 008bebb3 or s7,s7,s0
10438: 0ffb7b13 zext.b s6,s6
1043c: 0009a503 lw a0,0(s3)
10440: 00000713 li a4,0
10444: 000c8793 mv a5,s9
10448: 01b575b3 and a1,a0,s11
1044c: 41755813 sra a6,a0,0x17
10450: 0085e5b3 or a1,a1,s0
10454: 0ff87813 zext.b a6,a6
10458: 0017f693 and a3,a5,1
1045c: 4017d793 sra a5,a5,0x1
10460: 00068463 beqz a3,10468 <main+0x3b8>
10464: 00b70733 add a4,a4,a1
10468: 00078663 beqz a5,10474 <main+0x3c4>
1046c: 40175713 sra a4,a4,0x1
10470: fe9ff06f j 10458 <main+0x3a8>
10474: 41875893 sra a7,a4,0x18
10478: 01880833 add a6,a6,s8
1047c: 011036b3 snez a3,a7
10480: 0509a583 lw a1,80(s3)
10484: 010686b3 add a3,a3,a6
10488: 41175733 sra a4,a4,a7
1048c: f8168693 add a3,a3,-127
10490: 01254533 xor a0,a0,s2
10494: 01b77733 and a4,a4,s11
10498: 01557533 and a0,a0,s5
1049c: 01769693 sll a3,a3,0x17
104a0: 00e56533 or a0,a0,a4
104a4: 0146f6b3 and a3,a3,s4
104a8: 01b5f833 and a6,a1,s11
104ac: 4175d893 sra a7,a1,0x17
104b0: 00d56533 or a0,a0,a3
104b4: 00886833 or a6,a6,s0
104b8: 0ff8f893 zext.b a7,a7
104bc: 000b8713 mv a4,s7
104c0: 00177693 and a3,a4,1
104c4: 40175713 sra a4,a4,0x1
104c8: 00068463 beqz a3,104d0 <main+0x420>
104cc: 010787b3 add a5,a5,a6
104d0: 00070663 beqz a4,104dc <main+0x42c>
104d4: 4017d793 sra a5,a5,0x1
104d8: fe9ff06f j 104c0 <main+0x410>
104dc: 4187d813 sra a6,a5,0x18
104e0: 016886b3 add a3,a7,s6
104e4: 01003733 snez a4,a6
104e8: 00d70733 add a4,a4,a3
104ec: 4107d7b3 sra a5,a5,a6
104f0: f8170713 add a4,a4,-127
104f4: 0095c5b3 xor a1,a1,s1
104f8: 01b7f7b3 and a5,a5,s11
104fc: 0155f5b3 and a1,a1,s5
10500: 01771713 sll a4,a4,0x17
10504: 00f5e5b3 or a1,a1,a5
10508: 01477733 and a4,a4,s4
1050c: 00e5e5b3 or a1,a1,a4
10510: 2fc000ef jal 1080c <fadd32>
10514: 01a987b3 add a5,s3,s10
10518: 00a7a023 sw a0,0(a5) # 800000 <__BSS_END__+0x7e129c>
1051c: 00c12783 lw a5,12(sp)
10520: 00498993 add s3,s3,4
10524: f1379ce3 bne a5,s3,1043c <main+0x38c>
10528: 01012783 lw a5,16(sp)
1052c: 00400713 li a4,4
10530: 014d0d13 add s10,s10,20
10534: 00178793 add a5,a5,1
10538: 00f12823 sw a5,16(sp)
1053c: eae794e3 bne a5,a4,103e4 <main+0x334>
10540: 01412703 lw a4,20(sp)
10544: 01c12483 lw s1,28(sp)
10548: 02012903 lw s2,32(sp)
1054c: 00c12983 lw s3,12(sp)
10550: 06470b13 add s6,a4,100
10554: 0001cab7 lui s5,0x1c
10558: 00500a13 li s4,5
1055c: 00070413 mv s0,a4
10560: 00000b93 li s7,0
10564: 00042503 lw a0,0(s0)
10568: 001b8b93 add s7,s7,1 # 80000001 <__BSS_END__+0x7ffe129d>
1056c: 00440413 add s0,s0,4
10570: 79a000ef jal 10d0a <__extendsfdf2>
10574: 00050613 mv a2,a0
10578: 00058693 mv a3,a1
1057c: 790a8513 add a0,s5,1936 # 1c790 <__trunctfdf2+0x2ae>
10580: 47b000ef jal 111fa <printf>
10584: ff4b90e3 bne s7,s4,10564 <main+0x4b4>
10588: 00a00513 li a0,10
1058c: 4a1000ef jal 1122c <putchar>
10590: 00098413 mv s0,s3
10594: 013b0663 beq s6,s3,105a0 <main+0x4f0>
10598: 01498993 add s3,s3,20
1059c: fc5ff06f j 10560 <main+0x4b0>
105a0: 0fc000ef jal 1069c <get_cycles>
105a4: 412505b3 sub a1,a0,s2
105a8: 0001c537 lui a0,0x1c
105ac: 79450513 add a0,a0,1940 # 1c794 <__trunctfdf2+0x2b2>
105b0: 44b000ef jal 111fa <printf>
105b4: 0001c537 lui a0,0x1c
105b8: 00048593 mv a1,s1
105bc: 7a850513 add a0,a0,1960 # 1c7a8 <__trunctfdf2+0x2c6>
105c0: 43b000ef jal 111fa <printf>
105c4: 0ec12083 lw ra,236(sp)
105c8: 0e812403 lw s0,232(sp)
105cc: 0e412483 lw s1,228(sp)
105d0: 0e012903 lw s2,224(sp)
105d4: 0dc12983 lw s3,220(sp)
105d8: 0d812a03 lw s4,216(sp)
105dc: 0d412a83 lw s5,212(sp)
105e0: 0d012b03 lw s6,208(sp)
105e4: 0cc12b83 lw s7,204(sp)
105e8: 0c812c03 lw s8,200(sp)
105ec: 0c412c83 lw s9,196(sp)
105f0: 0c012d03 lw s10,192(sp)
105f4: 0bc12d83 lw s11,188(sp)
105f8: 00000513 li a0,0
105fc: 0f010113 add sp,sp,240
10600: 00008067 ret
```
:::
:::spoiler fmul function:
```c
00010780 <fmul32>:
10780: 00800737 lui a4,0x800
10784: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e129b>
10788: 00a7f633 and a2,a5,a0
1078c: 41755693 sra a3,a0,0x17
10790: 00b7f7b3 and a5,a5,a1
10794: 4175d893 sra a7,a1,0x17
10798: 00e66633 or a2,a2,a4
1079c: 00e7e7b3 or a5,a5,a4
107a0: 0ff6f813 zext.b a6,a3
107a4: 0ff8f893 zext.b a7,a7
107a8: 00000713 li a4,0
107ac: 0017f693 and a3,a5,1
107b0: 4017d793 sra a5,a5,0x1
107b4: 00068463 beqz a3,107bc <fmul32+0x3c>
107b8: 00c70733 add a4,a4,a2
107bc: 00078663 beqz a5,107c8 <fmul32+0x48>
107c0: 40175713 sra a4,a4,0x1
107c4: fe9ff06f j 107ac <fmul32+0x2c>
107c8: 41875613 sra a2,a4,0x18
107cc: 011806b3 add a3,a6,a7
107d0: 00c037b3 snez a5,a2
107d4: 40c75733 sra a4,a4,a2
107d8: 00d787b3 add a5,a5,a3
107dc: f8178793 add a5,a5,-127
107e0: 00a5c5b3 xor a1,a1,a0
107e4: 800006b7 lui a3,0x80000
107e8: 00971713 sll a4,a4,0x9
107ec: 00d5f5b3 and a1,a1,a3
107f0: 00975713 srl a4,a4,0x9
107f4: 01779513 sll a0,a5,0x17
107f8: 7f8007b7 lui a5,0x7f800
107fc: 00e5e5b3 or a1,a1,a4
10800: 00f57533 and a0,a0,a5
10804: 00a5e533 or a0,a1,a0
10808: 00008067 ret
```
:::
:::spoiler fadd32 function:
```c
0001080c <fadd32>:
1080c: 800007b7 lui a5,0x80000
10810: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe129b>
10814: 00a7f733 and a4,a5,a0
10818: 00b7f7b3 and a5,a5,a1
1081c: 00050813 mv a6,a0
10820: 00058613 mv a2,a1
10824: 00f74663 blt a4,a5,10830 <fadd32+0x24>
10828: 00050613 mv a2,a0
1082c: 00058813 mv a6,a1
10830: 00800537 lui a0,0x800
10834: 41765693 sra a3,a2,0x17
10838: 41785793 sra a5,a6,0x17
1083c: fff50713 add a4,a0,-1 # 7fffff <__BSS_END__+0x7e129b>
10840: 0ff6f693 zext.b a3,a3
10844: 0ff7f793 zext.b a5,a5
10848: 00e675b3 and a1,a2,a4
1084c: 40f687b3 sub a5,a3,a5
10850: 00e87733 and a4,a6,a4
10854: 01800893 li a7,24
10858: 00a5e5b3 or a1,a1,a0
1085c: 00a76733 or a4,a4,a0
10860: 00f8d463 bge a7,a5,10868 <fadd32+0x5c>
10864: 01800793 li a5,24
10868: 40f757b3 sra a5,a4,a5
1086c: 01066833 or a6,a2,a6
10870: 00f58733 add a4,a1,a5
10874: 00085463 bgez a6,1087c <fadd32+0x70>
10878: 40f58733 sub a4,a1,a5
1087c: 00175793 srl a5,a4,0x1
10880: 00e7e7b3 or a5,a5,a4
10884: 0027d593 srl a1,a5,0x2
10888: 00b7e7b3 or a5,a5,a1
1088c: 0047d593 srl a1,a5,0x4
10890: 00b7e7b3 or a5,a5,a1
10894: 0087d593 srl a1,a5,0x8
10898: 00b7e7b3 or a5,a5,a1
1089c: 0107d593 srl a1,a5,0x10
108a0: 00b7e7b3 or a5,a5,a1
108a4: 55555537 lui a0,0x55555
108a8: 0017d593 srl a1,a5,0x1
108ac: 55550513 add a0,a0,1365 # 55555555 <__BSS_END__+0x555367f1>
108b0: 00a5f5b3 and a1,a1,a0
108b4: 40b787b3 sub a5,a5,a1
108b8: 33333537 lui a0,0x33333
108bc: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x333145cf>
108c0: 0027d593 srl a1,a5,0x2
108c4: 00a5f5b3 and a1,a1,a0
108c8: 00a7f7b3 and a5,a5,a0
108cc: 00f585b3 add a1,a1,a5
108d0: 0045d793 srl a5,a1,0x4
108d4: 0f0f1537 lui a0,0xf0f1
108d8: 00b787b3 add a5,a5,a1
108dc: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d21ab>
108e0: 00a7f7b3 and a5,a5,a0
108e4: 0087d593 srl a1,a5,0x8
108e8: 00b787b3 add a5,a5,a1
108ec: 0107d593 srl a1,a5,0x10
108f0: 00b787b3 add a5,a5,a1
108f4: 07f7f793 and a5,a5,127
108f8: 02000593 li a1,32
108fc: 40f585b3 sub a1,a1,a5
10900: 00800513 li a0,8
10904: 02b54863 blt a0,a1,10934 <fadd32+0x128>
10908: fe878793 add a5,a5,-24
1090c: 40f75733 sra a4,a4,a5
10910: 00f686b3 add a3,a3,a5
10914: 00971713 sll a4,a4,0x9
10918: 01769693 sll a3,a3,0x17
1091c: 800007b7 lui a5,0x80000
10920: 00975713 srl a4,a4,0x9
10924: 00d76733 or a4,a4,a3
10928: 00f67533 and a0,a2,a5
1092c: 00a76533 or a0,a4,a0
10930: 00008067 ret
10934: 01800593 li a1,24
10938: 40f587b3 sub a5,a1,a5
1093c: 00f71733 sll a4,a4,a5
10940: 40f686b3 sub a3,a3,a5
10944: 00971713 sll a4,a4,0x9
10948: 01769693 sll a3,a3,0x17
1094c: 800007b7 lui a5,0x80000
10950: 00975713 srl a4,a4,0x9
10954: 00d76733 or a4,a4,a3
10958: 00f67533 and a0,a2,a5
1095c: 00a76533 or a0,a4,a0
10960: 00008067 ret
```
:::
#### ELF size
```
text data bss dec hex filename
54492 1896 1528 57916 e23c perfcount.elf
```
#### ELF header
```
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x10618
Start of program headers: 52 (bytes into file)
Start of section headers: 74088 (bytes into file)
Flags: 0x1, RVC, soft-float ABI
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
```
#### Execute
```clike
0.954780 0.877887 0.800995 0.724102 0.647210
0.921899 0.826679 0.731460 0.636240 0.541020
0.889018 0.775471 0.661924 0.548377 0.434830
0.856138 0.724263 0.592389 0.460514 0.328640
0.823257 0.673055 0.522853 0.372652 0.222450
cycle count: 150953
instret: 2d4
inferior exit code 0
```
### -Os Optimized Assembly Code
#### Assembly code
:::spoiler main function:
```c
00100b0 <main>:
100b0: f4010113 add sp,sp,-192
100b4: 0a112e23 sw ra,188(sp)
100b8: 0a812c23 sw s0,184(sp)
100bc: 0a912a23 sw s1,180(sp)
100c0: 0b212823 sw s2,176(sp)
100c4: 0b412423 sw s4,168(sp)
100c8: 0b512223 sw s5,164(sp)
100cc: 0b612023 sw s6,160(sp)
100d0: 09712e23 sw s7,156(sp)
100d4: 09812c23 sw s8,152(sp)
100d8: 09912a23 sw s9,148(sp)
100dc: 09a12823 sw s10,144(sp)
100e0: 09b12623 sw s11,140(sp)
100e4: 0b312623 sw s3,172(sp)
100e8: 2b4000ef jal 1039c <get_instret>
100ec: 00a12623 sw a0,12(sp)
100f0: 298000ef jal 10388 <get_cycles>
100f4: 00050a93 mv s5,a0
100f8: 05c00613 li a2,92
100fc: 00000593 li a1,0
10100: 02010513 add a0,sp,32
10104: 096010ef jal 1119a <memset>
10108: f301ad83 lw s11,-208(gp) # 1e948 <__SDATA_BEGIN__+0x68>
1010c: f341ac83 lw s9,-204(gp) # 1e94c <__SDATA_BEGIN__+0x6c>
10110: f381ac03 lw s8,-200(gp) # 1e950 <__SDATA_BEGIN__+0x70>
10114: f3c1ab83 lw s7,-196(gp) # 1e954 <__SDATA_BEGIN__+0x74>
10118: f401ad03 lw s10,-192(gp) # 1e958 <__SDATA_BEGIN__+0x78>
1011c: 01c10413 add s0,sp,28
10120: 01b12e23 sw s11,28(sp)
10124: 03912623 sw s9,44(sp)
10128: 07812623 sw s8,108(sp)
1012c: 07712e23 sw s7,124(sp)
10130: 00040b13 mv s6,s0
10134: 00100493 li s1,1
10138: 00400913 li s2,4
1013c: 40990533 sub a0,s2,s1
10140: 798000ef jal 108d8 <__floatsisf>
10144: 000d0593 mv a1,s10
10148: 4ac000ef jal 105f4 <__mulsf3>
1014c: 00050593 mv a1,a0
10150: 00050993 mv s3,a0
10154: 000d8513 mv a0,s11
10158: 314000ef jal 1046c <fmul32>
1015c: 00a12423 sw a0,8(sp)
10160: 00048513 mv a0,s1
10164: 774000ef jal 108d8 <__floatsisf>
10168: 000d0593 mv a1,s10
1016c: 488000ef jal 105f4 <__mulsf3>
10170: 00050593 mv a1,a0
10174: 00a12223 sw a0,4(sp)
10178: 000c8513 mv a0,s9
1017c: 2f0000ef jal 1046c <fmul32>
10180: 00050593 mv a1,a0
10184: 00812503 lw a0,8(sp)
10188: 00148493 add s1,s1,1
1018c: 004b0b13 add s6,s6,4
10190: 37c000ef jal 1050c <fadd32>
10194: 00098593 mv a1,s3
10198: 00ab2023 sw a0,0(s6)
1019c: 000c0513 mv a0,s8
101a0: 2cc000ef jal 1046c <fmul32>
101a4: 00412583 lw a1,4(sp)
101a8: 00050993 mv s3,a0
101ac: 000b8513 mv a0,s7
101b0: 2bc000ef jal 1046c <fmul32>
101b4: 00050593 mv a1,a0
101b8: 00098513 mv a0,s3
101bc: 350000ef jal 1050c <fadd32>
101c0: 04ab2823 sw a0,80(s6)
101c4: f7249ce3 bne s1,s2,1013c <main+0x8c>
101c8: f401ac03 lw s8,-192(gp) # 1e958 <__SDATA_BEGIN__+0x78>
101cc: 01400b13 li s6,20
101d0: 00100913 li s2,1
101d4: 00400c93 li s9,4
101d8: 01440d13 add s10,s0,20
101dc: 412c8533 sub a0,s9,s2
101e0: 6f8000ef jal 108d8 <__floatsisf>
101e4: 000c0593 mv a1,s8
101e8: 40c000ef jal 105f4 <__mulsf3>
101ec: 00050993 mv s3,a0
101f0: 00040493 mv s1,s0
101f4: 0004a503 lw a0,0(s1)
101f8: 00098593 mv a1,s3
101fc: 270000ef jal 1046c <fmul32>
10200: 00050b93 mv s7,a0
10204: 00090513 mv a0,s2
10208: 6d0000ef jal 108d8 <__floatsisf>
1020c: 000c0593 mv a1,s8
10210: 3e4000ef jal 105f4 <__mulsf3>
10214: 00050593 mv a1,a0
10218: 0504a503 lw a0,80(s1)
1021c: 250000ef jal 1046c <fmul32>
10220: 00050593 mv a1,a0
10224: 000b8513 mv a0,s7
10228: 2e4000ef jal 1050c <fadd32>
1022c: 016487b3 add a5,s1,s6
10230: 00a7a023 sw a0,0(a5)
10234: 00448493 add s1,s1,4
10238: fba49ee3 bne s1,s10,101f4 <main+0x144>
1023c: 00190913 add s2,s2,1
10240: 014b0b13 add s6,s6,20
10244: f9991ce3 bne s2,s9,101dc <main+0x12c>
10248: 06440993 add s3,s0,100
1024c: 0001cb37 lui s6,0x1c
10250: 00500b93 li s7,5
10254: 00040913 mv s2,s0
10258: 00000493 li s1,0
1025c: 00092503 lw a0,0(s2)
10260: 00148493 add s1,s1,1
10264: 00490913 add s2,s2,4
10268: 732000ef jal 1099a <__extendsfdf2>
1026c: 00050613 mv a2,a0
10270: 00058693 mv a3,a1
10274: 420b0513 add a0,s6,1056 # 1c420 <__trunctfdf2+0x2ae>
10278: 413000ef jal 10e8a <printf>
1027c: ff7490e3 bne s1,s7,1025c <main+0x1ac>
10280: 00a00513 li a0,10
10284: 01440413 add s0,s0,20
10288: 435000ef jal 10ebc <putchar>
1028c: fc8994e3 bne s3,s0,10254 <main+0x1a4>
10290: 0f8000ef jal 10388 <get_cycles>
10294: 415505b3 sub a1,a0,s5
10298: 0001c537 lui a0,0x1c
1029c: 42450513 add a0,a0,1060 # 1c424 <__trunctfdf2+0x2b2>
102a0: 3eb000ef jal 10e8a <printf>
102a4: 00c12583 lw a1,12(sp)
102a8: 0001c537 lui a0,0x1c
102ac: 43850513 add a0,a0,1080 # 1c438 <__trunctfdf2+0x2c6>
102b0: 3db000ef jal 10e8a <printf>
102b4: 0bc12083 lw ra,188(sp)
102b8: 0b812403 lw s0,184(sp)
102bc: 0b412483 lw s1,180(sp)
102c0: 0b012903 lw s2,176(sp)
102c4: 0ac12983 lw s3,172(sp)
102c8: 0a812a03 lw s4,168(sp)
102cc: 0a412a83 lw s5,164(sp)
102d0: 0a012b03 lw s6,160(sp)
102d4: 09c12b83 lw s7,156(sp)
102d8: 09812c03 lw s8,152(sp)
102dc: 09412c83 lw s9,148(sp)
102e0: 09012d03 lw s10,144(sp)
102e4: 08c12d83 lw s11,140(sp)
102e8: 00000513 li a0,0
102ec: 0c010113 add sp,sp,192
102f0: 00008067 ret
```
:::
:::spoiler fmul function:
```c
0001046c <fmul32>:
1046c: ff010113 add sp,sp,-16
10470: 00912223 sw s1,4(sp)
10474: 00050493 mv s1,a0
10478: 00800537 lui a0,0x800
1047c: fff50793 add a5,a0,-1 # 7fffff <__BSS_END__+0x7e1093>
10480: 00812423 sw s0,8(sp)
10484: 00058413 mv s0,a1
10488: 00b7f5b3 and a1,a5,a1
1048c: 0097f7b3 and a5,a5,s1
10490: 00a5e5b3 or a1,a1,a0
10494: 00a7e533 or a0,a5,a0
10498: 00112623 sw ra,12(sp)
1049c: fa9ff0ef jal 10444 <imul32>
104a0: 41855693 sra a3,a0,0x18
104a4: 4174d793 sra a5,s1,0x17
104a8: 41745613 sra a2,s0,0x17
104ac: 0016f693 and a3,a3,1
104b0: 0ff7f793 zext.b a5,a5
104b4: 0ff67613 zext.b a2,a2
104b8: 40d55733 sra a4,a0,a3
104bc: 00c787b3 add a5,a5,a2
104c0: 04069263 bnez a3,10504 <fmul32+0x98>
104c4: f8178793 add a5,a5,-127
104c8: 00944533 xor a0,s0,s1
104cc: 800006b7 lui a3,0x80000
104d0: 00971713 sll a4,a4,0x9
104d4: 00c12083 lw ra,12(sp)
104d8: 00812403 lw s0,8(sp)
104dc: 00975713 srl a4,a4,0x9
104e0: 00d57533 and a0,a0,a3
104e4: 00e56533 or a0,a0,a4
104e8: 01779793 sll a5,a5,0x17
104ec: 7f800737 lui a4,0x7f800
104f0: 00e7f7b3 and a5,a5,a4
104f4: 00412483 lw s1,4(sp)
104f8: 00f56533 or a0,a0,a5
104fc: 01010113 add sp,sp,16
10500: 00008067 ret
10504: f8278793 add a5,a5,-126
10508: fc1ff06f j 104c8 <fmul32+0x5c>
```
:::
:::spoiler fadd32 function:
```c
0001050c <fadd32>:
1050c: 800007b7 lui a5,0x80000
10510: ff010113 add sp,sp,-16
10514: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe1093>
10518: 00a7f733 and a4,a5,a0
1051c: 00112623 sw ra,12(sp)
10520: 00812423 sw s0,8(sp)
10524: 00912223 sw s1,4(sp)
10528: 01212023 sw s2,0(sp)
1052c: 00b7f7b3 and a5,a5,a1
10530: 0af74463 blt a4,a5,105d8 <fadd32+0xcc>
10534: 00050913 mv s2,a0
10538: 00058693 mv a3,a1
1053c: 00800737 lui a4,0x800
10540: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e1093>
10544: 00f97633 and a2,s2,a5
10548: 00f6f7b3 and a5,a3,a5
1054c: 00e66633 or a2,a2,a4
10550: 00e7e7b3 or a5,a5,a4
10554: 41795493 sra s1,s2,0x17
10558: 4176d713 sra a4,a3,0x17
1055c: 0ff4f493 zext.b s1,s1
10560: 0ff77713 zext.b a4,a4
10564: 40e48733 sub a4,s1,a4
10568: 01800593 li a1,24
1056c: 00e5d463 bge a1,a4,10574 <fadd32+0x68>
10570: 01800713 li a4,24
10574: 40e7d7b3 sra a5,a5,a4
10578: 00d966b3 or a3,s2,a3
1057c: 00f60433 add s0,a2,a5
10580: 0006d463 bgez a3,10588 <fadd32+0x7c>
10584: 40f60433 sub s0,a2,a5
10588: 00040513 mv a0,s0
1058c: e25ff0ef jal 103b0 <count_leading_zeros>
10590: 00800793 li a5,8
10594: 04a7c863 blt a5,a0,105e4 <fadd32+0xd8>
10598: 40a787b3 sub a5,a5,a0
1059c: 40f45433 sra s0,s0,a5
105a0: 00f484b3 add s1,s1,a5
105a4: 00941413 sll s0,s0,0x9
105a8: 01749493 sll s1,s1,0x17
105ac: 00945413 srl s0,s0,0x9
105b0: 800007b7 lui a5,0x80000
105b4: 00946433 or s0,s0,s1
105b8: 00f97533 and a0,s2,a5
105bc: 00c12083 lw ra,12(sp)
105c0: 00a46533 or a0,s0,a0
105c4: 00812403 lw s0,8(sp)
105c8: 00412483 lw s1,4(sp)
105cc: 00012903 lw s2,0(sp)
105d0: 01010113 add sp,sp,16
105d4: 00008067 ret
105d8: 00050693 mv a3,a0
105dc: 00058913 mv s2,a1
105e0: f5dff06f j 1053c <fadd32+0x30>
105e4: ff850513 add a0,a0,-8
105e8: 00a41433 sll s0,s0,a0
105ec: 40a484b3 sub s1,s1,a0
105f0: fb5ff06f j 105a4 <fadd32+0x98>
```
:::
#### ELF size
```
text data bss dec hex filename
53612 1896 1528 57036 decc perfcount.elf
```
#### ELF header
```
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x10308
Start of program headers: 52 (bytes into file)
Start of section headers: 70512 (bytes into file)
Flags: 0x1, RVC, soft-float ABI
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
```
#### Execute
```clike
0.954780 0.877887 0.800995 0.724102 0.647210
0.921899 0.826679 0.731460 0.636240 0.541020
0.889018 0.775471 0.661924 0.548377 0.434830
0.856138 0.724263 0.592389 0.460514 0.328640
0.823257 0.673055 0.522853 0.372652 0.222450
cycle count: 154013
instret: 2de
inferior exit code 0
```
### -Ofast Optimized Assembly Code
#### Assembly code
:::spoiler main function:
```c
0001080c <fadd32>:
1080c: 800007b7 lui a5,0x80000
10810: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe129b>
10814: 00a7f733 and a4,a5,a0
10818: 00b7f7b3 and a5,a5,a1
1081c: 00050813 mv a6,a0
10820: 00058613 mv a2,a1
10824: 00f74663 blt a4,a5,10830 <fadd32+0x24>
10828: 00050613 mv a2,a0
1082c: 00058813 mv a6,a1
10830: 00800537 lui a0,0x800
10834: 41765693 sra a3,a2,0x17
10838: 41785793 sra a5,a6,0x17
1083c: fff50713 add a4,a0,-1 # 7fffff <__BSS_END__+0x7e129b>
10840: 0ff6f693 zext.b a3,a3
10844: 0ff7f793 zext.b a5,a5
10848: 00e675b3 and a1,a2,a4
1084c: 40f687b3 sub a5,a3,a5
10850: 00e87733 and a4,a6,a4
10854: 01800893 li a7,24
10858: 00a5e5b3 or a1,a1,a0
1085c: 00a76733 or a4,a4,a0
10860: 00f8d463 bge a7,a5,10868 <fadd32+0x5c>
10864: 01800793 li a5,24
10868: 40f757b3 sra a5,a4,a5
1086c: 01066833 or a6,a2,a6
10870: 00f58733 add a4,a1,a5
10874: 00085463 bgez a6,1087c <fadd32+0x70>
10878: 40f58733 sub a4,a1,a5
1087c: 00175793 srl a5,a4,0x1
10880: 00e7e7b3 or a5,a5,a4
10884: 0027d593 srl a1,a5,0x2
10888: 00b7e7b3 or a5,a5,a1
1088c: 0047d593 srl a1,a5,0x4
10890: 00b7e7b3 or a5,a5,a1
10894: 0087d593 srl a1,a5,0x8
10898: 00b7e7b3 or a5,a5,a1
1089c: 0107d593 srl a1,a5,0x10
108a0: 00b7e7b3 or a5,a5,a1
108a4: 55555537 lui a0,0x55555
108a8: 0017d593 srl a1,a5,0x1
108ac: 55550513 add a0,a0,1365 # 55555555 <__BSS_END__+0x555367f1>
108b0: 00a5f5b3 and a1,a1,a0
108b4: 40b787b3 sub a5,a5,a1
108b8: 33333537 lui a0,0x33333
108bc: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x333145cf>
108c0: 0027d593 srl a1,a5,0x2
108c4: 00a5f5b3 and a1,a1,a0
108c8: 00a7f7b3 and a5,a5,a0
108cc: 00f585b3 add a1,a1,a5
108d0: 0045d793 srl a5,a1,0x4
108d4: 0f0f1537 lui a0,0xf0f1
108d8: 00b787b3 add a5,a5,a1
108dc: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d21ab>
108e0: 00a7f7b3 and a5,a5,a0
108e4: 0087d593 srl a1,a5,0x8
108e8: 00b787b3 add a5,a5,a1
108ec: 0107d593 srl a1,a5,0x10
108f0: 00b787b3 add a5,a5,a1
108f4: 07f7f793 and a5,a5,127
108f8: 02000593 li a1,32
108fc: 40f585b3 sub a1,a1,a5
10900: 00800513 li a0,8
10904: 02b54863 blt a0,a1,10934 <fadd32+0x128>
10908: fe878793 add a5,a5,-24
1090c: 40f75733 sra a4,a4,a5
10910: 00f686b3 add a3,a3,a5
10914: 00971713 sll a4,a4,0x9
10918: 01769693 sll a3,a3,0x17
1091c: 800007b7 lui a5,0x80000
10920: 00975713 srl a4,a4,0x9
10924: 00d76733 or a4,a4,a3
10928: 00f67533 and a0,a2,a5
1092c: 00a76533 or a0,a4,a0
10930: 00008067 ret
10934: 01800593 li a1,24
10938: 40f587b3 sub a5,a1,a5
1093c: 00f71733 sll a4,a4,a5
10940: 40f686b3 sub a3,a3,a5
10944: 00971713 sll a4,a4,0x9
10948: 01769693 sll a3,a3,0x17
1094c: 800007b7 lui a5,0x80000
10950: 00975713 srl a4,a4,0x9
10954: 00d76733 or a4,a4,a3
10958: 00f67533 and a0,a2,a5
1095c: 00a76533 or a0,a4,a0
10960: 00008067 ret
```
:::
:::spoiler fmul function:
```c
00010780 <fmul32>:
10780: 00800737 lui a4,0x800
10784: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e129b>
10788: 00a7f633 and a2,a5,a0
1078c: 41755693 sra a3,a0,0x17
10790: 00b7f7b3 and a5,a5,a1
10794: 4175d893 sra a7,a1,0x17
10798: 00e66633 or a2,a2,a4
1079c: 00e7e7b3 or a5,a5,a4
107a0: 0ff6f813 zext.b a6,a3
107a4: 0ff8f893 zext.b a7,a7
107a8: 00000713 li a4,0
107ac: 0017f693 and a3,a5,1
107b0: 4017d793 sra a5,a5,0x1
107b4: 00068463 beqz a3,107bc <fmul32+0x3c>
107b8: 00c70733 add a4,a4,a2
107bc: 00078663 beqz a5,107c8 <fmul32+0x48>
107c0: 40175713 sra a4,a4,0x1
107c4: fe9ff06f j 107ac <fmul32+0x2c>
107c8: 41875613 sra a2,a4,0x18
107cc: 011806b3 add a3,a6,a7
107d0: 00c037b3 snez a5,a2
107d4: 40c75733 sra a4,a4,a2
107d8: 00d787b3 add a5,a5,a3
107dc: f8178793 add a5,a5,-127
107e0: 00a5c5b3 xor a1,a1,a0
107e4: 800006b7 lui a3,0x80000
107e8: 00971713 sll a4,a4,0x9
107ec: 00d5f5b3 and a1,a1,a3
107f0: 00975713 srl a4,a4,0x9
107f4: 01779513 sll a0,a5,0x17
107f8: 7f8007b7 lui a5,0x7f800
107fc: 00e5e5b3 or a1,a1,a4
10800: 00f57533 and a0,a0,a5
10804: 00a5e533 or a0,a1,a0
10808: 00008067 ret
```
:::
:::spoiler fadd32 function:
```c
0001080c <fadd32>:
1080c: 800007b7 lui a5,0x80000
10810: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe129b>
10814: 00a7f733 and a4,a5,a0
10818: 00b7f7b3 and a5,a5,a1
1081c: 00050813 mv a6,a0
10820: 00058613 mv a2,a1
10824: 00f74663 blt a4,a5,10830 <fadd32+0x24>
10828: 00050613 mv a2,a0
1082c: 00058813 mv a6,a1
10830: 00800537 lui a0,0x800
10834: 41765693 sra a3,a2,0x17
10838: 41785793 sra a5,a6,0x17
1083c: fff50713 add a4,a0,-1 # 7fffff <__BSS_END__+0x7e129b>
10840: 0ff6f693 zext.b a3,a3
10844: 0ff7f793 zext.b a5,a5
10848: 00e675b3 and a1,a2,a4
1084c: 40f687b3 sub a5,a3,a5
10850: 00e87733 and a4,a6,a4
10854: 01800893 li a7,24
10858: 00a5e5b3 or a1,a1,a0
1085c: 00a76733 or a4,a4,a0
10860: 00f8d463 bge a7,a5,10868 <fadd32+0x5c>
10864: 01800793 li a5,24
10868: 40f757b3 sra a5,a4,a5
1086c: 01066833 or a6,a2,a6
10870: 00f58733 add a4,a1,a5
10874: 00085463 bgez a6,1087c <fadd32+0x70>
10878: 40f58733 sub a4,a1,a5
1087c: 00175793 srl a5,a4,0x1
10880: 00e7e7b3 or a5,a5,a4
10884: 0027d593 srl a1,a5,0x2
10888: 00b7e7b3 or a5,a5,a1
1088c: 0047d593 srl a1,a5,0x4
10890: 00b7e7b3 or a5,a5,a1
10894: 0087d593 srl a1,a5,0x8
10898: 00b7e7b3 or a5,a5,a1
1089c: 0107d593 srl a1,a5,0x10
108a0: 00b7e7b3 or a5,a5,a1
108a4: 55555537 lui a0,0x55555
108a8: 0017d593 srl a1,a5,0x1
108ac: 55550513 add a0,a0,1365 # 55555555 <__BSS_END__+0x555367f1>
108b0: 00a5f5b3 and a1,a1,a0
108b4: 40b787b3 sub a5,a5,a1
108b8: 33333537 lui a0,0x33333
108bc: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x333145cf>
108c0: 0027d593 srl a1,a5,0x2
108c4: 00a5f5b3 and a1,a1,a0
108c8: 00a7f7b3 and a5,a5,a0
108cc: 00f585b3 add a1,a1,a5
108d0: 0045d793 srl a5,a1,0x4
108d4: 0f0f1537 lui a0,0xf0f1
108d8: 00b787b3 add a5,a5,a1
108dc: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d21ab>
108e0: 00a7f7b3 and a5,a5,a0
108e4: 0087d593 srl a1,a5,0x8
108e8: 00b787b3 add a5,a5,a1
108ec: 0107d593 srl a1,a5,0x10
108f0: 00b787b3 add a5,a5,a1
108f4: 07f7f793 and a5,a5,127
108f8: 02000593 li a1,32
108fc: 40f585b3 sub a1,a1,a5
10900: 00800513 li a0,8
10904: 02b54863 blt a0,a1,10934 <fadd32+0x128>
10908: fe878793 add a5,a5,-24
1090c: 40f75733 sra a4,a4,a5
10910: 00f686b3 add a3,a3,a5
10914: 00971713 sll a4,a4,0x9
10918: 01769693 sll a3,a3,0x17
1091c: 800007b7 lui a5,0x80000
10920: 00975713 srl a4,a4,0x9
10924: 00d76733 or a4,a4,a3
10928: 00f67533 and a0,a2,a5
1092c: 00a76533 or a0,a4,a0
10930: 00008067 ret
10934: 01800593 li a1,24
10938: 40f587b3 sub a5,a1,a5
1093c: 00f71733 sll a4,a4,a5
10940: 40f686b3 sub a3,a3,a5
10944: 00971713 sll a4,a4,0x9
10948: 01769693 sll a3,a3,0x17
1094c: 800007b7 lui a5,0x80000
10950: 00975713 srl a4,a4,0x9
10954: 00d76733 or a4,a4,a3
10958: 00f67533 and a0,a2,a5
1095c: 00a76533 or a0,a4,a0
10960: 00008067 ret
```
:::
#### ELF size
```
text data bss dec hex filename
54492 1896 1528 57916 e23c perfcount.elf
```
#### ELF header
```
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x10618
Start of program headers: 52 (bytes into file)
Start of section headers: 74088 (bytes into file)
Flags: 0x1, RVC, soft-float ABI
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
```
#### Execute
```clike
0.954780 0.877887 0.800995 0.724102 0.647210
0.921899 0.826679 0.731460 0.636240 0.541020
0.889018 0.775471 0.661924 0.548377 0.434830
0.856138 0.724263 0.592389 0.460514 0.328640
0.823257 0.673055 0.522853 0.372652 0.222450
cycle count: 150953
instret: 2d4
inferior exit code 0
```
<s>
:::danger
Avoid using screenshots that solely contain plain text. Here are the reasons why:
1. Text-based content is more efficiently searchable than having to browse through images iteratively.
2. The rendering engine of HackMD can consistently generate well-structured layouts with annotated text instead of relying on arbitrary pictures.
3. It provides a more accessible and user-friendly experience for individuals with visual impairments.
:notes: jserv
:::
</s>
## Conclusion
| Optimization|Cycle number|Instret number|
| -------- | -------- | -------- |
| O1 | 153162 | 2de |
| O2 | 153485 | 2d4 |
| O3 | 150953 | 2d4 |
| Os | 154013 | 2de |
| Ofast | 150953 | 2d4 |
- The lowest cycle count is O3 and Ofast optimization
- The lowest instret count is O2, O3 and Ofast optimization.
:::warning
Show me the handwritten RISC-V assembly code.
:notes: jserv
:::