# Assignment2: GNU Toolchain
Contributed by [JengDeChang](https://github.com/00853029/Computer_Architecture/tree/main/hw2)

### Question Selection

#### Question
I chose the question from [林晉宇-Image scaling with Bilinear interpolation by float32 multiplication](https://hackmd.io/@linyu0425/SJHkb8lWT)

#### Motivation
The reason I want to work on this classmate's project is because it has a lot in common with my first assignment, and I want to learn from the ideas of other classmates through this assignment.

#### perfcounter/main.c
- Place the main code between `get_cycles()` to calculate the number of cycles.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Side length of the square source image (im_2) and of the upscaled image (im_5). */
#define IN_N 2
#define OUT_N 5

/*
 * Counter readers defined outside this listing.
 * Presumably they return the cycle and retired-instruction counters;
 * verify against their definitions.
 */
extern uint64_t get_cycles();
extern uint64_t get_instret();

/*
 * Taken from the Sparkle-suite which is a collection of lightweight symmetric
 * cryptographic algorithms currently in the final round of the NIST
 * standardization effort.
 * See https://sparkle-lwc.github.io/
 *
 * NOTE(review): this attribution appears to belong to the commented-out
 * sparkle_asm declaration below; WORDS is only used to size the `state`
 * array in main and ROUNDS is not used anywhere in this listing.
 */
//extern void sparkle_asm(unsigned int *state, unsigned int ns);
#define WORDS 12
#define ROUNDS 7

/*
 * Count the leading zero bits of a 32-bit value.
 *
 * The first five statements OR x with right-shifted copies of itself, so
 * every bit below the highest set bit becomes 1. The remaining statements
 * count the set bits of that value. The result is 32 minus the count, so
 * x == 0 yields 32.
 */
uint32_t count_leading_zeros(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);

    /* count ones (population count) */
    x -= ((x >> 1) & 0x55555555);
    x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
    x = ((x >> 4) + x) & 0x0f0f0f0f;
    x += (x >> 8);
    x += (x >> 16);

    return (32 - (x & 0x7f));
}

/* Return bit n of value as 0 or 1. */
int32_t getbit(int32_t value, int n)
{
    return (value >> n) & 1;
}

/* int32 multiply */
/*
 * Shift-and-add multiply that keeps only the upper part of the product.
 *
 * The bits of b are consumed from least to most significant. When a bit is
 * set, a is added to r; r is then shifted right by one unless that was the
 * last remaining bit of b. Bits shifted out of r are discarded, so for a b
 * whose highest set bit is bit k the result is roughly (a * b) >> k.
 *
 * NOTE(review): the loop ends only when `b >> 1` reaches 0. b is signed, so
 * this presumably relies on b being non-negative; fmul32, the only caller in
 * this listing, passes a 24-bit mantissa.
 */
int32_t imul32(int32_t a, int32_t b)
{
    int32_t r = 0;
    while(1) {
        if((b & 1) != 0) {
            r = r + a;
        }
        b = b >> 1;
        if(b == 0x0) break;
        r = r >> 1;   /* make room for the next, more significant partial product */
    }
    return r;
}

/* float32 multiply */
/*
 * Multiply two floats using integer operations on their bit patterns.
 *
 * Sign, 8-bit exponent and 23-bit fraction are extracted from each operand,
 * and the implicit leading 1 (0x800000) is ORed into both mantissas.
 * imul32 multiplies the mantissas; bit 24 of that product tells whether it
 * needs one extra right shift, in which case the exponent is incremented.
 * The result exponent is ea + eb - 127 masked to 8 bits, and the result sign
 * is the XOR of the operand signs.
 *
 * Zero, subnormal, infinite and NaN operands get no special treatment: the
 * implicit 1 is added unconditionally and the exponent is simply masked.
 *
 * NOTE(review): `*(int32_t *) &a` and `*(float *) &r` read an object through
 * a pointer of a different type, which the C strict-aliasing rules do not
 * allow; memcpy is the portable way to reinterpret the bits.
 * NOTE(review): sr is 0 or, presumably, -1 (arithmetic `>>` on a negative
 * int), so `sr << 31` may left-shift a negative value - confirm.
 */
float fmul32(float a, float b)
{
    /* TODO: Special values like NaN and INF */
    int32_t ia = *(int32_t *) &a, ib = *(int32_t *) &b;

    /* sign */
    int sa = ia >> 31;
    int sb = ib >> 31;

    /* mantissa */
    int32_t ma = (ia & 0x7FFFFF) | 0x800000;
    int32_t mb = (ib & 0x7FFFFF) | 0x800000;

    /* exponent */
    int32_t ea = ((ia >> 23) & 0xFF);
    int32_t eb = ((ib >> 23) & 0xFF);

    /* 'r' = result */
    int32_t mrtmp = imul32(ma, mb);
    int mshift = getbit(mrtmp, 24);   /* 1 when the mantissa product overflowed into bit 24 */
    int32_t mr = mrtmp >> mshift;

    int32_t ertmp = ea + eb - 127;    /* remove one copy of the exponent bias */
    int32_t er;
    if(mshift) er = ertmp + 1;
    else er = ertmp;

    int sr = sa ^ sb;
    int32_t r = (sr << 31) | ((er & 0xFF) << 23) | (mr & 0x7FFFFF);
    return *(float *) &r;
}

/*
 * Add two floats using integer operations on their bit patterns.
 *
 * The operands are first swapped, if necessary, so that ia holds the one
 * with the larger magnitude (bit patterns compared with the sign bit
 * cleared). The result takes its sign bit from ia. The smaller operand's
 * mantissa is shifted right by the exponent difference, capped at 24, then
 * added to or subtracted from the larger one. The sum is renormalised so its
 * leading 1 sits at bit 23 (8 leading zeros), adjusting ea by the same
 * number of positions.
 *
 * Zero, subnormal, infinite and NaN operands get no special treatment.
 *
 * NOTE(review): the subtraction branch is taken when `sa | sb` is non-zero,
 * i.e. when either operand has its sign bit set - this includes two negative
 * operands. `sa ^ sb` looks like what was intended; confirm. main in this
 * listing only adds products of positive values.
 * NOTE(review): if ma becomes 0 (equal magnitudes on the subtraction path),
 * count_leading_zeros returns 32 and the else branch shifts by 24 and lowers
 * ea by 24, so the result is not zero - confirm whether this case matters.
 * NOTE(review): same pointer-cast type punning as in fmul32.
 */
float fadd32(float a, float b)
{
    //printf("%f , %f ",a,b);
    int32_t ia = *(int32_t *)&a, ib = *(int32_t *)&b;
    int32_t temp;

    /* keep the larger-magnitude operand in ia */
    if ((ia & 0x7fffffff) < (ib & 0x7fffffff)){
        temp = ia;
        ia = ib;
        ib = temp;
    }

    /* sign */
    int sa = ia >> 31;
    int sb = ib >> 31;

    /* mantissa */
    int32_t ma = ia & 0x7fffff | 0x800000;
    int32_t mb = ib & 0x7fffff | 0x800000;

    /* exponent */
    int32_t ea = (ia >> 23) & 0xff;
    int32_t eb = (ib >> 23) & 0xff;

    /* align the smaller mantissa to the larger exponent */
    int32_t align = (ea - eb > 24) ? 24 : (ea - eb);
    mb >>= align;

    if (sa | sb) ma -= mb;
    else ma += mb;

    /* renormalise: a normalised 24-bit mantissa has exactly 8 leading zeros */
    int32_t clz = count_leading_zeros(ma);
    int32_t shift = 0;
    if (clz <= 8) {
        shift = 8 - clz;
        ma >>= shift;
        ea += shift;
    }
    else {
        shift = clz - 8;
        ma <<= shift;
        ea -= shift;
    }

    int32_t r = ia & 0x80000000 | ea << 23 | ma & 0x7fffff;
    return *(float *) &r;
}

/*
 * Upscale the 2x2 image im_2 to the 5x5 image im_5 with fmul32/fadd32, print
 * the 5x5 result, then print the cycle-counter delta and the instret sample.
 *
 * The four source pixels are copied to the corners of im_5. The interior of
 * the top and bottom rows is interpolated between those corners, and every
 * column of the rows in between is then interpolated between the top and
 * bottom rows. The weights (OUT_N - 1 - i) / (OUT_N - 1) and i / (OUT_N - 1)
 * are computed with ordinary float division.
 */
int main(void)
{
    /*
     * state is zero-initialised here and zeroed again by the memset before
     * return; nothing else in main reads it.
     * NOTE(review): looks like a leftover from the original perfcounter
     * template - confirm.
     */
    unsigned int state[WORDS] = {0};

    /* measure cycles */
    uint64_t instret = get_instret();
    uint64_t oldcount = get_cycles();
    //--------------------------------------------------------------
    float im_2[2][2] = {{0.95478,0.64721}, {0.823257,0.22245}};
    float im_5[5][5] = { {0,0,0,0,0}, {0,0,0,0,0}, {0,0,0,0,0}, {0,0,0,0,0}, {0,0,0,0,0} };

    /* source pixels become the four corners of the output */
    im_5[0][0] = im_2[0][0];
    im_5[0][OUT_N-1] = im_2[0][IN_N-1];
    im_5[OUT_N-1][0] = im_2[IN_N-1][0];
    im_5[OUT_N-1][OUT_N-1] = im_2[IN_N-1][IN_N-1];

    /*
     * interior of the top and bottom rows; the literal 4 equals OUT_N - 1
     * for the current OUT_N of 5
     */
    for(int i=1;i<4;i++){
        im_5[0][i] = fadd32 (fmul32(im_5[0][0] , (float)(OUT_N - 1 - i) / (float)(OUT_N - 1)) ,
                             fmul32(im_5[0][OUT_N-1] , (float)(i) / (float)(OUT_N-1)));
        im_5[OUT_N-1][i] = fadd32 (fmul32(im_5[OUT_N-1][0] , (float)(OUT_N - 1 - i) /(float) (OUT_N-1)) ,
                                   fmul32(im_5[OUT_N-1][OUT_N-1] , (float)(i) / (float)(OUT_N-1)));
    }

    /* rows between top and bottom: blend the two finished rows column by column */
    for(int i=1;i<OUT_N-1;i++){
        for(int j=0;j<OUT_N;j++){
            im_5[i][j] = fadd32 (fmul32(im_5[0][j] , (float)(OUT_N - 1 - i) / (float)(OUT_N - 1)) ,
                                 fmul32(im_5[OUT_N-1][j] , (float)(i) / (float)(OUT_N - 1)));
        }
    }

    /* the printing below is inside the measured region */
    for(int i=0;i<OUT_N;i++){
        for(int j=0;j<OUT_N;j++){
            printf("%f ",im_5[i][j]);
        }
        printf("\n");
    }
    //----------------------------------------------------------------------
    uint64_t cyclecount = get_cycles() - oldcount;

    /* only the low bits of the 64-bit delta are printed */
    printf("cycle count: %u\n", (unsigned int) cyclecount);
    /*
     * NOTE(review): instret still holds the value sampled before the
     * computation; get_instret() is not called again, so this prints that
     * starting value rather than a count for the measured region.
     */
    printf("instret: %x\n", (unsigned) (instret & 0xffffffff));

    memset(state, 0, WORDS * sizeof(uint32_t));
    return 0;
}
```

---

## Compile and Execute

### -O1 Optimization

#### Assembly code

:::spoiler main function:
```c
000103b0 <main>: 103b0: f4010113 add sp,sp,-192 103b4: 0a112e23 sw ra,188(sp) 103b8: 0a812c23 sw s0,184(sp) 103bc: 0a912a23 sw s1,180(sp) 103c0: 0b212823 sw s2,176(sp) 103c4: 0b312623 sw s3,172(sp) 103c8: 0b412423 sw s4,168(sp) 103cc: 0b512223 sw s5,164(sp) 103d0: 0b612023 sw s6,160(sp) 103d4: 09712e23 sw s7,156(sp) 103d8: 09812c23 sw s8,152(sp) 103dc: 09912a23 sw s9,148(sp) 103e0: 09a12823 sw s10,144(sp) 103e4: 09b12623 sw s11,140(sp) 103e8: d75ff0ef jal 1015c <get_instret> 103ec: 00a12623 sw a0,12(sp) 103f0: d59ff0ef jal 10148 <get_cycles> 103f4: 00050d93 mv s11,a0 103f8: 05c00613 li a2,92 103fc: 00000593 li a1,0 10400: 02010513 add a0,sp,32 10404: 5a7000ef jal 111aa <memset> 10408: f301a783 lw a5,-208(gp) # 1e958 <__SDATA_BEGIN__+0x68> 1040c: 00f12e23 sw a5,28(sp) 10410: f341a783 lw a5,-204(gp) # 1e95c <__SDATA_BEGIN__+0x6c> 10414: 02f12623 sw a5,44(sp) 10418: f381a783 lw a5,-200(gp) # 1e960 <__SDATA_BEGIN__+0x70> 1041c: 06f12623 sw a5,108(sp) 10420: f3c1a783 lw a5,-196(gp) # 1e964 <__SDATA_BEGIN__+0x74> 10424: 06f12e23 sw a5,124(sp) 10428: 01c10413 add s0,sp,28 1042c: 00040a13 mv s4,s0 10430: 00100493 li s1,1 10434: 00400b13 li s6,4 10438: f401ad03 lw s10,-192(gp) # 1e968 <__SDATA_BEGIN__+0x78> 1043c: 000d0c93 mv s9,s10 10440: f341ac03 lw s8,-204(gp) # 1e95c <__SDATA_BEGIN__+0x6c> 10444: f3c1ab83 lw s7,-196(gp) # 1e964
<__SDATA_BEGIN__+0x74> 10448: 409b0533 sub a0,s6,s1 1044c: 49c000ef jal 108e8 <__floatsisf> 10450: 000d0593 mv a1,s10 10454: 1b0000ef jal 10604 <__mulsf3> 10458: 00050993 mv s3,a0 1045c: 00050593 mv a1,a0 10460: 01c12503 lw a0,28(sp) 10464: dcdff0ef jal 10230 <fmul32> 10468: 00050a93 mv s5,a0 1046c: 00048513 mv a0,s1 10470: 478000ef jal 108e8 <__floatsisf> 10474: 000c8593 mv a1,s9 10478: 18c000ef jal 10604 <__mulsf3> 1047c: 00050913 mv s2,a0 10480: 00050593 mv a1,a0 10484: 000c0513 mv a0,s8 10488: da9ff0ef jal 10230 <fmul32> 1048c: 00050593 mv a1,a0 10490: 000a8513 mv a0,s5 10494: e35ff0ef jal 102c8 <fadd32> 10498: 00aa2223 sw a0,4(s4) 1049c: 00098593 mv a1,s3 104a0: 06c12503 lw a0,108(sp) 104a4: d8dff0ef jal 10230 <fmul32> 104a8: 00050993 mv s3,a0 104ac: 00090593 mv a1,s2 104b0: 000b8513 mv a0,s7 104b4: d7dff0ef jal 10230 <fmul32> 104b8: 00050593 mv a1,a0 104bc: 00098513 mv a0,s3 104c0: e09ff0ef jal 102c8 <fadd32> 104c4: 04aa2a23 sw a0,84(s4) 104c8: 00148493 add s1,s1,1 104cc: 004a0a13 add s4,s4,4 104d0: f7649ce3 bne s1,s6,10448 <main+0x98> 104d4: 01400a93 li s5,20 104d8: 00100993 li s3,1 104dc: 00400c13 li s8,4 104e0: f401ac83 lw s9,-192(gp) # 1e968 <__SDATA_BEGIN__+0x78> 104e4: 01440b93 add s7,s0,20 104e8: 000c8b13 mv s6,s9 104ec: 413c0533 sub a0,s8,s3 104f0: 3f8000ef jal 108e8 <__floatsisf> 104f4: 000c8593 mv a1,s9 104f8: 10c000ef jal 10604 <__mulsf3> 104fc: 00050a13 mv s4,a0 10500: 00040493 mv s1,s0 10504: 000a0593 mv a1,s4 10508: 0004a503 lw a0,0(s1) 1050c: d25ff0ef jal 10230 <fmul32> 10510: 00050913 mv s2,a0 10514: 00098513 mv a0,s3 10518: 3d0000ef jal 108e8 <__floatsisf> 1051c: 000b0593 mv a1,s6 10520: 0e4000ef jal 10604 <__mulsf3> 10524: 00050593 mv a1,a0 10528: 0504a503 lw a0,80(s1) 1052c: d05ff0ef jal 10230 <fmul32> 10530: 00050593 mv a1,a0 10534: 00090513 mv a0,s2 10538: d91ff0ef jal 102c8 <fadd32> 1053c: 015487b3 add a5,s1,s5 10540: 00a7a023 sw a0,0(a5) # 80000000 <__BSS_END__+0x7ffe1084> 10544: 00448493 add s1,s1,4 10548: fb749ee3 bne s1,s7,10504 
<main+0x154> 1054c: 00198993 add s3,s3,1 10550: 014a8a93 add s5,s5,20 10554: f9899ce3 bne s3,s8,104ec <main+0x13c> 10558: 06440a93 add s5,s0,100 1055c: 0001ca37 lui s4,0x1c 10560: 00500993 li s3,5 10564: 00040913 mv s2,s0 10568: 00000493 li s1,0 1056c: 00092503 lw a0,0(s2) 10570: 43a000ef jal 109aa <__extendsfdf2> 10574: 00050613 mv a2,a0 10578: 00058693 mv a3,a1 1057c: 430a0513 add a0,s4,1072 # 1c430 <__trunctfdf2+0x2ae> 10580: 11b000ef jal 10e9a <printf> 10584: 00148493 add s1,s1,1 10588: 00490913 add s2,s2,4 1058c: ff3490e3 bne s1,s3,1056c <main+0x1bc> 10590: 00a00513 li a0,10 10594: 139000ef jal 10ecc <putchar> 10598: 01440413 add s0,s0,20 1059c: fc8a94e3 bne s5,s0,10564 <main+0x1b4> 105a0: ba9ff0ef jal 10148 <get_cycles> 105a4: 41b505b3 sub a1,a0,s11 105a8: 0001c537 lui a0,0x1c 105ac: 43450513 add a0,a0,1076 # 1c434 <__trunctfdf2+0x2b2> 105b0: 0eb000ef jal 10e9a <printf> 105b4: 00c12583 lw a1,12(sp) 105b8: 0001c537 lui a0,0x1c 105bc: 44850513 add a0,a0,1096 # 1c448 <__trunctfdf2+0x2c6> 105c0: 0db000ef jal 10e9a <printf> 105c4: 00000513 li a0,0 105c8: 0bc12083 lw ra,188(sp) 105cc: 0b812403 lw s0,184(sp) 105d0: 0b412483 lw s1,180(sp) 105d4: 0b012903 lw s2,176(sp) 105d8: 0ac12983 lw s3,172(sp) 105dc: 0a812a03 lw s4,168(sp) 105e0: 0a412a83 lw s5,164(sp) 105e4: 0a012b03 lw s6,160(sp) 105e8: 09c12b83 lw s7,156(sp) 105ec: 09812c03 lw s8,152(sp) 105f0: 09412c83 lw s9,148(sp) 105f4: 09012d03 lw s10,144(sp) 105f8: 08c12d83 lw s11,140(sp) 105fc: 0c010113 add sp,sp,192 10600: 00008067 ret ``` ::: :::spoiler fmul function: ```c 00010230 <fmul32>: 10230: ff010113 add sp,sp,-16 10234: 00112623 sw ra,12(sp) 10238: 00812423 sw s0,8(sp) 1023c: 00912223 sw s1,4(sp) 10240: 00050493 mv s1,a0 10244: 00058413 mv s0,a1 10248: 00800537 lui a0,0x800 1024c: fff50793 add a5,a0,-1 # 7fffff <__BSS_END__+0x7e1083> 10250: 00b7f5b3 and a1,a5,a1 10254: 0097f7b3 and a5,a5,s1 10258: 00a5e5b3 or a1,a1,a0 1025c: 00a7e533 or a0,a5,a0 10260: fa5ff0ef jal 10204 <imul32> 10264: 41855793 sra a5,a0,0x18 
10268: 0017f793 and a5,a5,1 1026c: 40f55733 sra a4,a0,a5 10270: 4174d613 sra a2,s1,0x17 10274: 0ff67613 zext.b a2,a2 10278: 41745693 sra a3,s0,0x17 1027c: 0ff6f693 zext.b a3,a3 10280: 00c787b3 add a5,a5,a2 10284: 00d787b3 add a5,a5,a3 10288: f8178793 add a5,a5,-127 1028c: 00944533 xor a0,s0,s1 10290: 800006b7 lui a3,0x80000 10294: 00d57533 and a0,a0,a3 10298: 00971713 sll a4,a4,0x9 1029c: 00975713 srl a4,a4,0x9 102a0: 00e56533 or a0,a0,a4 102a4: 01779793 sll a5,a5,0x17 102a8: 7f800737 lui a4,0x7f800 102ac: 00e7f7b3 and a5,a5,a4 102b0: 00f56533 or a0,a0,a5 102b4: 00c12083 lw ra,12(sp) 102b8: 00812403 lw s0,8(sp) 102bc: 00412483 lw s1,4(sp) 102c0: 01010113 add sp,sp,16 102c4: 00008067 ret ``` ::: :::spoiler fadd32 : ```c 000102c8 <fadd32>: 102c8: ff010113 add sp,sp,-16 102cc: 00112623 sw ra,12(sp) 102d0: 00812423 sw s0,8(sp) 102d4: 00912223 sw s1,4(sp) 102d8: 01212023 sw s2,0(sp) 102dc: 00050693 mv a3,a0 102e0: 00058913 mv s2,a1 102e4: 800007b7 lui a5,0x80000 102e8: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe1083> 102ec: 00a7f733 and a4,a5,a0 102f0: 00b7f7b3 and a5,a5,a1 102f4: 00f74663 blt a4,a5,10300 <fadd32+0x38> 102f8: 00050913 mv s2,a0 102fc: 00058693 mv a3,a1 10300: 00800737 lui a4,0x800 10304: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e1083> 10308: 00f97633 and a2,s2,a5 1030c: 00e66633 or a2,a2,a4 10310: 00f6f7b3 and a5,a3,a5 10314: 00e7e7b3 or a5,a5,a4 10318: 41795493 sra s1,s2,0x17 1031c: 0ff4f493 zext.b s1,s1 10320: 4176d713 sra a4,a3,0x17 10324: 0ff77713 zext.b a4,a4 10328: 40e48733 sub a4,s1,a4 1032c: 01800593 li a1,24 10330: 00e5d463 bge a1,a4,10338 <fadd32+0x70> 10334: 01800713 li a4,24 10338: 40e7d7b3 sra a5,a5,a4 1033c: 00d966b3 or a3,s2,a3 10340: 00f60433 add s0,a2,a5 10344: 0406ca63 bltz a3,10398 <fadd32+0xd0> 10348: 00040513 mv a0,s0 1034c: e25ff0ef jal 10170 <count_leading_zeros> 10350: 00800793 li a5,8 10354: 04a7c663 blt a5,a0,103a0 <fadd32+0xd8> 10358: 40a787b3 sub a5,a5,a0 1035c: 40f45433 sra s0,s0,a5 10360: 00f484b3 add s1,s1,a5 
10364: 00941413 sll s0,s0,0x9 10368: 00945413 srl s0,s0,0x9 1036c: 01749493 sll s1,s1,0x17 10370: 00946433 or s0,s0,s1 10374: 800007b7 lui a5,0x80000 10378: 00f97533 and a0,s2,a5 1037c: 00a46533 or a0,s0,a0 10380: 00c12083 lw ra,12(sp) 10384: 00812403 lw s0,8(sp) 10388: 00412483 lw s1,4(sp) 1038c: 00012903 lw s2,0(sp) 10390: 01010113 add sp,sp,16 10394: 00008067 ret 10398: 40f60433 sub s0,a2,a5 1039c: fadff06f j 10348 <fadd32+0x80> 103a0: ff850513 add a0,a0,-8 103a4: 00a41433 sll s0,s0,a0 103a8: 40a484b3 sub s1,s1,a0 103ac: fb9ff06f j 10364 <fadd32+0x9c> ``` ::: #### ELF size ``` text data bss dec hex filename 53628 1896 1528 57052 dedc perfcount.elf ``` #### ELF header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x100c4 Start of program headers: 52 (bytes into file) Start of section headers: 70512 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Execute ```clike 0.954780 0.877887 0.800995 0.724102 0.647210 0.921899 0.826679 0.731460 0.636240 0.541020 0.889018 0.775471 0.661924 0.548377 0.434830 0.856138 0.724263 0.592389 0.460514 0.328640 0.823257 0.673055 0.522853 0.372652 0.222450 cycle count: 153162 instret: 2de inferior exit code 0 ``` <s> :::danger Avoid using screenshots that solely contain plain text. Here are the reasons why: 1. Text-based content is more efficiently searchable than having to browse through images iteratively. 2. The rendering engine of HackMD can consistently generate well-structured layouts with annotated text instead of relying on arbitrary pictures. 3. 
It provides a more accessible and user-friendly experience for individuals with visual impairments. :notes: jserv ::: </s> ### -O2 Optimized Assembly Code #### Assembly code :::spoiler main function: ```c 000100b0 <main>: 100b0: f4010113 add sp,sp,-192 100b4: 0a112e23 sw ra,188(sp) 100b8: 0a812c23 sw s0,184(sp) 100bc: 0a912a23 sw s1,180(sp) 100c0: 0b412423 sw s4,168(sp) 100c4: 0b512223 sw s5,164(sp) 100c8: 0b612023 sw s6,160(sp) 100cc: 09712e23 sw s7,156(sp) 100d0: 09812c23 sw s8,152(sp) 100d4: 09912a23 sw s9,148(sp) 100d8: 09a12823 sw s10,144(sp) 100dc: 0b212823 sw s2,176(sp) 100e0: 0b312623 sw s3,172(sp) 100e4: 09b12623 sw s11,140(sp) 100e8: 2c4000ef jal 103ac <get_instret> 100ec: 00a12423 sw a0,8(sp) 100f0: 2a8000ef jal 10398 <get_cycles> 100f4: 00050b13 mv s6,a0 100f8: 05c00613 li a2,92 100fc: 00000593 li a1,0 10100: 02010513 add a0,sp,32 10104: 01612623 sw s6,12(sp) 10108: 08e010ef jal 11196 <memset> 1010c: f301ad03 lw s10,-208(gp) # 1e940 <__SDATA_BEGIN__+0x68> 10110: f341ac83 lw s9,-204(gp) # 1e944 <__SDATA_BEGIN__+0x6c> 10114: f381ac03 lw s8,-200(gp) # 1e948 <__SDATA_BEGIN__+0x70> 10118: f3c1ab83 lw s7,-196(gp) # 1e94c <__SDATA_BEGIN__+0x74> 1011c: f401aa03 lw s4,-192(gp) # 1e950 <__SDATA_BEGIN__+0x78> 10120: 01c10493 add s1,sp,28 10124: 01a12e23 sw s10,28(sp) 10128: 03912623 sw s9,44(sp) 1012c: 07812623 sw s8,108(sp) 10130: 07712e23 sw s7,124(sp) 10134: 00048b13 mv s6,s1 10138: 00100413 li s0,1 1013c: 00400a93 li s5,4 10140: 408a8533 sub a0,s5,s0 10144: 790000ef jal 108d4 <__floatsisf> 10148: 000a0593 mv a1,s4 1014c: 4a4000ef jal 105f0 <__mulsf3> 10150: 00050593 mv a1,a0 10154: 00050d93 mv s11,a0 10158: 000d0513 mv a0,s10 1015c: 320000ef jal 1047c <fmul32> 10160: 00050993 mv s3,a0 10164: 00040513 mv a0,s0 10168: 76c000ef jal 108d4 <__floatsisf> 1016c: 000a0593 mv a1,s4 10170: 480000ef jal 105f0 <__mulsf3> 10174: 00050913 mv s2,a0 10178: 00050593 mv a1,a0 1017c: 000c8513 mv a0,s9 10180: 2fc000ef jal 1047c <fmul32> 10184: 00050593 mv a1,a0 10188: 00098513 mv 
a0,s3 1018c: 37c000ef jal 10508 <fadd32> 10190: 00ab2223 sw a0,4(s6) 10194: 000d8593 mv a1,s11 10198: 000c0513 mv a0,s8 1019c: 2e0000ef jal 1047c <fmul32> 101a0: 00050793 mv a5,a0 101a4: 00090593 mv a1,s2 101a8: 000b8513 mv a0,s7 101ac: 00078913 mv s2,a5 101b0: 2cc000ef jal 1047c <fmul32> 101b4: 00050593 mv a1,a0 101b8: 00090513 mv a0,s2 101bc: 34c000ef jal 10508 <fadd32> 101c0: 04ab2a23 sw a0,84(s6) 101c4: 00140413 add s0,s0,1 101c8: 004b0b13 add s6,s6,4 101cc: f7541ae3 bne s0,s5,10140 <main+0x90> 101d0: f401ac83 lw s9,-192(gp) # 1e950 <__SDATA_BEGIN__+0x78> 101d4: 01400c13 li s8,20 101d8: 00100a13 li s4,1 101dc: 01448993 add s3,s1,20 101e0: 00400d13 li s10,4 101e4: 414d0533 sub a0,s10,s4 101e8: 6ec000ef jal 108d4 <__floatsisf> 101ec: 000c8593 mv a1,s9 101f0: 400000ef jal 105f0 <__mulsf3> 101f4: 00050b93 mv s7,a0 101f8: 00048413 mv s0,s1 101fc: 00042503 lw a0,0(s0) 10200: 000b8593 mv a1,s7 10204: 278000ef jal 1047c <fmul32> 10208: 00050913 mv s2,a0 1020c: 000a0513 mv a0,s4 10210: 6c4000ef jal 108d4 <__floatsisf> 10214: 000c8593 mv a1,s9 10218: 3d8000ef jal 105f0 <__mulsf3> 1021c: 00050593 mv a1,a0 10220: 05042503 lw a0,80(s0) 10224: 258000ef jal 1047c <fmul32> 10228: 00050593 mv a1,a0 1022c: 00090513 mv a0,s2 10230: 2d8000ef jal 10508 <fadd32> 10234: 018407b3 add a5,s0,s8 10238: 00a7a023 sw a0,0(a5) 1023c: 00440413 add s0,s0,4 10240: fb341ee3 bne s0,s3,101fc <main+0x14c> 10244: 001a0a13 add s4,s4,1 10248: 014c0c13 add s8,s8,20 1024c: f9aa1ce3 bne s4,s10,101e4 <main+0x134> 10250: 06448b93 add s7,s1,100 10254: 0001ca37 lui s4,0x1c 10258: 00500913 li s2,5 1025c: 00000413 li s0,0 10260: 0004a503 lw a0,0(s1) 10264: 00140413 add s0,s0,1 10268: 00448493 add s1,s1,4 1026c: 72a000ef jal 10996 <__extendsfdf2> 10270: 00050613 mv a2,a0 10274: 00058693 mv a3,a1 10278: 418a0513 add a0,s4,1048 # 1c418 <__trunctfdf2+0x2aa> 1027c: 40b000ef jal 10e86 <printf> 10280: ff2410e3 bne s0,s2,10260 <main+0x1b0> 10284: 00a00513 li a0,10 10288: 431000ef jal 10eb8 <putchar> 1028c: 00098493 mv 
s1,s3 10290: 013b8663 beq s7,s3,1029c <main+0x1ec> 10294: 01498993 add s3,s3,20 10298: fc5ff06f j 1025c <main+0x1ac> 1029c: 0fc000ef jal 10398 <get_cycles> 102a0: 00c12783 lw a5,12(sp) 102a4: 40f505b3 sub a1,a0,a5 102a8: 0001c537 lui a0,0x1c 102ac: 41c50513 add a0,a0,1052 # 1c41c <__trunctfdf2+0x2ae> 102b0: 3d7000ef jal 10e86 <printf> 102b4: 00812583 lw a1,8(sp) 102b8: 0001c537 lui a0,0x1c 102bc: 43050513 add a0,a0,1072 # 1c430 <__trunctfdf2+0x2c2> 102c0: 3c7000ef jal 10e86 <printf> 102c4: 0bc12083 lw ra,188(sp) 102c8: 0b812403 lw s0,184(sp) 102cc: 0b412483 lw s1,180(sp) 102d0: 0b012903 lw s2,176(sp) 102d4: 0ac12983 lw s3,172(sp) 102d8: 0a812a03 lw s4,168(sp) 102dc: 0a412a83 lw s5,164(sp) 102e0: 0a012b03 lw s6,160(sp) 102e4: 09c12b83 lw s7,156(sp) 102e8: 09812c03 lw s8,152(sp) 102ec: 09412c83 lw s9,148(sp) 102f0: 09012d03 lw s10,144(sp) 102f4: 08c12d83 lw s11,140(sp) 102f8: 00000513 li a0,0 102fc: 0c010113 add sp,sp,192 10300: 00008067 ret ``` ::: :::spoiler fmul function: ```c 0001047c <fmul32>: 1047c: 00800737 lui a4,0x800 10480: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e109b> 10484: 00a7f633 and a2,a5,a0 10488: 41755693 sra a3,a0,0x17 1048c: 00b7f7b3 and a5,a5,a1 10490: 4175d893 sra a7,a1,0x17 10494: 00e66633 or a2,a2,a4 10498: 00e7e7b3 or a5,a5,a4 1049c: 0ff6f813 zext.b a6,a3 104a0: 0ff8f893 zext.b a7,a7 104a4: 00000713 li a4,0 104a8: 0017f693 and a3,a5,1 104ac: 4017d793 sra a5,a5,0x1 104b0: 00068463 beqz a3,104b8 <fmul32+0x3c> 104b4: 00c70733 add a4,a4,a2 104b8: 00078663 beqz a5,104c4 <fmul32+0x48> 104bc: 40175713 sra a4,a4,0x1 104c0: fe9ff06f j 104a8 <fmul32+0x2c> 104c4: 41875613 sra a2,a4,0x18 104c8: 011806b3 add a3,a6,a7 104cc: 00c037b3 snez a5,a2 104d0: 40c75733 sra a4,a4,a2 104d4: 00d787b3 add a5,a5,a3 104d8: f8178793 add a5,a5,-127 104dc: 00a5c5b3 xor a1,a1,a0 104e0: 800006b7 lui a3,0x80000 104e4: 00971713 sll a4,a4,0x9 104e8: 00d5f5b3 and a1,a1,a3 104ec: 00975713 srl a4,a4,0x9 104f0: 01779513 sll a0,a5,0x17 104f4: 7f8007b7 lui a5,0x7f800 104f8: 
00e5e5b3 or a1,a1,a4 104fc: 00f57533 and a0,a0,a5 10500: 00a5e533 or a0,a1,a0 10504: 00008067 ret ``` ::: :::spoiler fadd32 function: ```c 00010508 <fadd32>: 10508: 800007b7 lui a5,0x80000 1050c: ff010113 add sp,sp,-16 10510: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe109b> 10514: 00a7f733 and a4,a5,a0 10518: 00112623 sw ra,12(sp) 1051c: 00812423 sw s0,8(sp) 10520: 00912223 sw s1,4(sp) 10524: 01212023 sw s2,0(sp) 10528: 00b7f7b3 and a5,a5,a1 1052c: 0af74463 blt a4,a5,105d4 <fadd32+0xcc> 10530: 00050913 mv s2,a0 10534: 00058693 mv a3,a1 10538: 008005b7 lui a1,0x800 1053c: 41795493 sra s1,s2,0x17 10540: 4176d793 sra a5,a3,0x17 10544: fff58713 add a4,a1,-1 # 7fffff <__BSS_END__+0x7e109b> 10548: 0ff4f493 zext.b s1,s1 1054c: 0ff7f793 zext.b a5,a5 10550: 00e97633 and a2,s2,a4 10554: 40f487b3 sub a5,s1,a5 10558: 00e6f733 and a4,a3,a4 1055c: 01800513 li a0,24 10560: 00b66633 or a2,a2,a1 10564: 00b76733 or a4,a4,a1 10568: 00f55463 bge a0,a5,10570 <fadd32+0x68> 1056c: 01800793 li a5,24 10570: 40f75733 sra a4,a4,a5 10574: 00d966b3 or a3,s2,a3 10578: 00e60433 add s0,a2,a4 1057c: 0006d463 bgez a3,10584 <fadd32+0x7c> 10580: 40e60433 sub s0,a2,a4 10584: 00040513 mv a0,s0 10588: e39ff0ef jal 103c0 <count_leading_zeros> 1058c: 00800793 li a5,8 10590: 04a7c863 blt a5,a0,105e0 <fadd32+0xd8> 10594: 40a787b3 sub a5,a5,a0 10598: 40f45433 sra s0,s0,a5 1059c: 00f484b3 add s1,s1,a5 105a0: 00941413 sll s0,s0,0x9 105a4: 01749493 sll s1,s1,0x17 105a8: 00945413 srl s0,s0,0x9 105ac: 800007b7 lui a5,0x80000 105b0: 00946433 or s0,s0,s1 105b4: 00f97533 and a0,s2,a5 105b8: 00c12083 lw ra,12(sp) 105bc: 00a46533 or a0,s0,a0 105c0: 00812403 lw s0,8(sp) 105c4: 00412483 lw s1,4(sp) 105c8: 00012903 lw s2,0(sp) 105cc: 01010113 add sp,sp,16 105d0: 00008067 ret 105d4: 00050693 mv a3,a0 105d8: 00058913 mv s2,a1 105dc: f5dff06f j 10538 <fadd32+0x30> 105e0: ff850513 add a0,a0,-8 105e4: 00a41433 sll s0,s0,a0 105e8: 40a484b3 sub s1,s1,a0 105ec: fb5ff06f j 105a0 <fadd32+0x98> ``` ::: #### ELF size ``` 
text data bss dec hex filename 53608 1896 1528 57032 dec8 perfcount.elf ``` #### ELF header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x10316 Start of program headers: 52 (bytes into file) Start of section headers: 70504 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Execute ```clike 0.954780 0.877887 0.800995 0.724102 0.647210 0.921899 0.826679 0.731460 0.636240 0.541020 0.889018 0.775471 0.661924 0.548377 0.434830 0.856138 0.724263 0.592389 0.460514 0.328640 0.823257 0.673055 0.522853 0.372652 0.222450 cycle count: 153485 instret: 2d4 inferior exit code 0 ``` ### -O3 Optimized Assembly Code #### Assembly code :::spoiler main function: ```c 000100b0 <main>: 100b0: f1010113 add sp,sp,-240 100b4: 0e112623 sw ra,236(sp) 100b8: 0e912223 sw s1,228(sp) 100bc: 0f212023 sw s2,224(sp) 100c0: 0d312e23 sw s3,220(sp) 100c4: 0d412c23 sw s4,216(sp) 100c8: 0d512a23 sw s5,212(sp) 100cc: 0d612823 sw s6,208(sp) 100d0: 0d712623 sw s7,204(sp) 100d4: 0d812423 sw s8,200(sp) 100d8: 0d912223 sw s9,196(sp) 100dc: 0da12023 sw s10,192(sp) 100e0: 0bb12e23 sw s11,188(sp) 100e4: 0e812423 sw s0,232(sp) 100e8: 5c8000ef jal 106b0 <get_instret> 100ec: 00050493 mv s1,a0 100f0: 5ac000ef jal 1069c <get_cycles> 100f4: 00050913 mv s2,a0 100f8: 05c00613 li a2,92 100fc: 00000593 li a1,0 10100: 05010513 add a0,sp,80 10104: 406010ef jal 1150a <memset> 10108: f3c1a783 lw a5,-196(gp) # 1e74c <__SDATA_BEGIN__+0x74> 1010c: f301a503 lw a0,-208(gp) # 1e740 <__SDATA_BEGIN__+0x68> 10110: f341a603 lw a2,-204(gp) # 1e744 <__SDATA_BEGIN__+0x6c> 10114: f381a683 lw a3,-200(gp) # 1e748 
<__SDATA_BEGIN__+0x70> 10118: 3f7475b7 lui a1,0x3f747 1011c: 3f25b0b7 lui ra,0x3f25b 10120: 3f52c3b7 lui t2,0x3f52c 10124: 3e63d2b7 lui t0,0x3e63d 10128: 04c10713 add a4,sp,76 1012c: 00f47cb7 lui s9,0xf47 10130: 00a5bd37 lui s10,0xa5b 10134: 00d2cdb7 lui s11,0xd2c 10138: 00e3d9b7 lui s3,0xe3d 1013c: 0af12623 sw a5,172(sp) 10140: c7658593 add a1,a1,-906 # 3f746c76 <__BSS_END__+0x3f727f12> 10144: 008007b7 lui a5,0x800 10148: f8e08093 add ra,ra,-114 # 3f25af8e <__BSS_END__+0x3f23c22a> 1014c: 0f938393 add t2,t2,249 # 3f52c0f9 <__BSS_END__+0x3f50d395> 10150: 9ef28293 add t0,t0,-1553 # 3e63c9ef <__BSS_END__+0x3e61dc8b> 10154: 04a12623 sw a0,76(sp) 10158: 04c12e23 sw a2,92(sp) 1015c: 08d12e23 sw a3,156(sp) 10160: 00070c13 mv s8,a4 10164: 00100a93 li s5,1 10168: fff78a13 add s4,a5,-1 # 7fffff <__BSS_END__+0x7e129b> 1016c: c76c8c93 add s9,s9,-906 # f46c76 <__BSS_END__+0xf27f12> 10170: 80000bb7 lui s7,0x80000 10174: 7f800b37 lui s6,0x7f800 10178: f8ed0d13 add s10,s10,-114 # a5af8e <__BSS_END__+0xa3c22a> 1017c: 0f9d8d93 add s11,s11,249 # d2c0f9 <__BSS_END__+0xd0d395> 10180: 9ef98993 add s3,s3,-1553 # e3c9ef <__BSS_END__+0xe1dc8b> 10184: 02e12a23 sw a4,52(sp) 10188: 02b12223 sw a1,36(sp) 1018c: 02112423 sw ra,40(sp) 10190: 02712623 sw t2,44(sp) 10194: 02512823 sw t0,48(sp) 10198: 02912c23 sw s1,56(sp) 1019c: 03212e23 sw s2,60(sp) 101a0: 00400793 li a5,4 101a4: 41578533 sub a0,a5,s5 101a8: 2a1000ef jal 10c48 <__floatsisf> 101ac: f401a583 lw a1,-192(gp) # 1e750 <__SDATA_BEGIN__+0x78> 101b0: 7b4000ef jal 10964 <__mulsf3> 101b4: 00800737 lui a4,0x800 101b8: 00aa77b3 and a5,s4,a0 101bc: 00e7e4b3 or s1,a5,a4 101c0: 41755893 sra a7,a0,0x17 101c4: 00050813 mv a6,a0 101c8: 0ff8f893 zext.b a7,a7 101cc: 00048693 mv a3,s1 101d0: 00000713 li a4,0 101d4: 0016f793 and a5,a3,1 101d8: 4016d693 sra a3,a3,0x1 101dc: 00078463 beqz a5,101e4 <main+0x134> 101e0: 01970733 add a4,a4,s9 101e4: 00068663 beqz a3,101f0 <main+0x140> 101e8: 40175713 sra a4,a4,0x1 101ec: fe9ff06f j 101d4 <main+0x124> 101f0: 
fff88793 add a5,a7,-1 101f4: 41875593 sra a1,a4,0x18 101f8: 02f12023 sw a5,32(sp) 101fc: 40b75733 sra a4,a4,a1 10200: 00088613 mv a2,a7 10204: 00059463 bnez a1,1020c <main+0x15c> 10208: 00078613 mv a2,a5 1020c: 02412783 lw a5,36(sp) 10210: 01477733 and a4,a4,s4 10214: 01761613 sll a2,a2,0x17 10218: 0107c433 xor s0,a5,a6 1021c: 01747433 and s0,s0,s7 10220: 00e46433 or s0,s0,a4 10224: 01667633 and a2,a2,s6 10228: 00c467b3 or a5,s0,a2 1022c: 000a8513 mv a0,s5 10230: 00d12c23 sw a3,24(sp) 10234: 01112a23 sw a7,20(sp) 10238: 01012823 sw a6,16(sp) 1023c: 00f12623 sw a5,12(sp) 10240: 209000ef jal 10c48 <__floatsisf> 10244: f401a583 lw a1,-192(gp) # 1e750 <__SDATA_BEGIN__+0x78> 10248: 71c000ef jal 10964 <__mulsf3> 1024c: 00aa7633 and a2,s4,a0 10250: 008007b7 lui a5,0x800 10254: 01412883 lw a7,20(sp) 10258: 01812683 lw a3,24(sp) 1025c: 01012803 lw a6,16(sp) 10260: 00f66633 or a2,a2,a5 10264: 41755e13 sra t3,a0,0x17 10268: 00050913 mv s2,a0 1026c: 0ffe7413 zext.b s0,t3 10270: 00060713 mv a4,a2 10274: 00177793 and a5,a4,1 10278: 40175713 sra a4,a4,0x1 1027c: 00078463 beqz a5,10284 <main+0x1d4> 10280: 01a686b3 add a3,a3,s10 10284: 00070663 beqz a4,10290 <main+0x1e0> 10288: 4016d693 sra a3,a3,0x1 1028c: fe9ff06f j 10274 <main+0x1c4> 10290: 02812783 lw a5,40(sp) 10294: 4186d593 sra a1,a3,0x18 10298: 0015b513 seqz a0,a1 1029c: 40b6d6b3 sra a3,a3,a1 102a0: 40a40533 sub a0,s0,a0 102a4: 0127c5b3 xor a1,a5,s2 102a8: 0146f6b3 and a3,a3,s4 102ac: 0175f5b3 and a1,a1,s7 102b0: 01751513 sll a0,a0,0x17 102b4: 00d5e5b3 or a1,a1,a3 102b8: 01657533 and a0,a0,s6 102bc: 00a5e5b3 or a1,a1,a0 102c0: 00c12503 lw a0,12(sp) 102c4: 00c12e23 sw a2,28(sp) 102c8: 00e12c23 sw a4,24(sp) 102cc: 01112a23 sw a7,20(sp) 102d0: 01012823 sw a6,16(sp) 102d4: 538000ef jal 1080c <fadd32> 102d8: 01c12603 lw a2,28(sp) 102dc: 01812703 lw a4,24(sp) 102e0: 01412883 lw a7,20(sp) 102e4: 01012803 lw a6,16(sp) 102e8: 00ac2223 sw a0,4(s8) 102ec: 0014f793 and a5,s1,1 102f0: 4014d493 sra s1,s1,0x1 102f4: 00078463 beqz a5,102fc 
<main+0x24c> 102f8: 01b70733 add a4,a4,s11 102fc: 00048663 beqz s1,10308 <main+0x258> 10300: 40175713 sra a4,a4,0x1 10304: fe9ff06f j 102ec <main+0x23c> 10308: 41875693 sra a3,a4,0x18 1030c: 40d75733 sra a4,a4,a3 10310: 00069463 bnez a3,10318 <main+0x268> 10314: 02012883 lw a7,32(sp) 10318: 02c12783 lw a5,44(sp) 1031c: 01477733 and a4,a4,s4 10320: 01789893 sll a7,a7,0x17 10324: 0107c533 xor a0,a5,a6 10328: 01757533 and a0,a0,s7 1032c: 00e56533 or a0,a0,a4 10330: 0168f8b3 and a7,a7,s6 10334: 01156533 or a0,a0,a7 10338: 00167793 and a5,a2,1 1033c: 40165613 sra a2,a2,0x1 10340: 00078463 beqz a5,10348 <main+0x298> 10344: 013484b3 add s1,s1,s3 10348: 00060663 beqz a2,10354 <main+0x2a4> 1034c: 4014d493 sra s1,s1,0x1 10350: fe9ff06f j 10338 <main+0x288> 10354: 4184d693 sra a3,s1,0x18 10358: 00d03733 snez a4,a3 1035c: 40d4d7b3 sra a5,s1,a3 10360: 03012683 lw a3,48(sp) 10364: 00870733 add a4,a4,s0 10368: ffd70713 add a4,a4,-3 # 7ffffd <__BSS_END__+0x7e1299> 1036c: 0126c333 xor t1,a3,s2 10370: 01737333 and t1,t1,s7 10374: 01771713 sll a4,a4,0x17 10378: 0147f7b3 and a5,a5,s4 1037c: 00f367b3 or a5,t1,a5 10380: 016775b3 and a1,a4,s6 10384: 00b7e5b3 or a1,a5,a1 10388: 484000ef jal 1080c <fadd32> 1038c: 04ac2a23 sw a0,84(s8) 10390: 001a8a93 add s5,s5,1 10394: 00400793 li a5,4 10398: 004c0c13 add s8,s8,4 1039c: e0fa92e3 bne s5,a5,101a0 <main+0xf0> 103a0: 03412703 lw a4,52(sp) 103a4: f401a783 lw a5,-192(gp) # 1e750 <__SDATA_BEGIN__+0x78> 103a8: 03812483 lw s1,56(sp) 103ac: 03c12903 lw s2,60(sp) 103b0: 00800437 lui s0,0x800 103b4: 00f12c23 sw a5,24(sp) 103b8: 01470993 add s3,a4,20 103bc: 00100793 li a5,1 103c0: 01400d13 li s10,20 103c4: 00f12823 sw a5,16(sp) 103c8: fff40d93 add s11,s0,-1 # 7fffff <__BSS_END__+0x7e129b> 103cc: 80000ab7 lui s5,0x80000 103d0: 7f800a37 lui s4,0x7f800 103d4: 00e12a23 sw a4,20(sp) 103d8: 00912e23 sw s1,28(sp) 103dc: 03212023 sw s2,32(sp) 103e0: 01312623 sw s3,12(sp) 103e4: 01012483 lw s1,16(sp) 103e8: 00400793 li a5,4 103ec: 40978533 sub a0,a5,s1 103f0: 
059000ef jal 10c48 <__floatsisf> 103f4: 01812983 lw s3,24(sp) 103f8: 00098593 mv a1,s3 103fc: 568000ef jal 10964 <__mulsf3> 10400: 00adfcb3 and s9,s11,a0 10404: 41755c13 sra s8,a0,0x17 10408: 00050913 mv s2,a0 1040c: 00048513 mv a0,s1 10410: 039000ef jal 10c48 <__floatsisf> 10414: 00098593 mv a1,s3 10418: 54c000ef jal 10964 <__mulsf3> 1041c: 01412983 lw s3,20(sp) 10420: 00adfbb3 and s7,s11,a0 10424: 41755b13 sra s6,a0,0x17 10428: 008cecb3 or s9,s9,s0 1042c: 0ffc7c13 zext.b s8,s8 10430: 00050493 mv s1,a0 10434: 008bebb3 or s7,s7,s0 10438: 0ffb7b13 zext.b s6,s6 1043c: 0009a503 lw a0,0(s3) 10440: 00000713 li a4,0 10444: 000c8793 mv a5,s9 10448: 01b575b3 and a1,a0,s11 1044c: 41755813 sra a6,a0,0x17 10450: 0085e5b3 or a1,a1,s0 10454: 0ff87813 zext.b a6,a6 10458: 0017f693 and a3,a5,1 1045c: 4017d793 sra a5,a5,0x1 10460: 00068463 beqz a3,10468 <main+0x3b8> 10464: 00b70733 add a4,a4,a1 10468: 00078663 beqz a5,10474 <main+0x3c4> 1046c: 40175713 sra a4,a4,0x1 10470: fe9ff06f j 10458 <main+0x3a8> 10474: 41875893 sra a7,a4,0x18 10478: 01880833 add a6,a6,s8 1047c: 011036b3 snez a3,a7 10480: 0509a583 lw a1,80(s3) 10484: 010686b3 add a3,a3,a6 10488: 41175733 sra a4,a4,a7 1048c: f8168693 add a3,a3,-127 10490: 01254533 xor a0,a0,s2 10494: 01b77733 and a4,a4,s11 10498: 01557533 and a0,a0,s5 1049c: 01769693 sll a3,a3,0x17 104a0: 00e56533 or a0,a0,a4 104a4: 0146f6b3 and a3,a3,s4 104a8: 01b5f833 and a6,a1,s11 104ac: 4175d893 sra a7,a1,0x17 104b0: 00d56533 or a0,a0,a3 104b4: 00886833 or a6,a6,s0 104b8: 0ff8f893 zext.b a7,a7 104bc: 000b8713 mv a4,s7 104c0: 00177693 and a3,a4,1 104c4: 40175713 sra a4,a4,0x1 104c8: 00068463 beqz a3,104d0 <main+0x420> 104cc: 010787b3 add a5,a5,a6 104d0: 00070663 beqz a4,104dc <main+0x42c> 104d4: 4017d793 sra a5,a5,0x1 104d8: fe9ff06f j 104c0 <main+0x410> 104dc: 4187d813 sra a6,a5,0x18 104e0: 016886b3 add a3,a7,s6 104e4: 01003733 snez a4,a6 104e8: 00d70733 add a4,a4,a3 104ec: 4107d7b3 sra a5,a5,a6 104f0: f8170713 add a4,a4,-127 104f4: 0095c5b3 xor a1,a1,s1 
104f8: 01b7f7b3 and a5,a5,s11 104fc: 0155f5b3 and a1,a1,s5 10500: 01771713 sll a4,a4,0x17 10504: 00f5e5b3 or a1,a1,a5 10508: 01477733 and a4,a4,s4 1050c: 00e5e5b3 or a1,a1,a4 10510: 2fc000ef jal 1080c <fadd32> 10514: 01a987b3 add a5,s3,s10 10518: 00a7a023 sw a0,0(a5) # 800000 <__BSS_END__+0x7e129c> 1051c: 00c12783 lw a5,12(sp) 10520: 00498993 add s3,s3,4 10524: f1379ce3 bne a5,s3,1043c <main+0x38c> 10528: 01012783 lw a5,16(sp) 1052c: 00400713 li a4,4 10530: 014d0d13 add s10,s10,20 10534: 00178793 add a5,a5,1 10538: 00f12823 sw a5,16(sp) 1053c: eae794e3 bne a5,a4,103e4 <main+0x334> 10540: 01412703 lw a4,20(sp) 10544: 01c12483 lw s1,28(sp) 10548: 02012903 lw s2,32(sp) 1054c: 00c12983 lw s3,12(sp) 10550: 06470b13 add s6,a4,100 10554: 0001cab7 lui s5,0x1c 10558: 00500a13 li s4,5 1055c: 00070413 mv s0,a4 10560: 00000b93 li s7,0 10564: 00042503 lw a0,0(s0) 10568: 001b8b93 add s7,s7,1 # 80000001 <__BSS_END__+0x7ffe129d> 1056c: 00440413 add s0,s0,4 10570: 79a000ef jal 10d0a <__extendsfdf2> 10574: 00050613 mv a2,a0 10578: 00058693 mv a3,a1 1057c: 790a8513 add a0,s5,1936 # 1c790 <__trunctfdf2+0x2ae> 10580: 47b000ef jal 111fa <printf> 10584: ff4b90e3 bne s7,s4,10564 <main+0x4b4> 10588: 00a00513 li a0,10 1058c: 4a1000ef jal 1122c <putchar> 10590: 00098413 mv s0,s3 10594: 013b0663 beq s6,s3,105a0 <main+0x4f0> 10598: 01498993 add s3,s3,20 1059c: fc5ff06f j 10560 <main+0x4b0> 105a0: 0fc000ef jal 1069c <get_cycles> 105a4: 412505b3 sub a1,a0,s2 105a8: 0001c537 lui a0,0x1c 105ac: 79450513 add a0,a0,1940 # 1c794 <__trunctfdf2+0x2b2> 105b0: 44b000ef jal 111fa <printf> 105b4: 0001c537 lui a0,0x1c 105b8: 00048593 mv a1,s1 105bc: 7a850513 add a0,a0,1960 # 1c7a8 <__trunctfdf2+0x2c6> 105c0: 43b000ef jal 111fa <printf> 105c4: 0ec12083 lw ra,236(sp) 105c8: 0e812403 lw s0,232(sp) 105cc: 0e412483 lw s1,228(sp) 105d0: 0e012903 lw s2,224(sp) 105d4: 0dc12983 lw s3,220(sp) 105d8: 0d812a03 lw s4,216(sp) 105dc: 0d412a83 lw s5,212(sp) 105e0: 0d012b03 lw s6,208(sp) 105e4: 0cc12b83 lw s7,204(sp) 105e8: 
0c812c03 lw s8,200(sp) 105ec: 0c412c83 lw s9,196(sp) 105f0: 0c012d03 lw s10,192(sp) 105f4: 0bc12d83 lw s11,188(sp) 105f8: 00000513 li a0,0 105fc: 0f010113 add sp,sp,240 10600: 00008067 ret ``` ::: :::spoiler fmul function: ```c 00010780 <fmul32>: 10780: 00800737 lui a4,0x800 10784: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e129b> 10788: 00a7f633 and a2,a5,a0 1078c: 41755693 sra a3,a0,0x17 10790: 00b7f7b3 and a5,a5,a1 10794: 4175d893 sra a7,a1,0x17 10798: 00e66633 or a2,a2,a4 1079c: 00e7e7b3 or a5,a5,a4 107a0: 0ff6f813 zext.b a6,a3 107a4: 0ff8f893 zext.b a7,a7 107a8: 00000713 li a4,0 107ac: 0017f693 and a3,a5,1 107b0: 4017d793 sra a5,a5,0x1 107b4: 00068463 beqz a3,107bc <fmul32+0x3c> 107b8: 00c70733 add a4,a4,a2 107bc: 00078663 beqz a5,107c8 <fmul32+0x48> 107c0: 40175713 sra a4,a4,0x1 107c4: fe9ff06f j 107ac <fmul32+0x2c> 107c8: 41875613 sra a2,a4,0x18 107cc: 011806b3 add a3,a6,a7 107d0: 00c037b3 snez a5,a2 107d4: 40c75733 sra a4,a4,a2 107d8: 00d787b3 add a5,a5,a3 107dc: f8178793 add a5,a5,-127 107e0: 00a5c5b3 xor a1,a1,a0 107e4: 800006b7 lui a3,0x80000 107e8: 00971713 sll a4,a4,0x9 107ec: 00d5f5b3 and a1,a1,a3 107f0: 00975713 srl a4,a4,0x9 107f4: 01779513 sll a0,a5,0x17 107f8: 7f8007b7 lui a5,0x7f800 107fc: 00e5e5b3 or a1,a1,a4 10800: 00f57533 and a0,a0,a5 10804: 00a5e533 or a0,a1,a0 10808: 00008067 ret ``` ::: :::spoiler fadd32 function: ```c 0001080c <fadd32>: 1080c: 800007b7 lui a5,0x80000 10810: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe129b> 10814: 00a7f733 and a4,a5,a0 10818: 00b7f7b3 and a5,a5,a1 1081c: 00050813 mv a6,a0 10820: 00058613 mv a2,a1 10824: 00f74663 blt a4,a5,10830 <fadd32+0x24> 10828: 00050613 mv a2,a0 1082c: 00058813 mv a6,a1 10830: 00800537 lui a0,0x800 10834: 41765693 sra a3,a2,0x17 10838: 41785793 sra a5,a6,0x17 1083c: fff50713 add a4,a0,-1 # 7fffff <__BSS_END__+0x7e129b> 10840: 0ff6f693 zext.b a3,a3 10844: 0ff7f793 zext.b a5,a5 10848: 00e675b3 and a1,a2,a4 1084c: 40f687b3 sub a5,a3,a5 10850: 00e87733 and a4,a6,a4 10854: 
01800893 li a7,24 10858: 00a5e5b3 or a1,a1,a0 1085c: 00a76733 or a4,a4,a0 10860: 00f8d463 bge a7,a5,10868 <fadd32+0x5c> 10864: 01800793 li a5,24 10868: 40f757b3 sra a5,a4,a5 1086c: 01066833 or a6,a2,a6 10870: 00f58733 add a4,a1,a5 10874: 00085463 bgez a6,1087c <fadd32+0x70> 10878: 40f58733 sub a4,a1,a5 1087c: 00175793 srl a5,a4,0x1 10880: 00e7e7b3 or a5,a5,a4 10884: 0027d593 srl a1,a5,0x2 10888: 00b7e7b3 or a5,a5,a1 1088c: 0047d593 srl a1,a5,0x4 10890: 00b7e7b3 or a5,a5,a1 10894: 0087d593 srl a1,a5,0x8 10898: 00b7e7b3 or a5,a5,a1 1089c: 0107d593 srl a1,a5,0x10 108a0: 00b7e7b3 or a5,a5,a1 108a4: 55555537 lui a0,0x55555 108a8: 0017d593 srl a1,a5,0x1 108ac: 55550513 add a0,a0,1365 # 55555555 <__BSS_END__+0x555367f1> 108b0: 00a5f5b3 and a1,a1,a0 108b4: 40b787b3 sub a5,a5,a1 108b8: 33333537 lui a0,0x33333 108bc: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x333145cf> 108c0: 0027d593 srl a1,a5,0x2 108c4: 00a5f5b3 and a1,a1,a0 108c8: 00a7f7b3 and a5,a5,a0 108cc: 00f585b3 add a1,a1,a5 108d0: 0045d793 srl a5,a1,0x4 108d4: 0f0f1537 lui a0,0xf0f1 108d8: 00b787b3 add a5,a5,a1 108dc: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d21ab> 108e0: 00a7f7b3 and a5,a5,a0 108e4: 0087d593 srl a1,a5,0x8 108e8: 00b787b3 add a5,a5,a1 108ec: 0107d593 srl a1,a5,0x10 108f0: 00b787b3 add a5,a5,a1 108f4: 07f7f793 and a5,a5,127 108f8: 02000593 li a1,32 108fc: 40f585b3 sub a1,a1,a5 10900: 00800513 li a0,8 10904: 02b54863 blt a0,a1,10934 <fadd32+0x128> 10908: fe878793 add a5,a5,-24 1090c: 40f75733 sra a4,a4,a5 10910: 00f686b3 add a3,a3,a5 10914: 00971713 sll a4,a4,0x9 10918: 01769693 sll a3,a3,0x17 1091c: 800007b7 lui a5,0x80000 10920: 00975713 srl a4,a4,0x9 10924: 00d76733 or a4,a4,a3 10928: 00f67533 and a0,a2,a5 1092c: 00a76533 or a0,a4,a0 10930: 00008067 ret 10934: 01800593 li a1,24 10938: 40f587b3 sub a5,a1,a5 1093c: 00f71733 sll a4,a4,a5 10940: 40f686b3 sub a3,a3,a5 10944: 00971713 sll a4,a4,0x9 10948: 01769693 sll a3,a3,0x17 1094c: 800007b7 lui a5,0x80000 10950: 00975713 srl 
a4,a4,0x9 10954: 00d76733 or a4,a4,a3 10958: 00f67533 and a0,a2,a5 1095c: 00a76533 or a0,a4,a0 10960: 00008067 ret ``` ::: #### ELF size ``` text data bss dec hex filename 54492 1896 1528 57916 e23c perfcount.elf ``` #### ELF header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x10618 Start of program headers: 52 (bytes into file) Start of section headers: 74088 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Execute ```clike 0.954780 0.877887 0.800995 0.724102 0.647210 0.921899 0.826679 0.731460 0.636240 0.541020 0.889018 0.775471 0.661924 0.548377 0.434830 0.856138 0.724263 0.592389 0.460514 0.328640 0.823257 0.673055 0.522853 0.372652 0.222450 cycle count: 150953 instret: 2d4 inferior exit code 0 ``` ### -Os Optimized Assembly Code #### Assembly code :::spoiler main function: ```c 00100b0 <main>: 100b0: f4010113 add sp,sp,-192 100b4: 0a112e23 sw ra,188(sp) 100b8: 0a812c23 sw s0,184(sp) 100bc: 0a912a23 sw s1,180(sp) 100c0: 0b212823 sw s2,176(sp) 100c4: 0b412423 sw s4,168(sp) 100c8: 0b512223 sw s5,164(sp) 100cc: 0b612023 sw s6,160(sp) 100d0: 09712e23 sw s7,156(sp) 100d4: 09812c23 sw s8,152(sp) 100d8: 09912a23 sw s9,148(sp) 100dc: 09a12823 sw s10,144(sp) 100e0: 09b12623 sw s11,140(sp) 100e4: 0b312623 sw s3,172(sp) 100e8: 2b4000ef jal 1039c <get_instret> 100ec: 00a12623 sw a0,12(sp) 100f0: 298000ef jal 10388 <get_cycles> 100f4: 00050a93 mv s5,a0 100f8: 05c00613 li a2,92 100fc: 00000593 li a1,0 10100: 02010513 add a0,sp,32 10104: 096010ef jal 1119a <memset> 10108: f301ad83 lw s11,-208(gp) # 1e948 <__SDATA_BEGIN__+0x68> 1010c: f341ac83 lw s9,-204(gp) 
# 1e94c <__SDATA_BEGIN__+0x6c> 10110: f381ac03 lw s8,-200(gp) # 1e950 <__SDATA_BEGIN__+0x70> 10114: f3c1ab83 lw s7,-196(gp) # 1e954 <__SDATA_BEGIN__+0x74> 10118: f401ad03 lw s10,-192(gp) # 1e958 <__SDATA_BEGIN__+0x78> 1011c: 01c10413 add s0,sp,28 10120: 01b12e23 sw s11,28(sp) 10124: 03912623 sw s9,44(sp) 10128: 07812623 sw s8,108(sp) 1012c: 07712e23 sw s7,124(sp) 10130: 00040b13 mv s6,s0 10134: 00100493 li s1,1 10138: 00400913 li s2,4 1013c: 40990533 sub a0,s2,s1 10140: 798000ef jal 108d8 <__floatsisf> 10144: 000d0593 mv a1,s10 10148: 4ac000ef jal 105f4 <__mulsf3> 1014c: 00050593 mv a1,a0 10150: 00050993 mv s3,a0 10154: 000d8513 mv a0,s11 10158: 314000ef jal 1046c <fmul32> 1015c: 00a12423 sw a0,8(sp) 10160: 00048513 mv a0,s1 10164: 774000ef jal 108d8 <__floatsisf> 10168: 000d0593 mv a1,s10 1016c: 488000ef jal 105f4 <__mulsf3> 10170: 00050593 mv a1,a0 10174: 00a12223 sw a0,4(sp) 10178: 000c8513 mv a0,s9 1017c: 2f0000ef jal 1046c <fmul32> 10180: 00050593 mv a1,a0 10184: 00812503 lw a0,8(sp) 10188: 00148493 add s1,s1,1 1018c: 004b0b13 add s6,s6,4 10190: 37c000ef jal 1050c <fadd32> 10194: 00098593 mv a1,s3 10198: 00ab2023 sw a0,0(s6) 1019c: 000c0513 mv a0,s8 101a0: 2cc000ef jal 1046c <fmul32> 101a4: 00412583 lw a1,4(sp) 101a8: 00050993 mv s3,a0 101ac: 000b8513 mv a0,s7 101b0: 2bc000ef jal 1046c <fmul32> 101b4: 00050593 mv a1,a0 101b8: 00098513 mv a0,s3 101bc: 350000ef jal 1050c <fadd32> 101c0: 04ab2823 sw a0,80(s6) 101c4: f7249ce3 bne s1,s2,1013c <main+0x8c> 101c8: f401ac03 lw s8,-192(gp) # 1e958 <__SDATA_BEGIN__+0x78> 101cc: 01400b13 li s6,20 101d0: 00100913 li s2,1 101d4: 00400c93 li s9,4 101d8: 01440d13 add s10,s0,20 101dc: 412c8533 sub a0,s9,s2 101e0: 6f8000ef jal 108d8 <__floatsisf> 101e4: 000c0593 mv a1,s8 101e8: 40c000ef jal 105f4 <__mulsf3> 101ec: 00050993 mv s3,a0 101f0: 00040493 mv s1,s0 101f4: 0004a503 lw a0,0(s1) 101f8: 00098593 mv a1,s3 101fc: 270000ef jal 1046c <fmul32> 10200: 00050b93 mv s7,a0 10204: 00090513 mv a0,s2 10208: 6d0000ef jal 108d8 
<__floatsisf> 1020c: 000c0593 mv a1,s8 10210: 3e4000ef jal 105f4 <__mulsf3> 10214: 00050593 mv a1,a0 10218: 0504a503 lw a0,80(s1) 1021c: 250000ef jal 1046c <fmul32> 10220: 00050593 mv a1,a0 10224: 000b8513 mv a0,s7 10228: 2e4000ef jal 1050c <fadd32> 1022c: 016487b3 add a5,s1,s6 10230: 00a7a023 sw a0,0(a5) 10234: 00448493 add s1,s1,4 10238: fba49ee3 bne s1,s10,101f4 <main+0x144> 1023c: 00190913 add s2,s2,1 10240: 014b0b13 add s6,s6,20 10244: f9991ce3 bne s2,s9,101dc <main+0x12c> 10248: 06440993 add s3,s0,100 1024c: 0001cb37 lui s6,0x1c 10250: 00500b93 li s7,5 10254: 00040913 mv s2,s0 10258: 00000493 li s1,0 1025c: 00092503 lw a0,0(s2) 10260: 00148493 add s1,s1,1 10264: 00490913 add s2,s2,4 10268: 732000ef jal 1099a <__extendsfdf2> 1026c: 00050613 mv a2,a0 10270: 00058693 mv a3,a1 10274: 420b0513 add a0,s6,1056 # 1c420 <__trunctfdf2+0x2ae> 10278: 413000ef jal 10e8a <printf> 1027c: ff7490e3 bne s1,s7,1025c <main+0x1ac> 10280: 00a00513 li a0,10 10284: 01440413 add s0,s0,20 10288: 435000ef jal 10ebc <putchar> 1028c: fc8994e3 bne s3,s0,10254 <main+0x1a4> 10290: 0f8000ef jal 10388 <get_cycles> 10294: 415505b3 sub a1,a0,s5 10298: 0001c537 lui a0,0x1c 1029c: 42450513 add a0,a0,1060 # 1c424 <__trunctfdf2+0x2b2> 102a0: 3eb000ef jal 10e8a <printf> 102a4: 00c12583 lw a1,12(sp) 102a8: 0001c537 lui a0,0x1c 102ac: 43850513 add a0,a0,1080 # 1c438 <__trunctfdf2+0x2c6> 102b0: 3db000ef jal 10e8a <printf> 102b4: 0bc12083 lw ra,188(sp) 102b8: 0b812403 lw s0,184(sp) 102bc: 0b412483 lw s1,180(sp) 102c0: 0b012903 lw s2,176(sp) 102c4: 0ac12983 lw s3,172(sp) 102c8: 0a812a03 lw s4,168(sp) 102cc: 0a412a83 lw s5,164(sp) 102d0: 0a012b03 lw s6,160(sp) 102d4: 09c12b83 lw s7,156(sp) 102d8: 09812c03 lw s8,152(sp) 102dc: 09412c83 lw s9,148(sp) 102e0: 09012d03 lw s10,144(sp) 102e4: 08c12d83 lw s11,140(sp) 102e8: 00000513 li a0,0 102ec: 0c010113 add sp,sp,192 102f0: 00008067 ret ``` ::: :::spoiler fmul function: ```c 0001046c <fmul32>: 1046c: ff010113 add sp,sp,-16 10470: 00912223 sw s1,4(sp) 10474: 
00050493 mv s1,a0 10478: 00800537 lui a0,0x800 1047c: fff50793 add a5,a0,-1 # 7fffff <__BSS_END__+0x7e1093> 10480: 00812423 sw s0,8(sp) 10484: 00058413 mv s0,a1 10488: 00b7f5b3 and a1,a5,a1 1048c: 0097f7b3 and a5,a5,s1 10490: 00a5e5b3 or a1,a1,a0 10494: 00a7e533 or a0,a5,a0 10498: 00112623 sw ra,12(sp) 1049c: fa9ff0ef jal 10444 <imul32> 104a0: 41855693 sra a3,a0,0x18 104a4: 4174d793 sra a5,s1,0x17 104a8: 41745613 sra a2,s0,0x17 104ac: 0016f693 and a3,a3,1 104b0: 0ff7f793 zext.b a5,a5 104b4: 0ff67613 zext.b a2,a2 104b8: 40d55733 sra a4,a0,a3 104bc: 00c787b3 add a5,a5,a2 104c0: 04069263 bnez a3,10504 <fmul32+0x98> 104c4: f8178793 add a5,a5,-127 104c8: 00944533 xor a0,s0,s1 104cc: 800006b7 lui a3,0x80000 104d0: 00971713 sll a4,a4,0x9 104d4: 00c12083 lw ra,12(sp) 104d8: 00812403 lw s0,8(sp) 104dc: 00975713 srl a4,a4,0x9 104e0: 00d57533 and a0,a0,a3 104e4: 00e56533 or a0,a0,a4 104e8: 01779793 sll a5,a5,0x17 104ec: 7f800737 lui a4,0x7f800 104f0: 00e7f7b3 and a5,a5,a4 104f4: 00412483 lw s1,4(sp) 104f8: 00f56533 or a0,a0,a5 104fc: 01010113 add sp,sp,16 10500: 00008067 ret 10504: f8278793 add a5,a5,-126 10508: fc1ff06f j 104c8 <fmul32+0x5c> ``` ::: :::spoiler fadd32 function: ```c 0001050c <fadd32>: 1050c: 800007b7 lui a5,0x80000 10510: ff010113 add sp,sp,-16 10514: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe1093> 10518: 00a7f733 and a4,a5,a0 1051c: 00112623 sw ra,12(sp) 10520: 00812423 sw s0,8(sp) 10524: 00912223 sw s1,4(sp) 10528: 01212023 sw s2,0(sp) 1052c: 00b7f7b3 and a5,a5,a1 10530: 0af74463 blt a4,a5,105d8 <fadd32+0xcc> 10534: 00050913 mv s2,a0 10538: 00058693 mv a3,a1 1053c: 00800737 lui a4,0x800 10540: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e1093> 10544: 00f97633 and a2,s2,a5 10548: 00f6f7b3 and a5,a3,a5 1054c: 00e66633 or a2,a2,a4 10550: 00e7e7b3 or a5,a5,a4 10554: 41795493 sra s1,s2,0x17 10558: 4176d713 sra a4,a3,0x17 1055c: 0ff4f493 zext.b s1,s1 10560: 0ff77713 zext.b a4,a4 10564: 40e48733 sub a4,s1,a4 10568: 01800593 li a1,24 1056c: 00e5d463 
bge a1,a4,10574 <fadd32+0x68> 10570: 01800713 li a4,24 10574: 40e7d7b3 sra a5,a5,a4 10578: 00d966b3 or a3,s2,a3 1057c: 00f60433 add s0,a2,a5 10580: 0006d463 bgez a3,10588 <fadd32+0x7c> 10584: 40f60433 sub s0,a2,a5 10588: 00040513 mv a0,s0 1058c: e25ff0ef jal 103b0 <count_leading_zeros> 10590: 00800793 li a5,8 10594: 04a7c863 blt a5,a0,105e4 <fadd32+0xd8> 10598: 40a787b3 sub a5,a5,a0 1059c: 40f45433 sra s0,s0,a5 105a0: 00f484b3 add s1,s1,a5 105a4: 00941413 sll s0,s0,0x9 105a8: 01749493 sll s1,s1,0x17 105ac: 00945413 srl s0,s0,0x9 105b0: 800007b7 lui a5,0x80000 105b4: 00946433 or s0,s0,s1 105b8: 00f97533 and a0,s2,a5 105bc: 00c12083 lw ra,12(sp) 105c0: 00a46533 or a0,s0,a0 105c4: 00812403 lw s0,8(sp) 105c8: 00412483 lw s1,4(sp) 105cc: 00012903 lw s2,0(sp) 105d0: 01010113 add sp,sp,16 105d4: 00008067 ret 105d8: 00050693 mv a3,a0 105dc: 00058913 mv s2,a1 105e0: f5dff06f j 1053c <fadd32+0x30> 105e4: ff850513 add a0,a0,-8 105e8: 00a41433 sll s0,s0,a0 105ec: 40a484b3 sub s1,s1,a0 105f0: fb5ff06f j 105a4 <fadd32+0x98> ``` ::: #### ELF size ``` text data bss dec hex filename 53612 1896 1528 57036 decc perfcount.elf ``` #### ELF header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x10308 Start of program headers: 52 (bytes into file) Start of section headers: 70512 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Execute ```clike 0.954780 0.877887 0.800995 0.724102 0.647210 0.921899 0.826679 0.731460 0.636240 0.541020 0.889018 0.775471 0.661924 0.548377 0.434830 0.856138 0.724263 0.592389 0.460514 0.328640 0.823257 0.673055 0.522853 0.372652 0.222450 cycle 
count: 154013 instret: 2de inferior exit code 0 ``` ### -Ofast Optimized Assembly Code #### Assembly code :::spoiler main function (note: the listing pasted below is actually fadd32; the main disassembly is missing): ```c 0001080c <fadd32>: 1080c: 800007b7 lui a5,0x80000 10810: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe129b> 10814: 00a7f733 and a4,a5,a0 10818: 00b7f7b3 and a5,a5,a1 1081c: 00050813 mv a6,a0 10820: 00058613 mv a2,a1 10824: 00f74663 blt a4,a5,10830 <fadd32+0x24> 10828: 00050613 mv a2,a0 1082c: 00058813 mv a6,a1 10830: 00800537 lui a0,0x800 10834: 41765693 sra a3,a2,0x17 10838: 41785793 sra a5,a6,0x17 1083c: fff50713 add a4,a0,-1 # 7fffff <__BSS_END__+0x7e129b> 10840: 0ff6f693 zext.b a3,a3 10844: 0ff7f793 zext.b a5,a5 10848: 00e675b3 and a1,a2,a4 1084c: 40f687b3 sub a5,a3,a5 10850: 00e87733 and a4,a6,a4 10854: 01800893 li a7,24 10858: 00a5e5b3 or a1,a1,a0 1085c: 00a76733 or a4,a4,a0 10860: 00f8d463 bge a7,a5,10868 <fadd32+0x5c> 10864: 01800793 li a5,24 10868: 40f757b3 sra a5,a4,a5 1086c: 01066833 or a6,a2,a6 10870: 00f58733 add a4,a1,a5 10874: 00085463 bgez a6,1087c <fadd32+0x70> 10878: 40f58733 sub a4,a1,a5 1087c: 00175793 srl a5,a4,0x1 10880: 00e7e7b3 or a5,a5,a4 10884: 0027d593 srl a1,a5,0x2 10888: 00b7e7b3 or a5,a5,a1 1088c: 0047d593 srl a1,a5,0x4 10890: 00b7e7b3 or a5,a5,a1 10894: 0087d593 srl a1,a5,0x8 10898: 00b7e7b3 or a5,a5,a1 1089c: 0107d593 srl a1,a5,0x10 108a0: 00b7e7b3 or a5,a5,a1 108a4: 55555537 lui a0,0x55555 108a8: 0017d593 srl a1,a5,0x1 108ac: 55550513 add a0,a0,1365 # 55555555 <__BSS_END__+0x555367f1> 108b0: 00a5f5b3 and a1,a1,a0 108b4: 40b787b3 sub a5,a5,a1 108b8: 33333537 lui a0,0x33333 108bc: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x333145cf> 108c0: 0027d593 srl a1,a5,0x2 108c4: 00a5f5b3 and a1,a1,a0 108c8: 00a7f7b3 and a5,a5,a0 108cc: 00f585b3 add a1,a1,a5 108d0: 0045d793 srl a5,a1,0x4 108d4: 0f0f1537 lui a0,0xf0f1 108d8: 00b787b3 add a5,a5,a1 108dc: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d21ab> 108e0: 00a7f7b3 and a5,a5,a0 108e4: 0087d593 srl a1,a5,0x8 108e8: 00b787b3 add a5,a5,a1 
108ec: 0107d593 srl a1,a5,0x10 108f0: 00b787b3 add a5,a5,a1 108f4: 07f7f793 and a5,a5,127 108f8: 02000593 li a1,32 108fc: 40f585b3 sub a1,a1,a5 10900: 00800513 li a0,8 10904: 02b54863 blt a0,a1,10934 <fadd32+0x128> 10908: fe878793 add a5,a5,-24 1090c: 40f75733 sra a4,a4,a5 10910: 00f686b3 add a3,a3,a5 10914: 00971713 sll a4,a4,0x9 10918: 01769693 sll a3,a3,0x17 1091c: 800007b7 lui a5,0x80000 10920: 00975713 srl a4,a4,0x9 10924: 00d76733 or a4,a4,a3 10928: 00f67533 and a0,a2,a5 1092c: 00a76533 or a0,a4,a0 10930: 00008067 ret 10934: 01800593 li a1,24 10938: 40f587b3 sub a5,a1,a5 1093c: 00f71733 sll a4,a4,a5 10940: 40f686b3 sub a3,a3,a5 10944: 00971713 sll a4,a4,0x9 10948: 01769693 sll a3,a3,0x17 1094c: 800007b7 lui a5,0x80000 10950: 00975713 srl a4,a4,0x9 10954: 00d76733 or a4,a4,a3 10958: 00f67533 and a0,a2,a5 1095c: 00a76533 or a0,a4,a0 10960: 00008067 ret ``` ::: :::spoiler fmul function: ```c 00010780 <fmul32>: 10780: 00800737 lui a4,0x800 10784: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e129b> 10788: 00a7f633 and a2,a5,a0 1078c: 41755693 sra a3,a0,0x17 10790: 00b7f7b3 and a5,a5,a1 10794: 4175d893 sra a7,a1,0x17 10798: 00e66633 or a2,a2,a4 1079c: 00e7e7b3 or a5,a5,a4 107a0: 0ff6f813 zext.b a6,a3 107a4: 0ff8f893 zext.b a7,a7 107a8: 00000713 li a4,0 107ac: 0017f693 and a3,a5,1 107b0: 4017d793 sra a5,a5,0x1 107b4: 00068463 beqz a3,107bc <fmul32+0x3c> 107b8: 00c70733 add a4,a4,a2 107bc: 00078663 beqz a5,107c8 <fmul32+0x48> 107c0: 40175713 sra a4,a4,0x1 107c4: fe9ff06f j 107ac <fmul32+0x2c> 107c8: 41875613 sra a2,a4,0x18 107cc: 011806b3 add a3,a6,a7 107d0: 00c037b3 snez a5,a2 107d4: 40c75733 sra a4,a4,a2 107d8: 00d787b3 add a5,a5,a3 107dc: f8178793 add a5,a5,-127 107e0: 00a5c5b3 xor a1,a1,a0 107e4: 800006b7 lui a3,0x80000 107e8: 00971713 sll a4,a4,0x9 107ec: 00d5f5b3 and a1,a1,a3 107f0: 00975713 srl a4,a4,0x9 107f4: 01779513 sll a0,a5,0x17 107f8: 7f8007b7 lui a5,0x7f800 107fc: 00e5e5b3 or a1,a1,a4 10800: 00f57533 and a0,a0,a5 10804: 00a5e533 or a0,a1,a0 10808: 
00008067 ret ``` ::: :::spoiler fadd32 function: ```c 0001080c <fadd32>: 1080c: 800007b7 lui a5,0x80000 10810: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe129b> 10814: 00a7f733 and a4,a5,a0 10818: 00b7f7b3 and a5,a5,a1 1081c: 00050813 mv a6,a0 10820: 00058613 mv a2,a1 10824: 00f74663 blt a4,a5,10830 <fadd32+0x24> 10828: 00050613 mv a2,a0 1082c: 00058813 mv a6,a1 10830: 00800537 lui a0,0x800 10834: 41765693 sra a3,a2,0x17 10838: 41785793 sra a5,a6,0x17 1083c: fff50713 add a4,a0,-1 # 7fffff <__BSS_END__+0x7e129b> 10840: 0ff6f693 zext.b a3,a3 10844: 0ff7f793 zext.b a5,a5 10848: 00e675b3 and a1,a2,a4 1084c: 40f687b3 sub a5,a3,a5 10850: 00e87733 and a4,a6,a4 10854: 01800893 li a7,24 10858: 00a5e5b3 or a1,a1,a0 1085c: 00a76733 or a4,a4,a0 10860: 00f8d463 bge a7,a5,10868 <fadd32+0x5c> 10864: 01800793 li a5,24 10868: 40f757b3 sra a5,a4,a5 1086c: 01066833 or a6,a2,a6 10870: 00f58733 add a4,a1,a5 10874: 00085463 bgez a6,1087c <fadd32+0x70> 10878: 40f58733 sub a4,a1,a5 1087c: 00175793 srl a5,a4,0x1 10880: 00e7e7b3 or a5,a5,a4 10884: 0027d593 srl a1,a5,0x2 10888: 00b7e7b3 or a5,a5,a1 1088c: 0047d593 srl a1,a5,0x4 10890: 00b7e7b3 or a5,a5,a1 10894: 0087d593 srl a1,a5,0x8 10898: 00b7e7b3 or a5,a5,a1 1089c: 0107d593 srl a1,a5,0x10 108a0: 00b7e7b3 or a5,a5,a1 108a4: 55555537 lui a0,0x55555 108a8: 0017d593 srl a1,a5,0x1 108ac: 55550513 add a0,a0,1365 # 55555555 <__BSS_END__+0x555367f1> 108b0: 00a5f5b3 and a1,a1,a0 108b4: 40b787b3 sub a5,a5,a1 108b8: 33333537 lui a0,0x33333 108bc: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x333145cf> 108c0: 0027d593 srl a1,a5,0x2 108c4: 00a5f5b3 and a1,a1,a0 108c8: 00a7f7b3 and a5,a5,a0 108cc: 00f585b3 add a1,a1,a5 108d0: 0045d793 srl a5,a1,0x4 108d4: 0f0f1537 lui a0,0xf0f1 108d8: 00b787b3 add a5,a5,a1 108dc: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d21ab> 108e0: 00a7f7b3 and a5,a5,a0 108e4: 0087d593 srl a1,a5,0x8 108e8: 00b787b3 add a5,a5,a1 108ec: 0107d593 srl a1,a5,0x10 108f0: 00b787b3 add a5,a5,a1 108f4: 07f7f793 and 
a5,a5,127 108f8: 02000593 li a1,32 108fc: 40f585b3 sub a1,a1,a5 10900: 00800513 li a0,8 10904: 02b54863 blt a0,a1,10934 <fadd32+0x128> 10908: fe878793 add a5,a5,-24 1090c: 40f75733 sra a4,a4,a5 10910: 00f686b3 add a3,a3,a5 10914: 00971713 sll a4,a4,0x9 10918: 01769693 sll a3,a3,0x17 1091c: 800007b7 lui a5,0x80000 10920: 00975713 srl a4,a4,0x9 10924: 00d76733 or a4,a4,a3 10928: 00f67533 and a0,a2,a5 1092c: 00a76533 or a0,a4,a0 10930: 00008067 ret 10934: 01800593 li a1,24 10938: 40f587b3 sub a5,a1,a5 1093c: 00f71733 sll a4,a4,a5 10940: 40f686b3 sub a3,a3,a5 10944: 00971713 sll a4,a4,0x9 10948: 01769693 sll a3,a3,0x17 1094c: 800007b7 lui a5,0x80000 10950: 00975713 srl a4,a4,0x9 10954: 00d76733 or a4,a4,a3 10958: 00f67533 and a0,a2,a5 1095c: 00a76533 or a0,a4,a0 10960: 00008067 ret ``` ::: #### ELF size ``` text data bss dec hex filename 54492 1896 1528 57916 e23c perfcount.elf ``` #### ELF header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x10618 Start of program headers: 52 (bytes into file) Start of section headers: 74088 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Execute ```clike 0.954780 0.877887 0.800995 0.724102 0.647210 0.921899 0.826679 0.731460 0.636240 0.541020 0.889018 0.775471 0.661924 0.548377 0.434830 0.856138 0.724263 0.592389 0.460514 0.328640 0.823257 0.673055 0.522853 0.372652 0.222450 cycle count: 150953 instret: 2d4 inferior exit code 0 ``` <s> :::danger Avoid using screenshots that solely contain plain text. Here are the reasons why: 1. 
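Text-based content is more efficiently searchable than having to browse through images iteratively. 2. The rendering engine of HackMD can consistently generate well-structured layouts with annotated text instead of relying on arbitrary pictures. 3. It provides a more accessible and user-friendly experience for individuals with visual impairments. :notes: jserv ::: </s> ## Conclusion | Optimization|Cycle count|Instret value (hex)| | -------- | -------- | -------- | | O1 | 153162 | 2de | | O2 | 153485 | 2d4 | | O3 | 150953 | 2d4 | | Os | 154013 | 2de | | Ofast | 150953 | 2d4 | - The lowest cycle count (150953) is obtained with the O3 and Ofast optimizations, which give the same count. - The lowest instret value (0x2d4) is obtained with the O2, O3 and Ofast optimizations. - Note: in the -Os listing, `main` calls `get_instret` once before the measured region, saves the result on the stack, and later passes that saved value to `printf` unchanged, so the instret column is a single counter reading taken at the start rather than the number of instructions retired by the interpolation code. :::warning Show me the handwritten RISC-V assembly code. :notes: jserv :::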