# Assignment2: GNU Toolchain
contributed by <[linyu425](https://github.com/linyu425/ComputerArchitecture/tree/main/hw2)>

## Choose a Question
* Problem: I chose the **Convert RGB image into grayscale by using RV32I ISA** from [張正德](https://hackmd.io/@gofzKoaiTI6mFzp4FTuenw/HJg3Q-lb6)
* Motivation: Because his topic also applies to problem C, and the grayscale image conversion is interesting.

### The original code
::: spoiler C code
```c
#include <stdio.h>
#include <stdint.h>

void swap(int32_t *x, int32_t *y){
    int32_t t = *y;
    *y = *x;
    *x = t;
    return;
}

static inline int32_t getbit(int32_t value, int n)
{
    return (value >> n) & 1;
}

/* int32 multiply */
int32_t imul32(int32_t a, int32_t b)
{
    int32_t r = 0;
    while(b != 0){
        if (b & 1){
            r += a;
        }
        b = b >> 1;
        r = r >> 1;
    }
    r = r << 1;
    return r;
}

uint32_t count_leading_zeros(uint32_t x) {
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    /* count ones (population count) */
    x -= ((x >> 1) & 0x55555555);
    x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
    x = ((x >> 4) + x) & 0x0F0F0F0F;
    x += (x >> 8);
    x += (x >> 16);
    return (32 - (x & 0x7F));
}

float unsigned_fadd32(float a,float b){
    int32_t ia = *(int32_t *)&a, ib = *(int32_t *)&b;
    int32_t a_tmp = ia & 0x7FFFFFFF;
    int32_t b_tmp = ib & 0x7FFFFFFF;
    if (a_tmp < b_tmp)
        swap(&ia, &ib);
    /* mantissa */
    int32_t ma = ia & 0x7FFFFF | 0x800000;
    int32_t mb = ib & 0x7FFFFF | 0x800000;
    /* exponent */
    int32_t ea = (ia >> 23) & 0xFF;
    int32_t eb = (ib >> 23) & 0xFF;
    int32_t align = (ea - eb > 24) ? 24 : (ea - eb);
    mb >>= align;
    if ((ia ^ ib) >> 31) {
        ma -= mb;
    } else {
        ma += mb;
    }
    int32_t clz = count_leading_zeros(ma);
    int32_t shift = 0;
    if (clz <= 8) {
        shift = 8 - clz;
        ma >>= shift;
        ea += shift;
    } else {
        shift = clz - 8;
        ma <<= shift;
        ea -= shift;
    }
    int32_t r = ia & 0x80000000 | ea << 23 | ma & 0x7FFFFF;
    return *(float *) &r;
}

/* float32 multiply */
float fmul32(float a, float b)
{
    int32_t ia = *(int32_t *) &a, ib = *(int32_t *) &b;
    /* sign */
    int sa = ia >> 31;
    int sb = ib >> 31;
    /* mantissa */
    int32_t ma = (ia & 0x7FFFFF) | 0x800000;
    int32_t mb = (ib & 0x7FFFFF) | 0x800000;
    /* exponent */
    int32_t ea = ((ia >> 23) & 0xFF);
    int32_t eb = ((ib >> 23) & 0xFF);
    /* 'r' = result */
    int32_t mrtmp = imul32(ma, mb);
    int mshift = getbit(mrtmp, 24);
    int32_t mr = mrtmp >> mshift;
    int32_t ertmp = ea + eb - 127;
    // int32_t er = mshift ? inc(ertmp) : ertmp;
    int32_t er = mshift + ertmp;
    int sr = sa ^ sb;
    int32_t r = (sr << 31) | ((er & 0xFF) << 23) | (mr & 0x7FFFFF);
    return *(float *) &r;
}

int main(){
    float image[3][3][3] = {{{0.90251149,0.03265091,0.8831173},{0.2139775,0.0737501,0.0399187},{0.21527551,0.8881527,0.7846363}},
                            {{0.938326,0.64254336,0.0461617},{0.1413221,0.3307385,0.2508785},{0.3833867,0.689476,0.41071482}},
                            {{0.8925364,0.1480669,0.6812473},{0.9288288,0.23190344,0.3070017},{0.6414362,0.34707349,0.5142535}}};
    float grayscale_image[3][3];
    for(int i=0;i<3;i=i+1){
        for(int j=0;j<3;j=j+1){
            grayscale_image[i][j] = unsigned_fadd32(unsigned_fadd32(fmul32(image[i][j][0], 0.299) , fmul32(image[i][j][1], 0.587)) , fmul32(image[i][j][2], 0.114));
        }
    }
    for(int i=0;i<3;i=i+1){
        for(int j=0;j<3;j=j+1){
            printf("%f ",grayscale_image[i][j]);
        }
        printf("\n");
    }
}
```
:::

### Modified code
::: spoiler Modified C code
```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Counter readers; declared here, defined outside this file. */
extern uint64_t get_cycles(void);
extern uint64_t get_instret(void);

/* Reinterpret the bits of a float as a 32-bit integer. memcpy is used
 * instead of a pointer cast so the access does not break strict aliasing. */
static inline int32_t f32_to_bits(float f)
{
    int32_t bits;
    memcpy(&bits, &f, sizeof bits);
    return bits;
}

/* Reinterpret a 32-bit pattern as a float (inverse of f32_to_bits). */
static inline float bits_to_f32(uint32_t bits)
{
    float f;
    memcpy(&f, &bits, sizeof f);
    return f;
}

/* Exchange the two integers pointed to by x and y. */
void swap(int32_t *x, int32_t *y)
{
    int32_t t = *y;
    *y = *x;
    *x = t;
}

/* Return bit n of value (0 or 1). */
static inline int32_t getbit(int32_t value, int n)
{
    return (value >> n) & 1;
}

/*
 * Shift-and-add multiply used for the 24-bit significands in fmul32.
 *
 * The bits of b are consumed from the least significant end; after every
 * step the accumulator is halved, so only the upper part of the product is
 * kept. For b with its top set bit at position k the result approximates
 * (a * b) >> k, with the low bits truncated and bit 0 always clear.
 * Returns 0 when b is 0. The loop ends when b reaches 0, so b is expected
 * to be non-negative.
 */
int32_t imul32(int32_t a, int32_t b)
{
    int32_t r = 0;
    while (b != 0) {
        if (b & 1) {
            r += a;
        }
        b = b >> 1;
        r = r >> 1;
    }
    r = r << 1;
    return r;
}

/*
 * Number of leading zero bits in x (32 when x is 0).
 *
 * The highest set bit is first smeared into every lower position; the
 * population count of that mask is the index of the highest set bit plus
 * one, so 32 minus it is the leading-zero count.
 */
uint32_t count_leading_zeros(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    /* count ones (population count) */
    x -= ((x >> 1) & 0x55555555);
    x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
    x = ((x >> 4) + x) & 0x0F0F0F0F;
    x += (x >> 8);
    x += (x >> 16);
    return (32 - (x & 0x7F));
}

/*
 * Simplified float32 addition done with integer operations only.
 *
 * The operand with the larger magnitude is placed first, the smaller
 * significand is shifted right to align the exponents (at most 24 places),
 * the significands are added (same sign) or subtracted (different sign),
 * and the result is renormalised so the leading 1 sits at bit 23.
 *
 * Every input is treated as a normal number: the implicit leading 1 is
 * always inserted and no special case exists for zero, subnormals,
 * infinities or NaN. Shifted-out bits are truncated, not rounded, and an
 * exact cancellation (a + -a) does not yield 0.
 */
float unsigned_fadd32(float a, float b)
{
    int32_t ia = f32_to_bits(a), ib = f32_to_bits(b);

    /* make ia the operand with the larger magnitude */
    int32_t a_tmp = ia & 0x7FFFFFFF;
    int32_t b_tmp = ib & 0x7FFFFFFF;
    if (a_tmp < b_tmp)
        swap(&ia, &ib);

    /* mantissa */
    int32_t ma = (ia & 0x7FFFFF) | 0x800000;
    int32_t mb = (ib & 0x7FFFFF) | 0x800000;
    /* exponent */
    int32_t ea = (ia >> 23) & 0xFF;
    int32_t eb = (ib >> 23) & 0xFF;

    /* align the smaller operand to the larger exponent */
    int32_t align = (ea - eb > 24) ? 24 : (ea - eb);
    mb >>= align;

    /* the sign bit of ia ^ ib is set when the operands differ in sign */
    if ((ia ^ ib) < 0) {
        ma -= mb;
    } else {
        ma += mb;
    }

    /* renormalise: a normalised 24-bit significand has 8 leading zeros */
    int32_t clz = count_leading_zeros(ma);
    int32_t shift = 0;
    if (clz <= 8) {
        shift = 8 - clz;
        ma >>= shift;
        ea += shift;
    } else {
        shift = clz - 8;
        ma <<= shift;
        ea -= shift;
    }

    /* sign of the larger operand | exponent | fraction */
    uint32_t r = ((uint32_t) ia & 0x80000000u) | ((uint32_t) ea << 23) |
                 ((uint32_t) ma & 0x7FFFFFu);
    return bits_to_f32(r);
}

/*
 * Simplified float32 multiplication done with integer operations only.
 *
 * The significands (with the implicit leading 1) are multiplied with
 * imul32, the exponents are added and re-biased, and the product is
 * shifted down one place when it reached bit 24. The sign is the XOR of
 * the operand signs.
 *
 * As in unsigned_fadd32, inputs are treated as normal numbers (no zero,
 * subnormal, infinity or NaN handling), the product is truncated, and the
 * exponent is simply masked to 8 bits.
 */
float fmul32(float a, float b)
{
    int32_t ia = f32_to_bits(a), ib = f32_to_bits(b);

    /* sign, computed in unsigned arithmetic to avoid shifting a negative int */
    uint32_t sign = (uint32_t) (ia ^ ib) & 0x80000000u;
    /* mantissa */
    int32_t ma = (ia & 0x7FFFFF) | 0x800000;
    int32_t mb = (ib & 0x7FFFFF) | 0x800000;
    /* exponent */
    int32_t ea = ((ia >> 23) & 0xFF);
    int32_t eb = ((ib >> 23) & 0xFF);

    /* 'r' = result */
    int32_t mrtmp = imul32(ma, mb);
    int mshift = getbit(mrtmp, 24);   /* 1 when the product reached bit 24 */
    int32_t mr = mrtmp >> mshift;
    int32_t ertmp = ea + eb - 127;    /* remove the doubled bias */
    int32_t er = mshift + ertmp;

    uint32_t r = sign | (((uint32_t) er & 0xFFu) << 23) |
                 ((uint32_t) mr & 0x7FFFFFu);
    return bits_to_f32(r);
}

/*
 * Convert a fixed 3x3 RGB image to grayscale with the weights
 * 0.299 / 0.587 / 0.114, print the result, then print the counters.
 */
int main(void)
{
    /* measure cycles */
    /* NOTE(review): instret is sampled once, before the workload, and that
     * start value is what gets printed below; it is not the number of
     * instructions executed by the conversion. To report that, sample
     * get_instret() again after the workload and print the difference
     * (the results recorded in this note would then need re-measuring). */
    uint64_t instret = get_instret();
    uint64_t oldcount = get_cycles();

    float image[3][3][3] = {
        {{0.90251149, 0.03265091, 0.8831173},
         {0.2139775, 0.0737501, 0.0399187},
         {0.21527551, 0.8881527, 0.7846363}},
        {{0.938326, 0.64254336, 0.0461617},
         {0.1413221, 0.3307385, 0.2508785},
         {0.3833867, 0.689476, 0.41071482}},
        {{0.8925364, 0.1480669, 0.6812473},
         {0.9288288, 0.23190344, 0.3070017},
         {0.6414362, 0.34707349, 0.5142535}}};
    float grayscale_image[3][3];

    for (int i = 0; i < 3; i = i + 1) {
        for (int j = 0; j < 3; j = j + 1) {
            grayscale_image[i][j] = unsigned_fadd32(
                unsigned_fadd32(fmul32(image[i][j][0], 0.299),
                                fmul32(image[i][j][1], 0.587)),
                fmul32(image[i][j][2], 0.114));
        }
    }

    /* NOTE(review): the printing loop runs before the second get_cycles()
     * call, so the reported cycle count includes the printf calls as well
     * as the conversion itself. */
    for (int i = 0; i < 3; i = i + 1) {
        for (int j = 0; j < 3; j = j + 1) {
            printf("%f ", grayscale_image[i][j]);
        }
        printf("\n");
    }

    uint64_t cyclecount = get_cycles() - oldcount;
    printf("cycle count: %u\n", (unsigned int) cyclecount);
    printf("instret: %x\n", (unsigned) (instret & 0xffffffff));
    return 0;
}
```
:::

## **Compare Assembly Code**
### O1 Optimized Assembly Code
:::spoiler Assembly Code
```c
000100b0 <main>: 100b0: f2010113 add sp,sp,-224 100b4: 0c112e23 sw ra,220(sp) 100b8: 0b712e23 sw s7,188(sp) 100bc: 0c812c23 sw s0,216(sp) 100c0: 0c912a23 sw s1,212(sp) 100c4: 0d212823 sw s2,208(sp) 100c8: 0d312623 sw s3,204(sp) 100cc: 0d412423 sw s4,200(sp) 100d0: 0d512223 sw s5,196(sp) 100d4: 0d612023 sw s6,192(sp) 100d8: 0b812c23 sw s8,184(sp) 100dc: 0b912a23 sw s9,180(sp) 100e0: 0ba12823 sw s10,176(sp) 100e4: 0bb12623 sw s11,172(sp) 100e8: 248000ef jal 10330
<get_instret> 100ec: 00a12623 sw a0,12(sp) 100f0: 22c000ef jal 1031c <get_cycles> 100f4: 0001c7b7 lui a5,0x1c 100f8: 31478793 add a5,a5,788 # 1c314 <__trunctfdf2+0x5b4> 100fc: 00050b93 mv s7,a0 10100: 03410713 add a4,sp,52 10104: 06478893 add a7,a5,100 10108: 0007a803 lw a6,0(a5) 1010c: 0047a503 lw a0,4(a5) 10110: 0087a583 lw a1,8(a5) 10114: 00c7a603 lw a2,12(a5) 10118: 0107a683 lw a3,16(a5) 1011c: 01072023 sw a6,0(a4) 10120: 00a72223 sw a0,4(a4) 10124: 00b72423 sw a1,8(a4) 10128: 00c72623 sw a2,12(a4) 1012c: 00d72823 sw a3,16(a4) 10130: 01478793 add a5,a5,20 10134: 01470713 add a4,a4,20 10138: fd1798e3 bne a5,a7,10108 <main+0x58> 1013c: f301ad03 lw s10,-208(gp) # 1d740 <__SDATA_BEGIN__+0x68> 10140: 0007a683 lw a3,0(a5) 10144: f341ac83 lw s9,-204(gp) # 1d744 <__SDATA_BEGIN__+0x6c> 10148: 0047a783 lw a5,4(a5) 1014c: f381ac03 lw s8,-200(gp) # 1d748 <__SDATA_BEGIN__+0x70> 10150: 01010993 add s3,sp,16 10154: 00d72023 sw a3,0(a4) 10158: 00f72223 sw a5,4(a4) 1015c: 03410913 add s2,sp,52 10160: 00098a93 mv s5,s3 10164: 00300a13 li s4,3 10168: 000a8b13 mv s6,s5 1016c: 00090413 mv s0,s2 10170: 00000d93 li s11,0 10174: 00042503 lw a0,0(s0) 10178: 000d0593 mv a1,s10 1017c: 001d8d93 add s11,s11,1 10180: 37c000ef jal 104fc <fmul32> 10184: 00050493 mv s1,a0 10188: 00442503 lw a0,4(s0) 1018c: 000c8593 mv a1,s9 10190: 00c40413 add s0,s0,12 10194: 368000ef jal 104fc <fmul32> 10198: 00050593 mv a1,a0 1019c: 00048513 mv a0,s1 101a0: 274000ef jal 10414 <unsigned_fadd32> 101a4: 00050493 mv s1,a0 101a8: ffc42503 lw a0,-4(s0) 101ac: 000c0593 mv a1,s8 101b0: 004b0b13 add s6,s6,4 101b4: 348000ef jal 104fc <fmul32> 101b8: 00050593 mv a1,a0 101bc: 00048513 mv a0,s1 101c0: 254000ef jal 10414 <unsigned_fadd32> 101c4: feab2e23 sw a0,-4(s6) 101c8: fb4d96e3 bne s11,s4,10174 <main+0xc4> 101cc: 02490913 add s2,s2,36 101d0: 0a010793 add a5,sp,160 101d4: 00ca8a93 add s5,s5,12 101d8: f8f918e3 bne s2,a5,10168 <main+0xb8> 101dc: 02498a93 add s5,s3,36 101e0: 0001ca37 lui s4,0x1c 101e4: 00300913 li s2,3 
101e8: 00098493 mv s1,s3 101ec: 00000413 li s0,0 101f0: 0004a503 lw a0,0(s1) 101f4: 00140413 add s0,s0,1 101f8: 00448493 add s1,s1,4 101fc: 38c000ef jal 10588 <__extendsfdf2> 10200: 00050613 mv a2,a0 10204: 00058693 mv a3,a1 10208: 010a0513 add a0,s4,16 # 1c010 <__trunctfdf2+0x2b0> 1020c: 06d000ef jal 10a78 <printf> 10210: ff2410e3 bne s0,s2,101f0 <main+0x140> 10214: 00a00513 li a0,10 10218: 00c98993 add s3,s3,12 1021c: 08f000ef jal 10aaa <putchar> 10220: fd5994e3 bne s3,s5,101e8 <main+0x138> 10224: 0f8000ef jal 1031c <get_cycles> 10228: 417505b3 sub a1,a0,s7 1022c: 0001c537 lui a0,0x1c 10230: 01450513 add a0,a0,20 # 1c014 <__trunctfdf2+0x2b4> 10234: 045000ef jal 10a78 <printf> 10238: 00c12583 lw a1,12(sp) 1023c: 0001c537 lui a0,0x1c 10240: 02850513 add a0,a0,40 # 1c028 <__trunctfdf2+0x2c8> 10244: 035000ef jal 10a78 <printf> 10248: 0dc12083 lw ra,220(sp) 1024c: 0d812403 lw s0,216(sp) 10250: 0d412483 lw s1,212(sp) 10254: 0d012903 lw s2,208(sp) 10258: 0cc12983 lw s3,204(sp) 1025c: 0c812a03 lw s4,200(sp) 10260: 0c412a83 lw s5,196(sp) 10264: 0c012b03 lw s6,192(sp) 10268: 0bc12b83 lw s7,188(sp) 1026c: 0b812c03 lw s8,184(sp) 10270: 0b412c83 lw s9,180(sp) 10274: 0b012d03 lw s10,176(sp) 10278: 0ac12d83 lw s11,172(sp) 1027c: 00000513 li a0,0 10280: 0e010113 add sp,sp,224 10284: 00008067 ret 00010344 <swap>: 10344: 00052703 lw a4,0(a0) 10348: 0005a783 lw a5,0(a1) 1034c: 00e5a023 sw a4,0(a1) 10350: 00f52023 sw a5,0(a0) 10354: 00008067 ret 00010358 <imul32>: 10358: 02058663 beqz a1,10384 <imul32+0x2c> 1035c: 00000713 li a4,0 10360: 0015f693 and a3,a1,1 10364: 00070793 mv a5,a4 10368: 4015d593 sra a1,a1,0x1 1036c: 00068463 beqz a3,10374 <imul32+0x1c> 10370: 00e507b3 add a5,a0,a4 10374: 4017d713 sra a4,a5,0x1 10378: fe0594e3 bnez a1,10360 <imul32+0x8> 1037c: ffe7f513 and a0,a5,-2 10380: 00008067 ret 10384: 00000513 li a0,0 10388: 00008067 ret 0001038c <count_leading_zeros>: 1038c: 00155793 srl a5,a0,0x1 10390: 00a7e533 or a0,a5,a0 10394: 00255793 srl a5,a0,0x2 10398: 00a7e7b3 or 
a5,a5,a0 1039c: 0047d513 srl a0,a5,0x4 103a0: 00f56533 or a0,a0,a5 103a4: 00855713 srl a4,a0,0x8 103a8: 00a76733 or a4,a4,a0 103ac: 01075793 srl a5,a4,0x10 103b0: 00e7e7b3 or a5,a5,a4 103b4: 555556b7 lui a3,0x55555 103b8: 0017d713 srl a4,a5,0x1 103bc: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x555377f9> 103c0: 00d77733 and a4,a4,a3 103c4: 40e787b3 sub a5,a5,a4 103c8: 333336b7 lui a3,0x33333 103cc: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x333155d7> 103d0: 0027d713 srl a4,a5,0x2 103d4: 00d77733 and a4,a4,a3 103d8: 00d7f7b3 and a5,a5,a3 103dc: 00f70733 add a4,a4,a5 103e0: 00475793 srl a5,a4,0x4 103e4: 0f0f16b7 lui a3,0xf0f1 103e8: 00e787b3 add a5,a5,a4 103ec: f0f68693 add a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d31b3> 103f0: 00d7f7b3 and a5,a5,a3 103f4: 0087d713 srl a4,a5,0x8 103f8: 00f70733 add a4,a4,a5 103fc: 01075793 srl a5,a4,0x10 10400: 00e787b3 add a5,a5,a4 10404: 07f7f793 and a5,a5,127 10408: 02000513 li a0,32 1040c: 40f50533 sub a0,a0,a5 10410: 00008067 ret 00010414 <unsigned_fadd32>: 10414: 800007b7 lui a5,0x80000 10418: ff010113 add sp,sp,-16 1041c: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe22a3> 10420: 00a7f733 and a4,a5,a0 10424: 00112623 sw ra,12(sp) 10428: 00812423 sw s0,8(sp) 1042c: 00912223 sw s1,4(sp) 10430: 01212023 sw s2,0(sp) 10434: 00b7f7b3 and a5,a5,a1 10438: 0af74463 blt a4,a5,104e0 <unsigned_fadd32+0xcc> 1043c: 00050913 mv s2,a0 10440: 00058693 mv a3,a1 10444: 008005b7 lui a1,0x800 10448: 41795413 sra s0,s2,0x17 1044c: 4176d793 sra a5,a3,0x17 10450: fff58713 add a4,a1,-1 # 7fffff <__BSS_END__+0x7e22a3> 10454: 0ff47413 zext.b s0,s0 10458: 0ff7f793 zext.b a5,a5 1045c: 00e97633 and a2,s2,a4 10460: 40f407b3 sub a5,s0,a5 10464: 00e6f733 and a4,a3,a4 10468: 01800513 li a0,24 1046c: 00b66633 or a2,a2,a1 10470: 00b76733 or a4,a4,a1 10474: 00f55463 bge a0,a5,1047c <unsigned_fadd32+0x68> 10478: 01800793 li a5,24 1047c: 40f75733 sra a4,a4,a5 10480: 00d946b3 xor a3,s2,a3 10484: 00e604b3 add s1,a2,a4 10488: 0006d463 bgez a3,10490 
<unsigned_fadd32+0x7c> 1048c: 40e604b3 sub s1,a2,a4 10490: 00048513 mv a0,s1 10494: ef9ff0ef jal 1038c <count_leading_zeros> 10498: 00800793 li a5,8 1049c: 04a7c863 blt a5,a0,104ec <unsigned_fadd32+0xd8> 104a0: 40a787b3 sub a5,a5,a0 104a4: 40f4d4b3 sra s1,s1,a5 104a8: 00f40433 add s0,s0,a5 104ac: 00949493 sll s1,s1,0x9 104b0: 0094d493 srl s1,s1,0x9 104b4: 01741413 sll s0,s0,0x17 104b8: 800007b7 lui a5,0x80000 104bc: 00946433 or s0,s0,s1 104c0: 00f97533 and a0,s2,a5 104c4: 00c12083 lw ra,12(sp) 104c8: 00a46533 or a0,s0,a0 104cc: 00812403 lw s0,8(sp) 104d0: 00412483 lw s1,4(sp) 104d4: 00012903 lw s2,0(sp) 104d8: 01010113 add sp,sp,16 104dc: 00008067 ret 104e0: 00050693 mv a3,a0 104e4: 00058913 mv s2,a1 104e8: f5dff06f j 10444 <unsigned_fadd32+0x30> 104ec: ff850513 add a0,a0,-8 104f0: 00a494b3 sll s1,s1,a0 104f4: 40a40433 sub s0,s0,a0 104f8: fb5ff06f j 104ac <unsigned_fadd32+0x98> 000104fc <fmul32>: 104fc: 008006b7 lui a3,0x800 10500: fff68793 add a5,a3,-1 # 7fffff <__BSS_END__+0x7e22a3> 10504: 00a7f8b3 and a7,a5,a0 10508: 41755713 sra a4,a0,0x17 1050c: 00b7f7b3 and a5,a5,a1 10510: 4175d313 sra t1,a1,0x17 10514: 00d8e8b3 or a7,a7,a3 10518: 00d7e7b3 or a5,a5,a3 1051c: 0ff77813 zext.b a6,a4 10520: 0ff37313 zext.b t1,t1 10524: 00000693 li a3,0 10528: 0017f613 and a2,a5,1 1052c: 00068713 mv a4,a3 10530: 4017d793 sra a5,a5,0x1 10534: 00060463 beqz a2,1053c <fmul32+0x40> 10538: 00d88733 add a4,a7,a3 1053c: 40175693 sra a3,a4,0x1 10540: fe0794e3 bnez a5,10528 <fmul32+0x2c> 10544: 41875693 sra a3,a4,0x18 10548: ffe77793 and a5,a4,-2 1054c: 00680733 add a4,a6,t1 10550: 40d7d7b3 sra a5,a5,a3 10554: f8170713 add a4,a4,-127 10558: 00d70733 add a4,a4,a3 1055c: 00a5c533 xor a0,a1,a0 10560: 800006b7 lui a3,0x80000 10564: 00979793 sll a5,a5,0x9 10568: 00d57533 and a0,a0,a3 1056c: 0097d793 srl a5,a5,0x9 10570: 7f8006b7 lui a3,0x7f800 10574: 01771713 sll a4,a4,0x17 10578: 00d77733 and a4,a4,a3 1057c: 00f56533 or a0,a0,a5 10580: 00a76533 or a0,a4,a0 10584: 00008067 ret ``` ::: ### elf 
size ``` text data bss dec hex filename 52702 1888 1528 56118 db36 main.elf ``` ### elf header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x100c2 Start of program headers: 52 (bytes into file) Start of section headers: 69836 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` ### execute ``` 0.389692 0.111821 0.675161 0.662995 0.264999 0.566176 0.431446 0.448845 0.454146 cycle count: 58723 instret: 2de inferior exit code 0 ``` :::danger Avoid using screenshots that solely contain plain text. Here are the reasons why: 1. Text-based content is more efficiently searchable than having to browse through images iteratively. 2. The rendering engine of HackMD can consistently generate well-structured layouts with annotated text instead of relying on arbitrary pictures. 3. It provides a more accessible and user-friendly experience for individuals with visual impairments. 
:notes: jserv ::: ### O2 Optimized Assembly code :::spoiler Assembly Code ```c 000100b0 <main>: 100b0: f2010113 add sp,sp,-224 100b4: 0c112e23 sw ra,220(sp) 100b8: 0b712e23 sw s7,188(sp) 100bc: 0c812c23 sw s0,216(sp) 100c0: 0c912a23 sw s1,212(sp) 100c4: 0d212823 sw s2,208(sp) 100c8: 0d312623 sw s3,204(sp) 100cc: 0d412423 sw s4,200(sp) 100d0: 0d512223 sw s5,196(sp) 100d4: 0d612023 sw s6,192(sp) 100d8: 0b812c23 sw s8,184(sp) 100dc: 0b912a23 sw s9,180(sp) 100e0: 0ba12823 sw s10,176(sp) 100e4: 0bb12623 sw s11,172(sp) 100e8: 248000ef jal 10330 <get_instret> 100ec: 00a12623 sw a0,12(sp) 100f0: 22c000ef jal 1031c <get_cycles> 100f4: 0001c7b7 lui a5,0x1c 100f8: 31478793 add a5,a5,788 # 1c314 <__trunctfdf2+0x5b4> 100fc: 00050b93 mv s7,a0 10100: 03410713 add a4,sp,52 10104: 06478893 add a7,a5,100 10108: 0007a803 lw a6,0(a5) 1010c: 0047a503 lw a0,4(a5) 10110: 0087a583 lw a1,8(a5) 10114: 00c7a603 lw a2,12(a5) 10118: 0107a683 lw a3,16(a5) 1011c: 01072023 sw a6,0(a4) 10120: 00a72223 sw a0,4(a4) 10124: 00b72423 sw a1,8(a4) 10128: 00c72623 sw a2,12(a4) 1012c: 00d72823 sw a3,16(a4) 10130: 01478793 add a5,a5,20 10134: 01470713 add a4,a4,20 10138: fd1798e3 bne a5,a7,10108 <main+0x58> 1013c: f301ad03 lw s10,-208(gp) # 1d740 <__SDATA_BEGIN__+0x68> 10140: 0007a683 lw a3,0(a5) 10144: f341ac83 lw s9,-204(gp) # 1d744 <__SDATA_BEGIN__+0x6c> 10148: 0047a783 lw a5,4(a5) 1014c: f381ac03 lw s8,-200(gp) # 1d748 <__SDATA_BEGIN__+0x70> 10150: 01010993 add s3,sp,16 10154: 00d72023 sw a3,0(a4) 10158: 00f72223 sw a5,4(a4) 1015c: 03410913 add s2,sp,52 10160: 00098a93 mv s5,s3 10164: 00300a13 li s4,3 10168: 000a8b13 mv s6,s5 1016c: 00090413 mv s0,s2 10170: 00000d93 li s11,0 10174: 00042503 lw a0,0(s0) 10178: 000d0593 mv a1,s10 1017c: 001d8d93 add s11,s11,1 10180: 37c000ef jal 104fc <fmul32> 10184: 00050493 mv s1,a0 10188: 00442503 lw a0,4(s0) 1018c: 000c8593 mv a1,s9 10190: 00c40413 add s0,s0,12 10194: 368000ef jal 104fc <fmul32> 10198: 00050593 mv a1,a0 1019c: 00048513 mv a0,s1 101a0: 274000ef jal 
10414 <unsigned_fadd32> 101a4: 00050493 mv s1,a0 101a8: ffc42503 lw a0,-4(s0) 101ac: 000c0593 mv a1,s8 101b0: 004b0b13 add s6,s6,4 101b4: 348000ef jal 104fc <fmul32> 101b8: 00050593 mv a1,a0 101bc: 00048513 mv a0,s1 101c0: 254000ef jal 10414 <unsigned_fadd32> 101c4: feab2e23 sw a0,-4(s6) 101c8: fb4d96e3 bne s11,s4,10174 <main+0xc4> 101cc: 02490913 add s2,s2,36 101d0: 0a010793 add a5,sp,160 101d4: 00ca8a93 add s5,s5,12 101d8: f8f918e3 bne s2,a5,10168 <main+0xb8> 101dc: 02498a93 add s5,s3,36 101e0: 0001ca37 lui s4,0x1c 101e4: 00300913 li s2,3 101e8: 00098493 mv s1,s3 101ec: 00000413 li s0,0 101f0: 0004a503 lw a0,0(s1) 101f4: 00140413 add s0,s0,1 101f8: 00448493 add s1,s1,4 101fc: 38c000ef jal 10588 <__extendsfdf2> 10200: 00050613 mv a2,a0 10204: 00058693 mv a3,a1 10208: 010a0513 add a0,s4,16 # 1c010 <__trunctfdf2+0x2b0> 1020c: 06d000ef jal 10a78 <printf> 10210: ff2410e3 bne s0,s2,101f0 <main+0x140> 10214: 00a00513 li a0,10 10218: 00c98993 add s3,s3,12 1021c: 08f000ef jal 10aaa <putchar> 10220: fd5994e3 bne s3,s5,101e8 <main+0x138> 10224: 0f8000ef jal 1031c <get_cycles> 10228: 417505b3 sub a1,a0,s7 1022c: 0001c537 lui a0,0x1c 10230: 01450513 add a0,a0,20 # 1c014 <__trunctfdf2+0x2b4> 10234: 045000ef jal 10a78 <printf> 10238: 00c12583 lw a1,12(sp) 1023c: 0001c537 lui a0,0x1c 10240: 02850513 add a0,a0,40 # 1c028 <__trunctfdf2+0x2c8> 10244: 035000ef jal 10a78 <printf> 10248: 0dc12083 lw ra,220(sp) 1024c: 0d812403 lw s0,216(sp) 10250: 0d412483 lw s1,212(sp) 10254: 0d012903 lw s2,208(sp) 10258: 0cc12983 lw s3,204(sp) 1025c: 0c812a03 lw s4,200(sp) 10260: 0c412a83 lw s5,196(sp) 10264: 0c012b03 lw s6,192(sp) 10268: 0bc12b83 lw s7,188(sp) 1026c: 0b812c03 lw s8,184(sp) 10270: 0b412c83 lw s9,180(sp) 10274: 0b012d03 lw s10,176(sp) 10278: 0ac12d83 lw s11,172(sp) 1027c: 00000513 li a0,0 10280: 0e010113 add sp,sp,224 10284: 00008067 ret 00010344 <swap>: 10344: 00052703 lw a4,0(a0) 10348: 0005a783 lw a5,0(a1) 1034c: 00e5a023 sw a4,0(a1) 10350: 00f52023 sw a5,0(a0) 10354: 00008067 ret 
00010358 <imul32>: 10358: 02058663 beqz a1,10384 <imul32+0x2c> 1035c: 00000713 li a4,0 10360: 0015f693 and a3,a1,1 10364: 00070793 mv a5,a4 10368: 4015d593 sra a1,a1,0x1 1036c: 00068463 beqz a3,10374 <imul32+0x1c> 10370: 00e507b3 add a5,a0,a4 10374: 4017d713 sra a4,a5,0x1 10378: fe0594e3 bnez a1,10360 <imul32+0x8> 1037c: ffe7f513 and a0,a5,-2 10380: 00008067 ret 10384: 00000513 li a0,0 10388: 00008067 ret 0001038c <count_leading_zeros>: 1038c: 00155793 srl a5,a0,0x1 10390: 00a7e533 or a0,a5,a0 10394: 00255793 srl a5,a0,0x2 10398: 00a7e7b3 or a5,a5,a0 1039c: 0047d513 srl a0,a5,0x4 103a0: 00f56533 or a0,a0,a5 103a4: 00855713 srl a4,a0,0x8 103a8: 00a76733 or a4,a4,a0 103ac: 01075793 srl a5,a4,0x10 103b0: 00e7e7b3 or a5,a5,a4 103b4: 555556b7 lui a3,0x55555 103b8: 0017d713 srl a4,a5,0x1 103bc: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x555377f9> 103c0: 00d77733 and a4,a4,a3 103c4: 40e787b3 sub a5,a5,a4 103c8: 333336b7 lui a3,0x33333 103cc: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x333155d7> 103d0: 0027d713 srl a4,a5,0x2 103d4: 00d77733 and a4,a4,a3 103d8: 00d7f7b3 and a5,a5,a3 103dc: 00f70733 add a4,a4,a5 103e0: 00475793 srl a5,a4,0x4 103e4: 0f0f16b7 lui a3,0xf0f1 103e8: 00e787b3 add a5,a5,a4 103ec: f0f68693 add a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d31b3> 103f0: 00d7f7b3 and a5,a5,a3 103f4: 0087d713 srl a4,a5,0x8 103f8: 00f70733 add a4,a4,a5 103fc: 01075793 srl a5,a4,0x10 10400: 00e787b3 add a5,a5,a4 10404: 07f7f793 and a5,a5,127 10408: 02000513 li a0,32 1040c: 40f50533 sub a0,a0,a5 10410: 00008067 ret 00010414 <unsigned_fadd32>: 10414: 800007b7 lui a5,0x80000 10418: ff010113 add sp,sp,-16 1041c: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe22a3> 10420: 00a7f733 and a4,a5,a0 10424: 00112623 sw ra,12(sp) 10428: 00812423 sw s0,8(sp) 1042c: 00912223 sw s1,4(sp) 10430: 01212023 sw s2,0(sp) 10434: 00b7f7b3 and a5,a5,a1 10438: 0af74463 blt a4,a5,104e0 <unsigned_fadd32+0xcc> 1043c: 00050913 mv s2,a0 10440: 00058693 mv a3,a1 10444: 008005b7 lui a1,0x800 
10448: 41795413 sra s0,s2,0x17 1044c: 4176d793 sra a5,a3,0x17 10450: fff58713 add a4,a1,-1 # 7fffff <__BSS_END__+0x7e22a3> 10454: 0ff47413 zext.b s0,s0 10458: 0ff7f793 zext.b a5,a5 1045c: 00e97633 and a2,s2,a4 10460: 40f407b3 sub a5,s0,a5 10464: 00e6f733 and a4,a3,a4 10468: 01800513 li a0,24 1046c: 00b66633 or a2,a2,a1 10470: 00b76733 or a4,a4,a1 10474: 00f55463 bge a0,a5,1047c <unsigned_fadd32+0x68> 10478: 01800793 li a5,24 1047c: 40f75733 sra a4,a4,a5 10480: 00d946b3 xor a3,s2,a3 10484: 00e604b3 add s1,a2,a4 10488: 0006d463 bgez a3,10490 <unsigned_fadd32+0x7c> 1048c: 40e604b3 sub s1,a2,a4 10490: 00048513 mv a0,s1 10494: ef9ff0ef jal 1038c <count_leading_zeros> 10498: 00800793 li a5,8 1049c: 04a7c863 blt a5,a0,104ec <unsigned_fadd32+0xd8> 104a0: 40a787b3 sub a5,a5,a0 104a4: 40f4d4b3 sra s1,s1,a5 104a8: 00f40433 add s0,s0,a5 104ac: 00949493 sll s1,s1,0x9 104b0: 0094d493 srl s1,s1,0x9 104b4: 01741413 sll s0,s0,0x17 104b8: 800007b7 lui a5,0x80000 104bc: 00946433 or s0,s0,s1 104c0: 00f97533 and a0,s2,a5 104c4: 00c12083 lw ra,12(sp) 104c8: 00a46533 or a0,s0,a0 104cc: 00812403 lw s0,8(sp) 104d0: 00412483 lw s1,4(sp) 104d4: 00012903 lw s2,0(sp) 104d8: 01010113 add sp,sp,16 104dc: 00008067 ret 104e0: 00050693 mv a3,a0 104e4: 00058913 mv s2,a1 104e8: f5dff06f j 10444 <unsigned_fadd32+0x30> 104ec: ff850513 add a0,a0,-8 104f0: 00a494b3 sll s1,s1,a0 104f4: 40a40433 sub s0,s0,a0 104f8: fb5ff06f j 104ac <unsigned_fadd32+0x98> 000104fc <fmul32>: 104fc: 008006b7 lui a3,0x800 10500: fff68793 add a5,a3,-1 # 7fffff <__BSS_END__+0x7e22a3> 10504: 00a7f8b3 and a7,a5,a0 10508: 41755713 sra a4,a0,0x17 1050c: 00b7f7b3 and a5,a5,a1 10510: 4175d313 sra t1,a1,0x17 10514: 00d8e8b3 or a7,a7,a3 10518: 00d7e7b3 or a5,a5,a3 1051c: 0ff77813 zext.b a6,a4 10520: 0ff37313 zext.b t1,t1 10524: 00000693 li a3,0 10528: 0017f613 and a2,a5,1 1052c: 00068713 mv a4,a3 10530: 4017d793 sra a5,a5,0x1 10534: 00060463 beqz a2,1053c <fmul32+0x40> 10538: 00d88733 add a4,a7,a3 1053c: 40175693 sra a3,a4,0x1 10540: 
fe0794e3 bnez a5,10528 <fmul32+0x2c> 10544: 41875693 sra a3,a4,0x18 10548: ffe77793 and a5,a4,-2 1054c: 00680733 add a4,a6,t1 10550: 40d7d7b3 sra a5,a5,a3 10554: f8170713 add a4,a4,-127 10558: 00d70733 add a4,a4,a3 1055c: 00a5c533 xor a0,a1,a0 10560: 800006b7 lui a3,0x80000 10564: 00979793 sll a5,a5,0x9 10568: 00d57533 and a0,a0,a3 1056c: 0097d793 srl a5,a5,0x9 10570: 7f8006b7 lui a3,0x7f800 10574: 01771713 sll a4,a4,0x17 10578: 00d77733 and a4,a4,a3 1057c: 00f56533 or a0,a0,a5 10580: 00a76533 or a0,a4,a0 10584: 00008067 ret ``` ::: ### elf size ``` text data bss dec hex filename 52674 1888 1528 56090 db1a main.elf ``` ### elf header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x1029a Start of program headers: 52 (bytes into file) Start of section headers: 69852 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` ### execute ``` 0.389692 0.111821 0.675161 0.662995 0.264999 0.566176 0.431446 0.448845 0.454146 cycle count: 58610 instret: 2de inferior exit code 0 ``` ### O3 Optimized Assembly code :::spoiler Assembly Code ```c 000100b0 <main>: 100b0: f2010113 add sp,sp,-224 100b4: 0c112e23 sw ra,220(sp) 100b8: 0c812c23 sw s0,216(sp) 100bc: 0c912a23 sw s1,212(sp) 100c0: 0d212823 sw s2,208(sp) 100c4: 0d312623 sw s3,204(sp) 100c8: 0d412423 sw s4,200(sp) 100cc: 0d512223 sw s5,196(sp) 100d0: 0d612023 sw s6,192(sp) 100d4: 0b712e23 sw s7,188(sp) 100d8: 0b812c23 sw s8,184(sp) 100dc: 0b912a23 sw s9,180(sp) 100e0: 0ba12823 sw s10,176(sp) 100e4: 0bb12623 sw s11,172(sp) 100e8: 3cc000ef jal 104b4 <get_instret> 100ec: 00a12423 sw a0,8(sp) 100f0: 3b0000ef jal 104a0 
<get_cycles> 100f4: 0001c7b7 lui a5,0x1c 100f8: 51c78793 add a5,a5,1308 # 1c51c <__trunctfdf2+0x5b0> 100fc: 00a12623 sw a0,12(sp) 10100: 03410713 add a4,sp,52 10104: 06478893 add a7,a5,100 10108: 0007a803 lw a6,0(a5) 1010c: 0047a503 lw a0,4(a5) 10110: 0087a583 lw a1,8(a5) 10114: 00c7a603 lw a2,12(a5) 10118: 0107a683 lw a3,16(a5) 1011c: 01072023 sw a6,0(a4) 10120: 00a72223 sw a0,4(a4) 10124: 00b72423 sw a1,8(a4) 10128: 00c72623 sw a2,12(a4) 1012c: 00d72823 sw a3,16(a4) 10130: 01478793 add a5,a5,20 10134: 01470713 add a4,a4,20 10138: fd1798e3 bne a5,a7,10108 <main+0x58> 1013c: 0007a683 lw a3,0(a5) 10140: 0047a783 lw a5,4(a5) 10144: 01010b13 add s6,sp,16 10148: 00800937 lui s2,0x800 1014c: 3e991cb7 lui s9,0x3e991 10150: 3f164c37 lui s8,0x3f164 10154: 3de98bb7 lui s7,0x3de98 10158: 00d72023 sw a3,0(a4) 1015c: 00f72223 sw a5,4(a4) 10160: 03410a93 add s5,sp,52 10164: 01612223 sw s6,4(sp) 10168: fff90d93 add s11,s2,-1 # 7fffff <__BSS_END__+0x7e1247> 1016c: 687c8c93 add s9,s9,1671 # 3e991687 <__BSS_END__+0x3e9728cf> 10170: 80000a37 lui s4,0x80000 10174: 7f8009b7 lui s3,0x7f800 10178: 5a2c0c13 add s8,s8,1442 # 3f1645a2 <__BSS_END__+0x3f1457ea> 1017c: 8d5b8b93 add s7,s7,-1835 # 3de978d5 <__BSS_END__+0x3de78b1d> 10180: 00412483 lw s1,4(sp) 10184: 000a8d13 mv s10,s5 10188: 00000413 li s0,0 1018c: 000d2e03 lw t3,0(s10) 10190: 009915b7 lui a1,0x991 10194: 01800513 li a0,24 10198: 01be7333 and t1,t3,s11 1019c: 417e5793 sra a5,t3,0x17 101a0: 01236333 or t1,t1,s2 101a4: 0ff7f893 zext.b a7,a5 101a8: 00000713 li a4,0 101ac: 68758593 add a1,a1,1671 # 991687 <__BSS_END__+0x9728cf> 101b0: 0015f793 and a5,a1,1 101b4: 00e30833 add a6,t1,a4 101b8: fff50513 add a0,a0,-1 101bc: 22078e63 beqz a5,103f8 <main+0x348> 101c0: 4015d593 sra a1,a1,0x1 101c4: 40185713 sra a4,a6,0x1 101c8: fe0514e3 bnez a0,101b0 <main+0x100> 101cc: 00080713 mv a4,a6 101d0: 41875593 sra a1,a4,0x18 101d4: 004d2303 lw t1,4(s10) 101d8: ffe77713 and a4,a4,-2 101dc: ffe88793 add a5,a7,-2 101e0: 019e4533 xor a0,t3,s9 101e4: 
40b75733 sra a4,a4,a1 101e8: 00b787b3 add a5,a5,a1 101ec: 01b77733 and a4,a4,s11 101f0: 01779793 sll a5,a5,0x17 101f4: 01457533 and a0,a0,s4 101f8: 00e56533 or a0,a0,a4 101fc: 01b37eb3 and t4,t1,s11 10200: 0137f733 and a4,a5,s3 10204: 009645b7 lui a1,0x964 10208: 41735793 sra a5,t1,0x17 1020c: 00e56533 or a0,a0,a4 10210: 012eeeb3 or t4,t4,s2 10214: 0ff7fe13 zext.b t3,a5 10218: 01800813 li a6,24 1021c: 00000713 li a4,0 10220: 5a258593 add a1,a1,1442 # 9645a2 <__BSS_END__+0x9457ea> 10224: 0015f793 and a5,a1,1 10228: 00ee88b3 add a7,t4,a4 1022c: fff80813 add a6,a6,-1 10230: 1a078a63 beqz a5,103e4 <main+0x334> 10234: 4015d593 sra a1,a1,0x1 10238: 4018d713 sra a4,a7,0x1 1023c: fe0814e3 bnez a6,10224 <main+0x174> 10240: 00088713 mv a4,a7 10244: 41875813 sra a6,a4,0x18 10248: fffe0793 add a5,t3,-1 1024c: ffe77713 and a4,a4,-2 10250: 018345b3 xor a1,t1,s8 10254: 41075733 sra a4,a4,a6 10258: 010787b3 add a5,a5,a6 1025c: 01b77733 and a4,a4,s11 10260: 0145f5b3 and a1,a1,s4 10264: 01779793 sll a5,a5,0x17 10268: 00e5e5b3 or a1,a1,a4 1026c: 0137f7b3 and a5,a5,s3 10270: 00f5e5b3 or a1,a1,a5 10274: 330000ef jal 105a4 <unsigned_fadd32> 10278: 008d2e83 lw t4,8(s10) 1027c: 00e985b7 lui a1,0xe98 10280: 01800813 li a6,24 10284: 01befe33 and t3,t4,s11 10288: 417ed793 sra a5,t4,0x17 1028c: 012e6e33 or t3,t3,s2 10290: 0ff7f313 zext.b t1,a5 10294: 00000713 li a4,0 10298: 8d558593 add a1,a1,-1835 # e978d5 <__BSS_END__+0xe78b1d> 1029c: 0015f793 and a5,a1,1 102a0: 00ee08b3 add a7,t3,a4 102a4: fff80813 add a6,a6,-1 102a8: 12078463 beqz a5,103d0 <main+0x320> 102ac: 4015d593 sra a1,a1,0x1 102b0: 4018d713 sra a4,a7,0x1 102b4: fe0814e3 bnez a6,1029c <main+0x1ec> 102b8: 00088713 mv a4,a7 102bc: 41875813 sra a6,a4,0x18 102c0: ffc30793 add a5,t1,-4 102c4: ffe77713 and a4,a4,-2 102c8: 017ec5b3 xor a1,t4,s7 102cc: 41075733 sra a4,a4,a6 102d0: 010787b3 add a5,a5,a6 102d4: 01b77733 and a4,a4,s11 102d8: 0145f5b3 and a1,a1,s4 102dc: 01779793 sll a5,a5,0x17 102e0: 0137f7b3 and a5,a5,s3 102e4: 00e5e5b3 or 
a1,a1,a4 102e8: 00f5e5b3 or a1,a1,a5 102ec: 2b8000ef jal 105a4 <unsigned_fadd32> 102f0: 00a4a023 sw a0,0(s1) 102f4: 00140413 add s0,s0,1 102f8: 00300793 li a5,3 102fc: 00cd0d13 add s10,s10,12 10300: 00448493 add s1,s1,4 10304: e8f414e3 bne s0,a5,1018c <main+0xdc> 10308: 00412783 lw a5,4(sp) 1030c: 024a8a93 add s5,s5,36 10310: 00c78793 add a5,a5,12 10314: 00f12223 sw a5,4(sp) 10318: 0a010793 add a5,sp,160 1031c: e75792e3 bne a5,s5,10180 <main+0xd0> 10320: 024b0a13 add s4,s6,36 10324: 0001c9b7 lui s3,0x1c 10328: 00300913 li s2,3 1032c: 000b0493 mv s1,s6 10330: 00000413 li s0,0 10334: 0004a503 lw a0,0(s1) 10338: 00140413 add s0,s0,1 1033c: 00448493 add s1,s1,4 10340: 458000ef jal 10798 <__extendsfdf2> 10344: 00050613 mv a2,a0 10348: 00058693 mv a3,a1 1034c: 21898513 add a0,s3,536 # 1c218 <__trunctfdf2+0x2ac> 10350: 139000ef jal 10c88 <printf> 10354: ff2410e3 bne s0,s2,10334 <main+0x284> 10358: 00a00513 li a0,10 1035c: 00cb0b13 add s6,s6,12 10360: 15b000ef jal 10cba <putchar> 10364: fd4b14e3 bne s6,s4,1032c <main+0x27c> 10368: 138000ef jal 104a0 <get_cycles> 1036c: 00c12783 lw a5,12(sp) 10370: 40f505b3 sub a1,a0,a5 10374: 0001c537 lui a0,0x1c 10378: 21c50513 add a0,a0,540 # 1c21c <__trunctfdf2+0x2b0> 1037c: 10d000ef jal 10c88 <printf> 10380: 00812583 lw a1,8(sp) 10384: 0001c537 lui a0,0x1c 10388: 23050513 add a0,a0,560 # 1c230 <__trunctfdf2+0x2c4> 1038c: 0fd000ef jal 10c88 <printf> 10390: 0dc12083 lw ra,220(sp) 10394: 0d812403 lw s0,216(sp) 10398: 0d412483 lw s1,212(sp) 1039c: 0d012903 lw s2,208(sp) 103a0: 0cc12983 lw s3,204(sp) 103a4: 0c812a03 lw s4,200(sp) 103a8: 0c412a83 lw s5,196(sp) 103ac: 0c012b03 lw s6,192(sp) 103b0: 0bc12b83 lw s7,188(sp) 103b4: 0b812c03 lw s8,184(sp) 103b8: 0b412c83 lw s9,180(sp) 103bc: 0b012d03 lw s10,176(sp) 103c0: 0ac12d83 lw s11,172(sp) 103c4: 00000513 li a0,0 103c8: 0e010113 add sp,sp,224 103cc: 00008067 ret 103d0: 40175793 sra a5,a4,0x1 103d4: 4015d593 sra a1,a1,0x1 103d8: ee0802e3 beqz a6,102bc <main+0x20c> 103dc: 00078713 mv a4,a5 
103e0: ebdff06f j 1029c <main+0x1ec> 103e4: 40175793 sra a5,a4,0x1 103e8: 4015d593 sra a1,a1,0x1 103ec: e4080ce3 beqz a6,10244 <main+0x194> 103f0: 00078713 mv a4,a5 103f4: e31ff06f j 10224 <main+0x174> 103f8: 40175793 sra a5,a4,0x1 103fc: 4015d593 sra a1,a1,0x1 10400: dc0508e3 beqz a0,101d0 <main+0x120> 10404: 00078713 mv a4,a5 10408: da9ff06f j 101b0 <main+0x100> 000104c8 <swap>: 104c8: 00052703 lw a4,0(a0) 104cc: 0005a783 lw a5,0(a1) 104d0: 00e5a023 sw a4,0(a1) 104d4: 00f52023 sw a5,0(a0) 104d8: 00008067 ret 000104dc <imul32>: 104dc: 02058c63 beqz a1,10514 <imul32+0x38> 104e0: 00000713 li a4,0 104e4: 0015f793 and a5,a1,1 104e8: 00a706b3 add a3,a4,a0 104ec: 4015d593 sra a1,a1,0x1 104f0: 00078a63 beqz a5,10504 <imul32+0x28> 104f4: 4016d713 sra a4,a3,0x1 104f8: fe0596e3 bnez a1,104e4 <imul32+0x8> 104fc: 00171513 sll a0,a4,0x1 10500: 00008067 ret 10504: 40175713 sra a4,a4,0x1 10508: fc059ee3 bnez a1,104e4 <imul32+0x8> 1050c: 00171513 sll a0,a4,0x1 10510: 00008067 ret 10514: 00000513 li a0,0 10518: 00008067 ret 0001051c <count_leading_zeros>: 1051c: 00155793 srl a5,a0,0x1 10520: 00a7e533 or a0,a5,a0 10524: 00255793 srl a5,a0,0x2 10528: 00a7e7b3 or a5,a5,a0 1052c: 0047d513 srl a0,a5,0x4 10530: 00f56533 or a0,a0,a5 10534: 00855713 srl a4,a0,0x8 10538: 00a76733 or a4,a4,a0 1053c: 01075793 srl a5,a4,0x10 10540: 00e7e7b3 or a5,a5,a4 10544: 555556b7 lui a3,0x55555 10548: 0017d713 srl a4,a5,0x1 1054c: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x5553679d> 10550: 00d77733 and a4,a4,a3 10554: 40e787b3 sub a5,a5,a4 10558: 333336b7 lui a3,0x33333 1055c: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x3331457b> 10560: 0027d713 srl a4,a5,0x2 10564: 00d77733 and a4,a4,a3 10568: 00d7f7b3 and a5,a5,a3 1056c: 00f70733 add a4,a4,a5 10570: 00475793 srl a5,a4,0x4 10574: 0f0f16b7 lui a3,0xf0f1 10578: 00e787b3 add a5,a5,a4 1057c: f0f68693 add a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d2157> 10580: 00d7f7b3 and a5,a5,a3 10584: 0087d713 srl a4,a5,0x8 10588: 00f70733 add a4,a4,a5 1058c: 
01075793 srl a5,a4,0x10 10590: 00e787b3 add a5,a5,a4 10594: 07f7f793 and a5,a5,127 10598: 02000513 li a0,32 1059c: 40f50533 sub a0,a0,a5 105a0: 00008067 ret 000105a4 <unsigned_fadd32>: 105a4: 800007b7 lui a5,0x80000 105a8: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe1247> 105ac: 00a7f733 and a4,a5,a0 105b0: 00b7f7b3 and a5,a5,a1 105b4: 00050813 mv a6,a0 105b8: 00058613 mv a2,a1 105bc: 00f74663 blt a4,a5,105c8 <unsigned_fadd32+0x24> 105c0: 00050613 mv a2,a0 105c4: 00058813 mv a6,a1 105c8: 008008b7 lui a7,0x800 105cc: 41765713 sra a4,a2,0x17 105d0: 41785793 sra a5,a6,0x17 105d4: fff88693 add a3,a7,-1 # 7fffff <__BSS_END__+0x7e1247> 105d8: 0ff77713 zext.b a4,a4 105dc: 0ff7f793 zext.b a5,a5 105e0: 00d675b3 and a1,a2,a3 105e4: 40f707b3 sub a5,a4,a5 105e8: 00d876b3 and a3,a6,a3 105ec: 01800513 li a0,24 105f0: 0115e5b3 or a1,a1,a7 105f4: 0116e6b3 or a3,a3,a7 105f8: 00f55463 bge a0,a5,10600 <unsigned_fadd32+0x5c> 105fc: 01800793 li a5,24 10600: 40f6d7b3 sra a5,a3,a5 10604: 01064833 xor a6,a2,a6 10608: 00f586b3 add a3,a1,a5 1060c: 00085463 bgez a6,10614 <unsigned_fadd32+0x70> 10610: 40f586b3 sub a3,a1,a5 10614: 0016d793 srl a5,a3,0x1 10618: 00d7e7b3 or a5,a5,a3 1061c: 0027d593 srl a1,a5,0x2 10620: 00b7e7b3 or a5,a5,a1 10624: 0047d593 srl a1,a5,0x4 10628: 00b7e7b3 or a5,a5,a1 1062c: 0087d593 srl a1,a5,0x8 10630: 00b7e7b3 or a5,a5,a1 10634: 0107d593 srl a1,a5,0x10 10638: 00b7e7b3 or a5,a5,a1 1063c: 55555537 lui a0,0x55555 10640: 0017d593 srl a1,a5,0x1 10644: 55550513 add a0,a0,1365 # 55555555 <__BSS_END__+0x5553679d> 10648: 00a5f5b3 and a1,a1,a0 1064c: 40b787b3 sub a5,a5,a1 10650: 33333537 lui a0,0x33333 10654: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x3331457b> 10658: 0027d593 srl a1,a5,0x2 1065c: 00a5f5b3 and a1,a1,a0 10660: 00a7f7b3 and a5,a5,a0 10664: 00f585b3 add a1,a1,a5 10668: 0045d793 srl a5,a1,0x4 1066c: 0f0f1537 lui a0,0xf0f1 10670: 00b787b3 add a5,a5,a1 10674: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d2157> 10678: 00a7f7b3 and a5,a5,a0 
1067c: 0087d593 srl a1,a5,0x8 10680: 00b787b3 add a5,a5,a1 10684: 0107d593 srl a1,a5,0x10 10688: 00b787b3 add a5,a5,a1 1068c: 07f7f793 and a5,a5,127 10690: 02000593 li a1,32 10694: 40f585b3 sub a1,a1,a5 10698: 00800513 li a0,8 1069c: 02b54863 blt a0,a1,106cc <unsigned_fadd32+0x128> 106a0: fe878793 add a5,a5,-24 106a4: 40f6d6b3 sra a3,a3,a5 106a8: 00f70733 add a4,a4,a5 106ac: 00969693 sll a3,a3,0x9 106b0: 0096d693 srl a3,a3,0x9 106b4: 800007b7 lui a5,0x80000 106b8: 01771713 sll a4,a4,0x17 106bc: 00d76733 or a4,a4,a3 106c0: 00f67533 and a0,a2,a5 106c4: 00a76533 or a0,a4,a0 106c8: 00008067 ret 106cc: 01800593 li a1,24 106d0: 40f587b3 sub a5,a1,a5 106d4: 00f696b3 sll a3,a3,a5 106d8: 40f70733 sub a4,a4,a5 106dc: 00969693 sll a3,a3,0x9 106e0: 0096d693 srl a3,a3,0x9 106e4: 800007b7 lui a5,0x80000 106e8: 01771713 sll a4,a4,0x17 106ec: 00d76733 or a4,a4,a3 106f0: 00f67533 and a0,a2,a5 106f4: 00a76533 or a0,a4,a0 106f8: 00008067 ret 000106fc <fmul32>: 106fc: 008006b7 lui a3,0x800 10700: fff68793 add a5,a3,-1 # 7fffff <__BSS_END__+0x7e1247> 10704: 00a7f8b3 and a7,a5,a0 10708: 41755713 sra a4,a0,0x17 1070c: 00b7f7b3 and a5,a5,a1 10710: 4175d313 sra t1,a1,0x17 10714: 00d8e8b3 or a7,a7,a3 10718: 00d7e7b3 or a5,a5,a3 1071c: 0ff77613 zext.b a2,a4 10720: 0ff37313 zext.b t1,t1 10724: 00000693 li a3,0 10728: 0017f713 and a4,a5,1 1072c: 00d88833 add a6,a7,a3 10730: 4017d793 sra a5,a5,0x1 10734: 04070a63 beqz a4,10788 <fmul32+0x8c> 10738: 40185693 sra a3,a6,0x1 1073c: fe0796e3 bnez a5,10728 <fmul32+0x2c> 10740: 00080693 mv a3,a6 10744: 4186d813 sra a6,a3,0x18 10748: ffe6f793 and a5,a3,-2 1074c: 00660733 add a4,a2,t1 10750: 4107d7b3 sra a5,a5,a6 10754: f8170713 add a4,a4,-127 10758: 01070733 add a4,a4,a6 1075c: 00a5c533 xor a0,a1,a0 10760: 800006b7 lui a3,0x80000 10764: 00979793 sll a5,a5,0x9 10768: 00d57533 and a0,a0,a3 1076c: 0097d793 srl a5,a5,0x9 10770: 7f8006b7 lui a3,0x7f800 10774: 01771713 sll a4,a4,0x17 10778: 00d77733 and a4,a4,a3 1077c: 00f56533 or a0,a0,a5 10780: 00a76533 or 
a0,a4,a0 10784: 00008067 ret 10788: 4016d713 sra a4,a3,0x1 1078c: fa078ce3 beqz a5,10744 <fmul32+0x48> 10790: 00070693 mv a3,a4 10794: f95ff06f j 10728 <fmul32+0x2c> ``` ::: ### elf size ``` text data bss dec hex filename 53198 1876 1528 56602 dd1a main.elf ``` ### elf header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x1041e Start of program headers: 52 (bytes into file) Start of section headers: 69944 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` ### execute ``` 0.389692 0.111821 0.675161 0.662995 0.264999 0.566176 0.431446 0.448845 0.454146 cycle count: 59137 instret: 2c8 inferior exit code 0 ``` ### Os Optimized Assembly code :::spoiler Assembly Code ```c 000100b0 <main>: 100b0: f2010113 add sp,sp,-224 100b4: 0c112e23 sw ra,220(sp) 100b8: 0c812c23 sw s0,216(sp) 100bc: 0c912a23 sw s1,212(sp) 100c0: 0d312623 sw s3,204(sp) 100c4: 0d412423 sw s4,200(sp) 100c8: 0b712e23 sw s7,188(sp) 100cc: 0b912a23 sw s9,180(sp) 100d0: 0ba12823 sw s10,176(sp) 100d4: 0bb12623 sw s11,172(sp) 100d8: 0d212823 sw s2,208(sp) 100dc: 0d512223 sw s5,196(sp) 100e0: 0d612023 sw s6,192(sp) 100e4: 0b812c23 sw s8,184(sp) 100e8: 208000ef jal 102f0 <get_instret> 100ec: 00a12623 sw a0,12(sp) 100f0: 1ec000ef jal 102dc <get_cycles> 100f4: 0001c5b7 lui a1,0x1c 100f8: 00050a13 mv s4,a0 100fc: 06c00613 li a2,108 10100: 2cc58593 add a1,a1,716 # 1c2cc <__trunctfdf2+0x5ae> 10104: 03410513 add a0,sp,52 10108: 4ed000ef jal 10df4 <memcpy> 1010c: f301ac83 lw s9,-208(gp) # 1d740 <__SDATA_BEGIN__+0x68> 10110: f341ad03 lw s10,-204(gp) # 1d744 <__SDATA_BEGIN__+0x6c> 10114: f381ad83 lw s11,-200(gp) 
# 1d748 <__SDATA_BEGIN__+0x70> 10118: 01010493 add s1,sp,16 1011c: 03410413 add s0,sp,52 10120: 00048b93 mv s7,s1 10124: 00300993 li s3,3 10128: 000b8b13 mv s6,s7 1012c: 00040913 mv s2,s0 10130: 00000a93 li s5,0 10134: 00092503 lw a0,0(s2) 10138: 000c8593 mv a1,s9 1013c: 001a8a93 add s5,s5,1 10140: 370000ef jal 104b0 <fmul32> 10144: 00050c13 mv s8,a0 10148: 00492503 lw a0,4(s2) 1014c: 000d0593 mv a1,s10 10150: 00c90913 add s2,s2,12 10154: 35c000ef jal 104b0 <fmul32> 10158: 00050593 mv a1,a0 1015c: 000c0513 mv a0,s8 10160: 268000ef jal 103c8 <unsigned_fadd32> 10164: 00050c13 mv s8,a0 10168: ffc92503 lw a0,-4(s2) 1016c: 000d8593 mv a1,s11 10170: 004b0b13 add s6,s6,4 10174: 33c000ef jal 104b0 <fmul32> 10178: 00050593 mv a1,a0 1017c: 000c0513 mv a0,s8 10180: 248000ef jal 103c8 <unsigned_fadd32> 10184: feab2e23 sw a0,-4(s6) 10188: fb3a96e3 bne s5,s3,10134 <main+0x84> 1018c: 02440413 add s0,s0,36 10190: 0a010793 add a5,sp,160 10194: 00cb8b93 add s7,s7,12 10198: f8f418e3 bne s0,a5,10128 <main+0x78> 1019c: 02448a93 add s5,s1,36 101a0: 0001cb37 lui s6,0x1c 101a4: 00300b93 li s7,3 101a8: 00048913 mv s2,s1 101ac: 00000413 li s0,0 101b0: 00092503 lw a0,0(s2) 101b4: 00140413 add s0,s0,1 101b8: 00490913 add s2,s2,4 101bc: 390000ef jal 1054c <__extendsfdf2> 101c0: 00050613 mv a2,a0 101c4: 00058693 mv a3,a1 101c8: fc8b0513 add a0,s6,-56 # 1bfc8 <__trunctfdf2+0x2aa> 101cc: 071000ef jal 10a3c <printf> 101d0: ff7410e3 bne s0,s7,101b0 <main+0x100> 101d4: 00a00513 li a0,10 101d8: 00c48493 add s1,s1,12 101dc: 093000ef jal 10a6e <putchar> 101e0: fd5494e3 bne s1,s5,101a8 <main+0xf8> 101e4: 0f8000ef jal 102dc <get_cycles> 101e8: 414505b3 sub a1,a0,s4 101ec: 0001c537 lui a0,0x1c 101f0: fcc50513 add a0,a0,-52 # 1bfcc <__trunctfdf2+0x2ae> 101f4: 049000ef jal 10a3c <printf> 101f8: 00c12583 lw a1,12(sp) 101fc: 0001c537 lui a0,0x1c 10200: fe050513 add a0,a0,-32 # 1bfe0 <__trunctfdf2+0x2c2> 10204: 039000ef jal 10a3c <printf> 10208: 0dc12083 lw ra,220(sp) 1020c: 0d812403 lw s0,216(sp) 10210: 
0d412483 lw s1,212(sp) 10214: 0d012903 lw s2,208(sp) 10218: 0cc12983 lw s3,204(sp) 1021c: 0c812a03 lw s4,200(sp) 10220: 0c412a83 lw s5,196(sp) 10224: 0c012b03 lw s6,192(sp) 10228: 0bc12b83 lw s7,188(sp) 1022c: 0b812c03 lw s8,184(sp) 10230: 0b412c83 lw s9,180(sp) 10234: 0b012d03 lw s10,176(sp) 10238: 0ac12d83 lw s11,172(sp) 1023c: 00000513 li a0,0 10240: 0e010113 add sp,sp,224 10244: 00008067 ret 00010304 <swap>: 10304: 00052703 lw a4,0(a0) 10308: 0005a783 lw a5,0(a1) 1030c: 00e5a023 sw a4,0(a1) 10310: 00f52023 sw a5,0(a0) 10314: 00008067 ret 00010318 <imul32>: 10318: 00000793 li a5,0 1031c: 00059663 bnez a1,10328 <imul32+0x10> 10320: 00179513 sll a0,a5,0x1 10324: 00008067 ret 10328: 0015f713 and a4,a1,1 1032c: 00070463 beqz a4,10334 <imul32+0x1c> 10330: 00a787b3 add a5,a5,a0 10334: 4015d593 sra a1,a1,0x1 10338: 4017d793 sra a5,a5,0x1 1033c: fe1ff06f j 1031c <imul32+0x4> 00010340 <count_leading_zeros>: 10340: 00155793 srl a5,a0,0x1 10344: 00a7e533 or a0,a5,a0 10348: 00255793 srl a5,a0,0x2 1034c: 00a7e7b3 or a5,a5,a0 10350: 0047d513 srl a0,a5,0x4 10354: 00f56533 or a0,a0,a5 10358: 00855713 srl a4,a0,0x8 1035c: 00a76733 or a4,a4,a0 10360: 01075793 srl a5,a4,0x10 10364: 00e7e7b3 or a5,a5,a4 10368: 555556b7 lui a3,0x55555 1036c: 0017d713 srl a4,a5,0x1 10370: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x555377f9> 10374: 00d77733 and a4,a4,a3 10378: 40e787b3 sub a5,a5,a4 1037c: 333336b7 lui a3,0x33333 10380: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x333155d7> 10384: 0027d713 srl a4,a5,0x2 10388: 00d77733 and a4,a4,a3 1038c: 00d7f7b3 and a5,a5,a3 10390: 00f70733 add a4,a4,a5 10394: 00475793 srl a5,a4,0x4 10398: 00e787b3 add a5,a5,a4 1039c: 0f0f1737 lui a4,0xf0f1 103a0: f0f70713 add a4,a4,-241 # f0f0f0f <__BSS_END__+0xf0d31b3> 103a4: 00e7f7b3 and a5,a5,a4 103a8: 0087d713 srl a4,a5,0x8 103ac: 00f70733 add a4,a4,a5 103b0: 01075793 srl a5,a4,0x10 103b4: 00e787b3 add a5,a5,a4 103b8: 07f7f793 and a5,a5,127 103bc: 02000513 li a0,32 103c0: 40f50533 sub a0,a0,a5 103c4: 
00008067 ret 000103c8 <unsigned_fadd32>: 103c8: 800007b7 lui a5,0x80000 103cc: ff010113 add sp,sp,-16 103d0: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe22a3> 103d4: 00a7f733 and a4,a5,a0 103d8: 00112623 sw ra,12(sp) 103dc: 00812423 sw s0,8(sp) 103e0: 00912223 sw s1,4(sp) 103e4: 01212023 sw s2,0(sp) 103e8: 00b7f7b3 and a5,a5,a1 103ec: 0af74463 blt a4,a5,10494 <unsigned_fadd32+0xcc> 103f0: 00050913 mv s2,a0 103f4: 00058693 mv a3,a1 103f8: 00800737 lui a4,0x800 103fc: fff70793 add a5,a4,-1 # 7fffff <__BSS_END__+0x7e22a3> 10400: 00f97633 and a2,s2,a5 10404: 00f6f7b3 and a5,a3,a5 10408: 00e66633 or a2,a2,a4 1040c: 00e7e7b3 or a5,a5,a4 10410: 41795413 sra s0,s2,0x17 10414: 4176d713 sra a4,a3,0x17 10418: 0ff47413 zext.b s0,s0 1041c: 0ff77713 zext.b a4,a4 10420: 40e40733 sub a4,s0,a4 10424: 01800593 li a1,24 10428: 00e5d463 bge a1,a4,10430 <unsigned_fadd32+0x68> 1042c: 01800713 li a4,24 10430: 40e7d7b3 sra a5,a5,a4 10434: 00d946b3 xor a3,s2,a3 10438: 00f604b3 add s1,a2,a5 1043c: 0006d463 bgez a3,10444 <unsigned_fadd32+0x7c> 10440: 40f604b3 sub s1,a2,a5 10444: 00048513 mv a0,s1 10448: ef9ff0ef jal 10340 <count_leading_zeros> 1044c: 00800793 li a5,8 10450: 04a7c863 blt a5,a0,104a0 <unsigned_fadd32+0xd8> 10454: 40a787b3 sub a5,a5,a0 10458: 40f4d4b3 sra s1,s1,a5 1045c: 00f40433 add s0,s0,a5 10460: 00949493 sll s1,s1,0x9 10464: 0094d493 srl s1,s1,0x9 10468: 01741413 sll s0,s0,0x17 1046c: 800007b7 lui a5,0x80000 10470: 00946433 or s0,s0,s1 10474: 00f97533 and a0,s2,a5 10478: 00c12083 lw ra,12(sp) 1047c: 00a46533 or a0,s0,a0 10480: 00812403 lw s0,8(sp) 10484: 00412483 lw s1,4(sp) 10488: 00012903 lw s2,0(sp) 1048c: 01010113 add sp,sp,16 10490: 00008067 ret 10494: 00050693 mv a3,a0 10498: 00058913 mv s2,a1 1049c: f5dff06f j 103f8 <unsigned_fadd32+0x30> 104a0: ff850513 add a0,a0,-8 104a4: 00a494b3 sll s1,s1,a0 104a8: 40a40433 sub s0,s0,a0 104ac: fb5ff06f j 10460 <unsigned_fadd32+0x98> 000104b0 <fmul32>: 104b0: ff010113 add sp,sp,-16 104b4: 008007b7 lui a5,0x800 104b8: 
01212023 sw s2,0(sp) 104bc: fff78913 add s2,a5,-1 # 7fffff <__BSS_END__+0x7e22a3> 104c0: 00812423 sw s0,8(sp) 104c4: 00912223 sw s1,4(sp) 104c8: 00058413 mv s0,a1 104cc: 00050493 mv s1,a0 104d0: 00b975b3 and a1,s2,a1 104d4: 00a97533 and a0,s2,a0 104d8: 00f5e5b3 or a1,a1,a5 104dc: 00f56533 or a0,a0,a5 104e0: 00112623 sw ra,12(sp) 104e4: e35ff0ef jal 10318 <imul32> 104e8: 4174d793 sra a5,s1,0x17 104ec: 41745693 sra a3,s0,0x17 104f0: 0ff6f693 zext.b a3,a3 104f4: 0ff7f793 zext.b a5,a5 104f8: 41855713 sra a4,a0,0x18 104fc: 00d787b3 add a5,a5,a3 10500: 00177713 and a4,a4,1 10504: f8178793 add a5,a5,-127 10508: 00e787b3 add a5,a5,a4 1050c: 7f8006b7 lui a3,0x7f800 10510: 01779793 sll a5,a5,0x17 10514: 00d7f7b3 and a5,a5,a3 10518: 00944433 xor s0,s0,s1 1051c: 800006b7 lui a3,0x80000 10520: 40e55533 sra a0,a0,a4 10524: 00d47433 and s0,s0,a3 10528: 01257533 and a0,a0,s2 1052c: 00c12083 lw ra,12(sp) 10530: 00a46533 or a0,s0,a0 10534: 00812403 lw s0,8(sp) 10538: 00412483 lw s1,4(sp) 1053c: 00012903 lw s2,0(sp) 10540: 00a7e533 or a0,a5,a0 10544: 01010113 add sp,sp,16 10548: 00008067 ret ``` ::: ### elf size ``` text data bss dec hex filename 52608 1888 1528 56024 dad8 main.elf ``` ### elf header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x1025a Start of program headers: 52 (bytes into file) Start of section headers: 69852 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` ### execute ``` 0.389692 0.111821 0.675161 0.662995 0.264999 0.566176 0.431446 0.448845 0.454146 cycle count: 59036 instret: 2de inferior exit code 0 ``` ### Ofast Optimized Assembly code 
:::spoiler Assembly Code ```c 000100b0 <main>: 100b0: f2010113 add sp,sp,-224 100b4: 0c112e23 sw ra,220(sp) 100b8: 0c812c23 sw s0,216(sp) 100bc: 0c912a23 sw s1,212(sp) 100c0: 0d212823 sw s2,208(sp) 100c4: 0d312623 sw s3,204(sp) 100c8: 0d412423 sw s4,200(sp) 100cc: 0d512223 sw s5,196(sp) 100d0: 0d612023 sw s6,192(sp) 100d4: 0b712e23 sw s7,188(sp) 100d8: 0b812c23 sw s8,184(sp) 100dc: 0b912a23 sw s9,180(sp) 100e0: 0ba12823 sw s10,176(sp) 100e4: 0bb12623 sw s11,172(sp) 100e8: 3cc000ef jal 104b4 <get_instret> 100ec: 00a12423 sw a0,8(sp) 100f0: 3b0000ef jal 104a0 <get_cycles> 100f4: 0001c7b7 lui a5,0x1c 100f8: 51c78793 add a5,a5,1308 # 1c51c <__trunctfdf2+0x5b0> 100fc: 00a12623 sw a0,12(sp) 10100: 03410713 add a4,sp,52 10104: 06478893 add a7,a5,100 10108: 0007a803 lw a6,0(a5) 1010c: 0047a503 lw a0,4(a5) 10110: 0087a583 lw a1,8(a5) 10114: 00c7a603 lw a2,12(a5) 10118: 0107a683 lw a3,16(a5) 1011c: 01072023 sw a6,0(a4) 10120: 00a72223 sw a0,4(a4) 10124: 00b72423 sw a1,8(a4) 10128: 00c72623 sw a2,12(a4) 1012c: 00d72823 sw a3,16(a4) 10130: 01478793 add a5,a5,20 10134: 01470713 add a4,a4,20 10138: fd1798e3 bne a5,a7,10108 <main+0x58> 1013c: 0007a683 lw a3,0(a5) 10140: 0047a783 lw a5,4(a5) 10144: 01010b13 add s6,sp,16 10148: 00800937 lui s2,0x800 1014c: 3e991cb7 lui s9,0x3e991 10150: 3f164c37 lui s8,0x3f164 10154: 3de98bb7 lui s7,0x3de98 10158: 00d72023 sw a3,0(a4) 1015c: 00f72223 sw a5,4(a4) 10160: 03410a93 add s5,sp,52 10164: 01612223 sw s6,4(sp) 10168: fff90d93 add s11,s2,-1 # 7fffff <__BSS_END__+0x7e1247> 1016c: 687c8c93 add s9,s9,1671 # 3e991687 <__BSS_END__+0x3e9728cf> 10170: 80000a37 lui s4,0x80000 10174: 7f8009b7 lui s3,0x7f800 10178: 5a2c0c13 add s8,s8,1442 # 3f1645a2 <__BSS_END__+0x3f1457ea> 1017c: 8d5b8b93 add s7,s7,-1835 # 3de978d5 <__BSS_END__+0x3de78b1d> 10180: 00412483 lw s1,4(sp) 10184: 000a8d13 mv s10,s5 10188: 00000413 li s0,0 1018c: 000d2e03 lw t3,0(s10) 10190: 009915b7 lui a1,0x991 10194: 01800513 li a0,24 10198: 01be7333 and t1,t3,s11 1019c: 417e5793 sra 
a5,t3,0x17 101a0: 01236333 or t1,t1,s2 101a4: 0ff7f893 zext.b a7,a5 101a8: 00000713 li a4,0 101ac: 68758593 add a1,a1,1671 # 991687 <__BSS_END__+0x9728cf> 101b0: 0015f793 and a5,a1,1 101b4: 00e30833 add a6,t1,a4 101b8: fff50513 add a0,a0,-1 101bc: 22078e63 beqz a5,103f8 <main+0x348> 101c0: 4015d593 sra a1,a1,0x1 101c4: 40185713 sra a4,a6,0x1 101c8: fe0514e3 bnez a0,101b0 <main+0x100> 101cc: 00080713 mv a4,a6 101d0: 41875593 sra a1,a4,0x18 101d4: 004d2303 lw t1,4(s10) 101d8: ffe77713 and a4,a4,-2 101dc: ffe88793 add a5,a7,-2 101e0: 019e4533 xor a0,t3,s9 101e4: 40b75733 sra a4,a4,a1 101e8: 00b787b3 add a5,a5,a1 101ec: 01b77733 and a4,a4,s11 101f0: 01779793 sll a5,a5,0x17 101f4: 01457533 and a0,a0,s4 101f8: 00e56533 or a0,a0,a4 101fc: 01b37eb3 and t4,t1,s11 10200: 0137f733 and a4,a5,s3 10204: 009645b7 lui a1,0x964 10208: 41735793 sra a5,t1,0x17 1020c: 00e56533 or a0,a0,a4 10210: 012eeeb3 or t4,t4,s2 10214: 0ff7fe13 zext.b t3,a5 10218: 01800813 li a6,24 1021c: 00000713 li a4,0 10220: 5a258593 add a1,a1,1442 # 9645a2 <__BSS_END__+0x9457ea> 10224: 0015f793 and a5,a1,1 10228: 00ee88b3 add a7,t4,a4 1022c: fff80813 add a6,a6,-1 10230: 1a078a63 beqz a5,103e4 <main+0x334> 10234: 4015d593 sra a1,a1,0x1 10238: 4018d713 sra a4,a7,0x1 1023c: fe0814e3 bnez a6,10224 <main+0x174> 10240: 00088713 mv a4,a7 10244: 41875813 sra a6,a4,0x18 10248: fffe0793 add a5,t3,-1 1024c: ffe77713 and a4,a4,-2 10250: 018345b3 xor a1,t1,s8 10254: 41075733 sra a4,a4,a6 10258: 010787b3 add a5,a5,a6 1025c: 01b77733 and a4,a4,s11 10260: 0145f5b3 and a1,a1,s4 10264: 01779793 sll a5,a5,0x17 10268: 00e5e5b3 or a1,a1,a4 1026c: 0137f7b3 and a5,a5,s3 10270: 00f5e5b3 or a1,a1,a5 10274: 330000ef jal 105a4 <unsigned_fadd32> 10278: 008d2e83 lw t4,8(s10) 1027c: 00e985b7 lui a1,0xe98 10280: 01800813 li a6,24 10284: 01befe33 and t3,t4,s11 10288: 417ed793 sra a5,t4,0x17 1028c: 012e6e33 or t3,t3,s2 10290: 0ff7f313 zext.b t1,a5 10294: 00000713 li a4,0 10298: 8d558593 add a1,a1,-1835 # e978d5 <__BSS_END__+0xe78b1d> 1029c: 
0015f793 and a5,a1,1 102a0: 00ee08b3 add a7,t3,a4 102a4: fff80813 add a6,a6,-1 102a8: 12078463 beqz a5,103d0 <main+0x320> 102ac: 4015d593 sra a1,a1,0x1 102b0: 4018d713 sra a4,a7,0x1 102b4: fe0814e3 bnez a6,1029c <main+0x1ec> 102b8: 00088713 mv a4,a7 102bc: 41875813 sra a6,a4,0x18 102c0: ffc30793 add a5,t1,-4 102c4: ffe77713 and a4,a4,-2 102c8: 017ec5b3 xor a1,t4,s7 102cc: 41075733 sra a4,a4,a6 102d0: 010787b3 add a5,a5,a6 102d4: 01b77733 and a4,a4,s11 102d8: 0145f5b3 and a1,a1,s4 102dc: 01779793 sll a5,a5,0x17 102e0: 0137f7b3 and a5,a5,s3 102e4: 00e5e5b3 or a1,a1,a4 102e8: 00f5e5b3 or a1,a1,a5 102ec: 2b8000ef jal 105a4 <unsigned_fadd32> 102f0: 00a4a023 sw a0,0(s1) 102f4: 00140413 add s0,s0,1 102f8: 00300793 li a5,3 102fc: 00cd0d13 add s10,s10,12 10300: 00448493 add s1,s1,4 10304: e8f414e3 bne s0,a5,1018c <main+0xdc> 10308: 00412783 lw a5,4(sp) 1030c: 024a8a93 add s5,s5,36 10310: 00c78793 add a5,a5,12 10314: 00f12223 sw a5,4(sp) 10318: 0a010793 add a5,sp,160 1031c: e75792e3 bne a5,s5,10180 <main+0xd0> 10320: 024b0a13 add s4,s6,36 10324: 0001c9b7 lui s3,0x1c 10328: 00300913 li s2,3 1032c: 000b0493 mv s1,s6 10330: 00000413 li s0,0 10334: 0004a503 lw a0,0(s1) 10338: 00140413 add s0,s0,1 1033c: 00448493 add s1,s1,4 10340: 458000ef jal 10798 <__extendsfdf2> 10344: 00050613 mv a2,a0 10348: 00058693 mv a3,a1 1034c: 21898513 add a0,s3,536 # 1c218 <__trunctfdf2+0x2ac> 10350: 139000ef jal 10c88 <printf> 10354: ff2410e3 bne s0,s2,10334 <main+0x284> 10358: 00a00513 li a0,10 1035c: 00cb0b13 add s6,s6,12 10360: 15b000ef jal 10cba <putchar> 10364: fd4b14e3 bne s6,s4,1032c <main+0x27c> 10368: 138000ef jal 104a0 <get_cycles> 1036c: 00c12783 lw a5,12(sp) 10370: 40f505b3 sub a1,a0,a5 10374: 0001c537 lui a0,0x1c 10378: 21c50513 add a0,a0,540 # 1c21c <__trunctfdf2+0x2b0> 1037c: 10d000ef jal 10c88 <printf> 10380: 00812583 lw a1,8(sp) 10384: 0001c537 lui a0,0x1c 10388: 23050513 add a0,a0,560 # 1c230 <__trunctfdf2+0x2c4> 1038c: 0fd000ef jal 10c88 <printf> 10390: 0dc12083 lw ra,220(sp) 
10394: 0d812403 lw s0,216(sp) 10398: 0d412483 lw s1,212(sp) 1039c: 0d012903 lw s2,208(sp) 103a0: 0cc12983 lw s3,204(sp) 103a4: 0c812a03 lw s4,200(sp) 103a8: 0c412a83 lw s5,196(sp) 103ac: 0c012b03 lw s6,192(sp) 103b0: 0bc12b83 lw s7,188(sp) 103b4: 0b812c03 lw s8,184(sp) 103b8: 0b412c83 lw s9,180(sp) 103bc: 0b012d03 lw s10,176(sp) 103c0: 0ac12d83 lw s11,172(sp) 103c4: 00000513 li a0,0 103c8: 0e010113 add sp,sp,224 103cc: 00008067 ret 103d0: 40175793 sra a5,a4,0x1 103d4: 4015d593 sra a1,a1,0x1 103d8: ee0802e3 beqz a6,102bc <main+0x20c> 103dc: 00078713 mv a4,a5 103e0: ebdff06f j 1029c <main+0x1ec> 103e4: 40175793 sra a5,a4,0x1 103e8: 4015d593 sra a1,a1,0x1 103ec: e4080ce3 beqz a6,10244 <main+0x194> 103f0: 00078713 mv a4,a5 103f4: e31ff06f j 10224 <main+0x174> 103f8: 40175793 sra a5,a4,0x1 103fc: 4015d593 sra a1,a1,0x1 10400: dc0508e3 beqz a0,101d0 <main+0x120> 10404: 00078713 mv a4,a5 10408: da9ff06f j 101b0 <main+0x100> 000104c8 <swap>: 104c8: 00052703 lw a4,0(a0) 104cc: 0005a783 lw a5,0(a1) 104d0: 00e5a023 sw a4,0(a1) 104d4: 00f52023 sw a5,0(a0) 104d8: 00008067 ret 000104dc <imul32>: 104dc: 02058c63 beqz a1,10514 <imul32+0x38> 104e0: 00000713 li a4,0 104e4: 0015f793 and a5,a1,1 104e8: 00a706b3 add a3,a4,a0 104ec: 4015d593 sra a1,a1,0x1 104f0: 00078a63 beqz a5,10504 <imul32+0x28> 104f4: 4016d713 sra a4,a3,0x1 104f8: fe0596e3 bnez a1,104e4 <imul32+0x8> 104fc: 00171513 sll a0,a4,0x1 10500: 00008067 ret 10504: 40175713 sra a4,a4,0x1 10508: fc059ee3 bnez a1,104e4 <imul32+0x8> 1050c: 00171513 sll a0,a4,0x1 10510: 00008067 ret 10514: 00000513 li a0,0 10518: 00008067 ret 0001051c <count_leading_zeros>: 1051c: 00155793 srl a5,a0,0x1 10520: 00a7e533 or a0,a5,a0 10524: 00255793 srl a5,a0,0x2 10528: 00a7e7b3 or a5,a5,a0 1052c: 0047d513 srl a0,a5,0x4 10530: 00f56533 or a0,a0,a5 10534: 00855713 srl a4,a0,0x8 10538: 00a76733 or a4,a4,a0 1053c: 01075793 srl a5,a4,0x10 10540: 00e7e7b3 or a5,a5,a4 10544: 555556b7 lui a3,0x55555 10548: 0017d713 srl a4,a5,0x1 1054c: 55568693 add 
a3,a3,1365 # 55555555 <__BSS_END__+0x5553679d> 10550: 00d77733 and a4,a4,a3 10554: 40e787b3 sub a5,a5,a4 10558: 333336b7 lui a3,0x33333 1055c: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x3331457b> 10560: 0027d713 srl a4,a5,0x2 10564: 00d77733 and a4,a4,a3 10568: 00d7f7b3 and a5,a5,a3 1056c: 00f70733 add a4,a4,a5 10570: 00475793 srl a5,a4,0x4 10574: 0f0f16b7 lui a3,0xf0f1 10578: 00e787b3 add a5,a5,a4 1057c: f0f68693 add a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d2157> 10580: 00d7f7b3 and a5,a5,a3 10584: 0087d713 srl a4,a5,0x8 10588: 00f70733 add a4,a4,a5 1058c: 01075793 srl a5,a4,0x10 10590: 00e787b3 add a5,a5,a4 10594: 07f7f793 and a5,a5,127 10598: 02000513 li a0,32 1059c: 40f50533 sub a0,a0,a5 105a0: 00008067 ret 000105a4 <unsigned_fadd32>: 105a4: 800007b7 lui a5,0x80000 105a8: fff78793 add a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe1247> 105ac: 00a7f733 and a4,a5,a0 105b0: 00b7f7b3 and a5,a5,a1 105b4: 00050813 mv a6,a0 105b8: 00058613 mv a2,a1 105bc: 00f74663 blt a4,a5,105c8 <unsigned_fadd32+0x24> 105c0: 00050613 mv a2,a0 105c4: 00058813 mv a6,a1 105c8: 008008b7 lui a7,0x800 105cc: 41765713 sra a4,a2,0x17 105d0: 41785793 sra a5,a6,0x17 105d4: fff88693 add a3,a7,-1 # 7fffff <__BSS_END__+0x7e1247> 105d8: 0ff77713 zext.b a4,a4 105dc: 0ff7f793 zext.b a5,a5 105e0: 00d675b3 and a1,a2,a3 105e4: 40f707b3 sub a5,a4,a5 105e8: 00d876b3 and a3,a6,a3 105ec: 01800513 li a0,24 105f0: 0115e5b3 or a1,a1,a7 105f4: 0116e6b3 or a3,a3,a7 105f8: 00f55463 bge a0,a5,10600 <unsigned_fadd32+0x5c> 105fc: 01800793 li a5,24 10600: 40f6d7b3 sra a5,a3,a5 10604: 01064833 xor a6,a2,a6 10608: 00f586b3 add a3,a1,a5 1060c: 00085463 bgez a6,10614 <unsigned_fadd32+0x70> 10610: 40f586b3 sub a3,a1,a5 10614: 0016d793 srl a5,a3,0x1 10618: 00d7e7b3 or a5,a5,a3 1061c: 0027d593 srl a1,a5,0x2 10620: 00b7e7b3 or a5,a5,a1 10624: 0047d593 srl a1,a5,0x4 10628: 00b7e7b3 or a5,a5,a1 1062c: 0087d593 srl a1,a5,0x8 10630: 00b7e7b3 or a5,a5,a1 10634: 0107d593 srl a1,a5,0x10 10638: 00b7e7b3 or a5,a5,a1 1063c: 55555537 lui 
a0,0x55555 10640: 0017d593 srl a1,a5,0x1 10644: 55550513 add a0,a0,1365 # 55555555 <__BSS_END__+0x5553679d> 10648: 00a5f5b3 and a1,a1,a0 1064c: 40b787b3 sub a5,a5,a1 10650: 33333537 lui a0,0x33333 10654: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x3331457b> 10658: 0027d593 srl a1,a5,0x2 1065c: 00a5f5b3 and a1,a1,a0 10660: 00a7f7b3 and a5,a5,a0 10664: 00f585b3 add a1,a1,a5 10668: 0045d793 srl a5,a1,0x4 1066c: 0f0f1537 lui a0,0xf0f1 10670: 00b787b3 add a5,a5,a1 10674: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d2157> 10678: 00a7f7b3 and a5,a5,a0 1067c: 0087d593 srl a1,a5,0x8 10680: 00b787b3 add a5,a5,a1 10684: 0107d593 srl a1,a5,0x10 10688: 00b787b3 add a5,a5,a1 1068c: 07f7f793 and a5,a5,127 10690: 02000593 li a1,32 10694: 40f585b3 sub a1,a1,a5 10698: 00800513 li a0,8 1069c: 02b54863 blt a0,a1,106cc <unsigned_fadd32+0x128> 106a0: fe878793 add a5,a5,-24 106a4: 40f6d6b3 sra a3,a3,a5 106a8: 00f70733 add a4,a4,a5 106ac: 00969693 sll a3,a3,0x9 106b0: 0096d693 srl a3,a3,0x9 106b4: 800007b7 lui a5,0x80000 106b8: 01771713 sll a4,a4,0x17 106bc: 00d76733 or a4,a4,a3 106c0: 00f67533 and a0,a2,a5 106c4: 00a76533 or a0,a4,a0 106c8: 00008067 ret 106cc: 01800593 li a1,24 106d0: 40f587b3 sub a5,a1,a5 106d4: 00f696b3 sll a3,a3,a5 106d8: 40f70733 sub a4,a4,a5 106dc: 00969693 sll a3,a3,0x9 106e0: 0096d693 srl a3,a3,0x9 106e4: 800007b7 lui a5,0x80000 106e8: 01771713 sll a4,a4,0x17 106ec: 00d76733 or a4,a4,a3 106f0: 00f67533 and a0,a2,a5 106f4: 00a76533 or a0,a4,a0 106f8: 00008067 ret 000106fc <fmul32>: 106fc: 008006b7 lui a3,0x800 10700: fff68793 add a5,a3,-1 # 7fffff <__BSS_END__+0x7e1247> 10704: 00a7f8b3 and a7,a5,a0 10708: 41755713 sra a4,a0,0x17 1070c: 00b7f7b3 and a5,a5,a1 10710: 4175d313 sra t1,a1,0x17 10714: 00d8e8b3 or a7,a7,a3 10718: 00d7e7b3 or a5,a5,a3 1071c: 0ff77613 zext.b a2,a4 10720: 0ff37313 zext.b t1,t1 10724: 00000693 li a3,0 10728: 0017f713 and a4,a5,1 1072c: 00d88833 add a6,a7,a3 10730: 4017d793 sra a5,a5,0x1 10734: 04070a63 beqz a4,10788 <fmul32+0x8c> 
10738: 40185693 sra a3,a6,0x1 1073c: fe0796e3 bnez a5,10728 <fmul32+0x2c> 10740: 00080693 mv a3,a6 10744: 4186d813 sra a6,a3,0x18 10748: ffe6f793 and a5,a3,-2 1074c: 00660733 add a4,a2,t1 10750: 4107d7b3 sra a5,a5,a6 10754: f8170713 add a4,a4,-127 10758: 01070733 add a4,a4,a6 1075c: 00a5c533 xor a0,a1,a0 10760: 800006b7 lui a3,0x80000 10764: 00979793 sll a5,a5,0x9 10768: 00d57533 and a0,a0,a3 1076c: 0097d793 srl a5,a5,0x9 10770: 7f8006b7 lui a3,0x7f800 10774: 01771713 sll a4,a4,0x17 10778: 00d77733 and a4,a4,a3 1077c: 00f56533 or a0,a0,a5 10780: 00a76533 or a0,a4,a0 10784: 00008067 ret 10788: 4016d713 sra a4,a3,0x1 1078c: fa078ce3 beqz a5,10744 <fmul32+0x48> 10790: 00070693 mv a3,a4 10794: f95ff06f j 10728 <fmul32+0x2c> ``` ::: ### elf size ``` text data bss dec hex filename 53198 1876 1528 56602 dd1a main.elf ``` ### elf header ``` ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x1041e Start of program headers: 52 (bytes into file) Start of section headers: 69944 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` ### execute ``` 0.389692 0.111821 0.675161 0.662995 0.264999 0.566176 0.431446 0.448845 0.454146 cycle count: 59137 instret: 2c8 inferior exit code 0 ``` ## Conclusion * O2 uses the minimum number of cycles. * O3 and Ofast use the maximum number of cycles. * Although O3 and Ofast have the lowest retired-instruction count (instret), they require more cycles. In contrast, O1 uses fewer cycles even though it has a higher instret. :::warning Show me the handwritten RISC-V assembly code. :notes: jserv :::