Try   HackMD

Assignment2: GNU Toolchain

contributed by <linyu425>

Choose a Question

  • Problem: I chose "Convert RGB image into grayscale by using RV32I ISA" by 張正德.
  • Motivation: His topic also builds on Problem C, and grayscale image conversion is an interesting application.

The original code

C code
#include <stdio.h>
#include <stdint.h>
/* Exchange the two 32-bit integers that x and y point to. */
void swap(int32_t *x, int32_t *y){
    const int32_t held = *x;

    *x = *y;
    *y = held;
}
/* Return bit n of value (bit 0 is the least significant bit) as 0 or 1. */
static inline int32_t getbit(int32_t value, int n)
{
    const int32_t shifted = value >> n;

    return shifted & 1;
}
/*
 * Scaled shift-and-add multiply used for the 24-bit mantissas in fmul32.
 * This is NOT a plain integer product.
 *
 * The multiplier is consumed from its least significant bit upwards.  For
 * every bit, a is conditionally added to the accumulator and the accumulator
 * is then halved, so low-order product bits are discarded as the loop runs.
 * After the loop the accumulator is doubled once, which leaves the least
 * significant bit of the result clear.
 *
 * When bit 23 is the highest set bit of both a and b, the result is roughly
 * (a * b) >> 23.
 *
 * a: multiplicand, expected to be non-negative.
 * b: multiplier.  Its bits are walked as an unsigned value so the loop
 *    always finishes within 32 iterations, even when b is negative (a signed
 *    arithmetic right shift of a negative value never reaches zero).
 *
 * Returns the truncated, scaled product; 0 when b is 0.
 */
int32_t imul32(int32_t a, int32_t b)
{
    uint32_t bits = (uint32_t) b;
    int32_t r = 0;

    while (bits != 0) {
        if (bits & 1u) {
            r += a;
        }
        bits >>= 1;
        r >>= 1;
    }
    return r << 1;
}
/*
 * Count the zero bits above the most significant set bit of x.
 * Returns 32 for x == 0 and 0 when bit 31 is set.
 */
uint32_t count_leading_zeros(uint32_t x) {
    /* Smear the highest set bit into every lower position. */
    for (unsigned step = 1; step < 32; step <<= 1) {
        x |= x >> step;
    }

    /* Population count of the smeared word via parallel partial sums. */
    uint32_t pairs = x - ((x >> 1) & 0x55555555);
    uint32_t nibbles = ((pairs >> 2) & 0x33333333) + (pairs & 0x33333333);
    uint32_t bytes = ((nibbles >> 4) + nibbles) & 0x0F0F0F0F;
    uint32_t halves = bytes + (bytes >> 8);
    uint32_t total = halves + (halves >> 16);

    return 32 - (total & 0x7F);
}

/*
 * Software single-precision addition built only from integer operations.
 *
 * Assumes float uses the 1/8/23-bit sign/exponent/mantissa layout that the
 * masks below encode.  Bits shifted out during alignment or normalisation
 * are truncated (no rounding), and there is no special handling for NaN,
 * infinity, subnormals, or exponent overflow/underflow.
 *
 * a, b: the operands; signs may differ.
 * Returns a + b.  Adding +/-0 returns the other operand unchanged, and an
 * exact cancellation returns +0.0f.
 */
float unsigned_fadd32(float a,float b){
    /* Read the bit patterns through a union: dereferencing a float through
       an int32_t pointer breaks the strict-aliasing rule. */
    union { float f; int32_t i; uint32_t u; } pa, pb, res;
    pa.f = a;
    pb.f = b;
    int32_t ia = pa.i, ib = pb.i;

    /* Keep the operand with the larger magnitude in ia so that the aligned
       mantissa difference below can never become negative. */
    if ((ia & 0x7FFFFFFF) < (ib & 0x7FFFFFFF))
        swap(&ia, &ib);

    /* x + (+/-0) == x.  Without this a zero operand would be given a hidden
       bit and be added as if it were a small non-zero number. */
    if ((ib & 0x7FFFFFFF) == 0) {
        res.i = ia;
        return res.f;
    }

    /* mantissa, with the implicit leading one made explicit */
    int32_t ma = (ia & 0x7FFFFF) | 0x800000;
    int32_t mb = (ib & 0x7FFFFF) | 0x800000;
    /* exponent (biased) */
    int32_t ea = (int32_t) (((uint32_t) ia >> 23) & 0xFF);
    int32_t eb = (int32_t) (((uint32_t) ib >> 23) & 0xFF);

    /* A 24-bit mantissa shifted right by 24 is already zero. */
    int32_t align = (ea - eb > 24) ? 24 : (ea - eb);

    mb >>= align;
    if ((ia ^ ib) < 0) {
        /* signs differ: subtract the smaller magnitude */
        ma -= mb;
    } else {
        ma += mb;
    }

    /* Exact cancellation: there is no leading one to normalise on. */
    if (ma == 0)
        return 0.0f;

    /* Renormalise so that the leading one sits at bit 23 again. */
    int32_t clz = (int32_t) count_leading_zeros((uint32_t) ma);
    int32_t shift = 0;
    if (clz <= 8) {
        shift = 8 - clz;
        ma >>= shift;
        ea += shift;
    } else {
        shift = clz - 8;
        ma <<= shift;
        ea -= shift;
    }

    /* Assemble in unsigned arithmetic so an out-of-range exponent cannot
       trigger a signed-shift overflow. */
    res.u = ((uint32_t) ia & 0x80000000u)
          | ((uint32_t) ea << 23)
          | ((uint32_t) ma & 0x7FFFFFu);
    return res.f;
}
/*
 * Software single-precision multiplication built only from integer
 * operations.
 *
 * Assumes float uses the 1/8/23-bit sign/exponent/mantissa layout that the
 * masks below encode.  The mantissa product comes from imul32 and is
 * truncated, the result exponent is reduced modulo 256, and there is no
 * special handling for NaN, infinity, or subnormals.
 *
 * a, b: the factors.
 * Returns a * b; if either factor is +/-0 the result is a zero carrying the
 * product's sign.
 */
float fmul32(float a, float b)
{
    /* Read the bit patterns through a union: dereferencing a float through
       an int32_t pointer breaks the strict-aliasing rule. */
    union { float f; uint32_t u; } pa, pb, res;
    pa.f = a;
    pb.f = b;
    const uint32_t ia = pa.u, ib = pb.u;

    /* sign of the product, kept in place at bit 31 */
    const uint32_t sign = (ia ^ ib) & 0x80000000u;

    /* 0 * x == 0.  Without this a zero factor would be given a hidden bit
       and produce a non-zero product. */
    if ((ia & 0x7FFFFFFFu) == 0 || (ib & 0x7FFFFFFFu) == 0) {
        res.u = sign;
        return res.f;
    }

    /* mantissa, with the implicit leading one made explicit */
    int32_t ma = (int32_t) ((ia & 0x7FFFFF) | 0x800000);
    int32_t mb = (int32_t) ((ib & 0x7FFFFF) | 0x800000);

    /* exponent (biased) */
    int32_t ea = (int32_t) ((ia >> 23) & 0xFF);
    int32_t eb = (int32_t) ((ib >> 23) & 0xFF);

    /* 'r' = result.  Bit 24 of the scaled product is set when the product
       of the two mantissas needs one extra normalisation shift. */
    int32_t mrtmp = imul32(ma, mb);
    int mshift = getbit(mrtmp, 24);

    int32_t mr = mrtmp >> mshift;
    int32_t er = ea + eb - 127 + mshift;

    res.u = sign | (((uint32_t) er & 0xFF) << 23) | ((uint32_t) mr & 0x7FFFFF);
    return res.f;
}


/*
 * Convert a hard-coded 3x3 RGB image to grayscale with the software float
 * routines (0.299 R + 0.587 G + 0.114 B per pixel) and print the result,
 * one image row per output line.
 */
int main(){
    float image[3][3][3] = {{{0.90251149,0.03265091,0.8831173},{0.2139775,0.0737501,0.0399187},{0.21527551,0.8881527,0.7846363}},
    {{0.938326,0.64254336,0.0461617},{0.1413221,0.3307385,0.2508785},{0.3833867,0.689476,0.41071482}},
    {{0.8925364,0.1480669,0.6812473},{0.9288288,0.23190344,0.3070017},{0.6414362,0.34707349,0.5142535}}};
    /* Channel weights; the double constants are narrowed to float, just as
       they are when passed straight to a float parameter. */
    const float weight_r = 0.299;
    const float weight_g = 0.587;
    const float weight_b = 0.114;
    float grayscale_image[3][3];

    for (int row = 0; row < 3; ++row) {
        for (int col = 0; col < 3; ++col) {
            const float *pixel = image[row][col];
            float red = fmul32(pixel[0], weight_r);
            float green = fmul32(pixel[1], weight_g);
            float blue = fmul32(pixel[2], weight_b);

            grayscale_image[row][col] = unsigned_fadd32(unsigned_fadd32(red, green), blue);
        }
    }

    for (int row = 0; row < 3; ++row) {
        for (int col = 0; col < 3; ++col) {
            printf("%f ", grayscale_image[row][col]);
        }
        printf("\n");
    }

}

Modified code

Modified C code
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Counter readers used by main() to bracket the measured region.  They are
 * only declared here; their definitions live outside this file.
 * NOTE(review): presumably implemented in assembly on top of the RISC-V
 * cycle / instret counters -- confirm against the accompanying source.
 */
extern uint64_t get_cycles();
extern uint64_t get_instret();

/* Exchange the two 32-bit integers that x and y point to. */
void swap(int32_t *x, int32_t *y){
    const int32_t held = *x;

    *x = *y;
    *y = held;
}
/* Return bit n of value (bit 0 is the least significant bit) as 0 or 1. */
static inline int32_t getbit(int32_t value, int n)
{
    const int32_t shifted = value >> n;

    return shifted & 1;
}
/*
 * Scaled shift-and-add multiply used for the 24-bit mantissas in fmul32.
 * This is NOT a plain integer product.
 *
 * The multiplier is consumed from its least significant bit upwards.  For
 * every bit, a is conditionally added to the accumulator and the accumulator
 * is then halved, so low-order product bits are discarded as the loop runs.
 * After the loop the accumulator is doubled once, which leaves the least
 * significant bit of the result clear.
 *
 * When bit 23 is the highest set bit of both a and b, the result is roughly
 * (a * b) >> 23.
 *
 * a: multiplicand, expected to be non-negative.
 * b: multiplier.  Its bits are walked as an unsigned value so the loop
 *    always finishes within 32 iterations, even when b is negative (a signed
 *    arithmetic right shift of a negative value never reaches zero).
 *
 * Returns the truncated, scaled product; 0 when b is 0.
 */
int32_t imul32(int32_t a, int32_t b)
{
    uint32_t bits = (uint32_t) b;
    int32_t r = 0;

    while (bits != 0) {
        if (bits & 1u) {
            r += a;
        }
        bits >>= 1;
        r >>= 1;
    }
    return r << 1;
}
/*
 * Count the zero bits above the most significant set bit of x.
 * Returns 32 for x == 0 and 0 when bit 31 is set.
 */
uint32_t count_leading_zeros(uint32_t x) {
    /* Smear the highest set bit into every lower position. */
    for (unsigned step = 1; step < 32; step <<= 1) {
        x |= x >> step;
    }

    /* Population count of the smeared word via parallel partial sums. */
    uint32_t pairs = x - ((x >> 1) & 0x55555555);
    uint32_t nibbles = ((pairs >> 2) & 0x33333333) + (pairs & 0x33333333);
    uint32_t bytes = ((nibbles >> 4) + nibbles) & 0x0F0F0F0F;
    uint32_t halves = bytes + (bytes >> 8);
    uint32_t total = halves + (halves >> 16);

    return 32 - (total & 0x7F);
}

/*
 * Software single-precision addition built only from integer operations.
 *
 * Assumes float uses the 1/8/23-bit sign/exponent/mantissa layout that the
 * masks below encode.  Bits shifted out during alignment or normalisation
 * are truncated (no rounding), and there is no special handling for NaN,
 * infinity, subnormals, or exponent overflow/underflow.
 *
 * a, b: the operands; signs may differ.
 * Returns a + b.  Adding +/-0 returns the other operand unchanged, and an
 * exact cancellation returns +0.0f.
 */
float unsigned_fadd32(float a,float b){
    /* Copy the bit patterns with memcpy: dereferencing a float through an
       int32_t pointer breaks the strict-aliasing rule. */
    int32_t ia, ib;
    memcpy(&ia, &a, sizeof ia);
    memcpy(&ib, &b, sizeof ib);

    /* Keep the operand with the larger magnitude in ia so that the aligned
       mantissa difference below can never become negative. */
    if ((ia & 0x7FFFFFFF) < (ib & 0x7FFFFFFF))
        swap(&ia, &ib);

    float result;

    /* x + (+/-0) == x.  Without this a zero operand would be given a hidden
       bit and be added as if it were a small non-zero number. */
    if ((ib & 0x7FFFFFFF) == 0) {
        memcpy(&result, &ia, sizeof result);
        return result;
    }

    /* mantissa, with the implicit leading one made explicit */
    int32_t ma = (ia & 0x7FFFFF) | 0x800000;
    int32_t mb = (ib & 0x7FFFFF) | 0x800000;
    /* exponent (biased) */
    int32_t ea = (int32_t) (((uint32_t) ia >> 23) & 0xFF);
    int32_t eb = (int32_t) (((uint32_t) ib >> 23) & 0xFF);

    /* A 24-bit mantissa shifted right by 24 is already zero. */
    int32_t align = (ea - eb > 24) ? 24 : (ea - eb);

    mb >>= align;
    if ((ia ^ ib) < 0) {
        /* signs differ: subtract the smaller magnitude */
        ma -= mb;
    } else {
        ma += mb;
    }

    /* Exact cancellation: there is no leading one to normalise on. */
    if (ma == 0)
        return 0.0f;

    /* Renormalise so that the leading one sits at bit 23 again. */
    int32_t clz = (int32_t) count_leading_zeros((uint32_t) ma);
    int32_t shift = 0;
    if (clz <= 8) {
        shift = 8 - clz;
        ma >>= shift;
        ea += shift;
    } else {
        shift = clz - 8;
        ma <<= shift;
        ea -= shift;
    }

    /* Assemble in unsigned arithmetic so an out-of-range exponent cannot
       trigger a signed-shift overflow. */
    uint32_t bits = ((uint32_t) ia & 0x80000000u)
                  | ((uint32_t) ea << 23)
                  | ((uint32_t) ma & 0x7FFFFFu);
    memcpy(&result, &bits, sizeof result);
    return result;
}
/*
 * Software single-precision multiplication built only from integer
 * operations.
 *
 * Assumes float uses the 1/8/23-bit sign/exponent/mantissa layout that the
 * masks below encode.  The mantissa product comes from imul32 and is
 * truncated, the result exponent is reduced modulo 256, and there is no
 * special handling for NaN, infinity, or subnormals.
 *
 * a, b: the factors.
 * Returns a * b; if either factor is +/-0 the result is a zero carrying the
 * product's sign.
 */
float fmul32(float a, float b)
{
    /* Copy the bit patterns with memcpy: dereferencing a float through an
       int32_t pointer breaks the strict-aliasing rule. */
    uint32_t ia, ib;
    memcpy(&ia, &a, sizeof ia);
    memcpy(&ib, &b, sizeof ib);

    /* sign of the product, kept in place at bit 31 */
    const uint32_t sign = (ia ^ ib) & 0x80000000u;

    uint32_t bits;
    float result;

    /* 0 * x == 0.  Without this a zero factor would be given a hidden bit
       and produce a non-zero product. */
    if ((ia & 0x7FFFFFFFu) == 0 || (ib & 0x7FFFFFFFu) == 0) {
        bits = sign;
        memcpy(&result, &bits, sizeof result);
        return result;
    }

    /* mantissa, with the implicit leading one made explicit */
    int32_t ma = (int32_t) ((ia & 0x7FFFFF) | 0x800000);
    int32_t mb = (int32_t) ((ib & 0x7FFFFF) | 0x800000);

    /* exponent (biased) */
    int32_t ea = (int32_t) ((ia >> 23) & 0xFF);
    int32_t eb = (int32_t) ((ib >> 23) & 0xFF);

    /* 'r' = result.  Bit 24 of the scaled product is set when the product
       of the two mantissas needs one extra normalisation shift. */
    int32_t mrtmp = imul32(ma, mb);
    int mshift = getbit(mrtmp, 24);

    int32_t mr = mrtmp >> mshift;
    int32_t er = ea + eb - 127 + mshift;

    bits = sign | (((uint32_t) er & 0xFF) << 23) | ((uint32_t) mr & 0x7FFFFF);
    memcpy(&result, &bits, sizeof result);
    return result;
}

/*
 * Taken from the Sparkle-suite which is a collection of lightweight symmetric
 * cryptographic algorithms currently in the final round of the NIST
 * standardization effort.
 * See https://sparkle-lwc.github.io/
 *
 * NOTE(review): no Sparkle routine is defined or called in this listing, so
 * this attribution and the two constants below look like leftovers from the
 * example this benchmark was adapted from -- confirm before removing them.
 */

#define WORDS 12
#define ROUNDS 7

/*
 * Benchmark driver: converts a hard-coded 3x3 RGB image to grayscale with
 * the software float routines (0.299 R + 0.587 G + 0.114 B per pixel),
 * prints the result one image row per line, and then reports how many
 * cycles and retired instructions the measured region took.
 *
 * The measured region covers the image set-up, the conversion and the
 * printing of the grayscale values.
 *
 * Returns 0.
 */
int main(void)
{
    /* measure cycles */
    uint64_t instret_start = get_instret();
    uint64_t oldcount = get_cycles();

    float image[3][3][3] = {{{0.90251149,0.03265091,0.8831173},{0.2139775,0.0737501,0.0399187},{0.21527551,0.8881527,0.7846363}},
    {{0.938326,0.64254336,0.0461617},{0.1413221,0.3307385,0.2508785},{0.3833867,0.689476,0.41071482}},
    {{0.8925364,0.1480669,0.6812473},{0.9288288,0.23190344,0.3070017},{0.6414362,0.34707349,0.5142535}}};
    float grayscale_image[3][3];
    for(int i=0;i<3;i=i+1){
        for(int j=0;j<3;j=j+1){
            grayscale_image[i][j] = unsigned_fadd32(unsigned_fadd32(fmul32(image[i][j][0], 0.299) , fmul32(image[i][j][1], 0.587)) , fmul32(image[i][j][2], 0.114));
        }
    }
    for(int i=0;i<3;i=i+1){
        for(int j=0;j<3;j=j+1){
            printf("%f ",grayscale_image[i][j]);
        }
        printf("\n");
    }

    uint64_t cyclecount = get_cycles() - oldcount;
    /* Report the instructions retired inside the measured region, not the
       raw counter value that happened to be current when it started. */
    uint64_t instret = get_instret() - instret_start;

    printf("cycle count: %u\n", (unsigned int) cyclecount);
    printf("instret: %x\n", (unsigned) (instret & 0xffffffff));

    return 0;
}

Compare Assembly Code

O1 Optimized Assembly Code

Assembly Code
000100b0 <main>:
   100b0:	f2010113          	add	sp,sp,-224
   100b4:	0c112e23          	sw	ra,220(sp)
   100b8:	0b712e23          	sw	s7,188(sp)
   100bc:	0c812c23          	sw	s0,216(sp)
   100c0:	0c912a23          	sw	s1,212(sp)
   100c4:	0d212823          	sw	s2,208(sp)
   100c8:	0d312623          	sw	s3,204(sp)
   100cc:	0d412423          	sw	s4,200(sp)
   100d0:	0d512223          	sw	s5,196(sp)
   100d4:	0d612023          	sw	s6,192(sp)
   100d8:	0b812c23          	sw	s8,184(sp)
   100dc:	0b912a23          	sw	s9,180(sp)
   100e0:	0ba12823          	sw	s10,176(sp)
   100e4:	0bb12623          	sw	s11,172(sp)
   100e8:	248000ef          	jal	10330 <get_instret>
   100ec:	00a12623          	sw	a0,12(sp)
   100f0:	22c000ef          	jal	1031c <get_cycles>
   100f4:	0001c7b7          	lui	a5,0x1c
   100f8:	31478793          	add	a5,a5,788 # 1c314 <__trunctfdf2+0x5b4>
   100fc:	00050b93          	mv	s7,a0
   10100:	03410713          	add	a4,sp,52
   10104:	06478893          	add	a7,a5,100
   10108:	0007a803          	lw	a6,0(a5)
   1010c:	0047a503          	lw	a0,4(a5)
   10110:	0087a583          	lw	a1,8(a5)
   10114:	00c7a603          	lw	a2,12(a5)
   10118:	0107a683          	lw	a3,16(a5)
   1011c:	01072023          	sw	a6,0(a4)
   10120:	00a72223          	sw	a0,4(a4)
   10124:	00b72423          	sw	a1,8(a4)
   10128:	00c72623          	sw	a2,12(a4)
   1012c:	00d72823          	sw	a3,16(a4)
   10130:	01478793          	add	a5,a5,20
   10134:	01470713          	add	a4,a4,20
   10138:	fd1798e3          	bne	a5,a7,10108 <main+0x58>
   1013c:	f301ad03          	lw	s10,-208(gp) # 1d740 <__SDATA_BEGIN__+0x68>
   10140:	0007a683          	lw	a3,0(a5)
   10144:	f341ac83          	lw	s9,-204(gp) # 1d744 <__SDATA_BEGIN__+0x6c>
   10148:	0047a783          	lw	a5,4(a5)
   1014c:	f381ac03          	lw	s8,-200(gp) # 1d748 <__SDATA_BEGIN__+0x70>
   10150:	01010993          	add	s3,sp,16
   10154:	00d72023          	sw	a3,0(a4)
   10158:	00f72223          	sw	a5,4(a4)
   1015c:	03410913          	add	s2,sp,52
   10160:	00098a93          	mv	s5,s3
   10164:	00300a13          	li	s4,3
   10168:	000a8b13          	mv	s6,s5
   1016c:	00090413          	mv	s0,s2
   10170:	00000d93          	li	s11,0
   10174:	00042503          	lw	a0,0(s0)
   10178:	000d0593          	mv	a1,s10
   1017c:	001d8d93          	add	s11,s11,1
   10180:	37c000ef          	jal	104fc <fmul32>
   10184:	00050493          	mv	s1,a0
   10188:	00442503          	lw	a0,4(s0)
   1018c:	000c8593          	mv	a1,s9
   10190:	00c40413          	add	s0,s0,12
   10194:	368000ef          	jal	104fc <fmul32>
   10198:	00050593          	mv	a1,a0
   1019c:	00048513          	mv	a0,s1
   101a0:	274000ef          	jal	10414 <unsigned_fadd32>
   101a4:	00050493          	mv	s1,a0
   101a8:	ffc42503          	lw	a0,-4(s0)
   101ac:	000c0593          	mv	a1,s8
   101b0:	004b0b13          	add	s6,s6,4
   101b4:	348000ef          	jal	104fc <fmul32>
   101b8:	00050593          	mv	a1,a0
   101bc:	00048513          	mv	a0,s1
   101c0:	254000ef          	jal	10414 <unsigned_fadd32>
   101c4:	feab2e23          	sw	a0,-4(s6)
   101c8:	fb4d96e3          	bne	s11,s4,10174 <main+0xc4>
   101cc:	02490913          	add	s2,s2,36
   101d0:	0a010793          	add	a5,sp,160
   101d4:	00ca8a93          	add	s5,s5,12
   101d8:	f8f918e3          	bne	s2,a5,10168 <main+0xb8>
   101dc:	02498a93          	add	s5,s3,36
   101e0:	0001ca37          	lui	s4,0x1c
   101e4:	00300913          	li	s2,3
   101e8:	00098493          	mv	s1,s3
   101ec:	00000413          	li	s0,0
   101f0:	0004a503          	lw	a0,0(s1)
   101f4:	00140413          	add	s0,s0,1
   101f8:	00448493          	add	s1,s1,4
   101fc:	38c000ef          	jal	10588 <__extendsfdf2>
   10200:	00050613          	mv	a2,a0
   10204:	00058693          	mv	a3,a1
   10208:	010a0513          	add	a0,s4,16 # 1c010 <__trunctfdf2+0x2b0>
   1020c:	06d000ef          	jal	10a78 <printf>
   10210:	ff2410e3          	bne	s0,s2,101f0 <main+0x140>
   10214:	00a00513          	li	a0,10
   10218:	00c98993          	add	s3,s3,12
   1021c:	08f000ef          	jal	10aaa <putchar>
   10220:	fd5994e3          	bne	s3,s5,101e8 <main+0x138>
   10224:	0f8000ef          	jal	1031c <get_cycles>
   10228:	417505b3          	sub	a1,a0,s7
   1022c:	0001c537          	lui	a0,0x1c
   10230:	01450513          	add	a0,a0,20 # 1c014 <__trunctfdf2+0x2b4>
   10234:	045000ef          	jal	10a78 <printf>
   10238:	00c12583          	lw	a1,12(sp)
   1023c:	0001c537          	lui	a0,0x1c
   10240:	02850513          	add	a0,a0,40 # 1c028 <__trunctfdf2+0x2c8>
   10244:	035000ef          	jal	10a78 <printf>
   10248:	0dc12083          	lw	ra,220(sp)
   1024c:	0d812403          	lw	s0,216(sp)
   10250:	0d412483          	lw	s1,212(sp)
   10254:	0d012903          	lw	s2,208(sp)
   10258:	0cc12983          	lw	s3,204(sp)
   1025c:	0c812a03          	lw	s4,200(sp)
   10260:	0c412a83          	lw	s5,196(sp)
   10264:	0c012b03          	lw	s6,192(sp)
   10268:	0bc12b83          	lw	s7,188(sp)
   1026c:	0b812c03          	lw	s8,184(sp)
   10270:	0b412c83          	lw	s9,180(sp)
   10274:	0b012d03          	lw	s10,176(sp)
   10278:	0ac12d83          	lw	s11,172(sp)
   1027c:	00000513          	li	a0,0
   10280:	0e010113          	add	sp,sp,224
   10284:	00008067          	ret

00010344 <swap>:
   10344:	00052703          	lw	a4,0(a0)
   10348:	0005a783          	lw	a5,0(a1)
   1034c:	00e5a023          	sw	a4,0(a1)
   10350:	00f52023          	sw	a5,0(a0)
   10354:	00008067          	ret

00010358 <imul32>:
   10358:	02058663          	beqz	a1,10384 <imul32+0x2c>
   1035c:	00000713          	li	a4,0
   10360:	0015f693          	and	a3,a1,1
   10364:	00070793          	mv	a5,a4
   10368:	4015d593          	sra	a1,a1,0x1
   1036c:	00068463          	beqz	a3,10374 <imul32+0x1c>
   10370:	00e507b3          	add	a5,a0,a4
   10374:	4017d713          	sra	a4,a5,0x1
   10378:	fe0594e3          	bnez	a1,10360 <imul32+0x8>
   1037c:	ffe7f513          	and	a0,a5,-2
   10380:	00008067          	ret
   10384:	00000513          	li	a0,0
   10388:	00008067          	ret

0001038c <count_leading_zeros>:
   1038c:	00155793          	srl	a5,a0,0x1
   10390:	00a7e533          	or	a0,a5,a0
   10394:	00255793          	srl	a5,a0,0x2
   10398:	00a7e7b3          	or	a5,a5,a0
   1039c:	0047d513          	srl	a0,a5,0x4
   103a0:	00f56533          	or	a0,a0,a5
   103a4:	00855713          	srl	a4,a0,0x8
   103a8:	00a76733          	or	a4,a4,a0
   103ac:	01075793          	srl	a5,a4,0x10
   103b0:	00e7e7b3          	or	a5,a5,a4
   103b4:	555556b7          	lui	a3,0x55555
   103b8:	0017d713          	srl	a4,a5,0x1
   103bc:	55568693          	add	a3,a3,1365 # 55555555 <__BSS_END__+0x555377f9>
   103c0:	00d77733          	and	a4,a4,a3
   103c4:	40e787b3          	sub	a5,a5,a4
   103c8:	333336b7          	lui	a3,0x33333
   103cc:	33368693          	add	a3,a3,819 # 33333333 <__BSS_END__+0x333155d7>
   103d0:	0027d713          	srl	a4,a5,0x2
   103d4:	00d77733          	and	a4,a4,a3
   103d8:	00d7f7b3          	and	a5,a5,a3
   103dc:	00f70733          	add	a4,a4,a5
   103e0:	00475793          	srl	a5,a4,0x4
   103e4:	0f0f16b7          	lui	a3,0xf0f1
   103e8:	00e787b3          	add	a5,a5,a4
   103ec:	f0f68693          	add	a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d31b3>
   103f0:	00d7f7b3          	and	a5,a5,a3
   103f4:	0087d713          	srl	a4,a5,0x8
   103f8:	00f70733          	add	a4,a4,a5
   103fc:	01075793          	srl	a5,a4,0x10
   10400:	00e787b3          	add	a5,a5,a4
   10404:	07f7f793          	and	a5,a5,127
   10408:	02000513          	li	a0,32
   1040c:	40f50533          	sub	a0,a0,a5
   10410:	00008067          	ret

00010414 <unsigned_fadd32>:
   10414:	800007b7          	lui	a5,0x80000
   10418:	ff010113          	add	sp,sp,-16
   1041c:	fff78793          	add	a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe22a3>
   10420:	00a7f733          	and	a4,a5,a0
   10424:	00112623          	sw	ra,12(sp)
   10428:	00812423          	sw	s0,8(sp)
   1042c:	00912223          	sw	s1,4(sp)
   10430:	01212023          	sw	s2,0(sp)
   10434:	00b7f7b3          	and	a5,a5,a1
   10438:	0af74463          	blt	a4,a5,104e0 <unsigned_fadd32+0xcc>
   1043c:	00050913          	mv	s2,a0
   10440:	00058693          	mv	a3,a1
   10444:	008005b7          	lui	a1,0x800
   10448:	41795413          	sra	s0,s2,0x17
   1044c:	4176d793          	sra	a5,a3,0x17
   10450:	fff58713          	add	a4,a1,-1 # 7fffff <__BSS_END__+0x7e22a3>
   10454:	0ff47413          	zext.b	s0,s0
   10458:	0ff7f793          	zext.b	a5,a5
   1045c:	00e97633          	and	a2,s2,a4
   10460:	40f407b3          	sub	a5,s0,a5
   10464:	00e6f733          	and	a4,a3,a4
   10468:	01800513          	li	a0,24
   1046c:	00b66633          	or	a2,a2,a1
   10470:	00b76733          	or	a4,a4,a1
   10474:	00f55463          	bge	a0,a5,1047c <unsigned_fadd32+0x68>
   10478:	01800793          	li	a5,24
   1047c:	40f75733          	sra	a4,a4,a5
   10480:	00d946b3          	xor	a3,s2,a3
   10484:	00e604b3          	add	s1,a2,a4
   10488:	0006d463          	bgez	a3,10490 <unsigned_fadd32+0x7c>
   1048c:	40e604b3          	sub	s1,a2,a4
   10490:	00048513          	mv	a0,s1
   10494:	ef9ff0ef          	jal	1038c <count_leading_zeros>
   10498:	00800793          	li	a5,8
   1049c:	04a7c863          	blt	a5,a0,104ec <unsigned_fadd32+0xd8>
   104a0:	40a787b3          	sub	a5,a5,a0
   104a4:	40f4d4b3          	sra	s1,s1,a5
   104a8:	00f40433          	add	s0,s0,a5
   104ac:	00949493          	sll	s1,s1,0x9
   104b0:	0094d493          	srl	s1,s1,0x9
   104b4:	01741413          	sll	s0,s0,0x17
   104b8:	800007b7          	lui	a5,0x80000
   104bc:	00946433          	or	s0,s0,s1
   104c0:	00f97533          	and	a0,s2,a5
   104c4:	00c12083          	lw	ra,12(sp)
   104c8:	00a46533          	or	a0,s0,a0
   104cc:	00812403          	lw	s0,8(sp)
   104d0:	00412483          	lw	s1,4(sp)
   104d4:	00012903          	lw	s2,0(sp)
   104d8:	01010113          	add	sp,sp,16
   104dc:	00008067          	ret
   104e0:	00050693          	mv	a3,a0
   104e4:	00058913          	mv	s2,a1
   104e8:	f5dff06f          	j	10444 <unsigned_fadd32+0x30>
   104ec:	ff850513          	add	a0,a0,-8
   104f0:	00a494b3          	sll	s1,s1,a0
   104f4:	40a40433          	sub	s0,s0,a0
   104f8:	fb5ff06f          	j	104ac <unsigned_fadd32+0x98>

000104fc <fmul32>:
   104fc:	008006b7          	lui	a3,0x800
   10500:	fff68793          	add	a5,a3,-1 # 7fffff <__BSS_END__+0x7e22a3>
   10504:	00a7f8b3          	and	a7,a5,a0
   10508:	41755713          	sra	a4,a0,0x17
   1050c:	00b7f7b3          	and	a5,a5,a1
   10510:	4175d313          	sra	t1,a1,0x17
   10514:	00d8e8b3          	or	a7,a7,a3
   10518:	00d7e7b3          	or	a5,a5,a3
   1051c:	0ff77813          	zext.b	a6,a4
   10520:	0ff37313          	zext.b	t1,t1
   10524:	00000693          	li	a3,0
   10528:	0017f613          	and	a2,a5,1
   1052c:	00068713          	mv	a4,a3
   10530:	4017d793          	sra	a5,a5,0x1
   10534:	00060463          	beqz	a2,1053c <fmul32+0x40>
   10538:	00d88733          	add	a4,a7,a3
   1053c:	40175693          	sra	a3,a4,0x1
   10540:	fe0794e3          	bnez	a5,10528 <fmul32+0x2c>
   10544:	41875693          	sra	a3,a4,0x18
   10548:	ffe77793          	and	a5,a4,-2
   1054c:	00680733          	add	a4,a6,t1
   10550:	40d7d7b3          	sra	a5,a5,a3
   10554:	f8170713          	add	a4,a4,-127
   10558:	00d70733          	add	a4,a4,a3
   1055c:	00a5c533          	xor	a0,a1,a0
   10560:	800006b7          	lui	a3,0x80000
   10564:	00979793          	sll	a5,a5,0x9
   10568:	00d57533          	and	a0,a0,a3
   1056c:	0097d793          	srl	a5,a5,0x9
   10570:	7f8006b7          	lui	a3,0x7f800
   10574:	01771713          	sll	a4,a4,0x17
   10578:	00d77733          	and	a4,a4,a3
   1057c:	00f56533          	or	a0,a0,a5
   10580:	00a76533          	or	a0,a4,a0
   10584:	00008067          	ret

elf size

   text	   data	    bss	    dec	    hex	filename
  52702	   1888	   1528	  56118	   db36	main.elf

elf header

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x100c2
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69836 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

execute

0.389692 0.111821 0.675161 
0.662995 0.264999 0.566176 
0.431446 0.448845 0.454146 
cycle count: 58723
instret: 2de
inferior exit code 0

Avoid using screenshots that solely contain plain text. Here are the reasons why:

  1. Text-based content is more efficiently searchable than having to browse through images iteratively.
  2. The rendering engine of HackMD can consistently generate well-structured layouts with annotated text instead of relying on arbitrary pictures.
  3. It provides a more accessible and user-friendly experience for individuals with visual impairments.

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

O2 Optimized Assembly Code

Assembly Code
000100b0 <main>:
   100b0:	f2010113          	add	sp,sp,-224
   100b4:	0c112e23          	sw	ra,220(sp)
   100b8:	0b712e23          	sw	s7,188(sp)
   100bc:	0c812c23          	sw	s0,216(sp)
   100c0:	0c912a23          	sw	s1,212(sp)
   100c4:	0d212823          	sw	s2,208(sp)
   100c8:	0d312623          	sw	s3,204(sp)
   100cc:	0d412423          	sw	s4,200(sp)
   100d0:	0d512223          	sw	s5,196(sp)
   100d4:	0d612023          	sw	s6,192(sp)
   100d8:	0b812c23          	sw	s8,184(sp)
   100dc:	0b912a23          	sw	s9,180(sp)
   100e0:	0ba12823          	sw	s10,176(sp)
   100e4:	0bb12623          	sw	s11,172(sp)
   100e8:	248000ef          	jal	10330 <get_instret>
   100ec:	00a12623          	sw	a0,12(sp)
   100f0:	22c000ef          	jal	1031c <get_cycles>
   100f4:	0001c7b7          	lui	a5,0x1c
   100f8:	31478793          	add	a5,a5,788 # 1c314 <__trunctfdf2+0x5b4>
   100fc:	00050b93          	mv	s7,a0
   10100:	03410713          	add	a4,sp,52
   10104:	06478893          	add	a7,a5,100
   10108:	0007a803          	lw	a6,0(a5)
   1010c:	0047a503          	lw	a0,4(a5)
   10110:	0087a583          	lw	a1,8(a5)
   10114:	00c7a603          	lw	a2,12(a5)
   10118:	0107a683          	lw	a3,16(a5)
   1011c:	01072023          	sw	a6,0(a4)
   10120:	00a72223          	sw	a0,4(a4)
   10124:	00b72423          	sw	a1,8(a4)
   10128:	00c72623          	sw	a2,12(a4)
   1012c:	00d72823          	sw	a3,16(a4)
   10130:	01478793          	add	a5,a5,20
   10134:	01470713          	add	a4,a4,20
   10138:	fd1798e3          	bne	a5,a7,10108 <main+0x58>
   1013c:	f301ad03          	lw	s10,-208(gp) # 1d740 <__SDATA_BEGIN__+0x68>
   10140:	0007a683          	lw	a3,0(a5)
   10144:	f341ac83          	lw	s9,-204(gp) # 1d744 <__SDATA_BEGIN__+0x6c>
   10148:	0047a783          	lw	a5,4(a5)
   1014c:	f381ac03          	lw	s8,-200(gp) # 1d748 <__SDATA_BEGIN__+0x70>
   10150:	01010993          	add	s3,sp,16
   10154:	00d72023          	sw	a3,0(a4)
   10158:	00f72223          	sw	a5,4(a4)
   1015c:	03410913          	add	s2,sp,52
   10160:	00098a93          	mv	s5,s3
   10164:	00300a13          	li	s4,3
   10168:	000a8b13          	mv	s6,s5
   1016c:	00090413          	mv	s0,s2
   10170:	00000d93          	li	s11,0
   10174:	00042503          	lw	a0,0(s0)
   10178:	000d0593          	mv	a1,s10
   1017c:	001d8d93          	add	s11,s11,1
   10180:	37c000ef          	jal	104fc <fmul32>
   10184:	00050493          	mv	s1,a0
   10188:	00442503          	lw	a0,4(s0)
   1018c:	000c8593          	mv	a1,s9
   10190:	00c40413          	add	s0,s0,12
   10194:	368000ef          	jal	104fc <fmul32>
   10198:	00050593          	mv	a1,a0
   1019c:	00048513          	mv	a0,s1
   101a0:	274000ef          	jal	10414 <unsigned_fadd32>
   101a4:	00050493          	mv	s1,a0
   101a8:	ffc42503          	lw	a0,-4(s0)
   101ac:	000c0593          	mv	a1,s8
   101b0:	004b0b13          	add	s6,s6,4
   101b4:	348000ef          	jal	104fc <fmul32>
   101b8:	00050593          	mv	a1,a0
   101bc:	00048513          	mv	a0,s1
   101c0:	254000ef          	jal	10414 <unsigned_fadd32>
   101c4:	feab2e23          	sw	a0,-4(s6)
   101c8:	fb4d96e3          	bne	s11,s4,10174 <main+0xc4>
   101cc:	02490913          	add	s2,s2,36
   101d0:	0a010793          	add	a5,sp,160
   101d4:	00ca8a93          	add	s5,s5,12
   101d8:	f8f918e3          	bne	s2,a5,10168 <main+0xb8>
   101dc:	02498a93          	add	s5,s3,36
   101e0:	0001ca37          	lui	s4,0x1c
   101e4:	00300913          	li	s2,3
   101e8:	00098493          	mv	s1,s3
   101ec:	00000413          	li	s0,0
   101f0:	0004a503          	lw	a0,0(s1)
   101f4:	00140413          	add	s0,s0,1
   101f8:	00448493          	add	s1,s1,4
   101fc:	38c000ef          	jal	10588 <__extendsfdf2>
   10200:	00050613          	mv	a2,a0
   10204:	00058693          	mv	a3,a1
   10208:	010a0513          	add	a0,s4,16 # 1c010 <__trunctfdf2+0x2b0>
   1020c:	06d000ef          	jal	10a78 <printf>
   10210:	ff2410e3          	bne	s0,s2,101f0 <main+0x140>
   10214:	00a00513          	li	a0,10
   10218:	00c98993          	add	s3,s3,12
   1021c:	08f000ef          	jal	10aaa <putchar>
   10220:	fd5994e3          	bne	s3,s5,101e8 <main+0x138>
   10224:	0f8000ef          	jal	1031c <get_cycles>
   10228:	417505b3          	sub	a1,a0,s7
   1022c:	0001c537          	lui	a0,0x1c
   10230:	01450513          	add	a0,a0,20 # 1c014 <__trunctfdf2+0x2b4>
   10234:	045000ef          	jal	10a78 <printf>
   10238:	00c12583          	lw	a1,12(sp)
   1023c:	0001c537          	lui	a0,0x1c
   10240:	02850513          	add	a0,a0,40 # 1c028 <__trunctfdf2+0x2c8>
   10244:	035000ef          	jal	10a78 <printf>
   10248:	0dc12083          	lw	ra,220(sp)
   1024c:	0d812403          	lw	s0,216(sp)
   10250:	0d412483          	lw	s1,212(sp)
   10254:	0d012903          	lw	s2,208(sp)
   10258:	0cc12983          	lw	s3,204(sp)
   1025c:	0c812a03          	lw	s4,200(sp)
   10260:	0c412a83          	lw	s5,196(sp)
   10264:	0c012b03          	lw	s6,192(sp)
   10268:	0bc12b83          	lw	s7,188(sp)
   1026c:	0b812c03          	lw	s8,184(sp)
   10270:	0b412c83          	lw	s9,180(sp)
   10274:	0b012d03          	lw	s10,176(sp)
   10278:	0ac12d83          	lw	s11,172(sp)
   1027c:	00000513          	li	a0,0
   10280:	0e010113          	add	sp,sp,224
   10284:	00008067          	ret

00010344 <swap>:
   10344:	00052703          	lw	a4,0(a0)
   10348:	0005a783          	lw	a5,0(a1)
   1034c:	00e5a023          	sw	a4,0(a1)
   10350:	00f52023          	sw	a5,0(a0)
   10354:	00008067          	ret

00010358 <imul32>:
   10358:	02058663          	beqz	a1,10384 <imul32+0x2c>
   1035c:	00000713          	li	a4,0
   10360:	0015f693          	and	a3,a1,1
   10364:	00070793          	mv	a5,a4
   10368:	4015d593          	sra	a1,a1,0x1
   1036c:	00068463          	beqz	a3,10374 <imul32+0x1c>
   10370:	00e507b3          	add	a5,a0,a4
   10374:	4017d713          	sra	a4,a5,0x1
   10378:	fe0594e3          	bnez	a1,10360 <imul32+0x8>
   1037c:	ffe7f513          	and	a0,a5,-2
   10380:	00008067          	ret
   10384:	00000513          	li	a0,0
   10388:	00008067          	ret

0001038c <count_leading_zeros>:
   1038c:	00155793          	srl	a5,a0,0x1
   10390:	00a7e533          	or	a0,a5,a0
   10394:	00255793          	srl	a5,a0,0x2
   10398:	00a7e7b3          	or	a5,a5,a0
   1039c:	0047d513          	srl	a0,a5,0x4
   103a0:	00f56533          	or	a0,a0,a5
   103a4:	00855713          	srl	a4,a0,0x8
   103a8:	00a76733          	or	a4,a4,a0
   103ac:	01075793          	srl	a5,a4,0x10
   103b0:	00e7e7b3          	or	a5,a5,a4
   103b4:	555556b7          	lui	a3,0x55555
   103b8:	0017d713          	srl	a4,a5,0x1
   103bc:	55568693          	add	a3,a3,1365 # 55555555 <__BSS_END__+0x555377f9>
   103c0:	00d77733          	and	a4,a4,a3
   103c4:	40e787b3          	sub	a5,a5,a4
   103c8:	333336b7          	lui	a3,0x33333
   103cc:	33368693          	add	a3,a3,819 # 33333333 <__BSS_END__+0x333155d7>
   103d0:	0027d713          	srl	a4,a5,0x2
   103d4:	00d77733          	and	a4,a4,a3
   103d8:	00d7f7b3          	and	a5,a5,a3
   103dc:	00f70733          	add	a4,a4,a5
   103e0:	00475793          	srl	a5,a4,0x4
   103e4:	0f0f16b7          	lui	a3,0xf0f1
   103e8:	00e787b3          	add	a5,a5,a4
   103ec:	f0f68693          	add	a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d31b3>
   103f0:	00d7f7b3          	and	a5,a5,a3
   103f4:	0087d713          	srl	a4,a5,0x8
   103f8:	00f70733          	add	a4,a4,a5
   103fc:	01075793          	srl	a5,a4,0x10
   10400:	00e787b3          	add	a5,a5,a4
   10404:	07f7f793          	and	a5,a5,127
   10408:	02000513          	li	a0,32
   1040c:	40f50533          	sub	a0,a0,a5
   10410:	00008067          	ret

00010414 <unsigned_fadd32>:
   10414:	800007b7          	lui	a5,0x80000
   10418:	ff010113          	add	sp,sp,-16
   1041c:	fff78793          	add	a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe22a3>
   10420:	00a7f733          	and	a4,a5,a0
   10424:	00112623          	sw	ra,12(sp)
   10428:	00812423          	sw	s0,8(sp)
   1042c:	00912223          	sw	s1,4(sp)
   10430:	01212023          	sw	s2,0(sp)
   10434:	00b7f7b3          	and	a5,a5,a1
   10438:	0af74463          	blt	a4,a5,104e0 <unsigned_fadd32+0xcc>
   1043c:	00050913          	mv	s2,a0
   10440:	00058693          	mv	a3,a1
   10444:	008005b7          	lui	a1,0x800
   10448:	41795413          	sra	s0,s2,0x17
   1044c:	4176d793          	sra	a5,a3,0x17
   10450:	fff58713          	add	a4,a1,-1 # 7fffff <__BSS_END__+0x7e22a3>
   10454:	0ff47413          	zext.b	s0,s0
   10458:	0ff7f793          	zext.b	a5,a5
   1045c:	00e97633          	and	a2,s2,a4
   10460:	40f407b3          	sub	a5,s0,a5
   10464:	00e6f733          	and	a4,a3,a4
   10468:	01800513          	li	a0,24
   1046c:	00b66633          	or	a2,a2,a1
   10470:	00b76733          	or	a4,a4,a1
   10474:	00f55463          	bge	a0,a5,1047c <unsigned_fadd32+0x68>
   10478:	01800793          	li	a5,24
   1047c:	40f75733          	sra	a4,a4,a5
   10480:	00d946b3          	xor	a3,s2,a3
   10484:	00e604b3          	add	s1,a2,a4
   10488:	0006d463          	bgez	a3,10490 <unsigned_fadd32+0x7c>
   1048c:	40e604b3          	sub	s1,a2,a4
   10490:	00048513          	mv	a0,s1
   10494:	ef9ff0ef          	jal	1038c <count_leading_zeros>
   10498:	00800793          	li	a5,8
   1049c:	04a7c863          	blt	a5,a0,104ec <unsigned_fadd32+0xd8>
   104a0:	40a787b3          	sub	a5,a5,a0
   104a4:	40f4d4b3          	sra	s1,s1,a5
   104a8:	00f40433          	add	s0,s0,a5
   104ac:	00949493          	sll	s1,s1,0x9
   104b0:	0094d493          	srl	s1,s1,0x9
   104b4:	01741413          	sll	s0,s0,0x17
   104b8:	800007b7          	lui	a5,0x80000
   104bc:	00946433          	or	s0,s0,s1
   104c0:	00f97533          	and	a0,s2,a5
   104c4:	00c12083          	lw	ra,12(sp)
   104c8:	00a46533          	or	a0,s0,a0
   104cc:	00812403          	lw	s0,8(sp)
   104d0:	00412483          	lw	s1,4(sp)
   104d4:	00012903          	lw	s2,0(sp)
   104d8:	01010113          	add	sp,sp,16
   104dc:	00008067          	ret
   104e0:	00050693          	mv	a3,a0
   104e4:	00058913          	mv	s2,a1
   104e8:	f5dff06f          	j	10444 <unsigned_fadd32+0x30>
   104ec:	ff850513          	add	a0,a0,-8
   104f0:	00a494b3          	sll	s1,s1,a0
   104f4:	40a40433          	sub	s0,s0,a0
   104f8:	fb5ff06f          	j	104ac <unsigned_fadd32+0x98>

000104fc <fmul32>:
   104fc:	008006b7          	lui	a3,0x800
   10500:	fff68793          	add	a5,a3,-1 # 7fffff <__BSS_END__+0x7e22a3>
   10504:	00a7f8b3          	and	a7,a5,a0
   10508:	41755713          	sra	a4,a0,0x17
   1050c:	00b7f7b3          	and	a5,a5,a1
   10510:	4175d313          	sra	t1,a1,0x17
   10514:	00d8e8b3          	or	a7,a7,a3
   10518:	00d7e7b3          	or	a5,a5,a3
   1051c:	0ff77813          	zext.b	a6,a4
   10520:	0ff37313          	zext.b	t1,t1
   10524:	00000693          	li	a3,0
   10528:	0017f613          	and	a2,a5,1
   1052c:	00068713          	mv	a4,a3
   10530:	4017d793          	sra	a5,a5,0x1
   10534:	00060463          	beqz	a2,1053c <fmul32+0x40>
   10538:	00d88733          	add	a4,a7,a3
   1053c:	40175693          	sra	a3,a4,0x1
   10540:	fe0794e3          	bnez	a5,10528 <fmul32+0x2c>
   10544:	41875693          	sra	a3,a4,0x18
   10548:	ffe77793          	and	a5,a4,-2
   1054c:	00680733          	add	a4,a6,t1
   10550:	40d7d7b3          	sra	a5,a5,a3
   10554:	f8170713          	add	a4,a4,-127
   10558:	00d70733          	add	a4,a4,a3
   1055c:	00a5c533          	xor	a0,a1,a0
   10560:	800006b7          	lui	a3,0x80000
   10564:	00979793          	sll	a5,a5,0x9
   10568:	00d57533          	and	a0,a0,a3
   1056c:	0097d793          	srl	a5,a5,0x9
   10570:	7f8006b7          	lui	a3,0x7f800
   10574:	01771713          	sll	a4,a4,0x17
   10578:	00d77733          	and	a4,a4,a3
   1057c:	00f56533          	or	a0,a0,a5
   10580:	00a76533          	or	a0,a4,a0
   10584:	00008067          	ret

elf size

   text	   data	    bss	    dec	    hex	filename
  52674	   1888	   1528	  56090	   db1a	main.elf

elf header

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1029a
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69852 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

execute

0.389692 0.111821 0.675161 
0.662995 0.264999 0.566176 
0.431446 0.448845 0.454146 
cycle count: 58610
instret: 2de
inferior exit code 0

O3 Optimized Assembly code

Assembly Code
000100b0 <main>:
   100b0:	f2010113          	add	sp,sp,-224
   100b4:	0c112e23          	sw	ra,220(sp)
   100b8:	0c812c23          	sw	s0,216(sp)
   100bc:	0c912a23          	sw	s1,212(sp)
   100c0:	0d212823          	sw	s2,208(sp)
   100c4:	0d312623          	sw	s3,204(sp)
   100c8:	0d412423          	sw	s4,200(sp)
   100cc:	0d512223          	sw	s5,196(sp)
   100d0:	0d612023          	sw	s6,192(sp)
   100d4:	0b712e23          	sw	s7,188(sp)
   100d8:	0b812c23          	sw	s8,184(sp)
   100dc:	0b912a23          	sw	s9,180(sp)
   100e0:	0ba12823          	sw	s10,176(sp)
   100e4:	0bb12623          	sw	s11,172(sp)
   100e8:	3cc000ef          	jal	104b4 <get_instret>
   100ec:	00a12423          	sw	a0,8(sp)
   100f0:	3b0000ef          	jal	104a0 <get_cycles>
   100f4:	0001c7b7          	lui	a5,0x1c
   100f8:	51c78793          	add	a5,a5,1308 # 1c51c <__trunctfdf2+0x5b0>
   100fc:	00a12623          	sw	a0,12(sp)
   10100:	03410713          	add	a4,sp,52
   10104:	06478893          	add	a7,a5,100
   10108:	0007a803          	lw	a6,0(a5)
   1010c:	0047a503          	lw	a0,4(a5)
   10110:	0087a583          	lw	a1,8(a5)
   10114:	00c7a603          	lw	a2,12(a5)
   10118:	0107a683          	lw	a3,16(a5)
   1011c:	01072023          	sw	a6,0(a4)
   10120:	00a72223          	sw	a0,4(a4)
   10124:	00b72423          	sw	a1,8(a4)
   10128:	00c72623          	sw	a2,12(a4)
   1012c:	00d72823          	sw	a3,16(a4)
   10130:	01478793          	add	a5,a5,20
   10134:	01470713          	add	a4,a4,20
   10138:	fd1798e3          	bne	a5,a7,10108 <main+0x58>
   1013c:	0007a683          	lw	a3,0(a5)
   10140:	0047a783          	lw	a5,4(a5)
   10144:	01010b13          	add	s6,sp,16
   10148:	00800937          	lui	s2,0x800
   1014c:	3e991cb7          	lui	s9,0x3e991
   10150:	3f164c37          	lui	s8,0x3f164
   10154:	3de98bb7          	lui	s7,0x3de98
   10158:	00d72023          	sw	a3,0(a4)
   1015c:	00f72223          	sw	a5,4(a4)
   10160:	03410a93          	add	s5,sp,52
   10164:	01612223          	sw	s6,4(sp)
   10168:	fff90d93          	add	s11,s2,-1 # 7fffff <__BSS_END__+0x7e1247>
   1016c:	687c8c93          	add	s9,s9,1671 # 3e991687 <__BSS_END__+0x3e9728cf>
   10170:	80000a37          	lui	s4,0x80000
   10174:	7f8009b7          	lui	s3,0x7f800
   10178:	5a2c0c13          	add	s8,s8,1442 # 3f1645a2 <__BSS_END__+0x3f1457ea>
   1017c:	8d5b8b93          	add	s7,s7,-1835 # 3de978d5 <__BSS_END__+0x3de78b1d>
   10180:	00412483          	lw	s1,4(sp)
   10184:	000a8d13          	mv	s10,s5
   10188:	00000413          	li	s0,0
   1018c:	000d2e03          	lw	t3,0(s10)
   10190:	009915b7          	lui	a1,0x991
   10194:	01800513          	li	a0,24
   10198:	01be7333          	and	t1,t3,s11
   1019c:	417e5793          	sra	a5,t3,0x17
   101a0:	01236333          	or	t1,t1,s2
   101a4:	0ff7f893          	zext.b	a7,a5
   101a8:	00000713          	li	a4,0
   101ac:	68758593          	add	a1,a1,1671 # 991687 <__BSS_END__+0x9728cf>
   101b0:	0015f793          	and	a5,a1,1
   101b4:	00e30833          	add	a6,t1,a4
   101b8:	fff50513          	add	a0,a0,-1
   101bc:	22078e63          	beqz	a5,103f8 <main+0x348>
   101c0:	4015d593          	sra	a1,a1,0x1
   101c4:	40185713          	sra	a4,a6,0x1
   101c8:	fe0514e3          	bnez	a0,101b0 <main+0x100>
   101cc:	00080713          	mv	a4,a6
   101d0:	41875593          	sra	a1,a4,0x18
   101d4:	004d2303          	lw	t1,4(s10)
   101d8:	ffe77713          	and	a4,a4,-2
   101dc:	ffe88793          	add	a5,a7,-2
   101e0:	019e4533          	xor	a0,t3,s9
   101e4:	40b75733          	sra	a4,a4,a1
   101e8:	00b787b3          	add	a5,a5,a1
   101ec:	01b77733          	and	a4,a4,s11
   101f0:	01779793          	sll	a5,a5,0x17
   101f4:	01457533          	and	a0,a0,s4
   101f8:	00e56533          	or	a0,a0,a4
   101fc:	01b37eb3          	and	t4,t1,s11
   10200:	0137f733          	and	a4,a5,s3
   10204:	009645b7          	lui	a1,0x964
   10208:	41735793          	sra	a5,t1,0x17
   1020c:	00e56533          	or	a0,a0,a4
   10210:	012eeeb3          	or	t4,t4,s2
   10214:	0ff7fe13          	zext.b	t3,a5
   10218:	01800813          	li	a6,24
   1021c:	00000713          	li	a4,0
   10220:	5a258593          	add	a1,a1,1442 # 9645a2 <__BSS_END__+0x9457ea>
   10224:	0015f793          	and	a5,a1,1
   10228:	00ee88b3          	add	a7,t4,a4
   1022c:	fff80813          	add	a6,a6,-1
   10230:	1a078a63          	beqz	a5,103e4 <main+0x334>
   10234:	4015d593          	sra	a1,a1,0x1
   10238:	4018d713          	sra	a4,a7,0x1
   1023c:	fe0814e3          	bnez	a6,10224 <main+0x174>
   10240:	00088713          	mv	a4,a7
   10244:	41875813          	sra	a6,a4,0x18
   10248:	fffe0793          	add	a5,t3,-1
   1024c:	ffe77713          	and	a4,a4,-2
   10250:	018345b3          	xor	a1,t1,s8
   10254:	41075733          	sra	a4,a4,a6
   10258:	010787b3          	add	a5,a5,a6
   1025c:	01b77733          	and	a4,a4,s11
   10260:	0145f5b3          	and	a1,a1,s4
   10264:	01779793          	sll	a5,a5,0x17
   10268:	00e5e5b3          	or	a1,a1,a4
   1026c:	0137f7b3          	and	a5,a5,s3
   10270:	00f5e5b3          	or	a1,a1,a5
   10274:	330000ef          	jal	105a4 <unsigned_fadd32>
   10278:	008d2e83          	lw	t4,8(s10)
   1027c:	00e985b7          	lui	a1,0xe98
   10280:	01800813          	li	a6,24
   10284:	01befe33          	and	t3,t4,s11
   10288:	417ed793          	sra	a5,t4,0x17
   1028c:	012e6e33          	or	t3,t3,s2
   10290:	0ff7f313          	zext.b	t1,a5
   10294:	00000713          	li	a4,0
   10298:	8d558593          	add	a1,a1,-1835 # e978d5 <__BSS_END__+0xe78b1d>
   1029c:	0015f793          	and	a5,a1,1
   102a0:	00ee08b3          	add	a7,t3,a4
   102a4:	fff80813          	add	a6,a6,-1
   102a8:	12078463          	beqz	a5,103d0 <main+0x320>
   102ac:	4015d593          	sra	a1,a1,0x1
   102b0:	4018d713          	sra	a4,a7,0x1
   102b4:	fe0814e3          	bnez	a6,1029c <main+0x1ec>
   102b8:	00088713          	mv	a4,a7
   102bc:	41875813          	sra	a6,a4,0x18
   102c0:	ffc30793          	add	a5,t1,-4
   102c4:	ffe77713          	and	a4,a4,-2
   102c8:	017ec5b3          	xor	a1,t4,s7
   102cc:	41075733          	sra	a4,a4,a6
   102d0:	010787b3          	add	a5,a5,a6
   102d4:	01b77733          	and	a4,a4,s11
   102d8:	0145f5b3          	and	a1,a1,s4
   102dc:	01779793          	sll	a5,a5,0x17
   102e0:	0137f7b3          	and	a5,a5,s3
   102e4:	00e5e5b3          	or	a1,a1,a4
   102e8:	00f5e5b3          	or	a1,a1,a5
   102ec:	2b8000ef          	jal	105a4 <unsigned_fadd32>
   102f0:	00a4a023          	sw	a0,0(s1)
   102f4:	00140413          	add	s0,s0,1
   102f8:	00300793          	li	a5,3
   102fc:	00cd0d13          	add	s10,s10,12
   10300:	00448493          	add	s1,s1,4
   10304:	e8f414e3          	bne	s0,a5,1018c <main+0xdc>
   10308:	00412783          	lw	a5,4(sp)
   1030c:	024a8a93          	add	s5,s5,36
   10310:	00c78793          	add	a5,a5,12
   10314:	00f12223          	sw	a5,4(sp)
   10318:	0a010793          	add	a5,sp,160
   1031c:	e75792e3          	bne	a5,s5,10180 <main+0xd0>
   10320:	024b0a13          	add	s4,s6,36
   10324:	0001c9b7          	lui	s3,0x1c
   10328:	00300913          	li	s2,3
   1032c:	000b0493          	mv	s1,s6
   10330:	00000413          	li	s0,0
   10334:	0004a503          	lw	a0,0(s1)
   10338:	00140413          	add	s0,s0,1
   1033c:	00448493          	add	s1,s1,4
   10340:	458000ef          	jal	10798 <__extendsfdf2>
   10344:	00050613          	mv	a2,a0
   10348:	00058693          	mv	a3,a1
   1034c:	21898513          	add	a0,s3,536 # 1c218 <__trunctfdf2+0x2ac>
   10350:	139000ef          	jal	10c88 <printf>
   10354:	ff2410e3          	bne	s0,s2,10334 <main+0x284>
   10358:	00a00513          	li	a0,10
   1035c:	00cb0b13          	add	s6,s6,12
   10360:	15b000ef          	jal	10cba <putchar>
   10364:	fd4b14e3          	bne	s6,s4,1032c <main+0x27c>
   10368:	138000ef          	jal	104a0 <get_cycles>
   1036c:	00c12783          	lw	a5,12(sp)
   10370:	40f505b3          	sub	a1,a0,a5
   10374:	0001c537          	lui	a0,0x1c
   10378:	21c50513          	add	a0,a0,540 # 1c21c <__trunctfdf2+0x2b0>
   1037c:	10d000ef          	jal	10c88 <printf>
   10380:	00812583          	lw	a1,8(sp)
   10384:	0001c537          	lui	a0,0x1c
   10388:	23050513          	add	a0,a0,560 # 1c230 <__trunctfdf2+0x2c4>
   1038c:	0fd000ef          	jal	10c88 <printf>
   10390:	0dc12083          	lw	ra,220(sp)
   10394:	0d812403          	lw	s0,216(sp)
   10398:	0d412483          	lw	s1,212(sp)
   1039c:	0d012903          	lw	s2,208(sp)
   103a0:	0cc12983          	lw	s3,204(sp)
   103a4:	0c812a03          	lw	s4,200(sp)
   103a8:	0c412a83          	lw	s5,196(sp)
   103ac:	0c012b03          	lw	s6,192(sp)
   103b0:	0bc12b83          	lw	s7,188(sp)
   103b4:	0b812c03          	lw	s8,184(sp)
   103b8:	0b412c83          	lw	s9,180(sp)
   103bc:	0b012d03          	lw	s10,176(sp)
   103c0:	0ac12d83          	lw	s11,172(sp)
   103c4:	00000513          	li	a0,0
   103c8:	0e010113          	add	sp,sp,224
   103cc:	00008067          	ret
   103d0:	40175793          	sra	a5,a4,0x1
   103d4:	4015d593          	sra	a1,a1,0x1
   103d8:	ee0802e3          	beqz	a6,102bc <main+0x20c>
   103dc:	00078713          	mv	a4,a5
   103e0:	ebdff06f          	j	1029c <main+0x1ec>
   103e4:	40175793          	sra	a5,a4,0x1
   103e8:	4015d593          	sra	a1,a1,0x1
   103ec:	e4080ce3          	beqz	a6,10244 <main+0x194>
   103f0:	00078713          	mv	a4,a5
   103f4:	e31ff06f          	j	10224 <main+0x174>
   103f8:	40175793          	sra	a5,a4,0x1
   103fc:	4015d593          	sra	a1,a1,0x1
   10400:	dc0508e3          	beqz	a0,101d0 <main+0x120>
   10404:	00078713          	mv	a4,a5
   10408:	da9ff06f          	j	101b0 <main+0x100>
   
000104c8 <swap>:
   104c8:	00052703          	lw	a4,0(a0)
   104cc:	0005a783          	lw	a5,0(a1)
   104d0:	00e5a023          	sw	a4,0(a1)
   104d4:	00f52023          	sw	a5,0(a0)
   104d8:	00008067          	ret

000104dc <imul32>:
   104dc:	02058c63          	beqz	a1,10514 <imul32+0x38>
   104e0:	00000713          	li	a4,0
   104e4:	0015f793          	and	a5,a1,1
   104e8:	00a706b3          	add	a3,a4,a0
   104ec:	4015d593          	sra	a1,a1,0x1
   104f0:	00078a63          	beqz	a5,10504 <imul32+0x28>
   104f4:	4016d713          	sra	a4,a3,0x1
   104f8:	fe0596e3          	bnez	a1,104e4 <imul32+0x8>
   104fc:	00171513          	sll	a0,a4,0x1
   10500:	00008067          	ret
   10504:	40175713          	sra	a4,a4,0x1
   10508:	fc059ee3          	bnez	a1,104e4 <imul32+0x8>
   1050c:	00171513          	sll	a0,a4,0x1
   10510:	00008067          	ret
   10514:	00000513          	li	a0,0
   10518:	00008067          	ret

0001051c <count_leading_zeros>:
   1051c:	00155793          	srl	a5,a0,0x1
   10520:	00a7e533          	or	a0,a5,a0
   10524:	00255793          	srl	a5,a0,0x2
   10528:	00a7e7b3          	or	a5,a5,a0
   1052c:	0047d513          	srl	a0,a5,0x4
   10530:	00f56533          	or	a0,a0,a5
   10534:	00855713          	srl	a4,a0,0x8
   10538:	00a76733          	or	a4,a4,a0
   1053c:	01075793          	srl	a5,a4,0x10
   10540:	00e7e7b3          	or	a5,a5,a4
   10544:	555556b7          	lui	a3,0x55555
   10548:	0017d713          	srl	a4,a5,0x1
   1054c:	55568693          	add	a3,a3,1365 # 55555555 <__BSS_END__+0x5553679d>
   10550:	00d77733          	and	a4,a4,a3
   10554:	40e787b3          	sub	a5,a5,a4
   10558:	333336b7          	lui	a3,0x33333
   1055c:	33368693          	add	a3,a3,819 # 33333333 <__BSS_END__+0x3331457b>
   10560:	0027d713          	srl	a4,a5,0x2
   10564:	00d77733          	and	a4,a4,a3
   10568:	00d7f7b3          	and	a5,a5,a3
   1056c:	00f70733          	add	a4,a4,a5
   10570:	00475793          	srl	a5,a4,0x4
   10574:	0f0f16b7          	lui	a3,0xf0f1
   10578:	00e787b3          	add	a5,a5,a4
   1057c:	f0f68693          	add	a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d2157>
   10580:	00d7f7b3          	and	a5,a5,a3
   10584:	0087d713          	srl	a4,a5,0x8
   10588:	00f70733          	add	a4,a4,a5
   1058c:	01075793          	srl	a5,a4,0x10
   10590:	00e787b3          	add	a5,a5,a4
   10594:	07f7f793          	and	a5,a5,127
   10598:	02000513          	li	a0,32
   1059c:	40f50533          	sub	a0,a0,a5
   105a0:	00008067          	ret

000105a4 <unsigned_fadd32>:
   105a4:	800007b7          	lui	a5,0x80000
   105a8:	fff78793          	add	a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe1247>
   105ac:	00a7f733          	and	a4,a5,a0
   105b0:	00b7f7b3          	and	a5,a5,a1
   105b4:	00050813          	mv	a6,a0
   105b8:	00058613          	mv	a2,a1
   105bc:	00f74663          	blt	a4,a5,105c8 <unsigned_fadd32+0x24>
   105c0:	00050613          	mv	a2,a0
   105c4:	00058813          	mv	a6,a1
   105c8:	008008b7          	lui	a7,0x800
   105cc:	41765713          	sra	a4,a2,0x17
   105d0:	41785793          	sra	a5,a6,0x17
   105d4:	fff88693          	add	a3,a7,-1 # 7fffff <__BSS_END__+0x7e1247>
   105d8:	0ff77713          	zext.b	a4,a4
   105dc:	0ff7f793          	zext.b	a5,a5
   105e0:	00d675b3          	and	a1,a2,a3
   105e4:	40f707b3          	sub	a5,a4,a5
   105e8:	00d876b3          	and	a3,a6,a3
   105ec:	01800513          	li	a0,24
   105f0:	0115e5b3          	or	a1,a1,a7
   105f4:	0116e6b3          	or	a3,a3,a7
   105f8:	00f55463          	bge	a0,a5,10600 <unsigned_fadd32+0x5c>
   105fc:	01800793          	li	a5,24
   10600:	40f6d7b3          	sra	a5,a3,a5
   10604:	01064833          	xor	a6,a2,a6
   10608:	00f586b3          	add	a3,a1,a5
   1060c:	00085463          	bgez	a6,10614 <unsigned_fadd32+0x70>
   10610:	40f586b3          	sub	a3,a1,a5
   10614:	0016d793          	srl	a5,a3,0x1
   10618:	00d7e7b3          	or	a5,a5,a3
   1061c:	0027d593          	srl	a1,a5,0x2
   10620:	00b7e7b3          	or	a5,a5,a1
   10624:	0047d593          	srl	a1,a5,0x4
   10628:	00b7e7b3          	or	a5,a5,a1
   1062c:	0087d593          	srl	a1,a5,0x8
   10630:	00b7e7b3          	or	a5,a5,a1
   10634:	0107d593          	srl	a1,a5,0x10
   10638:	00b7e7b3          	or	a5,a5,a1
   1063c:	55555537          	lui	a0,0x55555
   10640:	0017d593          	srl	a1,a5,0x1
   10644:	55550513          	add	a0,a0,1365 # 55555555 <__BSS_END__+0x5553679d>
   10648:	00a5f5b3          	and	a1,a1,a0
   1064c:	40b787b3          	sub	a5,a5,a1
   10650:	33333537          	lui	a0,0x33333
   10654:	33350513          	add	a0,a0,819 # 33333333 <__BSS_END__+0x3331457b>
   10658:	0027d593          	srl	a1,a5,0x2
   1065c:	00a5f5b3          	and	a1,a1,a0
   10660:	00a7f7b3          	and	a5,a5,a0
   10664:	00f585b3          	add	a1,a1,a5
   10668:	0045d793          	srl	a5,a1,0x4
   1066c:	0f0f1537          	lui	a0,0xf0f1
   10670:	00b787b3          	add	a5,a5,a1
   10674:	f0f50513          	add	a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d2157>
   10678:	00a7f7b3          	and	a5,a5,a0
   1067c:	0087d593          	srl	a1,a5,0x8
   10680:	00b787b3          	add	a5,a5,a1
   10684:	0107d593          	srl	a1,a5,0x10
   10688:	00b787b3          	add	a5,a5,a1
   1068c:	07f7f793          	and	a5,a5,127
   10690:	02000593          	li	a1,32
   10694:	40f585b3          	sub	a1,a1,a5
   10698:	00800513          	li	a0,8
   1069c:	02b54863          	blt	a0,a1,106cc <unsigned_fadd32+0x128>
   106a0:	fe878793          	add	a5,a5,-24
   106a4:	40f6d6b3          	sra	a3,a3,a5
   106a8:	00f70733          	add	a4,a4,a5
   106ac:	00969693          	sll	a3,a3,0x9
   106b0:	0096d693          	srl	a3,a3,0x9
   106b4:	800007b7          	lui	a5,0x80000
   106b8:	01771713          	sll	a4,a4,0x17
   106bc:	00d76733          	or	a4,a4,a3
   106c0:	00f67533          	and	a0,a2,a5
   106c4:	00a76533          	or	a0,a4,a0
   106c8:	00008067          	ret
   106cc:	01800593          	li	a1,24
   106d0:	40f587b3          	sub	a5,a1,a5
   106d4:	00f696b3          	sll	a3,a3,a5
   106d8:	40f70733          	sub	a4,a4,a5
   106dc:	00969693          	sll	a3,a3,0x9
   106e0:	0096d693          	srl	a3,a3,0x9
   106e4:	800007b7          	lui	a5,0x80000
   106e8:	01771713          	sll	a4,a4,0x17
   106ec:	00d76733          	or	a4,a4,a3
   106f0:	00f67533          	and	a0,a2,a5
   106f4:	00a76533          	or	a0,a4,a0
   106f8:	00008067          	ret

000106fc <fmul32>:
   106fc:	008006b7          	lui	a3,0x800
   10700:	fff68793          	add	a5,a3,-1 # 7fffff <__BSS_END__+0x7e1247>
   10704:	00a7f8b3          	and	a7,a5,a0
   10708:	41755713          	sra	a4,a0,0x17
   1070c:	00b7f7b3          	and	a5,a5,a1
   10710:	4175d313          	sra	t1,a1,0x17
   10714:	00d8e8b3          	or	a7,a7,a3
   10718:	00d7e7b3          	or	a5,a5,a3
   1071c:	0ff77613          	zext.b	a2,a4
   10720:	0ff37313          	zext.b	t1,t1
   10724:	00000693          	li	a3,0
   10728:	0017f713          	and	a4,a5,1
   1072c:	00d88833          	add	a6,a7,a3
   10730:	4017d793          	sra	a5,a5,0x1
   10734:	04070a63          	beqz	a4,10788 <fmul32+0x8c>
   10738:	40185693          	sra	a3,a6,0x1
   1073c:	fe0796e3          	bnez	a5,10728 <fmul32+0x2c>
   10740:	00080693          	mv	a3,a6
   10744:	4186d813          	sra	a6,a3,0x18
   10748:	ffe6f793          	and	a5,a3,-2
   1074c:	00660733          	add	a4,a2,t1
   10750:	4107d7b3          	sra	a5,a5,a6
   10754:	f8170713          	add	a4,a4,-127
   10758:	01070733          	add	a4,a4,a6
   1075c:	00a5c533          	xor	a0,a1,a0
   10760:	800006b7          	lui	a3,0x80000
   10764:	00979793          	sll	a5,a5,0x9
   10768:	00d57533          	and	a0,a0,a3
   1076c:	0097d793          	srl	a5,a5,0x9
   10770:	7f8006b7          	lui	a3,0x7f800
   10774:	01771713          	sll	a4,a4,0x17
   10778:	00d77733          	and	a4,a4,a3
   1077c:	00f56533          	or	a0,a0,a5
   10780:	00a76533          	or	a0,a4,a0
   10784:	00008067          	ret
   10788:	4016d713          	sra	a4,a3,0x1
   1078c:	fa078ce3          	beqz	a5,10744 <fmul32+0x48>
   10790:	00070693          	mv	a3,a4
   10794:	f95ff06f          	j	10728 <fmul32+0x2c>

elf size

   text	   data	    bss	    dec	    hex	filename
  53198	   1876	   1528	  56602	   dd1a	main.elf

elf header

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1041e
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69944 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

execute

0.389692 0.111821 0.675161 
0.662995 0.264999 0.566176 
0.431446 0.448845 0.454146 
cycle count: 59137
instret: 2c8
inferior exit code 0

Os Optimized Assembly code

Assembly Code
000100b0 <main>:
   100b0:	f2010113          	add	sp,sp,-224
   100b4:	0c112e23          	sw	ra,220(sp)
   100b8:	0c812c23          	sw	s0,216(sp)
   100bc:	0c912a23          	sw	s1,212(sp)
   100c0:	0d312623          	sw	s3,204(sp)
   100c4:	0d412423          	sw	s4,200(sp)
   100c8:	0b712e23          	sw	s7,188(sp)
   100cc:	0b912a23          	sw	s9,180(sp)
   100d0:	0ba12823          	sw	s10,176(sp)
   100d4:	0bb12623          	sw	s11,172(sp)
   100d8:	0d212823          	sw	s2,208(sp)
   100dc:	0d512223          	sw	s5,196(sp)
   100e0:	0d612023          	sw	s6,192(sp)
   100e4:	0b812c23          	sw	s8,184(sp)
   100e8:	208000ef          	jal	102f0 <get_instret>
   100ec:	00a12623          	sw	a0,12(sp)
   100f0:	1ec000ef          	jal	102dc <get_cycles>
   100f4:	0001c5b7          	lui	a1,0x1c
   100f8:	00050a13          	mv	s4,a0
   100fc:	06c00613          	li	a2,108
   10100:	2cc58593          	add	a1,a1,716 # 1c2cc <__trunctfdf2+0x5ae>
   10104:	03410513          	add	a0,sp,52
   10108:	4ed000ef          	jal	10df4 <memcpy>
   1010c:	f301ac83          	lw	s9,-208(gp) # 1d740 <__SDATA_BEGIN__+0x68>
   10110:	f341ad03          	lw	s10,-204(gp) # 1d744 <__SDATA_BEGIN__+0x6c>
   10114:	f381ad83          	lw	s11,-200(gp) # 1d748 <__SDATA_BEGIN__+0x70>
   10118:	01010493          	add	s1,sp,16
   1011c:	03410413          	add	s0,sp,52
   10120:	00048b93          	mv	s7,s1
   10124:	00300993          	li	s3,3
   10128:	000b8b13          	mv	s6,s7
   1012c:	00040913          	mv	s2,s0
   10130:	00000a93          	li	s5,0
   10134:	00092503          	lw	a0,0(s2)
   10138:	000c8593          	mv	a1,s9
   1013c:	001a8a93          	add	s5,s5,1
   10140:	370000ef          	jal	104b0 <fmul32>
   10144:	00050c13          	mv	s8,a0
   10148:	00492503          	lw	a0,4(s2)
   1014c:	000d0593          	mv	a1,s10
   10150:	00c90913          	add	s2,s2,12
   10154:	35c000ef          	jal	104b0 <fmul32>
   10158:	00050593          	mv	a1,a0
   1015c:	000c0513          	mv	a0,s8
   10160:	268000ef          	jal	103c8 <unsigned_fadd32>
   10164:	00050c13          	mv	s8,a0
   10168:	ffc92503          	lw	a0,-4(s2)
   1016c:	000d8593          	mv	a1,s11
   10170:	004b0b13          	add	s6,s6,4
   10174:	33c000ef          	jal	104b0 <fmul32>
   10178:	00050593          	mv	a1,a0
   1017c:	000c0513          	mv	a0,s8
   10180:	248000ef          	jal	103c8 <unsigned_fadd32>
   10184:	feab2e23          	sw	a0,-4(s6)
   10188:	fb3a96e3          	bne	s5,s3,10134 <main+0x84>
   1018c:	02440413          	add	s0,s0,36
   10190:	0a010793          	add	a5,sp,160
   10194:	00cb8b93          	add	s7,s7,12
   10198:	f8f418e3          	bne	s0,a5,10128 <main+0x78>
   1019c:	02448a93          	add	s5,s1,36
   101a0:	0001cb37          	lui	s6,0x1c
   101a4:	00300b93          	li	s7,3
   101a8:	00048913          	mv	s2,s1
   101ac:	00000413          	li	s0,0
   101b0:	00092503          	lw	a0,0(s2)
   101b4:	00140413          	add	s0,s0,1
   101b8:	00490913          	add	s2,s2,4
   101bc:	390000ef          	jal	1054c <__extendsfdf2>
   101c0:	00050613          	mv	a2,a0
   101c4:	00058693          	mv	a3,a1
   101c8:	fc8b0513          	add	a0,s6,-56 # 1bfc8 <__trunctfdf2+0x2aa>
   101cc:	071000ef          	jal	10a3c <printf>
   101d0:	ff7410e3          	bne	s0,s7,101b0 <main+0x100>
   101d4:	00a00513          	li	a0,10
   101d8:	00c48493          	add	s1,s1,12
   101dc:	093000ef          	jal	10a6e <putchar>
   101e0:	fd5494e3          	bne	s1,s5,101a8 <main+0xf8>
   101e4:	0f8000ef          	jal	102dc <get_cycles>
   101e8:	414505b3          	sub	a1,a0,s4
   101ec:	0001c537          	lui	a0,0x1c
   101f0:	fcc50513          	add	a0,a0,-52 # 1bfcc <__trunctfdf2+0x2ae>
   101f4:	049000ef          	jal	10a3c <printf>
   101f8:	00c12583          	lw	a1,12(sp)
   101fc:	0001c537          	lui	a0,0x1c
   10200:	fe050513          	add	a0,a0,-32 # 1bfe0 <__trunctfdf2+0x2c2>
   10204:	039000ef          	jal	10a3c <printf>
   10208:	0dc12083          	lw	ra,220(sp)
   1020c:	0d812403          	lw	s0,216(sp)
   10210:	0d412483          	lw	s1,212(sp)
   10214:	0d012903          	lw	s2,208(sp)
   10218:	0cc12983          	lw	s3,204(sp)
   1021c:	0c812a03          	lw	s4,200(sp)
   10220:	0c412a83          	lw	s5,196(sp)
   10224:	0c012b03          	lw	s6,192(sp)
   10228:	0bc12b83          	lw	s7,188(sp)
   1022c:	0b812c03          	lw	s8,184(sp)
   10230:	0b412c83          	lw	s9,180(sp)
   10234:	0b012d03          	lw	s10,176(sp)
   10238:	0ac12d83          	lw	s11,172(sp)
   1023c:	00000513          	li	a0,0
   10240:	0e010113          	add	sp,sp,224
   10244:	00008067          	ret
   
00010304 <swap>:
   10304:	00052703          	lw	a4,0(a0)
   10308:	0005a783          	lw	a5,0(a1)
   1030c:	00e5a023          	sw	a4,0(a1)
   10310:	00f52023          	sw	a5,0(a0)
   10314:	00008067          	ret

00010318 <imul32>:
   10318:	00000793          	li	a5,0
   1031c:	00059663          	bnez	a1,10328 <imul32+0x10>
   10320:	00179513          	sll	a0,a5,0x1
   10324:	00008067          	ret
   10328:	0015f713          	and	a4,a1,1
   1032c:	00070463          	beqz	a4,10334 <imul32+0x1c>
   10330:	00a787b3          	add	a5,a5,a0
   10334:	4015d593          	sra	a1,a1,0x1
   10338:	4017d793          	sra	a5,a5,0x1
   1033c:	fe1ff06f          	j	1031c <imul32+0x4>

00010340 <count_leading_zeros>:
   10340:	00155793          	srl	a5,a0,0x1
   10344:	00a7e533          	or	a0,a5,a0
   10348:	00255793          	srl	a5,a0,0x2
   1034c:	00a7e7b3          	or	a5,a5,a0
   10350:	0047d513          	srl	a0,a5,0x4
   10354:	00f56533          	or	a0,a0,a5
   10358:	00855713          	srl	a4,a0,0x8
   1035c:	00a76733          	or	a4,a4,a0
   10360:	01075793          	srl	a5,a4,0x10
   10364:	00e7e7b3          	or	a5,a5,a4
   10368:	555556b7          	lui	a3,0x55555
   1036c:	0017d713          	srl	a4,a5,0x1
   10370:	55568693          	add	a3,a3,1365 # 55555555 <__BSS_END__+0x555377f9>
   10374:	00d77733          	and	a4,a4,a3
   10378:	40e787b3          	sub	a5,a5,a4
   1037c:	333336b7          	lui	a3,0x33333
   10380:	33368693          	add	a3,a3,819 # 33333333 <__BSS_END__+0x333155d7>
   10384:	0027d713          	srl	a4,a5,0x2
   10388:	00d77733          	and	a4,a4,a3
   1038c:	00d7f7b3          	and	a5,a5,a3
   10390:	00f70733          	add	a4,a4,a5
   10394:	00475793          	srl	a5,a4,0x4
   10398:	00e787b3          	add	a5,a5,a4
   1039c:	0f0f1737          	lui	a4,0xf0f1
   103a0:	f0f70713          	add	a4,a4,-241 # f0f0f0f <__BSS_END__+0xf0d31b3>
   103a4:	00e7f7b3          	and	a5,a5,a4
   103a8:	0087d713          	srl	a4,a5,0x8
   103ac:	00f70733          	add	a4,a4,a5
   103b0:	01075793          	srl	a5,a4,0x10
   103b4:	00e787b3          	add	a5,a5,a4
   103b8:	07f7f793          	and	a5,a5,127
   103bc:	02000513          	li	a0,32
   103c0:	40f50533          	sub	a0,a0,a5
   103c4:	00008067          	ret

000103c8 <unsigned_fadd32>:
   103c8:	800007b7          	lui	a5,0x80000
   103cc:	ff010113          	add	sp,sp,-16
   103d0:	fff78793          	add	a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe22a3>
   103d4:	00a7f733          	and	a4,a5,a0
   103d8:	00112623          	sw	ra,12(sp)
   103dc:	00812423          	sw	s0,8(sp)
   103e0:	00912223          	sw	s1,4(sp)
   103e4:	01212023          	sw	s2,0(sp)
   103e8:	00b7f7b3          	and	a5,a5,a1
   103ec:	0af74463          	blt	a4,a5,10494 <unsigned_fadd32+0xcc>
   103f0:	00050913          	mv	s2,a0
   103f4:	00058693          	mv	a3,a1
   103f8:	00800737          	lui	a4,0x800
   103fc:	fff70793          	add	a5,a4,-1 # 7fffff <__BSS_END__+0x7e22a3>
   10400:	00f97633          	and	a2,s2,a5
   10404:	00f6f7b3          	and	a5,a3,a5
   10408:	00e66633          	or	a2,a2,a4
   1040c:	00e7e7b3          	or	a5,a5,a4
   10410:	41795413          	sra	s0,s2,0x17
   10414:	4176d713          	sra	a4,a3,0x17
   10418:	0ff47413          	zext.b	s0,s0
   1041c:	0ff77713          	zext.b	a4,a4
   10420:	40e40733          	sub	a4,s0,a4
   10424:	01800593          	li	a1,24
   10428:	00e5d463          	bge	a1,a4,10430 <unsigned_fadd32+0x68>
   1042c:	01800713          	li	a4,24
   10430:	40e7d7b3          	sra	a5,a5,a4
   10434:	00d946b3          	xor	a3,s2,a3
   10438:	00f604b3          	add	s1,a2,a5
   1043c:	0006d463          	bgez	a3,10444 <unsigned_fadd32+0x7c>
   10440:	40f604b3          	sub	s1,a2,a5
   10444:	00048513          	mv	a0,s1
   10448:	ef9ff0ef          	jal	10340 <count_leading_zeros>
   1044c:	00800793          	li	a5,8
   10450:	04a7c863          	blt	a5,a0,104a0 <unsigned_fadd32+0xd8>
   10454:	40a787b3          	sub	a5,a5,a0
   10458:	40f4d4b3          	sra	s1,s1,a5
   1045c:	00f40433          	add	s0,s0,a5
   10460:	00949493          	sll	s1,s1,0x9
   10464:	0094d493          	srl	s1,s1,0x9
   10468:	01741413          	sll	s0,s0,0x17
   1046c:	800007b7          	lui	a5,0x80000
   10470:	00946433          	or	s0,s0,s1
   10474:	00f97533          	and	a0,s2,a5
   10478:	00c12083          	lw	ra,12(sp)
   1047c:	00a46533          	or	a0,s0,a0
   10480:	00812403          	lw	s0,8(sp)
   10484:	00412483          	lw	s1,4(sp)
   10488:	00012903          	lw	s2,0(sp)
   1048c:	01010113          	add	sp,sp,16
   10490:	00008067          	ret
   10494:	00050693          	mv	a3,a0
   10498:	00058913          	mv	s2,a1
   1049c:	f5dff06f          	j	103f8 <unsigned_fadd32+0x30>
   104a0:	ff850513          	add	a0,a0,-8
   104a4:	00a494b3          	sll	s1,s1,a0
   104a8:	40a40433          	sub	s0,s0,a0
   104ac:	fb5ff06f          	j	10460 <unsigned_fadd32+0x98>

000104b0 <fmul32>:
   104b0:	ff010113          	add	sp,sp,-16
   104b4:	008007b7          	lui	a5,0x800
   104b8:	01212023          	sw	s2,0(sp)
   104bc:	fff78913          	add	s2,a5,-1 # 7fffff <__BSS_END__+0x7e22a3>
   104c0:	00812423          	sw	s0,8(sp)
   104c4:	00912223          	sw	s1,4(sp)
   104c8:	00058413          	mv	s0,a1
   104cc:	00050493          	mv	s1,a0
   104d0:	00b975b3          	and	a1,s2,a1
   104d4:	00a97533          	and	a0,s2,a0
   104d8:	00f5e5b3          	or	a1,a1,a5
   104dc:	00f56533          	or	a0,a0,a5
   104e0:	00112623          	sw	ra,12(sp)
   104e4:	e35ff0ef          	jal	10318 <imul32>
   104e8:	4174d793          	sra	a5,s1,0x17
   104ec:	41745693          	sra	a3,s0,0x17
   104f0:	0ff6f693          	zext.b	a3,a3
   104f4:	0ff7f793          	zext.b	a5,a5
   104f8:	41855713          	sra	a4,a0,0x18
   104fc:	00d787b3          	add	a5,a5,a3
   10500:	00177713          	and	a4,a4,1
   10504:	f8178793          	add	a5,a5,-127
   10508:	00e787b3          	add	a5,a5,a4
   1050c:	7f8006b7          	lui	a3,0x7f800
   10510:	01779793          	sll	a5,a5,0x17
   10514:	00d7f7b3          	and	a5,a5,a3
   10518:	00944433          	xor	s0,s0,s1
   1051c:	800006b7          	lui	a3,0x80000
   10520:	40e55533          	sra	a0,a0,a4
   10524:	00d47433          	and	s0,s0,a3
   10528:	01257533          	and	a0,a0,s2
   1052c:	00c12083          	lw	ra,12(sp)
   10530:	00a46533          	or	a0,s0,a0
   10534:	00812403          	lw	s0,8(sp)
   10538:	00412483          	lw	s1,4(sp)
   1053c:	00012903          	lw	s2,0(sp)
   10540:	00a7e533          	or	a0,a5,a0
   10544:	01010113          	add	sp,sp,16
   10548:	00008067          	ret

elf size

   text	   data	    bss	    dec	    hex	filename
  52608	   1888	   1528	  56024	   dad8	main.elf

elf header

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1025a
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69852 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

execute

0.389692 0.111821 0.675161 
0.662995 0.264999 0.566176 
0.431446 0.448845 0.454146 
cycle count: 59036
instret: 2de
inferior exit code 0

Ofast Optimized Assembly code

Assembly Code
000100b0 <main>:
   100b0:	f2010113          	add	sp,sp,-224
   100b4:	0c112e23          	sw	ra,220(sp)
   100b8:	0c812c23          	sw	s0,216(sp)
   100bc:	0c912a23          	sw	s1,212(sp)
   100c0:	0d212823          	sw	s2,208(sp)
   100c4:	0d312623          	sw	s3,204(sp)
   100c8:	0d412423          	sw	s4,200(sp)
   100cc:	0d512223          	sw	s5,196(sp)
   100d0:	0d612023          	sw	s6,192(sp)
   100d4:	0b712e23          	sw	s7,188(sp)
   100d8:	0b812c23          	sw	s8,184(sp)
   100dc:	0b912a23          	sw	s9,180(sp)
   100e0:	0ba12823          	sw	s10,176(sp)
   100e4:	0bb12623          	sw	s11,172(sp)
   100e8:	3cc000ef          	jal	104b4 <get_instret>
   100ec:	00a12423          	sw	a0,8(sp)
   100f0:	3b0000ef          	jal	104a0 <get_cycles>
   100f4:	0001c7b7          	lui	a5,0x1c
   100f8:	51c78793          	add	a5,a5,1308 # 1c51c <__trunctfdf2+0x5b0>
   100fc:	00a12623          	sw	a0,12(sp)
   10100:	03410713          	add	a4,sp,52
   10104:	06478893          	add	a7,a5,100
   10108:	0007a803          	lw	a6,0(a5)
   1010c:	0047a503          	lw	a0,4(a5)
   10110:	0087a583          	lw	a1,8(a5)
   10114:	00c7a603          	lw	a2,12(a5)
   10118:	0107a683          	lw	a3,16(a5)
   1011c:	01072023          	sw	a6,0(a4)
   10120:	00a72223          	sw	a0,4(a4)
   10124:	00b72423          	sw	a1,8(a4)
   10128:	00c72623          	sw	a2,12(a4)
   1012c:	00d72823          	sw	a3,16(a4)
   10130:	01478793          	add	a5,a5,20
   10134:	01470713          	add	a4,a4,20
   10138:	fd1798e3          	bne	a5,a7,10108 <main+0x58>
   1013c:	0007a683          	lw	a3,0(a5)
   10140:	0047a783          	lw	a5,4(a5)
   10144:	01010b13          	add	s6,sp,16
   10148:	00800937          	lui	s2,0x800
   1014c:	3e991cb7          	lui	s9,0x3e991
   10150:	3f164c37          	lui	s8,0x3f164
   10154:	3de98bb7          	lui	s7,0x3de98
   10158:	00d72023          	sw	a3,0(a4)
   1015c:	00f72223          	sw	a5,4(a4)
   10160:	03410a93          	add	s5,sp,52
   10164:	01612223          	sw	s6,4(sp)
   10168:	fff90d93          	add	s11,s2,-1 # 7fffff <__BSS_END__+0x7e1247>
   1016c:	687c8c93          	add	s9,s9,1671 # 3e991687 <__BSS_END__+0x3e9728cf>
   10170:	80000a37          	lui	s4,0x80000
   10174:	7f8009b7          	lui	s3,0x7f800
   10178:	5a2c0c13          	add	s8,s8,1442 # 3f1645a2 <__BSS_END__+0x3f1457ea>
   1017c:	8d5b8b93          	add	s7,s7,-1835 # 3de978d5 <__BSS_END__+0x3de78b1d>
   10180:	00412483          	lw	s1,4(sp)
   10184:	000a8d13          	mv	s10,s5
   10188:	00000413          	li	s0,0
   1018c:	000d2e03          	lw	t3,0(s10)
   10190:	009915b7          	lui	a1,0x991
   10194:	01800513          	li	a0,24
   10198:	01be7333          	and	t1,t3,s11
   1019c:	417e5793          	sra	a5,t3,0x17
   101a0:	01236333          	or	t1,t1,s2
   101a4:	0ff7f893          	zext.b	a7,a5
   101a8:	00000713          	li	a4,0
   101ac:	68758593          	add	a1,a1,1671 # 991687 <__BSS_END__+0x9728cf>
   101b0:	0015f793          	and	a5,a1,1
   101b4:	00e30833          	add	a6,t1,a4
   101b8:	fff50513          	add	a0,a0,-1
   101bc:	22078e63          	beqz	a5,103f8 <main+0x348>
   101c0:	4015d593          	sra	a1,a1,0x1
   101c4:	40185713          	sra	a4,a6,0x1
   101c8:	fe0514e3          	bnez	a0,101b0 <main+0x100>
   101cc:	00080713          	mv	a4,a6
   101d0:	41875593          	sra	a1,a4,0x18
   101d4:	004d2303          	lw	t1,4(s10)
   101d8:	ffe77713          	and	a4,a4,-2
   101dc:	ffe88793          	add	a5,a7,-2
   101e0:	019e4533          	xor	a0,t3,s9
   101e4:	40b75733          	sra	a4,a4,a1
   101e8:	00b787b3          	add	a5,a5,a1
   101ec:	01b77733          	and	a4,a4,s11
   101f0:	01779793          	sll	a5,a5,0x17
   101f4:	01457533          	and	a0,a0,s4
   101f8:	00e56533          	or	a0,a0,a4
   101fc:	01b37eb3          	and	t4,t1,s11
   10200:	0137f733          	and	a4,a5,s3
   10204:	009645b7          	lui	a1,0x964
   10208:	41735793          	sra	a5,t1,0x17
   1020c:	00e56533          	or	a0,a0,a4
   10210:	012eeeb3          	or	t4,t4,s2
   10214:	0ff7fe13          	zext.b	t3,a5
   10218:	01800813          	li	a6,24
   1021c:	00000713          	li	a4,0
   10220:	5a258593          	add	a1,a1,1442 # 9645a2 <__BSS_END__+0x9457ea>
   10224:	0015f793          	and	a5,a1,1
   10228:	00ee88b3          	add	a7,t4,a4
   1022c:	fff80813          	add	a6,a6,-1
   10230:	1a078a63          	beqz	a5,103e4 <main+0x334>
   10234:	4015d593          	sra	a1,a1,0x1
   10238:	4018d713          	sra	a4,a7,0x1
   1023c:	fe0814e3          	bnez	a6,10224 <main+0x174>
   10240:	00088713          	mv	a4,a7
   10244:	41875813          	sra	a6,a4,0x18
   10248:	fffe0793          	add	a5,t3,-1
   1024c:	ffe77713          	and	a4,a4,-2
   10250:	018345b3          	xor	a1,t1,s8
   10254:	41075733          	sra	a4,a4,a6
   10258:	010787b3          	add	a5,a5,a6
   1025c:	01b77733          	and	a4,a4,s11
   10260:	0145f5b3          	and	a1,a1,s4
   10264:	01779793          	sll	a5,a5,0x17
   10268:	00e5e5b3          	or	a1,a1,a4
   1026c:	0137f7b3          	and	a5,a5,s3
   10270:	00f5e5b3          	or	a1,a1,a5
   10274:	330000ef          	jal	105a4 <unsigned_fadd32>
   10278:	008d2e83          	lw	t4,8(s10)
   1027c:	00e985b7          	lui	a1,0xe98
   10280:	01800813          	li	a6,24
   10284:	01befe33          	and	t3,t4,s11
   10288:	417ed793          	sra	a5,t4,0x17
   1028c:	012e6e33          	or	t3,t3,s2
   10290:	0ff7f313          	zext.b	t1,a5
   10294:	00000713          	li	a4,0
   10298:	8d558593          	add	a1,a1,-1835 # e978d5 <__BSS_END__+0xe78b1d>
   1029c:	0015f793          	and	a5,a1,1
   102a0:	00ee08b3          	add	a7,t3,a4
   102a4:	fff80813          	add	a6,a6,-1
   102a8:	12078463          	beqz	a5,103d0 <main+0x320>
   102ac:	4015d593          	sra	a1,a1,0x1
   102b0:	4018d713          	sra	a4,a7,0x1
   102b4:	fe0814e3          	bnez	a6,1029c <main+0x1ec>
   102b8:	00088713          	mv	a4,a7
   102bc:	41875813          	sra	a6,a4,0x18
   102c0:	ffc30793          	add	a5,t1,-4
   102c4:	ffe77713          	and	a4,a4,-2
   102c8:	017ec5b3          	xor	a1,t4,s7
   102cc:	41075733          	sra	a4,a4,a6
   102d0:	010787b3          	add	a5,a5,a6
   102d4:	01b77733          	and	a4,a4,s11
   102d8:	0145f5b3          	and	a1,a1,s4
   102dc:	01779793          	sll	a5,a5,0x17
   102e0:	0137f7b3          	and	a5,a5,s3
   102e4:	00e5e5b3          	or	a1,a1,a4
   102e8:	00f5e5b3          	or	a1,a1,a5
   102ec:	2b8000ef          	jal	105a4 <unsigned_fadd32>
   102f0:	00a4a023          	sw	a0,0(s1)
   102f4:	00140413          	add	s0,s0,1
   102f8:	00300793          	li	a5,3
   102fc:	00cd0d13          	add	s10,s10,12
   10300:	00448493          	add	s1,s1,4
   10304:	e8f414e3          	bne	s0,a5,1018c <main+0xdc>
   10308:	00412783          	lw	a5,4(sp)
   1030c:	024a8a93          	add	s5,s5,36
   10310:	00c78793          	add	a5,a5,12
   10314:	00f12223          	sw	a5,4(sp)
   10318:	0a010793          	add	a5,sp,160
   1031c:	e75792e3          	bne	a5,s5,10180 <main+0xd0>
   10320:	024b0a13          	add	s4,s6,36
   10324:	0001c9b7          	lui	s3,0x1c
   10328:	00300913          	li	s2,3
   1032c:	000b0493          	mv	s1,s6
   10330:	00000413          	li	s0,0
   10334:	0004a503          	lw	a0,0(s1)
   10338:	00140413          	add	s0,s0,1
   1033c:	00448493          	add	s1,s1,4
   10340:	458000ef          	jal	10798 <__extendsfdf2>
   10344:	00050613          	mv	a2,a0
   10348:	00058693          	mv	a3,a1
   1034c:	21898513          	add	a0,s3,536 # 1c218 <__trunctfdf2+0x2ac>
   10350:	139000ef          	jal	10c88 <printf>
   10354:	ff2410e3          	bne	s0,s2,10334 <main+0x284>
   10358:	00a00513          	li	a0,10
   1035c:	00cb0b13          	add	s6,s6,12
   10360:	15b000ef          	jal	10cba <putchar>
   10364:	fd4b14e3          	bne	s6,s4,1032c <main+0x27c>
   10368:	138000ef          	jal	104a0 <get_cycles>
   1036c:	00c12783          	lw	a5,12(sp)
   10370:	40f505b3          	sub	a1,a0,a5
   10374:	0001c537          	lui	a0,0x1c
   10378:	21c50513          	add	a0,a0,540 # 1c21c <__trunctfdf2+0x2b0>
   1037c:	10d000ef          	jal	10c88 <printf>
   10380:	00812583          	lw	a1,8(sp)
   10384:	0001c537          	lui	a0,0x1c
   10388:	23050513          	add	a0,a0,560 # 1c230 <__trunctfdf2+0x2c4>
   1038c:	0fd000ef          	jal	10c88 <printf>
   10390:	0dc12083          	lw	ra,220(sp)
   10394:	0d812403          	lw	s0,216(sp)
   10398:	0d412483          	lw	s1,212(sp)
   1039c:	0d012903          	lw	s2,208(sp)
   103a0:	0cc12983          	lw	s3,204(sp)
   103a4:	0c812a03          	lw	s4,200(sp)
   103a8:	0c412a83          	lw	s5,196(sp)
   103ac:	0c012b03          	lw	s6,192(sp)
   103b0:	0bc12b83          	lw	s7,188(sp)
   103b4:	0b812c03          	lw	s8,184(sp)
   103b8:	0b412c83          	lw	s9,180(sp)
   103bc:	0b012d03          	lw	s10,176(sp)
   103c0:	0ac12d83          	lw	s11,172(sp)
   103c4:	00000513          	li	a0,0
   103c8:	0e010113          	add	sp,sp,224
   103cc:	00008067          	ret
   103d0:	40175793          	sra	a5,a4,0x1
   103d4:	4015d593          	sra	a1,a1,0x1
   103d8:	ee0802e3          	beqz	a6,102bc <main+0x20c>
   103dc:	00078713          	mv	a4,a5
   103e0:	ebdff06f          	j	1029c <main+0x1ec>
   103e4:	40175793          	sra	a5,a4,0x1
   103e8:	4015d593          	sra	a1,a1,0x1
   103ec:	e4080ce3          	beqz	a6,10244 <main+0x194>
   103f0:	00078713          	mv	a4,a5
   103f4:	e31ff06f          	j	10224 <main+0x174>
   103f8:	40175793          	sra	a5,a4,0x1
   103fc:	4015d593          	sra	a1,a1,0x1
   10400:	dc0508e3          	beqz	a0,101d0 <main+0x120>
   10404:	00078713          	mv	a4,a5
   10408:	da9ff06f          	j	101b0 <main+0x100>
   
000104c8 <swap>:
   104c8:	00052703          	lw	a4,0(a0)
   104cc:	0005a783          	lw	a5,0(a1)
   104d0:	00e5a023          	sw	a4,0(a1)
   104d4:	00f52023          	sw	a5,0(a0)
   104d8:	00008067          	ret

000104dc <imul32>:
   104dc:	02058c63          	beqz	a1,10514 <imul32+0x38>
   104e0:	00000713          	li	a4,0
   104e4:	0015f793          	and	a5,a1,1
   104e8:	00a706b3          	add	a3,a4,a0
   104ec:	4015d593          	sra	a1,a1,0x1
   104f0:	00078a63          	beqz	a5,10504 <imul32+0x28>
   104f4:	4016d713          	sra	a4,a3,0x1
   104f8:	fe0596e3          	bnez	a1,104e4 <imul32+0x8>
   104fc:	00171513          	sll	a0,a4,0x1
   10500:	00008067          	ret
   10504:	40175713          	sra	a4,a4,0x1
   10508:	fc059ee3          	bnez	a1,104e4 <imul32+0x8>
   1050c:	00171513          	sll	a0,a4,0x1
   10510:	00008067          	ret
   10514:	00000513          	li	a0,0
   10518:	00008067          	ret

0001051c <count_leading_zeros>:
   1051c:	00155793          	srl	a5,a0,0x1
   10520:	00a7e533          	or	a0,a5,a0
   10524:	00255793          	srl	a5,a0,0x2
   10528:	00a7e7b3          	or	a5,a5,a0
   1052c:	0047d513          	srl	a0,a5,0x4
   10530:	00f56533          	or	a0,a0,a5
   10534:	00855713          	srl	a4,a0,0x8
   10538:	00a76733          	or	a4,a4,a0
   1053c:	01075793          	srl	a5,a4,0x10
   10540:	00e7e7b3          	or	a5,a5,a4
   10544:	555556b7          	lui	a3,0x55555
   10548:	0017d713          	srl	a4,a5,0x1
   1054c:	55568693          	add	a3,a3,1365 # 55555555 <__BSS_END__+0x5553679d>
   10550:	00d77733          	and	a4,a4,a3
   10554:	40e787b3          	sub	a5,a5,a4
   10558:	333336b7          	lui	a3,0x33333
   1055c:	33368693          	add	a3,a3,819 # 33333333 <__BSS_END__+0x3331457b>
   10560:	0027d713          	srl	a4,a5,0x2
   10564:	00d77733          	and	a4,a4,a3
   10568:	00d7f7b3          	and	a5,a5,a3
   1056c:	00f70733          	add	a4,a4,a5
   10570:	00475793          	srl	a5,a4,0x4
   10574:	0f0f16b7          	lui	a3,0xf0f1
   10578:	00e787b3          	add	a5,a5,a4
   1057c:	f0f68693          	add	a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d2157>
   10580:	00d7f7b3          	and	a5,a5,a3
   10584:	0087d713          	srl	a4,a5,0x8
   10588:	00f70733          	add	a4,a4,a5
   1058c:	01075793          	srl	a5,a4,0x10
   10590:	00e787b3          	add	a5,a5,a4
   10594:	07f7f793          	and	a5,a5,127
   10598:	02000513          	li	a0,32
   1059c:	40f50533          	sub	a0,a0,a5
   105a0:	00008067          	ret

000105a4 <unsigned_fadd32>:
   105a4:	800007b7          	lui	a5,0x80000
   105a8:	fff78793          	add	a5,a5,-1 # 7fffffff <__BSS_END__+0x7ffe1247>
   105ac:	00a7f733          	and	a4,a5,a0
   105b0:	00b7f7b3          	and	a5,a5,a1
   105b4:	00050813          	mv	a6,a0
   105b8:	00058613          	mv	a2,a1
   105bc:	00f74663          	blt	a4,a5,105c8 <unsigned_fadd32+0x24>
   105c0:	00050613          	mv	a2,a0
   105c4:	00058813          	mv	a6,a1
   105c8:	008008b7          	lui	a7,0x800
   105cc:	41765713          	sra	a4,a2,0x17
   105d0:	41785793          	sra	a5,a6,0x17
   105d4:	fff88693          	add	a3,a7,-1 # 7fffff <__BSS_END__+0x7e1247>
   105d8:	0ff77713          	zext.b	a4,a4
   105dc:	0ff7f793          	zext.b	a5,a5
   105e0:	00d675b3          	and	a1,a2,a3
   105e4:	40f707b3          	sub	a5,a4,a5
   105e8:	00d876b3          	and	a3,a6,a3
   105ec:	01800513          	li	a0,24
   105f0:	0115e5b3          	or	a1,a1,a7
   105f4:	0116e6b3          	or	a3,a3,a7
   105f8:	00f55463          	bge	a0,a5,10600 <unsigned_fadd32+0x5c>
   105fc:	01800793          	li	a5,24
   10600:	40f6d7b3          	sra	a5,a3,a5
   10604:	01064833          	xor	a6,a2,a6
   10608:	00f586b3          	add	a3,a1,a5
   1060c:	00085463          	bgez	a6,10614 <unsigned_fadd32+0x70>
   10610:	40f586b3          	sub	a3,a1,a5
   10614:	0016d793          	srl	a5,a3,0x1
   10618:	00d7e7b3          	or	a5,a5,a3
   1061c:	0027d593          	srl	a1,a5,0x2
   10620:	00b7e7b3          	or	a5,a5,a1
   10624:	0047d593          	srl	a1,a5,0x4
   10628:	00b7e7b3          	or	a5,a5,a1
   1062c:	0087d593          	srl	a1,a5,0x8
   10630:	00b7e7b3          	or	a5,a5,a1
   10634:	0107d593          	srl	a1,a5,0x10
   10638:	00b7e7b3          	or	a5,a5,a1
   1063c:	55555537          	lui	a0,0x55555
   10640:	0017d593          	srl	a1,a5,0x1
   10644:	55550513          	add	a0,a0,1365 # 55555555 <__BSS_END__+0x5553679d>
   10648:	00a5f5b3          	and	a1,a1,a0
   1064c:	40b787b3          	sub	a5,a5,a1
   10650:	33333537          	lui	a0,0x33333
   10654:	33350513          	add	a0,a0,819 # 33333333 <__BSS_END__+0x3331457b>
   10658:	0027d593          	srl	a1,a5,0x2
   1065c:	00a5f5b3          	and	a1,a1,a0
   10660:	00a7f7b3          	and	a5,a5,a0
   10664:	00f585b3          	add	a1,a1,a5
   10668:	0045d793          	srl	a5,a1,0x4
   1066c:	0f0f1537          	lui	a0,0xf0f1
   10670:	00b787b3          	add	a5,a5,a1
   10674:	f0f50513          	add	a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d2157>
   10678:	00a7f7b3          	and	a5,a5,a0
   1067c:	0087d593          	srl	a1,a5,0x8
   10680:	00b787b3          	add	a5,a5,a1
   10684:	0107d593          	srl	a1,a5,0x10
   10688:	00b787b3          	add	a5,a5,a1
   1068c:	07f7f793          	and	a5,a5,127
   10690:	02000593          	li	a1,32
   10694:	40f585b3          	sub	a1,a1,a5
   10698:	00800513          	li	a0,8
   1069c:	02b54863          	blt	a0,a1,106cc <unsigned_fadd32+0x128>
   106a0:	fe878793          	add	a5,a5,-24
   106a4:	40f6d6b3          	sra	a3,a3,a5
   106a8:	00f70733          	add	a4,a4,a5
   106ac:	00969693          	sll	a3,a3,0x9
   106b0:	0096d693          	srl	a3,a3,0x9
   106b4:	800007b7          	lui	a5,0x80000
   106b8:	01771713          	sll	a4,a4,0x17
   106bc:	00d76733          	or	a4,a4,a3
   106c0:	00f67533          	and	a0,a2,a5
   106c4:	00a76533          	or	a0,a4,a0
   106c8:	00008067          	ret
   106cc:	01800593          	li	a1,24
   106d0:	40f587b3          	sub	a5,a1,a5
   106d4:	00f696b3          	sll	a3,a3,a5
   106d8:	40f70733          	sub	a4,a4,a5
   106dc:	00969693          	sll	a3,a3,0x9
   106e0:	0096d693          	srl	a3,a3,0x9
   106e4:	800007b7          	lui	a5,0x80000
   106e8:	01771713          	sll	a4,a4,0x17
   106ec:	00d76733          	or	a4,a4,a3
   106f0:	00f67533          	and	a0,a2,a5
   106f4:	00a76533          	or	a0,a4,a0
   106f8:	00008067          	ret

000106fc <fmul32>:
   106fc:	008006b7          	lui	a3,0x800
   10700:	fff68793          	add	a5,a3,-1 # 7fffff <__BSS_END__+0x7e1247>
   10704:	00a7f8b3          	and	a7,a5,a0
   10708:	41755713          	sra	a4,a0,0x17
   1070c:	00b7f7b3          	and	a5,a5,a1
   10710:	4175d313          	sra	t1,a1,0x17
   10714:	00d8e8b3          	or	a7,a7,a3
   10718:	00d7e7b3          	or	a5,a5,a3
   1071c:	0ff77613          	zext.b	a2,a4
   10720:	0ff37313          	zext.b	t1,t1
   10724:	00000693          	li	a3,0
   10728:	0017f713          	and	a4,a5,1
   1072c:	00d88833          	add	a6,a7,a3
   10730:	4017d793          	sra	a5,a5,0x1
   10734:	04070a63          	beqz	a4,10788 <fmul32+0x8c>
   10738:	40185693          	sra	a3,a6,0x1
   1073c:	fe0796e3          	bnez	a5,10728 <fmul32+0x2c>
   10740:	00080693          	mv	a3,a6
   10744:	4186d813          	sra	a6,a3,0x18
   10748:	ffe6f793          	and	a5,a3,-2
   1074c:	00660733          	add	a4,a2,t1
   10750:	4107d7b3          	sra	a5,a5,a6
   10754:	f8170713          	add	a4,a4,-127
   10758:	01070733          	add	a4,a4,a6
   1075c:	00a5c533          	xor	a0,a1,a0
   10760:	800006b7          	lui	a3,0x80000
   10764:	00979793          	sll	a5,a5,0x9
   10768:	00d57533          	and	a0,a0,a3
   1076c:	0097d793          	srl	a5,a5,0x9
   10770:	7f8006b7          	lui	a3,0x7f800
   10774:	01771713          	sll	a4,a4,0x17
   10778:	00d77733          	and	a4,a4,a3
   1077c:	00f56533          	or	a0,a0,a5
   10780:	00a76533          	or	a0,a4,a0
   10784:	00008067          	ret
   10788:	4016d713          	sra	a4,a3,0x1
   1078c:	fa078ce3          	beqz	a5,10744 <fmul32+0x48>
   10790:	00070693          	mv	a3,a4
   10794:	f95ff06f          	j	10728 <fmul32+0x2c>

elf size

   text	   data	    bss	    dec	    hex	filename
  53198	   1876	   1528	  56602	   dd1a	main.elf

elf header

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1041e
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69944 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

execute

0.389692 0.111821 0.675161 
0.662995 0.264999 0.566176 
0.431446 0.448845 0.454146 
cycle count: 59137
instret: 2c8
inferior exit code 0

conclusion

  • O2 uses the minimum number of cycles.
  • O3 and Ofast use the maximum number of cycles.
  • Although O3 and Ofast have the lowest instret (retired-instruction count), they take more cycles. In contrast, O1, which uses the fewest cycles, has a higher instret.

Show me the handwritten RISC-V assembly code.

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv