# Assignment2: GNU Toolchain
contributed by < [TRChen11011](https://github.com/TRChen11011/GNU-Toolchain) >
## Install VMware and Ubuntu
I use ==Ubuntu Linux 22.04== on VMware.
Because using clz to implement multiplication is cool.
## Instructions
* Set the environment variables
> cd $HOME
> source riscv-none-elf-gcc/setenv
* make
> make
* Run code
> build/rv32emu test.elf
* Display the assembler mnemonics for the machine instructions and store to .txt file
> riscv-none-elf-objdump -d test > test.txt
* Check elf size
> riscv-none-elf-size ./test
## Before we modify the code
We use the perfcount example to read the CSRs (control and status registers).
The CSRs are used to count the cycles of the code.
**perfcount/main.c**
```c=
#include <stdint.h>
#include <stdio.h>
#include <string.h>
// These functions are defined in the same directory (perfcount) and are used to read the CSRs.
extern uint64_t get_cycles();
extern uint64_t get_instret();
/*
 * Measurement harness template.
 *
 * Samples the retired-instruction counter and the cycle counter, leaves a
 * slot for the code under test, then prints the number of cycles that
 * elapsed across that slot. The instret value printed is the sample taken
 * before the slot (low 32 bits, in hexadecimal), not a difference.
 */
int main(void)
{
    /* Take both samples before the code under test runs. */
    const uint64_t instret_sample = get_instret();
    const uint64_t cycles_before = get_cycles();

    /* fill the C code you choose here, so you can get the CSR */

    const uint64_t cycles_after = get_cycles();

    /* Only the low 32 bits of each counter value are reported. */
    const unsigned int elapsed = (unsigned int) (cycles_after - cycles_before);
    const unsigned int instret_low = (unsigned) (instret_sample & 0xffffffff);

    printf("cycle count: %u\n", elapsed);
    printf("instret: %x\n", instret_low);
    return 0;
}
```
We also have to modify the Makefile:
```=
# Build rules for the perfcount example.
# Makefile comments start with '#'; C-style /* ... */ is not a comment in
# make and would be parsed as part of a rule or a variable value.

.PHONY: clean

include ../../mk/toolchain.mk

# Change the optimization level here (-O1, -O2, -O3, -Os, -Ofast, ...).
CFLAGS = -march=rv32i_zicsr_zifencei -mabi=ilp32 -O1 -Wall

# Object files to link. If you add a source file, add its .o to this list.
# Keep comments out of the backslash-continued list: a comment line there
# would terminate (or swallow) the continuation.
OBJS = \
	getcycles.o \
	getinstret.o \
	main.o

# The final result you generate. The comment is on its own line so that
# no extra text or whitespace ends up in the value of BIN.
BIN = perfcount.elf

# Assemble .S sources.
%.o: %.S
	$(CROSS_COMPILE)gcc $(CFLAGS) -c -o $@ $<

# Compile .c sources.
%.o: %.c
	$(CROSS_COMPILE)gcc $(CFLAGS) -c -o $@ $<

all: $(BIN)

# Link all objects into the ELF.
$(BIN): $(OBJS)
	$(CROSS_COMPILE)gcc -o $@ $^

clean:
	$(RM) $(BIN) $(OBJS)
```
## Chosen Project
I chose the project from < [陳彥佑](https://hackmd.io/ZlqmPMmHQieGU7-esbziRw?both) >
The project uses clz to implement a 32-bit × 32-bit multiplier.
### C code after modification to read the CSRs
```c=
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
extern uint64_t get_cycles();
extern uint64_t get_instret();
/*
 * Count the leading zero bits of a 32-bit value.
 *
 * The highest set bit is first propagated into every lower bit position,
 * then the set bits are counted; the leading-zero count is 32 minus that
 * population count.
 *
 * Returns a value in the range 0..32 (32 when x is 0).
 */
uint16_t CLZ_32(uint32_t x)
{
    /* Smear the most significant set bit downwards: shifts of 1,2,4,8,16. */
    for (unsigned shift = 1; shift <= 16; shift <<= 1) {
        x |= x >> shift;
    }

    /* Parallel bit count: sums over 2-bit, 4-bit, 8-bit, 16-bit, 32-bit fields. */
    uint32_t pairs = x - ((x >> 1) & 0x55555555);
    uint32_t nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
    uint32_t bytes = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;
    uint32_t halves = bytes + (bytes >> 8);
    uint32_t ones = (halves + (halves >> 16)) & 0x3f;

    return (uint16_t) (32 - ones);
}
/*
 * Multiply two 32-bit unsigned values into a 64-bit product using
 * shift-and-add.
 *
 * The operand with more leading zeros (as reported by CLZ_32) is used as
 * the multiplier, so the loop only visits its significant bits. When both
 * operands have the same number of leading zeros, B is used as the
 * multiplier.
 *
 * A, B: the two factors.
 * Returns the full 64-bit product A * B.
 */
uint64_t efficient_int_mul(uint32_t A, uint32_t B)
{
    const uint16_t lz_a = CLZ_32(A);
    const uint16_t lz_b = CLZ_32(B);

    /* Default: scan the bits of A and accumulate shifted copies of B. */
    uint32_t multiplier = A;
    uint32_t multiplicand = B;
    uint16_t skipped = lz_a;

    if (lz_a <= lz_b) {
        /* B has at least as many leading zeros: scan B instead. */
        multiplier = B;
        multiplicand = A;
        skipped = lz_b;
    }

    uint64_t product = 0;
    uint64_t addend = multiplicand; /* multiplicand << (current bit index) */

    /* Visit only the 32 - skipped low bits of the multiplier. */
    for (int remaining = 32 - skipped; remaining > 0; remaining--) {
        if (multiplier & 1u) {
            product += addend;
        }
        multiplier >>= 1;
        addend <<= 1;
    }

    return product;
}
/*
 * Measure how many cycles efficient_int_mul() takes for one fixed pair of
 * operands and print the cycle count, an instret sample and the product.
 *
 * The unused `state[WORDS]` array was removed: `WORDS` is not defined
 * anywhere in this listing, so the declaration prevented compilation.
 */
int main(void)
{
    /*
     * measure cycles
     *
     * NOTE: instret is sampled once, before the measured call, and printed
     * unchanged below. It is the counter value at this point, not the
     * number of instructions retired by efficient_int_mul().
     */
    uint64_t instret = get_instret();
    uint64_t oldcount = get_cycles();

    uint32_t A = 0x12345678;
    uint32_t B = 0xffffdddd;
    uint64_t result = efficient_int_mul(A, B);

    /* Cycles elapsed between the two get_cycles() samples. */
    uint64_t cyclecount = get_cycles() - oldcount;

    /* Only the low 32 bits of the counters are printed. */
    printf("cycle count: %u\n", (unsigned int) cyclecount);
    printf("instret: %x\n", (unsigned) (instret & 0xffffffff));
    printf("uint64: %"PRIX64"\n", result);
    return 0;
}
```
## Assembly code
### -O1 Assembly code
```asm=
0001016c <CLZ_32>:
1016c: 00155793 srl a5,a0,0x1
10170: 00a7e533 or a0,a5,a0
10174: 00255793 srl a5,a0,0x2
10178: 00a7e7b3 or a5,a5,a0
1017c: 0047d513 srl a0,a5,0x4
10180: 00f56533 or a0,a0,a5
10184: 00855713 srl a4,a0,0x8
10188: 00a76733 or a4,a4,a0
1018c: 01075793 srl a5,a4,0x10
10190: 00e7e7b3 or a5,a5,a4
10194: 0017d713 srl a4,a5,0x1
10198: 555556b7 lui a3,0x55555
1019c: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x55537805>
101a0: 00d77733 and a4,a4,a3
101a4: 40e787b3 sub a5,a5,a4
101a8: 0027d713 srl a4,a5,0x2
101ac: 333336b7 lui a3,0x33333
101b0: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x333155e3>
101b4: 00d77733 and a4,a4,a3
101b8: 00d7f7b3 and a5,a5,a3
101bc: 00f70733 add a4,a4,a5
101c0: 00475793 srl a5,a4,0x4
101c4: 00e787b3 add a5,a5,a4
101c8: 0f0f1737 lui a4,0xf0f1
101cc: f0f70713 add a4,a4,-241 # f0f0f0f <__BSS_END__+0xf0d31bf>
101d0: 00e7f7b3 and a5,a5,a4
101d4: 0087d713 srl a4,a5,0x8
101d8: 00f70733 add a4,a4,a5
101dc: 01075793 srl a5,a4,0x10
101e0: 00e787b3 add a5,a5,a4
101e4: 03f7f793 and a5,a5,63
101e8: 02000513 li a0,32
101ec: 40f50533 sub a0,a0,a5
101f0: 01051513 sll a0,a0,0x10
101f4: 01055513 srl a0,a0,0x10
101f8: 00008067 ret
000101fc <efficient_int_mul>:
101fc: ff010113 add sp,sp,-16
10200: 00112623 sw ra,12(sp)
10204: 00812423 sw s0,8(sp)
10208: 00912223 sw s1,4(sp)
1020c: 01212023 sw s2,0(sp)
10210: 00050493 mv s1,a0
10214: 00058413 mv s0,a1
10218: f55ff0ef jal 1016c <CLZ_32>
1021c: 00050913 mv s2,a0
10220: 00040513 mv a0,s0
10224: f49ff0ef jal 1016c <CLZ_32>
10228: 01257a63 bgeu a0,s2,1023c <efficient_int_mul+0x40>
1022c: 00090513 mv a0,s2
10230: 00040793 mv a5,s0
10234: 00048413 mv s0,s1
10238: 00078493 mv s1,a5
1023c: 02000613 li a2,32
10240: 40a60633 sub a2,a2,a0
10244: 06c05463 blez a2,102ac <efficient_int_mul+0xb0>
10248: 00000793 li a5,0
1024c: 00000513 li a0,0
10250: 00000593 li a1,0
10254: 0014de13 srl t3,s1,0x1
10258: 01f00313 li t1,31
1025c: 00000893 li a7,0
10260: 02c0006f j 1028c <efficient_int_mul+0x90>
10264: 40f30733 sub a4,t1,a5
10268: 00ee5733 srl a4,t3,a4
1026c: 00f496b3 sll a3,s1,a5
10270: 00d506b3 add a3,a0,a3
10274: 00a6b833 sltu a6,a3,a0
10278: 00e585b3 add a1,a1,a4
1027c: 00068513 mv a0,a3
10280: 00b805b3 add a1,a6,a1
10284: 00178793 add a5,a5,1
10288: 02c78663 beq a5,a2,102b4 <efficient_int_mul+0xb8>
1028c: 00f45733 srl a4,s0,a5
10290: 00177713 and a4,a4,1
10294: fe0708e3 beqz a4,10284 <efficient_int_mul+0x88>
10298: fe078713 add a4,a5,-32
1029c: fc0744e3 bltz a4,10264 <efficient_int_mul+0x68>
102a0: 00e49733 sll a4,s1,a4
102a4: 00088693 mv a3,a7
102a8: fc9ff06f j 10270 <efficient_int_mul+0x74>
102ac: 00000513 li a0,0
102b0: 00000593 li a1,0
102b4: 00c12083 lw ra,12(sp)
102b8: 00812403 lw s0,8(sp)
102bc: 00412483 lw s1,4(sp)
102c0: 00012903 lw s2,0(sp)
102c4: 01010113 add sp,sp,16
102c8: 00008067 ret
000102cc <main>:
102cc: fe010113 add sp,sp,-32
102d0: 00112e23 sw ra,28(sp)
102d4: 00812c23 sw s0,24(sp)
102d8: 00912a23 sw s1,20(sp)
102dc: 01212823 sw s2,16(sp)
102e0: 01312623 sw s3,12(sp)
102e4: e75ff0ef jal 10158 <get_instret>
102e8: 00050913 mv s2,a0
102ec: e59ff0ef jal 10144 <get_cycles>
102f0: 00050993 mv s3,a0
102f4: ffffe5b7 lui a1,0xffffe
102f8: ddd58593 add a1,a1,-547 # ffffdddd <__BSS_END__+0xfffe008d>
102fc: 12345537 lui a0,0x12345
10300: 67850513 add a0,a0,1656 # 12345678 <__BSS_END__+0x12327928>
10304: ef9ff0ef jal 101fc <efficient_int_mul>
10308: 00050493 mv s1,a0
1030c: 00058413 mv s0,a1
10310: e35ff0ef jal 10144 <get_cycles>
10314: 413505b3 sub a1,a0,s3
10318: 0001c537 lui a0,0x1c
1031c: bf050513 add a0,a0,-1040 # 1bbf0 <__clzsi2+0x6e>
10320: 424000ef jal 10744 <printf>
10324: 00090593 mv a1,s2
10328: 0001c537 lui a0,0x1c
1032c: c0450513 add a0,a0,-1020 # 1bc04 <__clzsi2+0x82>
10330: 414000ef jal 10744 <printf>
10334: 00048613 mv a2,s1
10338: 00040693 mv a3,s0
1033c: 0001c537 lui a0,0x1c
10340: c1450513 add a0,a0,-1004 # 1bc14 <__clzsi2+0x92>
10344: 400000ef jal 10744 <printf>
10348: 00000513 li a0,0
1034c: 01c12083 lw ra,28(sp)
10350: 01812403 lw s0,24(sp)
10354: 01412483 lw s1,20(sp)
10358: 01012903 lw s2,16(sp)
1035c: 00c12983 lw s3,12(sp)
10360: 02010113 add sp,sp,32
10364: 00008067 ret
```
#### elf size

#### execute

- observe
- Total cycle count : `393`
- Total instruction count : `2c7`
- Register use : `$ra, $sp, $a0~$a7, $s0~$s3, $t1, $t3`
### -O2 Assembly code
```asm=
00010208 <CLZ_32>:
10208: 00155793 srl a5,a0,0x1
1020c: 00a7e533 or a0,a5,a0
10210: 00255793 srl a5,a0,0x2
10214: 00a7e7b3 or a5,a5,a0
10218: 0047d513 srl a0,a5,0x4
1021c: 00f56533 or a0,a0,a5
10220: 00855713 srl a4,a0,0x8
10224: 00a76733 or a4,a4,a0
10228: 01075793 srl a5,a4,0x10
1022c: 00e7e7b3 or a5,a5,a4
10230: 555556b7 lui a3,0x55555
10234: 0017d713 srl a4,a5,0x1
10238: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x55537805>
1023c: 00d77733 and a4,a4,a3
10240: 40e787b3 sub a5,a5,a4
10244: 333336b7 lui a3,0x33333
10248: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x333155e3>
1024c: 0027d713 srl a4,a5,0x2
10250: 00d77733 and a4,a4,a3
10254: 00d7f7b3 and a5,a5,a3
10258: 00f70733 add a4,a4,a5
1025c: 00475793 srl a5,a4,0x4
10260: 0f0f16b7 lui a3,0xf0f1
10264: 00e787b3 add a5,a5,a4
10268: f0f68693 add a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d31bf>
1026c: 00d7f7b3 and a5,a5,a3
10270: 0087d713 srl a4,a5,0x8
10274: 00f70733 add a4,a4,a5
10278: 01075793 srl a5,a4,0x10
1027c: 00e787b3 add a5,a5,a4
10280: 03f7f793 and a5,a5,63
10284: 02000513 li a0,32
10288: 40f50533 sub a0,a0,a5
1028c: 01051513 sll a0,a0,0x10
10290: 01055513 srl a0,a0,0x10
10294: 00008067 ret
00010298 <efficient_int_mul>:
10298: ff010113 add sp,sp,-16
1029c: 00812423 sw s0,8(sp)
102a0: 00912223 sw s1,4(sp)
102a4: 01212023 sw s2,0(sp)
102a8: 00112623 sw ra,12(sp)
102ac: 00058413 mv s0,a1
102b0: 00050493 mv s1,a0
102b4: f55ff0ef jal 10208 <CLZ_32>
102b8: 00050913 mv s2,a0
102bc: 00040513 mv a0,s0
102c0: f49ff0ef jal 10208 <CLZ_32>
102c4: 01257a63 bgeu a0,s2,102d8 <efficient_int_mul+0x40>
102c8: 00040793 mv a5,s0
102cc: 00090513 mv a0,s2
102d0: 00048413 mv s0,s1
102d4: 00078493 mv s1,a5
102d8: 02000613 li a2,32
102dc: 40a60633 sub a2,a2,a0
102e0: 06c05263 blez a2,10344 <efficient_int_mul+0xac>
102e4: 00000793 li a5,0
102e8: 00000513 li a0,0
102ec: 00000593 li a1,0
102f0: 0014d313 srl t1,s1,0x1
102f4: 01f00893 li a7,31
102f8: 0280006f j 10320 <efficient_int_mul+0x88>
102fc: 00d496b3 sll a3,s1,a3
10300: 00000713 li a4,0
10304: 00e50733 add a4,a0,a4
10308: 00a73833 sltu a6,a4,a0
1030c: 00d585b3 add a1,a1,a3
10310: 00070513 mv a0,a4
10314: 00b805b3 add a1,a6,a1
10318: 00178793 add a5,a5,1
1031c: 02f60863 beq a2,a5,1034c <efficient_int_mul+0xb4>
10320: 00f45733 srl a4,s0,a5
10324: 00177713 and a4,a4,1
10328: fe078693 add a3,a5,-32
1032c: fe0706e3 beqz a4,10318 <efficient_int_mul+0x80>
10330: 40f88833 sub a6,a7,a5
10334: fc06d4e3 bgez a3,102fc <efficient_int_mul+0x64>
10338: 00f49733 sll a4,s1,a5
1033c: 010356b3 srl a3,t1,a6
10340: fc5ff06f j 10304 <efficient_int_mul+0x6c>
10344: 00000513 li a0,0
10348: 00000593 li a1,0
1034c: 00c12083 lw ra,12(sp)
10350: 00812403 lw s0,8(sp)
10354: 00412483 lw s1,4(sp)
10358: 00012903 lw s2,0(sp)
1035c: 01010113 add sp,sp,16
10360: 00008067 ret
000100b0 <main>:
100b0: fe010113 add sp,sp,-32
100b4: 00112e23 sw ra,28(sp)
100b8: 00812c23 sw s0,24(sp)
100bc: 00912a23 sw s1,20(sp)
100c0: 01212823 sw s2,16(sp)
100c4: 01312623 sw s3,12(sp)
100c8: 12c000ef jal 101f4 <get_instret>
100cc: 00050913 mv s2,a0
100d0: 110000ef jal 101e0 <get_cycles>
100d4: 00050993 mv s3,a0
100d8: ffffe5b7 lui a1,0xffffe
100dc: 12345537 lui a0,0x12345
100e0: ddd58593 add a1,a1,-547 # ffffdddd <__BSS_END__+0xfffe008d>
100e4: 67850513 add a0,a0,1656 # 12345678 <__BSS_END__+0x12327928>
100e8: 1b0000ef jal 10298 <efficient_int_mul>
100ec: 00050493 mv s1,a0
100f0: 00058413 mv s0,a1
100f4: 0ec000ef jal 101e0 <get_cycles>
100f8: 413505b3 sub a1,a0,s3
100fc: 0001c537 lui a0,0x1c
10100: bf050513 add a0,a0,-1040 # 1bbf0 <__clzsi2+0x72>
10104: 63c000ef jal 10740 <printf>
10108: 0001c537 lui a0,0x1c
1010c: 00090593 mv a1,s2
10110: c0450513 add a0,a0,-1020 # 1bc04 <__clzsi2+0x86>
10114: 62c000ef jal 10740 <printf>
10118: 0001c537 lui a0,0x1c
1011c: 00048613 mv a2,s1
10120: 00040693 mv a3,s0
10124: c1450513 add a0,a0,-1004 # 1bc14 <__clzsi2+0x96>
10128: 618000ef jal 10740 <printf>
1012c: 01c12083 lw ra,28(sp)
10130: 01812403 lw s0,24(sp)
10134: 01412483 lw s1,20(sp)
10138: 01012903 lw s2,16(sp)
1013c: 00c12983 lw s3,12(sp)
10140: 00000513 li a0,0
10144: 02010113 add sp,sp,32
10148: 00008067 ret
```
#### elf size

#### execute

- observe
- Total cycle count : `421`
- Total instruction count : `2c7`
- Register use : `$ra, $sp, $a0~$a7, $s0~$s3, $t1`
### -O3 Assembly code
```asm=
00010208 <CLZ_32>:
10208: 00155793 srl a5,a0,0x1
1020c: 00a7e533 or a0,a5,a0
10210: 00255793 srl a5,a0,0x2
10214: 00a7e7b3 or a5,a5,a0
10218: 0047d513 srl a0,a5,0x4
1021c: 00f56533 or a0,a0,a5
10220: 00855713 srl a4,a0,0x8
10224: 00a76733 or a4,a4,a0
10228: 01075793 srl a5,a4,0x10
1022c: 00e7e7b3 or a5,a5,a4
10230: 555556b7 lui a3,0x55555
10234: 0017d713 srl a4,a5,0x1
10238: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x55537805>
1023c: 00d77733 and a4,a4,a3
10240: 40e787b3 sub a5,a5,a4
10244: 333336b7 lui a3,0x33333
10248: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x333155e3>
1024c: 0027d713 srl a4,a5,0x2
10250: 00d77733 and a4,a4,a3
10254: 00d7f7b3 and a5,a5,a3
10258: 00f70733 add a4,a4,a5
1025c: 00475793 srl a5,a4,0x4
10260: 0f0f16b7 lui a3,0xf0f1
10264: 00e787b3 add a5,a5,a4
10268: f0f68693 add a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d31bf>
1026c: 00d7f7b3 and a5,a5,a3
10270: 0087d713 srl a4,a5,0x8
10274: 00f70733 add a4,a4,a5
10278: 01075793 srl a5,a4,0x10
1027c: 00e787b3 add a5,a5,a4
10280: 03f7f793 and a5,a5,63
10284: 02000513 li a0,32
10288: 40f50533 sub a0,a0,a5
1028c: 01051513 sll a0,a0,0x10
10290: 01055513 srl a0,a0,0x10
10294: 00008067 ret
00010298 <efficient_int_mul>:
10298: 00155713 srl a4,a0,0x1
1029c: 0015d793 srl a5,a1,0x1
102a0: 00a76733 or a4,a4,a0
102a4: 00b7e7b3 or a5,a5,a1
102a8: 0027d693 srl a3,a5,0x2
102ac: 00058613 mv a2,a1
102b0: 00275593 srl a1,a4,0x2
102b4: 00b76733 or a4,a4,a1
102b8: 00d7e7b3 or a5,a5,a3
102bc: 00475593 srl a1,a4,0x4
102c0: 0047d693 srl a3,a5,0x4
102c4: 00b76733 or a4,a4,a1
102c8: 00d7e7b3 or a5,a5,a3
102cc: 00875593 srl a1,a4,0x8
102d0: 0087d693 srl a3,a5,0x8
102d4: 00b76733 or a4,a4,a1
102d8: 00d7e7b3 or a5,a5,a3
102dc: 01075593 srl a1,a4,0x10
102e0: 0107d693 srl a3,a5,0x10
102e4: 00b76733 or a4,a4,a1
102e8: 00d7e7b3 or a5,a5,a3
102ec: 555556b7 lui a3,0x55555
102f0: 0017d593 srl a1,a5,0x1
102f4: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x55537805>
102f8: 00050813 mv a6,a0
102fc: 00175513 srl a0,a4,0x1
10300: 00d57533 and a0,a0,a3
10304: 00d5f6b3 and a3,a1,a3
10308: 40a70733 sub a4,a4,a0
1030c: 40d787b3 sub a5,a5,a3
10310: 33333537 lui a0,0x33333
10314: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x333155e3>
10318: 00275593 srl a1,a4,0x2
1031c: 0027d693 srl a3,a5,0x2
10320: 00a77733 and a4,a4,a0
10324: 00a7f7b3 and a5,a5,a0
10328: 00a5f5b3 and a1,a1,a0
1032c: 00a6f6b3 and a3,a3,a0
10330: 00e585b3 add a1,a1,a4
10334: 00f686b3 add a3,a3,a5
10338: 0046d713 srl a4,a3,0x4
1033c: 0045d793 srl a5,a1,0x4
10340: 0f0f1537 lui a0,0xf0f1
10344: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d31bf>
10348: 00b787b3 add a5,a5,a1
1034c: 00d70733 add a4,a4,a3
10350: 00a7f7b3 and a5,a5,a0
10354: 00a77733 and a4,a4,a0
10358: 0087d593 srl a1,a5,0x8
1035c: 00875693 srl a3,a4,0x8
10360: 00b787b3 add a5,a5,a1
10364: 00d70733 add a4,a4,a3
10368: 01075693 srl a3,a4,0x10
1036c: 0107d593 srl a1,a5,0x10
10370: 00d70733 add a4,a4,a3
10374: 00b787b3 add a5,a5,a1
10378: 02000693 li a3,32
1037c: 03f7f793 and a5,a5,63
10380: 03f77713 and a4,a4,63
10384: 40f687b3 sub a5,a3,a5
10388: 40e68733 sub a4,a3,a4
1038c: 01079793 sll a5,a5,0x10
10390: 01071713 sll a4,a4,0x10
10394: 0107d793 srl a5,a5,0x10
10398: 01075713 srl a4,a4,0x10
1039c: 00f77a63 bgeu a4,a5,103b0 <efficient_int_mul+0x118>
103a0: 00060693 mv a3,a2
103a4: 00078713 mv a4,a5
103a8: 00080613 mv a2,a6
103ac: 00068813 mv a6,a3
103b0: 02000893 li a7,32
103b4: 40e888b3 sub a7,a7,a4
103b8: 07105463 blez a7,10420 <efficient_int_mul+0x188>
103bc: 00000793 li a5,0
103c0: 00000513 li a0,0
103c4: 00000593 li a1,0
103c8: 00185e93 srl t4,a6,0x1
103cc: 01f00e13 li t3,31
103d0: 0280006f j 103f8 <efficient_int_mul+0x160>
103d4: 00d816b3 sll a3,a6,a3
103d8: 00000713 li a4,0
103dc: 00e50733 add a4,a0,a4
103e0: 00a73333 sltu t1,a4,a0
103e4: 00d585b3 add a1,a1,a3
103e8: 00070513 mv a0,a4
103ec: 00b305b3 add a1,t1,a1
103f0: 00178793 add a5,a5,1
103f4: 02f88463 beq a7,a5,1041c <efficient_int_mul+0x184>
103f8: 00f65733 srl a4,a2,a5
103fc: 00177713 and a4,a4,1
10400: fe078693 add a3,a5,-32
10404: fe0706e3 beqz a4,103f0 <efficient_int_mul+0x158>
10408: 40fe0333 sub t1,t3,a5
1040c: fc06d4e3 bgez a3,103d4 <efficient_int_mul+0x13c>
10410: 00f81733 sll a4,a6,a5
10414: 006ed6b3 srl a3,t4,t1
10418: fc5ff06f j 103dc <efficient_int_mul+0x144>
1041c: 00008067 ret
10420: 00000513 li a0,0
10424: 00000593 li a1,0
10428: 00008067 ret
000100b0 <main>:
100b0: fe010113 add sp,sp,-32
100b4: 00112e23 sw ra,28(sp)
100b8: 00812c23 sw s0,24(sp)
100bc: 00912a23 sw s1,20(sp)
100c0: 01212823 sw s2,16(sp)
100c4: 01312623 sw s3,12(sp)
100c8: 12c000ef jal 101f4 <get_instret>
100cc: 00050913 mv s2,a0
100d0: 110000ef jal 101e0 <get_cycles>
100d4: 00050993 mv s3,a0
100d8: ffffe5b7 lui a1,0xffffe
100dc: 12345537 lui a0,0x12345
100e0: ddd58593 add a1,a1,-547 # ffffdddd <__BSS_END__+0xfffe008d>
100e4: 67850513 add a0,a0,1656 # 12345678 <__BSS_END__+0x12327928>
100e8: 1b0000ef jal 10298 <efficient_int_mul>
100ec: 00050493 mv s1,a0
100f0: 00058413 mv s0,a1
100f4: 0ec000ef jal 101e0 <get_cycles>
100f8: 413505b3 sub a1,a0,s3
100fc: 0001c537 lui a0,0x1c
10100: cb850513 add a0,a0,-840 # 1bcb8 <__clzsi2+0x72>
10104: 704000ef jal 10808 <printf>
10108: 0001c537 lui a0,0x1c
1010c: 00090593 mv a1,s2
10110: ccc50513 add a0,a0,-820 # 1bccc <__clzsi2+0x86>
10114: 6f4000ef jal 10808 <printf>
10118: 0001c537 lui a0,0x1c
1011c: 00048613 mv a2,s1
10120: 00040693 mv a3,s0
10124: cdc50513 add a0,a0,-804 # 1bcdc <__clzsi2+0x96>
10128: 6e0000ef jal 10808 <printf>
1012c: 01c12083 lw ra,28(sp)
10130: 01812403 lw s0,24(sp)
10134: 01412483 lw s1,20(sp)
10138: 01012903 lw s2,16(sp)
1013c: 00c12983 lw s3,12(sp)
10140: 00000513 li a0,0
10144: 02010113 add sp,sp,32
10148: 00008067 ret
```
#### elf size

#### execute

- observe
- Total cycle count : `398`
- Total instruction count : `2c7`
- Register use : `$ra, $sp, $a0~$a7, $s0~$s3, $t1, $t3, $t4`
### -Os Assembly code
```asm=
000101f8 <CLZ_32>:
101f8: 00155793 srl a5,a0,0x1
101fc: 00a7e533 or a0,a5,a0
10200: 00255793 srl a5,a0,0x2
10204: 00a7e7b3 or a5,a5,a0
10208: 0047d513 srl a0,a5,0x4
1020c: 00f56533 or a0,a0,a5
10210: 00855713 srl a4,a0,0x8
10214: 00a76733 or a4,a4,a0
10218: 01075793 srl a5,a4,0x10
1021c: 00e7e7b3 or a5,a5,a4
10220: 555556b7 lui a3,0x55555
10224: 0017d713 srl a4,a5,0x1
10228: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x55537805>
1022c: 00d77733 and a4,a4,a3
10230: 40e787b3 sub a5,a5,a4
10234: 333336b7 lui a3,0x33333
10238: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x333155e3>
1023c: 0027d713 srl a4,a5,0x2
10240: 00d77733 and a4,a4,a3
10244: 00d7f7b3 and a5,a5,a3
10248: 00f70733 add a4,a4,a5
1024c: 00475793 srl a5,a4,0x4
10250: 00e787b3 add a5,a5,a4
10254: 0f0f1737 lui a4,0xf0f1
10258: f0f70713 add a4,a4,-241 # f0f0f0f <__BSS_END__+0xf0d31bf>
1025c: 00e7f7b3 and a5,a5,a4
10260: 0087d713 srl a4,a5,0x8
10264: 00f70733 add a4,a4,a5
10268: 01075793 srl a5,a4,0x10
1026c: 00e787b3 add a5,a5,a4
10270: 03f7f793 and a5,a5,63
10274: 02000513 li a0,32
10278: 40f50533 sub a0,a0,a5
1027c: 01051513 sll a0,a0,0x10
10280: 01055513 srl a0,a0,0x10
10284: 00008067 ret
00010288 <efficient_int_mul>:
10288: fe010113 add sp,sp,-32
1028c: 00812c23 sw s0,24(sp)
10290: 00912a23 sw s1,20(sp)
10294: 01312623 sw s3,12(sp)
10298: 00112e23 sw ra,28(sp)
1029c: 01212823 sw s2,16(sp)
102a0: 01412423 sw s4,8(sp)
102a4: 01512223 sw s5,4(sp)
102a8: 00058413 mv s0,a1
102ac: 00050993 mv s3,a0
102b0: f49ff0ef jal 101f8 <CLZ_32>
102b4: 00050493 mv s1,a0
102b8: 00040513 mv a0,s0
102bc: f3dff0ef jal 101f8 <CLZ_32>
102c0: 00957a63 bgeu a0,s1,102d4 <efficient_int_mul+0x4c>
102c4: 00040793 mv a5,s0
102c8: 00048513 mv a0,s1
102cc: 00098413 mv s0,s3
102d0: 00078993 mv s3,a5
102d4: 02000a93 li s5,32
102d8: 00000913 li s2,0
102dc: 00000a13 li s4,0
102e0: 00000493 li s1,0
102e4: 40aa8ab3 sub s5,s5,a0
102e8: 03594863 blt s2,s5,10318 <efficient_int_mul+0x90>
102ec: 01c12083 lw ra,28(sp)
102f0: 01812403 lw s0,24(sp)
102f4: 01012903 lw s2,16(sp)
102f8: 00c12983 lw s3,12(sp)
102fc: 00412a83 lw s5,4(sp)
10300: 000a0513 mv a0,s4
10304: 00048593 mv a1,s1
10308: 00812a03 lw s4,8(sp)
1030c: 01412483 lw s1,20(sp)
10310: 02010113 add sp,sp,32
10314: 00008067 ret
10318: 012457b3 srl a5,s0,s2
1031c: 0017f793 and a5,a5,1
10320: 02078463 beqz a5,10348 <efficient_int_mul+0xc0>
10324: 00090613 mv a2,s2
10328: 00098513 mv a0,s3
1032c: 00000593 li a1,0
10330: 020000ef jal 10350 <__ashldi3>
10334: 00aa0533 add a0,s4,a0
10338: 014537b3 sltu a5,a0,s4
1033c: 00b484b3 add s1,s1,a1
10340: 00050a13 mv s4,a0
10344: 009784b3 add s1,a5,s1
10348: 00190913 add s2,s2,1
1034c: f9dff06f j 102e8 <efficient_int_mul+0x60>
000100b0 <main>:
100b0: fe010113 add sp,sp,-32
100b4: 00112e23 sw ra,28(sp)
100b8: 00812c23 sw s0,24(sp)
100bc: 00912a23 sw s1,20(sp)
100c0: 124000ef jal 101e4 <get_instret>
100c4: 00050413 mv s0,a0
100c8: 108000ef jal 101d0 <get_cycles>
100cc: 00050493 mv s1,a0
100d0: ffffe5b7 lui a1,0xffffe
100d4: 12345537 lui a0,0x12345
100d8: ddd58593 add a1,a1,-547 # ffffdddd <__BSS_END__+0xfffe008d>
100dc: 67850513 add a0,a0,1656 # 12345678 <__BSS_END__+0x12327928>
100e0: 1a8000ef jal 10288 <efficient_int_mul>
100e4: 00a12623 sw a0,12(sp)
100e8: 00b12423 sw a1,8(sp)
100ec: 0e4000ef jal 101d0 <get_cycles>
100f0: 409505b3 sub a1,a0,s1
100f4: 0001c537 lui a0,0x1c
100f8: c0050513 add a0,a0,-1024 # 1bc00 <__clzsi2+0x70>
100fc: 656000ef jal 10752 <printf>
10100: 0001c537 lui a0,0x1c
10104: 00040593 mv a1,s0
10108: c1450513 add a0,a0,-1004 # 1bc14 <__clzsi2+0x84>
1010c: 646000ef jal 10752 <printf>
10110: 00c12603 lw a2,12(sp)
10114: 00812683 lw a3,8(sp)
10118: 0001c537 lui a0,0x1c
1011c: c2450513 add a0,a0,-988 # 1bc24 <__clzsi2+0x94>
10120: 632000ef jal 10752 <printf>
10124: 01c12083 lw ra,28(sp)
10128: 01812403 lw s0,24(sp)
1012c: 01412483 lw s1,20(sp)
10130: 00000513 li a0,0
10134: 02010113 add sp,sp,32
10138: 00008067 ret
```
#### elf size

#### execute

- observe
- Total cycle count : `530`
- Total instruction count : `2c5`
- Register use : `$ra, $sp, $a0~$a7, $s0~$s5`
### -Ofast Assembly code
```asm=
00010208 <CLZ_32>:
10208: 00155793 srl a5,a0,0x1
1020c: 00a7e533 or a0,a5,a0
10210: 00255793 srl a5,a0,0x2
10214: 00a7e7b3 or a5,a5,a0
10218: 0047d513 srl a0,a5,0x4
1021c: 00f56533 or a0,a0,a5
10220: 00855713 srl a4,a0,0x8
10224: 00a76733 or a4,a4,a0
10228: 01075793 srl a5,a4,0x10
1022c: 00e7e7b3 or a5,a5,a4
10230: 555556b7 lui a3,0x55555
10234: 0017d713 srl a4,a5,0x1
10238: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x55537805>
1023c: 00d77733 and a4,a4,a3
10240: 40e787b3 sub a5,a5,a4
10244: 333336b7 lui a3,0x33333
10248: 33368693 add a3,a3,819 # 33333333 <__BSS_END__+0x333155e3>
1024c: 0027d713 srl a4,a5,0x2
10250: 00d77733 and a4,a4,a3
10254: 00d7f7b3 and a5,a5,a3
10258: 00f70733 add a4,a4,a5
1025c: 00475793 srl a5,a4,0x4
10260: 0f0f16b7 lui a3,0xf0f1
10264: 00e787b3 add a5,a5,a4
10268: f0f68693 add a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d31bf>
1026c: 00d7f7b3 and a5,a5,a3
10270: 0087d713 srl a4,a5,0x8
10274: 00f70733 add a4,a4,a5
10278: 01075793 srl a5,a4,0x10
1027c: 00e787b3 add a5,a5,a4
10280: 03f7f793 and a5,a5,63
10284: 02000513 li a0,32
10288: 40f50533 sub a0,a0,a5
1028c: 01051513 sll a0,a0,0x10
10290: 01055513 srl a0,a0,0x10
10294: 00008067 ret
00010298 <efficient_int_mul>:
10298: 00155713 srl a4,a0,0x1
1029c: 0015d793 srl a5,a1,0x1
102a0: 00a76733 or a4,a4,a0
102a4: 00b7e7b3 or a5,a5,a1
102a8: 0027d693 srl a3,a5,0x2
102ac: 00058613 mv a2,a1
102b0: 00275593 srl a1,a4,0x2
102b4: 00b76733 or a4,a4,a1
102b8: 00d7e7b3 or a5,a5,a3
102bc: 00475593 srl a1,a4,0x4
102c0: 0047d693 srl a3,a5,0x4
102c4: 00b76733 or a4,a4,a1
102c8: 00d7e7b3 or a5,a5,a3
102cc: 00875593 srl a1,a4,0x8
102d0: 0087d693 srl a3,a5,0x8
102d4: 00b76733 or a4,a4,a1
102d8: 00d7e7b3 or a5,a5,a3
102dc: 01075593 srl a1,a4,0x10
102e0: 0107d693 srl a3,a5,0x10
102e4: 00b76733 or a4,a4,a1
102e8: 00d7e7b3 or a5,a5,a3
102ec: 555556b7 lui a3,0x55555
102f0: 0017d593 srl a1,a5,0x1
102f4: 55568693 add a3,a3,1365 # 55555555 <__BSS_END__+0x55537805>
102f8: 00050813 mv a6,a0
102fc: 00175513 srl a0,a4,0x1
10300: 00d57533 and a0,a0,a3
10304: 00d5f6b3 and a3,a1,a3
10308: 40a70733 sub a4,a4,a0
1030c: 40d787b3 sub a5,a5,a3
10310: 33333537 lui a0,0x33333
10314: 33350513 add a0,a0,819 # 33333333 <__BSS_END__+0x333155e3>
10318: 00275593 srl a1,a4,0x2
1031c: 0027d693 srl a3,a5,0x2
10320: 00a77733 and a4,a4,a0
10324: 00a7f7b3 and a5,a5,a0
10328: 00a5f5b3 and a1,a1,a0
1032c: 00a6f6b3 and a3,a3,a0
10330: 00e585b3 add a1,a1,a4
10334: 00f686b3 add a3,a3,a5
10338: 0046d713 srl a4,a3,0x4
1033c: 0045d793 srl a5,a1,0x4
10340: 0f0f1537 lui a0,0xf0f1
10344: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d31bf>
10348: 00b787b3 add a5,a5,a1
1034c: 00d70733 add a4,a4,a3
10350: 00a7f7b3 and a5,a5,a0
10354: 00a77733 and a4,a4,a0
10358: 0087d593 srl a1,a5,0x8
1035c: 00875693 srl a3,a4,0x8
10360: 00b787b3 add a5,a5,a1
10364: 00d70733 add a4,a4,a3
10368: 01075693 srl a3,a4,0x10
1036c: 0107d593 srl a1,a5,0x10
10370: 00d70733 add a4,a4,a3
10374: 00b787b3 add a5,a5,a1
10378: 02000693 li a3,32
1037c: 03f7f793 and a5,a5,63
10380: 03f77713 and a4,a4,63
10384: 40f687b3 sub a5,a3,a5
10388: 40e68733 sub a4,a3,a4
1038c: 01079793 sll a5,a5,0x10
10390: 01071713 sll a4,a4,0x10
10394: 0107d793 srl a5,a5,0x10
10398: 01075713 srl a4,a4,0x10
1039c: 00f77a63 bgeu a4,a5,103b0 <efficient_int_mul+0x118>
103a0: 00060693 mv a3,a2
103a4: 00078713 mv a4,a5
103a8: 00080613 mv a2,a6
103ac: 00068813 mv a6,a3
103b0: 02000893 li a7,32
103b4: 40e888b3 sub a7,a7,a4
103b8: 07105463 blez a7,10420 <efficient_int_mul+0x188>
103bc: 00000793 li a5,0
103c0: 00000513 li a0,0
103c4: 00000593 li a1,0
103c8: 00185e93 srl t4,a6,0x1
103cc: 01f00e13 li t3,31
103d0: 0280006f j 103f8 <efficient_int_mul+0x160>
103d4: 00d816b3 sll a3,a6,a3
103d8: 00000713 li a4,0
103dc: 00e50733 add a4,a0,a4
103e0: 00a73333 sltu t1,a4,a0
103e4: 00d585b3 add a1,a1,a3
103e8: 00070513 mv a0,a4
103ec: 00b305b3 add a1,t1,a1
103f0: 00178793 add a5,a5,1
103f4: 02f88463 beq a7,a5,1041c <efficient_int_mul+0x184>
103f8: 00f65733 srl a4,a2,a5
103fc: 00177713 and a4,a4,1
10400: fe078693 add a3,a5,-32
10404: fe0706e3 beqz a4,103f0 <efficient_int_mul+0x158>
10408: 40fe0333 sub t1,t3,a5
1040c: fc06d4e3 bgez a3,103d4 <efficient_int_mul+0x13c>
10410: 00f81733 sll a4,a6,a5
10414: 006ed6b3 srl a3,t4,t1
10418: fc5ff06f j 103dc <efficient_int_mul+0x144>
1041c: 00008067 ret
10420: 00000513 li a0,0
10424: 00000593 li a1,0
10428: 00008067 ret
000100b0 <main>:
100b0: fe010113 add sp,sp,-32
100b4: 00112e23 sw ra,28(sp)
100b8: 00812c23 sw s0,24(sp)
100bc: 00912a23 sw s1,20(sp)
100c0: 01212823 sw s2,16(sp)
100c4: 01312623 sw s3,12(sp)
100c8: 12c000ef jal 101f4 <get_instret>
100cc: 00050913 mv s2,a0
100d0: 110000ef jal 101e0 <get_cycles>
100d4: 00050993 mv s3,a0
100d8: ffffe5b7 lui a1,0xffffe
100dc: 12345537 lui a0,0x12345
100e0: ddd58593 add a1,a1,-547 # ffffdddd <__BSS_END__+0xfffe008d>
100e4: 67850513 add a0,a0,1656 # 12345678 <__BSS_END__+0x12327928>
100e8: 1b0000ef jal 10298 <efficient_int_mul>
100ec: 00050493 mv s1,a0
100f0: 00058413 mv s0,a1
100f4: 0ec000ef jal 101e0 <get_cycles>
100f8: 413505b3 sub a1,a0,s3
100fc: 0001c537 lui a0,0x1c
10100: cb850513 add a0,a0,-840 # 1bcb8 <__clzsi2+0x72>
10104: 704000ef jal 10808 <printf>
10108: 0001c537 lui a0,0x1c
1010c: 00090593 mv a1,s2
10110: ccc50513 add a0,a0,-820 # 1bccc <__clzsi2+0x86>
10114: 6f4000ef jal 10808 <printf>
10118: 0001c537 lui a0,0x1c
1011c: 00048613 mv a2,s1
10120: 00040693 mv a3,s0
10124: cdc50513 add a0,a0,-804 # 1bcdc <__clzsi2+0x96>
10128: 6e0000ef jal 10808 <printf>
1012c: 01c12083 lw ra,28(sp)
10130: 01812403 lw s0,24(sp)
10134: 01412483 lw s1,20(sp)
10138: 01012903 lw s2,16(sp)
1013c: 00c12983 lw s3,12(sp)
10140: 00000513 li a0,0
10144: 02010113 add sp,sp,32
10148: 00008067 ret
```
#### elf size

#### execute

- observe
- Total cycle count : `398`
- Total instruction count : `2c7`
- Register use : `$ra, $sp, $a0~$a7, $s0~$s3, $t1, $t3, $t4`
## Conclusion
|-|-O1|-O2|-O3|-Os|-Ofast|
|:-:|:-:|:-:|:-:|:-:|:-:|
|instret|2c7|2c7|2c7|**2c5**|2c7|
|Cycle|**393**|421|398|530|398|
|Code lines|132|131|181|**126**|181|
- Although the instruction count is the same for `-O1` and `-Ofast`, the cycle count is slightly lower with `-O1` (393 vs. 398 cycles).
- `-Os` has the highest cycle count and `-O1` the lowest: 530 cycles versus 393 cycles.
- `-O3` and `-Ofast` use the most lines of code, while `-Os` uses the fewest.