# Assignment2: RISC-V Toolchain contributed by <[`hugo0406`](https://github.com/hugo0406/Computer-Architecture/tree/main/hw2)> ## Select a Subject The following subject is picked from Assignment 1 > [林柏全](https://github.com/chuan0306/Computer-Architecture-Homework-1) Finding First String of 1-bits of a given length by CLZ **Subject description:** Find the first string of consecutive 1-bits with a length of n. **Motivation:** The reason why I chose this subject as my assignment 2 is that I have an interest in topics like string searching. The original **C** implementation of the subject is as follows. :::spoiler **Source Code** ```c= uint16_t count_leading_zeros(uint64_t x) { x |= (x >> 1); x |= (x >> 2); x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); x |= (x >> 32); x -= ((x >> 1) & 0x5555555555555555); x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333); x = ((x >> 4) + x) & 0x0f0f0f0f0f0f0f0f; x += (x >> 8); x += (x >> 16); x += (x >> 32); return (64 - (x & 0x7f)); } int find_string(uint64_t x, int n){ int clz; // numbers of consecutive leading bits (0 or 1) int pos = 0; // position of first '1' bit in consecutive '1' bit string from significant bit while(x != 0){ clz = count_leading_zeros(x); x = x << clz; pos = pos + clz; clz = count_leading_zeros(~x); if (clz >= n) return pos; x = x << clz; pos = pos + clz; } return -1; } int main() { uint64_t test_data[] = {0x0f00000000000000, 0x0000000000000000, 0x0123456789abcdef}; for (int i = 0; i < 3; i++) { uint64_t x = test_data[i]; int n = 4; int result = find_string(x, n); printf("Test Case %d: Input: 0x%016lx, Result: %d\n", i+1, x, result); } return 0; ``` ::: ## Analysis of the original C code In order to analyze the performance of the original code, I use the external functions (`getcycles.s`, `getinstret.s`) from [perfcounter](https://github.com/sysprog21/rv32emu/tree/master/tests/perfcounter) to get the current cycle count and the current instruction count. Then, I split the original C code into two C 
files (`main.c`, `func.c`). `main.c` contains only the main function, while `func.c` contains all the other functions. - `main.c` ```c= #include <stdint.h> #include <stdio.h> extern uint64_t get_cycles(); extern uint64_t get_instret(); extern int find_string(uint64_t x, int n); int main() { int result[3]; uint64_t test_data[] = {0x0f00000000000000, 0x0000000000000000, 0x0123456789abcdef}; uint64_t instret = get_instret(); uint64_t start = get_cycles(); for (int i = 0; i < 3; i++) { uint64_t x = test_data[i]; int n = 4; result[i] = find_string(x, n); } uint64_t fin = get_instret(); uint64_t end = get_cycles(); uint64_t cyclecount = end - start; uint64_t instrcount = fin - instret; for (int i = 0; i < 3; i++) { printf("Test Case %d: Input: 0x%016lx, Result: %d\n", i+1, test_data[i], result[i]);} printf("cycle count: %u\n", (unsigned int) cyclecount); printf("instret: %u\n", (unsigned) (instrcount)); return 0; } ``` - `func.c` ```c= #include <stdio.h> #include <stdint.h> /* Counting leading zeros function */ uint16_t count_leading_zeros(uint64_t x) { x |= (x >> 1); x |= (x >> 2); x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); x |= (x >> 32); /* Count ones (population count) */ x -= ((x >> 1) & 0x5555555555555555); x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333); x = ((x >> 4) + x) & 0x0f0f0f0f0f0f0f0f; x += (x >> 8); x += (x >> 16); x += (x >> 32); return (64 - (x & 0x7f)); } int find_string(uint64_t x, int n){ int clz; // numbers of consecutive leading bits (0 or 1) int pos = 0; // position of fist '1' bit in consecutive '1' bit string from significant bit while(x != 0){ clz = count_leading_zeros(x); x = x << clz; pos = pos + clz; clz = count_leading_zeros(~x); if (clz >= n) return pos; x = x << clz; pos = pos + clz; } return -1; } ``` I modified the makefile from [perfcounter](https://github.com/sysprog21/rv32emu/tree/master/tests/perfcounter) to compile the C programs. - `makefile` ```= PHONY: clean include ../../mk/toolchain.mk CFLAGS = 
-march=rv32i_zicsr_zifencei -mabi=ilp32 -O0 -Wall OBJS = \ getcycles.o \ getinstret.o \ main.o\ func.o BIN = main.elf %.o: %.s $(CROSS_COMPILE)gcc $(CFLAGS) -c -o $@ $< %.o: %.c $(CROSS_COMPILE)gcc $(CFLAGS) -c -o $@ $< all: $(BIN) $(BIN): $(OBJS) $(CROSS_COMPILE)gcc -o $@ $^ clean: $(RM) $(BIN) $(OBJS) ``` Using `make` to Compile C Programs Execution on rv32emu and show the results Display the assembler mnemonics for the machine instructions ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-objdump -d main.elf > main.txt cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ vim main.txt ``` :::spoiler **Part of disassembly code** ``` main.elf: file format elf32-littleriscv Disassembly of section .text: 00010094 <exit>: 10094: 1141 add sp,sp,-16 10096: 4581 li a1,0 10098: c422 sw s0,8(sp) 1009a: c606 sw ra,12(sp) 1009c: 842a mv s0,a0 1009e: 0f0010ef jal 1118e <__call_exitprocs> 100a2: f4c1a783 lw a5,-180(gp) # 1e794 <__stdio_exit_handler> 100a6: c391 beqz a5,100aa <exit+0x16> 100a8: 9782 jalr a5 100aa: 8522 mv a0,s0 100ac: 7a1090ef jal 1a04c <_exit> ... 
000102a0 <count_leading_zeros>: 102a0: 01f59713 sll a4,a1,0x1f 102a4: 00155793 srl a5,a0,0x1 102a8: 00f767b3 or a5,a4,a5 102ac: 0015d713 srl a4,a1,0x1 102b0: 00a7e533 or a0,a5,a0 102b4: 00b765b3 or a1,a4,a1 102b8: 01e59713 sll a4,a1,0x1e 102bc: 00255793 srl a5,a0,0x2 102c0: 00f767b3 or a5,a4,a5 102c4: 0025d613 srl a2,a1,0x2 102c8: 00a7e533 or a0,a5,a0 102cc: 00b66633 or a2,a2,a1 102d0: 01c61713 sll a4,a2,0x1c 102d4: 00455793 srl a5,a0,0x4 102d8: 00f767b3 or a5,a4,a5 102dc: 00465693 srl a3,a2,0x4 102e0: 00a7e733 or a4,a5,a0 102e4: 00c6e6b3 or a3,a3,a2 102e8: 01869613 sll a2,a3,0x18 102ec: 00875793 srl a5,a4,0x8 102f0: 00f667b3 or a5,a2,a5 102f4: 0086d613 srl a2,a3,0x8 102f8: 00e7e7b3 or a5,a5,a4 102fc: 00d66633 or a2,a2,a3 10300: 01061713 sll a4,a2,0x10 10304: 0107d693 srl a3,a5,0x10 10308: 00d766b3 or a3,a4,a3 1030c: 01065713 srl a4,a2,0x10 10310: 00f6e6b3 or a3,a3,a5 10314: 00c76733 or a4,a4,a2 10318: 00d766b3 or a3,a4,a3 1031c: 01f71613 sll a2,a4,0x1f 10320: 0016d793 srl a5,a3,0x1 10324: 00f667b3 or a5,a2,a5 10328: 00175593 srl a1,a4,0x1 1032c: 55555637 lui a2,0x55555 10330: 55560613 add a2,a2,1365 # 55555555 <__BSS_END__+0x55537805> 10334: 00c7f7b3 and a5,a5,a2 10338: 00c5f633 and a2,a1,a2 1033c: 40f687b3 sub a5,a3,a5 10340: 00f6b6b3 sltu a3,a3,a5 10344: 40c70733 sub a4,a4,a2 10348: 40d70733 sub a4,a4,a3 1034c: 01e71613 sll a2,a4,0x1e 10350: 0027d693 srl a3,a5,0x2 10354: 00d666b3 or a3,a2,a3 10358: 00275593 srl a1,a4,0x2 1035c: 33333637 lui a2,0x33333 10360: 33360613 add a2,a2,819 # 33333333 <__BSS_END__+0x333155e3> 10364: 00c6f6b3 and a3,a3,a2 10368: 00c5f5b3 and a1,a1,a2 1036c: 00c7f7b3 and a5,a5,a2 10370: 00c77733 and a4,a4,a2 10374: 00f687b3 add a5,a3,a5 10378: 00d7b6b3 sltu a3,a5,a3 1037c: 00e58733 add a4,a1,a4 10380: 00e686b3 add a3,a3,a4 10384: 01c69613 sll a2,a3,0x1c 10388: 0047d713 srl a4,a5,0x4 1038c: 00e66733 or a4,a2,a4 10390: 0046d613 srl a2,a3,0x4 10394: 00f707b3 add a5,a4,a5 10398: 00e7b733 sltu a4,a5,a4 1039c: 00d606b3 add a3,a2,a3 103a0: 
00d70733 add a4,a4,a3 103a4: 0f0f16b7 lui a3,0xf0f1 103a8: f0f68693 add a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d31bf> 103ac: 00d7f7b3 and a5,a5,a3 103b0: 00d77733 and a4,a4,a3 103b4: 01871613 sll a2,a4,0x18 103b8: 0087d693 srl a3,a5,0x8 103bc: 00d666b3 or a3,a2,a3 103c0: 00875613 srl a2,a4,0x8 103c4: 00f687b3 add a5,a3,a5 103c8: 00d7b6b3 sltu a3,a5,a3 103cc: 00e60733 add a4,a2,a4 103d0: 00e686b3 add a3,a3,a4 103d4: 01069613 sll a2,a3,0x10 103d8: 0107d713 srl a4,a5,0x10 103dc: 00e66733 or a4,a2,a4 103e0: 0106d613 srl a2,a3,0x10 103e4: 00f707b3 add a5,a4,a5 103e8: 00e7b733 sltu a4,a5,a4 103ec: 00d606b3 add a3,a2,a3 103f0: 00d70733 add a4,a4,a3 103f4: 00f70733 add a4,a4,a5 103f8: 07f77713 and a4,a4,127 103fc: 04000513 li a0,64 10400: 40e50533 sub a0,a0,a4 10404: 01051513 sll a0,a0,0x10 10408: 01055513 srl a0,a0,0x10 1040c: 00008067 ret 00010410 <find_string>: 10410: fe010113 add sp,sp,-32 10414: 00112e23 sw ra,28(sp) 10418: 00812c23 sw s0,24(sp) 1041c: 00b567b3 or a5,a0,a1 10420: 0c078e63 beqz a5,104fc <find_string+0xec> 10424: 00912a23 sw s1,20(sp) 10428: 01212823 sw s2,16(sp) 1042c: 01312623 sw s3,12(sp) 10430: 01412423 sw s4,8(sp) 10434: 01512223 sw s5,4(sp) 10438: 01612023 sw s6,0(sp) 1043c: 00050493 mv s1,a0 10440: 00058913 mv s2,a1 10444: 00060a13 mv s4,a2 10448: 00000413 li s0,0 1044c: 01f00b13 li s6,31 10450: 00000a93 li s5,0 10454: 0440006f j 10498 <find_string+0x88> 10458: 0014d793 srl a5,s1,0x1 1045c: 40ab0733 sub a4,s6,a0 10460: 00e7d7b3 srl a5,a5,a4 10464: 00a919b3 sll s3,s2,a0 10468: 0137e9b3 or s3,a5,s3 1046c: 00a494b3 sll s1,s1,a0 10470: 0440006f j 104b4 <find_string+0xa4> 10474: 0014d793 srl a5,s1,0x1 10478: 40ab0733 sub a4,s6,a0 1047c: 00e7d7b3 srl a5,a5,a4 10480: 00a99933 sll s2,s3,a0 10484: 0127e933 or s2,a5,s2 10488: 00a494b3 sll s1,s1,a0 1048c: 00a40433 add s0,s0,a0 10490: 0124e7b3 or a5,s1,s2 10494: 04078463 beqz a5,104dc <find_string+0xcc> 10498: 00048513 mv a0,s1 1049c: 00090593 mv a1,s2 104a0: e01ff0ef jal 102a0 <count_leading_zeros> 104a4: 
fe050993 add s3,a0,-32 104a8: fa09c8e3 bltz s3,10458 <find_string+0x48> 104ac: 013499b3 sll s3,s1,s3 104b0: 000a8493 mv s1,s5 104b4: 00850433 add s0,a0,s0 104b8: fff4c513 not a0,s1 104bc: fff9c593 not a1,s3 104c0: de1ff0ef jal 102a0 <count_leading_zeros> 104c4: 05455063 bge a0,s4,10504 <find_string+0xf4> 104c8: fe050913 add s2,a0,-32 104cc: fa0944e3 bltz s2,10474 <find_string+0x64> 104d0: 01249933 sll s2,s1,s2 104d4: 000a8493 mv s1,s5 104d8: fb5ff06f j 1048c <find_string+0x7c> 104dc: fff00413 li s0,-1 104e0: 01412483 lw s1,20(sp) 104e4: 01012903 lw s2,16(sp) 104e8: 00c12983 lw s3,12(sp) 104ec: 00812a03 lw s4,8(sp) 104f0: 00412a83 lw s5,4(sp) 104f4: 00012b03 lw s6,0(sp) 104f8: 0240006f j 1051c <find_string+0x10c> 104fc: fff00413 li s0,-1 10500: 01c0006f j 1051c <find_string+0x10c> 10504: 01412483 lw s1,20(sp) 10508: 01012903 lw s2,16(sp) 1050c: 00c12983 lw s3,12(sp) 10510: 00812a03 lw s4,8(sp) 10514: 00412a83 lw s5,4(sp) 10518: 00012b03 lw s6,0(sp) 1051c: 00040513 mv a0,s0 10520: 01c12083 lw ra,28(sp) 10524: 01812403 lw s0,24(sp) 10528: 02010113 add sp,sp,32 1052c: 00008067 ret ... 
``` ::: <br> Display the ELF file header ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x100c4 Start of program headers: 52 (bytes into file) Start of section headers: 69548 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` List the section sizes and the total size for `main.elf` ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf text data bss dec hex filename 53148 1876 1528 56552 dce8 main.elf ``` ## Adapt the original assembly code :::spoiler **Original Assembly Code** ```s= .data t1_u: .word 0x0f000000 # upper bits of test data 1, test_data1[0~31] t1_l: .word 0x00000000 # lower bits of test data 2, test_data2[32~63] t2_u: .word 0x00000000 t2_l: .word 0x00000000 t3_u: .word 0x01234567 t3_l: .word 0x89abcdef .text main: # initial setting la t0, t1_u # load address of upper bits of test data 1 into s0 la t1, t2_u la t2, t3_u addi sp, sp, -12 sw t0, 0(sp) sw t1, 4(sp) sw t2, 8(sp) add s0, zero, t0 # s0 = & test_data_upper add s1, zero, zero # int i (used for test data loop control) addi s2, zero, 3 # upper bound of i (used for loop control) addi s3, zero, 4 addi s4, zero, -1 # be used to do not operation main_for_loop: # call finding_string procedure #mv a0, s0 # a0 = & test_data_1_upper jal ra, fs li a7, 1 ecall li a0, 0 li a7, 11 ecall addi s1, s1, 1 addi s0, s0, 8 blt s1, s2, -12 li a7, 11 ecall j Exit fs: addi sp, sp, -16 sw ra, 0(sp) sw s1, 4(sp) sw s2, 8(sp) sw s3, 12(sp) #sw s4, 16(sp) addi s1, s0, 4 # s1 = & test_date_lower lw a1, 0(s0) # 
a1 = value of test_data upper lw a2, 0(s1) # a2 = value of test_date lower, test_data = [a1, a2] #li s2, 0 # s2 = clz = 0 li s2, 0 # s2 = pos = 0 x_equal_0_check: bne a1, zero, x_not_equal_0 bne a2, zero, x_not_equal_0 x_eqaul_0: addi a0, zero, -1 j fs_end x_not_equal_0: jal ra, CLZ # x = x << clz li t0, 32 sub t0, t0, a0 srl a4, a2, t0 sll a3, a1, a0 or a3, a3, a4 # a1 = a1 << clz sll a4, a2, a0 # a2 = a2 << clz, x([a3, a4]) = x([a1, a2]) << clz # pos = pos + clz add s2, s2, a0 # x = -x, [a3, a4] = - [a1, a2] xor a1, a3, s4 xor a2, a4, s4 jal ra, CLZ # check: clz > n bge a0, s3, 32 ## < case # x = x << clz sub t0, t0, a0 srl a2, a4, t0 sll a1, a3, a0 or a1, a1, a2 # a1 = a3 << clz sll a2, a4, a0 # a2 = a4 << clz, x([a1, a2]) = x([a3, a4]) << clz # pos = pos + clz add s2, s2, a0 j x_equal_0_check ## >= base mv a0, s2 j fs_end fs_end: lw ra, 0(sp) lw s1, 4(sp) lw s2, 8(sp) lw s3, 12(sp) addi sp, sp, 16 jalr ra CLZ: addi sp, sp, -4 sw ra, 0(sp) mv t0, a1 mv t1, a2 li t4, 0x55555555 li t5, 0x33333333 li t6, 0x0f0f0f0f # x |= (x>>1); srli t3, t1, 1 # shift lower bits of test data right with 1 bit slli t2, t0, 31 # shift upper bits of test data left with 31 bits or t3, t2, t3 # combine to get new lower bits of test data srli t2, t0, 1 # shift upper bound of test data right with 1 bit or t0, t0, t2 # [0~31]x | [0~31](x >> 1) or t1, t1, t3 # [32~63]x | [32~63](x >> 1) # x |= (x>>2); srli t3, t1, 2 slli t2, t0, 30 or t3, t2, t3 srli t2, t0, 2 or t0, t0, t2 or t1, t1, t3 # x |= (x>>4); srli t3, t1, 4 slli t2, t0, 28 or t3, t2, t3 srli t2, t0, 4 or t0, t0, t2 or t1, t1, t3 # x |= (x>>8); srli t3, t1, 8 slli t2, t0, 24 or t3, t2, t3 srli t2, t0, 8 or t0, t0, t2 or t1, t1, t3 # x |= (x>>16); srli t3, t1, 16 slli t2, t0, 16 or t3, t2, t3 srli t2, t0, 16 or t0, t0, t2 or t1, t1, t3 # x |= (x>>32) li t2, 0 add t3, t0, zero or t0, t0, t2 or t1, t1, t3 # x -= ((x>>1) & 0x5555555555555555) ## [t2, t3] = x>>1 ([t0, t1]>>1) srli t3, t1, 1 slli t2, t0, 31 or t3, t2, t3 srli t2, t0, 1 
## (x>>1) & 0x5~ and t2, t2, t4 and t3, t3, t4 # [t2, t3] = (x>>1)&0x5~ sub t3, t1, t3 blt t1, t3, 16 # if underflow then jump add t1, t3, zero # t1=t3 sub t0, t0, t2 # no underflow at lower bits, [t0, t1]=> x -= ((x>>1) & 0x5555555555555555) beq zero, zero, 12 addi t0, t0, -1 # underflow at lower bits sub t0, t0, t2 #[t0, t1] => x -= ((x>>1) & 0x5555555555555555) # x = ((x>>2)&0x333333333333333) + (x & 0x3333333333333333) ## [t2, t3] = x>>2 ([t0, t1]>>2) srli t3, t1, 2 slli t2, t0, 30 or t3, t3, t2 srli t2, t0, 2 # [t2, t3] = x>>2 ## (x>>1) & 0x3~ and t2, t2, t5 and t3, t3, t5 # [t2, t3] = ((x>>2)&0x3~) ## x & 0x3~ and t0, t0, t5 and t1, t1, t5 # [t0, t1] = (x & 0x3~) add t1, t1, t3 add t0, t0, t2 ## overflow detection (lower bits) or t4, t1, zero xor t4, s4, t4 # nor t5, t1, zero (t4 = ~(s4 | zero)) bgeu t4, t3, 8 # if no overflow then jump addi t0, t0, 1 # if overflow upper bits plus 1 # x += ((x>>4)+x) & 0x0f~0f ## [t2, t3] = x>>4 ([t0, t1]>>4) srli t3, t1, 4 slli t2, t0, 28 or t3, t3, t2 srli t2, t0, 4 ## (x>>4) + x add t1, t1, t3 add t0, t0, t2 ## overflow detection (lower bits) or t4, t1, zero xor t4, s4, t4 # nor t5, t1, zero (t4 = ~(s4 | zero)) bgeu t4, t3, 8 # if no overflow then jump addi t0, t0, 1 # if overflow upper bits plus 1 ## ((x>>4) + x) & 0x0f~0f and t0, t0, t6 and t1, t1, t6 # x += x(x>>8) srli t3, t1, 8 slli t2, t0, 24 or t3, t3, t2 srli t2, t0, 8 # [t2, t3] = x>>8 add t0, t0, t2 add t1, t1, t3 ## overflow detection or t4, t1, zero xor t4, s4, t4 bgeu t4, t3, 8 addi t0, t0, 1 # x += x(x>>16) srli t3, t1, 16 slli t2, t0, 16 or t3, t3, t2 srli t2, t0, 16 # [t2, t3] = x>>8 add t0, t0, t2 add t1, t1, t3 ## overflow detection or t4, t1, zero xor t4, s4, t4 bgeu t4, t3, 8 addi t0, t0, 1 # x += (x>>32) add t3, t0, zero add t2, zero, zero add t0, t0, t2 add t1, t1, t3 ## overflow detection or t4, t1, zero xor t4, s4, t4 bgeu t4, t3, 8 addi t0, t0, 1 # 64 - (x & (0x7f)) li t4, 0x7f li a0, 64 and t1, t1, t4 sub a0, a0, t1 lw ra, 0(sp) addi sp, sp, 4 
jalr ra Exit: nop ``` ::: <br> ## Optimization ### -O1 Optimization #### execution and the result ![](https://hackmd.io/_uploads/By33wqvfT.png) #### ELF file header ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x100c2 Start of program headers: 52 (bytes into file) Start of section headers: 69492 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Size ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf text data bss dec hex filename 52048 1876 1528 55452 d89c main.elf ``` ### -O2 Optimization #### execution and the result ![](https://hackmd.io/_uploads/HySXPiPGT.png) #### ELF file header ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x101f6 Start of program headers: 52 (bytes into file) Start of section headers: 69492 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Size ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf text data bss dec hex filename 52068 1876 1528 55472 d8b0 main.elf ``` ### -O3 Optimization #### 
execution and the result ![](https://hackmd.io/_uploads/rJClgnvGa.png) #### ELF file header ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x101f6 Start of program headers: 52 (bytes into file) Start of section headers: 69492 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Size ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf text data bss dec hex filename 52616 1876 1528 56020 dad4 main.elf ``` ### -Os Optimization #### execution and the result ![](https://hackmd.io/_uploads/B18fMhPzT.png) #### ELF file header ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x101d2 Start of program headers: 52 (bytes into file) Start of section headers: 69564 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Size ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf text data bss dec hex filename 51938 1876 1528 55342 d82e main.elf ``` ### -Ofast Optimization #### execution and the result ![](https://hackmd.io/_uploads/Hk5-7nwza.png) #### 
ELF file header ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: RISC-V Version: 0x1 Entry point address: 0x101f6 Start of program headers: 52 (bytes into file) Start of section headers: 69492 (bytes into file) Flags: 0x1, RVC, soft-float ABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 3 Size of section headers: 40 (bytes) Number of section headers: 15 Section header string table index: 14 ``` #### Size ``` cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf text data bss dec hex filename 52616 1876 1528 56020 dad4 main.elf ``` ## Reference 1. [Using make to Compile Programs](https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dhg/index.html) 2. [GCC中-O1 -O2 -O3 優化的原理是什麼?](https://www.getit01.com/p20180111527090458/)