Try   HackMD

Assignment2: RISC-V Toolchain

contributed by <hugo0406>

Select a Subject

The following subject is picked from the Assignment 1

林柏全 Finding First String of 1-bits of a given length by CLZ

Subject description: Find the first consecutive 1-bits string with a length of n.

Motivation: I chose this subject for Assignment 2 because I am interested in topics such as string searching.

The original C implementation of the subject is as follows.

Source Code
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Count the leading zero bits of a 64-bit value.
 *
 * Returns a value in the range 0..64; the result is 64 when x == 0.
 */
uint16_t count_leading_zeros(uint64_t x)
{
    /* Smear the most significant set bit into every lower bit position. */
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    x |= (x >> 32);

    /* Population count of the smeared value (= 64 - leading zeros). */
    x -= ((x >> 1) & UINT64_C(0x5555555555555555));
    x = ((x >> 2) & UINT64_C(0x3333333333333333)) +
        (x & UINT64_C(0x3333333333333333));
    x = ((x >> 4) + x) & UINT64_C(0x0f0f0f0f0f0f0f0f);
    x += (x >> 8);
    x += (x >> 16);
    x += (x >> 32);

    return (64 - (x & 0x7f));
}

/*
 * Find the first run of at least n consecutive 1-bits in x.
 *
 * Returns the position of the first bit of that run, counted from the most
 * significant bit (MSB = position 0), or -1 when no such run exists.
 */
int find_string(uint64_t x, int n)
{
    int clz;     /* number of consecutive leading bits (0 or 1) */
    int pos = 0; /* position of the current leading bit, counted from the MSB */

    while (x != 0) {
        /* Skip the leading zeros; clz <= 63 here because x != 0. */
        clz = count_leading_zeros(x);
        x = x << clz;
        pos = pos + clz;

        /* Leading ones of x are the leading zeros of ~x. */
        clz = count_leading_zeros(~x);
        if (clz >= n)
            return pos;

        /*
         * A run of 64 ones means x is all ones. Shifting a 64-bit value by
         * 64 is undefined behaviour, and nothing is left to search anyway.
         */
        if (clz >= 64)
            break;

        x = x << clz;
        pos = pos + clz;
    }
    return -1;
}

int main()
{
    uint64_t test_data[] = {0x0f00000000000000, 0x0000000000000000,
                            0x0123456789abcdef};

    for (int i = 0; i < 3; i++) {
        uint64_t x = test_data[i];
        int n = 4;
        int result = find_string(x, n);

        /* PRIx64 matches uint64_t on every ABI; "%lx" only does where long is 64-bit. */
        printf("Test Case %d: Input: 0x%016" PRIx64 ", Result: %d\n", i + 1, x,
               result);
    }
    return 0;
}

Analysis of the original C code

In order to analyze the performance of the original code, I use the external functions get_cycles and get_instret (implemented in getcycles.s and getinstret.s, taken from perfcounter) to read the current clock cycle count and the current instruction count. Then I split the original C code into two C files (main.c and func.c).

In main.c, there is only the main function ,while in func.c, there are all the other functions.

  • main.c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Counter readers and the function under test; all three are defined outside this file. */
extern uint64_t get_cycles(void);
extern uint64_t get_instret(void);
extern int find_string(uint64_t x, int n);

/*
 * Run find_string() on three fixed test values with n = 4, then print each
 * result followed by the cycle and instruction counts spent in the loop.
 */
int main()
{
    int result[3];
    uint64_t test_data[] = {0x0f00000000000000, 0x0000000000000000,
                            0x0123456789abcdef};

    /* Sample both counters right before the measured loop. */
    uint64_t instret = get_instret();
    uint64_t start = get_cycles();

    /* Results are stored, not printed, so printf stays out of the measurement. */
    for (int i = 0; i < 3; i++) {
        uint64_t x = test_data[i];
        int n = 4;
        result[i] = find_string(x, n);
    }

    /* Sample both counters again right after the loop. */
    uint64_t fin = get_instret();
    uint64_t end = get_cycles();

    uint64_t cyclecount = end - start;
    uint64_t instrcount = fin - instret;

    for (int i = 0; i < 3; i++) {
        /*
         * PRIx64 is the conversion that matches uint64_t. "%lx" expects an
         * unsigned long, which is only 32 bits wide on ILP32 targets.
         */
        printf("Test Case %d: Input: 0x%016" PRIx64 ", Result: %d\n", i + 1,
               test_data[i], result[i]);
    }

    /* The counts are deliberately narrowed to 32 bits for printing. */
    printf("cycle count: %u\n", (unsigned int) cyclecount);
    printf("instret: %u\n", (unsigned) (instrcount));
    return 0;
}
  • func.c
#include <stdio.h>
#include <stdint.h>

/*
 * Count the leading zero bits of a 64-bit value.
 *
 * Returns a value in the range 0..64; the result is 64 when x == 0.
 */
uint16_t count_leading_zeros(uint64_t x)
{
    /* Smear the most significant set bit into every lower bit position. */
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    x |= (x >> 32);

    /* Count ones (population count) of the smeared value. */
    x -= ((x >> 1) & UINT64_C(0x5555555555555555));
    x = ((x >> 2) & UINT64_C(0x3333333333333333)) +
        (x & UINT64_C(0x3333333333333333));
    x = ((x >> 4) + x) & UINT64_C(0x0f0f0f0f0f0f0f0f);
    x += (x >> 8);
    x += (x >> 16);
    x += (x >> 32);

    /* Leading zeros = 64 - number of bits at or below the highest set bit. */
    return (64 - (x & 0x7f));
}

/*
 * Find the first run of at least n consecutive 1-bits in x.
 *
 * Returns the position of the first bit of that run, counted from the most
 * significant bit (MSB = position 0), or -1 when no such run exists.
 */
int find_string(uint64_t x, int n)
{
    int clz;     /* number of consecutive leading bits (0 or 1) */
    int pos = 0; /* position of the current leading bit, counted from the MSB */

    while (x != 0) {
        /* Skip the leading zeros; clz <= 63 here because x != 0. */
        clz = count_leading_zeros(x);
        x = x << clz;
        pos = pos + clz;

        /* Leading ones of x are the leading zeros of ~x. */
        clz = count_leading_zeros(~x);
        if (clz >= n)
            return pos;

        /*
         * A run of 64 ones means x is all ones. Shifting a 64-bit value by
         * 64 is undefined behaviour (and loops forever on hardware that
         * masks the shift count), and nothing is left to search anyway.
         */
        if (clz >= 64)
            break;

        x = x << clz;
        pos = pos + clz;
    }
    return -1;
}

I modified the makefile from perfcounter to compile the C programs.

  • makefile
# `.PHONY` needs its leading dot to be the special target. A plain
# `PHONY: clean` is an ordinary rule; as the first rule in the file it becomes
# the default goal, so a bare `make` would run `clean` instead of building.
.PHONY: all clean

# Presumably provides CROSS_COMPILE (the toolchain prefix) - confirm in toolchain.mk.
include ../../mk/toolchain.mk

CFLAGS = -march=rv32i_zicsr_zifencei -mabi=ilp32 -O0 -Wall

OBJS = \
    getcycles.o \
    getinstret.o \
    main.o \
    func.o

BIN = main.elf

# Assemble the counter helpers.
%.o: %.s
	$(CROSS_COMPILE)gcc $(CFLAGS) -c -o $@ $<

# Compile the C sources.
%.o: %.c
	$(CROSS_COMPILE)gcc $(CFLAGS) -c -o $@ $<

# Default goal: first non-special, non-pattern target in the file.
all: $(BIN)

$(BIN): $(OBJS)
	$(CROSS_COMPILE)gcc -o $@ $^

clean:
	$(RM) $(BIN) $(OBJS)

Using make to Compile C Programs

Execution on rv32emu and show the results

Display the assembler mnemonics for the machine instructions

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-objdump -d main.elf > main.txt
cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ vim main.txt
Part of disassembly code
main.elf:     file format elf32-littleriscv


Disassembly of section .text:

00010094 <exit>:
   10094:       1141                    add     sp,sp,-16
   10096:       4581                    li      a1,0
   10098:       c422                    sw      s0,8(sp)
   1009a:       c606                    sw      ra,12(sp)
   1009c:       842a                    mv      s0,a0
   1009e:       0f0010ef                jal     1118e <__call_exitprocs>
   100a2:       f4c1a783                lw      a5,-180(gp) # 1e794 <__stdio_exit_handler>
   100a6:       c391                    beqz    a5,100aa <exit+0x16>
   100a8:       9782                    jalr    a5
   100aa:       8522                    mv      a0,s0
   100ac:       7a1090ef                jal     1a04c <_exit>
        ...

000102a0 <count_leading_zeros>:
   102a0:       01f59713                sll     a4,a1,0x1f
   102a4:       00155793                srl     a5,a0,0x1
   102a8:       00f767b3                or      a5,a4,a5
   102ac:       0015d713                srl     a4,a1,0x1
   102b0:       00a7e533                or      a0,a5,a0
   102b4:       00b765b3                or      a1,a4,a1
   102b8:       01e59713                sll     a4,a1,0x1e
   102bc:       00255793                srl     a5,a0,0x2
   102c0:       00f767b3                or      a5,a4,a5
   102c4:       0025d613                srl     a2,a1,0x2
   102c8:       00a7e533                or      a0,a5,a0
   102cc:       00b66633                or      a2,a2,a1
   102d0:       01c61713                sll     a4,a2,0x1c
   102d4:       00455793                srl     a5,a0,0x4
   102d8:       00f767b3                or      a5,a4,a5
   102dc:       00465693                srl     a3,a2,0x4
   102e0:       00a7e733                or      a4,a5,a0
   102e4:       00c6e6b3                or      a3,a3,a2
   102e8:       01869613                sll     a2,a3,0x18
   102ec:       00875793                srl     a5,a4,0x8
   102f0:       00f667b3                or      a5,a2,a5
   102f4:       0086d613                srl     a2,a3,0x8
   102f8:       00e7e7b3                or      a5,a5,a4
   102fc:       00d66633                or      a2,a2,a3
   10300:       01061713                sll     a4,a2,0x10
   10304:       0107d693                srl     a3,a5,0x10
   10308:       00d766b3                or      a3,a4,a3
   1030c:       01065713                srl     a4,a2,0x10
   10310:       00f6e6b3                or      a3,a3,a5
   10314:       00c76733                or      a4,a4,a2
   10318:       00d766b3                or      a3,a4,a3
   1031c:       01f71613                sll     a2,a4,0x1f
   10320:       0016d793                srl     a5,a3,0x1
   10324:       00f667b3                or      a5,a2,a5
   10328:       00175593                srl     a1,a4,0x1
   1032c:       55555637                lui     a2,0x55555
   10330:       55560613                add     a2,a2,1365 # 55555555 <__BSS_END__+0x55537805>
   10334:       00c7f7b3                and     a5,a5,a2
   10338:       00c5f633                and     a2,a1,a2
   1033c:       40f687b3                sub     a5,a3,a5
   10340:       00f6b6b3                sltu    a3,a3,a5
   10344:       40c70733                sub     a4,a4,a2
   10348:       40d70733                sub     a4,a4,a3
   1034c:       01e71613                sll     a2,a4,0x1e
   10350:       0027d693                srl     a3,a5,0x2
   10354:       00d666b3                or      a3,a2,a3
   10358:       00275593                srl     a1,a4,0x2
   1035c:       33333637                lui     a2,0x33333
   10360:       33360613                add     a2,a2,819 # 33333333 <__BSS_END__+0x333155e3>
   10364:       00c6f6b3                and     a3,a3,a2
   10368:       00c5f5b3                and     a1,a1,a2
   1036c:       00c7f7b3                and     a5,a5,a2
   10370:       00c77733                and     a4,a4,a2
   10374:       00f687b3                add     a5,a3,a5
   10378:       00d7b6b3                sltu    a3,a5,a3
   1037c:       00e58733                add     a4,a1,a4
   10380:       00e686b3                add     a3,a3,a4
   10384:       01c69613                sll     a2,a3,0x1c
   10388:       0047d713                srl     a4,a5,0x4
   1038c:       00e66733                or      a4,a2,a4
   10390:       0046d613                srl     a2,a3,0x4
   10394:       00f707b3                add     a5,a4,a5
   10398:       00e7b733                sltu    a4,a5,a4
   1039c:       00d606b3                add     a3,a2,a3
   103a0:       00d70733                add     a4,a4,a3
   103a4:       0f0f16b7                lui     a3,0xf0f1
   103a8:       f0f68693                add     a3,a3,-241 # f0f0f0f <__BSS_END__+0xf0d31bf>
   103ac:       00d7f7b3                and     a5,a5,a3
   103b0:       00d77733                and     a4,a4,a3
   103b4:       01871613                sll     a2,a4,0x18
   103b8:       0087d693                srl     a3,a5,0x8
   103bc:       00d666b3                or      a3,a2,a3
   103c0:       00875613                srl     a2,a4,0x8
   103c4:       00f687b3                add     a5,a3,a5
   103c8:       00d7b6b3                sltu    a3,a5,a3
   103cc:       00e60733                add     a4,a2,a4
   103d0:       00e686b3                add     a3,a3,a4
   103d4:       01069613                sll     a2,a3,0x10
   103d8:       0107d713                srl     a4,a5,0x10
   103dc:       00e66733                or      a4,a2,a4
   103e0:       0106d613                srl     a2,a3,0x10
   103e4:       00f707b3                add     a5,a4,a5
   103e8:       00e7b733                sltu    a4,a5,a4
   103ec:       00d606b3                add     a3,a2,a3
   103f0:       00d70733                add     a4,a4,a3
   103f4:       00f70733                add     a4,a4,a5
   103f8:       07f77713                and     a4,a4,127
   103fc:       04000513                li      a0,64
   10400:       40e50533                sub     a0,a0,a4
   10404:       01051513                sll     a0,a0,0x10
   10408:       01055513                srl     a0,a0,0x10
   1040c:       00008067                ret

00010410 <find_string>:
   10410:       fe010113                add     sp,sp,-32
   10414:       00112e23                sw      ra,28(sp)
   10418:       00812c23                sw      s0,24(sp)
   1041c:       00b567b3                or      a5,a0,a1
   10420:       0c078e63                beqz    a5,104fc <find_string+0xec>
   10424:       00912a23                sw      s1,20(sp)
   10428:       01212823                sw      s2,16(sp)
   1042c:       01312623                sw      s3,12(sp)
   10430:       01412423                sw      s4,8(sp)
   10434:       01512223                sw      s5,4(sp)
   10438:       01612023                sw      s6,0(sp)
   1043c:       00050493                mv      s1,a0
   10440:       00058913                mv      s2,a1
   10444:       00060a13                mv      s4,a2
   10448:       00000413                li      s0,0
   1044c:       01f00b13                li      s6,31
   10450:       00000a93                li      s5,0
   10454:       0440006f                j       10498 <find_string+0x88>
   10458:       0014d793                srl     a5,s1,0x1
   1045c:       40ab0733                sub     a4,s6,a0
   10460:       00e7d7b3                srl     a5,a5,a4
   10464:       00a919b3                sll     s3,s2,a0
   10468:       0137e9b3                or      s3,a5,s3
   1046c:       00a494b3                sll     s1,s1,a0
   10470:       0440006f                j       104b4 <find_string+0xa4>
   10474:       0014d793                srl     a5,s1,0x1
   10478:       40ab0733                sub     a4,s6,a0
   1047c:       00e7d7b3                srl     a5,a5,a4
   10480:       00a99933                sll     s2,s3,a0
   10484:       0127e933                or      s2,a5,s2
   10488:       00a494b3                sll     s1,s1,a0
   1048c:       00a40433                add     s0,s0,a0
   10490:       0124e7b3                or      a5,s1,s2
   10494:       04078463                beqz    a5,104dc <find_string+0xcc>
   10498:       00048513                mv      a0,s1
   1049c:       00090593                mv      a1,s2
   104a0:       e01ff0ef                jal     102a0 <count_leading_zeros>
   104a4:       fe050993                add     s3,a0,-32
   104a8:       fa09c8e3                bltz    s3,10458 <find_string+0x48>
   104ac:       013499b3                sll     s3,s1,s3
   104b0:       000a8493                mv      s1,s5
   104b4:       00850433                add     s0,a0,s0
   104b8:       fff4c513                not     a0,s1
   104bc:       fff9c593                not     a1,s3
   104c0:       de1ff0ef                jal     102a0 <count_leading_zeros>
   104c4:       05455063                bge     a0,s4,10504 <find_string+0xf4>
   104c8:       fe050913                add     s2,a0,-32
   104cc:       fa0944e3                bltz    s2,10474 <find_string+0x64>
   104d0:       01249933                sll     s2,s1,s2
   104d4:       000a8493                mv      s1,s5
   104d8:       fb5ff06f                j       1048c <find_string+0x7c>
   104dc:       fff00413                li      s0,-1
   104e0:       01412483                lw      s1,20(sp)
   104e4:       01012903                lw      s2,16(sp)
   104e8:       00c12983                lw      s3,12(sp)
   104ec:       00812a03                lw      s4,8(sp)
   104f0:       00412a83                lw      s5,4(sp)
   104f4:       00012b03                lw      s6,0(sp)
   104f8:       0240006f                j       1051c <find_string+0x10c>
   104fc:       fff00413                li      s0,-1
   10500:       01c0006f                j       1051c <find_string+0x10c>
   10504:       01412483                lw      s1,20(sp)
   10508:       01012903                lw      s2,16(sp)
   1050c:       00c12983                lw      s3,12(sp)
   10510:       00812a03                lw      s4,8(sp)
   10514:       00412a83                lw      s5,4(sp)
   10518:       00012b03                lw      s6,0(sp)
   1051c:       00040513                mv      a0,s0
   10520:       01c12083                lw      ra,28(sp)
   10524:       01812403                lw      s0,24(sp)
   10528:       02010113                add     sp,sp,32
   1052c:       00008067                ret
   
        ...

Display the ELF file header

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf 
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x100c4
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69548 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

List the section sizes and the total size for main.elf

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf
   text	   data	    bss	    dec	    hex	filename
  53148	   1876	   1528	  56552	   dce8	main.elf

Adapt the original assembly code

Original Assembly Code
# Find the first run of at least n consecutive 1-bits in a 64-bit value
# (RV32 assembly; each 64-bit value is held as an [upper, lower] word pair).
#
#   main : walks the three test values and calls fs for each one
#   fs   : the find-string routine; result is returned in a0 (-1 = not found)
#   CLZ  : counts the leading zeros of the 64-bit value [a1, a2], result in a0
#
# Several branches use numeric byte offsets instead of labels, so inserting or
# removing an instruction changes where they land.
.data
t1_u: .word 0x0f000000 # upper bits of test data 1, test_data1[0~31]
t1_l: .word 0x00000000 # lower bits of test data 1, test_data1[32~63]
t2_u: .word 0x00000000
t2_l: .word 0x00000000
t3_u: .word 0x01234567
t3_l: .word 0x89abcdef

.text
main:
    # initial setting
    la t0, t1_u # load address of upper bits of test data 1 into t0
    la t1, t2_u
    la t2, t3_u
    addi sp, sp, -12
    sw t0, 0(sp)
    sw t1, 4(sp)
    sw t2, 8(sp)
    add s0, zero, t0 # s0 = & test_data_upper
    add s1, zero, zero # int i (used for test data loop control)
    addi s2, zero, 3 # upper bound of i (used for loop control)
    addi s3, zero, 4 # s3 = n; fs compares the run length against it
    addi s4, zero, -1 # all ones; XOR with s4 performs a bitwise NOT

main_for_loop:
    # call finding_string procedure
    #mv a0, s0 # a0 = & test_data_1_upper
    jal ra, fs
    # presumably simulator environment calls that print the result (a7 = 1)
    # and then a character (a7 = 11) - confirm against the simulator in use
    li a7, 1
    ecall
    li a0, 0
    li a7, 11
    ecall
    addi s1, s1, 1 # i++
    addi s0, s0, 8 # advance to the next 64-bit test value (two words)
    blt s1, s2, -12 # numeric offset; NOTE(review): confirm this lands on the intended loop head
    li a7, 11
    ecall
    j Exit

# fs: input is the 64-bit value at 0(s0)/4(s0) and n in s3; returns in a0.
fs:
    addi sp, sp, -16
    sw ra, 0(sp)
    sw s1, 4(sp)
    sw s2, 8(sp)
    sw s3, 12(sp)
    #sw s4, 16(sp)
    addi s1, s0, 4 # s1 = & test_data_lower
    lw a1, 0(s0) # a1 = value of test_data upper
    lw a2, 0(s1) # a2 = value of test_data lower, test_data = [a1, a2]
    #li s2, 0 # s2 = clz = 0
    li s2, 0 # s2 = pos = 0

x_equal_0_check:
    # loop condition: continue while x != 0 (either word non-zero)
    bne a1, zero, x_not_equal_0
    bne a2, zero, x_not_equal_0

x_eqaul_0:
    addi a0, zero, -1 # no run found: return -1
    j fs_end

x_not_equal_0:
    jal ra, CLZ # a0 = leading zeros of [a1, a2]
    # x = x << clz
    li t0, 32
    sub t0, t0, a0 # t0 = 32 - clz
    srl a4, a2, t0 # bits of the lower word that move into the upper word
    sll a3, a1, a0
    or a3, a3, a4 # a3 = upper word of (x << clz)
    sll a4, a2, a0 # a4 = lower word of (x << clz), x([a3, a4]) = x([a1, a2]) << clz
    # NOTE(review): register shifts use only the low 5 bits of the amount;
    # confirm the clz = 0 and clz >= 32 cases behave as intended
    # pos = pos + clz
    add s2, s2, a0
    # x = ~x, [a1, a2] = ~[a3, a4] (XOR with s4 = -1)
    xor a1, a3, s4
    xor a2, a4, s4
    jal ra, CLZ # a0 = leading zeros of ~x = leading ones of x
    # check: clz >= n
    bge a0, s3, 32 # numeric offset: jumps to "mv a0, s2" below
    ## < case
    # x = x << clz
    # NOTE(review): t0 is not reloaded with 32 here although CLZ overwrites
    # t0 - confirm this shift amount is what was intended
    sub t0, t0, a0
    srl a2, a4, t0
    sll a1, a3, a0
    or a1, a1, a2 # a1 = upper word of (x << clz)
    sll a2, a4, a0 # a2 = lower word of (x << clz), x([a1, a2]) = x([a3, a4]) << clz
    # pos = pos + clz
    add s2, s2, a0
    j x_equal_0_check
    ## >= case
    mv a0, s2 # return pos
    j fs_end

fs_end:
    lw ra, 0(sp)
    lw s1, 4(sp)
    lw s2, 8(sp)
    lw s3, 12(sp)
    addi sp, sp, 16
    jalr ra

# CLZ: count leading zeros of the 64-bit value [a1, a2]; result in a0.
# Works on a copy in [t0, t1] and overwrites t0-t6; relies on s4 == -1.
CLZ:
    addi sp, sp, -4
    sw ra, 0(sp)
    mv t0, a1 # t0 = upper word of x
    mv t1, a2 # t1 = lower word of x
    li t4, 0x55555555
    li t5, 0x33333333
    li t6, 0x0f0f0f0f
    # x |= (x>>1);
    srli t3, t1, 1 # shift lower bits of test data right with 1 bit
    slli t2, t0, 31 # shift upper bits of test data left with 31 bits
    or t3, t2, t3 # combine to get new lower bits of test data
    srli t2, t0, 1 # shift upper bits of test data right with 1 bit
    or t0, t0, t2 # [0~31]x | [0~31](x >> 1)
    or t1, t1, t3 # [32~63]x | [32~63](x >> 1)
    # x |= (x>>2);
    srli t3, t1, 2
    slli t2, t0, 30
    or t3, t2, t3
    srli t2, t0, 2
    or t0, t0, t2
    or t1, t1, t3
    # x |= (x>>4);
    srli t3, t1, 4
    slli t2, t0, 28
    or t3, t2, t3
    srli t2, t0, 4
    or t0, t0, t2
    or t1, t1, t3
    # x |= (x>>8);
    srli t3, t1, 8
    slli t2, t0, 24
    or t3, t2, t3
    srli t2, t0, 8
    or t0, t0, t2
    or t1, t1, t3
    # x |= (x>>16);
    srli t3, t1, 16
    slli t2, t0, 16
    or t3, t2, t3
    srli t2, t0, 16
    or t0, t0, t2
    or t1, t1, t3
    # x |= (x>>32): the upper word moves into the lower word, upper part is 0
    li t2, 0
    add t3, t0, zero
    or t0, t0, t2
    or t1, t1, t3
    # x -= ((x>>1) & 0x5555555555555555)
    ## [t2, t3] = x>>1 ([t0, t1]>>1)
    srli t3, t1, 1
    slli t2, t0, 31
    or t3, t2, t3
    srli t2, t0, 1
    ## (x>>1) & 0x5~
    and t2, t2, t4
    and t3, t3, t4 # [t2, t3] = (x>>1)&0x5~
    sub t3, t1, t3 # t3 = lower-word difference
    blt t1, t3, 16 # if underflow then jump (numeric offset, to "addi t0, t0, -1")
    add t1, t3, zero # t1=t3
    sub t0, t0, t2 # no underflow at lower bits, [t0, t1]=> x -= ((x>>1) & 0x5555555555555555)
    beq zero, zero, 12 # unconditional skip over the underflow path
    # NOTE(review): on the underflow path t1 is not replaced by the difference
    # in t3, and blt is a signed comparison - confirm both are intended
    addi t0, t0, -1 # underflow at lower bits
    sub t0, t0, t2 #[t0, t1] => x -= ((x>>1) & 0x5555555555555555)
    # x = ((x>>2)&0x3333333333333333) + (x & 0x3333333333333333)
    ## [t2, t3] = x>>2 ([t0, t1]>>2)
    srli t3, t1, 2
    slli t2, t0, 30
    or t3, t3, t2
    srli t2, t0, 2 # [t2, t3] = x>>2
    ## (x>>2) & 0x3~
    and t2, t2, t5
    and t3, t3, t5 # [t2, t3] = ((x>>2)&0x3~)
    ## x & 0x3~
    and t0, t0, t5
    and t1, t1, t5 # [t0, t1] = (x & 0x3~)
    add t1, t1, t3
    add t0, t0, t2
    ## overflow detection (lower bits)
    or t4, t1, zero # t4 = t1 (this overwrites the 0x55555555 mask, no longer needed)
    xor t4, s4, t4 # t4 = ~t1 (s4 holds -1)
    bgeu t4, t3, 8 # if no overflow then jump
    addi t0, t0, 1 # if overflow upper bits plus 1
    # x = ((x>>4)+x) & 0x0f~0f
    ## [t2, t3] = x>>4 ([t0, t1]>>4)
    srli t3, t1, 4
    slli t2, t0, 28
    or t3, t3, t2
    srli t2, t0, 4
    ## (x>>4) + x
    add t1, t1, t3
    add t0, t0, t2
    ## overflow detection (lower bits)
    or t4, t1, zero
    xor t4, s4, t4 # t4 = ~t1 (s4 holds -1)
    bgeu t4, t3, 8 # if no overflow then jump
    addi t0, t0, 1 # if overflow upper bits plus 1
    ## ((x>>4) + x) & 0x0f~0f
    and t0, t0, t6
    and t1, t1, t6
    # x += (x>>8)
    srli t3, t1, 8
    slli t2, t0, 24
    or t3, t3, t2
    srli t2, t0, 8 # [t2, t3] = x>>8
    add t0, t0, t2
    add t1, t1, t3
    ## overflow detection
    or t4, t1, zero
    xor t4, s4, t4
    bgeu t4, t3, 8
    addi t0, t0, 1
    # x += (x>>16)
    srli t3, t1, 16
    slli t2, t0, 16
    or t3, t3, t2
    srli t2, t0, 16 # [t2, t3] = x>>16
    add t0, t0, t2
    add t1, t1, t3
    ## overflow detection
    or t4, t1, zero
    xor t4, s4, t4
    bgeu t4, t3, 8
    addi t0, t0, 1
    # x += (x>>32)
    add t3, t0, zero
    add t2, zero, zero
    add t0, t0, t2
    add t1, t1, t3
    ## overflow detection
    or t4, t1, zero
    xor t4, s4, t4
    bgeu t4, t3, 8
    addi t0, t0, 1
    # 64 - (x & (0x7f))
    li t4, 0x7f
    li a0, 64
    and t1, t1, t4
    sub a0, a0, t1 # a0 = 64 - popcount = number of leading zeros
    lw ra, 0(sp)
    addi sp, sp, 4
    jalr ra

Exit:
    nop

Optimization

-O1 Optimization

execution and the result

ELF file header

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf 
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x100c2
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69492 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

Size

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf
   text	   data	    bss	    dec	    hex	filename
  52048	   1876	   1528	  55452	   d89c	main.elf

-O2 Optimization

execution and the result

ELF file header

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x101f6
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69492 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

Size

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf
   text	   data	    bss	    dec	    hex	filename
  52068	   1876	   1528	  55472	   d8b0	main.elf

-O3 Optimization

execution and the result

ELF file header

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf 
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x101f6
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69492 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

Size

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf 
   text	   data	    bss	    dec	    hex	filename
  52616	   1876	   1528	  56020	   dad4	main.elf

-Os Optimization

execution and the result

ELF file header

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf 
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x101d2
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69564 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

Size

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf
   text	   data	    bss	    dec	    hex	filename
  51938	   1876	   1528	  55342	   d82e	main.elf

-Ofast Optimization

execution and the result

ELF file header

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-readelf -h main.elf
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x101f6
  Start of program headers:          52 (bytes into file)
  Start of section headers:          69492 (bytes into file)
  Flags:                             0x1, RVC, soft-float ABI
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

Size

cychen@cychen-VirtualBox:~/rv32emu/tests/hw2$ riscv-none-elf-size main.elf 
   text	   data	    bss	    dec	    hex	filename
  52616	   1876	   1528	  56020	   dad4	main.elf

Reference

  1. Using make to Compile Programs
  2. GCC中-O1 -O2 -O3 優化的原理是什麼?