Try   HackMD

Assignment 2 : RISC-V Toolchain

contributed by <bclegend>

Lab2: RISC-V RV32I[MACF] emulator with ELF support

Question Selection

Question

I chose the problem from 洪佑杭's Assignment 1: multiplication overflow prediction for unsigned integers using CLZ.

Motivation

I chose this problem because I used to be confused about how CLZ (count leading zeros) is used, and I noticed that predicting overflow in integer multiplication is a good application of it. I am also interested in integer multipliers.

Code

His code

C code
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>


// test case a: no overflow, predict result is false
uint64_t a_x0 = 0x0000000000000000;
uint64_t a_x1 = 0x0000000000000000;
// test case b: no overflow, predict result is false
uint64_t b_x0 = 0x0000000000000001;
uint64_t b_x1 = 0x0000000000000010;
// test case c: no overflow, but predict result is true
uint64_t c_x0 = 0x0000000000000002;
uint64_t c_x1 = 0x4000000000000000;
// test case d: overflow, and predict result is true
uint64_t d_x0 = 0x0000000000000003;
uint64_t d_x1 = 0x7FFFFFFFFFFFFFFF;


/*
 * Count the leading zero bits of a 64-bit value.
 *
 * The highest set bit is first copied into every lower bit position, so the
 * number of set bits becomes 64 minus the number of leading zeros. That
 * population count is then taken with the usual parallel bit-summing steps.
 * Returns 64 for x == 0.
 */
uint16_t count_leading_zeros(uint64_t x)
{
    const uint64_t pair_mask = 0x5555555555555555;
    const uint64_t nibble_mask = 0x3333333333333333;
    const uint64_t byte_mask = 0x0f0f0f0f0f0f0f0f;

    /* Smear the top set bit downwards: shifts of 1, 2, 4, 8, 16 and 32. */
    for (unsigned shift = 1; shift < 64; shift <<= 1)
        x |= x >> shift;

    /* Population count: per-pair, per-nibble, then per-byte sums. */
    x = x - ((x >> 1) & pair_mask);
    x = (x & nibble_mask) + ((x >> 2) & nibble_mask);
    x = (x + (x >> 4)) & byte_mask;

    /* Fold the eight per-byte sums into the lowest byte. */
    x += x >> 8;
    x += x >> 16;
    x += x >> 32;

    const uint64_t ones = x & 0x7f;
    return (uint16_t)(64 - ones);
}

/*
 * Predict whether the unsigned 64-bit product (*x0) * (*x1) overflows.
 *
 * The prediction is based only on operand bit widths, so it is conservative:
 * it can return true for a product that still fits in 64 bits (see test
 * case c), but the listed test cases never show a missed overflow.
 */
bool predict_if_mul_overflow(uint64_t *x0, uint64_t *x1)
{
    /* Index of the highest set bit of each operand; -1 for a zero operand. */
    const int32_t msb_x0 = 63 - (int32_t)count_leading_zeros(*x0);
    const int32_t msb_x1 = 63 - (int32_t)count_leading_zeros(*x1);

    /* msb + 1 is the operand's width in bits. */
    const int32_t total_bits = (msb_x0 + 1) + (msb_x1 + 1);

    return total_bits >= 64;
}

/*
 * Run the overflow predictor on the four test pairs and print each result
 * (0 = predicted not to overflow, 1 = predicted to overflow) on its own line.
 *
 * Declared as int main(void) and returning 0: `void main()` is not a
 * standard entry-point signature and leaves the exit status undefined.
 */
int main(void)
{
    printf("%d\n", predict_if_mul_overflow(&a_x0, &a_x1));
    printf("%d\n", predict_if_mul_overflow(&b_x0, &b_x1));
    printf("%d\n", predict_if_mul_overflow(&c_x0, &c_x1));
    printf("%d\n", predict_if_mul_overflow(&d_x0, &d_x1));
    return 0;
}

assembly code
.data
    # will not overflow, and will predict as false
    cmp_data_1: .dword 0x0000000000000000, 0x0000000000000000
    # will not overflow, and will predict as false
    cmp_data_2: .dword 0x0000000000000001, 0x0000000000000010
    # will not overflow, but will predict as true
    cmp_data_3: .dword 0x0000000000000002, 0x4000000000000000
    # will overflow, and will predict as true
    cmp_data_4: .dword 0x0000000000000003, 0x7FFFFFFFFFFFFFFF
    

.text
# assume little endian
# main: run pimo on each of the four test pairs and print every result
# followed by a space.
#   s0 = number of test pairs
#   s1 = number of pairs handled so far
#   s2 = cursor into the on-stack table of test-pair addresses
main:
    addi sp, sp, -16    # room for four 4-byte pointers

    # build the pointer table, one entry per cmp_data_x
    la t0, cmp_data_1
    la t1, cmp_data_2
    la t2, cmp_data_3
    la t3, cmp_data_4
    sw t0, 0(sp)
    sw t1, 4(sp)
    sw t2, 8(sp)
    sw t3, 12(sp)

    # disabled debugging aid: call clz directly and print its result
    #li a0, 0
    #li a1, 0x00
    #jal ra clz
    #jal ra print_dec
    #j exit

    li s0, 4            # total number of test pairs
    li s1, 0            # pairs processed so far
    mv s2, sp           # s2 -> table entry that holds the address of cmp_data_1
main_next_pair:
    lw a0, 0(s2)        # a0 = address of the first 64-bit operand
    addi a1, a0, 8      # a1 = address of the second operand, 8 bytes later
    jal ra, pimo        # a0 = prediction (0 or 1)

    li a7, 1            # ecall 1: print a0 as a decimal number
    ecall
    li a7, 11           # ecall 11: print a0 as a character
    li a0, 32           # ASCII space
    ecall

    addi s1, s1, 1      # one more pair done
    addi s2, s2, 4      # advance to the next table entry
    bne s1, s0, main_next_pair

    addi sp, sp, 16     # drop the pointer table
    j exit
    
    
# pimo: predict if multiplication overflow.
#   In : a0 = address of x0, a1 = address of x1; each is a 64-bit value
#        stored low word first
#   Out: a0 = 1 when (exp_x0 + 1) + (exp_x1 + 1) >= 64, otherwise 0,
#        where exp_x = 63 - clz(x)
#   Saves and restores ra and s0-s3; overwrites a1 and t0, and calls clz.
#   The frame is 32 bytes although only 20 are used: the RISC-V calling
#   convention keeps sp 16-byte aligned, and a 20-byte frame would leave sp
#   misaligned for the two calls to clz.
pimo:
    addi sp, sp, -32
    sw ra, 0(sp)
    sw s0, 4(sp)
    sw s1, 8(sp)
    sw s2, 12(sp)
    sw s3, 16(sp)

    mv s0, a0       # s0 is address of x0
    mv s1, a1       # s1 is address of x1

    lw a0, 0(s0)    # a0 = low word of x0
    lw a1, 4(s0)    # a1 = high word of x0
    jal ra, clz
    li s2, 63
    sub s2, s2, a0  # s2 is now exp_x0 = 63 - clz(x0)

    lw a0, 0(s1)    # a0 = low word of x1
    lw a1, 4(s1)    # a1 = high word of x1
    jal ra, clz
    li s3, 63
    sub s3, s3, a0  # s3 is now exp_x1 = 63 - clz(x1)

    add s2, s2, s3
    addi s2, s2, 2  # s2 is (exp_x0 + 1) + (exp_x1 + 1)
    li t0, 64
    bge s2, t0, pimo_ret_t
    li a0, 0        # return false
    j pimo_end
pimo_ret_t:
    li a0, 1        # return true
pimo_end:
    lw ra, 0(sp)
    lw s0, 4(sp)
    lw s1, 8(sp)
    lw s2, 12(sp)
    lw s3, 16(sp)
    addi sp, sp, 32
    ret


# clz: count leading zeros of a 64-bit value.
#   In : a0 = low 32 bits of x, a1 = high 32 bits of x
#   Out: a0 = number of leading zero bits (64 when x == 0)
#   Clobbers: a1, t0-t5
#   Leaf routine: it never writes ra and keeps nothing on the stack, so it
#   has no frame (a 4-byte frame would also break 16-byte sp alignment).
#   Method: copy the highest set bit into every lower bit, count the set
#   bits with 64-bit arithmetic built from 32-bit halves, and subtract that
#   count from 64.
clz:
    # a0 = low word of x, a1 = high word of x

    bne a1, zero, clz_fill_ones_upper
clz_fill_ones_lower:
    # high word is zero: smear the top set bit of the low word downwards
    srli t0, a0, 1
    or a0, a0, t0
    srli t0, a0, 2
    or a0, a0, t0
    srli t0, a0, 4
    or a0, a0, t0
    srli t0, a0, 8
    or a0, a0, t0
    srli t0, a0, 16
    or a0, a0, t0
    j clz_fill_ones_end
clz_fill_ones_upper:
    # high word is non-zero: smear inside the high word only
    srli t1, a1, 1
    or a1, a1, t1
    srli t1, a1, 2
    or a1, a1, t1
    srli t1, a1, 4
    or a1, a1, t1
    srli t1, a1, 8
    or a1, a1, t1
    srli t1, a1, 16
    or a1, a1, t1
    li a0, 0xffffffff   # every low-word bit lies below the top set bit
clz_fill_ones_end:


    # x -= ((x >> 1) & 0x5555555555555555);
    srli t0, a0, 1
    slli t1, a1, 31     # bit 0 of the high word moves into bit 31 of the low word
    or t0, t0, t1
    srli t1, a1, 1      # t0 (low), t1 (high) = x >> 1

    li t2, 0x55555555   # t2 is the mask
    and t0, t0, t2
    and t1, t1, t2      # t0 t1 = (x >> 1) & 0x5555555555555555

    sltu t3, a0, t0     # t3 is the borrow bit; taken before a0 is overwritten
    sub a0, a0, t0
    sub a1, a1, t1
    sub a1, a1, t3      # a0 a1 = x - (t0 t1)


    # x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333);
    srli t0, a0, 2
    slli t1, a1, 30
    or t0, t0, t1
    srli t1, a1, 2      # t0 t1 = x >> 2

    li t2, 0x33333333   # t2 is the mask
    and t0, t0, t2
    and t1, t1, t2      # t0 t1 = (x >> 2) & 0x3333333333333333
    and t4, a0, t2
    and t5, a1, t2      # t4 t5 = x & 0x3333333333333333

    add a0, t0, t4
    sltu t3, a0, t0     # t3 is the carry bit (sum wrapped below an addend)
    add a1, t1, t5
    add a1, a1, t3      # a0 a1 = (t0 t1) + (t4 t5)


    # x = ((x >> 4) + x) & 0x0f0f0f0f0f0f0f0f;
    srli t0, a0, 4
    slli t1, a1, 28
    or t0, t0, t1
    srli t1, a1, 4      # t0 t1 = x >> 4

    add t0, t0, a0
    sltu t3, t0, a0     # t3 is the carry bit
    add t1, t1, a1
    add t1, t1, t3      # t0 t1 = (x >> 4) + x

    li t2, 0x0f0f0f0f   # t2 is the mask
    and a0, t0, t2
    and a1, t1, t2      # a0 a1 = (t0 t1) & 0x0f0f0f0f0f0f0f0f


    # x += (x >> 8);
    srli t0, a0, 8
    slli t1, a1, 24
    or t0, t0, t1
    srli t1, a1, 8      # t0 t1 = x >> 8

    add a0, a0, t0
    sltu t3, a0, t0     # t3 is the carry bit
    add a1, a1, t1
    add a1, a1, t3      # a0 a1 = x + (x >> 8)


    # x += (x >> 16);
    srli t0, a0, 16
    slli t1, a1, 16
    or t0, t0, t1
    srli t1, a1, 16     # t0 t1 = x >> 16

    add a0, a0, t0
    sltu t3, a0, t0     # t3 is the carry bit
    add a1, a1, t1
    add a1, a1, t3      # a0 a1 = x + (x >> 16)


    # x += (x >> 32);
    mv t0, a1
    mv t1, zero         # t0 t1 = x >> 32

    add a0, a0, t0
    sltu t3, a0, t0     # t3 is the carry bit
    add a1, a1, t1
    add a1, a1, t3      # a0 a1 = x + (x >> 32)


    # return (64 - (x & 0x7f));
    andi a0, a0, 0x7f   # a0 = (x & 0x7f), the number of set bits
    li t0, 64
    sub a0, t0, a0      # a0 = (64 - (x & 0x7f))

    ret

# util func
# print_hex: print the value in a0 with ecall 34, then print a space.
#   In : a0 = value to print
#   Clobbers: a0 (left as 32), a7
#   NOTE(review): ecall 34 is presumably the simulator's hexadecimal print,
#   as the routine name suggests; confirm against the simulator's ecall table.
#   Leaf routine: ra is never written here, so it is not spilled and no
#   stack frame is created.
print_hex:
    li a7, 34
    ecall       # print value
    li a0, 32   # 32 is " " in ASCII
    li a7, 11
    ecall       # print space
    ret

# print_dec: print the value in a0 as a decimal number (ecall 1), then
# print a space (ecall 11).
#   In : a0 = value to print
#   Clobbers: a0 (left as 32), a7
#   Leaf routine: ra is never written here, so it is not spilled and no
#   stack frame is created.
print_dec:
    li a7, 1
    ecall       # print value
    li a0, 32   # 32 is " " in ASCII
    li a7, 11
    ecall       # print space
    ret

# Program end: main jumps here after the last test pair.
# No exit ecall is issued; NOTE(review): this presumably relies on the
# simulator stopping when execution runs past the last instruction, so confirm.
exit:
    nop

These are the cycle counts of his code in Ripes:

Image Not Showing Possible Reasons
  • The image was uploaded to a note which you don't have access to
  • The note which the image was originally uploaded to has been deleted
Learn More →

Original

Modify a little to run in rv32emu

I removed the unused parts and modified the code so that it runs in rv32emu.

.org 0
.global _start

/* newlib system calls */
.set STDOUT,1
.set SYSEXIT, 93
.set SYSWRITE, 64

.data
    # will not overflow, and will predict as false
cmp_data_1: .dword 0x0000000000000000, 0x0000000000000000
    # will not overflow, and will predict as false
cmp_data_2: .dword 0x0000000000000001, 0x0000000000000010
    # will not overflow, but will predict as true
cmp_data_3: .dword 0x0000000000000002, 0x4000000000000000
    # will overflow, and will predict as true
cmp_data_4: .dword 0x0000000000000003, 0x7FFFFFFFFFFFFFFF

nextline:    .ascii  "\n"
             .set str_next_len, .-nextline

blank:      .ascii  " "
             .set str_blank_len, .-blank



.text
# assume little endian
# _start: program entry. Runs pimo on each of the four test pairs and writes
# every result to stdout as one ASCII digit ('0' or '1') followed by "\n".
#   s0 = number of test pairs
#   s1 = number of pairs handled so far
#   s2 = cursor into the on-stack table of test-pair addresses
_start:
    addi sp, sp, -16

    # push four pointers of test data onto the stack
    la t0, cmp_data_1
    sw t0, 0(sp)
    la t0, cmp_data_2
    sw t0, 4(sp)
    la t0, cmp_data_3
    sw t0, 8(sp)
    la t0, cmp_data_4
    sw t0, 12(sp)

    addi s0, zero, 4    # s0 is the goal iteration count
    addi s1, zero, 0    # s1 is the counter
    addi s2, sp, 0      # s2 points to the table entry holding the address of cmp_data_1
main_loop:
    lw a0, 0(s2)        # a0 stores the pointer to first data in cmp_data_x
    addi a1, a0, 8      # a1 stores the pointer to second data in cmp_data_x
    jal ra, pimo

    ### print for rv32emu
    addi a1, a0, 48     # turn the 0/1 result into ASCII '0'/'1'
    addi sp, sp, -4     # 4-byte scratch buffer on the stack
    sw a1, 0(sp)        # little endian: the digit is the byte at 0(sp)
    addi a1, sp, 0      # a1 = buffer address
    li a7, SYSWRITE
    li a0, STDOUT
    li a2, 1            # write only the digit; a length of 4 would also
                        # send the word's three zero bytes to stdout
    ecall
    addi sp,sp,4
    ###

    # printf("\n");
    li  a7, SYSWRITE
    li  a0, STDOUT
    la  a1, nextline
    li  a2, str_next_len
    ecall

    addi s2, s2, 4      # s2 points to next cmp_data_x
    addi s1, s1, 1      # counter++
    bne s1, s0, main_loop

    addi sp, sp, 16
    j exit
    
    
# pimo: predict if multiplication overflow.
#   In : a0 = address of x0, a1 = address of x1; each is a 64-bit value
#        stored low word first
#   Out: a0 = 1 when (exp_x0 + 1) + (exp_x1 + 1) >= 64, otherwise 0,
#        where exp_x = 63 - clz(x)
#   Saves and restores ra and s0-s3; overwrites a1 and t0, and calls clz.
#   The frame is 32 bytes although only 20 are used: the RISC-V calling
#   convention keeps sp 16-byte aligned, and a 20-byte frame would leave sp
#   misaligned for the two calls to clz.
pimo:
    addi sp, sp, -32
    sw ra, 0(sp)
    sw s0, 4(sp)
    sw s1, 8(sp)
    sw s2, 12(sp)
    sw s3, 16(sp)

    mv s0, a0       # s0 is address of x0
    mv s1, a1       # s1 is address of x1

    lw a0, 0(s0)    # a0 = low word of x0
    lw a1, 4(s0)    # a1 = high word of x0
    jal ra, clz
    li s2, 63
    sub s2, s2, a0  # s2 is now exp_x0 = 63 - clz(x0)

    lw a0, 0(s1)    # a0 = low word of x1
    lw a1, 4(s1)    # a1 = high word of x1
    jal ra, clz
    li s3, 63
    sub s3, s3, a0  # s3 is now exp_x1 = 63 - clz(x1)

    add s2, s2, s3
    addi s2, s2, 2  # s2 is (exp_x0 + 1) + (exp_x1 + 1)
    li t0, 64
    bge s2, t0, pimo_ret_t
    li a0, 0        # return false
    j pimo_end
pimo_ret_t:
    li a0, 1        # return true
pimo_end:
    lw ra, 0(sp)
    lw s0, 4(sp)
    lw s1, 8(sp)
    lw s2, 12(sp)
    lw s3, 16(sp)
    addi sp, sp, 32
    ret


# clz: count leading zeros of a 64-bit value.
#   In : a0 = low 32 bits of x, a1 = high 32 bits of x
#   Out: a0 = number of leading zero bits (64 when x == 0)
#   Clobbers: a1, t0-t5
#   Leaf routine: it never writes ra and keeps nothing on the stack, so it
#   has no frame (a 4-byte frame would also break 16-byte sp alignment).
#   Method: copy the highest set bit into every lower bit, count the set
#   bits with 64-bit arithmetic built from 32-bit halves, and subtract that
#   count from 64.
clz:
    # a0 = low word of x, a1 = high word of x

    bne a1, zero, clz_fill_ones_upper
clz_fill_ones_lower:
    # high word is zero: smear the top set bit of the low word downwards
    srli t0, a0, 1
    or a0, a0, t0
    srli t0, a0, 2
    or a0, a0, t0
    srli t0, a0, 4
    or a0, a0, t0
    srli t0, a0, 8
    or a0, a0, t0
    srli t0, a0, 16
    or a0, a0, t0
    j clz_fill_ones_end
clz_fill_ones_upper:
    # high word is non-zero: smear inside the high word only
    srli t1, a1, 1
    or a1, a1, t1
    srli t1, a1, 2
    or a1, a1, t1
    srli t1, a1, 4
    or a1, a1, t1
    srli t1, a1, 8
    or a1, a1, t1
    srli t1, a1, 16
    or a1, a1, t1
    li a0, 0xffffffff   # every low-word bit lies below the top set bit
clz_fill_ones_end:


    # x -= ((x >> 1) & 0x5555555555555555);
    srli t0, a0, 1
    slli t1, a1, 31     # bit 0 of the high word moves into bit 31 of the low word
    or t0, t0, t1
    srli t1, a1, 1      # t0 (low), t1 (high) = x >> 1

    li t2, 0x55555555   # t2 is the mask
    and t0, t0, t2
    and t1, t1, t2      # t0 t1 = (x >> 1) & 0x5555555555555555

    sltu t3, a0, t0     # t3 is the borrow bit; taken before a0 is overwritten
    sub a0, a0, t0
    sub a1, a1, t1
    sub a1, a1, t3      # a0 a1 = x - (t0 t1)


    # x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333);
    srli t0, a0, 2
    slli t1, a1, 30
    or t0, t0, t1
    srli t1, a1, 2      # t0 t1 = x >> 2

    li t2, 0x33333333   # t2 is the mask
    and t0, t0, t2
    and t1, t1, t2      # t0 t1 = (x >> 2) & 0x3333333333333333
    and t4, a0, t2
    and t5, a1, t2      # t4 t5 = x & 0x3333333333333333

    add a0, t0, t4
    sltu t3, a0, t0     # t3 is the carry bit (sum wrapped below an addend)
    add a1, t1, t5
    add a1, a1, t3      # a0 a1 = (t0 t1) + (t4 t5)


    # x = ((x >> 4) + x) & 0x0f0f0f0f0f0f0f0f;
    srli t0, a0, 4
    slli t1, a1, 28
    or t0, t0, t1
    srli t1, a1, 4      # t0 t1 = x >> 4

    add t0, t0, a0
    sltu t3, t0, a0     # t3 is the carry bit
    add t1, t1, a1
    add t1, t1, t3      # t0 t1 = (x >> 4) + x

    li t2, 0x0f0f0f0f   # t2 is the mask
    and a0, t0, t2
    and a1, t1, t2      # a0 a1 = (t0 t1) & 0x0f0f0f0f0f0f0f0f


    # x += (x >> 8);
    srli t0, a0, 8
    slli t1, a1, 24
    or t0, t0, t1
    srli t1, a1, 8      # t0 t1 = x >> 8

    add a0, a0, t0
    sltu t3, a0, t0     # t3 is the carry bit
    add a1, a1, t1
    add a1, a1, t3      # a0 a1 = x + (x >> 8)


    # x += (x >> 16);
    srli t0, a0, 16
    slli t1, a1, 16
    or t0, t0, t1
    srli t1, a1, 16     # t0 t1 = x >> 16

    add a0, a0, t0
    sltu t3, a0, t0     # t3 is the carry bit
    add a1, a1, t1
    add a1, a1, t3      # a0 a1 = x + (x >> 16)


    # x += (x >> 32);
    mv t0, a1
    mv t1, zero         # t0 t1 = x >> 32

    add a0, a0, t0
    sltu t3, a0, t0     # t3 is the carry bit
    add a1, a1, t1
    add a1, a1, t3      # a0 a1 = x + (x >> 32)


    # return (64 - (x & 0x7f));
    andi a0, a0, 0x7f   # a0 = (x & 0x7f), the number of set bits
    li t0, 64
    sub a0, t0, a0      # a0 = (64 - (x & 0x7f))

    ret

# exit: end the program through the SYSEXIT system call with status 0.
exit:
    li  a0, 0           # exit status
    li  a7, SYSEXIT
    ecall
  • Size
$ riscv-none-elf-size original.elf
player1@player1:~/Desktop/2023_ComputerArchitecture/Hw02$ riscv-none-elf-size original.elf
   text	   data	    bss	    dec	    hex	filename
    728	      0	      0	    728	    2d8	original.elf
  • ELF Header
$ riscv-none-elf-readelf -h original.elf 
player1@player1:~/Desktop/2023_ComputerArchitecture/Hw02$ riscv-none-elf-readelf -h original.elf 
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x0
  Start of program headers:          52 (bytes into file)
  Start of section headers:          5580 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         2
  Size of section headers:           40 (bytes)
  Number of section headers:         6
  Section header string table index: 5
  • Instruction Frequency histogram
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
  1. addi      26.04% [44        ] ███████████████████████████████████████████████████████████████████████████████████
  2. unknown   15.98% [27        ] ██████████████████████████████████████████████████▉
  3. srli      11.83% [20        ] █████████████████████████████████████▋
  4. add        9.47% [16        ] ██████████████████████████████▏
  5. or         8.88% [15        ] ████████████████████████████▎
  6. lw         6.51% [11        ] ████████████████████▊
  7. sw         6.51% [11        ] ████████████████████▊
  8. and        4.73% [8         ] ███████████████
  9. jal        3.55% [6         ] ███████████▎
 10. sub        3.55% [6         ] ███████████▎
 11. sltu       3.55% [6         ] ███████████▎
 12. auipc      2.96% [5         ] █████████▍
 13. slli       2.96% [5         ] █████████▍
 14. lui        1.78% [3         ] █████▋
 15. ecall      1.78% [3         ] █████▋
 16. jalr       1.18% [2         ] ███▊
 17. bne        1.18% [2         ] ███▊
 18. cnop       1.18% [2         ] ███▊

Optimized by riscv-none-elf-gcc

riscv-none-elf-gcc -Q --help=optimizers
The following options control optimizations:
  -O<number>                  		
  -Ofast                      		
  -Og                         		
  -Os                         		
  -Oz                         		
  -faggressive-loop-optimizations 	[enabled]
  -falign-functions           		[disabled]
  -falign-functions=          		
  -falign-jumps               		[disabled]
  -falign-jumps=              		
  -falign-labels              		[disabled]
  -falign-labels=             		
  -falign-loops               		[disabled]
  -falign-loops=              		

I will compile the code with -O0, -O1, -O2, -O3, -Os, and -Ofast and compare the differences.

You shall use RDCYCLE/RDCYCLEH instruction for the statistics of your program’s execution.

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
jserv

I added the RDCYCLE-based timing code shown below.

#include <stdio.h>

typedef uint64_t ticks;
/*
 * Read the 64-bit cycle counter on RV32.
 *
 * The counter is exposed as two 32-bit halves (rdcycleh / rdcycle), so the
 * low half can wrap between the two reads. The high half is therefore read
 * before and after the low half, and the whole read is repeated until both
 * high reads agree.
 *
 * The earlier mask-based sequence overwrote the high half with an all-ones
 * or all-zero mask, so the returned value never carried the real upper 32
 * bits and collapsed to 0 whenever a wrap was detected.
 */
static inline ticks getticks(void)
{
    uint32_t lo, hi, hi_check;

    do {
        asm volatile(
            "rdcycleh %0\n"
            "rdcycle %1\n"
            "rdcycleh %2\n"
            : "=r"(hi), "=r"(lo), "=r"(hi_check));
    } while (hi != hi_check); /* low half wrapped mid-read: try again */

    return (((uint64_t) hi) << 32) | ((uint64_t) lo);
}

/*
 * Naive recursive Fibonacci: fib(0) = 0, fib(1) = 1,
 * fib(n) = fib(n - 1) + fib(n - 2). Used here as the workload to time.
 */
static uint64_t fib(uint64_t n)
{
    /* 0 and 1 are their own Fibonacci numbers. */
    return (n < 2) ? n : fib(n - 1) + fib(n - 2);
}

/* Time one call to fib(19) and print the number of elapsed cycles. */
int main()
{
    const ticks begin = getticks();
    fib(19);
    const ticks end = getticks();

    const ticks elapsed = end - begin;
    printf("elapsed cycle: %" PRIu64 "\n", elapsed);
    return 0;
}

I added the code above to measure the cycle count at each optimization level and compared it with the cycle count of the original hand-written assembly. The table below shows that the hand-written assembly needs far fewer cycles.

O0 O1 O2 O3 Os Ofast Assembly
elapsed 6217 4528 4528 4445 4528 4445 933
Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
  • Use the Makefile below to build the ELF (Executable and Linkable Format) files
# Build source.c once per optimization level:
#   source_O0.elf source_O1.elf source_O2.elf source_O3.elf source_Os.elf source_Ofast.elf
# Every ELF depends on source.c, so editing the source triggers a rebuild.
CC     := riscv-none-elf-gcc
CFLAGS := -march=rv32i -mabi=ilp32
LEVELS := O0 O1 O2 O3 Os Ofast
ELFS   := $(addprefix source_,$(addsuffix .elf,$(LEVELS)))

.PHONY: all
all: $(ELFS)

# The pattern stem ($*) is the optimization level, e.g. O2 or Ofast.
source_%.elf: source.c
	$(CC) $(CFLAGS) -$* $< -o $@

-O0

  • Size
$ riscv-none-elf-size source_O0.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_O0.elf 
   text	   data	    bss	    dec	    hex	filename
  76692	   2372	   1548	  80612	  13ae4	source_O0.elf

  • ELF header
$ riscv-none-elf-readelf -h source_O0.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_O0.elf
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x100d8
  Start of program headers:          52 (bytes into file)
  Start of section headers:          94772 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

  • Save txt
$ riscv-none-elf-objdump -d source_O0.elf >./text/source_O0.txt
  • Disassembly code
    With only main and predict_if_mul_overflow
main and predict_if_mul_overflow
000105a6 <main>: 105a6: 1141 add sp,sp,-16 105a8: c606 sw ra,12(sp) 105aa: c422 sw s0,8(sp) 105ac: 0800 add s0,sp,16 105ae: f9018593 add a1,gp,-112 # 1d7a0 <a_x1> 105b2: f8818513 add a0,gp,-120 # 1d798 <a_x0> 105b6: 3741 jal 10536 <predict_if_mul_overflow> 105b8: 87aa mv a5,a0 105ba: 85be mv a1,a5 105bc: 67f1 lui a5,0x1c 105be: ea078513 add a0,a5,-352 # 1bea0 <__clzsi2+0x70> 105c2: 2135 jal 109ee <printf> 105c4: f4018593 add a1,gp,-192 # 1d750 <b_x1> 105c8: f3818513 add a0,gp,-200 # 1d748 <b_x0> 105cc: 37ad jal 10536 <predict_if_mul_overflow> 105ce: 87aa mv a5,a0 105d0: 85be mv a1,a5 105d2: 67f1 lui a5,0x1c 105d4: ea078513 add a0,a5,-352 # 1bea0 <__clzsi2+0x70> 105d8: 2919 jal 109ee <printf> 105da: f5018593 add a1,gp,-176 # 1d760 <c_x1> 105de: f4818513 add a0,gp,-184 # 1d758 <c_x0> 105e2: 3f91 jal 10536 <predict_if_mul_overflow> 105e4: 87aa mv a5,a0 105e6: 85be mv a1,a5 105e8: 67f1 lui a5,0x1c 105ea: ea078513 add a0,a5,-352 # 1bea0 <__clzsi2+0x70> 105ee: 2101 jal 109ee <printf> 105f0: f6018593 add a1,gp,-160 # 1d770 <d_x1> 105f4: f5818513 add a0,gp,-168 # 1d768 <d_x0> 105f8: 3f3d jal 10536 <predict_if_mul_overflow> 105fa: 87aa mv a5,a0 105fc: 85be mv a1,a5 105fe: 67f1 lui a5,0x1c 10600: ea078513 add a0,a5,-352 # 1bea0 <__clzsi2+0x70> 10604: 26ed jal 109ee <printf> 10606: 4781 li a5,0 10608: 853e mv a0,a5 1060a: 40b2 lw ra,12(sp) 1060c: 4422 lw s0,8(sp) 1060e: 0141 add sp,sp,16 10610: 8082 ret 00010536 <predict_if_mul_overflow>: 10536: 7179 add sp,sp,-48 10538: d606 sw ra,44(sp) 1053a: d422 sw s0,40(sp) 1053c: 1800 add s0,sp,48 1053e: fca42e23 sw a0,-36(s0) 10542: fcb42c23 sw a1,-40(s0) 10546: fdc42783 lw a5,-36(s0) 1054a: 4398 lw a4,0(a5) 1054c: 43dc lw a5,4(a5) 1054e: 853a mv a0,a4 10550: 85be mv a1,a5 10552: 3ec5 jal 10142 <count_leading_zeros> 10554: 87aa mv a5,a0 10556: 873e mv a4,a5 10558: 03f00793 li a5,63 1055c: 8f99 sub a5,a5,a4 1055e: fef42623 sw a5,-20(s0) 10562: fd842783 lw a5,-40(s0) 10566: 4398 lw a4,0(a5) 10568: 43dc lw a5,4(a5) 1056a: 853a mv a0,a4 
1056c: 85be mv a1,a5 1056e: 3ed1 jal 10142 <count_leading_zeros> 10570: 87aa mv a5,a0 10572: 873e mv a4,a5 10574: 03f00793 li a5,63 10578: 8f99 sub a5,a5,a4 1057a: fef42423 sw a5,-24(s0) 1057e: fec42783 lw a5,-20(s0) 10582: 00178713 add a4,a5,1 10586: fe842783 lw a5,-24(s0) 1058a: 0785 add a5,a5,1 1058c: 973e add a4,a4,a5 1058e: 03f00793 li a5,63 10592: 00e7d463 bge a5,a4,1059a <predict_if_mul_overflow+0x64> 10596: 4785 li a5,1 10598: a011 j 1059c <predict_if_mul_overflow+0x66> 1059a: 4781 li a5,0 1059c: 853e mv a0,a5 1059e: 50b2 lw ra,44(sp) 105a0: 5422 lw s0,40(sp) 105a2: 6145 add sp,sp,48 105a4: 8082 ret
  • Instruction Frequency histogram
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
  1. cmv       10.57% [1822      ] █████████████████████████████████████████████
  2. clwsp      8.54% [1472      ] ████████████████████████████████████▎
  3. addi       7.28% [1254      ] ██████████████████████████████▉
  4. cli        7.04% [1214      ] █████████████████████████████▉
  5. cswsp      6.67% [1150      ] ████████████████████████████▍
  6. caddi      4.34% [748       ] ██████████████████▍
  7. cj         3.95% [680       ] ████████████████▊
  8. jal        3.65% [630       ] ███████████████▌
  9. beq        2.84% [489       ] ████████████
 10. sw         2.69% [463       ] ███████████▍
 11. lw         2.67% [460       ] ███████████▎
 12. clw        2.31% [399       ] █████████▊
 13. cadd       2.25% [387       ] █████████▌
 14. andi       1.99% [343       ] ████████▍
 15. bne        1.93% [332       ] ████████▏
 16. cbeqz      1.87% [322       ] ███████▉
 17. cjr        1.84% [318       ] ███████▊
 18. csw        1.71% [294       ] ███████▎
 19. sub        1.48% [255       ] ██████▎
 20. bge        1.28% [221       ] █████▍
 21. lbu        1.26% [218       ] █████▍
 22. auipc      1.22% [210       ] █████▏
 23. blt        1.19% [205       ] █████
 24. or         1.13% [195       ] ████▊
 25. cbnez      1.12% [193       ] ████▊
 26. cslli      1.11% [191       ] ████▋
 27. slli       1.04% [179       ] ████▍
 28. srli       1.03% [178       ] ████▍

-O1

  • Size
$ riscv-none-elf-size source_O1.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_O1.elf
   text	   data	    bss	    dec	    hex	filename
  75784	   2372	   1548	  79704	  13758	source_O1.elf
source_O1.elf

  • ELF header
$ riscv-none-elf-readelf -h source_O1.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_O1.elf
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x100d8
  Start of program headers:          52 (bytes into file)
  Start of section headers:          94772 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

  • Save txt
$ riscv-none-elf-objdump -d source_O1.elf >./text/source_O1.txt
  • Disassembly code
    With only main and predict_if_mul_overflow
main and predict_if_mul_overflow
0001028a <main>:
   1028a:	1141                	add	sp,sp,-16
   1028c:	c606                	sw	ra,12(sp)
   1028e:	c422                	sw	s0,8(sp)
   10290:	f8818593          	add	a1,gp,-120 # 1d798 <a_x1>
   10294:	f9018513          	add	a0,gp,-112 # 1d7a0 <a_x0>
   10298:	3f75                	jal	10254 <predict_if_mul_overflow>
   1029a:	85aa                	mv	a1,a0
   1029c:	6471                	lui	s0,0x1c
   1029e:	b7040513          	add	a0,s0,-1168 # 1bb70 <__clzsi2+0x6e>
   102a2:	2939                	jal	106c0 <printf>
   102a4:	f5818593          	add	a1,gp,-168 # 1d768 <b_x1>
   102a8:	f6018513          	add	a0,gp,-160 # 1d770 <b_x0>
   102ac:	3765                	jal	10254 <predict_if_mul_overflow>
   102ae:	85aa                	mv	a1,a0
   102b0:	b7040513          	add	a0,s0,-1168
   102b4:	2131                	jal	106c0 <printf>
   102b6:	f4818593          	add	a1,gp,-184 # 1d758 <c_x1>
   102ba:	f5018513          	add	a0,gp,-176 # 1d760 <c_x0>
   102be:	3f59                	jal	10254 <predict_if_mul_overflow>
   102c0:	85aa                	mv	a1,a0
   102c2:	b7040513          	add	a0,s0,-1168
   102c6:	2eed                	jal	106c0 <printf>
   102c8:	f3818593          	add	a1,gp,-200 # 1d748 <d_x1>
   102cc:	f4018513          	add	a0,gp,-192 # 1d750 <d_x0>
   102d0:	3751                	jal	10254 <predict_if_mul_overflow>
   102d2:	85aa                	mv	a1,a0
   102d4:	b7040513          	add	a0,s0,-1168
   102d8:	26e5                	jal	106c0 <printf>
   102da:	4501                	li	a0,0
   102dc:	40b2                	lw	ra,12(sp)
   102de:	4422                	lw	s0,8(sp)
   102e0:	0141                	add	sp,sp,16
   102e2:	8082                	ret
      
00010254 <predict_if_mul_overflow>:
   10254:	1141                	add	sp,sp,-16
   10256:	c606                	sw	ra,12(sp)
   10258:	c422                	sw	s0,8(sp)
   1025a:	c226                	sw	s1,4(sp)
   1025c:	84ae                	mv	s1,a1
   1025e:	414c                	lw	a1,4(a0)
   10260:	4108                	lw	a0,0(a0)
   10262:	35c5                	jal	10142 <count_leading_zeros>
   10264:	842a                	mv	s0,a0
   10266:	4088                	lw	a0,0(s1)
   10268:	40cc                	lw	a1,4(s1)
   1026a:	3de1                	jal	10142 <count_leading_zeros>
   1026c:	04000793          	li	a5,64
   10270:	40878433          	sub	s0,a5,s0
   10274:	8f89                	sub	a5,a5,a0
   10276:	943e                	add	s0,s0,a5
   10278:	04042513          	slti	a0,s0,64
   1027c:	00154513          	xor	a0,a0,1
   10280:	40b2                	lw	ra,12(sp)
   10282:	4422                	lw	s0,8(sp)
   10284:	4492                	lw	s1,4(sp)
   10286:	0141                	add	sp,sp,16
   10288:	8082                	ret
  • Instruction Frequency histogram
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
  1. cmv       10.49% [1784      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
  2. clwsp      8.59% [1462      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏
  3. addi       7.33% [1246      ] ██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉
  4. cli        7.13% [1212      ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌
  5. cswsp      6.70% [1140      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
  6. caddi      4.40% [749       ] █████████████████████████████████████████████████████████████████████████▉
  7. cj         3.99% [679       ] ██████████████████████████████████████████████████████████████████▉
  8. jal        3.70% [630       ] ██████████████████████████████████████████████████████████████▏
  9. beq        2.87% [489       ] ████████████████████████████████████████████████▏
 10. sw         2.41% [410       ] ████████████████████████████████████████▍
 11. clw        2.35% [399       ] ███████████████████████████████████████▎
 12. cadd       2.35% [399       ] ███████████████████████████████████████▎
 13. lw         2.22% [377       ] █████████████████████████████████████▏
 14. andi       2.02% [343       ] █████████████████████████████████▊
 15. bne        1.95% [332       ] ████████████████████████████████▊
 16. cbeqz      1.89% [322       ] ███████████████████████████████▊
 17. cjr        1.87% [318       ] ███████████████████████████████▎
 18. csw        1.73% [294       ] █████████████████████████████
 19. sub        1.49% [253       ] ████████████████████████▉
 20. bge        1.29% [220       ] █████████████████████▋
 21. lbu        1.28% [218       ] █████████████████████▌
 22. auipc      1.23% [210       ] ████████████████████▋
 23. blt        1.21% [205       ] ████████████████████▏
 24. cbnez      1.13% [193       ] ███████████████████
 25. cslli      1.09% [185       ] ██████████████████▎
 26. slli       1.08% [184       ] ██████████████████▏
 27. or         1.06% [180       ] █████████████████▊
 28. srli       1.05% [178       ] █████████████████▌

-O2

  • Size
$ riscv-none-elf-size source_O2.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_O2.elf
   text	   data	    bss	    dec	    hex	filename
  75784	   2372	   1548	  79704	  13758	source_O2.elf
  • ELF header
$ riscv-none-elf-readelf -h source_O2.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_O2.elf
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1015c
  Start of program headers:          52 (bytes into file)
  Start of section headers:          94788 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14
  • Save the disassembly to a text file
$ riscv-none-elf-objdump -d source_O2.elf >./text/source_O2.txt
  • Disassembly code
    Only main and predict_if_mul_overflow are shown below.
main and predict_if_mul_overflow
000100b0 <main>:
   100b0:	1141                	add	sp,sp,-16
   100b2:	f8818593          	add	a1,gp,-120 # 1d798 <a_x1>
   100b6:	f9018513          	add	a0,gp,-112 # 1d7a0 <a_x0>
   100ba:	c606                	sw	ra,12(sp)
   100bc:	c422                	sw	s0,8(sp)
   100be:	2ac5                	jal	102ae <predict_if_mul_overflow>
   100c0:	6471                	lui	s0,0x1c
   100c2:	85aa                	mv	a1,a0
   100c4:	b7040513          	add	a0,s0,-1168 # 1bb70 <__clzsi2+0x6e>
   100c8:	2be5                	jal	106c0 <printf>
   100ca:	f5818593          	add	a1,gp,-168 # 1d768 <b_x1>
   100ce:	f6018513          	add	a0,gp,-160 # 1d770 <b_x0>
   100d2:	2af1                	jal	102ae <predict_if_mul_overflow>
   100d4:	85aa                	mv	a1,a0
   100d6:	b7040513          	add	a0,s0,-1168
   100da:	23dd                	jal	106c0 <printf>
   100dc:	f4818593          	add	a1,gp,-184 # 1d758 <c_x1>
   100e0:	f5018513          	add	a0,gp,-176 # 1d760 <c_x0>
   100e4:	22e9                	jal	102ae <predict_if_mul_overflow>
   100e6:	85aa                	mv	a1,a0
   100e8:	b7040513          	add	a0,s0,-1168
   100ec:	2bd1                	jal	106c0 <printf>
   100ee:	f3818593          	add	a1,gp,-200 # 1d748 <d_x1>
   100f2:	f4018513          	add	a0,gp,-192 # 1d750 <d_x0>
   100f6:	2a65                	jal	102ae <predict_if_mul_overflow>
   100f8:	85aa                	mv	a1,a0
   100fa:	b7040513          	add	a0,s0,-1168
   100fe:	23c9                	jal	106c0 <printf>
   10100:	40b2                	lw	ra,12(sp)
   10102:	4422                	lw	s0,8(sp)
   10104:	4501                	li	a0,0
   10106:	0141                	add	sp,sp,16
   10108:	8082                	ret
       
000102ae <predict_if_mul_overflow>:
   102ae:	1141                	add	sp,sp,-16
   102b0:	c226                	sw	s1,4(sp)
   102b2:	84ae                	mv	s1,a1
   102b4:	414c                	lw	a1,4(a0)
   102b6:	4108                	lw	a0,0(a0)
   102b8:	c606                	sw	ra,12(sp)
   102ba:	c422                	sw	s0,8(sp)
   102bc:	35c5                	jal	1019c <count_leading_zeros>
   102be:	40cc                	lw	a1,4(s1)
   102c0:	842a                	mv	s0,a0
   102c2:	4088                	lw	a0,0(s1)
   102c4:	3de1                	jal	1019c <count_leading_zeros>
   102c6:	04000793          	li	a5,64
   102ca:	40878433          	sub	s0,a5,s0
   102ce:	8f89                	sub	a5,a5,a0
   102d0:	943e                	add	s0,s0,a5
   102d2:	40b2                	lw	ra,12(sp)
   102d4:	04042513          	slti	a0,s0,64
   102d8:	4422                	lw	s0,8(sp)
   102da:	4492                	lw	s1,4(sp)
   102dc:	00154513          	xor	a0,a0,1
   102e0:	0141                	add	sp,sp,16
   102e2:	8082                	ret
  • Instruction Frequency histogram
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
  1. cmv       10.49% [1784      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
  2. clwsp      8.59% [1462      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏
  3. addi       7.33% [1246      ] ██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉
  4. cli        7.13% [1212      ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌
  5. cswsp      6.70% [1140      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
  6. caddi      4.40% [749       ] █████████████████████████████████████████████████████████████████████████▉
  7. cj         3.99% [679       ] ██████████████████████████████████████████████████████████████████▉
  8. jal        3.70% [630       ] ██████████████████████████████████████████████████████████████▏
  9. beq        2.87% [489       ] ████████████████████████████████████████████████▏
 10. sw         2.41% [410       ] ████████████████████████████████████████▍
 11. clw        2.35% [399       ] ███████████████████████████████████████▎
 12. cadd       2.35% [399       ] ███████████████████████████████████████▎
 13. lw         2.22% [377       ] █████████████████████████████████████▏
 14. andi       2.02% [343       ] █████████████████████████████████▊
 15. bne        1.95% [332       ] ████████████████████████████████▊
 16. cbeqz      1.89% [322       ] ███████████████████████████████▊
 17. cjr        1.87% [318       ] ███████████████████████████████▎
 18. csw        1.73% [294       ] █████████████████████████████
 19. sub        1.49% [253       ] ████████████████████████▉
 20. bge        1.29% [220       ] █████████████████████▋
 21. lbu        1.28% [218       ] █████████████████████▌
 22. auipc      1.23% [210       ] ████████████████████▋
 23. blt        1.21% [205       ] ████████████████████▏
 24. cbnez      1.13% [193       ] ███████████████████
 25. cslli      1.09% [185       ] ██████████████████▎
 26. slli       1.08% [184       ] ██████████████████▏
 27. or         1.06% [180       ] █████████████████▊
 28. srli       1.05% [178       ] █████████████████▌

-O3

  • Size
$ riscv-none-elf-size source_O3.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_O3.elf 
   text	   data	    bss	    dec	    hex	filename
  76436	   2372	   1548	  80356	  139e4	source_O3.elf
  • ELF header
$ riscv-none-elf-readelf -h source_O3.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_O3.elf
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1015c
  Start of program headers:          52 (bytes into file)
  Start of section headers:          94788 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

  • Save the disassembly to a text file
$ riscv-none-elf-objdump -d source_O3.elf >./text/source_O3.txt
  • Disassembly code
    Only main and predict_if_mul_overflow are shown below.
main and predict_if_mul_overflow
000100b0 <main>:
   100b0:	1141                	add	sp,sp,-16
   100b2:	f8818593          	add	a1,gp,-120 # 1d798 <a_x1>
   100b6:	f9018513          	add	a0,gp,-112 # 1d7a0 <a_x0>
   100ba:	c606                	sw	ra,12(sp)
   100bc:	c422                	sw	s0,8(sp)
   100be:	2ae5                	jal	102b6 <predict_if_mul_overflow>
   100c0:	6471                	lui	s0,0x1c
   100c2:	85aa                	mv	a1,a0
   100c4:	db840513          	add	a0,s0,-584 # 1bdb8 <__clzsi2+0x6e>
   100c8:	043000ef          	jal	1090a <printf>
   100cc:	f5818593          	add	a1,gp,-168 # 1d768 <b_x1>
   100d0:	f6018513          	add	a0,gp,-160 # 1d770 <b_x0>
   100d4:	22cd                	jal	102b6 <predict_if_mul_overflow>
   100d6:	85aa                	mv	a1,a0
   100d8:	db840513          	add	a0,s0,-584
   100dc:	02f000ef          	jal	1090a <printf>
   100e0:	f4818593          	add	a1,gp,-184 # 1d758 <c_x1>
   100e4:	f5018513          	add	a0,gp,-176 # 1d760 <c_x0>
   100e8:	22f9                	jal	102b6 <predict_if_mul_overflow>
   100ea:	85aa                	mv	a1,a0
   100ec:	db840513          	add	a0,s0,-584
   100f0:	01b000ef          	jal	1090a <printf>
   100f4:	f3818593          	add	a1,gp,-200 # 1d748 <d_x1>
   100f8:	f4018513          	add	a0,gp,-192 # 1d750 <d_x0>
   100fc:	2a6d                	jal	102b6 <predict_if_mul_overflow>
   100fe:	85aa                	mv	a1,a0
   10100:	db840513          	add	a0,s0,-584
   10104:	007000ef          	jal	1090a <printf>
   10108:	40b2                	lw	ra,12(sp)
   1010a:	4422                	lw	s0,8(sp)
   1010c:	4501                	li	a0,0
   1010e:	0141                	add	sp,sp,16
   10110:	8082                	ret
       
000102b6 <predict_if_mul_overflow>:
   102b6:	00452883          	lw	a7,4(a0)
   102ba:	0045a803          	lw	a6,4(a1)
   102be:	4108                	lw	a0,0(a0)
   102c0:	418c                	lw	a1,0(a1)
   102c2:	01f89613          	sll	a2,a7,0x1f
   102c6:	01f81693          	sll	a3,a6,0x1f
   102ca:	00155713          	srl	a4,a0,0x1
   102ce:	0015d793          	srl	a5,a1,0x1
   102d2:	8f51                	or	a4,a4,a2
   102d4:	8fd5                	or	a5,a5,a3
   102d6:	0018d613          	srl	a2,a7,0x1
   102da:	00185693          	srl	a3,a6,0x1
   102de:	01166633          	or	a2,a2,a7
   102e2:	0106e6b3          	or	a3,a3,a6
   102e6:	8d59                	or	a0,a0,a4
   102e8:	8ddd                	or	a1,a1,a5
   102ea:	01e61893          	sll	a7,a2,0x1e
   102ee:	01e69813          	sll	a6,a3,0x1e
   102f2:	00255713          	srl	a4,a0,0x2
   102f6:	0025d793          	srl	a5,a1,0x2
   102fa:	00e8e733          	or	a4,a7,a4
   102fe:	00f867b3          	or	a5,a6,a5
   10302:	00265893          	srl	a7,a2,0x2
   10306:	0026d813          	srl	a6,a3,0x2
   1030a:	01166633          	or	a2,a2,a7
   1030e:	0106e6b3          	or	a3,a3,a6
   10312:	8d59                	or	a0,a0,a4
   10314:	8ddd                	or	a1,a1,a5
   10316:	01c61893          	sll	a7,a2,0x1c
   1031a:	01c69813          	sll	a6,a3,0x1c
   1031e:	00455713          	srl	a4,a0,0x4
   10322:	0045d793          	srl	a5,a1,0x4
   10326:	00e8e733          	or	a4,a7,a4
   1032a:	00f867b3          	or	a5,a6,a5
   1032e:	00465893          	srl	a7,a2,0x4
   10332:	0046d813          	srl	a6,a3,0x4
   10336:	01166633          	or	a2,a2,a7
   1033a:	0106e6b3          	or	a3,a3,a6
   1033e:	8d59                	or	a0,a0,a4
   10340:	8ddd                	or	a1,a1,a5
   10342:	01861893          	sll	a7,a2,0x18
   10346:	01869813          	sll	a6,a3,0x18
   1034a:	00855713          	srl	a4,a0,0x8
   1034e:	0085d793          	srl	a5,a1,0x8
   10352:	00e8e733          	or	a4,a7,a4
   10356:	00f867b3          	or	a5,a6,a5
   1035a:	00865893          	srl	a7,a2,0x8
   1035e:	0086d813          	srl	a6,a3,0x8
   10362:	01166633          	or	a2,a2,a7
   10366:	0106e6b3          	or	a3,a3,a6
   1036a:	8d59                	or	a0,a0,a4
   1036c:	8ddd                	or	a1,a1,a5
   1036e:	01061893          	sll	a7,a2,0x10
   10372:	01069813          	sll	a6,a3,0x10
   10376:	01055713          	srl	a4,a0,0x10
   1037a:	0105d793          	srl	a5,a1,0x10
   1037e:	00e8e733          	or	a4,a7,a4
   10382:	00f867b3          	or	a5,a6,a5
   10386:	01065893          	srl	a7,a2,0x10
   1038a:	0106d813          	srl	a6,a3,0x10
   1038e:	01166633          	or	a2,a2,a7
   10392:	0106e6b3          	or	a3,a3,a6
   10396:	8d59                	or	a0,a0,a4
   10398:	8ddd                	or	a1,a1,a5
   1039a:	8d51                	or	a0,a0,a2
   1039c:	8dd5                	or	a1,a1,a3
   1039e:	01f61313          	sll	t1,a2,0x1f
   103a2:	01f69893          	sll	a7,a3,0x1f
   103a6:	00155713          	srl	a4,a0,0x1
   103aa:	0015d793          	srl	a5,a1,0x1
   103ae:	55555837          	lui	a6,0x55555
   103b2:	55580813          	add	a6,a6,1365 # 55555555 <__BSS_END__+0x555377b9>
   103b6:	00e36733          	or	a4,t1,a4
   103ba:	00f8e7b3          	or	a5,a7,a5
   103be:	00165313          	srl	t1,a2,0x1
   103c2:	0016d893          	srl	a7,a3,0x1
   103c6:	01077733          	and	a4,a4,a6
   103ca:	0107f7b3          	and	a5,a5,a6
   103ce:	40e50733          	sub	a4,a0,a4
   103d2:	40f587b3          	sub	a5,a1,a5
   103d6:	01037333          	and	t1,t1,a6
   103da:	0108f833          	and	a6,a7,a6
   103de:	00e53533          	sltu	a0,a0,a4
   103e2:	00f5b5b3          	sltu	a1,a1,a5
   103e6:	40660633          	sub	a2,a2,t1
   103ea:	410686b3          	sub	a3,a3,a6
   103ee:	8e09                	sub	a2,a2,a0
   103f0:	8e8d                	sub	a3,a3,a1
   103f2:	01e61313          	sll	t1,a2,0x1e
   103f6:	01e69893          	sll	a7,a3,0x1e
   103fa:	00275513          	srl	a0,a4,0x2
   103fe:	0027d593          	srl	a1,a5,0x2
   10402:	33333837          	lui	a6,0x33333
   10406:	33380813          	add	a6,a6,819 # 33333333 <__BSS_END__+0x33315597>
   1040a:	00a36533          	or	a0,t1,a0
   1040e:	00b8e5b3          	or	a1,a7,a1
   10412:	01057533          	and	a0,a0,a6
   10416:	0105f5b3          	and	a1,a1,a6
   1041a:	00265313          	srl	t1,a2,0x2
   1041e:	01077733          	and	a4,a4,a6
   10422:	0026d893          	srl	a7,a3,0x2
   10426:	0107f7b3          	and	a5,a5,a6
   1042a:	972a                	add	a4,a4,a0
   1042c:	97ae                	add	a5,a5,a1
   1042e:	01037333          	and	t1,t1,a6
   10432:	0108f8b3          	and	a7,a7,a6
   10436:	01067633          	and	a2,a2,a6
   1043a:	0106f6b3          	and	a3,a3,a6
   1043e:	961a                	add	a2,a2,t1
   10440:	96c6                	add	a3,a3,a7
   10442:	00a73533          	sltu	a0,a4,a0
   10446:	00b7b5b3          	sltu	a1,a5,a1
   1044a:	9532                	add	a0,a0,a2
   1044c:	95b6                	add	a1,a1,a3
   1044e:	01c51893          	sll	a7,a0,0x1c
   10452:	01c59813          	sll	a6,a1,0x1c
   10456:	00475613          	srl	a2,a4,0x4
   1045a:	0047d693          	srl	a3,a5,0x4
   1045e:	00c8e633          	or	a2,a7,a2
   10462:	00d866b3          	or	a3,a6,a3
   10466:	9732                	add	a4,a4,a2
   10468:	97b6                	add	a5,a5,a3
   1046a:	00455893          	srl	a7,a0,0x4
   1046e:	0045d813          	srl	a6,a1,0x4
   10472:	98aa                	add	a7,a7,a0
   10474:	95c2                	add	a1,a1,a6
   10476:	00c73633          	sltu	a2,a4,a2
   1047a:	00d7b6b3          	sltu	a3,a5,a3
   1047e:	0f0f1537          	lui	a0,0xf0f1
   10482:	f0f50513          	add	a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d3173>
   10486:	9646                	add	a2,a2,a7
   10488:	96ae                	add	a3,a3,a1
   1048a:	8e69                	and	a2,a2,a0
   1048c:	8fe9                	and	a5,a5,a0
   1048e:	8ee9                	and	a3,a3,a0
   10490:	8f69                	and	a4,a4,a0
   10492:	01861893          	sll	a7,a2,0x18
   10496:	01869813          	sll	a6,a3,0x18
   1049a:	00875513          	srl	a0,a4,0x8
   1049e:	0087d593          	srl	a1,a5,0x8
   104a2:	00a8e533          	or	a0,a7,a0
   104a6:	00b865b3          	or	a1,a6,a1
   104aa:	953a                	add	a0,a0,a4
   104ac:	95be                	add	a1,a1,a5
   104ae:	00865893          	srl	a7,a2,0x8
   104b2:	0086d813          	srl	a6,a3,0x8
   104b6:	9646                	add	a2,a2,a7
   104b8:	96c2                	add	a3,a3,a6
   104ba:	00e53733          	sltu	a4,a0,a4
   104be:	00f5b7b3          	sltu	a5,a1,a5
   104c2:	9732                	add	a4,a4,a2
   104c4:	97b6                	add	a5,a5,a3
   104c6:	01071893          	sll	a7,a4,0x10
   104ca:	01079813          	sll	a6,a5,0x10
   104ce:	01055693          	srl	a3,a0,0x10
   104d2:	0105d613          	srl	a2,a1,0x10
   104d6:	00d8e6b3          	or	a3,a7,a3
   104da:	00c86633          	or	a2,a6,a2
   104de:	96aa                	add	a3,a3,a0
   104e0:	962e                	add	a2,a2,a1
   104e2:	01075893          	srl	a7,a4,0x10
   104e6:	0107d813          	srl	a6,a5,0x10
   104ea:	9746                	add	a4,a4,a7
   104ec:	97c2                	add	a5,a5,a6
   104ee:	00a6b533          	sltu	a0,a3,a0
   104f2:	00b635b3          	sltu	a1,a2,a1
   104f6:	95be                	add	a1,a1,a5
   104f8:	953a                	add	a0,a0,a4
   104fa:	9536                	add	a0,a0,a3
   104fc:	962e                	add	a2,a2,a1
   104fe:	04000793          	li	a5,64
   10502:	07f67613          	and	a2,a2,127
   10506:	07f57513          	and	a0,a0,127
   1050a:	40a78533          	sub	a0,a5,a0
   1050e:	8f91                	sub	a5,a5,a2
   10510:	0542                	sll	a0,a0,0x10
   10512:	07c2                	sll	a5,a5,0x10
   10514:	04000713          	li	a4,64
   10518:	83c1                	srl	a5,a5,0x10
   1051a:	8141                	srl	a0,a0,0x10
   1051c:	40a70533          	sub	a0,a4,a0
   10520:	8f1d                	sub	a4,a4,a5
   10522:	953a                	add	a0,a0,a4
   10524:	04052513          	slti	a0,a0,64
   10528:	00154513          	xor	a0,a0,1
   1052c:	8082                	ret
  • Instruction Frequency histogram
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
  1. cmv       10.38% [1782      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
  2. clwsp      8.50% [1459      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
  3. addi       7.28% [1250      ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
  4. cli        7.06% [1212      ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋
  5. cswsp      6.62% [1137      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎
  6. caddi      4.35% [747       ] █████████████████████████████████████████████████████████████████████████▊
  7. cj         3.95% [679       ] ███████████████████████████████████████████████████████████████████
  8. jal        3.69% [633       ] ██████████████████████████████████████████████████████████████▌
  9. beq        2.85% [489       ] ████████████████████████████████████████████████▎
 10. cadd       2.47% [425       ] █████████████████████████████████████████▉
 11. sw         2.39% [410       ] ████████████████████████████████████████▍
 12. clw        2.31% [397       ] ███████████████████████████████████████▏
 13. lw         2.21% [379       ] █████████████████████████████████████▍
 14. andi       2.01% [345       ] ██████████████████████████████████
 15. bne        1.93% [332       ] ████████████████████████████████▊
 16. cbeqz      1.88% [322       ] ███████████████████████████████▊
 17. cjr        1.85% [318       ] ███████████████████████████████▍
 18. csw        1.71% [294       ] █████████████████████████████
 19. sub        1.50% [258       ] █████████████████████████▍
 20. bge        1.28% [220       ] █████████████████████▋
 21. lbu        1.27% [218       ] █████████████████████▌
 22. srli       1.27% [218       ] █████████████████████▌
 23. auipc      1.22% [210       ] ████████████████████▋
 24. or         1.21% [208       ] ████████████████████▌
 25. blt        1.19% [205       ] ████████████████████▏
 26. slli       1.19% [204       ] ████████████████████▏
 27. cbnez      1.12% [193       ] ███████████████████
 28. cslli      1.09% [187       ] ██████████████████▍
 29. cor        1.01% [174       ] █████████████████▏

-Os

  • Size
$ riscv-none-elf-size source_Os.elf 
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_Os.elf
   text	   data	    bss	    dec	    hex	filename
  75784	   2372	   1548	  79704	  13758	source_Os.elf
  • ELF header
$ riscv-none-elf-readelf -h source_Os.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_Os.elf
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1015c
  Start of program headers:          52 (bytes into file)
  Start of section headers:          94788 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

  • Save the disassembly to a text file
$ riscv-none-elf-objdump -d source_Os.elf >./text/source_Os.txt
  • Disassembly code
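    Only main and predict_if_mul_overflow are shown below. For these two functions the -Os listing is identical to the -O2 listing, and the section sizes reported above are the same as well.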
main and predict_if_mul_overflow
000100b0 <main>:
   100b0:	1141                	add	sp,sp,-16
   100b2:	f8818593          	add	a1,gp,-120 # 1d798 <a_x1>
   100b6:	f9018513          	add	a0,gp,-112 # 1d7a0 <a_x0>
   100ba:	c606                	sw	ra,12(sp)
   100bc:	c422                	sw	s0,8(sp)
   100be:	2ac5                	jal	102ae <predict_if_mul_overflow>
   100c0:	6471                	lui	s0,0x1c
   100c2:	85aa                	mv	a1,a0
   100c4:	b7040513          	add	a0,s0,-1168 # 1bb70 <__clzsi2+0x6e>
   100c8:	2be5                	jal	106c0 <printf>
   100ca:	f5818593          	add	a1,gp,-168 # 1d768 <b_x1>
   100ce:	f6018513          	add	a0,gp,-160 # 1d770 <b_x0>
   100d2:	2af1                	jal	102ae <predict_if_mul_overflow>
   100d4:	85aa                	mv	a1,a0
   100d6:	b7040513          	add	a0,s0,-1168
   100da:	23dd                	jal	106c0 <printf>
   100dc:	f4818593          	add	a1,gp,-184 # 1d758 <c_x1>
   100e0:	f5018513          	add	a0,gp,-176 # 1d760 <c_x0>
   100e4:	22e9                	jal	102ae <predict_if_mul_overflow>
   100e6:	85aa                	mv	a1,a0
   100e8:	b7040513          	add	a0,s0,-1168
   100ec:	2bd1                	jal	106c0 <printf>
   100ee:	f3818593          	add	a1,gp,-200 # 1d748 <d_x1>
   100f2:	f4018513          	add	a0,gp,-192 # 1d750 <d_x0>
   100f6:	2a65                	jal	102ae <predict_if_mul_overflow>
   100f8:	85aa                	mv	a1,a0
   100fa:	b7040513          	add	a0,s0,-1168
   100fe:	23c9                	jal	106c0 <printf>
   10100:	40b2                	lw	ra,12(sp)
   10102:	4422                	lw	s0,8(sp)
   10104:	4501                	li	a0,0
   10106:	0141                	add	sp,sp,16
   10108:	8082                	ret
       
000102ae <predict_if_mul_overflow>:
   102ae:	1141                	add	sp,sp,-16
   102b0:	c226                	sw	s1,4(sp)
   102b2:	84ae                	mv	s1,a1
   102b4:	414c                	lw	a1,4(a0)
   102b6:	4108                	lw	a0,0(a0)
   102b8:	c606                	sw	ra,12(sp)
   102ba:	c422                	sw	s0,8(sp)
   102bc:	35c5                	jal	1019c <count_leading_zeros>
   102be:	40cc                	lw	a1,4(s1)
   102c0:	842a                	mv	s0,a0
   102c2:	4088                	lw	a0,0(s1)
   102c4:	3de1                	jal	1019c <count_leading_zeros>
   102c6:	04000793          	li	a5,64
   102ca:	40878433          	sub	s0,a5,s0
   102ce:	8f89                	sub	a5,a5,a0
   102d0:	943e                	add	s0,s0,a5
   102d2:	40b2                	lw	ra,12(sp)
   102d4:	04042513          	slti	a0,s0,64
   102d8:	4422                	lw	s0,8(sp)
   102da:	4492                	lw	s1,4(sp)
   102dc:	00154513          	xor	a0,a0,1
   102e0:	0141                	add	sp,sp,16
   102e2:	8082                	ret
  • Instruction Frequency histogram
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
  1. cmv       10.49% [1784      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
  2. clwsp      8.59% [1462      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏
  3. addi       7.33% [1246      ] ██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉
  4. cli        7.13% [1212      ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌
  5. cswsp      6.70% [1140      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
  6. caddi      4.40% [749       ] █████████████████████████████████████████████████████████████████████████▉
  7. cj         3.99% [679       ] ██████████████████████████████████████████████████████████████████▉
  8. jal        3.70% [630       ] ██████████████████████████████████████████████████████████████▏
  9. beq        2.87% [489       ] ████████████████████████████████████████████████▏
 10. sw         2.41% [410       ] ████████████████████████████████████████▍
 11. clw        2.35% [399       ] ███████████████████████████████████████▎
 12. cadd       2.35% [399       ] ███████████████████████████████████████▎
 13. lw         2.22% [377       ] █████████████████████████████████████▏
 14. andi       2.02% [343       ] █████████████████████████████████▊
 15. bne        1.95% [332       ] ████████████████████████████████▊
 16. cbeqz      1.89% [322       ] ███████████████████████████████▊
 17. cjr        1.87% [318       ] ███████████████████████████████▎
 18. csw        1.73% [294       ] █████████████████████████████
 19. sub        1.49% [253       ] ████████████████████████▉
 20. bge        1.29% [220       ] █████████████████████▋
 21. lbu        1.28% [218       ] █████████████████████▌
 22. auipc      1.23% [210       ] ████████████████████▋
 23. blt        1.21% [205       ] ████████████████████▏
 24. cbnez      1.13% [193       ] ███████████████████
 25. cslli      1.09% [185       ] ██████████████████▎
 26. slli       1.08% [184       ] ██████████████████▏
 27. or         1.06% [180       ] █████████████████▊
 28. srli       1.05% [178       ] █████████████████▌

-Ofast

  • Size
$ riscv-none-elf-size source_Ofast.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_Ofast.elf 
   text	   data	    bss	    dec	    hex	filename
  76436	   2372	   1548	  80356	  139e4	source_Ofast.elf
  • ELF header
$ riscv-none-elf-readelf -h source_Ofast.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_Ofast.elf
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           RISC-V
  Version:                           0x1
  Entry point address:               0x1015c
  Start of program headers:          52 (bytes into file)
  Start of section headers:          94788 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         3
  Size of section headers:           40 (bytes)
  Number of section headers:         15
  Section header string table index: 14

  • Save the disassembly to a text file
$ riscv-none-elf-objdump -d source_Ofast.elf >./text/source_Ofast.txt
  • Disassembly code
    Only main and predict_if_mul_overflow are shown below.
main and predict_if_mul_overflow
000100b0 <main>:
   100b0:	1141                	add	sp,sp,-16
   100b2:	f8818593          	add	a1,gp,-120 # 1d798 <a_x1>
   100b6:	f9018513          	add	a0,gp,-112 # 1d7a0 <a_x0>
   100ba:	c606                	sw	ra,12(sp)
   100bc:	c422                	sw	s0,8(sp)
   100be:	2ae5                	jal	102b6 <predict_if_mul_overflow>
   100c0:	6471                	lui	s0,0x1c
   100c2:	85aa                	mv	a1,a0
   100c4:	db840513          	add	a0,s0,-584 # 1bdb8 <__clzsi2+0x6e>
   100c8:	043000ef          	jal	1090a <printf>
   100cc:	f5818593          	add	a1,gp,-168 # 1d768 <b_x1>
   100d0:	f6018513          	add	a0,gp,-160 # 1d770 <b_x0>
   100d4:	22cd                	jal	102b6 <predict_if_mul_overflow>
   100d6:	85aa                	mv	a1,a0
   100d8:	db840513          	add	a0,s0,-584
   100dc:	02f000ef          	jal	1090a <printf>
   100e0:	f4818593          	add	a1,gp,-184 # 1d758 <c_x1>
   100e4:	f5018513          	add	a0,gp,-176 # 1d760 <c_x0>
   100e8:	22f9                	jal	102b6 <predict_if_mul_overflow>
   100ea:	85aa                	mv	a1,a0
   100ec:	db840513          	add	a0,s0,-584
   100f0:	01b000ef          	jal	1090a <printf>
   100f4:	f3818593          	add	a1,gp,-200 # 1d748 <d_x1>
   100f8:	f4018513          	add	a0,gp,-192 # 1d750 <d_x0>
   100fc:	2a6d                	jal	102b6 <predict_if_mul_overflow>
   100fe:	85aa                	mv	a1,a0
   10100:	db840513          	add	a0,s0,-584
   10104:	007000ef          	jal	1090a <printf>
   10108:	40b2                	lw	ra,12(sp)
   1010a:	4422                	lw	s0,8(sp)
   1010c:	4501                	li	a0,0
   1010e:	0141                	add	sp,sp,16
   10110:	8082                	ret

# predict_if_mul_overflow (objdump listing, RV32 with compressed instructions)
#
# On entry a0 and a1 are used as addresses: each is dereferenced at offsets
# 0 and 4, i.e. as two 32-bit halves of a 64-bit value (low word at +0,
# high word at +4, as shown by how the halves are recombined below).
# On exit a0 is 1 when (64 - clz(x0)) + (64 - clz(x1)) >= 64, otherwise 0.
#
# No call instruction appears in this function: the steps of the C
# count_leading_zeros() (bit smearing followed by a population count) are
# carried out inline for both operands, with the two computations
# interleaved. Every 64-bit operation is done on a pair of 32-bit registers,
# with sltu producing the carry/borrow between the low and high halves.
#
# Register roles at the start (they move around later; see block comments):
#   x0: low = a0, high = a7 (then a2)
#   x1: low = a1, high = a6 (then a3)
000102b6 <predict_if_mul_overflow>:
   # Load both 64-bit operands as high:low word pairs (a7:a0 = x0, a6:a1 = x1).
   102b6:	00452883          	lw	a7,4(a0)
   102ba:	0045a803          	lw	a6,4(a1)
   102be:	4108                	lw	a0,0(a0)
   102c0:	418c                	lw	a1,0(a1)
   # x |= x >> 1  (high<<31 supplies the bit shifted into the low word).
   # Afterwards the high halves live in a2 (x0) and a3 (x1).
   102c2:	01f89613          	sll	a2,a7,0x1f
   102c6:	01f81693          	sll	a3,a6,0x1f
   102ca:	00155713          	srl	a4,a0,0x1
   102ce:	0015d793          	srl	a5,a1,0x1
   102d2:	8f51                	or	a4,a4,a2
   102d4:	8fd5                	or	a5,a5,a3
   102d6:	0018d613          	srl	a2,a7,0x1
   102da:	00185693          	srl	a3,a6,0x1
   102de:	01166633          	or	a2,a2,a7
   102e2:	0106e6b3          	or	a3,a3,a6
   102e6:	8d59                	or	a0,a0,a4
   102e8:	8ddd                	or	a1,a1,a5
   # x |= x >> 2
   102ea:	01e61893          	sll	a7,a2,0x1e
   102ee:	01e69813          	sll	a6,a3,0x1e
   102f2:	00255713          	srl	a4,a0,0x2
   102f6:	0025d793          	srl	a5,a1,0x2
   102fa:	00e8e733          	or	a4,a7,a4
   102fe:	00f867b3          	or	a5,a6,a5
   10302:	00265893          	srl	a7,a2,0x2
   10306:	0026d813          	srl	a6,a3,0x2
   1030a:	01166633          	or	a2,a2,a7
   1030e:	0106e6b3          	or	a3,a3,a6
   10312:	8d59                	or	a0,a0,a4
   10314:	8ddd                	or	a1,a1,a5
   # x |= x >> 4
   10316:	01c61893          	sll	a7,a2,0x1c
   1031a:	01c69813          	sll	a6,a3,0x1c
   1031e:	00455713          	srl	a4,a0,0x4
   10322:	0045d793          	srl	a5,a1,0x4
   10326:	00e8e733          	or	a4,a7,a4
   1032a:	00f867b3          	or	a5,a6,a5
   1032e:	00465893          	srl	a7,a2,0x4
   10332:	0046d813          	srl	a6,a3,0x4
   10336:	01166633          	or	a2,a2,a7
   1033a:	0106e6b3          	or	a3,a3,a6
   1033e:	8d59                	or	a0,a0,a4
   10340:	8ddd                	or	a1,a1,a5
   # x |= x >> 8
   10342:	01861893          	sll	a7,a2,0x18
   10346:	01869813          	sll	a6,a3,0x18
   1034a:	00855713          	srl	a4,a0,0x8
   1034e:	0085d793          	srl	a5,a1,0x8
   10352:	00e8e733          	or	a4,a7,a4
   10356:	00f867b3          	or	a5,a6,a5
   1035a:	00865893          	srl	a7,a2,0x8
   1035e:	0086d813          	srl	a6,a3,0x8
   10362:	01166633          	or	a2,a2,a7
   10366:	0106e6b3          	or	a3,a3,a6
   1036a:	8d59                	or	a0,a0,a4
   1036c:	8ddd                	or	a1,a1,a5
   # x |= x >> 16
   1036e:	01061893          	sll	a7,a2,0x10
   10372:	01069813          	sll	a6,a3,0x10
   10376:	01055713          	srl	a4,a0,0x10
   1037a:	0105d793          	srl	a5,a1,0x10
   1037e:	00e8e733          	or	a4,a7,a4
   10382:	00f867b3          	or	a5,a6,a5
   10386:	01065893          	srl	a7,a2,0x10
   1038a:	0106d813          	srl	a6,a3,0x10
   1038e:	01166633          	or	a2,a2,a7
   10392:	0106e6b3          	or	a3,a3,a6
   10396:	8d59                	or	a0,a0,a4
   10398:	8ddd                	or	a1,a1,a5
   # x |= x >> 32  (the low word simply ORs in the high word).
   1039a:	8d51                	or	a0,a0,a2
   1039c:	8dd5                	or	a1,a1,a3
   # x -= (x >> 1) & 0x5555555555555555
   # a6 holds the 32-bit mask 0x55555555, applied to each half separately;
   # sltu yields the borrow out of the low-word subtraction.
   # Afterwards: low halves in a4 (x0) / a5 (x1), high halves in a2 / a3.
   1039e:	01f61313          	sll	t1,a2,0x1f
   103a2:	01f69893          	sll	a7,a3,0x1f
   103a6:	00155713          	srl	a4,a0,0x1
   103aa:	0015d793          	srl	a5,a1,0x1
   103ae:	55555837          	lui	a6,0x55555
   103b2:	55580813          	add	a6,a6,1365 # 55555555 <__BSS_END__+0x555377b9>
   103b6:	00e36733          	or	a4,t1,a4
   103ba:	00f8e7b3          	or	a5,a7,a5
   103be:	00165313          	srl	t1,a2,0x1
   103c2:	0016d893          	srl	a7,a3,0x1
   103c6:	01077733          	and	a4,a4,a6
   103ca:	0107f7b3          	and	a5,a5,a6
   103ce:	40e50733          	sub	a4,a0,a4
   103d2:	40f587b3          	sub	a5,a1,a5
   103d6:	01037333          	and	t1,t1,a6
   103da:	0108f833          	and	a6,a7,a6
   103de:	00e53533          	sltu	a0,a0,a4
   103e2:	00f5b5b3          	sltu	a1,a1,a5
   103e6:	40660633          	sub	a2,a2,t1
   103ea:	410686b3          	sub	a3,a3,a6
   103ee:	8e09                	sub	a2,a2,a0
   103f0:	8e8d                	sub	a3,a3,a1
   # x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333)
   # a6 holds 0x33333333; sltu yields the carry out of the low-word add.
   # Afterwards: low halves in a4 / a5, high halves in a0 (x0) / a1 (x1).
   103f2:	01e61313          	sll	t1,a2,0x1e
   103f6:	01e69893          	sll	a7,a3,0x1e
   103fa:	00275513          	srl	a0,a4,0x2
   103fe:	0027d593          	srl	a1,a5,0x2
   10402:	33333837          	lui	a6,0x33333
   10406:	33380813          	add	a6,a6,819 # 33333333 <__BSS_END__+0x33315597>
   1040a:	00a36533          	or	a0,t1,a0
   1040e:	00b8e5b3          	or	a1,a7,a1
   10412:	01057533          	and	a0,a0,a6
   10416:	0105f5b3          	and	a1,a1,a6
   1041a:	00265313          	srl	t1,a2,0x2
   1041e:	01077733          	and	a4,a4,a6
   10422:	0026d893          	srl	a7,a3,0x2
   10426:	0107f7b3          	and	a5,a5,a6
   1042a:	972a                	add	a4,a4,a0
   1042c:	97ae                	add	a5,a5,a1
   1042e:	01037333          	and	t1,t1,a6
   10432:	0108f8b3          	and	a7,a7,a6
   10436:	01067633          	and	a2,a2,a6
   1043a:	0106f6b3          	and	a3,a3,a6
   1043e:	961a                	add	a2,a2,t1
   10440:	96c6                	add	a3,a3,a7
   10442:	00a73533          	sltu	a0,a4,a0
   10446:	00b7b5b3          	sltu	a1,a5,a1
   1044a:	9532                	add	a0,a0,a2
   1044c:	95b6                	add	a1,a1,a3
   # x = ((x >> 4) + x) & 0x0f0f0f0f0f0f0f0f
   # a0 is reloaded with the mask 0x0f0f0f0f once the old a0 has been consumed.
   # Afterwards: low halves in a4 / a5, high halves in a2 (x0) / a3 (x1).
   1044e:	01c51893          	sll	a7,a0,0x1c
   10452:	01c59813          	sll	a6,a1,0x1c
   10456:	00475613          	srl	a2,a4,0x4
   1045a:	0047d693          	srl	a3,a5,0x4
   1045e:	00c8e633          	or	a2,a7,a2
   10462:	00d866b3          	or	a3,a6,a3
   10466:	9732                	add	a4,a4,a2
   10468:	97b6                	add	a5,a5,a3
   1046a:	00455893          	srl	a7,a0,0x4
   1046e:	0045d813          	srl	a6,a1,0x4
   10472:	98aa                	add	a7,a7,a0
   10474:	95c2                	add	a1,a1,a6
   10476:	00c73633          	sltu	a2,a4,a2
   1047a:	00d7b6b3          	sltu	a3,a5,a3
   1047e:	0f0f1537          	lui	a0,0xf0f1
   10482:	f0f50513          	add	a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d3173>
   10486:	9646                	add	a2,a2,a7
   10488:	96ae                	add	a3,a3,a1
   1048a:	8e69                	and	a2,a2,a0
   1048c:	8fe9                	and	a5,a5,a0
   1048e:	8ee9                	and	a3,a3,a0
   10490:	8f69                	and	a4,a4,a0
   # x += x >> 8
   # Afterwards: low halves in a0 (x0) / a1 (x1), high halves in a4 / a5.
   10492:	01861893          	sll	a7,a2,0x18
   10496:	01869813          	sll	a6,a3,0x18
   1049a:	00875513          	srl	a0,a4,0x8
   1049e:	0087d593          	srl	a1,a5,0x8
   104a2:	00a8e533          	or	a0,a7,a0
   104a6:	00b865b3          	or	a1,a6,a1
   104aa:	953a                	add	a0,a0,a4
   104ac:	95be                	add	a1,a1,a5
   104ae:	00865893          	srl	a7,a2,0x8
   104b2:	0086d813          	srl	a6,a3,0x8
   104b6:	9646                	add	a2,a2,a7
   104b8:	96c2                	add	a3,a3,a6
   104ba:	00e53733          	sltu	a4,a0,a4
   104be:	00f5b7b3          	sltu	a5,a1,a5
   104c2:	9732                	add	a4,a4,a2
   104c4:	97b6                	add	a5,a5,a3
   # x += x >> 16
   # Note the register swap: x0's low half ends up in a3 and x1's in a2;
   # the high halves (including carry) end up in a0 (x0) and a1 (x1).
   104c6:	01071893          	sll	a7,a4,0x10
   104ca:	01079813          	sll	a6,a5,0x10
   104ce:	01055693          	srl	a3,a0,0x10
   104d2:	0105d613          	srl	a2,a1,0x10
   104d6:	00d8e6b3          	or	a3,a7,a3
   104da:	00c86633          	or	a2,a6,a2
   104de:	96aa                	add	a3,a3,a0
   104e0:	962e                	add	a2,a2,a1
   104e2:	01075893          	srl	a7,a4,0x10
   104e6:	0107d813          	srl	a6,a5,0x10
   104ea:	9746                	add	a4,a4,a7
   104ec:	97c2                	add	a5,a5,a6
   104ee:	00a6b533          	sltu	a0,a3,a0
   104f2:	00b635b3          	sltu	a1,a2,a1
   104f6:	95be                	add	a1,a1,a5
   104f8:	953a                	add	a0,a0,a4
   # x += x >> 32  (only the low word of the result is kept: low + high).
   # a0 = bit count for x0, a2 = bit count for x1.
   104fa:	9536                	add	a0,a0,a3
   104fc:	962e                	add	a2,a2,a1
   # 64 - (x & 0x7f) for each operand: a0 (x0) and a5 (x1).
   104fe:	04000793          	li	a5,64
   10502:	07f67613          	and	a2,a2,127
   10506:	07f57513          	and	a0,a0,127
   1050a:	40a78533          	sub	a0,a5,a0
   1050e:	8f91                	sub	a5,a5,a2
   # Shift left then right by 16 to keep only the low 16 bits of each value
   # (the C source declares the count as uint16_t), then compute
   # a0 = (64 - a0) + (64 - a5).
   10510:	0542                	sll	a0,a0,0x10
   10512:	07c2                	sll	a5,a5,0x10
   10514:	04000713          	li	a4,64
   10518:	83c1                	srl	a5,a5,0x10
   1051a:	8141                	srl	a0,a0,0x10
   1051c:	40a70533          	sub	a0,a4,a0
   10520:	8f1d                	sub	a4,a4,a5
   10522:	953a                	add	a0,a0,a4
   # a0 = (sum < 64) ^ 1, i.e. 1 when sum >= 64 and 0 otherwise.
   10524:	04052513          	slti	a0,a0,64
   10528:	00154513          	xor	a0,a0,1
   1052c:	8082                	ret
  • Instruction Frequency histogram
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
  1. cmv       10.38% [1782      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
  2. clwsp      8.50% [1459      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
  3. addi       7.28% [1250      ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
  4. cli        7.06% [1212      ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋
  5. cswsp      6.62% [1137      ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎
  6. caddi      4.35% [747       ] █████████████████████████████████████████████████████████████████████████▊
  7. cj         3.95% [679       ] ███████████████████████████████████████████████████████████████████
  8. jal        3.69% [633       ] ██████████████████████████████████████████████████████████████▌
  9. beq        2.85% [489       ] ████████████████████████████████████████████████▎
 10. cadd       2.47% [425       ] █████████████████████████████████████████▉
 11. sw         2.39% [410       ] ████████████████████████████████████████▍
 12. clw        2.31% [397       ] ███████████████████████████████████████▏
 13. lw         2.21% [379       ] █████████████████████████████████████▍
 14. andi       2.01% [345       ] ██████████████████████████████████
 15. bne        1.93% [332       ] ████████████████████████████████▊
 16. cbeqz      1.88% [322       ] ███████████████████████████████▊
 17. cjr        1.85% [318       ] ███████████████████████████████▍
 18. csw        1.71% [294       ] █████████████████████████████
 19. sub        1.50% [258       ] █████████████████████████▍
 20. bge        1.28% [220       ] █████████████████████▋
 21. lbu        1.27% [218       ] █████████████████████▌
 22. srli       1.27% [218       ] █████████████████████▌
 23. auipc      1.22% [210       ] ████████████████████▋
 24. or         1.21% [208       ] ████████████████████▌
 25. blt        1.19% [205       ] ████████████████████▏
 26. slli       1.19% [204       ] ████████████████████▏
 27. cbnez      1.12% [193       ] ███████████████████
 28. cslli      1.09% [187       ] ██████████████████▍
 29. cor        1.01% [174       ] █████████████████▏

Observation

Based on the results above, the text (code) section of the executable built directly from hand-written assembly is smaller than that of the executable compiled from C. This raises the question of why hand-written assembly can produce a more compact executable. Some possible reasons are discussed below:
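The compactness of the hand-written assembly version may stem from its directness, manual optimization, a compact choice of instructions, and a higher level of customized control over the generated code. For example, in the disassembly of predict_if_mul_overflow above, the compiler expanded the 64-bit count-leading-zeros computation inline for both operands, and every 64-bit operation is split into several 32-bit instructions. However, writing assembly also requires more time and effort, because it lacks the abstractions and conveniences of high-level languages. Therefore, the choice of programming language should be based on project requirements and time constraints.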

Reference

Computer Architecture HW2
Assignment1: RISC-V Assembly and Instruction Pipeline
rv32emu
編譯器和最佳化原理篇