contributed by <bclegend>
Lab2: RISC-V RV32I[MACF] emulator with ELF support
I chose the problem from 洪佑杭's Assignment 1: multiplication overflow prediction for unsigned integers using CLZ.
I chose this problem because I used to be confused about the use of CLZ (count leading zeros). I noticed that predicting overflow in integer multiplication is a good use of CLZ, and I am also interested in integer multipliers.
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
// test case a: no overflow, predict result is false
uint64_t a_x0 = 0x0000000000000000;
uint64_t a_x1 = 0x0000000000000000;
// test case b: no overflow, predict result is false
uint64_t b_x0 = 0x0000000000000001;
uint64_t b_x1 = 0x0000000000000010;
// test case c: no overflow, but predict result is true
uint64_t c_x0 = 0x0000000000000002;
uint64_t c_x1 = 0x4000000000000000;
// test case d: overflow, and predict result is true
uint64_t d_x0 = 0x0000000000000003;
uint64_t d_x1 = 0x7FFFFFFFFFFFFFFF;
/*
 * Count the leading zero bits of a 64-bit value.
 *
 * The highest set bit is first smeared into every lower position, so the
 * number of set bits equals 64 minus the number of leading zeros.  A
 * population count then tallies the set bits.  Returns 64 when x is 0.
 */
uint16_t count_leading_zeros(uint64_t x)
{
    /* propagate the top set bit downwards: shifts of 1, 2, 4, 8, 16, 32 */
    for (unsigned shift = 1; shift < 64; shift <<= 1)
        x |= x >> shift;

    /* population count: 2-bit, 4-bit, then 8-bit partial sums */
    x = x - ((x >> 1) & 0x5555555555555555);
    x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
    x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;

    /* fold the eight byte sums together; the total lands in the low byte */
    x += x >> 8;
    x += x >> 16;
    x += x >> 32;

    return 64 - (x & 0x7f);
}
/*
 * Conservatively predict whether (*x0) * (*x1) overflows 64 bits.
 *
 * 64 - clz(v) is the number of significant bits of v (0 when v is 0).
 * The prediction is true when the two bit counts add up to 64 or more.
 * It can report true for a product that still fits, for example
 * 2 * 0x4000000000000000.
 */
bool predict_if_mul_overflow(uint64_t *x0, uint64_t *x1)
{
    int32_t bits_x0 = 64 - (int32_t)count_leading_zeros(*x0);
    int32_t bits_x1 = 64 - (int32_t)count_leading_zeros(*x1);

    return bits_x0 + bits_x1 >= 64;
}
/*
 * Run the predictor on the four sample operand pairs and print each result
 * on its own line (0 = no overflow predicted, 1 = overflow predicted).
 *
 * Declared as int main(void): a hosted C program must return int from main,
 * and the value 0 reports success to the environment.
 */
int main(void)
{
    printf("%d\n", predict_if_mul_overflow(&a_x0, &a_x1));
    printf("%d\n", predict_if_mul_overflow(&b_x0, &b_x1));
    printf("%d\n", predict_if_mul_overflow(&c_x0, &c_x1));
    printf("%d\n", predict_if_mul_overflow(&d_x0, &d_x1));
    return 0;
}
.data
# will not overflow, and will predict as false
cmp_data_1: .dword 0x0000000000000000, 0x0000000000000000
# will not overflow, and will predict as false
cmp_data_2: .dword 0x0000000000000001, 0x0000000000000010
# will not overflow, but will predict as true
cmp_data_3: .dword 0x0000000000000002, 0x4000000000000000
# will overflow, and will predict as true
cmp_data_4: .dword 0x0000000000000003, 0x7FFFFFFFFFFFFFFF
.text
# assume little endian
# Entry point: build a table of pointers to the four test pairs on the stack,
# call pimo on each pair, and print every result followed by a space.
main:
    addi sp, sp, -16            # room for four 32-bit pointers
    la   t1, cmp_data_1
    sw   t1, 0(sp)
    la   t1, cmp_data_2
    sw   t1, 4(sp)
    la   t1, cmp_data_3
    sw   t1, 8(sp)
    la   t1, cmp_data_4
    sw   t1, 12(sp)
    # debugging aid, normally disabled:
    #   li a0, 0
    #   li a1, 0x00
    #   jal ra clz
    #   jal ra print_dec
    #   j exit
    li   s0, 4                  # total number of test pairs
    li   s1, 0                  # pairs handled so far
    mv   s2, sp                 # cursor into the pointer table
main_loop:
    lw   a0, 0(s2)              # a0 = address of the pair's first dword
    addi a1, a0, 8              # a1 = address of the pair's second dword
    jal  ra, pimo               # a0 = prediction
    li   a7, 1                  # ask ecall to print a0 as a decimal
    ecall
    li   a7, 11                 # ask ecall to print a0 as a character
    li   a0, 32                 # ASCII space
    ecall
    addi s1, s1, 1
    addi s2, s2, 4              # advance to the next table entry
    bne  s1, s0, main_loop
    addi sp, sp, 16             # drop the pointer table
    j    exit
# pimo -- predict whether multiplying two unsigned 64-bit values overflows
# In:   a0 = address of x0, a1 = address of x1 (low word stored first)
# Out:  a0 = 1 when (exp_x0 + 1) + (exp_x1 + 1) >= 64, otherwise 0,
#       where exp_x = 63 - clz(x)
# Saved and restored: ra, s0-s3
pimo:
    addi sp, sp, -32            # five words are saved; 32 keeps sp 16-byte aligned
    sw   ra, 0(sp)
    sw   s0, 4(sp)
    sw   s1, 8(sp)
    sw   s2, 12(sp)
    sw   s3, 16(sp)
    mv   s0, a0                 # s0 = address of x0
    mv   s1, a1                 # s1 = address of x1
    lw   a0, 0(s0)
    lw   a1, 4(s0)              # a1:a0 = x0
    jal  ra, clz
    li   s2, 63
    sub  s2, s2, a0             # s2 = exp_x0
    lw   a0, 0(s1)
    lw   a1, 4(s1)              # a1:a0 = x1
    jal  ra, clz
    li   s3, 63
    sub  s3, s3, a0             # s3 = exp_x1
    add  s2, s2, s3
    addi s2, s2, 2              # s2 = (exp_x0 + 1) + (exp_x1 + 1)
    li   t0, 64
    bge  s2, t0, pimo_ret_t
    li   a0, 0                  # return false
    j    pimo_end
pimo_ret_t:
    li   a0, 1                  # return true
pimo_end:
    lw   ra, 0(sp)
    lw   s0, 4(sp)
    lw   s1, 8(sp)
    lw   s2, 12(sp)
    lw   s3, 16(sp)
    addi sp, sp, 32
    ret
# clz -- count the leading zero bits of a 64-bit value
# In:   a0 = low word, a1 = high word
# Out:  a0 = number of leading zeros (64 when the value is 0)
# Uses: a1, t0-t5 as scratch.  Leaf routine: nothing is called from here, so
#       ra stays in its register and no stack frame is needed.
clz:
    # Step 1: smear the highest set bit into every lower position.
    bne  a1, zero, clz_fill_ones_upper
clz_fill_ones_lower:            # high word is 0: only the low word needs smearing
    srli t0, a0, 1
    or   a0, a0, t0
    srli t0, a0, 2
    or   a0, a0, t0
    srli t0, a0, 4
    or   a0, a0, t0
    srli t0, a0, 8
    or   a0, a0, t0
    srli t0, a0, 16
    or   a0, a0, t0
    j    clz_fill_ones_end
clz_fill_ones_upper:            # high word is non-zero: smear it, low word becomes all ones
    srli t1, a1, 1
    or   a1, a1, t1
    srli t1, a1, 2
    or   a1, a1, t1
    srli t1, a1, 4
    or   a1, a1, t1
    srli t1, a1, 8
    or   a1, a1, t1
    srli t1, a1, 16
    or   a1, a1, t1
    li   a0, 0xffffffff
clz_fill_ones_end:
    # Step 2: population count of the 64-bit value a1:a0.
    # x -= ((x >> 1) & 0x5555555555555555);
    srli t0, a0, 1
    slli t1, a1, 31
    or   t0, t0, t1
    srli t1, a1, 1              # t1:t0 = x >> 1
    li   t2, 0x55555555         # mask, same for both words
    and  t0, t0, t2
    and  t1, t1, t2             # t1:t0 = (x >> 1) & 0x5555555555555555
    sltu t3, a0, t0             # t3 = borrow out of the low word
    sub  a0, a0, t0
    sub  a1, a1, t1
    sub  a1, a1, t3             # a1:a0 = x - (t1:t0)
    # x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333);
    srli t0, a0, 2
    slli t1, a1, 30
    or   t0, t0, t1
    srli t1, a1, 2              # t1:t0 = x >> 2
    li   t2, 0x33333333         # mask, same for both words
    and  t0, t0, t2
    and  t1, t1, t2             # t1:t0 = (x >> 2) & 0x3333333333333333
    and  t4, a0, t2
    and  t5, a1, t2             # t5:t4 = x & 0x3333333333333333
    add  a0, t0, t4
    sltu t3, a0, t0             # t3 = carry out of the low word
    add  a1, t1, t5
    add  a1, a1, t3             # a1:a0 = (t1:t0) + (t5:t4)
    # x = ((x >> 4) + x) & 0x0f0f0f0f0f0f0f0f;
    srli t0, a0, 4
    slli t1, a1, 28
    or   t0, t0, t1
    srli t1, a1, 4              # t1:t0 = x >> 4
    add  t0, t0, a0
    sltu t3, t0, a0             # t3 = carry out of the low word
    add  t1, t1, a1
    add  t1, t1, t3             # t1:t0 = (x >> 4) + x
    li   t2, 0x0f0f0f0f         # mask, same for both words
    and  a0, t0, t2
    and  a1, t1, t2             # a1:a0 = (t1:t0) & 0x0f0f0f0f0f0f0f0f
    # x += (x >> 8);
    srli t0, a0, 8
    slli t1, a1, 24
    or   t0, t0, t1
    srli t1, a1, 8              # t1:t0 = x >> 8
    add  a0, a0, t0
    sltu t3, a0, t0             # t3 = carry out of the low word
    add  a1, a1, t1
    add  a1, a1, t3             # a1:a0 = x + (x >> 8)
    # x += (x >> 16);
    srli t0, a0, 16
    slli t1, a1, 16
    or   t0, t0, t1
    srli t1, a1, 16             # t1:t0 = x >> 16
    add  a0, a0, t0
    sltu t3, a0, t0             # t3 = carry out of the low word
    add  a1, a1, t1
    add  a1, a1, t3             # a1:a0 = x + (x >> 16)
    # x += (x >> 32);
    add  a0, a0, a1             # only the low word of this sum is read below
    # return (64 - (x & 0x7f));
    andi a0, a0, 0x7f           # a0 = (x & 0x7f)
    li   t0, 64
    sub  a0, t0, a0             # a0 = 64 - (x & 0x7f)
    ret
# util func
# print_hex -- print a0 through environment call 34, then print a space
# (service 34 is presumably the simulator's hexadecimal print, per the
# routine name -- confirm against the simulator's ecall table).
# In:       a0 = value to print
# Clobbers: a0, a7
# Leaf routine: no nested call is made, so ra is not spilled to the stack.
print_hex:
    li   a7, 34
    ecall                       # print value
    li   a0, 32                 # 32 is " " in ASCII
    li   a7, 11
    ecall                       # print space
    ret
# print_dec -- print a0 as a decimal (environment call 1), then print a space
# In:       a0 = value to print
# Clobbers: a0, a7
# Leaf routine: no nested call is made, so ra is not spilled to the stack.
print_dec:
    li   a7, 1
    ecall                       # print value
    li   a0, 32                 # 32 is " " in ASCII
    li   a7, 11
    ecall                       # print space
    ret
# End of the program: main jumps here once all test pairs are processed.
# NOTE(review): no exit call is issued; presumably the simulator halts when
# execution runs past this final instruction -- confirm.
exit:
nop
This is the cycle count of his code in Ripes:
Learn More →
I removed some unused parts and modified this code to run on rv32emu.
.org 0
.global _start
/* newlib system calls */
.set STDOUT,1
.set SYSEXIT, 93
.set SYSWRITE, 64
.data
# will not overflow, and will predict as false
cmp_data_1: .dword 0x0000000000000000, 0x0000000000000000
# will not overflow, and will predict as false
cmp_data_2: .dword 0x0000000000000001, 0x0000000000000010
# will not overflow, but will predict as true
cmp_data_3: .dword 0x0000000000000002, 0x4000000000000000
# will overflow, and will predict as true
cmp_data_4: .dword 0x0000000000000003, 0x7FFFFFFFFFFFFFFF
nextline: .ascii "\n"
.set str_next_len, .-nextline
blank: .ascii " "
.set str_blank_len, .-blank
.text
# assume little endian
# _start -- run pimo on the four test pairs and print one result per line
# Stack frame (32 bytes, so sp stays 16-byte aligned for the whole routine):
#   0..15(sp)  pointers to cmp_data_1 .. cmp_data_4
#   16(sp)     one-byte buffer holding the ASCII result digit
_start:
    addi sp, sp, -32
    # store the four test-data pointers in the frame
    la   t0, cmp_data_1
    sw   t0, 0(sp)
    la   t0, cmp_data_2
    sw   t0, 4(sp)
    la   t0, cmp_data_3
    sw   t0, 8(sp)
    la   t0, cmp_data_4
    sw   t0, 12(sp)
    li   s0, 4                  # s0 is the goal iteration count
    li   s1, 0                  # s1 is the counter
    mv   s2, sp                 # s2 points to the first table entry
main_loop:
    lw   a0, 0(s2)              # a0 = pointer to first dword of cmp_data_x
    addi a1, a0, 8              # a1 = pointer to second dword of cmp_data_x
    jal  ra, pimo               # a0 = 0 or 1
    # print the result digit: write exactly one byte, so no padding
    # bytes from the rest of the word reach stdout
    addi t0, a0, 48             # 48 is "0" in ASCII
    sb   t0, 16(sp)
    li   a7, SYSWRITE
    li   a0, STDOUT
    addi a1, sp, 16
    li   a2, 1
    ecall
    # printf("\n");
    li   a7, SYSWRITE
    li   a0, STDOUT
    la   a1, nextline
    li   a2, 1
    ecall
    addi s2, s2, 4              # s2 points to next table entry
    addi s1, s1, 1              # counter++
    bne  s1, s0, main_loop
    addi sp, sp, 32
    j    exit
# pimo -- predict whether multiplying two unsigned 64-bit values overflows
# In:   a0 = address of x0, a1 = address of x1 (low word stored first)
# Out:  a0 = 1 when (exp_x0 + 1) + (exp_x1 + 1) >= 64, otherwise 0,
#       where exp_x = 63 - clz(x)
# Saved and restored: ra, s0-s3
pimo:
    addi sp, sp, -32            # five words are saved; 32 keeps sp 16-byte aligned
    sw   ra, 0(sp)
    sw   s0, 4(sp)
    sw   s1, 8(sp)
    sw   s2, 12(sp)
    sw   s3, 16(sp)
    mv   s0, a0                 # s0 = address of x0
    mv   s1, a1                 # s1 = address of x1
    lw   a0, 0(s0)
    lw   a1, 4(s0)              # a1:a0 = x0
    jal  ra, clz
    li   s2, 63
    sub  s2, s2, a0             # s2 = exp_x0
    lw   a0, 0(s1)
    lw   a1, 4(s1)              # a1:a0 = x1
    jal  ra, clz
    li   s3, 63
    sub  s3, s3, a0             # s3 = exp_x1
    add  s2, s2, s3
    addi s2, s2, 2              # s2 = (exp_x0 + 1) + (exp_x1 + 1)
    li   t0, 64
    bge  s2, t0, pimo_ret_t
    li   a0, 0                  # return false
    j    pimo_end
pimo_ret_t:
    li   a0, 1                  # return true
pimo_end:
    lw   ra, 0(sp)
    lw   s0, 4(sp)
    lw   s1, 8(sp)
    lw   s2, 12(sp)
    lw   s3, 16(sp)
    addi sp, sp, 32
    ret
# clz -- count the leading zero bits of a 64-bit value
# In:   a0 = low word, a1 = high word
# Out:  a0 = number of leading zeros (64 when the value is 0)
# Uses: a1, t0-t5 as scratch.  Leaf routine: nothing is called from here, so
#       ra stays in its register and no stack frame is needed.
clz:
    # Step 1: smear the highest set bit into every lower position.
    bne  a1, zero, clz_fill_ones_upper
clz_fill_ones_lower:            # high word is 0: only the low word needs smearing
    srli t0, a0, 1
    or   a0, a0, t0
    srli t0, a0, 2
    or   a0, a0, t0
    srli t0, a0, 4
    or   a0, a0, t0
    srli t0, a0, 8
    or   a0, a0, t0
    srli t0, a0, 16
    or   a0, a0, t0
    j    clz_fill_ones_end
clz_fill_ones_upper:            # high word is non-zero: smear it, low word becomes all ones
    srli t1, a1, 1
    or   a1, a1, t1
    srli t1, a1, 2
    or   a1, a1, t1
    srli t1, a1, 4
    or   a1, a1, t1
    srli t1, a1, 8
    or   a1, a1, t1
    srli t1, a1, 16
    or   a1, a1, t1
    li   a0, 0xffffffff
clz_fill_ones_end:
    # Step 2: population count of the 64-bit value a1:a0.
    # x -= ((x >> 1) & 0x5555555555555555);
    srli t0, a0, 1
    slli t1, a1, 31
    or   t0, t0, t1
    srli t1, a1, 1              # t1:t0 = x >> 1
    li   t2, 0x55555555         # mask, same for both words
    and  t0, t0, t2
    and  t1, t1, t2             # t1:t0 = (x >> 1) & 0x5555555555555555
    sltu t3, a0, t0             # t3 = borrow out of the low word
    sub  a0, a0, t0
    sub  a1, a1, t1
    sub  a1, a1, t3             # a1:a0 = x - (t1:t0)
    # x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333);
    srli t0, a0, 2
    slli t1, a1, 30
    or   t0, t0, t1
    srli t1, a1, 2              # t1:t0 = x >> 2
    li   t2, 0x33333333         # mask, same for both words
    and  t0, t0, t2
    and  t1, t1, t2             # t1:t0 = (x >> 2) & 0x3333333333333333
    and  t4, a0, t2
    and  t5, a1, t2             # t5:t4 = x & 0x3333333333333333
    add  a0, t0, t4
    sltu t3, a0, t0             # t3 = carry out of the low word
    add  a1, t1, t5
    add  a1, a1, t3             # a1:a0 = (t1:t0) + (t5:t4)
    # x = ((x >> 4) + x) & 0x0f0f0f0f0f0f0f0f;
    srli t0, a0, 4
    slli t1, a1, 28
    or   t0, t0, t1
    srli t1, a1, 4              # t1:t0 = x >> 4
    add  t0, t0, a0
    sltu t3, t0, a0             # t3 = carry out of the low word
    add  t1, t1, a1
    add  t1, t1, t3             # t1:t0 = (x >> 4) + x
    li   t2, 0x0f0f0f0f         # mask, same for both words
    and  a0, t0, t2
    and  a1, t1, t2             # a1:a0 = (t1:t0) & 0x0f0f0f0f0f0f0f0f
    # x += (x >> 8);
    srli t0, a0, 8
    slli t1, a1, 24
    or   t0, t0, t1
    srli t1, a1, 8              # t1:t0 = x >> 8
    add  a0, a0, t0
    sltu t3, a0, t0             # t3 = carry out of the low word
    add  a1, a1, t1
    add  a1, a1, t3             # a1:a0 = x + (x >> 8)
    # x += (x >> 16);
    srli t0, a0, 16
    slli t1, a1, 16
    or   t0, t0, t1
    srli t1, a1, 16             # t1:t0 = x >> 16
    add  a0, a0, t0
    sltu t3, a0, t0             # t3 = carry out of the low word
    add  a1, a1, t1
    add  a1, a1, t3             # a1:a0 = x + (x >> 16)
    # x += (x >> 32);
    add  a0, a0, a1             # only the low word of this sum is read below
    # return (64 - (x & 0x7f));
    andi a0, a0, 0x7f           # a0 = (x & 0x7f)
    li   t0, 64
    sub  a0, t0, a0             # a0 = 64 - (x & 0x7f)
    ret
# Terminate the program: issue the SYSEXIT system call with status 0.
exit:
    li   a0, 0                  # exit status
    li   a7, SYSEXIT
    ecall
$ riscv-none-elf-size original.elf
player1@player1:~/Desktop/2023_ComputerArchitecture/Hw02$ riscv-none-elf-size original.elf
text data bss dec hex filename
728 0 0 728 2d8 original.elf
$ riscv-none-elf-readelf -h original.elf
player1@player1:~/Desktop/2023_ComputerArchitecture/Hw02$ riscv-none-elf-readelf -h original.elf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x0
Start of program headers: 52 (bytes into file)
Start of section headers: 5580 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 2
Size of section headers: 40 (bytes)
Number of section headers: 6
Section header string table index: 5
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
1. addi 26.04% [44 ] ███████████████████████████████████████████████████████████████████████████████████
2. unknown 15.98% [27 ] ██████████████████████████████████████████████████▉
3. srli 11.83% [20 ] █████████████████████████████████████▋
4. add 9.47% [16 ] ██████████████████████████████▏
5. or 8.88% [15 ] ████████████████████████████▎
6. lw 6.51% [11 ] ████████████████████▊
7. sw 6.51% [11 ] ████████████████████▊
8. and 4.73% [8 ] ███████████████
9. jal 3.55% [6 ] ███████████▎
10. sub 3.55% [6 ] ███████████▎
11. sltu 3.55% [6 ] ███████████▎
12. auipc 2.96% [5 ] █████████▍
13. slli 2.96% [5 ] █████████▍
14. lui 1.78% [3 ] █████▋
15. ecall 1.78% [3 ] █████▋
16. jalr 1.18% [2 ] ███▊
17. bne 1.18% [2 ] ███▊
18. cnop 1.18% [2 ] ███▊
riscv-none-elf-gcc -Q --help=optimizers
The following options control optimizations:
-O<number>
-Ofast
-Og
-Os
-Oz
-faggressive-loop-optimizations [enabled]
-falign-functions [disabled]
-falign-functions=
-falign-jumps [disabled]
-falign-jumps=
-falign-labels [disabled]
-falign-labels=
-falign-loops [disabled]
-falign-loops=
I will use -O0
-O1
-O2
-O3
-Os
-Ofast
to optimize the code and compare the differences.
You shall use RDCYCLE/RDCYCLEH instruction for the statistics of your program’s execution.
I added the RDCYCLE-based measurement code shown below.
RDCYCLE/RDCYCLEH
#include <stdio.h>
typedef uint64_t ticks;
/*
 * Read the 64-bit cycle counter on RV32.
 *
 * The counter is exposed as two 32-bit halves, so the low half can wrap
 * between the individual reads.  The high half is therefore read both
 * before and after the low half, and the whole sequence is retried until
 * the two high reads agree.
 *
 * The returned value is the real counter: the high word is the value read
 * by rdcycleh, not a comparison mask, and a rollover during the read never
 * collapses the result to 0.
 */
static inline ticks getticks(void)
{
    uint32_t lo, hi, hi_check;

    do {
        asm volatile(
            "rdcycleh %0\n"
            "rdcycle %1\n"
            "rdcycleh %2\n"
            : "=r"(hi), "=r"(lo), "=r"(hi_check));
    } while (hi != hi_check); /* low half wrapped mid-read: sample again */

    return (((uint64_t) hi) << 32) | ((uint64_t) lo);
}
/*
 * Naive doubly-recursive Fibonacci: fib(0) = 0, fib(1) = 1.
 * Deliberately left recursive; main() times one call to it.
 */
static uint64_t fib(uint64_t n)
{
    return (n > 1) ? fib(n - 1) + fib(n - 2) : n;
}
/* Time one call to fib(19) with the cycle counter and print the difference. */
int main()
{
    const ticks begin = getticks();
    fib(19);
    const ticks end = getticks();

    printf("elapsed cycle: %" PRIu64 "\n", end - begin);
    return 0;
}
I added the code above to measure the cycle count at each optimization level and compared it with the original assembly. The table below shows that the handwritten assembly has an overwhelming advantage in cycle count.
| | O0 | O1 | O2 | O3 | Os | Ofast | Assembly |
|---|---|---|---|---|---|---|---|
| elapsed | 6217 | 4528 | 4528 | 4445 | 4528 | 4445 | 933 |
Image Not Showing
Possible Reasons
|
# Build source.c once per optimization level: source_<level>.elf is compiled
# with -<level> (for example source_O2.elf uses -O2).
CC     = riscv-none-elf-gcc
CFLAGS = -march=rv32i -mabi=ilp32
LEVELS = O0 O1 O2 O3 Os Ofast
ELFS   = $(addprefix source_,$(addsuffix .elf,$(LEVELS)))

# "all" names no file, so it must always be considered out of date.
.PHONY: all
all: $(ELFS)

# $* is the level matched by %.  Listing source.c as a prerequisite makes
# every ELF rebuild after the source is edited.
source_%.elf: source.c
	$(CC) $(CFLAGS) -$* $< -o $@
$ riscv-none-elf-size source_O0.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_O0.elf
text data bss dec hex filename
76692 2372 1548 80612 13ae4 source_O0.elf
$ riscv-none-elf-readelf -h source_O0.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_O0.elf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x100d8
Start of program headers: 52 (bytes into file)
Start of section headers: 94772 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
$ riscv-none-elf-objdump -d source_O0.elf >./text/source_O0.txt
main
and predict_if_mul_overflow
000105a6 <main>:
105a6: 1141 add sp,sp,-16
105a8: c606 sw ra,12(sp)
105aa: c422 sw s0,8(sp)
105ac: 0800 add s0,sp,16
105ae: f9018593 add a1,gp,-112 # 1d7a0 <a_x1>
105b2: f8818513 add a0,gp,-120 # 1d798 <a_x0>
105b6: 3741 jal 10536 <predict_if_mul_overflow>
105b8: 87aa mv a5,a0
105ba: 85be mv a1,a5
105bc: 67f1 lui a5,0x1c
105be: ea078513 add a0,a5,-352 # 1bea0 <__clzsi2+0x70>
105c2: 2135 jal 109ee <printf>
105c4: f4018593 add a1,gp,-192 # 1d750 <b_x1>
105c8: f3818513 add a0,gp,-200 # 1d748 <b_x0>
105cc: 37ad jal 10536 <predict_if_mul_overflow>
105ce: 87aa mv a5,a0
105d0: 85be mv a1,a5
105d2: 67f1 lui a5,0x1c
105d4: ea078513 add a0,a5,-352 # 1bea0 <__clzsi2+0x70>
105d8: 2919 jal 109ee <printf>
105da: f5018593 add a1,gp,-176 # 1d760 <c_x1>
105de: f4818513 add a0,gp,-184 # 1d758 <c_x0>
105e2: 3f91 jal 10536 <predict_if_mul_overflow>
105e4: 87aa mv a5,a0
105e6: 85be mv a1,a5
105e8: 67f1 lui a5,0x1c
105ea: ea078513 add a0,a5,-352 # 1bea0 <__clzsi2+0x70>
105ee: 2101 jal 109ee <printf>
105f0: f6018593 add a1,gp,-160 # 1d770 <d_x1>
105f4: f5818513 add a0,gp,-168 # 1d768 <d_x0>
105f8: 3f3d jal 10536 <predict_if_mul_overflow>
105fa: 87aa mv a5,a0
105fc: 85be mv a1,a5
105fe: 67f1 lui a5,0x1c
10600: ea078513 add a0,a5,-352 # 1bea0 <__clzsi2+0x70>
10604: 26ed jal 109ee <printf>
10606: 4781 li a5,0
10608: 853e mv a0,a5
1060a: 40b2 lw ra,12(sp)
1060c: 4422 lw s0,8(sp)
1060e: 0141 add sp,sp,16
10610: 8082 ret
00010536 <predict_if_mul_overflow>:
10536: 7179 add sp,sp,-48
10538: d606 sw ra,44(sp)
1053a: d422 sw s0,40(sp)
1053c: 1800 add s0,sp,48
1053e: fca42e23 sw a0,-36(s0)
10542: fcb42c23 sw a1,-40(s0)
10546: fdc42783 lw a5,-36(s0)
1054a: 4398 lw a4,0(a5)
1054c: 43dc lw a5,4(a5)
1054e: 853a mv a0,a4
10550: 85be mv a1,a5
10552: 3ec5 jal 10142 <count_leading_zeros>
10554: 87aa mv a5,a0
10556: 873e mv a4,a5
10558: 03f00793 li a5,63
1055c: 8f99 sub a5,a5,a4
1055e: fef42623 sw a5,-20(s0)
10562: fd842783 lw a5,-40(s0)
10566: 4398 lw a4,0(a5)
10568: 43dc lw a5,4(a5)
1056a: 853a mv a0,a4
1056c: 85be mv a1,a5
1056e: 3ed1 jal 10142 <count_leading_zeros>
10570: 87aa mv a5,a0
10572: 873e mv a4,a5
10574: 03f00793 li a5,63
10578: 8f99 sub a5,a5,a4
1057a: fef42423 sw a5,-24(s0)
1057e: fec42783 lw a5,-20(s0)
10582: 00178713 add a4,a5,1
10586: fe842783 lw a5,-24(s0)
1058a: 0785 add a5,a5,1
1058c: 973e add a4,a4,a5
1058e: 03f00793 li a5,63
10592: 00e7d463 bge a5,a4,1059a <predict_if_mul_overflow+0x64>
10596: 4785 li a5,1
10598: a011 j 1059c <predict_if_mul_overflow+0x66>
1059a: 4781 li a5,0
1059c: 853e mv a0,a5
1059e: 50b2 lw ra,44(sp)
105a0: 5422 lw s0,40(sp)
105a2: 6145 add sp,sp,48
105a4: 8082 ret
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
1. cmv 10.57% [1822 ] █████████████████████████████████████████████
2. clwsp 8.54% [1472 ] ████████████████████████████████████▎
3. addi 7.28% [1254 ] ██████████████████████████████▉
4. cli 7.04% [1214 ] █████████████████████████████▉
5. cswsp 6.67% [1150 ] ████████████████████████████▍
6. caddi 4.34% [748 ] ██████████████████▍
7. cj 3.95% [680 ] ████████████████▊
8. jal 3.65% [630 ] ███████████████▌
9. beq 2.84% [489 ] ████████████
10. sw 2.69% [463 ] ███████████▍
11. lw 2.67% [460 ] ███████████▎
12. clw 2.31% [399 ] █████████▊
13. cadd 2.25% [387 ] █████████▌
14. andi 1.99% [343 ] ████████▍
15. bne 1.93% [332 ] ████████▏
16. cbeqz 1.87% [322 ] ███████▉
17. cjr 1.84% [318 ] ███████▊
18. csw 1.71% [294 ] ███████▎
19. sub 1.48% [255 ] ██████▎
20. bge 1.28% [221 ] █████▍
21. lbu 1.26% [218 ] █████▍
22. auipc 1.22% [210 ] █████▏
23. blt 1.19% [205 ] █████
24. or 1.13% [195 ] ████▊
25. cbnez 1.12% [193 ] ████▊
26. cslli 1.11% [191 ] ████▋
27. slli 1.04% [179 ] ████▍
28. srli 1.03% [178 ] ████▍
$ riscv-none-elf-size source_O1.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_O1.elf
text data bss dec hex filename
75784 2372 1548 79704 13758 source_O1.elf
source_O1.elf
$ riscv-none-elf-readelf -h source_O1.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_O1.elf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x100d8
Start of program headers: 52 (bytes into file)
Start of section headers: 94772 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
$ riscv-none-elf-objdump -d source_O1.elf >./text/source_O1.txt
main
and predict_if_mul_overflow
0001028a <main>:
1028a: 1141 add sp,sp,-16
1028c: c606 sw ra,12(sp)
1028e: c422 sw s0,8(sp)
10290: f8818593 add a1,gp,-120 # 1d798 <a_x1>
10294: f9018513 add a0,gp,-112 # 1d7a0 <a_x0>
10298: 3f75 jal 10254 <predict_if_mul_overflow>
1029a: 85aa mv a1,a0
1029c: 6471 lui s0,0x1c
1029e: b7040513 add a0,s0,-1168 # 1bb70 <__clzsi2+0x6e>
102a2: 2939 jal 106c0 <printf>
102a4: f5818593 add a1,gp,-168 # 1d768 <b_x1>
102a8: f6018513 add a0,gp,-160 # 1d770 <b_x0>
102ac: 3765 jal 10254 <predict_if_mul_overflow>
102ae: 85aa mv a1,a0
102b0: b7040513 add a0,s0,-1168
102b4: 2131 jal 106c0 <printf>
102b6: f4818593 add a1,gp,-184 # 1d758 <c_x1>
102ba: f5018513 add a0,gp,-176 # 1d760 <c_x0>
102be: 3f59 jal 10254 <predict_if_mul_overflow>
102c0: 85aa mv a1,a0
102c2: b7040513 add a0,s0,-1168
102c6: 2eed jal 106c0 <printf>
102c8: f3818593 add a1,gp,-200 # 1d748 <d_x1>
102cc: f4018513 add a0,gp,-192 # 1d750 <d_x0>
102d0: 3751 jal 10254 <predict_if_mul_overflow>
102d2: 85aa mv a1,a0
102d4: b7040513 add a0,s0,-1168
102d8: 26e5 jal 106c0 <printf>
102da: 4501 li a0,0
102dc: 40b2 lw ra,12(sp)
102de: 4422 lw s0,8(sp)
102e0: 0141 add sp,sp,16
102e2: 8082 ret
00010254 <predict_if_mul_overflow>:
10254: 1141 add sp,sp,-16
10256: c606 sw ra,12(sp)
10258: c422 sw s0,8(sp)
1025a: c226 sw s1,4(sp)
1025c: 84ae mv s1,a1
1025e: 414c lw a1,4(a0)
10260: 4108 lw a0,0(a0)
10262: 35c5 jal 10142 <count_leading_zeros>
10264: 842a mv s0,a0
10266: 4088 lw a0,0(s1)
10268: 40cc lw a1,4(s1)
1026a: 3de1 jal 10142 <count_leading_zeros>
1026c: 04000793 li a5,64
10270: 40878433 sub s0,a5,s0
10274: 8f89 sub a5,a5,a0
10276: 943e add s0,s0,a5
10278: 04042513 slti a0,s0,64
1027c: 00154513 xor a0,a0,1
10280: 40b2 lw ra,12(sp)
10282: 4422 lw s0,8(sp)
10284: 4492 lw s1,4(sp)
10286: 0141 add sp,sp,16
10288: 8082 ret
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
1. cmv 10.49% [1784 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
2. clwsp 8.59% [1462 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏
3. addi 7.33% [1246 ] ██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉
4. cli 7.13% [1212 ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌
5. cswsp 6.70% [1140 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
6. caddi 4.40% [749 ] █████████████████████████████████████████████████████████████████████████▉
7. cj 3.99% [679 ] ██████████████████████████████████████████████████████████████████▉
8. jal 3.70% [630 ] ██████████████████████████████████████████████████████████████▏
9. beq 2.87% [489 ] ████████████████████████████████████████████████▏
10. sw 2.41% [410 ] ████████████████████████████████████████▍
11. clw 2.35% [399 ] ███████████████████████████████████████▎
12. cadd 2.35% [399 ] ███████████████████████████████████████▎
13. lw 2.22% [377 ] █████████████████████████████████████▏
14. andi 2.02% [343 ] █████████████████████████████████▊
15. bne 1.95% [332 ] ████████████████████████████████▊
16. cbeqz 1.89% [322 ] ███████████████████████████████▊
17. cjr 1.87% [318 ] ███████████████████████████████▎
18. csw 1.73% [294 ] █████████████████████████████
19. sub 1.49% [253 ] ████████████████████████▉
20. bge 1.29% [220 ] █████████████████████▋
21. lbu 1.28% [218 ] █████████████████████▌
22. auipc 1.23% [210 ] ████████████████████▋
23. blt 1.21% [205 ] ████████████████████▏
24. cbnez 1.13% [193 ] ███████████████████
25. cslli 1.09% [185 ] ██████████████████▎
26. slli 1.08% [184 ] ██████████████████▏
27. or 1.06% [180 ] █████████████████▊
28. srli 1.05% [178 ] █████████████████▌
$ riscv-none-elf-size source_O2.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_O2.elf
text data bss dec hex filename
75784 2372 1548 79704 13758 source_O2.elf
$ riscv-none-elf-readelf -h source_O2.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_O2.elf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x1015c
Start of program headers: 52 (bytes into file)
Start of section headers: 94788 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
$ riscv-none-elf-objdump -d source_O2.elf >./text/source_O2.txt
main
and predict_if_mul_overflow
000100b0 <main>:
100b0: 1141 add sp,sp,-16
100b2: f8818593 add a1,gp,-120 # 1d798 <a_x1>
100b6: f9018513 add a0,gp,-112 # 1d7a0 <a_x0>
100ba: c606 sw ra,12(sp)
100bc: c422 sw s0,8(sp)
100be: 2ac5 jal 102ae <predict_if_mul_overflow>
100c0: 6471 lui s0,0x1c
100c2: 85aa mv a1,a0
100c4: b7040513 add a0,s0,-1168 # 1bb70 <__clzsi2+0x6e>
100c8: 2be5 jal 106c0 <printf>
100ca: f5818593 add a1,gp,-168 # 1d768 <b_x1>
100ce: f6018513 add a0,gp,-160 # 1d770 <b_x0>
100d2: 2af1 jal 102ae <predict_if_mul_overflow>
100d4: 85aa mv a1,a0
100d6: b7040513 add a0,s0,-1168
100da: 23dd jal 106c0 <printf>
100dc: f4818593 add a1,gp,-184 # 1d758 <c_x1>
100e0: f5018513 add a0,gp,-176 # 1d760 <c_x0>
100e4: 22e9 jal 102ae <predict_if_mul_overflow>
100e6: 85aa mv a1,a0
100e8: b7040513 add a0,s0,-1168
100ec: 2bd1 jal 106c0 <printf>
100ee: f3818593 add a1,gp,-200 # 1d748 <d_x1>
100f2: f4018513 add a0,gp,-192 # 1d750 <d_x0>
100f6: 2a65 jal 102ae <predict_if_mul_overflow>
100f8: 85aa mv a1,a0
100fa: b7040513 add a0,s0,-1168
100fe: 23c9 jal 106c0 <printf>
10100: 40b2 lw ra,12(sp)
10102: 4422 lw s0,8(sp)
10104: 4501 li a0,0
10106: 0141 add sp,sp,16
10108: 8082 ret
000102ae <predict_if_mul_overflow>:
102ae: 1141 add sp,sp,-16
102b0: c226 sw s1,4(sp)
102b2: 84ae mv s1,a1
102b4: 414c lw a1,4(a0)
102b6: 4108 lw a0,0(a0)
102b8: c606 sw ra,12(sp)
102ba: c422 sw s0,8(sp)
102bc: 35c5 jal 1019c <count_leading_zeros>
102be: 40cc lw a1,4(s1)
102c0: 842a mv s0,a0
102c2: 4088 lw a0,0(s1)
102c4: 3de1 jal 1019c <count_leading_zeros>
102c6: 04000793 li a5,64
102ca: 40878433 sub s0,a5,s0
102ce: 8f89 sub a5,a5,a0
102d0: 943e add s0,s0,a5
102d2: 40b2 lw ra,12(sp)
102d4: 04042513 slti a0,s0,64
102d8: 4422 lw s0,8(sp)
102da: 4492 lw s1,4(sp)
102dc: 00154513 xor a0,a0,1
102e0: 0141 add sp,sp,16
102e2: 8082 ret
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
1. cmv 10.49% [1784 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
2. clwsp 8.59% [1462 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏
3. addi 7.33% [1246 ] ██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉
4. cli 7.13% [1212 ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌
5. cswsp 6.70% [1140 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
6. caddi 4.40% [749 ] █████████████████████████████████████████████████████████████████████████▉
7. cj 3.99% [679 ] ██████████████████████████████████████████████████████████████████▉
8. jal 3.70% [630 ] ██████████████████████████████████████████████████████████████▏
9. beq 2.87% [489 ] ████████████████████████████████████████████████▏
10. sw 2.41% [410 ] ████████████████████████████████████████▍
11. clw 2.35% [399 ] ███████████████████████████████████████▎
12. cadd 2.35% [399 ] ███████████████████████████████████████▎
13. lw 2.22% [377 ] █████████████████████████████████████▏
14. andi 2.02% [343 ] █████████████████████████████████▊
15. bne 1.95% [332 ] ████████████████████████████████▊
16. cbeqz 1.89% [322 ] ███████████████████████████████▊
17. cjr 1.87% [318 ] ███████████████████████████████▎
18. csw 1.73% [294 ] █████████████████████████████
19. sub 1.49% [253 ] ████████████████████████▉
20. bge 1.29% [220 ] █████████████████████▋
21. lbu 1.28% [218 ] █████████████████████▌
22. auipc 1.23% [210 ] ████████████████████▋
23. blt 1.21% [205 ] ████████████████████▏
24. cbnez 1.13% [193 ] ███████████████████
25. cslli 1.09% [185 ] ██████████████████▎
26. slli 1.08% [184 ] ██████████████████▏
27. or 1.06% [180 ] █████████████████▊
28. srli 1.05% [178 ] █████████████████▌
$ riscv-none-elf-size source_O3.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_O3.elf
text data bss dec hex filename
76436 2372 1548 80356 139e4 source_O3.elf
$ riscv-none-elf-readelf -h source_O3.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_O3.elf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x1015c
Start of program headers: 52 (bytes into file)
Start of section headers: 94788 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
$ riscv-none-elf-objdump -d source_O3.elf >./text/source_O3.txt
main
and predict_if_mul_overflow
000100b0 <main>:
100b0: 1141 add sp,sp,-16
100b2: f8818593 add a1,gp,-120 # 1d798 <a_x1>
100b6: f9018513 add a0,gp,-112 # 1d7a0 <a_x0>
100ba: c606 sw ra,12(sp)
100bc: c422 sw s0,8(sp)
100be: 2ae5 jal 102b6 <predict_if_mul_overflow>
100c0: 6471 lui s0,0x1c
100c2: 85aa mv a1,a0
100c4: db840513 add a0,s0,-584 # 1bdb8 <__clzsi2+0x6e>
100c8: 043000ef jal 1090a <printf>
100cc: f5818593 add a1,gp,-168 # 1d768 <b_x1>
100d0: f6018513 add a0,gp,-160 # 1d770 <b_x0>
100d4: 22cd jal 102b6 <predict_if_mul_overflow>
100d6: 85aa mv a1,a0
100d8: db840513 add a0,s0,-584
100dc: 02f000ef jal 1090a <printf>
100e0: f4818593 add a1,gp,-184 # 1d758 <c_x1>
100e4: f5018513 add a0,gp,-176 # 1d760 <c_x0>
100e8: 22f9 jal 102b6 <predict_if_mul_overflow>
100ea: 85aa mv a1,a0
100ec: db840513 add a0,s0,-584
100f0: 01b000ef jal 1090a <printf>
100f4: f3818593 add a1,gp,-200 # 1d748 <d_x1>
100f8: f4018513 add a0,gp,-192 # 1d750 <d_x0>
100fc: 2a6d jal 102b6 <predict_if_mul_overflow>
100fe: 85aa mv a1,a0
10100: db840513 add a0,s0,-584
10104: 007000ef jal 1090a <printf>
10108: 40b2 lw ra,12(sp)
1010a: 4422 lw s0,8(sp)
1010c: 4501 li a0,0
1010e: 0141 add sp,sp,16
10110: 8082 ret
000102b6 <predict_if_mul_overflow>:
102b6: 00452883 lw a7,4(a0)
102ba: 0045a803 lw a6,4(a1)
102be: 4108 lw a0,0(a0)
102c0: 418c lw a1,0(a1)
102c2: 01f89613 sll a2,a7,0x1f
102c6: 01f81693 sll a3,a6,0x1f
102ca: 00155713 srl a4,a0,0x1
102ce: 0015d793 srl a5,a1,0x1
102d2: 8f51 or a4,a4,a2
102d4: 8fd5 or a5,a5,a3
102d6: 0018d613 srl a2,a7,0x1
102da: 00185693 srl a3,a6,0x1
102de: 01166633 or a2,a2,a7
102e2: 0106e6b3 or a3,a3,a6
102e6: 8d59 or a0,a0,a4
102e8: 8ddd or a1,a1,a5
102ea: 01e61893 sll a7,a2,0x1e
102ee: 01e69813 sll a6,a3,0x1e
102f2: 00255713 srl a4,a0,0x2
102f6: 0025d793 srl a5,a1,0x2
102fa: 00e8e733 or a4,a7,a4
102fe: 00f867b3 or a5,a6,a5
10302: 00265893 srl a7,a2,0x2
10306: 0026d813 srl a6,a3,0x2
1030a: 01166633 or a2,a2,a7
1030e: 0106e6b3 or a3,a3,a6
10312: 8d59 or a0,a0,a4
10314: 8ddd or a1,a1,a5
10316: 01c61893 sll a7,a2,0x1c
1031a: 01c69813 sll a6,a3,0x1c
1031e: 00455713 srl a4,a0,0x4
10322: 0045d793 srl a5,a1,0x4
10326: 00e8e733 or a4,a7,a4
1032a: 00f867b3 or a5,a6,a5
1032e: 00465893 srl a7,a2,0x4
10332: 0046d813 srl a6,a3,0x4
10336: 01166633 or a2,a2,a7
1033a: 0106e6b3 or a3,a3,a6
1033e: 8d59 or a0,a0,a4
10340: 8ddd or a1,a1,a5
10342: 01861893 sll a7,a2,0x18
10346: 01869813 sll a6,a3,0x18
1034a: 00855713 srl a4,a0,0x8
1034e: 0085d793 srl a5,a1,0x8
10352: 00e8e733 or a4,a7,a4
10356: 00f867b3 or a5,a6,a5
1035a: 00865893 srl a7,a2,0x8
1035e: 0086d813 srl a6,a3,0x8
10362: 01166633 or a2,a2,a7
10366: 0106e6b3 or a3,a3,a6
1036a: 8d59 or a0,a0,a4
1036c: 8ddd or a1,a1,a5
1036e: 01061893 sll a7,a2,0x10
10372: 01069813 sll a6,a3,0x10
10376: 01055713 srl a4,a0,0x10
1037a: 0105d793 srl a5,a1,0x10
1037e: 00e8e733 or a4,a7,a4
10382: 00f867b3 or a5,a6,a5
10386: 01065893 srl a7,a2,0x10
1038a: 0106d813 srl a6,a3,0x10
1038e: 01166633 or a2,a2,a7
10392: 0106e6b3 or a3,a3,a6
10396: 8d59 or a0,a0,a4
10398: 8ddd or a1,a1,a5
1039a: 8d51 or a0,a0,a2
1039c: 8dd5 or a1,a1,a3
1039e: 01f61313 sll t1,a2,0x1f
103a2: 01f69893 sll a7,a3,0x1f
103a6: 00155713 srl a4,a0,0x1
103aa: 0015d793 srl a5,a1,0x1
103ae: 55555837 lui a6,0x55555
103b2: 55580813 add a6,a6,1365 # 55555555 <__BSS_END__+0x555377b9>
103b6: 00e36733 or a4,t1,a4
103ba: 00f8e7b3 or a5,a7,a5
103be: 00165313 srl t1,a2,0x1
103c2: 0016d893 srl a7,a3,0x1
103c6: 01077733 and a4,a4,a6
103ca: 0107f7b3 and a5,a5,a6
103ce: 40e50733 sub a4,a0,a4
103d2: 40f587b3 sub a5,a1,a5
103d6: 01037333 and t1,t1,a6
103da: 0108f833 and a6,a7,a6
103de: 00e53533 sltu a0,a0,a4
103e2: 00f5b5b3 sltu a1,a1,a5
103e6: 40660633 sub a2,a2,t1
103ea: 410686b3 sub a3,a3,a6
103ee: 8e09 sub a2,a2,a0
103f0: 8e8d sub a3,a3,a1
103f2: 01e61313 sll t1,a2,0x1e
103f6: 01e69893 sll a7,a3,0x1e
103fa: 00275513 srl a0,a4,0x2
103fe: 0027d593 srl a1,a5,0x2
10402: 33333837 lui a6,0x33333
10406: 33380813 add a6,a6,819 # 33333333 <__BSS_END__+0x33315597>
1040a: 00a36533 or a0,t1,a0
1040e: 00b8e5b3 or a1,a7,a1
10412: 01057533 and a0,a0,a6
10416: 0105f5b3 and a1,a1,a6
1041a: 00265313 srl t1,a2,0x2
1041e: 01077733 and a4,a4,a6
10422: 0026d893 srl a7,a3,0x2
10426: 0107f7b3 and a5,a5,a6
1042a: 972a add a4,a4,a0
1042c: 97ae add a5,a5,a1
1042e: 01037333 and t1,t1,a6
10432: 0108f8b3 and a7,a7,a6
10436: 01067633 and a2,a2,a6
1043a: 0106f6b3 and a3,a3,a6
1043e: 961a add a2,a2,t1
10440: 96c6 add a3,a3,a7
10442: 00a73533 sltu a0,a4,a0
10446: 00b7b5b3 sltu a1,a5,a1
1044a: 9532 add a0,a0,a2
1044c: 95b6 add a1,a1,a3
1044e: 01c51893 sll a7,a0,0x1c
10452: 01c59813 sll a6,a1,0x1c
10456: 00475613 srl a2,a4,0x4
1045a: 0047d693 srl a3,a5,0x4
1045e: 00c8e633 or a2,a7,a2
10462: 00d866b3 or a3,a6,a3
10466: 9732 add a4,a4,a2
10468: 97b6 add a5,a5,a3
1046a: 00455893 srl a7,a0,0x4
1046e: 0045d813 srl a6,a1,0x4
10472: 98aa add a7,a7,a0
10474: 95c2 add a1,a1,a6
10476: 00c73633 sltu a2,a4,a2
1047a: 00d7b6b3 sltu a3,a5,a3
1047e: 0f0f1537 lui a0,0xf0f1
10482: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d3173>
10486: 9646 add a2,a2,a7
10488: 96ae add a3,a3,a1
1048a: 8e69 and a2,a2,a0
1048c: 8fe9 and a5,a5,a0
1048e: 8ee9 and a3,a3,a0
10490: 8f69 and a4,a4,a0
10492: 01861893 sll a7,a2,0x18
10496: 01869813 sll a6,a3,0x18
1049a: 00875513 srl a0,a4,0x8
1049e: 0087d593 srl a1,a5,0x8
104a2: 00a8e533 or a0,a7,a0
104a6: 00b865b3 or a1,a6,a1
104aa: 953a add a0,a0,a4
104ac: 95be add a1,a1,a5
104ae: 00865893 srl a7,a2,0x8
104b2: 0086d813 srl a6,a3,0x8
104b6: 9646 add a2,a2,a7
104b8: 96c2 add a3,a3,a6
104ba: 00e53733 sltu a4,a0,a4
104be: 00f5b7b3 sltu a5,a1,a5
104c2: 9732 add a4,a4,a2
104c4: 97b6 add a5,a5,a3
104c6: 01071893 sll a7,a4,0x10
104ca: 01079813 sll a6,a5,0x10
104ce: 01055693 srl a3,a0,0x10
104d2: 0105d613 srl a2,a1,0x10
104d6: 00d8e6b3 or a3,a7,a3
104da: 00c86633 or a2,a6,a2
104de: 96aa add a3,a3,a0
104e0: 962e add a2,a2,a1
104e2: 01075893 srl a7,a4,0x10
104e6: 0107d813 srl a6,a5,0x10
104ea: 9746 add a4,a4,a7
104ec: 97c2 add a5,a5,a6
104ee: 00a6b533 sltu a0,a3,a0
104f2: 00b635b3 sltu a1,a2,a1
104f6: 95be add a1,a1,a5
104f8: 953a add a0,a0,a4
104fa: 9536 add a0,a0,a3
104fc: 962e add a2,a2,a1
104fe: 04000793 li a5,64
10502: 07f67613 and a2,a2,127
10506: 07f57513 and a0,a0,127
1050a: 40a78533 sub a0,a5,a0
1050e: 8f91 sub a5,a5,a2
10510: 0542 sll a0,a0,0x10
10512: 07c2 sll a5,a5,0x10
10514: 04000713 li a4,64
10518: 83c1 srl a5,a5,0x10
1051a: 8141 srl a0,a0,0x10
1051c: 40a70533 sub a0,a4,a0
10520: 8f1d sub a4,a4,a5
10522: 953a add a0,a0,a4
10524: 04052513 slti a0,a0,64
10528: 00154513 xor a0,a0,1
1052c: 8082 ret
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
1. cmv 10.38% [1782 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
2. clwsp 8.50% [1459 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
3. addi 7.28% [1250 ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
4. cli 7.06% [1212 ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋
5. cswsp 6.62% [1137 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎
6. caddi 4.35% [747 ] █████████████████████████████████████████████████████████████████████████▊
7. cj 3.95% [679 ] ███████████████████████████████████████████████████████████████████
8. jal 3.69% [633 ] ██████████████████████████████████████████████████████████████▌
9. beq 2.85% [489 ] ████████████████████████████████████████████████▎
10. cadd 2.47% [425 ] █████████████████████████████████████████▉
11. sw 2.39% [410 ] ████████████████████████████████████████▍
12. clw 2.31% [397 ] ███████████████████████████████████████▏
13. lw 2.21% [379 ] █████████████████████████████████████▍
14. andi 2.01% [345 ] ██████████████████████████████████
15. bne 1.93% [332 ] ████████████████████████████████▊
16. cbeqz 1.88% [322 ] ███████████████████████████████▊
17. cjr 1.85% [318 ] ███████████████████████████████▍
18. csw 1.71% [294 ] █████████████████████████████
19. sub 1.50% [258 ] █████████████████████████▍
20. bge 1.28% [220 ] █████████████████████▋
21. lbu 1.27% [218 ] █████████████████████▌
22. srli 1.27% [218 ] █████████████████████▌
23. auipc 1.22% [210 ] ████████████████████▋
24. or 1.21% [208 ] ████████████████████▌
25. blt 1.19% [205 ] ████████████████████▏
26. slli 1.19% [204 ] ████████████████████▏
27. cbnez 1.12% [193 ] ███████████████████
28. cslli 1.09% [187 ] ██████████████████▍
29. cor 1.01% [174 ] █████████████████▏
$ riscv-none-elf-size source_Os.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_Os.elf
text data bss dec hex filename
75784 2372 1548 79704 13758 source_Os.elf
$ riscv-none-elf-readelf -h source_Os.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_Os.elf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x1015c
Start of program headers: 52 (bytes into file)
Start of section headers: 94788 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
$ riscv-none-elf-objdump -d source_Os.elf >./text/source_Os.txt
`main` and `predict_if_mul_overflow`
000100b0 <main>:
100b0: 1141 add sp,sp,-16
100b2: f8818593 add a1,gp,-120 # 1d798 <a_x1>
100b6: f9018513 add a0,gp,-112 # 1d7a0 <a_x0>
100ba: c606 sw ra,12(sp)
100bc: c422 sw s0,8(sp)
100be: 2ac5 jal 102ae <predict_if_mul_overflow>
100c0: 6471 lui s0,0x1c
100c2: 85aa mv a1,a0
100c4: b7040513 add a0,s0,-1168 # 1bb70 <__clzsi2+0x6e>
100c8: 2be5 jal 106c0 <printf>
100ca: f5818593 add a1,gp,-168 # 1d768 <b_x1>
100ce: f6018513 add a0,gp,-160 # 1d770 <b_x0>
100d2: 2af1 jal 102ae <predict_if_mul_overflow>
100d4: 85aa mv a1,a0
100d6: b7040513 add a0,s0,-1168
100da: 23dd jal 106c0 <printf>
100dc: f4818593 add a1,gp,-184 # 1d758 <c_x1>
100e0: f5018513 add a0,gp,-176 # 1d760 <c_x0>
100e4: 22e9 jal 102ae <predict_if_mul_overflow>
100e6: 85aa mv a1,a0
100e8: b7040513 add a0,s0,-1168
100ec: 2bd1 jal 106c0 <printf>
100ee: f3818593 add a1,gp,-200 # 1d748 <d_x1>
100f2: f4018513 add a0,gp,-192 # 1d750 <d_x0>
100f6: 2a65 jal 102ae <predict_if_mul_overflow>
100f8: 85aa mv a1,a0
100fa: b7040513 add a0,s0,-1168
100fe: 23c9 jal 106c0 <printf>
10100: 40b2 lw ra,12(sp)
10102: 4422 lw s0,8(sp)
10104: 4501 li a0,0
10106: 0141 add sp,sp,16
10108: 8082 ret
000102ae <predict_if_mul_overflow>:
102ae: 1141 add sp,sp,-16
102b0: c226 sw s1,4(sp)
102b2: 84ae mv s1,a1
102b4: 414c lw a1,4(a0)
102b6: 4108 lw a0,0(a0)
102b8: c606 sw ra,12(sp)
102ba: c422 sw s0,8(sp)
102bc: 35c5 jal 1019c <count_leading_zeros>
102be: 40cc lw a1,4(s1)
102c0: 842a mv s0,a0
102c2: 4088 lw a0,0(s1)
102c4: 3de1 jal 1019c <count_leading_zeros>
102c6: 04000793 li a5,64
102ca: 40878433 sub s0,a5,s0
102ce: 8f89 sub a5,a5,a0
102d0: 943e add s0,s0,a5
102d2: 40b2 lw ra,12(sp)
102d4: 04042513 slti a0,s0,64
102d8: 4422 lw s0,8(sp)
102da: 4492 lw s1,4(sp)
102dc: 00154513 xor a0,a0,1
102e0: 0141 add sp,sp,16
102e2: 8082 ret
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
1. cmv 10.49% [1784 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
2. clwsp 8.59% [1462 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏
3. addi 7.33% [1246 ] ██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉
4. cli 7.13% [1212 ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌
5. cswsp 6.70% [1140 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
6. caddi 4.40% [749 ] █████████████████████████████████████████████████████████████████████████▉
7. cj 3.99% [679 ] ██████████████████████████████████████████████████████████████████▉
8. jal 3.70% [630 ] ██████████████████████████████████████████████████████████████▏
9. beq 2.87% [489 ] ████████████████████████████████████████████████▏
10. sw 2.41% [410 ] ████████████████████████████████████████▍
11. clw 2.35% [399 ] ███████████████████████████████████████▎
12. cadd 2.35% [399 ] ███████████████████████████████████████▎
13. lw 2.22% [377 ] █████████████████████████████████████▏
14. andi 2.02% [343 ] █████████████████████████████████▊
15. bne 1.95% [332 ] ████████████████████████████████▊
16. cbeqz 1.89% [322 ] ███████████████████████████████▊
17. cjr 1.87% [318 ] ███████████████████████████████▎
18. csw 1.73% [294 ] █████████████████████████████
19. sub 1.49% [253 ] ████████████████████████▉
20. bge 1.29% [220 ] █████████████████████▋
21. lbu 1.28% [218 ] █████████████████████▌
22. auipc 1.23% [210 ] ████████████████████▋
23. blt 1.21% [205 ] ████████████████████▏
24. cbnez 1.13% [193 ] ███████████████████
25. cslli 1.09% [185 ] ██████████████████▎
26. slli 1.08% [184 ] ██████████████████▏
27. or 1.06% [180 ] █████████████████▊
28. srli 1.05% [178 ] █████████████████▌
$ riscv-none-elf-size source_Ofast.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-size source_Ofast.elf
text data bss dec hex filename
76436 2372 1548 80356 139e4 source_Ofast.elf
$ riscv-none-elf-readelf -h source_Ofast.elf
player1@player1:~/Desktop/rv32emu/lab2$ riscv-none-elf-readelf -h source_Ofast.elf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x1015c
Start of program headers: 52 (bytes into file)
Start of section headers: 94788 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
$ riscv-none-elf-objdump -d source_Ofast.elf >./text/source_Ofast.txt
`main` and `predict_if_mul_overflow`
000100b0 <main>:
100b0: 1141 add sp,sp,-16
100b2: f8818593 add a1,gp,-120 # 1d798 <a_x1>
100b6: f9018513 add a0,gp,-112 # 1d7a0 <a_x0>
100ba: c606 sw ra,12(sp)
100bc: c422 sw s0,8(sp)
100be: 2ae5 jal 102b6 <predict_if_mul_overflow>
100c0: 6471 lui s0,0x1c
100c2: 85aa mv a1,a0
100c4: db840513 add a0,s0,-584 # 1bdb8 <__clzsi2+0x6e>
100c8: 043000ef jal 1090a <printf>
100cc: f5818593 add a1,gp,-168 # 1d768 <b_x1>
100d0: f6018513 add a0,gp,-160 # 1d770 <b_x0>
100d4: 22cd jal 102b6 <predict_if_mul_overflow>
100d6: 85aa mv a1,a0
100d8: db840513 add a0,s0,-584
100dc: 02f000ef jal 1090a <printf>
100e0: f4818593 add a1,gp,-184 # 1d758 <c_x1>
100e4: f5018513 add a0,gp,-176 # 1d760 <c_x0>
100e8: 22f9 jal 102b6 <predict_if_mul_overflow>
100ea: 85aa mv a1,a0
100ec: db840513 add a0,s0,-584
100f0: 01b000ef jal 1090a <printf>
100f4: f3818593 add a1,gp,-200 # 1d748 <d_x1>
100f8: f4018513 add a0,gp,-192 # 1d750 <d_x0>
100fc: 2a6d jal 102b6 <predict_if_mul_overflow>
100fe: 85aa mv a1,a0
10100: db840513 add a0,s0,-584
10104: 007000ef jal 1090a <printf>
10108: 40b2 lw ra,12(sp)
1010a: 4422 lw s0,8(sp)
1010c: 4501 li a0,0
1010e: 0141 add sp,sp,16
10110: 8082 ret
000102b6 <predict_if_mul_overflow>:
102b6: 00452883 lw a7,4(a0)
102ba: 0045a803 lw a6,4(a1)
102be: 4108 lw a0,0(a0)
102c0: 418c lw a1,0(a1)
102c2: 01f89613 sll a2,a7,0x1f
102c6: 01f81693 sll a3,a6,0x1f
102ca: 00155713 srl a4,a0,0x1
102ce: 0015d793 srl a5,a1,0x1
102d2: 8f51 or a4,a4,a2
102d4: 8fd5 or a5,a5,a3
102d6: 0018d613 srl a2,a7,0x1
102da: 00185693 srl a3,a6,0x1
102de: 01166633 or a2,a2,a7
102e2: 0106e6b3 or a3,a3,a6
102e6: 8d59 or a0,a0,a4
102e8: 8ddd or a1,a1,a5
102ea: 01e61893 sll a7,a2,0x1e
102ee: 01e69813 sll a6,a3,0x1e
102f2: 00255713 srl a4,a0,0x2
102f6: 0025d793 srl a5,a1,0x2
102fa: 00e8e733 or a4,a7,a4
102fe: 00f867b3 or a5,a6,a5
10302: 00265893 srl a7,a2,0x2
10306: 0026d813 srl a6,a3,0x2
1030a: 01166633 or a2,a2,a7
1030e: 0106e6b3 or a3,a3,a6
10312: 8d59 or a0,a0,a4
10314: 8ddd or a1,a1,a5
10316: 01c61893 sll a7,a2,0x1c
1031a: 01c69813 sll a6,a3,0x1c
1031e: 00455713 srl a4,a0,0x4
10322: 0045d793 srl a5,a1,0x4
10326: 00e8e733 or a4,a7,a4
1032a: 00f867b3 or a5,a6,a5
1032e: 00465893 srl a7,a2,0x4
10332: 0046d813 srl a6,a3,0x4
10336: 01166633 or a2,a2,a7
1033a: 0106e6b3 or a3,a3,a6
1033e: 8d59 or a0,a0,a4
10340: 8ddd or a1,a1,a5
10342: 01861893 sll a7,a2,0x18
10346: 01869813 sll a6,a3,0x18
1034a: 00855713 srl a4,a0,0x8
1034e: 0085d793 srl a5,a1,0x8
10352: 00e8e733 or a4,a7,a4
10356: 00f867b3 or a5,a6,a5
1035a: 00865893 srl a7,a2,0x8
1035e: 0086d813 srl a6,a3,0x8
10362: 01166633 or a2,a2,a7
10366: 0106e6b3 or a3,a3,a6
1036a: 8d59 or a0,a0,a4
1036c: 8ddd or a1,a1,a5
1036e: 01061893 sll a7,a2,0x10
10372: 01069813 sll a6,a3,0x10
10376: 01055713 srl a4,a0,0x10
1037a: 0105d793 srl a5,a1,0x10
1037e: 00e8e733 or a4,a7,a4
10382: 00f867b3 or a5,a6,a5
10386: 01065893 srl a7,a2,0x10
1038a: 0106d813 srl a6,a3,0x10
1038e: 01166633 or a2,a2,a7
10392: 0106e6b3 or a3,a3,a6
10396: 8d59 or a0,a0,a4
10398: 8ddd or a1,a1,a5
1039a: 8d51 or a0,a0,a2
1039c: 8dd5 or a1,a1,a3
1039e: 01f61313 sll t1,a2,0x1f
103a2: 01f69893 sll a7,a3,0x1f
103a6: 00155713 srl a4,a0,0x1
103aa: 0015d793 srl a5,a1,0x1
103ae: 55555837 lui a6,0x55555
103b2: 55580813 add a6,a6,1365 # 55555555 <__BSS_END__+0x555377b9>
103b6: 00e36733 or a4,t1,a4
103ba: 00f8e7b3 or a5,a7,a5
103be: 00165313 srl t1,a2,0x1
103c2: 0016d893 srl a7,a3,0x1
103c6: 01077733 and a4,a4,a6
103ca: 0107f7b3 and a5,a5,a6
103ce: 40e50733 sub a4,a0,a4
103d2: 40f587b3 sub a5,a1,a5
103d6: 01037333 and t1,t1,a6
103da: 0108f833 and a6,a7,a6
103de: 00e53533 sltu a0,a0,a4
103e2: 00f5b5b3 sltu a1,a1,a5
103e6: 40660633 sub a2,a2,t1
103ea: 410686b3 sub a3,a3,a6
103ee: 8e09 sub a2,a2,a0
103f0: 8e8d sub a3,a3,a1
103f2: 01e61313 sll t1,a2,0x1e
103f6: 01e69893 sll a7,a3,0x1e
103fa: 00275513 srl a0,a4,0x2
103fe: 0027d593 srl a1,a5,0x2
10402: 33333837 lui a6,0x33333
10406: 33380813 add a6,a6,819 # 33333333 <__BSS_END__+0x33315597>
1040a: 00a36533 or a0,t1,a0
1040e: 00b8e5b3 or a1,a7,a1
10412: 01057533 and a0,a0,a6
10416: 0105f5b3 and a1,a1,a6
1041a: 00265313 srl t1,a2,0x2
1041e: 01077733 and a4,a4,a6
10422: 0026d893 srl a7,a3,0x2
10426: 0107f7b3 and a5,a5,a6
1042a: 972a add a4,a4,a0
1042c: 97ae add a5,a5,a1
1042e: 01037333 and t1,t1,a6
10432: 0108f8b3 and a7,a7,a6
10436: 01067633 and a2,a2,a6
1043a: 0106f6b3 and a3,a3,a6
1043e: 961a add a2,a2,t1
10440: 96c6 add a3,a3,a7
10442: 00a73533 sltu a0,a4,a0
10446: 00b7b5b3 sltu a1,a5,a1
1044a: 9532 add a0,a0,a2
1044c: 95b6 add a1,a1,a3
1044e: 01c51893 sll a7,a0,0x1c
10452: 01c59813 sll a6,a1,0x1c
10456: 00475613 srl a2,a4,0x4
1045a: 0047d693 srl a3,a5,0x4
1045e: 00c8e633 or a2,a7,a2
10462: 00d866b3 or a3,a6,a3
10466: 9732 add a4,a4,a2
10468: 97b6 add a5,a5,a3
1046a: 00455893 srl a7,a0,0x4
1046e: 0045d813 srl a6,a1,0x4
10472: 98aa add a7,a7,a0
10474: 95c2 add a1,a1,a6
10476: 00c73633 sltu a2,a4,a2
1047a: 00d7b6b3 sltu a3,a5,a3
1047e: 0f0f1537 lui a0,0xf0f1
10482: f0f50513 add a0,a0,-241 # f0f0f0f <__BSS_END__+0xf0d3173>
10486: 9646 add a2,a2,a7
10488: 96ae add a3,a3,a1
1048a: 8e69 and a2,a2,a0
1048c: 8fe9 and a5,a5,a0
1048e: 8ee9 and a3,a3,a0
10490: 8f69 and a4,a4,a0
10492: 01861893 sll a7,a2,0x18
10496: 01869813 sll a6,a3,0x18
1049a: 00875513 srl a0,a4,0x8
1049e: 0087d593 srl a1,a5,0x8
104a2: 00a8e533 or a0,a7,a0
104a6: 00b865b3 or a1,a6,a1
104aa: 953a add a0,a0,a4
104ac: 95be add a1,a1,a5
104ae: 00865893 srl a7,a2,0x8
104b2: 0086d813 srl a6,a3,0x8
104b6: 9646 add a2,a2,a7
104b8: 96c2 add a3,a3,a6
104ba: 00e53733 sltu a4,a0,a4
104be: 00f5b7b3 sltu a5,a1,a5
104c2: 9732 add a4,a4,a2
104c4: 97b6 add a5,a5,a3
104c6: 01071893 sll a7,a4,0x10
104ca: 01079813 sll a6,a5,0x10
104ce: 01055693 srl a3,a0,0x10
104d2: 0105d613 srl a2,a1,0x10
104d6: 00d8e6b3 or a3,a7,a3
104da: 00c86633 or a2,a6,a2
104de: 96aa add a3,a3,a0
104e0: 962e add a2,a2,a1
104e2: 01075893 srl a7,a4,0x10
104e6: 0107d813 srl a6,a5,0x10
104ea: 9746 add a4,a4,a7
104ec: 97c2 add a5,a5,a6
104ee: 00a6b533 sltu a0,a3,a0
104f2: 00b635b3 sltu a1,a2,a1
104f6: 95be add a1,a1,a5
104f8: 953a add a0,a0,a4
104fa: 9536 add a0,a0,a3
104fc: 962e add a2,a2,a1
104fe: 04000793 li a5,64
10502: 07f67613 and a2,a2,127
10506: 07f57513 and a0,a0,127
1050a: 40a78533 sub a0,a5,a0
1050e: 8f91 sub a5,a5,a2
10510: 0542 sll a0,a0,0x10
10512: 07c2 sll a5,a5,0x10
10514: 04000713 li a4,64
10518: 83c1 srl a5,a5,0x10
1051a: 8141 srl a0,a0,0x10
1051c: 40a70533 sub a0,a4,a0
10520: 8f1d sub a4,a4,a5
10522: 953a add a0,a0,a4
10524: 04052513 slti a0,a0,64
10528: 00154513 xor a0,a0,1
1052c: 8082 ret
+---------------------------------------------+
| RV32 Target Instruction Frequency Histogram |
+---------------------------------------------+
1. cmv 10.38% [1782 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
2. clwsp 8.50% [1459 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
3. addi 7.28% [1250 ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍
4. cli 7.06% [1212 ] ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋
5. cswsp 6.62% [1137 ] ████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎
6. caddi 4.35% [747 ] █████████████████████████████████████████████████████████████████████████▊
7. cj 3.95% [679 ] ███████████████████████████████████████████████████████████████████
8. jal 3.69% [633 ] ██████████████████████████████████████████████████████████████▌
9. beq 2.85% [489 ] ████████████████████████████████████████████████▎
10. cadd 2.47% [425 ] █████████████████████████████████████████▉
11. sw 2.39% [410 ] ████████████████████████████████████████▍
12. clw 2.31% [397 ] ███████████████████████████████████████▏
13. lw 2.21% [379 ] █████████████████████████████████████▍
14. andi 2.01% [345 ] ██████████████████████████████████
15. bne 1.93% [332 ] ████████████████████████████████▊
16. cbeqz 1.88% [322 ] ███████████████████████████████▊
17. cjr 1.85% [318 ] ███████████████████████████████▍
18. csw 1.71% [294 ] █████████████████████████████
19. sub 1.50% [258 ] █████████████████████████▍
20. bge 1.28% [220 ] █████████████████████▋
21. lbu 1.27% [218 ] █████████████████████▌
22. srli 1.27% [218 ] █████████████████████▌
23. auipc 1.22% [210 ] ████████████████████▋
24. or 1.21% [208 ] ████████████████████▌
25. blt 1.19% [205 ] ████████████████████▏
26. slli 1.19% [204 ] ████████████████████▏
27. cbnez 1.12% [193 ] ███████████████████
28. cslli 1.09% [187 ] ██████████████████▍
29. cor 1.01% [174 ] █████████████████▏
Based on the results above, we can see that the text (code) size of the executable built directly from assembly language is smaller than that of the executables compiled from C. This raises the question of why hand-written assembly can produce more compact executables. Here, we discuss some possible reasons:
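The compactness of hand-written assembly may stem from its directness, manual optimization, compact instruction selection, and a higher level of customized control over what ends up in the binary. Note also that, in the C builds above, `main` and `predict_if_mul_overflow` together occupy well under 1 KB of the roughly 75 KB text segment, so most of that size presumably comes from other code linked into the executable (for example the `printf` implementation) rather than from the compiled functions themselves. However, assembly also requires more time and effort, as it lacks the abstractions and conveniences of high-level languages. Therefore, when choosing a programming language, one should make a wise choice based on project requirements and time constraints.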
Computer Architecture HW2
Assignment1: RISC-V Assembly and Instruction Pipeline
rv32emu
編譯器和最佳化原理篇
or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up