riscv
contributed by 鄭惟
(https://hackmd.io/@WeiCheng14159/rkUifs2Hw)
.data
# Test value handed to clz by main; 0x0000000f has 28 leading zero bits.
input: .word 0x0000000f
# Mask with only bit 31 set; clz loads it as the starting probe position.
one: .word 0x80000000
# Text fragments that printResult emits before and between the two numbers.
str1: .string "clz value of "
str2: .string " is "
.text
main:
# Entry point: a0 = clz(input), then printResult(input, result), then exit.
lw a0, input # Load input from static data
jal ra, clz # Jump-and-link to the 'clz' label; the count comes back in a0
# Print the result to console
mv a1, a0 # a1 = result; copy it out before a0 is overwritten below
lw a0, input # a0 = the original input again (clz replaced a0 with its result)
jal ra, printResult
# Exit program
li a7, 10 # a7 selects the environment-call service; 10 is used here to exit
ecall
clz:
    # Count the leading zero bits of a 32-bit word.
    #   in  : a0 = value to scan
    #   out : a0 = number of zero bits above the highest set bit
    #              (32 when a0 is zero)
    #   uses: t0 = probe mask, t1 = iterations left,
    #         t2 = zeros counted so far, t3 = scratch
    lw      t0, one             # probe mask starts at the word stored at 'one'
    li      t1, 32              # at most 32 bit positions to test
    li      t2, 0               # no zeros counted yet
clz_loop:
    beqz    t1, clz_done        # every bit position has been tested
    addi    t1, t1, -1
    and     t3, a0, t0          # isolate the bit under the mask
    bnez    t3, clz_done        # first set bit reached: stop counting
    addi    t2, t2, 1           # that bit was zero
    srli    t0, t0, 1           # move the mask one position lower
    j       clz_loop
clz_done:
    mv      a0, t2              # return the count
    ret
# --- printResult ---
# Emits: str1, the value in a0, str2, the value in a1 (four ecalls in a row).
#   a0: input
#   a1: result
# a0 doubles as the ecall argument register, so both numbers are parked in
# t0/t1 first. Overwrites a0, a7, t0 and t1.
# NOTE(review): service 4 is used with a string address and service 1 with an
# integer, presumably the RARS/Ripes-style print-string / print-int calls;
# confirm against the simulator in use.
printResult:
    mv      t1, a1              # park result
    mv      t0, a0              # park input

    li      a7, 4               # str1
    la      a0, str1
    ecall

    li      a7, 1               # input value
    mv      a0, t0
    ecall

    li      a7, 4               # str2
    la      a0, str2
    ecall

    li      a7, 1               # result value
    mv      a0, t1
    ecall

    ret
/*
 * Count the leading zero bits of i by probing downward from bit 31.
 *
 * Returns the number of consecutive zero bits found before the first set
 * bit, or 32 when none of the 32 probed bits is set.
 */
unsigned int clz(unsigned int i)
{
    unsigned int mask = 0x80000000;
    unsigned int zeros = 0;

    /* One probe per bit position; stop at the first set bit. */
    while (zeros < 32 && (i & mask) == 0) {
        zeros++;
        mask >>= 1;
    }
    return zeros;
}
/*
 * Count the leading zero bits of i by probing downward from bit 31.
 * Returns the count of zero bits seen before the first set bit, or 32 when
 * all 32 probed bits are zero.
 */
unsigned int clz(unsigned int i)
{
unsigned int one = 0x80000000; /* probe mask, starts at bit 31 */
unsigned int res = 0; /* zero bits counted so far */
for (int cnt = 0; cnt < 32; cnt++) { /* one probe per bit position */
if ((i & one) == 0)
res++;
else
return res; /* first set bit found */
one = one >> 1; /* move the probe one bit lower */
}
return res; /* loop ran to completion: res is 32 */
}
/*
 * Program entry point (the build uses -nostdlib, so there is no main()).
 * Computes clz(0x0fffffff) and writes "clz value of 0x0fffffff is <digit>"
 * one byte at a time to the address held in tx.
 */
void _start()
{
/* Every output byte is stored to this fixed address. */
/* NOTE(review): presumably the emulator's console/transmit register - confirm. */
volatile char* tx = (volatile char*) 0x40002000;
const char *str1 = "clz value of ";
const char *str2 = " is ";
/* Textual copy of the argument passed to clz below; the two are kept in sync by hand. */
const char *input = "0x0fffffff";
unsigned int *p, res = clz(0x0fffffff);
/* Emit str1 up to (not including) its terminating NUL. */
while (*str1) {
*tx = *str1;
str1++;
}
/* Emit the textual input value. */
while (*input) {
*tx = *input;
input++;
}
/* Emit str2. */
while (*str2) {
*tx = *str2;
str2++;
}
/* res is read back through a pointer to itself. */
p = &res;
/* Emits one character only: '0' + res is a decimal digit just for res in 0..9. */
/* NOTE(review): clz can return up to 32; values >= 10 would not print as a number. */
*tx = *p + '0';
}
without optimization
Run
$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -nostdlib clz.c -o clz
$ ./emu-rv32i clz
clz value of 0x0fffffff is 4
>>> Execution time: 43216 ns
>>> Instruction count: 407 (IPS=9417808)
>>> Jumps: 45 (11.06%) - 11 forwards, 34 backwards
>>> Branching T=33 (82.50%) F=7 (17.50%)
$ riscv-none-embed-objdump -d clz
clz: file format elf32-littleriscv
Disassembly of section .text:
00010054 <clz>:
10054: fd010113 addi sp,sp,-48
10058: 02812623 sw s0,44(sp)
1005c: 03010413 addi s0,sp,48
10060: fca42e23 sw a0,-36(s0)
10064: 800007b7 lui a5,0x80000
10068: fef42623 sw a5,-20(s0)
1006c: fe042423 sw zero,-24(s0)
10070: fe042223 sw zero,-28(s0)
10074: 0440006f j 100b8 <clz+0x64>
10078: fdc42703 lw a4,-36(s0)
1007c: fec42783 lw a5,-20(s0)
10080: 00f777b3 and a5,a4,a5
10084: 00079a63 bnez a5,10098 <clz+0x44>
10088: fe842783 lw a5,-24(s0)
1008c: 00178793 addi a5,a5,1 # 80000001 <__global_pointer$+0x7ffee5fa>
10090: fef42423 sw a5,-24(s0)
10094: 00c0006f j 100a0 <clz+0x4c>
10098: fe842783 lw a5,-24(s0)
1009c: 02c0006f j 100c8 <clz+0x74>
100a0: fec42783 lw a5,-20(s0)
100a4: 0017d793 srli a5,a5,0x1
100a8: fef42623 sw a5,-20(s0)
100ac: fe442783 lw a5,-28(s0)
100b0: 00178793 addi a5,a5,1
100b4: fef42223 sw a5,-28(s0)
100b8: fe442703 lw a4,-28(s0)
100bc: 01f00793 li a5,31
100c0: fae7dce3 bge a5,a4,10078 <clz+0x24>
100c4: fe842783 lw a5,-24(s0)
100c8: 00078513 mv a0,a5
100cc: 02c12403 lw s0,44(sp)
100d0: 03010113 addi sp,sp,48
100d4: 00008067 ret
000100d8 <_start>:
100d8: fd010113 addi sp,sp,-48
100dc: 02112623 sw ra,44(sp)
100e0: 02812423 sw s0,40(sp)
100e4: 03010413 addi s0,sp,48
100e8: 400027b7 lui a5,0x40002
100ec: fef42023 sw a5,-32(s0)
100f0: 000107b7 lui a5,0x10
100f4: 1e478793 addi a5,a5,484 # 101e4 <_start+0x10c>
100f8: fef42623 sw a5,-20(s0)
100fc: 000107b7 lui a5,0x10
10100: 1f478793 addi a5,a5,500 # 101f4 <_start+0x11c>
10104: fef42423 sw a5,-24(s0)
10108: 000107b7 lui a5,0x10
1010c: 1fc78793 addi a5,a5,508 # 101fc <_start+0x124>
10110: fef42223 sw a5,-28(s0)
10114: 100007b7 lui a5,0x10000
10118: fff78513 addi a0,a5,-1 # fffffff <__global_pointer$+0xffee5f8>
1011c: f39ff0ef jal ra,10054 <clz>
10120: 00050793 mv a5,a0
10124: fcf42c23 sw a5,-40(s0)
10128: 0200006f j 10148 <_start+0x70>
1012c: fec42783 lw a5,-20(s0)
10130: 0007c703 lbu a4,0(a5)
10134: fe042783 lw a5,-32(s0)
10138: 00e78023 sb a4,0(a5)
1013c: fec42783 lw a5,-20(s0)
10140: 00178793 addi a5,a5,1
10144: fef42623 sw a5,-20(s0)
10148: fec42783 lw a5,-20(s0)
1014c: 0007c783 lbu a5,0(a5)
10150: fc079ee3 bnez a5,1012c <_start+0x54>
10154: 0200006f j 10174 <_start+0x9c>
10158: fe442783 lw a5,-28(s0)
1015c: 0007c703 lbu a4,0(a5)
10160: fe042783 lw a5,-32(s0)
10164: 00e78023 sb a4,0(a5)
10168: fe442783 lw a5,-28(s0)
1016c: 00178793 addi a5,a5,1
10170: fef42223 sw a5,-28(s0)
10174: fe442783 lw a5,-28(s0)
10178: 0007c783 lbu a5,0(a5)
1017c: fc079ee3 bnez a5,10158 <_start+0x80>
10180: 0200006f j 101a0 <_start+0xc8>
10184: fe842783 lw a5,-24(s0)
10188: 0007c703 lbu a4,0(a5)
1018c: fe042783 lw a5,-32(s0)
10190: 00e78023 sb a4,0(a5)
10194: fe842783 lw a5,-24(s0)
10198: 00178793 addi a5,a5,1
1019c: fef42423 sw a5,-24(s0)
101a0: fe842783 lw a5,-24(s0)
101a4: 0007c783 lbu a5,0(a5)
101a8: fc079ee3 bnez a5,10184 <_start+0xac>
101ac: fd840793 addi a5,s0,-40
101b0: fcf42e23 sw a5,-36(s0)
101b4: fdc42783 lw a5,-36(s0)
101b8: 0007a783 lw a5,0(a5)
101bc: 0ff7f793 andi a5,a5,255
101c0: 03078793 addi a5,a5,48
101c4: 0ff7f713 andi a4,a5,255
101c8: fe042783 lw a5,-32(s0)
101cc: 00e78023 sb a4,0(a5)
101d0: 00000013 nop
101d4: 02c12083 lw ra,44(sp)
101d8: 02812403 lw s0,40(sp)
101dc: 03010113 addi sp,sp,48
101e0: 00008067 ret
Instruction Statistics
Instructions Stat:
LUI = 6
JAL = 10
JALR = 2
BNE = 35
BGE = 5
LW = 145
LBU = 57
SB = 28
SW = 52
ADDI = 55
ANDI = 2
SRLI = 4
AND = 5
LI* = 6
Five Most Frequent:
1) LW = 145 (35.63%)
2) LBU = 57 (14.00%)
3) ADDI = 55 (13.51%)
4) SW = 52 (12.78%)
5) BNE = 35 (8.60%)
Readelf
$ riscv-none-embed-readelf -h clz
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x100d8
Start of program headers: 52 (bytes into file)
Start of section headers: 944 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 1
Size of section headers: 40 (bytes)
Number of section headers: 7
Section header string table index: 6
Size
$ riscv-none-embed-size clz
text data bss dec hex filename
435 0 0 435 1b3 clz
with O3 optimization
Run
$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib clz.c -o clz
$ ./emu-rv32i clz
clz value of 0x0fffffff is 4
>>> Execution time: 17247 ns
>>> Instruction count: 150 (IPS=8697164)
>>> Jumps: 28 (18.67%) - 0 forwards, 28 backwards
>>> Branching T=27 (77.14%) F=8 (22.86%)
clz: file format elf32-littleriscv
Disassembly of section .text:
00010054 <clz>:
10054: 00000793 li a5,0
10058: 80000737 lui a4,0x80000
1005c: 02000613 li a2,32
10060: 0100006f j 10070 <clz+0x1c>
10064: 00178793 addi a5,a5,1
10068: 00175713 srli a4,a4,0x1
1006c: 00c78663 beq a5,a2,10078 <clz+0x24>
10070: 00e576b3 and a3,a0,a4
10074: fe0688e3 beqz a3,10064 <clz+0x10>
10078: 00078513 mv a0,a5
1007c: 00008067 ret
00010080 <_start>:
10080: 10000637 lui a2,0x10000
10084: 00000793 li a5,0
10088: 80000737 lui a4,0x80000
1008c: 02000593 li a1,32
10090: fff60613 addi a2,a2,-1 # fffffff <__global_pointer$+0xffee6c3>
10094: 00175713 srli a4,a4,0x1
10098: 00178793 addi a5,a5,1
1009c: 00c776b3 and a3,a4,a2
100a0: 00b78463 beq a5,a1,100a8 <_start+0x28>
100a4: fe0688e3 beqz a3,10094 <_start+0x14>
100a8: 00010737 lui a4,0x10
100ac: 11870713 addi a4,a4,280 # 10118 <_start+0x98>
100b0: 06300693 li a3,99
100b4: 40002637 lui a2,0x40002
100b8: 00d60023 sb a3,0(a2) # 40002000 <__global_pointer$+0x3fff06c4>
100bc: 00170713 addi a4,a4,1
100c0: 00074683 lbu a3,0(a4)
100c4: fe069ae3 bnez a3,100b8 <_start+0x38>
100c8: 00010737 lui a4,0x10
100cc: 12870713 addi a4,a4,296 # 10128 <_start+0xa8>
100d0: 03000693 li a3,48
100d4: 40002637 lui a2,0x40002
100d8: 00d60023 sb a3,0(a2) # 40002000 <__global_pointer$+0x3fff06c4>
100dc: 00170713 addi a4,a4,1
100e0: 00074683 lbu a3,0(a4)
100e4: fe069ae3 bnez a3,100d8 <_start+0x58>
100e8: 00010737 lui a4,0x10
100ec: 13470713 addi a4,a4,308 # 10134 <_start+0xb4>
100f0: 02000693 li a3,32
100f4: 40002637 lui a2,0x40002
100f8: 00d60023 sb a3,0(a2) # 40002000 <__global_pointer$+0x3fff06c4>
100fc: 00170713 addi a4,a4,1
10100: 00074683 lbu a3,0(a4)
10104: fe069ae3 bnez a3,100f8 <_start+0x78>
10108: 03078793 addi a5,a5,48
1010c: 0ff7f793 andi a5,a5,255
10110: 00f60023 sb a5,0(a2)
10114: 00008067 ret
Instruction Statistics
Instructions Stat:
LUI = 8
JALR = 1
BEQ = 8
BNE = 27
LBU = 27
SB = 28
ADDI = 41
ANDI = 1
SRLI = 4
AND = 4
LI* = 5
Five Most Frequent:
1) ADDI = 41 (27.33%)
2) SB = 28 (18.67%)
3) BNE = 27 (18.00%)
4) LBU = 27 (18.00%)
5) LUI = 8 (5.33%)
Readelf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x10080
Start of program headers: 52 (bytes into file)
Start of section headers: 740 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 1
Size of section headers: 40 (bytes)
Number of section headers: 7
Section header string table index: 6
Size
text data bss dec hex filename
232 0 0 232 e8 clz
with Os optimization
Run
$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -Os -nostdlib clz.c -o clz
$ ./emu-rv32i clz
clz value of 0x0fffffff is 4
>>> Execution time: 17117 ns
>>> Instruction count: 190 (IPS=11100075)
>>> Jumps: 62 (32.63%) - 29 forwards, 33 backwards
>>> Branching T=32 (82.05%) F=7 (17.95%)
clz: file format elf32-littleriscv
Disassembly of section .text:
00010054 <clz>:
10054: 00000793 li a5,0
10058: 80000737 lui a4,0x80000
1005c: 02000693 li a3,32
10060: 00e57633 and a2,a0,a4
10064: 00061863 bnez a2,10074 <clz+0x20>
10068: 00178793 addi a5,a5,1
1006c: 00175713 srli a4,a4,0x1
10070: fed798e3 bne a5,a3,10060 <clz+0xc>
10074: 00078513 mv a0,a5
10078: 00008067 ret
0001007c <_start>:
1007c: 10000537 lui a0,0x10000
10080: ff010113 addi sp,sp,-16
10084: fff50513 addi a0,a0,-1 # fffffff <__global_pointer$+0xffee6cf>
10088: 00112623 sw ra,12(sp)
1008c: fc9ff0ef jal ra,10054 <clz>
10090: 000107b7 lui a5,0x10
10094: 10c78793 addi a5,a5,268 # 1010c <_start+0x90>
10098: 400026b7 lui a3,0x40002
1009c: 0007c703 lbu a4,0(a5)
100a0: 04071463 bnez a4,100e8 <_start+0x6c>
100a4: 000107b7 lui a5,0x10
100a8: 11c78793 addi a5,a5,284 # 1011c <_start+0xa0>
100ac: 400026b7 lui a3,0x40002
100b0: 0007c703 lbu a4,0(a5)
100b4: 04071063 bnez a4,100f4 <_start+0x78>
100b8: 000107b7 lui a5,0x10
100bc: 12878793 addi a5,a5,296 # 10128 <_start+0xac>
100c0: 400026b7 lui a3,0x40002
100c4: 0007c703 lbu a4,0(a5)
100c8: 02071c63 bnez a4,10100 <_start+0x84>
100cc: 03050513 addi a0,a0,48
100d0: 0ff57513 andi a0,a0,255
100d4: 400027b7 lui a5,0x40002
100d8: 00a78023 sb a0,0(a5) # 40002000 <__global_pointer$+0x3fff06d0>
100dc: 00c12083 lw ra,12(sp)
100e0: 01010113 addi sp,sp,16
100e4: 00008067 ret
100e8: 00e68023 sb a4,0(a3) # 40002000 <__global_pointer$+0x3fff06d0>
100ec: 00178793 addi a5,a5,1
100f0: fadff06f j 1009c <_start+0x20>
100f4: 00e68023 sb a4,0(a3)
100f8: 00178793 addi a5,a5,1
100fc: fb5ff06f j 100b0 <_start+0x34>
10100: 00e68023 sb a4,0(a3)
10104: 00178793 addi a5,a5,1
10108: fbdff06f j 100c4 <_start+0x48>
Instruction Statistics
Instructions Stat:
LUI = 9
JAL = 28
JALR = 2
BNE = 39
LW = 1
LBU = 30
SB = 28
SW = 1
ADDI = 41
ANDI = 1
SRLI = 4
AND = 5
LI* = 2
Five Most Frequent:
1) ADDI = 41 (21.58%)
2) BNE = 39 (20.53%)
3) LBU = 30 (15.79%)
4) JAL = 28 (14.74%)
5) SB = 28 (14.74%)
Readelf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x1007c
Start of program headers: 52 (bytes into file)
Start of section headers: 728 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 1
Size of section headers: 40 (bytes)
Number of section headers: 7
Section header string table index: 6
Size
text data bss dec hex filename
220 0 0 220 dc clz
| | O0 | O3 | Os |
|---|---|---|---|
| Execution time | 43216 ns | 17247 ns | 17117 ns |
| Instruction count | 407 | 150 | 190 |
| Jumps | 45 (11.06%) | 28 (18.67%) | 62 (32.63%) |
| Jumps forwards | 11 | 0 | 29 |
| Jumps backwards | 34 | 28 | 33 |
| Branching True | 33 (82.50%) | 27 (77.14%) | 32 (82.05%) |
| Branching False | 7 (17.50%) | 8 (22.86%) | 7 (17.95%) |
contributed by < shauming1020 > 待釐清的議題 quiz8 測驗 1 [x] 狀態壓縮、Dynamic Programming [x] 理解遞迴版本 recursive-nos1.c [x] 說明 interative-nos1.c 和 interative-nos2.c 之間的差異,為何 interative-nos2.c 執行時間較短呢 ? [ ] 嘗試實作不同上述的程式碼 (限制為 C99/C11 + GNU extensions),應比較遞迴和非遞迴的形式在效能的落差,並分析時間與空間複雜度
Jan 19, 2021contributed by < shauming1020 > Requirements [x] Ensure signature matched with the requirements described in RISC-V Compliance Tests. [x] Check the generated VCD file and use GTKwave to view the waveform. [x] Explain how your program is executed along with Reindeer Simulation. [x] Summarize how RISC-V Compliance Tests works and why the signature should be matched. [ ] Explain how Reindeer works with Verilator. [x] What is 2 x 2 Pipeline? How can we benefit from such pipeline design?
Dec 3, 2020contributed by < shauming1020 > 測驗 1 #include <stdio.h> #include <stdlib.h> double divop(double orig, int slots) { if (slots == 1 || orig == 0) return orig; int od = slots & 1;
Oct 31, 2020Bubble Sort contributed by 王傑世 (https://hackmd.io/4-oWQOprRnCLu3ZeJy31kA?view) :::spoiler Assembly code .data arr: .word 2, 3, 7, 4, 1 .text main: la s0, arr
Oct 26, 2020or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up