riscv
contributed by 王傑世
(https://hackmd.io/4-oWQOprRnCLu3ZeJy31kA?view)
.data
arr: .word 2, 3, 7, 4, 1
.text
main:
la s0, arr
mv t3, s0
addi s1, s1, 4
addi t0, zero, -1
jal ra, bbsort
mv t0, zero
addi s1, s1, 1
mv s0, t3
j print
bbsort:
addi sp, sp, -12
sw ra, 8(sp)
sw s1, 4(sp)
sw s0, 0(sp)
oloop:
addi t0, t0, 1
mv t1, zero
sub t2, s1, t0
blt t0, s1, iloop
addi sp, sp, 12
jr ra
iloop:
mv s0, t3
bge t1, t2, oloop
slli t4, t1, 2
add s0, s0, t4
lw a2, 0(s0)
lw a3, 4(s0)
addi t1, t1, 1
blt a2, a3, swap
j iloop
swap:
sw a2, 4(s0)
sw a3, 0(s0)
j iloop
print:
bge t0, s1, end
lw a0, 0(s0)
li a7, 1
ecall
addi t0, t0, 1
addi s0, s0, 4
j print
end:
void BBSort(int *arr, int size) {
for (int i = 0; i < size; ++i)
for (int j = 0; j < size - i; ++j)
if (arr[j] < arr[j + 1]) {
int temp = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = temp;
}
}
int main()
{
int arr[] = {2, 3, 7, 4, 1};
int size = 5;
BBSort(arr, size - 1);
for (int i = 0; i < size; ++i) {
printf("%d\n", arr[i]);
}
return 0;
}
void BBSort(int *arr, int size) {
for (int i = 0; i < size; ++i)
for (int j = 0; j < size - i; ++j)
if (arr[j] < arr[j + 1]) {
int temp = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = temp;
}
}
void _start()
{
volatile char* tx = (volatile char*) 0x40002000;
int *p, size = 5, arr[5];
arr[0] = 2;
arr[1] = 3;
arr[2] = 7;
arr[3] = 4;
arr[4] = 1;
BBSort(arr, size - 1);
p = arr;
while (size--) {
*tx = *p++ + '0';
}
}
memcpy
when using Os optimization
void _start()
{
volatile char* tx = (volatile char*) 0x40002000;
int *p;
int arr[] = {2, 3, 7, 4, 1}, size = 5;
BBSort(arr, size - 1);
p = arr;
while (size--) {
*tx = *p++ + '0';
}
}
$riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -Os -nostdlib bbsort.c -o bbsort
/home/dcmc/riscv-none-embed-gcc/8.2.0-3.1/bin/../lib/gcc/riscv-none-embed/8.2.0/../../../../riscv-none-embed/bin/ld: /tmp/ccsfBVO3.o: in function `_start':
bbsort.c:(.text+0x58): undefined reference to `memcpy'
collect2: error: ld returned 1 exit status
without optimization
Run
$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -nostdlib bbsort.c -o bbsort_pure
$ ./emu-rv32i bbsort_pure
74321
>>> Execution time: 20031 ns
>>> Instruction count: 488 (IPS=24362238)
>>> Jumps: 33 (6.76%) - 12 forwards, 21 backwards
>>> Branching T=24 (68.57%) F=11 (31.43%)
$ riscv-none-embed-objdump -d bbsort_pure
bbsort_pure: file format elf32-littleriscv
Disassembly of section .text:
00010054 <BBSort>:
10054: fd010113 addi sp,sp,-48
10058: 02812623 sw s0,44(sp)
1005c: 03010413 addi s0,sp,48
10060: fca42e23 sw a0,-36(s0)
10064: fcb42c23 sw a1,-40(s0)
10068: fe042623 sw zero,-20(s0)
1006c: 0c80006f j 10134 <BBSort+0xe0>
10070: fe042423 sw zero,-24(s0)
10074: 0a00006f j 10114 <BBSort+0xc0>
10078: fe842783 lw a5,-24(s0)
1007c: 00279793 slli a5,a5,0x2
10080: fdc42703 lw a4,-36(s0)
10084: 00f707b3 add a5,a4,a5
10088: 0007a703 lw a4,0(a5)
1008c: fe842783 lw a5,-24(s0)
10090: 00178793 addi a5,a5,1
10094: 00279793 slli a5,a5,0x2
10098: fdc42683 lw a3,-36(s0)
1009c: 00f687b3 add a5,a3,a5
100a0: 0007a783 lw a5,0(a5)
100a4: 06f75263 bge a4,a5,10108 <BBSort+0xb4>
100a8: fe842783 lw a5,-24(s0)
100ac: 00279793 slli a5,a5,0x2
100b0: fdc42703 lw a4,-36(s0)
100b4: 00f707b3 add a5,a4,a5
100b8: 0007a783 lw a5,0(a5)
100bc: fef42223 sw a5,-28(s0)
100c0: fe842783 lw a5,-24(s0)
100c4: 00178793 addi a5,a5,1
100c8: 00279793 slli a5,a5,0x2
100cc: fdc42703 lw a4,-36(s0)
100d0: 00f70733 add a4,a4,a5
100d4: fe842783 lw a5,-24(s0)
100d8: 00279793 slli a5,a5,0x2
100dc: fdc42683 lw a3,-36(s0)
100e0: 00f687b3 add a5,a3,a5
100e4: 00072703 lw a4,0(a4)
100e8: 00e7a023 sw a4,0(a5)
100ec: fe842783 lw a5,-24(s0)
100f0: 00178793 addi a5,a5,1
100f4: 00279793 slli a5,a5,0x2
100f8: fdc42703 lw a4,-36(s0)
100fc: 00f707b3 add a5,a4,a5
10100: fe442703 lw a4,-28(s0)
10104: 00e7a023 sw a4,0(a5)
10108: fe842783 lw a5,-24(s0)
1010c: 00178793 addi a5,a5,1
10110: fef42423 sw a5,-24(s0)
10114: fd842703 lw a4,-40(s0)
10118: fec42783 lw a5,-20(s0)
1011c: 40f707b3 sub a5,a4,a5
10120: fe842703 lw a4,-24(s0)
10124: f4f74ae3 blt a4,a5,10078 <BBSort+0x24>
10128: fec42783 lw a5,-20(s0)
1012c: 00178793 addi a5,a5,1
10130: fef42623 sw a5,-20(s0)
10134: fec42703 lw a4,-20(s0)
10138: fd842783 lw a5,-40(s0)
1013c: f2f74ae3 blt a4,a5,10070 <BBSort+0x1c>
10140: 00000013 nop
10144: 02c12403 lw s0,44(sp)
10148: 03010113 addi sp,sp,48
1014c: 00008067 ret
00010150 <_start>:
10150: fd010113 addi sp,sp,-48
10154: 02112623 sw ra,44(sp)
10158: 02812423 sw s0,40(sp)
1015c: 03010413 addi s0,sp,48
10160: 400027b7 lui a5,0x40002
10164: fef42223 sw a5,-28(s0)
10168: 00500793 li a5,5
1016c: fef42423 sw a5,-24(s0)
10170: 00200793 li a5,2
10174: fcf42823 sw a5,-48(s0)
10178: 00300793 li a5,3
1017c: fcf42a23 sw a5,-44(s0)
10180: 00700793 li a5,7
10184: fcf42c23 sw a5,-40(s0)
10188: 00400793 li a5,4
1018c: fcf42e23 sw a5,-36(s0)
10190: 00100793 li a5,1
10194: fef42023 sw a5,-32(s0)
10198: fe842783 lw a5,-24(s0)
1019c: fff78713 addi a4,a5,-1 # 40001fff <__global_pointer$+0x3fff05fb>
101a0: fd040793 addi a5,s0,-48
101a4: 00070593 mv a1,a4
101a8: 00078513 mv a0,a5
101ac: ea9ff0ef jal ra,10054 <BBSort>
101b0: fd040793 addi a5,s0,-48
101b4: fef42623 sw a5,-20(s0)
101b8: 0280006f j 101e0 <_start+0x90>
101bc: fec42783 lw a5,-20(s0)
101c0: 00478713 addi a4,a5,4
101c4: fee42623 sw a4,-20(s0)
101c8: 0007a783 lw a5,0(a5)
101cc: 0ff7f793 andi a5,a5,255
101d0: 03078793 addi a5,a5,48
101d4: 0ff7f713 andi a4,a5,255
101d8: fe442783 lw a5,-28(s0)
101dc: 00e78023 sb a4,0(a5)
101e0: fe842783 lw a5,-24(s0)
101e4: fff78713 addi a4,a5,-1
101e8: fee42423 sw a4,-24(s0)
101ec: fc0798e3 bnez a5,101bc <_start+0x6c>
101f0: 00000013 nop
101f4: 02c12083 lw ra,44(sp)
101f8: 02812403 lw s0,40(sp)
101fc: 03010113 addi sp,sp,48
10200: 00008067 ret
Instruction State
Instructions Stat:
LUI = 1
JAL = 7
JALR = 2
BNE = 6
BLT = 19
BGE = 10
LW = 206
SB = 5
SW = 58
ADDI = 69
ANDI = 10
SLLI = 40
ADD = 40
SUB = 14
LI* = 8
Five Most Frequent:
1) LW = 206 (42.21%)
2) ADDI = 69 (14.14%)
3) SW = 58 (11.89%)
4) SLLI = 40 (8.20%)
5) ADD = 40 (8.20%)
Readelf
$ riscv-none-embed-readelf -h bbsort_pure
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x10150
Start of program headers: 52 (bytes into file)
Start of section headers: 920 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 1
Size of section headers: 40 (bytes)
Number of section headers: 6
Section header string table index: 5
Size
$ riscv-none-embed-size bbsort_pure
text data bss dec hex filename
432 0 0 432 1b0 bbsort_pure
with O3 optimization
Run
$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib bbsort.c -o bbsort_O3
$ ./emu-rv32i bbsort_O3
74321
>>> Execution time: 7310 ns
>>> Instruction count: 50 (IPS=6839945)
>>> Jumps: 4 (8.00%) - 3 forwards, 1 backwards
>>> Branching T=3 (50.00%) F=3 (50.00%)
$ riscv-none-embed-objdump -d bbsort_O3
bbsort_O3: file format elf32-littleriscv
Disassembly of section .text:
00010054 <BBSort>:
10054: 02b05a63 blez a1,10088 <BBSort+0x34>
10058: 00259613 slli a2,a1,0x2
1005c: 00c50633 add a2,a0,a2
10060: 00050793 mv a5,a0
10064: 0007a703 lw a4,0(a5)
10068: 0047a683 lw a3,4(a5)
1006c: 00d75663 bge a4,a3,10078 <BBSort+0x24>
10070: 00d7a023 sw a3,0(a5)
10074: 00e7a223 sw a4,4(a5)
10078: 00478793 addi a5,a5,4
1007c: fec794e3 bne a5,a2,10064 <BBSort+0x10>
10080: ffc60613 addi a2,a2,-4
10084: fcc51ee3 bne a0,a2,10060 <BBSort+0xc>
10088: 00008067 ret
0001008c <_start>:
1008c: fe010113 addi sp,sp,-32
10090: 00100713 li a4,1
10094: 00e12e23 sw a4,28(sp)
10098: 00400713 li a4,4
1009c: 00700793 li a5,7
100a0: 00e12a23 sw a4,20(sp)
100a4: 00200713 li a4,2
100a8: 00e12c23 sw a4,24(sp)
100ac: 00f12823 sw a5,16(sp)
100b0: 00300713 li a4,3
100b4: 00400693 li a3,4
100b8: 00200613 li a2,2
100bc: 00f75a63 bge a4,a5,100d0 <_start+0x44>
100c0: 00070593 mv a1,a4
100c4: 00e12823 sw a4,16(sp)
100c8: 00078713 mv a4,a5
100cc: 00058793 mv a5,a1
100d0: 00d7dc63 bge a5,a3,100e8 <_start+0x5c>
100d4: 00068593 mv a1,a3
100d8: 00d12823 sw a3,16(sp)
100dc: 00f12a23 sw a5,20(sp)
100e0: 00078693 mv a3,a5
100e4: 00058793 mv a5,a1
100e8: 08c6c663 blt a3,a2,10174 <_start+0xe8>
100ec: 00f75a63 bge a4,a5,10100 <_start+0x74>
100f0: 00070613 mv a2,a4
100f4: 00e12823 sw a4,16(sp)
100f8: 00078713 mv a4,a5
100fc: 00060793 mv a5,a2
10100: 00d7d863 bge a5,a3,10110 <_start+0x84>
10104: 00f12a23 sw a5,20(sp)
10108: 00d12823 sw a3,16(sp)
1010c: 00068793 mv a5,a3
10110: 00f75663 bge a4,a5,1011c <_start+0x90>
10114: 00e12823 sw a4,16(sp)
10118: 00078713 mv a4,a5
1011c: 03070713 addi a4,a4,48
10120: 400027b7 lui a5,0x40002
10124: 0ff77713 andi a4,a4,255
10128: 00e78023 sb a4,0(a5) # 40002000 <__global_pointer$+0x3fff067c>
1012c: 01012703 lw a4,16(sp)
10130: 03070713 addi a4,a4,48
10134: 0ff77713 andi a4,a4,255
10138: 00e78023 sb a4,0(a5)
1013c: 01412703 lw a4,20(sp)
10140: 03070713 addi a4,a4,48
10144: 0ff77713 andi a4,a4,255
10148: 00e78023 sb a4,0(a5)
1014c: 01812703 lw a4,24(sp)
10150: 03070713 addi a4,a4,48
10154: 0ff77713 andi a4,a4,255
10158: 00e78023 sb a4,0(a5)
1015c: 01c12703 lw a4,28(sp)
10160: 03070713 addi a4,a4,48
10164: 0ff77713 andi a4,a4,255
10168: 00e78023 sb a4,0(a5)
1016c: 02010113 addi sp,sp,32
10170: 00008067 ret
10174: 00d12c23 sw a3,24(sp)
10178: 00c12a23 sw a2,20(sp)
1017c: 00060693 mv a3,a2
10180: f6dff06f j 100ec <_start+0x60>
Instruction State
Instructions Stat:
LUI = 1
JALR = 1
BLT = 1
BGE = 5
LW = 4
SB = 5
SW = 7
ADDI = 20
ANDI = 5
LI* = 7
Five Most Frequent:
1) ADDI = 20 (40.00%)
2) SW = 7 (14.00%)
3) LI* = 7 (14.00%)
4) BGE = 5 (10.00%)
5) SB = 5 (10.00%)
Readelf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x1008c
Start of program headers: 52 (bytes into file)
Start of section headers: 792 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 1
Size of section headers: 40 (bytes)
Number of section headers: 6
Section header string table index: 5
Size
text data bss dec hex filename
304 0 0 304 130 bbsort_O3
with Os optimization
Run
$ riscv-none-embed-gcc -march=rv32i -mabi=ilp32 -O3 -nostdlib bbsort.c -o bbsort_Os
$ ./emu-rv32i bbsort_Os
74321
>>> Execution time: 9482 ns
>>> Instruction count: 160 (IPS=16874077)
>>> Jumps: 26 (16.25%) - 10 forwards, 16 backwards
>>> Branching T=19 (63.33%) F=11 (36.67%)
$ riscv-none-embed-objdump -d bbsort_Os
bbsort_Os: file format elf32-littleriscv
Disassembly of section .text:
00010054 <BBSort>:
10054: 00058713 mv a4,a1
10058: 40e587b3 sub a5,a1,a4
1005c: 00b7c463 blt a5,a1,10064 <BBSort+0x10>
10060: 00008067 ret
10064: 00050793 mv a5,a0
10068: 00000693 li a3,0
1006c: 0007a603 lw a2,0(a5)
10070: 0047a803 lw a6,4(a5)
10074: 01065663 bge a2,a6,10080 <BBSort+0x2c>
10078: 0107a023 sw a6,0(a5)
1007c: 00c7a223 sw a2,4(a5)
10080: 00168693 addi a3,a3,1
10084: 00478793 addi a5,a5,4
10088: fee6c2e3 blt a3,a4,1006c <BBSort+0x18>
1008c: fff70713 addi a4,a4,-1
10090: fc9ff06f j 10058 <BBSort+0x4>
00010094 <_start>:
10094: fd010113 addi sp,sp,-48
10098: 00200793 li a5,2
1009c: 00f12623 sw a5,12(sp)
100a0: 00300793 li a5,3
100a4: 00f12823 sw a5,16(sp)
100a8: 00700793 li a5,7
100ac: 00f12a23 sw a5,20(sp)
100b0: 00400793 li a5,4
100b4: 00f12c23 sw a5,24(sp)
100b8: 00400593 li a1,4
100bc: 00100793 li a5,1
100c0: 00c10513 addi a0,sp,12
100c4: 02112623 sw ra,44(sp)
100c8: 00f12e23 sw a5,28(sp)
100cc: f89ff0ef jal ra,10054 <BBSort>
100d0: 00000713 li a4,0
100d4: 40002637 lui a2,0x40002
100d8: 01400693 li a3,20
100dc: 00c10793 addi a5,sp,12
100e0: 00e787b3 add a5,a5,a4
100e4: 0007a783 lw a5,0(a5)
100e8: 00470713 addi a4,a4,4
100ec: 03078793 addi a5,a5,48
100f0: 0ff7f793 andi a5,a5,255
100f4: 00f60023 sb a5,0(a2) # 40002000 <__global_pointer$+0x3fff06f8>
100f8: fed712e3 bne a4,a3,100dc <_start+0x48>
100fc: 02c12083 lw ra,44(sp)
10100: 03010113 addi sp,sp,48
10104: 00008067 ret
Instruction State
Instructions Stat:
LUI = 1
JAL = 5
JALR = 2
BNE = 5
BLT = 15
BGE = 10
LW = 26
SB = 5
SW = 16
ADDI = 59
ANDI = 5
ADD = 5
SUB = 5
LI* = 12
Five Most Frequent:
1) ADDI = 59 (36.88%)
2) LW = 26 (16.25%)
3) SW = 16 (10.00%)
4) BLT = 15 (9.38%)
5) LI* = 12 (7.50%)
Readelf
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x10094
Start of program headers: 52 (bytes into file)
Start of section headers: 668 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 1
Size of section headers: 40 (bytes)
Number of section headers: 6
Section header string table index: 5
Size
text data bss dec hex filename
180 0 0 180 b4 bbsort_Os
O0 | O3 | Os | |
---|---|---|---|
Execution time | 20031 ns | 7310 ns | 9482 ns |
Instruction count | 488 | 50 | 160 |
Jumps | 33 (6.76%) | 4 (8.00%) | 26 (16.25%) |
Jumps forwards | 12 | 3 | 10 |
Jumps backwards | 21 | 1 | 16 |
Branching True | 24 (68.57%) | 3 (50.00%) | 19 (63.33%) |
Branching False | 11 (31.43%) | 3 (50.00%) | 11 (36.67%) |
contributed by < shauming1020 > 待釐清的議題 quiz8 測驗 1 [x] 狀態壓縮、Dynamic Programming [x] 理解遞迴版本 recursive-nos1.c [x] 說明 interative-nos1.c 和 interative-nos2.c 之間的差異,為何 interative-nos2.c 執行時間較短呢 ? [ ] 嘗試實作不同上述的程式碼 (限制為 C99/C11 + GNU extensions),應比較遞迴和非遞迴的形式在效能的落差,並分析時間與空間複雜度
Jan 19, 2021contributed by < shauming1020 > Requirements [x] Ensure signature matched with the requirements described in RISC-V Compliance Tests. [x] Check the generated VCD file and use GTKwave to view the waveform. [x] Explain how your program is executed along with Reindeer Simulation. [x] Summarize how RISC-V Compliance Tests works and why the signature should be matched. [ ] Explain how Reindeer works with Verilator. [x] What is 2 x 2 Pipeline? How can we benefit from such pipeline design?
Dec 3, 2020contributed by < shauming1020 > 測驗 1 #include <stdio.h> #include <stdlib.h> double divop(double orig, int slots) { if (slots == 1 || orig == 0) return orig; int od = slots & 1;
Oct 31, 2020Count Leading Zero contributed by 鄭惟 (https://hackmd.io/@WeiCheng14159/rkUifs2Hw) :::spoiler Assembly Code .data input: .word 0x0000000f one: .word 0x80000000 str1: .string "clz value of " str2: .string " is "
Oct 26, 2020or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up