# Assignment 2 rv32emu Note ###### tags: `Computer Architecture` ## Resources [Lab2: RISC-V RV32I[MACF] emulator with ELF support](https://hackmd.io/@sysprog/SJAR5XMmi) [rv32emu](https://github.com/sysprog21/rv32emu) [github](https://github.com/chiangkd/Computer-Architecture) ## Original Code **Assembly - Original Solution by [曾晧峖](https://hackmd.io/@tseng0201/rkN-AlvMi)** ``` # RISC-V assembly program to print "Hello World!" to stdout. .org 0 # Provide program starting address to linker .global _start /* newlib system calls */ .set SYSEXIT, 93 .set SYSWRITE, 64 .data test_1_s: .string "anagram" test_1_t: .string "nagaram" test_2_s: .string "rat" test_2_t: .string "anagram" test_3_s: .string "tseng" test_3_t: .string "gnest" correct_1: .string "test_1: correct" not_correct_1: .string "test_1: not correct" correct_2: .string "test_2: correct" not_correct_2: .string "test_2: not correct" correct_3: .string "test_3: correct" not_correct_3: .string "test_3: not correct" .text main: addi a7, x0, 4 la a0, test_1_s # s(a0) = test_1_s la a1, test_1_t # t(a1) = test_1_t jal ra, isAnagram # call isAnagram(s(a0), t(a1)) bne a0, x0 TRUE_1 # if isAnagram(s(a0), t(a1)) == 1 correct la a0, not_correct_1 # not correct print error ecall TEST_2: la a0, test_2_s # s(a0) = test_2_s la a1, test_2_t # t(a1) = test_2_t jal ra isAnagram # call isAnagram(s(a0), t(a1)) beq a0, x0 TRUE_2 # if isAnagram(s(a0), t(a1)) == 0 correct la a0, not_correct_2 # not correct print error ecall TEST_3: la a0, test_3_s # s(a0) = test_3_s la a1, test_3_t # t(a1) = test_3_t jal ra isAnagram # call isAnagram(s(a0), t(a1)) bne a0, x0 TRUE_3 # if isAnagram(s(a0), t(a1)) == 0 correct la a0, not_correct_3 # not correct print error j END ecall TRUE_1: la a0, correct_1 # correct print correct ecall j TEST_2 # go to example2 TRUE_2: la a0, correct_2 # correct print correct ecall j TEST_3 # go to example3 TRUE_3: la a0, correct_3 # correct print correct ecall END: addi a7, x0, 10 ecall isAnagram: # a0 = s, a1 = t 
addi sp, sp, -104 # get sapce for store int letter_freq[26] addi t0, sp, 0 # t0 = letter_freq[0] addi t1, x0, 0 # t1 = i = 0 li t2, 26 LOOP1: # int letter_freq[26] = {0}; beq t1, t2 GET_FREQ_s # if i < 26 sw x0, 0(t0) # letter_freq[i] = 0; addi, t1, t1, 1 # i++; addi, t0, t0, 4 # j LOOP1 GET_FREQ_s: addi, t0, sp, 0 # t0 = letter_freq[0] addi, t1, a0, 0 # t1 = s addi, t2, x0, 0 # t2 = i = 0 LOOP2: # for( ;s[i] ;i++ ) add t3, t1, t2 # get address of s[index] from s[0] + lb t5, (0)t3 # t5 = s[i] beq, t5, x0, GET_FREQ_F # if s[i] ==0 break the loop addi t5, t5 -97 # t5 = s[i] - 'a' slli t5, t5, 2 # get offset form letter_freq[0] to letter_freq[s[i] - 'a'] add t5, t5, t0 # get address of letter_freq[s[i] - 'a'] lw t3, 0(t5) # t3 = [freq[s[i] - 'a']] addi t3, t3, 1 # t3 = [freq[s[i] - 'a']] + 1 sw t3, 0(t5) # [freq[s[i] - 'a']] = ([freq[s[i] - 'a']] + 1) addi t2, t2, 1 # i++ j LOOP2 GET_FREQ_F: addi, t0, sp, 0 # t0 = freq[] addi, t1, a1, 0 # t1 = t addi, t2, x0, 0 # t2 = i = 0 LOOP3: # for( ;s[i] ;i++ ) add t3, t1, t2 # get address of t[index] lb t5, (0)t3 # t5 = t[i] beq, t5, x0, CHECK # if t[i] == 0 break the loop addi t5, t5 -97 # t5 = t[i] - 'a' slli t5, t5, 2 # get offset form letter_freq[0] to letter_freq[t[i] - 'a'] add t5, t5, t0 lw t3, 0(t5) # t5 = [freq[t[i] - 'a']] addi t3, t3, -1 # t3 = [freq[t[i] - 'a']] - 1 sw t3, 0(t5) # [freq[t[i] - 'a']] = ([freq[t[i] - 'a']] - 1) addi t2, t2, 1 # i++ j LOOP3 CHECK: addi t0, sp, 0 # t0 = address freq[0] addi t1, x0, 0 # t1 = i = 0 li t2, 26 LOOP4: # for (int i = 0; i < 26; i++) beq t1, t2 TRUE # i < 26 lw t3, 0(t0) # t3 = freq[i]; bne t3, x0, FALSE # if freq[i] != 0 break addi, t1, t1, 1 # i++; addi, t0, t0, 4 # freq + 1 j LOOP4 FALSE: addi a0, x0, 0 # if flase return false j END_F TRUE: addi a0, x0, 01 # if pass check return true END_F: jr ra # return, a0 = return value = true or false ``` first when I `make` the program, I got the message below ``` riscv-none-elf-as -R -march=rv32i -mabi=ilp32 -o hw2.o hw2.S hw2.S: 
Assembler messages: hw2.S: Warning: end of file in comment; newline inserted hw2.S:34: Error: illegal operands `bne a0,x0 TRUE_1' hw2.S:40: Error: illegal operands `jal ra isAnagram' hw2.S:41: Error: illegal operands `beq a0,x0 TRUE_2' hw2.S:47: Error: illegal operands `jal ra isAnagram' hw2.S:48: Error: illegal operands `bne a0,x0 TRUE_3' hw2.S:75: Error: illegal operands `beq t1,t2 GET_FREQ_s' hw2.S:77: Error: unrecognized opcode `addi, t1,t1,1' hw2.S:78: Error: unrecognized opcode `addi, t0,t0,4' hw2.S:82: Error: unrecognized opcode `addi, t0,sp,0' hw2.S:83: Error: unrecognized opcode `addi, t1,a0,0' hw2.S:84: Error: unrecognized opcode `addi, t2,x0,0' hw2.S:87: Error: illegal operands `lb t5,(0)t3' hw2.S:88: Error: unrecognized opcode `beq, t5,x0,GET_FREQ_F' hw2.S:89: Error: illegal operands `addi t5,t5-97' hw2.S:98: Error: unrecognized opcode `addi, t0,sp,0' hw2.S:99: Error: unrecognized opcode `addi, t1,a1,0' hw2.S:100: Error: unrecognized opcode `addi, t2,x0,0' hw2.S:103: Error: illegal operands `lb t5,(0)t3' hw2.S:104: Error: unrecognized opcode `beq, t5,x0,CHECK' hw2.S:105: Error: illegal operands `addi t5,t5-97' hw2.S:118: Error: illegal operands `beq t1,t2 TRUE' hw2.S:121: Error: unrecognized opcode `addi, t1,t1,1' hw2.S:122: Error: unrecognized opcode `addi, t0,t0,4' make: *** [Makefile:9: hw2.o] Error 1 ``` - `bne a0, x0 TRUE_1` needs to be written as `bne a0, x0, TRUE_1` **Don't forget the comma** - `lb t5, (0)t3` needs to be written as `lb t5, 0(t3)` - `beq, t5, x0, GET_FREQ_F` needs to be written as `beq t5, x0, GET_FREQ_F` **Remove the redundant comma** ## Problem ### Warning: end of file in comment ``` Warning: end of file in comment; newline inserted ``` - [No new line at end of file? 
What does this mean?](https://www.linuxquestions.org/questions/programming-9/no-new-line-at-end-of-file-what-does-this-mean-281830/) **Fix** ```diff= end: li a7, SYSEXIT # "exit" syscall add a0, x0, 0 # Use 0 return code ecall # invoke syscall to terminate the program + #add a blank line ``` ### Code bug to fix ``` # RISC-V assembly program to print "Hello World!" to stdout. .org 0 # Provide program starting address to linker .global _start /* newlib system calls */ .set SYSEXIT, 93 .set SYSWRITE, 64 .data test_1_s: .string "anagram" test_1_t: .string "nagaram" test_2_s: .string "rat" test_2_t: .string "anagram" test_3_s: .string "tseng" test_3_t: .string "gnest" correct_1: .string "test_1: correct\n" .set t1cor_size, .-correct_1 not_correct_1: .string "test_1: not correct\n" .set t1incor_size, .-not_correct_1 correct_2: .string "test_2: correct\n" .set t2cor_size, .-correct_2 not_correct_2: .string "test_2: not correct\n" .set t2incor_size, .-not_correct_2 correct_3: .string "test_3: correct\n" .set t3cor_size, .-correct_3 not_correct_3: .string "test_3: not correct\n" .set t3incor_size, .-not_correct_3 .text _start: li a7, SYSWRITE # "write" system call li a0, 1 # 1 = standard output (stdout) la a0, test_1_s # s(a0) = test_1_s la a1, test_1_t # t(a1) = test_1_t jal ra, isAnagram # call isAnagram(s(a0), t(a1)) bne a0, x0, TRUE_1 # if isAnagram(s(a0), t(a1)) == 1 correct la a1, not_correct_1 # test1 not correct address la a2, t1incor_size # test1 not correct length ecall TEST_2: la a0, test_2_s # s(a0) = test_2_s la a1, test_2_t # t(a1) = test_2_t jal ra, isAnagram # call isAnagram(s(a0), t(a1)) beqz a0, TRUE_2 # if isAnagram(s(a0), t(a1)) == 0 correct la a1, not_correct_2 # test2 not correct address la a2, t2incor_size # test1 not correct length ecall TEST_3: la a0, test_3_s # s(a0) = test_3_s la a1, test_3_t # t(a1) = test_3_t jal ra, isAnagram # call isAnagram(s(a0), t(a1)) bne a0, x0, TRUE_3 # if isAnagram(s(a0), t(a1)) == 1 correct la a1, not_correct_3 # test3 
not correct address la a2, t3incor_size # test3 not correct length ecall j end TRUE_1: la a1, correct_1 # test1 correct address la a2, t1cor_size # test1 correct length ecall j TEST_2 # go to example2 TRUE_2: la a1, correct_2 # test2 correct address la a2, t2cor_size # test2 correct length ecall j TEST_3 # go to example3 TRUE_3: la a1, correct_3 # test2 correct address la a2, t3cor_size # test2 correct length ecall isAnagram: # a0 = s, a1 = t addi sp, sp, -104 # get sapce for store int letter_freq[26] addi t0, sp, 0 # t0 = letter_freq[0] addi t1, x0, 0 # t1 = i = 0 li t2, 26 LOOP1: # int letter_freq[26] = {0}; beq t1, t2, GET_FREQ_s # if i < 26 sw x0, 0(t0) # letter_freq[i] = 0; addi t1, t1, 1 # i++; addi t0, t0, 4 # j LOOP1 GET_FREQ_s: addi t0, sp, 0 # t0 = letter_freq[0] addi t1, a0, 0 # t1 = s addi t2, x0, 0 # t2 = i = 0 LOOP2: # for( ;s[i] ;i++ ) add t3, t1, t2 # get address of s[index] from s[0] + lb t5, 0(t3) # t5 = s[i] beq t5, x0, GET_FREQ_F # if s[i] ==0 break the loop addi t5, t5, -97 # t5 = s[i] - 'a' slli t5, t5, 2 # get offset form letter_freq[0] to letter_freq[s[i] - 'a'] add t5, t5, t0 # get address of letter_freq[s[i] - 'a'] lw t3, 0(t5) # t3 = [freq[s[i] - 'a']] addi t3, t3, 1 # t3 = [freq[s[i] - 'a']] + 1 sw t3, 0(t5) # [freq[s[i] - 'a']] = ([freq[s[i] - 'a']] + 1) addi t2, t2, 1 # i++ j LOOP2 GET_FREQ_F: addi t0, sp, 0 # t0 = freq[] addi t1, a1, 0 # t1 = t addi t2, x0, 0 # t2 = i = 0 LOOP3: # for( ;s[i] ;i++ ) add t3, t1, t2 # get address of t[index] lb t5, 0(t3) # t5 = t[i] beq t5, x0, CHECK # if t[i] == 0 break the loop addi t5, t5, -97 # t5 = t[i] - 'a' slli t5, t5, 2 # get offset form letter_freq[0] to letter_freq[t[i] - 'a'] add t5, t5, t0 lw t3, 0(t5) # t5 = [freq[t[i] - 'a']] addi t3, t3, -1 # t3 = [freq[t[i] - 'a']] - 1 sw t3, 0(t5) # [freq[t[i] - 'a']] = ([freq[t[i] - 'a']] - 1) addi t2, t2, 1 # i++ j LOOP3 CHECK: addi t0, sp, 0 # t0 = address freq[0] addi t1, x0, 0 # t1 = i = 0 li t2, 26 LOOP4: # for (int i = 0; i < 26; i++) beq t1, t2, 
TRUE # i < 26 lw t3, 0(t0) # t3 = freq[i]; bne t3, x0, FALSE # if freq[i] != 0 break addi t1, t1, 1 # i++; addi t0, t0, 4 # freq + 1 j LOOP4 FALSE: addi a0, x0, 0 # if flase return false j END_F TRUE: addi a0, x0, 1 # if pass check return true END_F: jr ra # return, a0 = return value = true or false end: li a7, SYSEXIT # "exit" syscall add a0, x0, 0 # Use 0 return code ecall # invoke syscall to terminate the program ``` :::warning **Problem:** In Ripes, the assembly code works successfully, but in rv32emu, **test_2** does not show its result. When I set `test_2_t` to the same length as `test_2_s`, it works! I was still trying to find the problem. **Fix it:** I finally found out that in Ripes, the `PrintString` system call only needs `a0` for the string address and `a7` for the system call index. But in rv32emu, the system call needs `a0` to hold the [file descriptor](https://www.computerhope.com/jargon/f/file-descriptor.htm#std) (**1** for stdout in my case), `a1` for the string address, and `a2` for the string length, as mentioned in [docs/syscall.md](https://github.com/sysprog21/rv32emu/blob/master/docs/syscall.md). However, after jumping to `isAnagram`, `a0` is overwritten with the return value, **True** or **False**, so if the original string does not match the specific string (not an **Anagram**), `a0` will be `0` and the associated `ecall` instruction will treat it as **stdin**, which causes this problem. ::: For the three testcases, it is expected that - testcase 1 returns **True** - testcase 2 returns **False** - testcase 3 returns **True** after calling the `isAnagram` function. 
So, it's necesserily to add `li a0, 1` when return value is **False** (means `0`) ```diff TEST_1: addi sp, sp, -4 sw ra, 0(sp) la a0, test_1_s # s(a0) = test_1_s la a1, test_1_t # t(a1) = test_1_t jal ra, isAnagram # call isAnagram(s(a0), t(a1)) bne a0, x0, TRUE_1 # if isAnagram(s(a0), t(a1)) == 1 correct + li a0, 1 # reload a0 to handle stdout la a1, not_correct_1 # test1 not correct address la a2, t1incor_size # test1 not correct length ecall lw ra, 0(sp) addi sp, sp, 4 jr ra TEST_2: addi sp, sp, -4 sw ra, 0(sp) la a0, test_2_s # s(a0) = test_2_s la a1, test_2_t # t(a1) = test_2_t jal ra, isAnagram # call isAnagram(s(a0), t(a1)) beq a0, x0, TRUE_2 # if isAnagram(s(a0), t(a1)) == 0 correct la a1, not_correct_2 # test2 not correct address la a2, t2incor_size # test not correct length ecall lw ra, 0(sp) addi sp, sp, 4 jr ra TEST_3: addi sp, sp, -4 sw ra, 0(sp) la a0, test_3_s # s(a0) = test_3_s la a1, test_3_t # t(a1) = test_3_t jal ra, isAnagram # call isAnagram(s(a0), t(a1)) bne a0, x0, TRUE_3 # if isAnagram(s(a0), t(a1)) == 1 correct + li a0, 1 # reload a0 to handle stdout la a1, not_correct_3 # test3 not correct address la a2, t3incor_size # test3 not correct length ecall lw ra, 0(sp) addi sp, sp, 4 jr ra TRUE_1: la a1, correct_1 # test1 correct address la a2, t1cor_size # test1 correct length ecall lw ra, 0(sp) addi sp, sp, 4 jr ra # go to example2 TRUE_2: + li a0, 1 # reload a0 to handle stdout la a1, correct_2 # test2 correct address la a2, t2cor_size # test2 correct length ecall lw ra, 0(sp) addi sp, sp, 4 jr ra # go to example3 TRUE_3: la a1, correct_3 # test2 correct address la a2, t3cor_size # test2 correct length ecall lw ra, 0(sp) addi sp, sp, 4 jr ra ``` ## Objdump Assembly code Note ### -O0 ```= 00010184 <isAnagram>: 10184: f6010113 addi sp,sp,-160 10188: 08112e23 sw ra,156(sp) 1018c: 08812c23 sw s0,152(sp) 10190: 0a010413 addi s0,sp,160 10194: f6a42623 sw a0,-148(s0) 10198: f6b42423 sw a1,-152(s0) ######################### Section 0 
######################### 1019c: f7c40793 addi a5,s0,-132 101a0: 06800713 li a4,104 # int letter_freq[26] = {0} 101a4: 00070613 mv a2,a4 # a2 = a4 101a8: 00000593 li a1,0 101ac: 00078513 mv a0,a5 # initial address 101b0: 328000ef jal ra,104d8 <memset> 101b4: fe042623 sw zero,-20(s0) ######################### Section 0 END ######################### ######################### Section 1 ######################### 101b8: 0480006f j 10200 <isAnagram+0x7c> 101bc: fec42783 lw a5,-20(s0) # a5 = 0 101c0: f6c42703 lw a4,-148(s0) # a4 = a0 101c4: 00f707b3 add a5,a4,a5 # a5 = a0 (test_1_s address) + a5 (offset) 101c8: 0007c783 lbu a5,0(a5) # a5 = s[i] 101cc: f9f78713 addi a4,a5,-97 # 97 = 'a', a4 = s[i] - 'a' (0 ~ 25) 101d0: 00271793 slli a5,a4,0x2 # push letter_freq 101d4: ff078793 addi a5,a5,-16 101d8: 008787b3 add a5,a5,s0 # s0 = stack top, 101dc: f8c7a783 lw a5,-116(a5) # a5 = letter_freq[s[i] - 'a'] 101e0: 00178693 addi a3,a5,1 # a3 = letter_freq[s[i] - 'a']++ 101e4: 00271793 slli a5,a4,0x2 101e8: ff078793 addi a5,a5,-16 101ec: 008787b3 add a5,a5,s0 101f0: f8d7a623 sw a3,-116(a5) 101f4: fec42783 lw a5,-20(s0) # i = 0 101f8: 00178793 addi a5,a5,1 # i++ 101fc: fef42623 sw a5,-20(s0) # stored back 10200: fec42783 lw a5,-20(s0) # a5 = 0 10204: f6c42703 lw a4,-148(s0) # a4 = a0 10208: 00f707b3 add a5,a4,a5 # a5 = a0(test_1_s address) + a5 (offset) 1020c: 0007c783 lbu a5,0(a5) # a5 = s[i] 10210: fa0796e3 bnez a5,101bc <isAnagram+0x38> # s[i] != 0 10214: fe042423 sw zero,-24(s0) ######################### Section 1 END ######################### ######################### Section 2 ######################### 10218: 0480006f j 10260 <isAnagram+0xdc> 1021c: fe842783 lw a5,-24(s0) 10220: f6842703 lw a4,-152(s0) 10224: 00f707b3 add a5,a4,a5 10228: 0007c783 lbu a5,0(a5) 1022c: f9f78713 addi a4,a5,-97 10230: 00271793 slli a5,a4,0x2 10234: ff078793 addi a5,a5,-16 10238: 008787b3 add a5,a5,s0 1023c: f8c7a783 lw a5,-116(a5) 10240: fff78693 addi a3,a5,-1 10244: 00271793 slli a5,a4,0x2 10248: 
ff078793 addi a5,a5,-16 1024c: 008787b3 add a5,a5,s0 10250: f8d7a623 sw a3,-116(a5) 10254: fe842783 lw a5,-24(s0) 10258: 00178793 addi a5,a5,1 1025c: fef42423 sw a5,-24(s0) 10260: fe842783 lw a5,-24(s0) 10264: f6842703 lw a4,-152(s0) 10268: 00f707b3 add a5,a4,a5 1026c: 0007c783 lbu a5,0(a5) 10270: fa0796e3 bnez a5,1021c <isAnagram+0x98> 10274: fe042223 sw zero,-28(s0) ######################### Section 2 END ######################### ######################### Section 3 ######################### 10278: 0300006f j 102a8 <isAnagram+0x124> 1027c: fe442783 lw a5,-28(s0) 10280: 00279793 slli a5,a5,0x2 10284: ff078793 addi a5,a5,-16 10288: 008787b3 add a5,a5,s0 1028c: f8c7a783 lw a5,-116(a5) 10290: 00078663 beqz a5,1029c <isAnagram+0x118> 10294: 00000793 li a5,0 10298: 0200006f j 102b8 <isAnagram+0x134> 1029c: fe442783 lw a5,-28(s0) 102a0: 00178793 addi a5,a5,1 102a4: fef42223 sw a5,-28(s0) 102a8: fe442703 lw a4,-28(s0) 102ac: 01900793 li a5,25 102b0: fce7d6e3 bge a5,a4,1027c <isAnagram+0xf8> 102b4: 00100793 li a5,1 102b8: 00078513 mv a0,a5 ######################### Section 3 END ######################### 102bc: 09c12083 lw ra,156(sp) 102c0: 09812403 lw s0,152(sp) 102c4: 0a010113 addi sp,sp,160 102c8: 00008067 ret ``` Section 0 corresponds to the original C code ```c int letter_freq[26] = {0}; ``` Section 1 corresponds to the original C code ```c for (int i = 0; s[i]; i++) { letter_freq[s[i] - 'a']++; } ``` - I don't know why `line 26` needs `addi a5, a5, -16`. (Note: the constants add up. The `-16` added here plus the `-116` displacement of the following `lw`/`sw` gives `-132`, which is exactly the base address of `letter_freq` that Section 0 passes to `memset` via `addi a5,s0,-132`. So the accessed address is `s0 - 132 + 4 * (s[i] - 'a')`; the unoptimized code simply splits the array's frame offset into two parts.) Section 2 corresponds to the original C code ```c for (int i = 0; t[i]; i++) { letter_freq[t[i] - 'a']--; } ``` Section 3 corresponds to the original C code ```c for (int i = 0; i < 26; i++) { if (letter_freq[i]) return 0; } return 1; ``` ### -O1 ```= 00010184 <isAnagram>: 10184: f8010113 addi sp,sp,-128 10188: 06112e23 sw ra,124(sp) 1018c: 06812c23 sw s0,120(sp) 10190: 06912a23 sw s1,116(sp) 10194: 00050493 mv s1,a0 10198: 00058413 mv s0,a1 #################### Section 0 #################### 1019c: 06800613 li a2,104 
101a0: 00000593 li a1,0 101a4: 00810513 addi a0,sp,8 101a8: 264000ef jal ra,1040c <memset> 101ac: 0004c783 lbu a5,0(s1) #################### Section 0 End #################### #################### Section 1 #################### 101b0: 02078863 beqz a5,101e0 <isAnagram+0x5c> # a5 = 0 break the for loop 101b4: 00148513 addi a0,s1,1 # s++ 101b8: f9f78793 addi a5,a5,-97 # - 'a' 101bc: 00279793 slli a5,a5,0x2 101c0: 07078793 addi a5,a5,112 101c4: 002787b3 add a5,a5,sp 101c8: f987a703 lw a4,-104(a5) 101cc: 00170713 addi a4,a4,1 101d0: f8e7ac23 sw a4,-104(a5) 101d4: 00150513 addi a0,a0,1 # i++ 101d8: fff54783 lbu a5,-1(a0) # a5 = s[i] 101dc: fc079ee3 bnez a5,101b8 <isAnagram+0x34> # s[i]!=0 #################### Section 1 End #################### #################### Section 2 #################### 101e0: 00044783 lbu a5,0(s0) 101e4: 02078863 beqz a5,10214 <isAnagram+0x90> 101e8: 00140593 addi a1,s0,1 101ec: f9f78793 addi a5,a5,-97 101f0: 00279793 slli a5,a5,0x2 101f4: 07078793 addi a5,a5,112 101f8: 002787b3 add a5,a5,sp 101fc: f987a703 lw a4,-104(a5) 10200: fff70713 addi a4,a4,-1 10204: f8e7ac23 sw a4,-104(a5) 10208: 00158593 addi a1,a1,1 1020c: fff5c783 lbu a5,-1(a1) 10210: fc079ee3 bnez a5,101ec <isAnagram+0x68> #################### Section 2 End #################### #################### Section 3 #################### 10214: 00810793 addi a5,sp,8 10218: 07010693 addi a3,sp,112 1021c: 0007a703 lw a4,0(a5) 10220: 00071a63 bnez a4,10234 <isAnagram+0xb0> 10224: 00478793 addi a5,a5,4 10228: fed79ae3 bne a5,a3,1021c <isAnagram+0x98> 1022c: 00100513 li a0,1 10230: 0080006f j 10238 <isAnagram+0xb4> 10234: 00000513 li a0,0 #################### Section 3 End #################### 10238: 07c12083 lw ra,124(sp) 1023c: 07812403 lw s0,120(sp) 10240: 07412483 lw s1,116(sp) 10244: 08010113 addi sp,sp,128 10248: 00008067 ret ``` Section 0 is corredponded to original C code ```c int letter_freq[26] = {0}; ``` Section 1 is corresponded to original C code ```c for (int i = 0; s[i]; i++) { 
letter_freq[s[i] - 'a']++; } ``` Section 2 is corresponded to original C code ```c for (int i = 0; t[i]; i++) { letter_freq[t[i] - 'a']--; } ``` Section 3 is corresponded to original C code ```c for (int i = 0; i < 26; i++) { if (letter_freq[i]) return 0; } return 1; ``` In `isAnagram()` section 1, the assembly code first do the plus in `line 20(101b4)`, and extract value `array[i-1]` in `line 29(101d8)` for the loop condition. - If the extracted value is 0 (means loop is over), goto next stage. - If the extracted value is not 0 (means loop should keep going), branch to `line 21(101b8)`(next line of `i++`). And for section 3, reduce one branch and some code size. ### -O2 ```= 00010244 <isAnagram>: 10244: f8010113 addi sp,sp,-128 10248: 06812c23 sw s0,120(sp) 1024c: 06912a23 sw s1,116(sp) 10250: 00058413 mv s0,a1 10254: 00050493 mv s1,a0 #################### Section 0 #################### 10258: 06800613 li a2,104 1025c: 00000593 li a1,0 10260: 00810513 addi a0,sp,8 10264: 06112e23 sw ra,124(sp) 10268: 1bc000ef jal ra,10424 <memset> 1026c: 0004c783 lbu a5,0(s1) #################### Section 0 End #################### #################### Section 1 #################### 10270: 02078863 beqz a5,102a0 <isAnagram+0x5c> 10274: 00148513 addi a0,s1,1 10278: f9f78793 addi a5,a5,-97 1027c: 00279793 slli a5,a5,0x2 10280: 07078793 addi a5,a5,112 10284: 002786b3 add a3,a5,sp 10288: f986a703 lw a4,-104(a3) 1028c: 00054783 lbu a5,0(a0) 10290: 00150513 addi a0,a0,1 10294: 00170713 addi a4,a4,1 10298: f8e6ac23 sw a4,-104(a3) 1029c: fc079ee3 bnez a5,10278 <isAnagram+0x34> #################### Section 1 End #################### #################### Section 2 #################### 102a0: 00044783 lbu a5,0(s0) 102a4: 02078863 beqz a5,102d4 <isAnagram+0x90> 102a8: 00140593 addi a1,s0,1 102ac: f9f78793 addi a5,a5,-97 102b0: 00279793 slli a5,a5,0x2 102b4: 07078793 addi a5,a5,112 102b8: 002786b3 add a3,a5,sp 102bc: f986a703 lw a4,-104(a3) 102c0: 0005c783 lbu a5,0(a1) 102c4: 00158593 addi 
a1,a1,1 102c8: fff70713 addi a4,a4,-1 102cc: f8e6ac23 sw a4,-104(a3) 102d0: fc079ee3 bnez a5,102ac <isAnagram+0x68> #################### Section 2 End #################### #################### Section 3 #################### 102d4: 00810793 addi a5,sp,8 102d8: 07010693 addi a3,sp,112 102dc: 0080006f j 102e4 <isAnagram+0xa0> 102e0: 02f68463 beq a3,a5,10308 <isAnagram+0xc4> 102e4: 0007a703 lw a4,0(a5) 102e8: 00478793 addi a5,a5,4 102ec: fe070ae3 beqz a4,102e0 <isAnagram+0x9c> 102f0: 07c12083 lw ra,124(sp) 102f4: 07812403 lw s0,120(sp) 102f8: 07412483 lw s1,116(sp) 102fc: 00000513 li a0,0 10300: 08010113 addi sp,sp,128 10304: 00008067 ret 10308: 07c12083 lw ra,124(sp) 1030c: 07812403 lw s0,120(sp) 10310: 07412483 lw s1,116(sp) 10314: 00100513 li a0,1 10318: 08010113 addi sp,sp,128 1031c: 00008067 ret #################### Section 3 End #################### ``` - Each section handle the same region as mentioned before. Section 3 is a little different with `-O1` optimization, it loop between `line 49` and `line 50`. And if `a4` is not equal to 0, just return 0. 
### -O3 ```= 00010244 <isAnagram>: 10244: f8010113 addi sp,sp,-128 10248: 06812c23 sw s0,120(sp) 1024c: 06912a23 sw s1,116(sp) 10250: 00058413 mv s0,a1 10254: 00050493 mv s1,a0 10258: 06800613 li a2,104 1025c: 00000593 li a1,0 10260: 00810513 addi a0,sp,8 10264: 06112e23 sw ra,124(sp) 10268: 1bc000ef jal ra,10424 <memset> 1026c: 0004c783 lbu a5,0(s1) 10270: 02078863 beqz a5,102a0 <isAnagram+0x5c> 10274: 00148513 addi a0,s1,1 10278: f9f78793 addi a5,a5,-97 1027c: 00279793 slli a5,a5,0x2 10280: 07078793 addi a5,a5,112 10284: 002786b3 add a3,a5,sp 10288: f986a703 lw a4,-104(a3) 1028c: 00054783 lbu a5,0(a0) 10290: 00150513 addi a0,a0,1 10294: 00170713 addi a4,a4,1 10298: f8e6ac23 sw a4,-104(a3) 1029c: fc079ee3 bnez a5,10278 <isAnagram+0x34> 102a0: 00044783 lbu a5,0(s0) 102a4: 02078863 beqz a5,102d4 <isAnagram+0x90> 102a8: 00140593 addi a1,s0,1 102ac: f9f78793 addi a5,a5,-97 102b0: 00279793 slli a5,a5,0x2 102b4: 07078793 addi a5,a5,112 102b8: 002786b3 add a3,a5,sp 102bc: f986a703 lw a4,-104(a3) 102c0: 0005c783 lbu a5,0(a1) 102c4: 00158593 addi a1,a1,1 102c8: fff70713 addi a4,a4,-1 102cc: f8e6ac23 sw a4,-104(a3) 102d0: fc079ee3 bnez a5,102ac <isAnagram+0x68> 102d4: 00810793 addi a5,sp,8 102d8: 07010693 addi a3,sp,112 102dc: 0080006f j 102e4 <isAnagram+0xa0> 102e0: 02f68463 beq a3,a5,10308 <isAnagram+0xc4> 102e4: 0007a703 lw a4,0(a5) 102e8: 00478793 addi a5,a5,4 102ec: fe070ae3 beqz a4,102e0 <isAnagram+0x9c> 102f0: 07c12083 lw ra,124(sp) 102f4: 07812403 lw s0,120(sp) 102f8: 07412483 lw s1,116(sp) 102fc: 00000513 li a0,0 10300: 08010113 addi sp,sp,128 10304: 00008067 ret 10308: 07c12083 lw ra,124(sp) 1030c: 07812403 lw s0,120(sp) 10310: 07412483 lw s1,116(sp) 10314: 00100513 li a0,1 10318: 08010113 addi sp,sp,128 1031c: 00008067 ret ``` ### -Ofast ```= 00010244 <isAnagram>: 10244: f8010113 addi sp,sp,-128 10248: 06812c23 sw s0,120(sp) 1024c: 06912a23 sw s1,116(sp) 10250: 00058413 mv s0,a1 10254: 00050493 mv s1,a0 10258: 06800613 li a2,104 1025c: 00000593 li a1,0 10260: 
00810513 addi a0,sp,8 10264: 06112e23 sw ra,124(sp) 10268: 1bc000ef jal ra,10424 <memset> 1026c: 0004c783 lbu a5,0(s1) 10270: 02078863 beqz a5,102a0 <isAnagram+0x5c> 10274: 00148513 addi a0,s1,1 10278: f9f78793 addi a5,a5,-97 1027c: 00279793 slli a5,a5,0x2 10280: 07078793 addi a5,a5,112 10284: 002786b3 add a3,a5,sp 10288: f986a703 lw a4,-104(a3) 1028c: 00054783 lbu a5,0(a0) 10290: 00150513 addi a0,a0,1 10294: 00170713 addi a4,a4,1 10298: f8e6ac23 sw a4,-104(a3) 1029c: fc079ee3 bnez a5,10278 <isAnagram+0x34> 102a0: 00044783 lbu a5,0(s0) 102a4: 02078863 beqz a5,102d4 <isAnagram+0x90> 102a8: 00140593 addi a1,s0,1 102ac: f9f78793 addi a5,a5,-97 102b0: 00279793 slli a5,a5,0x2 102b4: 07078793 addi a5,a5,112 102b8: 002786b3 add a3,a5,sp 102bc: f986a703 lw a4,-104(a3) 102c0: 0005c783 lbu a5,0(a1) 102c4: 00158593 addi a1,a1,1 102c8: fff70713 addi a4,a4,-1 102cc: f8e6ac23 sw a4,-104(a3) 102d0: fc079ee3 bnez a5,102ac <isAnagram+0x68> 102d4: 00810793 addi a5,sp,8 102d8: 07010693 addi a3,sp,112 102dc: 0080006f j 102e4 <isAnagram+0xa0> 102e0: 02f68463 beq a3,a5,10308 <isAnagram+0xc4> 102e4: 0007a703 lw a4,0(a5) 102e8: 00478793 addi a5,a5,4 102ec: fe070ae3 beqz a4,102e0 <isAnagram+0x9c> 102f0: 07c12083 lw ra,124(sp) 102f4: 07812403 lw s0,120(sp) 102f8: 07412483 lw s1,116(sp) 102fc: 00000513 li a0,0 10300: 08010113 addi sp,sp,128 10304: 00008067 ret 10308: 07c12083 lw ra,124(sp) 1030c: 07812403 lw s0,120(sp) 10310: 07412483 lw s1,116(sp) 10314: 00100513 li a0,1 10318: 08010113 addi sp,sp,128 1031c: 00008067 ret ``` ### -Os ```= 00010238 <isAnagram>: 10238: f8010113 addi sp,sp,-128 1023c: 06812c23 sw s0,120(sp) 10240: 06912a23 sw s1,116(sp) 10244: 00058413 mv s0,a1 10248: 00050493 mv s1,a0 1024c: 06800613 li a2,104 10250: 00000593 li a1,0 10254: 00810513 addi a0,sp,8 10258: 06112e23 sw ra,124(sp) 1025c: 1a0000ef jal ra,103fc <memset> 10260: 00048513 mv a0,s1 10264: 00054783 lbu a5,0(a0) 10268: 00150513 addi a0,a0,1 1026c: 04079263 bnez a5,102b0 <isAnagram+0x78> 10270: 00040593 mv 
a1,s0 10274: 0005c783 lbu a5,0(a1) 10278: 00158593 addi a1,a1,1 1027c: 04079a63 bnez a5,102d0 <isAnagram+0x98> 10280: 00810793 addi a5,sp,8 10284: 0007a703 lw a4,0(a5) 10288: 06071463 bnez a4,102f0 <isAnagram+0xb8> 1028c: 00478793 addi a5,a5,4 10290: 07010713 addi a4,sp,112 10294: fee798e3 bne a5,a4,10284 <isAnagram+0x4c> 10298: 00100513 li a0,1 1029c: 07c12083 lw ra,124(sp) 102a0: 07812403 lw s0,120(sp) 102a4: 07412483 lw s1,116(sp) 102a8: 08010113 addi sp,sp,128 102ac: 00008067 ret 102b0: f9f78793 addi a5,a5,-97 102b4: 00279793 slli a5,a5,0x2 102b8: 07078793 addi a5,a5,112 102bc: 002787b3 add a5,a5,sp 102c0: f987a703 lw a4,-104(a5) 102c4: 00170713 addi a4,a4,1 102c8: f8e7ac23 sw a4,-104(a5) 102cc: f99ff06f j 10264 <isAnagram+0x2c> 102d0: f9f78793 addi a5,a5,-97 102d4: 00279793 slli a5,a5,0x2 102d8: 07078793 addi a5,a5,112 102dc: 002787b3 add a5,a5,sp 102e0: f987a703 lw a4,-104(a5) 102e4: fff70713 addi a4,a4,-1 102e8: f8e7ac23 sw a4,-104(a5) 102ec: f89ff06f j 10274 <isAnagram+0x3c> 102f0: 00000513 li a0,0 102f4: fa9ff06f j 1029c <isAnagram+0x64> ``` The `-Os` optimization generates many more `bne` than `beq` instructions, and the code layout is more complicated than at the other optimization levels. This code contains more **"jump back"** branches, such as the unconditional jumps at `line 39` and `line 47`. This reduces code size, but I think it executes more branches than the other optimization levels. For example, in loop1, `line 15` branches to `line 32(102b0)`, and the unconditional jump at `line 39` then goes back to `line 13(10264)` to continue the for loop; loop2 works the same way. So, each pass through the loop performs **two jumps**. 
## `rv32emu` Source Code Note ### syscall_handler **`syscall.c`** ```c enum { #define _(name, number) SYS_##name = number, SUPPORTED_SYSCALLS #undef _ }; #define SUPPORTED_SYSCALLS \ _(close, 57) \ _(lseek, 62) \ _(read, 63) \ _(write, 64) \ _(fstat, 80) \ _(exit, 93) \ _(gettimeofday, 169) \ _(brk, 214) \ _(open, 1024) \ IIF(RV32_HAS(SDL))( \ _(draw_frame, 0xBEEF) \ _(setup_queue, 0xC0DE) \ _(submit_queue, 0xFEED), \ ) void syscall_handler(struct riscv_t *rv) { /* get the syscall number */ riscv_word_t syscall = rv_get_reg(rv, rv_reg_a7); switch (syscall) { /* dispatch system call */ #define _(name, number) \ case SYS_##name: \ syscall_##name(rv); \ break; SUPPORTED_SYSCALLS #undef _ default: fprintf(stderr, "unknown syscall %d\n", (int) syscall); rv_halt(rv); break; } } ``` - Notice that the syscall indices are different from those in Ripes. - And from `riscv_word_t syscall = rv_get_reg(rv, rv_reg_a7);` we know that the system call index needs to be stored in `a7`. So, if we invoke system call `64`, it is handled by `syscall_handler`, which calls another function, `syscall_write()`: ```c static void syscall_write(struct riscv_t *rv) { state_t *s = rv_userdata(rv); /* access userdata */ /* _write(fde, buffer, count) */ riscv_word_t fd = rv_get_reg(rv, rv_reg_a0); riscv_word_t buffer = rv_get_reg(rv, rv_reg_a1); riscv_word_t count = rv_get_reg(rv, rv_reg_a2); /* read the string that we are printing */ uint8_t *tmp = malloc(count); memory_read(s->mem, tmp, buffer, count); /* lookup the file descriptor */ map_iter_t it; map_find(s->fd_map, &it, &fd); if (!map_at_end(s->fd_map, &it)) { /* write out the data */ size_t written = fwrite(tmp, 1, count, map_iter_value(&it, FILE *)); /* return number of bytes written */ rv_set_reg(rv, rv_reg_a0, written); } else { /* error */ rv_set_reg(rv, rv_reg_a0, -1); } free(tmp); } ``` From the definition, we know that - `a0` holds `fd` - `a1` holds `buffer` - `a2` holds `count` For `write` (index `64`), it is defined as ```c /* The structure with 
the cookie function pointers. The tag name of this struct is _IO_cookie_io_functions_t to preserve historic C++ mangled names for functions taking cookie_io_functions_t arguments. That name should not be used in new code. */ typedef struct _IO_cookie_io_functions_t { cookie_read_function_t *read; /* Read bytes. */ cookie_write_function_t *write; /* Write bytes. */ cookie_seek_function_t *seek; /* Seek/tell file position. */ cookie_close_function_t *close; /* Close file. */ } cookie_io_functions_t; ``` `cookie_io_functions_t.h` ```c /* Write NBYTES bytes pointed to by BUF to COOKIE. Write all NBYTES bytes unless there is an error. Return number of bytes written. If there is an error, return 0 and do not write anything. If the file has been opened for append (__mode.__append set), then set the file pointer to the end of the file and then do the write; if not, just write at the current file pointer. */ typedef __ssize_t cookie_write_function_t (void *__cookie, const char *__buf, size_t __nbytes); ``` ## Some Notes There is one thing that confused me for a long time. ``` .data str: .ascii "Hello World!\n" .set str_size, .-str ``` - The `.set str_size, .-str` directive was quite hard for me to understand. - `.` means the **current location** - `-` just means subtraction, so the expression `.-str` is the number of bytes between the `str` label and the current location, i.e. the length of the `.ascii` string. ## Reference Link - [X Macro](https://en.wikipedia.org/wiki/X_Macro) - [Meaning of .-main expression](https://stackoverflow.com/questions/11058361/meaning-of-main-expression) - [SWAR](https://en.wikipedia.org/wiki/SWAR) - [Linux核心實做 - 2022q1 第八週測驗題](https://hackmd.io/@sysprog/linux2022-quiz8/https%3A%2F%2Fhackmd.io%2F%40sysprog%2FHyg5nxO79)