Try   HackMD

Assignment 2 rv32emu Note

tags: Computer Architecture

Resources

Lab2: RISC-V RV32I[MACF] emulator with ELF support

rv32emu

github

Original Code

Assembly - Original Solution by 曾晧峖

# RISC-V assembly program that runs three isAnagram test cases and prints each result.

.org 0
# Provide program starting address to linker
.global _start

/* newlib system calls */
.set SYSEXIT,  93
.set SYSWRITE, 64

.data

test_1_s: .string "anagram"
test_1_t: .string "nagaram"
test_2_s: .string "rat"
test_2_t: .string "anagram"
test_3_s: .string "tseng"
test_3_t: .string "gnest"
correct_1:       .string "test_1: correct"
not_correct_1:   .string "test_1: not correct"
correct_2:       .string "test_2: correct"
not_correct_2:   .string "test_2: not correct"
correct_3:       .string "test_3: correct"
not_correct_3:   .string "test_3: not correct"

.text

main:                          # test driver: runs three isAnagram cases and prints one message per case
	addi a7, x0, 4         # a7 = 4, the ecall number used for every print below (presumably Ripes PrintString -- confirm)
	
	la a0, test_1_s        # s(a0) = test_1_s  
	la a1, test_1_t        # t(a1) = test_1_t
	jal ra, isAnagram      # call isAnagram(s(a0), t(a1))
	bne a0, x0 TRUE_1      # test 1 expects a nonzero result (anagram) -> TRUE_1
	la a0,  not_correct_1  # unexpected result: print the "not correct" message
	ecall
TEST_2:
	la a0, test_2_s        # s(a0) = test_2_s
	la a1, test_2_t        # t(a1) = test_2_t
	jal ra isAnagram	   # call isAnagram(s(a0), t(a1))
	beq a0, x0 TRUE_2	   # test 2 expects a zero result (not an anagram) -> TRUE_2
	la a0,  not_correct_2  # unexpected result: print the "not correct" message
	ecall
TEST_3:
	la a0, test_3_s        # s(a0) = test_3_s
	la a1, test_3_t        # t(a1) = test_3_t
	jal ra isAnagram	   # call isAnagram(s(a0), t(a1))
	bne a0, x0 TRUE_3      # test 3 expects a nonzero result (anagram) -> TRUE_3
	la a0,  not_correct_3  # unexpected result: load the "not correct" message
	j END                  # NOTE(review): jumps over the ecall below, so this message is never printed
	ecall                  # unreachable because of the jump above
TRUE_1:
	la a0,  correct_1      # expected result: print the "correct" message
        ecall
	j TEST_2               # continue with test 2
	
TRUE_2:
        la a0,  correct_2      # expected result: print the "correct" message
        ecall
	j TEST_3               # continue with test 3
TRUE_3:
	la a0,  correct_3      # expected result: print the "correct" message
        ecall
END: 
	addi a7, x0, 10        # a7 = 10 (presumably the Ripes exit call -- confirm)
	ecall

isAnagram:                  # in: a0 = s, a1 = t; out: a0 = 1 if the counts cancel out (anagram), else 0
	addi sp, sp, -104       # reserve 26 * 4 = 104 bytes of stack for int letter_freq[26]
	addi t0, sp, 0          # t0 = &letter_freq[0]
	addi t1, x0, 0          # t1 = i = 0
	li t2, 26               # t2 = 26, the number of counters
LOOP1:                      # int letter_freq[26] = {0};
	beq t1, t2 GET_FREQ_s   # leave the zeroing loop once i == 26
	sw x0, 0(t0)            # letter_freq[i] = 0;
	addi, t1, t1, 1         # i++;
	addi, t0, t0, 4         # t0 = address of the next counter
	j LOOP1
	
GET_FREQ_s:
	addi, t0, sp, 0         # t0 = &letter_freq[0]
 	addi, t1, a0, 0         # t1 = s
	addi, t2, x0, 0         # t2 = i = 0
LOOP2:                            # for(  ;s[i] ;i++ )
	add  t3, t1, t2         # t3 = &s[i]
	lb t5, (0)t3            # t5 = s[i]
 	beq, t5, x0, GET_FREQ_F # if s[i] == 0 break the loop
	addi t5, t5 -97         # t5 = s[i] - 'a'
	slli t5, t5, 2          # byte offset from letter_freq[0] to letter_freq[s[i] - 'a']
	add t5, t5, t0          # t5 = &letter_freq[s[i] - 'a']
	lw t3, 0(t5)            # t3 = letter_freq[s[i] - 'a']
	addi t3, t3, 1          # t3 = letter_freq[s[i] - 'a'] + 1
	sw t3, 0(t5)            # letter_freq[s[i] - 'a'] = t3
	addi t2, t2, 1          # i++
	j LOOP2
GET_FREQ_F:
	addi, t0, sp, 0         # t0 = &letter_freq[0]
 	addi, t1, a1, 0         # t1 = t
	addi, t2, x0, 0         # t2 = i = 0
LOOP3:                      # for(  ;t[i] ;i++ )
	add  t3, t1, t2         # t3 = &t[i]
	lb t5, (0)t3            # t5 = t[i]
 	beq, t5, x0, CHECK      # if t[i] == 0 break the loop
	addi t5, t5 -97         # t5 = t[i] - 'a'
	slli t5, t5, 2          # byte offset from letter_freq[0] to letter_freq[t[i] - 'a']
	add t5, t5, t0          # t5 = &letter_freq[t[i] - 'a']
	lw t3, 0(t5)            # t3 = letter_freq[t[i] - 'a']
	addi t3, t3, -1         # t3 = letter_freq[t[i] - 'a'] - 1
	sw t3, 0(t5)            # letter_freq[t[i] - 'a'] = t3
	addi t2, t2, 1          # i++
	j LOOP3
CHECK:
	addi t0, sp, 0 # t0 = &letter_freq[0]
	addi t1, x0, 0 # t1 = i = 0
	li t2, 26      # t2 = 26, the number of counters
LOOP4:                      # for (int i = 0; i < 26; i++)
	beq t1, t2 TRUE         # every counter was zero once i == 26
	lw t3, 0(t0)            # t3 = letter_freq[i];
	bne t3, x0, FALSE       # a nonzero counter means the letter counts differ
	addi, t1, t1, 1         # i++;
	addi, t0, t0, 4         # t0 = address of the next counter
	j LOOP4
	
FALSE:
	addi a0, x0, 0          # return false
	j END_F
TRUE:
	addi a0, x0, 01          # all counters are zero: return true
END_F:
	jr ra                   # return a0; NOTE(review): sp is still 104 bytes below its entry value here

When I first ran make on the program, I got the messages below

riscv-none-elf-as -R -march=rv32i -mabi=ilp32 -o hw2.o hw2.S
hw2.S: Assembler messages:
hw2.S: Warning: end of file in comment; newline inserted
hw2.S:34: Error: illegal operands `bne a0,x0 TRUE_1'
hw2.S:40: Error: illegal operands `jal ra isAnagram'
hw2.S:41: Error: illegal operands `beq a0,x0 TRUE_2'
hw2.S:47: Error: illegal operands `jal ra isAnagram'
hw2.S:48: Error: illegal operands `bne a0,x0 TRUE_3'
hw2.S:75: Error: illegal operands `beq t1,t2 GET_FREQ_s'
hw2.S:77: Error: unrecognized opcode `addi, t1,t1,1'
hw2.S:78: Error: unrecognized opcode `addi, t0,t0,4'
hw2.S:82: Error: unrecognized opcode `addi, t0,sp,0'
hw2.S:83: Error: unrecognized opcode `addi, t1,a0,0'
hw2.S:84: Error: unrecognized opcode `addi, t2,x0,0'
hw2.S:87: Error: illegal operands `lb t5,(0)t3'
hw2.S:88: Error: unrecognized opcode `beq, t5,x0,GET_FREQ_F'
hw2.S:89: Error: illegal operands `addi t5,t5-97'
hw2.S:98: Error: unrecognized opcode `addi, t0,sp,0'
hw2.S:99: Error: unrecognized opcode `addi, t1,a1,0'
hw2.S:100: Error: unrecognized opcode `addi, t2,x0,0'
hw2.S:103: Error: illegal operands `lb t5,(0)t3'
hw2.S:104: Error: unrecognized opcode `beq, t5,x0,CHECK'
hw2.S:105: Error: illegal operands `addi t5,t5-97'
hw2.S:118: Error: illegal operands `beq t1,t2 TRUE'
hw2.S:121: Error: unrecognized opcode `addi, t1,t1,1'
hw2.S:122: Error: unrecognized opcode `addi, t0,t0,4'
make: *** [Makefile:9: hw2.o] Error 1
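  • bne a0, x0 TRUE_1 needs to be written as bne a0, x0, TRUE_1 (don't forget the comma before the label)
  • lb t5, (0)t3 needs to be written as lb t5, 0(t3)
  • beq, t5, x0, GET_FREQ_F needs to be written as beq t5, x0, GET_FREQ_F (remove the redundant comma after the mnemonic)
  • addi t5, t5 -97 needs to be written as addi t5, t5, -97 (the comma before the immediate is missing)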

Problem

Warning: end of file in comment

Warning: end of file in comment; newline inserted

Fix

end:
    li a7, SYSEXIT      # "exit" syscall
    add a0, x0, 0       # Use 0 return code
    ecall               # invoke syscall to terminate the program
+                       # add a blank line so the file ends with a newline

Code bug to fix

# RISC-V assembly program that runs three isAnagram test cases and prints each result.

.org 0
# Provide program starting address to linker
.global _start

/* newlib system calls */
.set SYSEXIT,  93
.set SYSWRITE, 64

.data

test_1_s: .string "anagram"
test_1_t: .string "nagaram"
test_2_s: .string "rat"
test_2_t: .string "anagram"
test_3_s: .string "tseng"
test_3_t: .string "gnest"
correct_1:       .string "test_1: correct\n"
                 .set t1cor_size, .-correct_1
not_correct_1:   .string "test_1: not correct\n"
                 .set t1incor_size, .-not_correct_1
correct_2:       .string "test_2: correct\n"
                 .set t2cor_size, .-correct_2
not_correct_2:   .string "test_2: not correct\n"
                 .set t2incor_size, .-not_correct_2
correct_3:       .string "test_3: correct\n"
                 .set t3cor_size, .-correct_3
not_correct_3:   .string "test_3: not correct\n"
                 .set t3incor_size, .-not_correct_3

.text

# ---------------------------------------------------------------------
# _start: program entry point. Runs the three isAnagram test cases and
# reports each one with the "write" system call (a7 = SYSWRITE,
# a1 = message address, a2 = message length), then jumps to `end`.
#
# Expected results: test 1 -> anagram, test 2 -> not an anagram,
# test 3 -> anagram.
#
# Known issue, kept on purpose because it is the bug analysed in the
# "Problem" / "Fix it" text that follows this listing: a0 is supposed to
# hold the file descriptor when ecall runs, but it is overwritten first
# by the string pointer and then by isAnagram's return value. Whenever
# that return value is 0, the message is written to fd 0, not stdout.
# ---------------------------------------------------------------------
_start:
    li a7, SYSWRITE        # a7 = "write" system call, used by every ecall below
    li a0, 1               # 1 = standard output (stdout); overwritten by the la below

    la a0, test_1_s        # s(a0) = test_1_s
    la a1, test_1_t        # t(a1) = test_1_t
    jal ra, isAnagram      # a0 = isAnagram(s, t)
    bne a0, x0, TRUE_1     # test 1 expects a nonzero result (anagram)
    la a1, not_correct_1   # test1 "not correct" message address
    la a2, t1incor_size    # test1 "not correct" message length
    ecall                  # a0 == 0 here, see "Known issue" above
TEST_2:
    la a0, test_2_s        # s(a0) = test_2_s
    la a1, test_2_t        # t(a1) = test_2_t
    jal ra, isAnagram      # a0 = isAnagram(s, t)
    beqz a0, TRUE_2        # test 2 expects a zero result (not an anagram)
    la a1, not_correct_2   # test2 "not correct" message address
    la a2, t2incor_size    # test2 "not correct" message length
    ecall
TEST_3:
    la a0, test_3_s        # s(a0) = test_3_s
    la a1, test_3_t        # t(a1) = test_3_t
    jal ra, isAnagram      # a0 = isAnagram(s, t)
    bne a0, x0, TRUE_3     # test 3 expects a nonzero result (anagram)
    la a1, not_correct_3   # test3 "not correct" message address
    la a2, t3incor_size    # test3 "not correct" message length
    ecall                  # a0 == 0 here, see "Known issue" above
    j end
TRUE_1:
    la a1, correct_1       # test1 "correct" message address
    la a2, t1cor_size      # test1 "correct" message length
    ecall                  # a0 == 1 here (isAnagram's result), which equals the stdout fd
    j TEST_2               # continue with test 2

TRUE_2:
    la a1, correct_2       # test2 "correct" message address
    la a2, t2cor_size      # test2 "correct" message length
    ecall                  # a0 == 0 here, see "Known issue" above
    j TEST_3               # continue with test 3
TRUE_3:
    la a1, correct_3       # test3 "correct" message address
    la a2, t3cor_size      # test3 "correct" message length
    ecall                  # a0 == 1 here (isAnagram's result), which equals the stdout fd
    j end                  # exit explicitly instead of falling through into isAnagram

# ---------------------------------------------------------------------
# isAnagram: report whether two strings are anagrams of each other.
# C equivalent: int isAnagram(const char *s, const char *t)
#
# In:    a0 = s, a1 = t (NUL-terminated). Every character is assumed to
#        be a lowercase letter 'a'..'z'; any other byte indexes outside
#        letter_freq[].
# Out:   a0 = 1 if s and t contain the same letters with the same
#        counts, 0 otherwise.
# Clobb: t0, t1, t2, t3, t5
# Stack: 104 bytes for int letter_freq[26], released before returning
#        so that sp has its entry value again when the caller resumes.
# ---------------------------------------------------------------------
isAnagram:                  # a0 = s, a1 = t
    addi sp, sp, -104       # reserve 26 * 4 bytes for int letter_freq[26]
    addi t0, sp, 0          # t0 = &letter_freq[0]
    addi t1, x0, 0          # t1 = i = 0
    li t2, 26               # t2 = number of counters
LOOP1:                      # int letter_freq[26] = {0};
    beq t1, t2, GET_FREQ_s  # leave the zeroing loop once i == 26
    sw x0, 0(t0)            # letter_freq[i] = 0;
    addi t1, t1, 1          # i++;
    addi t0, t0, 4          # t0 = address of the next counter
    j LOOP1

GET_FREQ_s:
    addi t0, sp, 0          # t0 = &letter_freq[0]
    addi t1, a0, 0          # t1 = s
    addi t2, x0, 0          # t2 = i = 0
LOOP2:                      # for ( ; s[i]; i++) letter_freq[s[i] - 'a']++;
    add  t3, t1, t2         # t3 = &s[i]
    lb t5, 0(t3)            # t5 = s[i]
    beq t5, x0, GET_FREQ_F  # if s[i] == 0 break the loop
    addi t5, t5, -97        # t5 = s[i] - 'a'
    slli t5, t5, 2          # byte offset of letter_freq[s[i] - 'a']
    add t5, t5, t0          # t5 = &letter_freq[s[i] - 'a']
    lw t3, 0(t5)            # t3 = letter_freq[s[i] - 'a']
    addi t3, t3, 1          # t3 = letter_freq[s[i] - 'a'] + 1
    sw t3, 0(t5)            # letter_freq[s[i] - 'a'] = t3
    addi t2, t2, 1          # i++
    j LOOP2
GET_FREQ_F:
    addi t0, sp, 0          # t0 = &letter_freq[0]
    addi t1, a1, 0          # t1 = t
    addi t2, x0, 0          # t2 = i = 0
LOOP3:                      # for ( ; t[i]; i++) letter_freq[t[i] - 'a']--;
    add  t3, t1, t2         # t3 = &t[i]
    lb t5, 0(t3)            # t5 = t[i]
    beq t5, x0, CHECK       # if t[i] == 0 break the loop
    addi t5, t5, -97        # t5 = t[i] - 'a'
    slli t5, t5, 2          # byte offset of letter_freq[t[i] - 'a']
    add t5, t5, t0          # t5 = &letter_freq[t[i] - 'a']
    lw t3, 0(t5)            # t3 = letter_freq[t[i] - 'a']
    addi t3, t3, -1         # t3 = letter_freq[t[i] - 'a'] - 1
    sw t3, 0(t5)            # letter_freq[t[i] - 'a'] = t3
    addi t2, t2, 1          # i++
    j LOOP3
CHECK:
    addi t0, sp, 0          # t0 = &letter_freq[0]
    addi t1, x0, 0          # t1 = i = 0
    li t2, 26               # t2 = number of counters
LOOP4:                      # for (int i = 0; i < 26; i++)
    beq t1, t2, TRUE        # every counter was zero once i == 26
    lw t3, 0(t0)            # t3 = letter_freq[i];
    bne t3, x0, FALSE       # a nonzero counter means the letter counts differ
    addi t1, t1, 1          # i++;
    addi t0, t0, 4          # t0 = address of the next counter
    j LOOP4

FALSE:
    addi a0, x0, 0          # return false
    j END_F
TRUE:
    addi a0, x0, 1          # all counters are zero: return true
END_F:
    addi sp, sp, 104        # release letter_freq[]; sp is back at its entry value
    jr ra                   # return, a0 = return value = true or false

end:
    li a7, SYSEXIT      # "exit" syscall
    add a0, x0, 0       # Use 0 return code
    ecall               # invoke syscall to terminate the program

Problem:
In Ripes the assembly code works successfully, but in rv32emu test_2 does not show its result. When I make test_2_t the same length as test_2_s, it works! I am still trying to find the problem.

Fix it:
I finally found out that in Ripes the PrintString system call only needs a0 for the string address and a7 for the system call index.

In rv32emu, however, the write system call needs a0 for the file descriptor (1 for stdout in my case), a1 for the string address, and a2 for the string length, as mentioned in docs/syscall.md.

However, after the call to isAnagram, a0 has been overwritten with the return value (True or False). If the two strings are not anagrams, a0 is 0, so the following ecall treats it as file descriptor 0 (stdin) instead of stdout, which causes this problem.

For three testcases, expected that

  • testcase 1 return True
  • testcase 2 return False
  • testcase 3 return True

after calling the isAnagram function. So it is necessary to add li a0, 1 before the ecall whenever the return value is False (i.e. 0).

TEST_1:                    # test 1: expects isAnagram to return nonzero
    addi sp, sp, -4        # make room to save ra, because this routine calls isAnagram
    sw ra, 0(sp)           # save the return address
    la a0, test_1_s        # s(a0) = test_1_s  
    la a1, test_1_t        # t(a1) = test_1_t
    jal ra, isAnagram      # call isAnagram(s(a0), t(a1))
    bne a0, x0, TRUE_1     # nonzero result is the expected one -> TRUE_1
+   li a0, 1               # reload a0 to handle stdout
    la a1, not_correct_1   # test1 not correct address
    la a2, t1incor_size    # test1 not correct length
    ecall
    lw ra, 0(sp)           # restore ra; NOTE(review): relies on isAnagram returning with sp unchanged -- confirm
    addi sp, sp, 4
    jr ra                  # return to the caller

TEST_2:                    # test 2: expects isAnagram to return zero
    addi sp, sp, -4        # make room to save ra, because this routine calls isAnagram
    sw ra, 0(sp)           # save the return address
    la a0, test_2_s        # s(a0) = test_2_s
    la a1, test_2_t        # t(a1) = test_2_t
    jal ra, isAnagram	   # call isAnagram(s(a0), t(a1))
    beq a0, x0, TRUE_2	   # zero result is the expected one -> TRUE_2
    
    la a1, not_correct_2   # test2 not correct address
    la a2, t2incor_size    # test2 not correct length
    ecall                  # a0 is the nonzero isAnagram result here; no reload is done on this path
    lw ra, 0(sp)           # restore ra; NOTE(review): relies on isAnagram returning with sp unchanged -- confirm
    addi sp, sp, 4
    jr ra                  # return to the caller

TEST_3:                    # test 3: expects isAnagram to return nonzero
    addi sp, sp, -4        # make room to save ra, because this routine calls isAnagram
    sw ra, 0(sp)           # save the return address
    la a0, test_3_s        # s(a0) = test_3_s
    la a1, test_3_t        # t(a1) = test_3_t
    jal ra, isAnagram	   # call isAnagram(s(a0), t(a1))
    bne a0, x0, TRUE_3     # nonzero result is the expected one -> TRUE_3
+   li a0, 1               # reload a0 to handle stdout
    la a1, not_correct_3   # test3 not correct address
    la a2, t3incor_size    # test3 not correct length
    ecall
    lw ra, 0(sp)           # restore ra; NOTE(review): relies on isAnagram returning with sp unchanged -- confirm
    addi sp, sp, 4
    jr ra                  # return to the caller

TRUE_1:                  # reached from TEST_1 with a0 != 0 (isAnagram's result)
    la a1, correct_1     # test1 correct address
    la a2, t1cor_size    # test1 correct length
    ecall
    lw ra, 0(sp)         # restore the ra saved by TEST_1
    addi sp, sp, 4
    jr ra               # return to TEST_1's caller
	
TRUE_2:                  # reached from TEST_2 with a0 == 0 (isAnagram's result)
+   li a0, 1             # reload a0 to handle stdout 
    la a1, correct_2     # test2 correct address
    la a2, t2cor_size    # test2 correct length
    ecall
    lw ra, 0(sp)         # restore the ra saved by TEST_2
    addi sp, sp, 4
    jr ra              # return to TEST_2's caller

TRUE_3:                  # reached from TEST_3 with a0 != 0 (isAnagram's result)
    la a1, correct_3     # test3 correct address
    la a2, t3cor_size    # test3 correct length
    ecall
    lw ra, 0(sp)         # restore the ra saved by TEST_3
    addi sp, sp, 4
    jr ra                # return to TEST_3's caller

Objdump Assembly code Note

-O0

00010184 <isAnagram>: 10184: f6010113 addi sp,sp,-160 10188: 08112e23 sw ra,156(sp) 1018c: 08812c23 sw s0,152(sp) 10190: 0a010413 addi s0,sp,160 10194: f6a42623 sw a0,-148(s0) 10198: f6b42423 sw a1,-152(s0) ######################### Section 0 ######################### 1019c: f7c40793 addi a5,s0,-132 101a0: 06800713 li a4,104 # int letter_freq[26] = {0} 101a4: 00070613 mv a2,a4 # a2 = a4 101a8: 00000593 li a1,0 101ac: 00078513 mv a0,a5 # initial address 101b0: 328000ef jal ra,104d8 <memset> 101b4: fe042623 sw zero,-20(s0) ######################### Section 0 END ######################### ######################### Section 1 ######################### 101b8: 0480006f j 10200 <isAnagram+0x7c> 101bc: fec42783 lw a5,-20(s0) # a5 = 0 101c0: f6c42703 lw a4,-148(s0) # a4 = a0 101c4: 00f707b3 add a5,a4,a5 # a5 = a0 (test_1_s address) + a5 (offset) 101c8: 0007c783 lbu a5,0(a5) # a5 = s[i] 101cc: f9f78713 addi a4,a5,-97 # 97 = 'a', a4 = s[i] - 'a' (0 ~ 25) 101d0: 00271793 slli a5,a4,0x2 # push letter_freq 101d4: ff078793 addi a5,a5,-16 101d8: 008787b3 add a5,a5,s0 # s0 = stack top, 101dc: f8c7a783 lw a5,-116(a5) # a5 = letter_freq[s[i] - 'a'] 101e0: 00178693 addi a3,a5,1 # a3 = letter_freq[s[i] - 'a']++ 101e4: 00271793 slli a5,a4,0x2 101e8: ff078793 addi a5,a5,-16 101ec: 008787b3 add a5,a5,s0 101f0: f8d7a623 sw a3,-116(a5) 101f4: fec42783 lw a5,-20(s0) # i = 0 101f8: 00178793 addi a5,a5,1 # i++ 101fc: fef42623 sw a5,-20(s0) # stored back 10200: fec42783 lw a5,-20(s0) # a5 = 0 10204: f6c42703 lw a4,-148(s0) # a4 = a0 10208: 00f707b3 add a5,a4,a5 # a5 = a0(test_1_s address) + a5 (offset) 1020c: 0007c783 lbu a5,0(a5) # a5 = s[i] 10210: fa0796e3 bnez a5,101bc <isAnagram+0x38> # s[i] != 0 10214: fe042423 sw zero,-24(s0) ######################### Section 1 END ######################### ######################### Section 2 ######################### 10218: 0480006f j 10260 <isAnagram+0xdc> 1021c: fe842783 lw a5,-24(s0) 10220: f6842703 lw a4,-152(s0) 10224: 00f707b3 add a5,a4,a5 10228: 
0007c783 lbu a5,0(a5) 1022c: f9f78713 addi a4,a5,-97 10230: 00271793 slli a5,a4,0x2 10234: ff078793 addi a5,a5,-16 10238: 008787b3 add a5,a5,s0 1023c: f8c7a783 lw a5,-116(a5) 10240: fff78693 addi a3,a5,-1 10244: 00271793 slli a5,a4,0x2 10248: ff078793 addi a5,a5,-16 1024c: 008787b3 add a5,a5,s0 10250: f8d7a623 sw a3,-116(a5) 10254: fe842783 lw a5,-24(s0) 10258: 00178793 addi a5,a5,1 1025c: fef42423 sw a5,-24(s0) 10260: fe842783 lw a5,-24(s0) 10264: f6842703 lw a4,-152(s0) 10268: 00f707b3 add a5,a4,a5 1026c: 0007c783 lbu a5,0(a5) 10270: fa0796e3 bnez a5,1021c <isAnagram+0x98> 10274: fe042223 sw zero,-28(s0) ######################### Section 2 END ######################### ######################### Section 3 ######################### 10278: 0300006f j 102a8 <isAnagram+0x124> 1027c: fe442783 lw a5,-28(s0) 10280: 00279793 slli a5,a5,0x2 10284: ff078793 addi a5,a5,-16 10288: 008787b3 add a5,a5,s0 1028c: f8c7a783 lw a5,-116(a5) 10290: 00078663 beqz a5,1029c <isAnagram+0x118> 10294: 00000793 li a5,0 10298: 0200006f j 102b8 <isAnagram+0x134> 1029c: fe442783 lw a5,-28(s0) 102a0: 00178793 addi a5,a5,1 102a4: fef42223 sw a5,-28(s0) 102a8: fe442703 lw a4,-28(s0) 102ac: 01900793 li a5,25 102b0: fce7d6e3 bge a5,a4,1027c <isAnagram+0xf8> 102b4: 00100793 li a5,1 102b8: 00078513 mv a0,a5 ######################### Section 3 END ######################### 102bc: 09c12083 lw ra,156(sp) 102c0: 09812403 lw s0,152(sp) 102c4: 0a010113 addi sp,sp,160 102c8: 00008067 ret

Section 0 corresponds to the original C code

int letter_freq[26] = {0};

Section 1 is corresponded to original C code

for (int i = 0; s[i]; i++) {
    letter_freq[s[i] - 'a']++;
}
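  • At first I did not know why line 26 needs addi a5, a5, -16. It is simply part of the array's frame offset: the element address is computed as s0 + 4*(s[i] - 'a') - 16 and then accessed with displacement -116, i.e. s0 - 132 + 4*(s[i] - 'a'), and s0 - 132 is exactly the address of letter_freq that was passed to memset in Section 0.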

Section 2 is corresponded to original C code

for (int i = 0; t[i]; i++) {
    letter_freq[t[i] - 'a']--;
}

Section 3 is corresponded to original C code

for (int i = 0; i < 26; i++) {
    if (letter_freq[i]) 
        return 0;
}
return 1;

-O1

00010184 <isAnagram>: 10184: f8010113 addi sp,sp,-128 10188: 06112e23 sw ra,124(sp) 1018c: 06812c23 sw s0,120(sp) 10190: 06912a23 sw s1,116(sp) 10194: 00050493 mv s1,a0 10198: 00058413 mv s0,a1 #################### Section 0 #################### 1019c: 06800613 li a2,104 101a0: 00000593 li a1,0 101a4: 00810513 addi a0,sp,8 101a8: 264000ef jal ra,1040c <memset> 101ac: 0004c783 lbu a5,0(s1) #################### Section 0 End #################### #################### Section 1 #################### 101b0: 02078863 beqz a5,101e0 <isAnagram+0x5c> # a5 = 0 break the for loop 101b4: 00148513 addi a0,s1,1 # s++ 101b8: f9f78793 addi a5,a5,-97 # - 'a' 101bc: 00279793 slli a5,a5,0x2 101c0: 07078793 addi a5,a5,112 101c4: 002787b3 add a5,a5,sp 101c8: f987a703 lw a4,-104(a5) 101cc: 00170713 addi a4,a4,1 101d0: f8e7ac23 sw a4,-104(a5) 101d4: 00150513 addi a0,a0,1 # i++ 101d8: fff54783 lbu a5,-1(a0) # a5 = s[i] 101dc: fc079ee3 bnez a5,101b8 <isAnagram+0x34> # s[i]!=0 #################### Section 1 End #################### #################### Section 2 #################### 101e0: 00044783 lbu a5,0(s0) 101e4: 02078863 beqz a5,10214 <isAnagram+0x90> 101e8: 00140593 addi a1,s0,1 101ec: f9f78793 addi a5,a5,-97 101f0: 00279793 slli a5,a5,0x2 101f4: 07078793 addi a5,a5,112 101f8: 002787b3 add a5,a5,sp 101fc: f987a703 lw a4,-104(a5) 10200: fff70713 addi a4,a4,-1 10204: f8e7ac23 sw a4,-104(a5) 10208: 00158593 addi a1,a1,1 1020c: fff5c783 lbu a5,-1(a1) 10210: fc079ee3 bnez a5,101ec <isAnagram+0x68> #################### Section 2 End #################### #################### Section 3 #################### 10214: 00810793 addi a5,sp,8 10218: 07010693 addi a3,sp,112 1021c: 0007a703 lw a4,0(a5) 10220: 00071a63 bnez a4,10234 <isAnagram+0xb0> 10224: 00478793 addi a5,a5,4 10228: fed79ae3 bne a5,a3,1021c <isAnagram+0x98> 1022c: 00100513 li a0,1 10230: 0080006f j 10238 <isAnagram+0xb4> 10234: 00000513 li a0,0 #################### Section 3 End #################### 10238: 07c12083 lw ra,124(sp) 
1023c: 07812403 lw s0,120(sp) 10240: 07412483 lw s1,116(sp) 10244: 08010113 addi sp,sp,128 10248: 00008067 ret

Section 0 corresponds to the original C code

int letter_freq[26] = {0};

Section 1 is corresponded to original C code

for (int i = 0; s[i]; i++) {
    letter_freq[s[i] - 'a']++;
}

Section 2 is corresponded to original C code

for (int i = 0; t[i]; i++) {
    letter_freq[t[i] - 'a']--;
}

Section 3 is corresponded to original C code

for (int i = 0; i < 26; i++) {
    if (letter_freq[i]) 
        return 0;
}
return 1;

In section 1 of isAnagram(), the assembly code first increments the pointer at line 20 (101b4), and then at line 29 (101d8) loads the byte at offset -1 from the incremented pointer to evaluate the loop condition.

  • If the loaded value is 0 (the loop is over), execution falls through to the next stage.
  • If the loaded value is not 0 (the loop should keep going), it branches back to line 21 (101b8), the instruction right after the initial increment.

In section 3, -O1 also removes one branch and reduces the code size.

-O2

00010244 <isAnagram>: 10244: f8010113 addi sp,sp,-128 10248: 06812c23 sw s0,120(sp) 1024c: 06912a23 sw s1,116(sp) 10250: 00058413 mv s0,a1 10254: 00050493 mv s1,a0 #################### Section 0 #################### 10258: 06800613 li a2,104 1025c: 00000593 li a1,0 10260: 00810513 addi a0,sp,8 10264: 06112e23 sw ra,124(sp) 10268: 1bc000ef jal ra,10424 <memset> 1026c: 0004c783 lbu a5,0(s1) #################### Section 0 End #################### #################### Section 1 #################### 10270: 02078863 beqz a5,102a0 <isAnagram+0x5c> 10274: 00148513 addi a0,s1,1 10278: f9f78793 addi a5,a5,-97 1027c: 00279793 slli a5,a5,0x2 10280: 07078793 addi a5,a5,112 10284: 002786b3 add a3,a5,sp 10288: f986a703 lw a4,-104(a3) 1028c: 00054783 lbu a5,0(a0) 10290: 00150513 addi a0,a0,1 10294: 00170713 addi a4,a4,1 10298: f8e6ac23 sw a4,-104(a3) 1029c: fc079ee3 bnez a5,10278 <isAnagram+0x34> #################### Section 1 End #################### #################### Section 2 #################### 102a0: 00044783 lbu a5,0(s0) 102a4: 02078863 beqz a5,102d4 <isAnagram+0x90> 102a8: 00140593 addi a1,s0,1 102ac: f9f78793 addi a5,a5,-97 102b0: 00279793 slli a5,a5,0x2 102b4: 07078793 addi a5,a5,112 102b8: 002786b3 add a3,a5,sp 102bc: f986a703 lw a4,-104(a3) 102c0: 0005c783 lbu a5,0(a1) 102c4: 00158593 addi a1,a1,1 102c8: fff70713 addi a4,a4,-1 102cc: f8e6ac23 sw a4,-104(a3) 102d0: fc079ee3 bnez a5,102ac <isAnagram+0x68> #################### Section 2 End #################### #################### Section 3 #################### 102d4: 00810793 addi a5,sp,8 102d8: 07010693 addi a3,sp,112 102dc: 0080006f j 102e4 <isAnagram+0xa0> 102e0: 02f68463 beq a3,a5,10308 <isAnagram+0xc4> 102e4: 0007a703 lw a4,0(a5) 102e8: 00478793 addi a5,a5,4 102ec: fe070ae3 beqz a4,102e0 <isAnagram+0x9c> 102f0: 07c12083 lw ra,124(sp) 102f4: 07812403 lw s0,120(sp) 102f8: 07412483 lw s1,116(sp) 102fc: 00000513 li a0,0 10300: 08010113 addi sp,sp,128 10304: 00008067 ret 10308: 07c12083 lw ra,124(sp) 1030c: 07812403 
lw s0,120(sp) 10310: 07412483 lw s1,116(sp) 10314: 00100513 li a0,1 10318: 08010113 addi sp,sp,128 1031c: 00008067 ret #################### Section 3 End ####################
  • Each section handle the same region as mentioned before.

Section 3 differs slightly from the -O1 version: it loops between line 49 and line 50, and if a4 is not equal to 0 it simply returns 0.

-O3

00010244 <isAnagram>: 10244: f8010113 addi sp,sp,-128 10248: 06812c23 sw s0,120(sp) 1024c: 06912a23 sw s1,116(sp) 10250: 00058413 mv s0,a1 10254: 00050493 mv s1,a0 10258: 06800613 li a2,104 1025c: 00000593 li a1,0 10260: 00810513 addi a0,sp,8 10264: 06112e23 sw ra,124(sp) 10268: 1bc000ef jal ra,10424 <memset> 1026c: 0004c783 lbu a5,0(s1) 10270: 02078863 beqz a5,102a0 <isAnagram+0x5c> 10274: 00148513 addi a0,s1,1 10278: f9f78793 addi a5,a5,-97 1027c: 00279793 slli a5,a5,0x2 10280: 07078793 addi a5,a5,112 10284: 002786b3 add a3,a5,sp 10288: f986a703 lw a4,-104(a3) 1028c: 00054783 lbu a5,0(a0) 10290: 00150513 addi a0,a0,1 10294: 00170713 addi a4,a4,1 10298: f8e6ac23 sw a4,-104(a3) 1029c: fc079ee3 bnez a5,10278 <isAnagram+0x34> 102a0: 00044783 lbu a5,0(s0) 102a4: 02078863 beqz a5,102d4 <isAnagram+0x90> 102a8: 00140593 addi a1,s0,1 102ac: f9f78793 addi a5,a5,-97 102b0: 00279793 slli a5,a5,0x2 102b4: 07078793 addi a5,a5,112 102b8: 002786b3 add a3,a5,sp 102bc: f986a703 lw a4,-104(a3) 102c0: 0005c783 lbu a5,0(a1) 102c4: 00158593 addi a1,a1,1 102c8: fff70713 addi a4,a4,-1 102cc: f8e6ac23 sw a4,-104(a3) 102d0: fc079ee3 bnez a5,102ac <isAnagram+0x68> 102d4: 00810793 addi a5,sp,8 102d8: 07010693 addi a3,sp,112 102dc: 0080006f j 102e4 <isAnagram+0xa0> 102e0: 02f68463 beq a3,a5,10308 <isAnagram+0xc4> 102e4: 0007a703 lw a4,0(a5) 102e8: 00478793 addi a5,a5,4 102ec: fe070ae3 beqz a4,102e0 <isAnagram+0x9c> 102f0: 07c12083 lw ra,124(sp) 102f4: 07812403 lw s0,120(sp) 102f8: 07412483 lw s1,116(sp) 102fc: 00000513 li a0,0 10300: 08010113 addi sp,sp,128 10304: 00008067 ret 10308: 07c12083 lw ra,124(sp) 1030c: 07812403 lw s0,120(sp) 10310: 07412483 lw s1,116(sp) 10314: 00100513 li a0,1 10318: 08010113 addi sp,sp,128 1031c: 00008067 ret

-Ofast

00010244 <isAnagram>: 10244: f8010113 addi sp,sp,-128 10248: 06812c23 sw s0,120(sp) 1024c: 06912a23 sw s1,116(sp) 10250: 00058413 mv s0,a1 10254: 00050493 mv s1,a0 10258: 06800613 li a2,104 1025c: 00000593 li a1,0 10260: 00810513 addi a0,sp,8 10264: 06112e23 sw ra,124(sp) 10268: 1bc000ef jal ra,10424 <memset> 1026c: 0004c783 lbu a5,0(s1) 10270: 02078863 beqz a5,102a0 <isAnagram+0x5c> 10274: 00148513 addi a0,s1,1 10278: f9f78793 addi a5,a5,-97 1027c: 00279793 slli a5,a5,0x2 10280: 07078793 addi a5,a5,112 10284: 002786b3 add a3,a5,sp 10288: f986a703 lw a4,-104(a3) 1028c: 00054783 lbu a5,0(a0) 10290: 00150513 addi a0,a0,1 10294: 00170713 addi a4,a4,1 10298: f8e6ac23 sw a4,-104(a3) 1029c: fc079ee3 bnez a5,10278 <isAnagram+0x34> 102a0: 00044783 lbu a5,0(s0) 102a4: 02078863 beqz a5,102d4 <isAnagram+0x90> 102a8: 00140593 addi a1,s0,1 102ac: f9f78793 addi a5,a5,-97 102b0: 00279793 slli a5,a5,0x2 102b4: 07078793 addi a5,a5,112 102b8: 002786b3 add a3,a5,sp 102bc: f986a703 lw a4,-104(a3) 102c0: 0005c783 lbu a5,0(a1) 102c4: 00158593 addi a1,a1,1 102c8: fff70713 addi a4,a4,-1 102cc: f8e6ac23 sw a4,-104(a3) 102d0: fc079ee3 bnez a5,102ac <isAnagram+0x68> 102d4: 00810793 addi a5,sp,8 102d8: 07010693 addi a3,sp,112 102dc: 0080006f j 102e4 <isAnagram+0xa0> 102e0: 02f68463 beq a3,a5,10308 <isAnagram+0xc4> 102e4: 0007a703 lw a4,0(a5) 102e8: 00478793 addi a5,a5,4 102ec: fe070ae3 beqz a4,102e0 <isAnagram+0x9c> 102f0: 07c12083 lw ra,124(sp) 102f4: 07812403 lw s0,120(sp) 102f8: 07412483 lw s1,116(sp) 102fc: 00000513 li a0,0 10300: 08010113 addi sp,sp,128 10304: 00008067 ret 10308: 07c12083 lw ra,124(sp) 1030c: 07812403 lw s0,120(sp) 10310: 07412483 lw s1,116(sp) 10314: 00100513 li a0,1 10318: 08010113 addi sp,sp,128 1031c: 00008067 ret

-Os

00010238 <isAnagram>: 10238: f8010113 addi sp,sp,-128 1023c: 06812c23 sw s0,120(sp) 10240: 06912a23 sw s1,116(sp) 10244: 00058413 mv s0,a1 10248: 00050493 mv s1,a0 1024c: 06800613 li a2,104 10250: 00000593 li a1,0 10254: 00810513 addi a0,sp,8 10258: 06112e23 sw ra,124(sp) 1025c: 1a0000ef jal ra,103fc <memset> 10260: 00048513 mv a0,s1 10264: 00054783 lbu a5,0(a0) 10268: 00150513 addi a0,a0,1 1026c: 04079263 bnez a5,102b0 <isAnagram+0x78> 10270: 00040593 mv a1,s0 10274: 0005c783 lbu a5,0(a1) 10278: 00158593 addi a1,a1,1 1027c: 04079a63 bnez a5,102d0 <isAnagram+0x98> 10280: 00810793 addi a5,sp,8 10284: 0007a703 lw a4,0(a5) 10288: 06071463 bnez a4,102f0 <isAnagram+0xb8> 1028c: 00478793 addi a5,a5,4 10290: 07010713 addi a4,sp,112 10294: fee798e3 bne a5,a4,10284 <isAnagram+0x4c> 10298: 00100513 li a0,1 1029c: 07c12083 lw ra,124(sp) 102a0: 07812403 lw s0,120(sp) 102a4: 07412483 lw s1,116(sp) 102a8: 08010113 addi sp,sp,128 102ac: 00008067 ret 102b0: f9f78793 addi a5,a5,-97 102b4: 00279793 slli a5,a5,0x2 102b8: 07078793 addi a5,a5,112 102bc: 002787b3 add a5,a5,sp 102c0: f987a703 lw a4,-104(a5) 102c4: 00170713 addi a4,a4,1 102c8: f8e7ac23 sw a4,-104(a5) 102cc: f99ff06f j 10264 <isAnagram+0x2c> 102d0: f9f78793 addi a5,a5,-97 102d4: 00279793 slli a5,a5,0x2 102d8: 07078793 addi a5,a5,112 102dc: 002787b3 add a5,a5,sp 102e0: f987a703 lw a4,-104(a5) 102e4: fff70713 addi a4,a4,-1 102e8: f8e7ac23 sw a4,-104(a5) 102ec: f89ff06f j 10274 <isAnagram+0x3c> 102f0: 00000513 li a0,0 102f4: fa9ff06f j 1029c <isAnagram+0x64>

The -Os build generates many more bne than beq instructions, and its code layout is more complicated than at the other optimization levels. It uses more "jump back" branches, such as the unconditional jumps at line 39 and line 47. This reduces code size, but I think it executes more branches than the other optimization levels.

For example, in loop1, line 15 branches to line 32 (102b0), and the unconditional jump at line 39 then goes back to line 13 (10264) to continue the for loop; loop2 works the same way. So each iteration of the loop takes two jumps.

rv32emu Source Code Note

syscall_handler

syscall.c

/* Expands SUPPORTED_SYSCALLS into one enumerator per entry: SYS_<name> = <number>. */
enum {
#define _(name, number) SYS_##name = number,
    SUPPORTED_SYSCALLS
#undef _
};

/* X-macro table of the supported system calls as (name, number) pairs.
 * Each user defines _(name, number) before expanding this list.
 * The draw_frame / setup_queue / submit_queue entries are wrapped in
 * IIF(RV32_HAS(SDL)), so they are presumably present only in SDL builds.
 */
#define SUPPORTED_SYSCALLS       \
    _(close,            57)      \
    _(lseek,            62)      \
    _(read,             63)      \
    _(write,            64)      \
    _(fstat,            80)      \
    _(exit,             93)      \
    _(gettimeofday,     169)     \
    _(brk,              214)     \
    _(open,             1024)    \
    IIF(RV32_HAS(SDL))(          \
        _(draw_frame,   0xBEEF)  \
        _(setup_queue,  0xC0DE)  \
        _(submit_queue, 0xFEED), \
    )

/* Dispatch the system call selected by register a7.
 *
 * Reads the syscall number from a7 and, for every SUPPORTED_SYSCALLS entry,
 * calls the matching syscall_<name>(rv). An unrecognised number is reported
 * on stderr and rv_halt(rv) is called.
 */
void syscall_handler(struct riscv_t *rv)
{
    /* get the syscall number */
    riscv_word_t syscall = rv_get_reg(rv, rv_reg_a7);

    switch (syscall) { /* dispatch system call */
/* one "case SYS_<name>: syscall_<name>(rv); break;" per table entry */
#define _(name, number)     \
    case SYS_##name:        \
        syscall_##name(rv); \
        break;
        SUPPORTED_SYSCALLS
#undef _
    default:
        fprintf(stderr, "unknown syscall %d\n", (int) syscall);
        rv_halt(rv);
        break;
    }
}

  • Notice that the syscall indices are different from those in Ripes.
  • From riscv_word_t syscall = rv_get_reg(rv, rv_reg_a7); we know that the system call index needs to be stored in a7.

So if we invoke system call 64, it is handled by syscall_handler, which in turn calls syscall_write()

/* Handler for the "write" system call.
 *
 * Register arguments: a0 = file descriptor, a1 = buffer address,
 * a2 = number of bytes.
 * Result in a0: the value returned by fwrite(), or -1 when the descriptor
 * is not found in s->fd_map.
 */
static void syscall_write(struct riscv_t *rv)
{
    state_t *s = rv_userdata(rv); /* access userdata */

    /* _write(fde, buffer, count) */
    riscv_word_t fd = rv_get_reg(rv, rv_reg_a0);
    riscv_word_t buffer = rv_get_reg(rv, rv_reg_a1);
    riscv_word_t count = rv_get_reg(rv, rv_reg_a2);

    /* read the string that we are printing */
    /* NOTE(review): the malloc() result is not checked before it is used */
    uint8_t *tmp = malloc(count);
    memory_read(s->mem, tmp, buffer, count);

    /* lookup the file descriptor */
    map_iter_t it;
    map_find(s->fd_map, &it, &fd);
    if (!map_at_end(s->fd_map, &it)) {
        /* write out the data */
        size_t written = fwrite(tmp, 1, count, map_iter_value(&it, FILE *));

        /* return number of bytes written */
        rv_set_reg(rv, rv_reg_a0, written);
    } else {
        /* error */
        rv_set_reg(rv, rv_reg_a0, -1);
    }

    /* the temporary copy is released on both paths */
    free(tmp);
}

From the definition, we know that

  • a0 handle fd
  • a1 handle buffer
  • a2 handle count

For write, index with 64, is defined as

/* The structure with the cookie function pointers.
   The tag name of this struct is _IO_cookie_io_functions_t to
   preserve historic C++ mangled names for functions taking
   cookie_io_functions_t arguments.  That name should not be used in
   new code.  */
typedef struct _IO_cookie_io_functions_t
{
  cookie_read_function_t *read;		/* Read bytes.  */
  cookie_write_function_t *write;	/* Write bytes.  */
  cookie_seek_function_t *seek;		/* Seek/tell file position.  */
  cookie_close_function_t *close;	/* Close file.  */
} cookie_io_functions_t;

cookie_io_functions_t.h

/* Write NBYTES bytes pointed to by BUF to COOKIE.  Write all NBYTES bytes
   unless there is an error.  Return number of bytes written.  If
   there is an error, return 0 and do not write anything.  If the file
   has been opened for append (__mode.__append set), then set the file
   pointer to the end of the file and then do the write; if not, just
   write at the current file pointer.  */
typedef __ssize_t cookie_write_function_t (void *__cookie, const char *__buf,
                                           size_t __nbytes);

Some Note

One thing confused me for a long time.

.data
str: .ascii "Hello World!\n"
     .set str_size, .-str
  • The directive .set str_size, .-str was quite hard for me to understand.
  • . means the current location (here, the address right after the string)
  • - just means subtraction, so the expression .-str is the length in bytes of the .ascii string