Computer Architecture
Lab2: RISC-V RV32I[MACF] emulator with ELF support
Assembly - Original Solution by 曾晧峖
# RISC-V assembly program to print "Hello World!" to stdout.
.org 0
# Provide program starting address to linker
.global _start
/* newlib system calls */
.set SYSEXIT, 93
.set SYSWRITE, 64
.data
test_1_s: .string "anagram"
test_1_t: .string "nagaram"
test_2_s: .string "rat"
test_2_t: .string "anagram"
test_3_s: .string "tseng"
test_3_t: .string "gnest"
correct_1: .string "test_1: correct"
not_correct_1: .string "test_1: not correct"
correct_2: .string "test_2: correct"
not_correct_2: .string "test_2: not correct"
correct_3: .string "test_3: correct"
not_correct_3: .string "test_3: not correct"
.text
# main: test driver of the original submission (written for the Ripes simulator).
# Runs isAnagram on the three string pairs from .data and prints one
# "correct" / "not correct" message per test, then exits.
# Expected outcomes encoded by the branches: test_1 and test_3 must return
# nonzero (anagram), test_2 must return zero (not an anagram).
# a7 is set to 4 once and reused by every print ecall; according to the notes
# further down, Ripes' PrintString service only needs the string address in
# a0 and the service index in a7 (presumably 4 is that index -- verify).
# NOTE: this listing is kept exactly as submitted. The branch/jal lines that
# lack a comma before the target are rejected by GNU as (see the log below).
main:
addi a7, x0, 4 # a7 = 4, the service index used by the print ecalls below
la a0, test_1_s # s(a0) = test_1_s
la a1, test_1_t # t(a1) = test_1_t
jal ra, isAnagram # a0 = isAnagram(s, t)
bne a0, x0 TRUE_1 # nonzero (anagram) is the expected result for test_1
la a0, not_correct_1 # unexpected result: print the failure message
ecall
TEST_2:
la a0, test_2_s # s(a0) = test_2_s
la a1, test_2_t # t(a1) = test_2_t
jal ra isAnagram # a0 = isAnagram(s, t)
beq a0, x0 TRUE_2 # zero (not an anagram) is the expected result for test_2
la a0, not_correct_2 # unexpected result: print the failure message
ecall
TEST_3:
la a0, test_3_s # s(a0) = test_3_s
la a1, test_3_t # t(a1) = test_3_t
jal ra isAnagram # a0 = isAnagram(s, t)
bne a0, x0 TRUE_3 # nonzero (anagram) is the expected result for test_3
la a0, not_correct_3 # unexpected result: load the failure message
j END
ecall # NOTE(review): unreachable -- the `j END` above skips it, so the test_3 failure message is never printed
TRUE_1:
la a0, correct_1 # expected result: print the success message
ecall
j TEST_2 # continue with test 2
TRUE_2:
la a0, correct_2 # expected result: print the success message
ecall
j TEST_3 # continue with test 3
TRUE_3:
la a0, correct_3 # expected result: print the success message
ecall
END:
addi a7, x0, 10 # a7 = 10 (presumably the Ripes exit service -- verify)
ecall
# isAnagram: decide whether two NUL-terminated strings are anagrams.
#   in:  a0 = s, a1 = t
#   out: a0 = 1 if every letter count matches, 0 otherwise
#   scratch: t0, t1, t2, t3, t5
# Method: a 26-word array letter_freq[] on the stack is zeroed, incremented
# for each character of s (index = char - 'a'), decremented for each
# character of t, and finally scanned for any nonzero entry.
# The index is not range-checked, so a character outside 'a'..'z' addresses
# memory outside the array.
# NOTE(review): sp is lowered by 104 on entry and is not raised again before
# `jr ra`.
# NOTE: original submission, kept as-is. The stray commas after mnemonics,
# the missing operand commas and the `(0)t3` operands are the errors listed
# in the assembler log below.
isAnagram: # a0 = s, a1 = t
addi sp, sp, -104 # reserve 26 * 4 bytes for int letter_freq[26]
addi t0, sp, 0 # t0 = &letter_freq[0]
addi t1, x0, 0 # t1 = i = 0
li t2, 26 # t2 = number of counters
LOOP1: # int letter_freq[26] = {0};
beq t1, t2 GET_FREQ_s # leave the loop once i == 26
sw x0, 0(t0) # letter_freq[i] = 0;
addi, t1, t1, 1 # i++;
addi, t0, t0, 4 # advance to the next counter
j LOOP1
GET_FREQ_s:
addi, t0, sp, 0 # t0 = &letter_freq[0]
addi, t1, a0, 0 # t1 = s
addi, t2, x0, 0 # t2 = i = 0
LOOP2: # for (; s[i]; i++)
add t3, t1, t2 # t3 = &s[i]
lb t5, (0)t3 # t5 = s[i]
beq, t5, x0, GET_FREQ_F # s[i] == 0: end of s, go on to t
addi t5, t5 -97 # t5 = s[i] - 'a' (97 is ASCII 'a')
slli t5, t5, 2 # scale the index to a byte offset (4-byte counters)
add t5, t5, t0 # t5 = &letter_freq[s[i] - 'a']
lw t3, 0(t5) # t3 = letter_freq[s[i] - 'a']
addi t3, t3, 1 # t3 = count + 1
sw t3, 0(t5) # letter_freq[s[i] - 'a'] = count + 1
addi t2, t2, 1 # i++
j LOOP2
GET_FREQ_F:
addi, t0, sp, 0 # t0 = &letter_freq[0]
addi, t1, a1, 0 # t1 = t
addi, t2, x0, 0 # t2 = i = 0
LOOP3: # for (; t[i]; i++)
add t3, t1, t2 # t3 = &t[i]
lb t5, (0)t3 # t5 = t[i]
beq, t5, x0, CHECK # t[i] == 0: end of t, go check the counters
addi t5, t5 -97 # t5 = t[i] - 'a'
slli t5, t5, 2 # scale the index to a byte offset (4-byte counters)
add t5, t5, t0 # t5 = &letter_freq[t[i] - 'a']
lw t3, 0(t5) # t3 = letter_freq[t[i] - 'a']
addi t3, t3, -1 # t3 = count - 1
sw t3, 0(t5) # letter_freq[t[i] - 'a'] = count - 1
addi t2, t2, 1 # i++
j LOOP3
CHECK:
addi t0, sp, 0 # t0 = &letter_freq[0]
addi t1, x0, 0 # t1 = i = 0
li t2, 26 # t2 = number of counters
LOOP4: # for (int i = 0; i < 26; i++)
beq t1, t2 TRUE # all 26 counters were zero: anagram
lw t3, 0(t0) # t3 = letter_freq[i];
bne t3, x0, FALSE # a nonzero counter means the letter counts differ
addi, t1, t1, 1 # i++;
addi, t0, t0, 4 # advance to the next counter
j LOOP4
FALSE:
addi a0, x0, 0 # return 0 (not an anagram)
j END_F
TRUE:
addi a0, x0, 01 # return 1 (anagram)
END_F:
jr ra # return, a0 = 1 (true) or 0 (false)
When I first ran make on the program, I got the messages below:
riscv-none-elf-as -R -march=rv32i -mabi=ilp32 -o hw2.o hw2.S
hw2.S: Assembler messages:
hw2.S: Warning: end of file in comment; newline inserted
hw2.S:34: Error: illegal operands `bne a0,x0 TRUE_1'
hw2.S:40: Error: illegal operands `jal ra isAnagram'
hw2.S:41: Error: illegal operands `beq a0,x0 TRUE_2'
hw2.S:47: Error: illegal operands `jal ra isAnagram'
hw2.S:48: Error: illegal operands `bne a0,x0 TRUE_3'
hw2.S:75: Error: illegal operands `beq t1,t2 GET_FREQ_s'
hw2.S:77: Error: unrecognized opcode `addi, t1,t1,1'
hw2.S:78: Error: unrecognized opcode `addi, t0,t0,4'
hw2.S:82: Error: unrecognized opcode `addi, t0,sp,0'
hw2.S:83: Error: unrecognized opcode `addi, t1,a0,0'
hw2.S:84: Error: unrecognized opcode `addi, t2,x0,0'
hw2.S:87: Error: illegal operands `lb t5,(0)t3'
hw2.S:88: Error: unrecognized opcode `beq, t5,x0,GET_FREQ_F'
hw2.S:89: Error: illegal operands `addi t5,t5-97'
hw2.S:98: Error: unrecognized opcode `addi, t0,sp,0'
hw2.S:99: Error: unrecognized opcode `addi, t1,a1,0'
hw2.S:100: Error: unrecognized opcode `addi, t2,x0,0'
hw2.S:103: Error: illegal operands `lb t5,(0)t3'
hw2.S:104: Error: unrecognized opcode `beq, t5,x0,CHECK'
hw2.S:105: Error: illegal operands `addi t5,t5-97'
hw2.S:118: Error: illegal operands `beq t1,t2 TRUE'
hw2.S:121: Error: unrecognized opcode `addi, t1,t1,1'
hw2.S:122: Error: unrecognized opcode `addi, t0,t0,4'
make: *** [Makefile:9: hw2.o] Error 1
bne a0, x0 TRUE_1
needs to be written as bne a0, x0, TRUE_1
Don't forget the comma before the branch target.
lb t5, (0)t3
needs to be written as lb t5, 0(t3)
beq, t5, x0, GET_FREQ_F
needs to be written as beq t5, x0, GET_FREQ_F
Remove the redundant comma after the mnemonic.
Warning: end of file in comment; newline inserted
Fix: end the file with a newline after the last line.
end:
li a7, SYSEXIT # "exit" syscall
add a0, x0, 0 # Use 0 return code
ecall # invoke syscall to terminate the program
+ #add a blank line
# Anagram self-test for rv32emu: _start runs isAnagram on three string pairs
# and reports each result on stdout through the write system call.
.org 0
# Provide program starting address to linker
.global _start

/* newlib system calls */
.set SYSEXIT, 93
.set SYSWRITE, 64

.data
# Test inputs: NUL-terminated strings scanned by isAnagram.
test_1_s: .string "anagram"
test_1_t: .string "nagaram"
test_2_s: .string "rat"
test_2_t: .string "anagram"
test_3_s: .string "tseng"
test_3_t: .string "gnest"

# Result messages, each followed by its length for the write system call.
# `.` is the current location, so `.-label` is the number of bytes emitted
# since the label. .string appends a terminating NUL byte; the trailing -1
# keeps that NUL out of the length so it is not written to stdout.
correct_1: .string "test_1: correct\n"
.set t1cor_size, .-correct_1-1
not_correct_1: .string "test_1: not correct\n"
.set t1incor_size, .-not_correct_1-1
correct_2: .string "test_2: correct\n"
.set t2cor_size, .-correct_2-1
not_correct_2: .string "test_2: not correct\n"
.set t2incor_size, .-not_correct_2-1
correct_3: .string "test_3: correct\n"
.set t3cor_size, .-correct_3-1
not_correct_3: .string "test_3: not correct\n"
.set t3incor_size, .-not_correct_3-1
.text
# _start: test driver for rv32emu.
# Calls isAnagram on the three string pairs and reports each result through
# the write system call (a7 = SYSWRITE, a1 = buffer address, a2 = length).
# Expected outcomes encoded by the branches: test_1 and test_3 must return
# nonzero, test_2 must return zero.
# NOTE(review): rv32emu's syscall_write (quoted later in these notes) takes
# the file descriptor from a0. Here a0 still holds isAnagram's return value
# when ecall executes, so a result of 0 is written to fd 0 instead of
# stdout. This is the problem analysed in the notes right after this listing.
_start:
li a7, SYSWRITE # "write" system call
li a0, 1 # 1 = standard output (stdout); NOTE(review): overwritten by the next instruction
la a0, test_1_s # s(a0) = test_1_s
la a1, test_1_t # t(a1) = test_1_t
jal ra, isAnagram # a0 = isAnagram(s, t)
bne a0, x0, TRUE_1 # nonzero (anagram) is the expected result for test_1
la a1, not_correct_1 # test1 failure message address
la a2, t1incor_size # test1 failure message length (a .set constant, not an address; NOTE(review): li is the usual way to load it)
ecall
TEST_2:
la a0, test_2_s # s(a0) = test_2_s
la a1, test_2_t # t(a1) = test_2_t
jal ra, isAnagram # a0 = isAnagram(s, t)
beqz a0, TRUE_2 # zero (not an anagram) is the expected result for test_2
la a1, not_correct_2 # test2 failure message address
la a2, t2incor_size # test2 failure message length
ecall
TEST_3:
la a0, test_3_s # s(a0) = test_3_s
la a1, test_3_t # t(a1) = test_3_t
jal ra, isAnagram # a0 = isAnagram(s, t)
bne a0, x0, TRUE_3 # nonzero (anagram) is the expected result for test_3
la a1, not_correct_3 # test3 failure message address
la a2, t3incor_size # test3 failure message length
ecall
j end
TRUE_1:
la a1, correct_1 # test1 success message address
la a2, t1cor_size # test1 success message length
ecall
j TEST_2 # continue with test 2
TRUE_2:
la a1, correct_2 # test2 success message address
la a2, t2cor_size # test2 success message length
ecall
j TEST_3 # continue with test 3
TRUE_3:
la a1, correct_3 # test3 success message address
la a2, t3cor_size # test3 success message length
ecall # NOTE(review): no jump follows, so execution continues into whatever code comes next instead of branching to end
# ----------------------------------------------------------------------
# int isAnagram(const char *s, const char *t)
#
#   in:      a0 = s, a1 = t   (NUL-terminated strings)
#   out:     a0 = 1 if s and t contain the same letters with the same
#                 multiplicity, 0 otherwise
#   scratch: t0, t1, t2, t3, t5   (a1 and sp are preserved)
#
# A 26-word array letter_freq[] on the stack is zeroed, incremented for
# every character of s, decremented for every character of t, and then
# scanned: the strings are anagrams exactly when every counter is zero.
#
# Precondition: both strings consist of 'a'..'z' only. The index
# (char - 'a') is not range-checked, so any other character would address
# memory outside letter_freq[].
# ----------------------------------------------------------------------
isAnagram:
    addi    sp, sp, -104            # int letter_freq[26] at sp .. sp+103

    # ---- letter_freq[i] = 0 for i = 0..25 ----
    addi    t0, sp, 0               # t0 = &letter_freq[i]
    addi    t1, x0, 0               # t1 = i
    li      t2, 26                  # t2 = number of counters
LOOP1:
    beq     t1, t2, GET_FREQ_s      # done once i == 26
    sw      x0, 0(t0)               # letter_freq[i] = 0
    addi    t1, t1, 1               # i++
    addi    t0, t0, 4               # next counter
    j       LOOP1

    # ---- for (; *s; s++) letter_freq[*s - 'a']++ ----
GET_FREQ_s:
    addi    t1, a0, 0               # t1 = cursor into s
LOOP2:
    lb      t5, 0(t1)               # t5 = current character
    beq     t5, x0, GET_FREQ_F      # NUL terminator ends the pass over s
    addi    t5, t5, -97             # t5 = character - 'a'
    slli    t5, t5, 2               # scale to a byte offset (4-byte counters)
    add     t5, t5, sp              # t5 = &letter_freq[character - 'a']
    lw      t3, 0(t5)
    addi    t3, t3, 1               # count the letter
    sw      t3, 0(t5)
    addi    t1, t1, 1               # advance the cursor
    j       LOOP2

    # ---- for (; *t; t++) letter_freq[*t - 'a']-- ----
GET_FREQ_F:
    addi    t1, a1, 0               # t1 = cursor into t
LOOP3:
    lb      t5, 0(t1)               # t5 = current character
    beq     t5, x0, CHECK           # NUL terminator ends the pass over t
    addi    t5, t5, -97             # t5 = character - 'a'
    slli    t5, t5, 2               # scale to a byte offset (4-byte counters)
    add     t5, t5, sp              # t5 = &letter_freq[character - 'a']
    lw      t3, 0(t5)
    addi    t3, t3, -1              # cancel one occurrence of the letter
    sw      t3, 0(t5)
    addi    t1, t1, 1               # advance the cursor
    j       LOOP3

    # ---- any nonzero counter means the letter counts differ ----
CHECK:
    addi    t0, sp, 0               # t0 = &letter_freq[i]
    addi    t1, x0, 0               # t1 = i
    li      t2, 26
LOOP4:
    beq     t1, t2, TRUE            # all 26 counters were zero
    lw      t3, 0(t0)               # t3 = letter_freq[i]
    bne     t3, x0, FALSE
    addi    t1, t1, 1               # i++
    addi    t0, t0, 4               # next counter
    j       LOOP4

FALSE:
    addi    a0, x0, 0               # return 0: not an anagram
    j       END_F
TRUE:
    addi    a0, x0, 1               # return 1: anagram
END_F:
    addi    sp, sp, 104             # release letter_freq[]; without this every
                                    # call would return with sp 104 bytes too low
    jr      ra                      # return, a0 = 1 (true) or 0 (false)
# end: terminate the program through the exit system call with status 0.
end:
    li      a0, 0                   # exit status 0
    li      a7, SYSEXIT             # select the "exit" system call
    ecall                           # hand control to the environment; does not return
Problem:
In Ripes the assembly code works correctly, but in rv32emu the result of test_2 is not shown. When I give test_2_t the same length as test_2_s, it works. I am still trying to find the cause.
Fix it:
I finally found out that in Ripes the PrintString system call only needs a0 for the string address and a7 for the system call index.
In rv32emu, however, the write system call needs a0 to hold the file descriptor (1 for stdout in my case), a1 the string address, and a2 the string length, as described in docs/syscall.md.
After the jump to isAnagram, a0 has been overwritten with the return value (true or false). So if the two strings do not match (they are not anagrams), a0 is 0, and the following ecall instruction treats it as file descriptor 0 (stdin), which causes this problem.
In all three test cases, a0 is expected to be 1 (stdout) when ecall executes after the call to isAnagram. So it is necessary to add li a0, 1 on the paths where the return value is false (0).
# ----------------------------------------------------------------------
# Test routines for rv32emu. Each TEST_n is entered with `jal ra, TEST_n`,
# runs isAnagram on one string pair, writes a one-line verdict to stdout
# and returns to its caller.
#
# rv32emu's write system call takes a0 = file descriptor, a1 = buffer
# address and a2 = byte count. isAnagram returns its result in a0, so a0
# must be reloaded with 1 (stdout) before every ecall; a result of 0 would
# otherwise be used as fd 0 (stdin) and nothing would be printed. The
# reload is done once, in the shared tail PRINT_RESULT, so that no path
# depends on isAnagram's return value happening to equal 1.
#
# Each routine only selects the message (a1) and its length (a2); the
# *_size symbols are .set constants, hence `li` rather than `la`.
#
# Clobbers: a0-a2, a7 and whatever isAnagram clobbers.
# ----------------------------------------------------------------------
TEST_1:                             # passes when isAnagram returns nonzero
    addi    sp, sp, -4
    sw      ra, 0(sp)               # ra is overwritten by the jal below
    la      a0, test_1_s            # s(a0) = test_1_s
    la      a1, test_1_t            # t(a1) = test_1_t
    jal     ra, isAnagram
    bne     a0, x0, TRUE_1
    la      a1, not_correct_1       # test1 failure message
    li      a2, t1incor_size
    j       PRINT_RESULT

TEST_2:                             # passes when isAnagram returns zero
    addi    sp, sp, -4
    sw      ra, 0(sp)
    la      a0, test_2_s            # s(a0) = test_2_s
    la      a1, test_2_t            # t(a1) = test_2_t
    jal     ra, isAnagram
    beq     a0, x0, TRUE_2
    la      a1, not_correct_2       # test2 failure message
    li      a2, t2incor_size
    j       PRINT_RESULT

TEST_3:                             # passes when isAnagram returns nonzero
    addi    sp, sp, -4
    sw      ra, 0(sp)
    la      a0, test_3_s            # s(a0) = test_3_s
    la      a1, test_3_t            # t(a1) = test_3_t
    jal     ra, isAnagram
    bne     a0, x0, TRUE_3
    la      a1, not_correct_3       # test3 failure message
    li      a2, t3incor_size
    j       PRINT_RESULT

TRUE_1:
    la      a1, correct_1           # test1 success message
    li      a2, t1cor_size
    j       PRINT_RESULT

TRUE_2:
    la      a1, correct_2           # test2 success message
    li      a2, t2cor_size
    j       PRINT_RESULT

TRUE_3:
    la      a1, correct_3           # test3 success message
    li      a2, t3cor_size
                                    # falls through into PRINT_RESULT

# Shared tail: write a2 bytes at a1 to stdout, then return to the caller
# of TEST_n. Expects the return address saved at 0(sp) by TEST_n.
PRINT_RESULT:
    li      a7, SYSWRITE            # "write" system call
    li      a0, 1                   # fd 1 = stdout (replaces isAnagram's result)
    ecall
    lw      ra, 0(sp)
    addi    sp, sp, 4
    jr      ra
00010184 <isAnagram>:
10184: f6010113 addi sp,sp,-160
10188: 08112e23 sw ra,156(sp)
1018c: 08812c23 sw s0,152(sp)
10190: 0a010413 addi s0,sp,160
10194: f6a42623 sw a0,-148(s0)
10198: f6b42423 sw a1,-152(s0)
######################### Section 0 #########################
1019c: f7c40793 addi a5,s0,-132
101a0: 06800713 li a4,104 # int letter_freq[26] = {0}
101a4: 00070613 mv a2,a4 # a2 = a4
101a8: 00000593 li a1,0
101ac: 00078513 mv a0,a5 # initial address
101b0: 328000ef jal ra,104d8 <memset>
101b4: fe042623 sw zero,-20(s0)
######################### Section 0 END #########################
######################### Section 1 #########################
101b8: 0480006f j 10200 <isAnagram+0x7c>
101bc: fec42783 lw a5,-20(s0) # a5 = 0
101c0: f6c42703 lw a4,-148(s0) # a4 = a0
101c4: 00f707b3 add a5,a4,a5 # a5 = a0 (test_1_s address) + a5 (offset)
101c8: 0007c783 lbu a5,0(a5) # a5 = s[i]
101cc: f9f78713 addi a4,a5,-97 # 97 = 'a', a4 = s[i] - 'a' (0 ~ 25)
101d0: 00271793 slli a5,a4,0x2 # push letter_freq
101d4: ff078793 addi a5,a5,-16
101d8: 008787b3 add a5,a5,s0 # s0 = stack top,
101dc: f8c7a783 lw a5,-116(a5) # a5 = letter_freq[s[i] - 'a']
101e0: 00178693 addi a3,a5,1 # a3 = letter_freq[s[i] - 'a']++
101e4: 00271793 slli a5,a4,0x2
101e8: ff078793 addi a5,a5,-16
101ec: 008787b3 add a5,a5,s0
101f0: f8d7a623 sw a3,-116(a5)
101f4: fec42783 lw a5,-20(s0) # i = 0
101f8: 00178793 addi a5,a5,1 # i++
101fc: fef42623 sw a5,-20(s0) # stored back
10200: fec42783 lw a5,-20(s0) # a5 = 0
10204: f6c42703 lw a4,-148(s0) # a4 = a0
10208: 00f707b3 add a5,a4,a5 # a5 = a0(test_1_s address) + a5 (offset)
1020c: 0007c783 lbu a5,0(a5) # a5 = s[i]
10210: fa0796e3 bnez a5,101bc <isAnagram+0x38> # s[i] != 0
10214: fe042423 sw zero,-24(s0)
######################### Section 1 END #########################
######################### Section 2 #########################
10218: 0480006f j 10260 <isAnagram+0xdc>
1021c: fe842783 lw a5,-24(s0)
10220: f6842703 lw a4,-152(s0)
10224: 00f707b3 add a5,a4,a5
10228: 0007c783 lbu a5,0(a5)
1022c: f9f78713 addi a4,a5,-97
10230: 00271793 slli a5,a4,0x2
10234: ff078793 addi a5,a5,-16
10238: 008787b3 add a5,a5,s0
1023c: f8c7a783 lw a5,-116(a5)
10240: fff78693 addi a3,a5,-1
10244: 00271793 slli a5,a4,0x2
10248: ff078793 addi a5,a5,-16
1024c: 008787b3 add a5,a5,s0
10250: f8d7a623 sw a3,-116(a5)
10254: fe842783 lw a5,-24(s0)
10258: 00178793 addi a5,a5,1
1025c: fef42423 sw a5,-24(s0)
10260: fe842783 lw a5,-24(s0)
10264: f6842703 lw a4,-152(s0)
10268: 00f707b3 add a5,a4,a5
1026c: 0007c783 lbu a5,0(a5)
10270: fa0796e3 bnez a5,1021c <isAnagram+0x98>
10274: fe042223 sw zero,-28(s0)
######################### Section 2 END #########################
######################### Section 3 #########################
10278: 0300006f j 102a8 <isAnagram+0x124>
1027c: fe442783 lw a5,-28(s0)
10280: 00279793 slli a5,a5,0x2
10284: ff078793 addi a5,a5,-16
10288: 008787b3 add a5,a5,s0
1028c: f8c7a783 lw a5,-116(a5)
10290: 00078663 beqz a5,1029c <isAnagram+0x118>
10294: 00000793 li a5,0
10298: 0200006f j 102b8 <isAnagram+0x134>
1029c: fe442783 lw a5,-28(s0)
102a0: 00178793 addi a5,a5,1
102a4: fef42223 sw a5,-28(s0)
102a8: fe442703 lw a4,-28(s0)
102ac: 01900793 li a5,25
102b0: fce7d6e3 bge a5,a4,1027c <isAnagram+0xf8>
102b4: 00100793 li a5,1
102b8: 00078513 mv a0,a5
######################### Section 3 END #########################
102bc: 09c12083 lw ra,156(sp)
102c0: 09812403 lw s0,152(sp)
102c4: 0a010113 addi sp,sp,160
102c8: 00008067 ret
Section 0 corresponds to the original C code
int letter_freq[26] = {0};
Section 1 is corresponded to original C code
for (int i = 0; s[i]; i++) {
letter_freq[s[i] - 'a']++;
}
line 26
need addi a5, a5, -16
Section 2 is corresponded to original C code
for (int i = 0; t[i]; i++) {
letter_freq[t[i] - 'a']--;
}
Section 3 is corresponded to original C code
for (int i = 0; i < 26; i++) {
if (letter_freq[i])
return 0;
}
return 1;
00010184 <isAnagram>:
10184: f8010113 addi sp,sp,-128
10188: 06112e23 sw ra,124(sp)
1018c: 06812c23 sw s0,120(sp)
10190: 06912a23 sw s1,116(sp)
10194: 00050493 mv s1,a0
10198: 00058413 mv s0,a1
#################### Section 0 ####################
1019c: 06800613 li a2,104
101a0: 00000593 li a1,0
101a4: 00810513 addi a0,sp,8
101a8: 264000ef jal ra,1040c <memset>
101ac: 0004c783 lbu a5,0(s1)
#################### Section 0 End ####################
#################### Section 1 ####################
101b0: 02078863 beqz a5,101e0 <isAnagram+0x5c> # a5 = 0 break the for loop
101b4: 00148513 addi a0,s1,1 # s++
101b8: f9f78793 addi a5,a5,-97 # - 'a'
101bc: 00279793 slli a5,a5,0x2
101c0: 07078793 addi a5,a5,112
101c4: 002787b3 add a5,a5,sp
101c8: f987a703 lw a4,-104(a5)
101cc: 00170713 addi a4,a4,1
101d0: f8e7ac23 sw a4,-104(a5)
101d4: 00150513 addi a0,a0,1 # i++
101d8: fff54783 lbu a5,-1(a0) # a5 = s[i]
101dc: fc079ee3 bnez a5,101b8 <isAnagram+0x34> # s[i]!=0
#################### Section 1 End ####################
#################### Section 2 ####################
101e0: 00044783 lbu a5,0(s0)
101e4: 02078863 beqz a5,10214 <isAnagram+0x90>
101e8: 00140593 addi a1,s0,1
101ec: f9f78793 addi a5,a5,-97
101f0: 00279793 slli a5,a5,0x2
101f4: 07078793 addi a5,a5,112
101f8: 002787b3 add a5,a5,sp
101fc: f987a703 lw a4,-104(a5)
10200: fff70713 addi a4,a4,-1
10204: f8e7ac23 sw a4,-104(a5)
10208: 00158593 addi a1,a1,1
1020c: fff5c783 lbu a5,-1(a1)
10210: fc079ee3 bnez a5,101ec <isAnagram+0x68>
#################### Section 2 End ####################
#################### Section 3 ####################
10214: 00810793 addi a5,sp,8
10218: 07010693 addi a3,sp,112
1021c: 0007a703 lw a4,0(a5)
10220: 00071a63 bnez a4,10234 <isAnagram+0xb0>
10224: 00478793 addi a5,a5,4
10228: fed79ae3 bne a5,a3,1021c <isAnagram+0x98>
1022c: 00100513 li a0,1
10230: 0080006f j 10238 <isAnagram+0xb4>
10234: 00000513 li a0,0
#################### Section 3 End ####################
10238: 07c12083 lw ra,124(sp)
1023c: 07812403 lw s0,120(sp)
10240: 07412483 lw s1,116(sp)
10244: 08010113 addi sp,sp,128
10248: 00008067 ret
Section 0 corresponds to the original C code
int letter_freq[26] = {0};
Section 1 is corresponded to original C code
for (int i = 0; s[i]; i++) {
letter_freq[s[i] - 'a']++;
}
Section 2 is corresponded to original C code
for (int i = 0; t[i]; i++) {
letter_freq[t[i] - 'a']--;
}
Section 3 is corresponded to original C code
for (int i = 0; i < 26; i++) {
if (letter_freq[i])
return 0;
}
return 1;
In section 1 of isAnagram(), the assembly code first does the increment in line 20 (101b4) and then loads the value array[i-1] in line 29 (101d8) for the loop condition.
The loop then branches back to line 21 (101b8), the line right after that increment.
In section 3, one branch and some code size are saved.
00010244 <isAnagram>:
10244: f8010113 addi sp,sp,-128
10248: 06812c23 sw s0,120(sp)
1024c: 06912a23 sw s1,116(sp)
10250: 00058413 mv s0,a1
10254: 00050493 mv s1,a0
#################### Section 0 ####################
10258: 06800613 li a2,104
1025c: 00000593 li a1,0
10260: 00810513 addi a0,sp,8
10264: 06112e23 sw ra,124(sp)
10268: 1bc000ef jal ra,10424 <memset>
1026c: 0004c783 lbu a5,0(s1)
#################### Section 0 End ####################
#################### Section 1 ####################
10270: 02078863 beqz a5,102a0 <isAnagram+0x5c>
10274: 00148513 addi a0,s1,1
10278: f9f78793 addi a5,a5,-97
1027c: 00279793 slli a5,a5,0x2
10280: 07078793 addi a5,a5,112
10284: 002786b3 add a3,a5,sp
10288: f986a703 lw a4,-104(a3)
1028c: 00054783 lbu a5,0(a0)
10290: 00150513 addi a0,a0,1
10294: 00170713 addi a4,a4,1
10298: f8e6ac23 sw a4,-104(a3)
1029c: fc079ee3 bnez a5,10278 <isAnagram+0x34>
#################### Section 1 End ####################
#################### Section 2 ####################
102a0: 00044783 lbu a5,0(s0)
102a4: 02078863 beqz a5,102d4 <isAnagram+0x90>
102a8: 00140593 addi a1,s0,1
102ac: f9f78793 addi a5,a5,-97
102b0: 00279793 slli a5,a5,0x2
102b4: 07078793 addi a5,a5,112
102b8: 002786b3 add a3,a5,sp
102bc: f986a703 lw a4,-104(a3)
102c0: 0005c783 lbu a5,0(a1)
102c4: 00158593 addi a1,a1,1
102c8: fff70713 addi a4,a4,-1
102cc: f8e6ac23 sw a4,-104(a3)
102d0: fc079ee3 bnez a5,102ac <isAnagram+0x68>
#################### Section 2 End ####################
#################### Section 3 ####################
102d4: 00810793 addi a5,sp,8
102d8: 07010693 addi a3,sp,112
102dc: 0080006f j 102e4 <isAnagram+0xa0>
102e0: 02f68463 beq a3,a5,10308 <isAnagram+0xc4>
102e4: 0007a703 lw a4,0(a5)
102e8: 00478793 addi a5,a5,4
102ec: fe070ae3 beqz a4,102e0 <isAnagram+0x9c>
102f0: 07c12083 lw ra,124(sp)
102f4: 07812403 lw s0,120(sp)
102f8: 07412483 lw s1,116(sp)
102fc: 00000513 li a0,0
10300: 08010113 addi sp,sp,128
10304: 00008067 ret
10308: 07c12083 lw ra,124(sp)
1030c: 07812403 lw s0,120(sp)
10310: 07412483 lw s1,116(sp)
10314: 00100513 li a0,1
10318: 08010113 addi sp,sp,128
1031c: 00008067 ret
#################### Section 3 End ####################
Section 3 is a little different from the -O1 optimization: it loops between line 49 and line 50, and if a4 is not equal to 0 it simply returns 0.
00010244 <isAnagram>:
10244: f8010113 addi sp,sp,-128
10248: 06812c23 sw s0,120(sp)
1024c: 06912a23 sw s1,116(sp)
10250: 00058413 mv s0,a1
10254: 00050493 mv s1,a0
10258: 06800613 li a2,104
1025c: 00000593 li a1,0
10260: 00810513 addi a0,sp,8
10264: 06112e23 sw ra,124(sp)
10268: 1bc000ef jal ra,10424 <memset>
1026c: 0004c783 lbu a5,0(s1)
10270: 02078863 beqz a5,102a0 <isAnagram+0x5c>
10274: 00148513 addi a0,s1,1
10278: f9f78793 addi a5,a5,-97
1027c: 00279793 slli a5,a5,0x2
10280: 07078793 addi a5,a5,112
10284: 002786b3 add a3,a5,sp
10288: f986a703 lw a4,-104(a3)
1028c: 00054783 lbu a5,0(a0)
10290: 00150513 addi a0,a0,1
10294: 00170713 addi a4,a4,1
10298: f8e6ac23 sw a4,-104(a3)
1029c: fc079ee3 bnez a5,10278 <isAnagram+0x34>
102a0: 00044783 lbu a5,0(s0)
102a4: 02078863 beqz a5,102d4 <isAnagram+0x90>
102a8: 00140593 addi a1,s0,1
102ac: f9f78793 addi a5,a5,-97
102b0: 00279793 slli a5,a5,0x2
102b4: 07078793 addi a5,a5,112
102b8: 002786b3 add a3,a5,sp
102bc: f986a703 lw a4,-104(a3)
102c0: 0005c783 lbu a5,0(a1)
102c4: 00158593 addi a1,a1,1
102c8: fff70713 addi a4,a4,-1
102cc: f8e6ac23 sw a4,-104(a3)
102d0: fc079ee3 bnez a5,102ac <isAnagram+0x68>
102d4: 00810793 addi a5,sp,8
102d8: 07010693 addi a3,sp,112
102dc: 0080006f j 102e4 <isAnagram+0xa0>
102e0: 02f68463 beq a3,a5,10308 <isAnagram+0xc4>
102e4: 0007a703 lw a4,0(a5)
102e8: 00478793 addi a5,a5,4
102ec: fe070ae3 beqz a4,102e0 <isAnagram+0x9c>
102f0: 07c12083 lw ra,124(sp)
102f4: 07812403 lw s0,120(sp)
102f8: 07412483 lw s1,116(sp)
102fc: 00000513 li a0,0
10300: 08010113 addi sp,sp,128
10304: 00008067 ret
10308: 07c12083 lw ra,124(sp)
1030c: 07812403 lw s0,120(sp)
10310: 07412483 lw s1,116(sp)
10314: 00100513 li a0,1
10318: 08010113 addi sp,sp,128
1031c: 00008067 ret
00010244 <isAnagram>:
10244: f8010113 addi sp,sp,-128
10248: 06812c23 sw s0,120(sp)
1024c: 06912a23 sw s1,116(sp)
10250: 00058413 mv s0,a1
10254: 00050493 mv s1,a0
10258: 06800613 li a2,104
1025c: 00000593 li a1,0
10260: 00810513 addi a0,sp,8
10264: 06112e23 sw ra,124(sp)
10268: 1bc000ef jal ra,10424 <memset>
1026c: 0004c783 lbu a5,0(s1)
10270: 02078863 beqz a5,102a0 <isAnagram+0x5c>
10274: 00148513 addi a0,s1,1
10278: f9f78793 addi a5,a5,-97
1027c: 00279793 slli a5,a5,0x2
10280: 07078793 addi a5,a5,112
10284: 002786b3 add a3,a5,sp
10288: f986a703 lw a4,-104(a3)
1028c: 00054783 lbu a5,0(a0)
10290: 00150513 addi a0,a0,1
10294: 00170713 addi a4,a4,1
10298: f8e6ac23 sw a4,-104(a3)
1029c: fc079ee3 bnez a5,10278 <isAnagram+0x34>
102a0: 00044783 lbu a5,0(s0)
102a4: 02078863 beqz a5,102d4 <isAnagram+0x90>
102a8: 00140593 addi a1,s0,1
102ac: f9f78793 addi a5,a5,-97
102b0: 00279793 slli a5,a5,0x2
102b4: 07078793 addi a5,a5,112
102b8: 002786b3 add a3,a5,sp
102bc: f986a703 lw a4,-104(a3)
102c0: 0005c783 lbu a5,0(a1)
102c4: 00158593 addi a1,a1,1
102c8: fff70713 addi a4,a4,-1
102cc: f8e6ac23 sw a4,-104(a3)
102d0: fc079ee3 bnez a5,102ac <isAnagram+0x68>
102d4: 00810793 addi a5,sp,8
102d8: 07010693 addi a3,sp,112
102dc: 0080006f j 102e4 <isAnagram+0xa0>
102e0: 02f68463 beq a3,a5,10308 <isAnagram+0xc4>
102e4: 0007a703 lw a4,0(a5)
102e8: 00478793 addi a5,a5,4
102ec: fe070ae3 beqz a4,102e0 <isAnagram+0x9c>
102f0: 07c12083 lw ra,124(sp)
102f4: 07812403 lw s0,120(sp)
102f8: 07412483 lw s1,116(sp)
102fc: 00000513 li a0,0
10300: 08010113 addi sp,sp,128
10304: 00008067 ret
10308: 07c12083 lw ra,124(sp)
1030c: 07812403 lw s0,120(sp)
10310: 07412483 lw s1,116(sp)
10314: 00100513 li a0,1
10318: 08010113 addi sp,sp,128
1031c: 00008067 ret
00010238 <isAnagram>:
10238: f8010113 addi sp,sp,-128
1023c: 06812c23 sw s0,120(sp)
10240: 06912a23 sw s1,116(sp)
10244: 00058413 mv s0,a1
10248: 00050493 mv s1,a0
1024c: 06800613 li a2,104
10250: 00000593 li a1,0
10254: 00810513 addi a0,sp,8
10258: 06112e23 sw ra,124(sp)
1025c: 1a0000ef jal ra,103fc <memset>
10260: 00048513 mv a0,s1
10264: 00054783 lbu a5,0(a0)
10268: 00150513 addi a0,a0,1
1026c: 04079263 bnez a5,102b0 <isAnagram+0x78>
10270: 00040593 mv a1,s0
10274: 0005c783 lbu a5,0(a1)
10278: 00158593 addi a1,a1,1
1027c: 04079a63 bnez a5,102d0 <isAnagram+0x98>
10280: 00810793 addi a5,sp,8
10284: 0007a703 lw a4,0(a5)
10288: 06071463 bnez a4,102f0 <isAnagram+0xb8>
1028c: 00478793 addi a5,a5,4
10290: 07010713 addi a4,sp,112
10294: fee798e3 bne a5,a4,10284 <isAnagram+0x4c>
10298: 00100513 li a0,1
1029c: 07c12083 lw ra,124(sp)
102a0: 07812403 lw s0,120(sp)
102a4: 07412483 lw s1,116(sp)
102a8: 08010113 addi sp,sp,128
102ac: 00008067 ret
102b0: f9f78793 addi a5,a5,-97
102b4: 00279793 slli a5,a5,0x2
102b8: 07078793 addi a5,a5,112
102bc: 002787b3 add a5,a5,sp
102c0: f987a703 lw a4,-104(a5)
102c4: 00170713 addi a4,a4,1
102c8: f8e7ac23 sw a4,-104(a5)
102cc: f99ff06f j 10264 <isAnagram+0x2c>
102d0: f9f78793 addi a5,a5,-97
102d4: 00279793 slli a5,a5,0x2
102d8: 07078793 addi a5,a5,112
102dc: 002787b3 add a5,a5,sp
102e0: f987a703 lw a4,-104(a5)
102e4: fff70713 addi a4,a4,-1
102e8: f8e7ac23 sw a4,-104(a5)
102ec: f89ff06f j 10274 <isAnagram+0x3c>
102f0: 00000513 li a0,0
102f4: fa9ff06f j 1029c <isAnagram+0x64>
The -Os optimization generates many more bne than beq instructions, and the code layout is more complicated than at the other optimization levels. This code takes more "jump back" branches, such as the unconditional jumps in line 39 and line 47. This reduces code size, but I think it executes more branches than the other optimization levels.
For example, in loop 1, line 15 branches to line 32 (102b0), and the unconditional jump in line 39 goes back to line 13 (10264) to continue the for loop; loop 2 works the same way. So two jumps are taken on every pass through the loop.
rv32emu
Source Code Notesyscall.c
enum {
#define _(name, number) SYS_##name = number,
SUPPORTED_SYSCALLS
#undef _
};
#define SUPPORTED_SYSCALLS \
_(close, 57) \
_(lseek, 62) \
_(read, 63) \
_(write, 64) \
_(fstat, 80) \
_(exit, 93) \
_(gettimeofday, 169) \
_(brk, 214) \
_(open, 1024) \
IIF(RV32_HAS(SDL))( \
_(draw_frame, 0xBEEF) \
_(setup_queue, 0xC0DE) \
_(submit_queue, 0xFEED), \
)
/*
 * Entry point for guest system calls: reads the call number from
 * register a7 and dispatches to the matching syscall_<name>() handler.
 */
void syscall_handler(struct riscv_t *rv)
{
/* get the syscall number */
riscv_word_t syscall = rv_get_reg(rv, rv_reg_a7);
switch (syscall) { /* dispatch system call */
/* SUPPORTED_SYSCALLS expands to one `case SYS_<name>:` per table entry,
 * each of which calls syscall_<name>(rv). */
#define _(name, number) \
case SYS_##name: \
syscall_##name(rv); \
break;
SUPPORTED_SYSCALLS
#undef _
default:
/* number not in the table: report it on stderr and call rv_halt() */
fprintf(stderr, "unknown syscall %d\n", (int) syscall);
rv_halt(rv);
break;
}
}
From riscv_word_t syscall = rv_get_reg(rv, rv_reg_a7);
we know that the system call index needs to be stored in a7.
So, if we invoke system call 64, it is handled by syscall_handler,
which in turn calls another function, syscall_write().
/*
 * Handler for the write system call.
 * Guest registers on entry: a0 = file descriptor, a1 = guest address of
 * the buffer, a2 = byte count. On return a0 holds the number of bytes
 * written, or -1 when the descriptor is not found in fd_map.
 */
static void syscall_write(struct riscv_t *rv)
{
state_t *s = rv_userdata(rv); /* access userdata */
/* _write(fd, buffer, count) */
riscv_word_t fd = rv_get_reg(rv, rv_reg_a0);
riscv_word_t buffer = rv_get_reg(rv, rv_reg_a1);
riscv_word_t count = rv_get_reg(rv, rv_reg_a2);
/* read the string that we are printing into a temporary host buffer */
/* NOTE(review): the malloc() result is used without a NULL check */
uint8_t *tmp = malloc(count);
memory_read(s->mem, tmp, buffer, count);
/* lookup the file descriptor */
map_iter_t it;
map_find(s->fd_map, &it, &fd);
if (!map_at_end(s->fd_map, &it)) {
/* write out the data */
size_t written = fwrite(tmp, 1, count, map_iter_value(&it, FILE *));
/* return number of bytes written */
rv_set_reg(rv, rv_reg_a0, written);
} else {
/* error: descriptor not present in fd_map */
rv_set_reg(rv, rv_reg_a0, -1);
}
free(tmp);
}
From the definition, we know that
a0 holds fd,
a1 holds buffer, and
a2 holds count.
The write call, with index 64, is defined as
/* The structure with the cookie function pointers.
The tag name of this struct is _IO_cookie_io_functions_t to
preserve historic C++ mangled names for functions taking
cookie_io_functions_t arguments. That name should not be used in
new code. */
typedef struct _IO_cookie_io_functions_t
{
cookie_read_function_t *read; /* Read bytes. */
cookie_write_function_t *write; /* Write bytes. */
cookie_seek_function_t *seek; /* Seek/tell file position. */
cookie_close_function_t *close; /* Close file. */
} cookie_io_functions_t;
cookie_io_functions_t.h
/* Write NBYTES bytes pointed to by BUF to COOKIE. Write all NBYTES bytes
unless there is an error. Return number of bytes written. If
there is an error, return 0 and do not write anything. If the file
has been opened for append (__mode.__append set), then set the file
pointer to the end of the file and then do the write; if not, just
write at the current file pointer. */
typedef __ssize_t cookie_write_function_t (void *__cookie, const char *__buf,
size_t __nbytes);
There is one thing that puzzled me for a long time.
.data
str: .ascii "Hello World!\n"
.set str_size, .-str
.set str_size, .-str
was quite hard for me to understand.
. means the current location and
- just means subtraction, so the expression .-str
is the length in bytes (the strlen) of the .ascii string.
發哥(聯發科)上機考題目整理Benson Note[分享]嵌入式經典面試題
Dec 15, 2024contributed by < chiangkd > :::warning :warning: 留意細節!唯有重視小處並步步為營,方可挑戰原始程式碼達到三千萬行的 Linux 核心 :notes: jserv Got it! 謝謝老師 ::: 作業要求
Mar 24, 2024踩了不少雷,紀錄一下
Dec 6, 2023Github : FreeRTOS-on-VexRiscv Requirement and Expectation :::warning Reproduce Run FreeRTOS and multitasking on VexRiscv with FreeRTOS 202212.00 released or latest Study Reference Link to understand how to measure FreeRTOS context switch Understand how to accurately measure cycle count and related latencyInclude timer interrupt. Run above at simulator and quantify the performence about context switch
May 29, 2023or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up