# Finding redundant code regions with GDB's Python API (RISC-V)
目標是統計某個區域內各個 PC address 所使用的 register 的使用次數,以及彼此互為 alias 的 register,並記錄該區域內所有 register 的狀態。先假設:以同一個 compiler 來說,如果編譯時加了某些 option 導致某段 code 產生得不好,那麼該區域內殘留的 register 狀態,應該會比產生得好的版本來得多。
# python
```python!
import struct
import sys
import os
import argparse
import gdb
import subprocess
# ---------------------------------------------------------------------------
# Session setup: reset the log file, switch gdb logging on, create the
# bookkeeping containers used by the rest of the script, then run the target
# to the first breakpoint.
# ---------------------------------------------------------------------------

# Start from an empty log.txt: remove a leftover file, then create a new
# zero-length one (the handle is closed again right away).
filepath = "log.txt"
if os.path.isfile(filepath):
    os.remove(filepath)
with open(filepath, "w") as f:
    pass

# Earlier experiments (redirecting sys.stdout, sourcing extra .gdb scripts and
# setting source-level breakpoints) used to live here as commented-out lines.

# Counters and scratch values. Several of them are not referenced in the part
# of the script visible here; they are kept so that the module namespace stays
# the same.
backup_cycle = last_inst = prev_addrees = 0
dual_issue = no_dual_issue = long_cycle = 0
current_area = 50
current_start = 0
possiable_address = []
record = {}
same_inst = {}

# Have gdb write its output to log.txt.
for logging_command in (
    "set logging redirect on",
    "set logging file log.txt",
    "set logging on",
):
    gdb.execute(logging_command)

# Address breakpoints to plant before the target is resumed.
init_bp = ["*0x11a4"]
print("start check")

# The step count is injected by the caller through the globals dict handed to
# exec(); a missing key raises KeyError here.
times = globals()["times"]

# Per-region register bookkeeping.
rrecord_register_valid_by_register = {}        # register name -> latest value text
rrecord_register_valid_by_register_count = {}  # register name -> usage count
rrecord_pcaddress_register_status = {}         # pc (hex str) -> [{register: [values]}]
record_register_valid = {}                     # pc (hex str) -> raw gdb output
alias_chain = {}                               # register -> register with equal value
history_get_valid_list = []                    # registers seen, in first-seen order

# Region tracking: start pc (hex str) -> list of end pcs (hex str).
active_range = {}
start_pc_address = ""
end_pc_address = ""
find_loop = -1

# Plant the breakpoints, then continue the target until one is hit.
# NOTE(review): the indentation of this script was lost; "c" is assumed to run
# once after the loop. With the single entry in init_bp both readings behave
# the same.
for bp in init_bp:
    gdb.Breakpoint(bp)
gdb.execute("c")

# Previous pc and vtype tracking state for the stepping loop.
backpc = 0
vtype = 0
vtype_pc = 0
# Main tracing loop.
# Single-steps the target `times` times. For every stop it prints the PC,
# parses the operands of the instruction at that PC, samples the value of each
# accepted operand through gdb, and updates the usage counters, the per-PC
# value histories, the alias links and the active_range table.
# NOTE(review): this copy of the script has lost its leading indentation, so
# the nesting described in the comments below is inferred from statement order
# and from the sample output; confirm it against the original file.
for i in range(int(times)):
# gdb.execute("nexti")
gdb.execute("ni")
# print address info
frame = gdb.selected_frame()
print("PC address:", hex(frame.pc()))
# find_loop == -1 means no region is currently open: remember this PC as the
# start of a new region and make sure active_range has a list for it.
if find_loop == -1:
start_pc_address =str(hex(frame.pc()))
if(start_pc_address not in active_range ):
active_range[start_pc_address] = []
find_loop = 0
# Ask gdb to disassemble at the current PC and keep the text form.
disassembly = gdb.execute("disassemble 0x{:x},+1".format(frame.pc()), to_string=True)
# Third whitespace-separated token of the disassembly text (not used again in
# the visible code).
opcode = disassembly.split()[2]
# Everything after the "=> " current-instruction marker.
arr = disassembly.split('=> ')
# Carve the operand field out of that text and drop the trailing "\nEnd" that
# follows the instruction in gdb's output.
get_asm_register = str(arr[1].split(" ")[1]).replace("\t"," ").split(" ")[-1].replace("\nEnd","")
# Operands are comma separated.
get_asm_register_list = get_asm_register.split(",")
# Names accepted for this instruction / base names already handled for it.
get_valid_list = []
process_register = []
for register in get_asm_register_list:
offset = 0
# "off(base)" memory operand: result is the base register, offset the text in
# front of the parenthesis (a string, possibly empty).
if "(" in register:
result = register[register.index("("):].replace("(","").replace(")","")
offset = register[:register.index("(")]
# print(offset)
else:
result = register
# Handle each base name only once per instruction.
if result not in process_register:
# Skip operands that are plain decimal numbers or that contain "-", "0x",
# "ret", "e", "m", "ta" or "tu".
# NOTE(review): presumably meant to drop immediates and vsetvli fields such as
# e16 / m1 / ta; the bare "e" test also rejects any name containing that
# letter, e.g. "zero".
if not str(result).isdigit() and str(result).find("-") <0 and str(result).find("0x") <0 and str(result).find("ret") <0 and str(result).find("e") <0 and str(result).find("m") <0 and str(result).find("ta") <0 and str(result).find("tu") <0:
# Memory operand with a decimal offset: evaluate base + offset in gdb and file
# the result under the full operand text (e.g. "120(sp)"). When offset was cut
# from the operand it is a string, so `offset != 0` is always true there and
# isdigit() alone decides this branch.
if(str(offset).isdigit() and offset != 0):
check_invalid_register = gdb.execute("p /x $" + result +" + "+ str(offset), to_string=True).replace("\t"," ")
get_register = register
# Value is whatever gdb printed after the "=".
get_register_value = str( check_invalid_register.split("=")[1]).strip().strip()
check_pass =1
else:
# Plain register (or memory operand without a decimal offset): query it with
# "info register"; the first space-separated token of the reply is the name.
check_invalid_register = gdb.execute("info register " + result, to_string=True).replace("\t"," ")
get_register = check_invalid_register.split(" ")[0]
# Accept the reply only when it does not contain "Invalid"; the value is the
# reply with the newline and the register name removed.
if check_invalid_register.find("Invalid") < 0 :
get_register_value = str( check_invalid_register).replace("\n","").replace(str( get_register).strip() , "").strip()
check_pass =1
# NOTE(review): check_pass is never set back to 0 in the visible code, so once
# any operand has passed this test stays true for later operands, and it is
# unbound if the very first operand reaches here through the "Invalid" path.
if check_pass == 1:
# Raw gdb reply for this PC and the latest value per register name.
record_register_valid[str(hex(frame.pc()))] = str( check_invalid_register)
rrecord_register_valid_by_register[str( get_register).strip()] = str( get_register_value)
pc_address = str(hex(frame.pc())).strip()
# Per-PC history: a list of single-key dicts {register name: [values...]}.
if pc_address not in rrecord_pcaddress_register_status:
rrecord_pcaddress_register_status[pc_address] = []
# Look for an existing entry for this register at this PC.
# NOTE(review): the keys are strings, so `in` below is a substring test
# ("s1" also matches an entry keyed "s11").
find_pc_address_record =0
for get_pc_address_register_status in rrecord_pcaddress_register_status[pc_address] :
for get_recored_register in get_pc_address_register_status:
if (get_register in get_recored_register):
find_pc_address_record = 1
# No entry yet: create one with an empty value list.
if find_pc_address_record == 0:
rrecord_pcaddress_register_status[pc_address].append({str(get_register):[]})
# print(rrecord_pcaddress_register_status[pc_address])
# Append the sampled value to every entry whose key contains the register
# name (same substring semantics as above).
for get_pc_address_register_status in rrecord_pcaddress_register_status[pc_address] :
# print(get_pc_address_register_status)
for get_recored_register in get_pc_address_register_status:
if (str(get_register) in get_recored_register):
get_pc_address_register_status[str(get_recored_register)].append (str( get_register_value))
# print(str(get_register))
# for get_register_status in get_pc_address_register_status[str(get_recored_register)]:
# print(get_register_status)
# rrecord_pcaddress_register_status [str(hex(frame.pc()))] = collect_memory
# Usage counter per register name.
# NOTE(review): the lookup and the increment use the stripped name while the
# first insert uses the unstripped one.
if( str( get_register).strip() in rrecord_register_valid_by_register_count):
rrecord_register_valid_by_register_count[ str( get_register).strip()] += 1
else:
rrecord_register_valid_by_register_count[ str( get_register)] = 1
# Remember the name for this instruction and, once, for the current region.
get_valid_list.append(str( get_register).strip())
if(str(get_register).strip() not in history_get_valid_list):
history_get_valid_list.append(str( get_register).strip())
# Mark the base name as handled for this instruction.
process_register.append (result)
# Operand text contains "ta" or "tu": read $vtype and compare it with the
# value seen at the previously recorded PC.
# NOTE(review): assumed to run per operand; the sample output shows these
# reports at vsetvli PCs, whose last operand is "ma".
if str(result).find("ta") >=0 or str(result).find("tu") >=0:
check_invalid_register = gdb.execute("p $vtype" , to_string=True).replace("\t"," ")
now_vtype =str( check_invalid_register.split("=")[1]).strip()
now_vtype_pc = frame.pc()
# First observation: seed the previous value/PC with the current ones.
if vtype == 0:
vtype = now_vtype
vtype_pc = now_vtype_pc
# Different PC than last time: report whether vtype stayed the same or not.
if(vtype_pc != now_vtype_pc ):
if(vtype == now_vtype):
print("vtype same PC prev_address:", hex(vtype_pc))
print("vtype same PC prev_vtype:", vtype)
print("vtype same PC now_address:", hex(now_vtype_pc))
print("vtype same PC now_vtype:", now_vtype)
else:
print("vtype diff PC prev_address:", hex(vtype_pc))
print("vtype diff PC prev_vtype:", vtype)
print("vtype diff PC now_address:", hex(now_vtype_pc))
print("vtype diff PC now_vtype:", now_vtype)
# Current observation becomes the previous one.
vtype = now_vtype
vtype_pc = now_vtype_pc
else:
vtype = now_vtype
vtype_pc = now_vtype_pc
# Alias detection: compare every register accepted for this instruction (y)
# with every register recorded so far (x); equal value strings link the two in
# alias_chain in both directions. The list only prevents handling the same
# pair twice within this step.
no_duplication_case_list = []
for y in get_valid_list:
for x in rrecord_register_valid_by_register:
if(x != y ):
if(str(rrecord_register_valid_by_register[str(x)]) == str(rrecord_register_valid_by_register[str(y)])):
if(str("\t"+x+" same value " + y ) not in no_duplication_case_list):
if(str("\t"+y+" same value " + x ) not in no_duplication_case_list):
# print("\t"+x+" same value " + y )
alias_chain[x] = y
alias_chain[y] = x
no_duplication_case_list.append (str("\t"+x+" same value " + y ) )
# The PC did not move forward relative to the previous step: treat it as the
# end of the current region and print the summary for it.
if( frame.pc() <= backpc):
print("find loop")
# Flag the open region as finished; it is closed at the bottom of the loop.
if find_loop == 0 :
find_loop = 1
print("===========================")
# Usage count of every register seen in this region.
for x in history_get_valid_list:
print(str(x)+ " usages : " + str( rrecord_register_valid_by_register_count[x]))
print("alias")
print("===========================")
# For every register, follow alias_chain until it ends or returns to a name
# that is already in the chain, then print the chain with the usage counts.
for x in history_get_valid_list:
test = []
test.append(x)
find = x
while True:
if ( find in alias_chain):
if ( alias_chain[find] not in test):
test.append(alias_chain[find])
find = alias_chain[find]
else:
break
else:
break
# test always holds at least x itself, so this condition is always true.
if(len(test)>=1):
print("========")
print("register : " + x)
# print("alias 3")
print(test)
for y in test:
print(str(y)+ " usages : " + str( rrecord_register_valid_by_register_count[y]))
# Reset the per-region bookkeeping (the per-PC histories and active_range are
# kept across regions).
alias_chain = {}
history_get_valid_list= []
record_register_valid = {}
rrecord_register_valid_by_register= {}
rrecord_register_valid_by_register_count = {}
backpc = frame.pc()
print("============next basic block===============")
# Remember this PC for the backward-jump test of the next step.
backpc = frame.pc()
# A region was flagged as finished above: record its end address under its
# start address and go back to the "no region open" state. end_pc_address
# still holds the PC of the previous step at this point.
if find_loop == 1:
if end_pc_address not in active_range[start_pc_address]:
active_range[start_pc_address].append (end_pc_address)
find_loop = -1
# Becomes the candidate region end seen by the next step.
end_pc_address = str(hex(frame.pc()))
# Post-processing after the stepping loop.
# Prints the recorded active ranges, then compares the per-PC register value
# histories of every pair of PCs that lie inside one active range and counts,
# per "start-end" range, how many pairs hold identical histories.
# NOTE(review): this copy of the script has lost its leading indentation; the
# nesting described below is inferred and should be confirmed.
print("print range")
# Order the ranges by numeric start address (keys are hex strings).
active_range = dict(sorted(active_range.items(), key=lambda item: int(item[0], 16)))
for x in active_range:
print(x)
print(active_range[x])
print("print record")
print(type(rrecord_pcaddress_register_status))
# Order the per-PC histories by numeric PC as well.
rrecord_pcaddress_register_status = dict(sorted(rrecord_pcaddress_register_status.items(), key=lambda item: int(item[0], 16)))
# PC pairs already reported, and the match counter keyed by "start-end".
no_duplication_case_list = []
rootcause_range = {}
# Visit every ordered pair of recorded PCs.
for get_pc_address in rrecord_pcaddress_register_status :
for get_pc_address2 in rrecord_pcaddress_register_status :
# print(get_pc_address)
# check_range becomes 1 when both PCs fall inside one recorded range.
check_range = 0
for search_active_range in active_range:
# Only ranges that start below the second PC can contain it.
if (int(get_pc_address2, 16) >int(search_active_range, 16) ):
check_active_range = active_range[search_active_range]
# NOTE(review): the loop variable `range` rebinds the builtin name for the
# rest of the script.
for range in check_active_range:
# a = range start, d = range end, b / c = the two PCs being compared.
a= int(search_active_range, 16)
b= int(get_pc_address, 16)
c= int(get_pc_address2, 16)
d= int(range, 16)
# Both PCs must lie strictly between the range start and the range end.
if( d> b and b >a and d > a ):
if(d > c and c >a and d > a ):
check_range=1
break
# Stop at the first matching range; search_active_range and range keep the
# values of that match and are used further down.
if check_range == 1:
break
if(check_range == 1):
if(get_pc_address != get_pc_address2):
# Compare every register history of the first PC with every register history
# of the second PC, for differently named registers only.
for get_pc_address_register_status in rrecord_pcaddress_register_status[get_pc_address] :
for twt in get_pc_address_register_status:
for get_pc_address_register_status2 in rrecord_pcaddress_register_status[get_pc_address2] :
for twt2 in get_pc_address_register_status2:
if(twt != twt2):
# True when both registers recorded exactly the same list of values.
check_same = get_pc_address_register_status[twt] == get_pc_address_register_status2[twt2]
# Skip PC pairs that were already reported, in either order.
if(str("\t"+get_pc_address+" same value " + get_pc_address2 ) not in no_duplication_case_list):
if(str("\t"+get_pc_address2+" same value " + get_pc_address ) not in no_duplication_case_list):
if (check_same) :
# print(c > b and b >a and c > a )
# print(active_range[search_active_range])
# print(search_active_range)
# print(get_pc_address2)
# print(range)
# Count the match under the "start-end" key of the range found above.
if (search_active_range+ "-" + range ) not in rootcause_range :
rootcause_range[str( (search_active_range+ "-" + range) )] = 1
else :
rootcause_range[str( (search_active_range+ "-" + range) )] +=1
# if ("(" + get_pc_address+ ","+ get_pc_address2 + ") in "+search_active_range+ "-" + range ) not in rootcause_range :
# rootcause_range[str( ("(" + get_pc_address+ ","+ get_pc_address2 + ") in "+search_active_range+ "-" + range ))] = 1
# else :
# rootcause_range[str( ("(" + get_pc_address+ ","+ get_pc_address2 + ") in "+search_active_range+ "-" + range ) )] +=1
# Report both PCs, the register names and their identical histories.
print("same value same block")
print(get_pc_address)
print(twt)
print(get_pc_address_register_status[twt])
print(get_pc_address2)
print(twt2)
print(get_pc_address_register_status2[twt2])
# Remember the PC pair so it is not reported again.
no_duplication_case_list.append (str("\t"+get_pc_address+" same value " + get_pc_address2 ) )
# Final report: one entry per "start-end" key collected in rootcause_range,
# followed by the number of matches counted for it.
print("=================================")
print("rootcause_range")
for span_key, match_count in rootcause_range.items():
    print("range:")
    print(span_key)
    print("redunent count:")
    print(match_count)

# Switch gdb logging off again, then issue gdb's "interrupt" command.
for closing_command in ("set logging off", "interrupt"):
    gdb.execute(closing_command)
```
# run
```bash!
python exec(open('gdb_si_by_time copy.py').read(), {'times': '40000'})
```
上面的指令會執行四萬次 `ni`。
由於要考慮 register 的值最遠可以傳播到哪裡,所以需要定義 active_range;最後再比對不同 PC address 上各個 register 記錄到的所有狀態,就可以找出哪些 register 之間有 alias 關係。
```bash=
PC address: 0x11a6
PC address: 0x11aa
PC address: 0x11ae
PC address: 0x11b0
PC address: 0x11b2
PC address: 0x11b4
PC address: 0x11b6
PC address: 0x11b8
PC address: 0x11ba
PC address: 0x11bc
PC address: 0x11be
PC address: 0x11c0
PC address: 0x11c2
PC address: 0x11c4
PC address: 0x11c6
PC address: 0x11c8
PC address: 0x11cc
PC address: 0x11d0
PC address: 0x11d2
PC address: 0x11d6
PC address: 0x11d8
PC address: 0x11da
PC address: 0x11dc
PC address: 0x11de
PC address: 0x11e0
PC address: 0x11e4
PC address: 0x11e8
PC address: 0x11ec
PC address: 0x11ee
PC address: 0x11f0
PC address: 0x11f4
vtype diff PC prev_address: 0x11e4
vtype diff PC prev_vtype: 192
vtype diff PC now_address: 0x11f4
vtype diff PC now_vtype: 200
PC address: 0x11f8
PC address: 0x11fc
vtype diff PC prev_address: 0x11f4
vtype diff PC prev_vtype: 200
vtype diff PC now_address: 0x11fc
vtype diff PC now_vtype: 199
PC address: 0x1200
PC address: 0x1202
PC address: 0x1206
vtype diff PC prev_address: 0x11fc
vtype diff PC prev_vtype: 199
vtype diff PC now_address: 0x1206
vtype diff PC now_vtype: 200
PC address: 0x120a
PC address: 0x120e
PC address: 0x1212
PC address: 0x1216
PC address: 0x1218
PC address: 0x121c
PC address: 0x121e
PC address: 0x1222
PC address: 0x1224
PC address: 0x11ec
find loop
===========================
t0 usages : 2
t1 usages : 2
ra usages : 1
120(sp) usages : 1
s0 usages : 1
112(sp) usages : 1
s1 usages : 3
104(sp) usages : 1
s2 usages : 4
96(sp) usages : 1
s3 usages : 1
88(sp) usages : 1
s4 usages : 4
80(sp) usages : 1
s5 usages : 3
72(sp) usages : 1
s6 usages : 1
64(sp) usages : 1
s7 usages : 2
56(sp) usages : 1
s8 usages : 1
48(sp) usages : 1
s9 usages : 2
40(sp) usages : 1
s10 usages : 2
32(sp) usages : 1
s11 usages : 2
24(sp) usages : 1
sp usages : 1
a0 usages : 8
a1 usages : 5
a2 usages : 7
a3 usages : 4
v8 usages : 2
a4 usages : 1
a5 usages : 8
v1 usages : 3
alias
===========================
========
register : t0
['t0']
t0 usages : 2
========
register : t1
['t1']
t1 usages : 2
========
register : ra
['ra']
ra usages : 1
========
register : 120(sp)
['120(sp)']
120(sp) usages : 1
========
register : s0
['s0']
s0 usages : 1
========
register : 112(sp)
['112(sp)']
112(sp) usages : 1
========
register : s1
['s1', 's6']
s1 usages : 3
s6 usages : 1
========
register : 104(sp)
['104(sp)']
104(sp) usages : 1
========
register : s2
['s2', 'a2']
s2 usages : 4
a2 usages : 7
========
register : 96(sp)
['96(sp)']
96(sp) usages : 1
========
register : s3
['s3']
s3 usages : 1
========
register : 88(sp)
['88(sp)']
88(sp) usages : 1
========
register : s4
['s4', 'a5', 's11']
s4 usages : 4
a5 usages : 8
s11 usages : 2
========
register : 80(sp)
['80(sp)']
80(sp) usages : 1
========
register : s5
['s5']
s5 usages : 3
========
register : 72(sp)
['72(sp)']
72(sp) usages : 1
========
register : s6
['s6', 's1']
s6 usages : 1
s1 usages : 3
========
register : 64(sp)
['64(sp)']
64(sp) usages : 1
========
register : s7
['s7']
s7 usages : 2
========
register : 56(sp)
['56(sp)']
56(sp) usages : 1
========
register : s8
['s8']
s8 usages : 1
========
register : 48(sp)
['48(sp)']
48(sp) usages : 1
========
register : s9
['s9']
s9 usages : 2
========
register : 40(sp)
['40(sp)']
40(sp) usages : 1
========
register : s10
['s10']
s10 usages : 2
========
register : 32(sp)
['32(sp)']
32(sp) usages : 1
========
register : s11
['s11', 'a5']
s11 usages : 2
a5 usages : 8
========
register : 24(sp)
['24(sp)']
24(sp) usages : 1
========
register : sp
['sp']
sp usages : 1
========
register : a0
['a0', 'a1']
a0 usages : 8
a1 usages : 5
========
register : a1
['a1', 'a0']
a1 usages : 5
a0 usages : 8
========
register : a2
['a2', 's2']
a2 usages : 7
s2 usages : 4
========
register : a3
['a3', 'a0', 'a1']
a3 usages : 4
a0 usages : 8
a1 usages : 5
========
register : v8
['v8']
v8 usages : 2
========
register : a4
['a4']
a4 usages : 1
========
register : a5
['a5', 's11']
a5 usages : 8
s11 usages : 2
========
register : v1
['v1']
v1 usages : 3
```
active_range
當 PC address 發生折返(跳回較小或相同的位址)時,會把折返點記錄下來,作為該 active_range 的結束位址。
```python=
print range
0x1368
['0x1390', '0x13ac', '0x1446']
0x5c0
['0x610']
0x5cc
['0x5d4']
0x1466
['0x1498']
0x27dc
['0x2894']
0x1bc
['0x1de']
0x158a
['0x15c8']
0x1502
['0x156e']
0x5d0
['0x610']
0x11a6
['0x1224']
0x14c
['0x1224', '0x212']
0x13ee
['0x1446']
0x13f6
['0x1446', '0x1456', '0x14a4']
0x25c
['0x262', '0x25c']
0x1b0
['0x1de']
0x15f2
['0x1628', '0x28a2', '0x2894']
0x135e
['0x1390']
0x130a
['0x1350', '0x1390']
0x176
['0x60e']
0x244
['0x248']
0x59e
['0x1224', '0x604', '0x60e']
0x1480
['0x14a4', '0x1498']
0x150a
['0x156e', '0x157e', '0x15c8']
0x11ee
['0x1224', '0x127a']
0x1298
['0x12c8']
0x1470
['0x14a4', '0x1498', '0x14b4', '0x156e']
0x12aa
['0x12c8', '0x12d8', '0x1350']
0x123e
['0x127a', '0x12c8']
0x15a4
['0x15c8', '0x15d8', '0x1628']
0x5c2
['0x5e8']
```
print record
在比對不同的 PC address 時,會額外檢查這兩個 PC address 是否落在同一個 active_range 之內。
```bash=
<class 'dict'>
same value same block
0x121c
a0
['0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988']
0x1212
a3
['0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988']
same value same block
0x121c
a0
['0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988']
0x120e
a1
['0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988', '0x21fa9c 2226844', '0x21faae 2226862', '0x21fac0 2226880', '0x21fad2 2226898', '0x21fae4 2226916', '0x21faf6 2226934', '0x21fb08 2226952', '0x21fb1a 2226970', '0x21fb2c 2226988']
```
最後在輸出的時候,會把前面各個 active_range 內 register 狀態相同的情況整理並統計出來。
![](https://hackmd.io/_uploads/HyZHdA_WT.png)
```bash=
=================================
rootcause_range
range:
0x27dc-0x2894
redunent count:
6
range:
0x15f2-0x28a2
redunent count:
55
range:
0x130a-0x1350
redunent count:
14
range:
0x11a6-0x1224
redunent count:
39
range:
0x1502-0x156e
redunent count:
22
range:
0x15f2-0x1628
redunent count:
21
range:
0x1470-0x156e
redunent count:
5
range:
0x13f6-0x14a4
redunent count:
2
range:
0x135e-0x1390
redunent count:
1
range:
0x15a4-0x1628
redunent count:
18
range:
0x12aa-0x1350
redunent count:
8
range:
0x123e-0x12c8
redunent count:
1
range:
0x14c-0x1224
redunent count:
25
range:
0x5c0-0x610
redunent count:
1
range:
0x11ee-0x127a
redunent count:
79
range:
0x1368-0x1446
redunent count:
28
```
例如仔細觀察後可以看出,在某個區間內 a1 和 a3 的值始終相同,因此可以合併成同一個 register。
before
```asm=
11d0: 8a2a c.mv s4,a0
11d2: 00151493 slli s1,a0,0x1
11d6: 8d2e c.mv s10,a1
11d8: 8db2 c.mv s11,a2
11da: 8cb6 c.mv s9,a3
11dc: 8bb2 c.mv s7,a2
11de: 8532 c.mv a0,a2
11e0: 00000913 li s2,0
11e4: 0c807ad7 vsetvli s5,zero,e16,m1,ta,ma
11e8: 5e074457 vmv.v.x v8,a4
11ec: 85aa c.mv a1,a0
11ee: 8652 c.mv a2,s4
11f0: 00050693 mv a3,a0
11f4: 0c7677d7 vsetvli a5,a2,e8,mf2,ta,ma
11f8: 0205d087 vle16.v v1,(a1)
11fc: 0c807ad7 vsetvli s5,zero,e16,m1,ta,ma
1200: 8e1d c.sub a2,a5
1202: 021400d7 vadd.vv v1,v1,v8
1206: 0c87f057 vsetvli zero,a5,e16,m1,ta,ma
120a: 0206d0a7 vse16.v v1,(a3)
120e: 0af585db lea.h a1,a1,a5
1212: 0af686db lea.h a3,a3,a5
1216: fe79 c.bnez a2,0x11f4 <matrix_test+0x50>
```
after
```asm=
11d0: 8a2a c.mv s4,a0
11d2: 00151493 slli s1,a0,0x1
11d6: 8d2e c.mv s10,a1
11d8: 8db2 c.mv s11,a2
11da: 8cb6 c.mv s9,a3
11dc: 8bb2 c.mv s7,a2
11de: 8532 c.mv a0,a2
11e0: 00000913 li s2,0
11e4: 0c807ad7 vsetvli s5,zero,e16,m1,ta,ma
11e8: 5e074457 vmv.v.x v8,a4
11ec: 85aa c.mv a1,a0
11ee: 8652 c.mv a2,s4
11f4: 0c7677d7 vsetvli a5,a2,e8,mf2,ta,ma
11f8: 0205d087 vle16.v v1,(a1)
11fc: 0c807ad7 vsetvli s5,zero,e16,m1,ta,ma
1200: 8e1d c.sub a2,a5
1202: 021400d7 vadd.vv v1,v1,v8
1206: 0c87f057 vsetvli zero,a5,e16,m1,ta,ma
120a: 0206d0a7 vse16.v v1,(a1)
120e: 0af585db lea.h a1,a1,a5
1216: fe79 c.bnez a2,0x11f4 <matrix_test+0x50>
```
至於如何影響編譯後的 asm:可以找出 build 成 .o 的那段 C code,改成先輸出成 .s,再用 as 組譯成 .o,這樣就可以調整 asm 的順序和 code gen;調整之後 CoreMark 的 CRC check 也沒有出錯。