假設在沒有模擬器的情況下,要找出兩個 ELF(相同 compiler、不同 CFLAGS)之間的指令差異。面對未知的 benchmark 時,若以人工找尋異常區間會耗費大量人力;以下是把這個流程自動化的幾個想法。
大致做法是根據 benchmark 的執行時間長度,觀察硬體 counter 的變化;人工 debug 的流程也是同樣的概念。若改用模擬器(sim)來找 hotspot,模擬得越準通常就要跑越久,所以這裡單純透過 Python 控制 gdb,直接在硬體上找尋異常區間。
![image](https://hackmd.io/_uploads/rkEnxh5e1x.png)
下圖是放大第一個分歧點附近 cycle 區間後的結果:
![image](https://hackmd.io/_uploads/ryhqlhclkg.png)
編譯時記得加上 `-g`,這樣才能透過 addr2line 把 PC 位址對應回 benchmark 的原始碼行號。

整體想法是:找到第一個分歧點後,不斷放大該分歧點附近的區間,就可以找到可疑區間。
# findarea.sh
核心是 rollback.py。要注意的是 `max_rollback_time` 會決定放大區間的粗細程度。
```sh
# Settings shared by every invocation below.
elf="linear_alg-sml-50x50_version1noinlinenobit.elf"
elf2="linear_alg-sml-50x50_version2noinlinenobit.elf"
# Value passed to rollback.py --type: "lm" or "cache".
rollback_type="lm"
max_rollback_time=100
scop=40
ipaddress="-----:1111" # gdb remote address

# Run rollback.py once.
#   $1 = ELF to measure, $2 = --mode value, remaining arguments are forwarded as-is.
# Every expansion is quoted so that file names containing spaces or glob
# characters reach rollback.py as a single argument.
run_rollback() {
    rb_elf="$1"
    rb_mode="$2"
    shift 2
    python3 rollback.py --elf "$rb_elf" --type "$rollback_type" --mode "$rb_mode" \
        --max_rollback_time "$max_rollback_time" --ipaddress "$ipaddress" "$@"
}

# Measure both ELFs with the same settings, then compare them with draw2.py.
#   $1 = --mode value, $2 = --outputname for draw2.py, remaining arguments go to rollback.py.
measure_and_draw() {
    md_mode="$1"
    md_output="$2"
    shift 2
    run_rollback "$elf" "$md_mode" "$@"
    run_rollback "$elf2" "$md_mode" "$@"
    python3 draw2.py --elf "$elf" --elf2 "$elf2" --outputname "$md_output"
}

# auto find area
# Pass 1: mode 1, output "roughly".
measure_and_draw 1 "roughly"

# Passes 2 and 3: mode 0 with --scop, output "reduction". The same pass is run
# twice on purpose (presumably each run narrows the window left by the
# previous draw2.py run -- confirm).
measure_and_draw 0 "reduction" --scop "$scop"
measure_and_draw 0 "reduction" --scop "$scop"
```
# rollback.py
這邊透過一條 thread 定時中斷 gdb,達到 timeout 的效果;當然也可以拿來當作定時中斷使用。
```python
import os
import time
from datetime import datetime
import subprocess
import getpass
import argparse
def killfpga():
    """Force-kill the current user's processes whose ``ps -ef`` line mentions gdb.

    Returns:
        tuple: ``(0, output)`` when the shell pipeline exits with status 0,
        ``(-1, output)`` otherwise. ``output`` is the combined stdout/stderr
        of the pipeline.
    """
    # The awk program has to be single-quoted for the shell. Unquoted, the
    # shell expands ``$2`` itself and hands awk the broken program ``{print``,
    # so no PID ever reaches ``kill``.
    # ``grep -v grep`` drops the helper processes of this very pipeline (their
    # command lines also contain "gdb"), and ``xargs -r`` skips ``kill``
    # entirely when nothing matched.
    fpga = (
        'ps -ef | grep ' + getpass.getuser() + ' | grep "gdb" | grep -v grep'
        " | awk '{print $2}' | xargs -r kill -9"
    )
    status, result = subprocess.getstatusoutput(fpga)
    if status != 0:
        return -1, result
    return 0, result
def get_fpga_ouput():
    """Run ``bash ./rollback.sh`` and return its captured output.

    The captured text is echoed to stdout as a debug log.

    Returns:
        tuple: ``(0, output)`` if the command exited with status 0,
        ``(-1, output)`` for any other exit status.
    """
    exit_code, captured = subprocess.getstatusoutput('bash ./rollback.sh')
    # debug output log
    print(captured)
    normalized = 0 if exit_code == 0 else -1
    return normalized, captured
def read_and_eval_elf(elf_name):
    """Load a Python-literal sequence stored as text and return it as a list.

    Despite the name, the file is read as text and parsed, not as an ELF
    binary.

    The content is parsed with ``ast.literal_eval`` instead of ``eval`` so a
    tampered file cannot execute arbitrary code; only literals (lists, tuples,
    numbers, strings, ...) are accepted.

    Args:
        elf_name: Path of the text file to read.

    Returns:
        list | None: The parsed content converted to a list, or ``None`` when
        the file does not exist, cannot be read, or does not hold a literal
        iterable. An error message is printed in those cases.
    """
    import ast  # local import so this block does not depend on the file header

    # Make sure the file exists
    if not os.path.isfile(elf_name):
        print(f"Error: File {elf_name} does not exist.")
        return None
    try:
        # Read the file content
        with open(elf_name, 'r') as file:
            content = file.read()
        # Turn the text into a real list
        return list(ast.literal_eval(content))
    except (OSError, ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print(f"Error occurred while reading or evaluating the file: {e}")
        return None
# Measurement window bounds; they stay None unless the mode-0 setup below assigns them.
# NOTE(review): indentation was lost in this listing. The nesting of the statements
# after `if args.mode == 0:` has to be restored before this can run -- in particular,
# confirm whether the reduction.txt handling belongs to the mode-0 branch.
start = None
upper_limit = None
# Parse command-line arguments
parser = argparse.ArgumentParser(description='Cycle comparison analysis script')
parser.add_argument('--elf', type=str, required=True, help='Path to the first ELF file')
parser.add_argument('--type', type=str, required=True, help='Analysis type')
parser.add_argument('--mode', type=int, required=True, help='Analysis mode')
parser.add_argument('--ipaddress', type=str, required=True, help='ipaddress')
# if mode = 0
parser.add_argument('--start', type=float, required=False, default=0.31619, help='Start value for the analysis')
parser.add_argument('--upper_limit', type=float, required=False, default=1.4, help='Upper limit value for the analysis')
# if mode = 1
# NOTE(review): --scop is consumed by the mode-0 scop_cut computation below, so the
# "mode = 1" label above looks stale -- confirm.
parser.add_argument('--scop', type=float, required=False, default=1.4, help='scop')
parser.add_argument('--max_rollback_time', type=int, required=False, default=100, help='max rollback_time')
args = parser.parse_args()
if args.mode == 0:
# Window taken from the command line; scop_cut is the window width divided by --scop.
start = args.start
upper_limit = args.upper_limit
scop_cut = float(args.upper_limit - args.start) / args.scop
print(scop_cut)
# If reduction.txt exists, its two values replace the window bounds.
file_path ="reduction.txt"
if os.path.exists(file_path):
# NOTE(review): read_and_eval_elf returns None on failure, which would make gescop[0] raise TypeError.
gescop =read_and_eval_elf(file_path)
print(gescop )
start =gescop[0]
# A start of 0.2 or less is pushed forward by 0.2 (reason not visible here -- TODO confirm).
if start <=0.2 :
start = start +0.2
upper_limit = gescop[1]
# Recompute the step for the replaced window.
scop_cut = float(upper_limit - start) / args.scop
print("update new scop")
print(scop_cut)
# Render gdb_script_ex.gdb from the gdb_script.gdb template by textual substitution.
gdb_script_path = 'gdb_script.gdb'
with open(gdb_script_path, 'r') as template_file:
    gdb_script_content = template_file.read()

# gdb command that the type placeholder expands to for each known --type value.
# Any other value leaves the placeholder untouched.
type_to_source_cmd = {
    "lm": "so ./test_eembc-V5_LM.gdb",
    "cache": "so ./test_eembc-V5_cache.gdb",
}

gdb_script_content = gdb_script_content.replace('{ipaddress}', args.ipaddress)
if args.type in type_to_source_cmd:
    gdb_script_content = gdb_script_content.replace('{type}', type_to_source_cmd[args.type])
gdb_script_content = gdb_script_content.replace('{elf_name}', args.elf)

# The output name is derived from the template name: gdb_script -> gdb_script_ex.
rendered_path = gdb_script_path.replace("gdb_script", "gdb_script_ex")
with open(rendered_path, 'w') as rendered_file:
    rendered_file.write(gdb_script_content)
################
# Upper bound on the number of passes of the measurement loop.
rollbacktime = 100
# The --mode value selects how the next measurement time is chosen:
#   decrease == 0: start -> start+1 .... upper_limit
#   decrease == 1: 10 -> 5 -> 2.5 -> 1.25 -> 0.625 ... until Counter == 0
#                  or another termination condition
decrease = args.mode
# Other termination conditions
stop_conditions = 0
################
# init
measurementcount_path = 'lastmeasurementcount.txt'
expectedtime_path = 'expectedtime.txt'

# Both modes begin by discarding the state files of a previous run.
for stale_path in (measurementcount_path, expectedtime_path):
    if os.path.isfile(stale_path):
        os.remove(stale_path)

# Every mode other than 1 seeds the files: the first expected time is `start`
# and the previous counter value is 0.
if decrease != 1:
    with open(expectedtime_path, "a") as seed_file:
        seed_file.write(str(start))
    with open(measurementcount_path, "a") as seed_file:
        seed_file.write(str(0))
# start
# Main measurement loop. Each pass kills leftover gdb processes, runs rollback.sh once,
# parses the values printed in its output, and writes the next expected time to
# expectedtime.txt.
# NOTE(review): indentation was lost in this listing; the block nesting below has to be
# restored before this can run.
now_pc = ""
record = []
for i in range(rollbacktime):
killfpga()
# Per-pass defaults, kept when the gdb output lacks the corresponding line.
last_expectedtime = start
prev_detect_count = 0
detect_count = 0
print("Rollback Times:\t" , i+1)
print(record)
# Stop as soon as the pass number reaches --max_rollback_time (checked before measuring).
if(i+1 >=args.max_rollback_time):
break
st = time.time()
status , result = get_fpga_ouput()
# get fpga output
for line in result.splitlines():
# Detect Counter
if(line.find("Cycles : " )>= 0):
x = line.split(":")
detect_count =int (x[1])
print("========")
print("Detect Counter:\t" , detect_count)
# Load the counter stored by the previous pass, if there is one.
if( os.path.exists(measurementcount_path) == True):
f = open(measurementcount_path, 'r')
prev_detect_count = int(f.read())
# In mode 0 a stored 0 means "no previous value": use the current counter as baseline.
if(decrease == 0 and prev_detect_count == 0):
prev_detect_count = detect_count
f.close()
else:
# record first detect_count
f = open(measurementcount_path, "a")
f.write(str(detect_count))
f.close()
# Detect Thread Rollback time
# Takes the text after the first ':' on a line containing "last expectedtime".
if(line.find("last expectedtime" )>= 0):
x = line.split(":")
last_expectedtime = float(x[1].strip())
# Detect Pc Address
if(line.find("PC :" )>= 0):
x = line.split(":")
print(x)
now_pc = str(x[1].strip())
# Wall-clock duration of this rollback.sh run, rounded to 5 decimals.
et = time.time()
elapsed_time = et - st
elapsed_time = round(float(elapsed_time), 5)
# Prepare Next measurement time
# NOTE(review): `expectedtime` is assigned here but not read again in the visible code.
expectedtime = 0.0
if( (os.path.exists(expectedtime_path) == True)):
f = open(expectedtime_path, 'r')
get_output =f.read()
f.close()
# setting expectedtime
if(get_output != ''):
expectedtime = float(get_output)
else:
print("No Expectedtime")
break
# A non-zero last_expectedtime is handled as a regular pass; 0 takes the
# "first rollback time" branch further down.
if(last_expectedtime != 0):
# detect_count equal 0 End of measurement
if(detect_count == 0):
break
# Other termination conditions
if(detect_count <stop_conditions ):
break
# Counter grew more than 30x against the previous pass: discard this pass,
# move `start` forward by 0.1 and retry.
# NOTE(review): this division raises ZeroDivisionError if prev_detect_count is 0,
# and upper_limit is only assigned by the mode-0 setup -- confirm both cases.
if(float(detect_count/prev_detect_count) >30):
print("drop ! Measuring interval oscillation")
if(start):
start = start +0.1
scop_cut = float(upper_limit - start) / args.scop
print("update new scop too short")
print(scop_cut)
f = open(expectedtime_path, "w")
f.write(str(start))
f.close()
continue
# Mode 0 uses a tighter limit: more than 4x growth is also discarded and retried.
elif(decrease == 0 and float(detect_count/prev_detect_count) >4):
print("drop ! Measuring interval oscillation")
start = start +0.1
scop_cut = float(upper_limit - start) / args.scop
print("update new scop too short")
print(scop_cut)
f = open(expectedtime_path, "w")
f.write(str(start))
f.close()
continue
else:
# No Exceptions Update detect_count to lastmeasurementcount.txt
if(os.path.exists(measurementcount_path) == True):
os.remove(measurementcount_path)
f = open(measurementcount_path, "a")
f.write(str(detect_count))
f.close()
if(os.path.exists(expectedtime_path) == True):
os.remove(expectedtime_path)
# Record layout: [pc, cycles, previous cycles, previous/current ratio, expected time].
record.append([now_pc,detect_count,prev_detect_count,float(prev_detect_count/detect_count),last_expectedtime])
print("Last Expectedtime:\t",last_expectedtime)
f = open(expectedtime_path, "a")
# Determine the next measurement time update expectedtime to last_expectedtime
# Mode 0 steps forward by scop_cut; any other mode halves the last expected time.
if(decrease == 0):
f.write(str( float(last_expectedtime)+scop_cut))
else:
f.write(str( float(last_expectedtime)/2))
f.close()
# Mode 0 ends once the measured time has reached the upper limit.
if(decrease == 0):
if(last_expectedtime >=upper_limit):
break
else:
# first rollback time
# The elapsed wall-clock time of this run becomes the first expected time.
f = open(expectedtime_path, "a")
# f.write(str( float(elapsed_time)/2))
f.write(str( float(elapsed_time)))
f.close()
# Save all records as text next to the ELF, with the ".elf" part of the name replaced by ".log".
f = open((args.elf).replace(".elf",".log"), "w")
f.write(str( record))
f.close()
import os
import time
from datetime import datetime
import subprocess
import getpass
import argparse
# NOTE(review): this definition repeats one that appears earlier in this listing;
# it looks like a paste duplication -- confirm and remove one copy.
def killfpga():
    """Force-kill the current user's processes whose ``ps -ef`` line mentions gdb.

    Returns:
        tuple: ``(0, output)`` when the shell pipeline exits with status 0,
        ``(-1, output)`` otherwise. ``output`` is the combined stdout/stderr
        of the pipeline.
    """
    # The awk program has to be single-quoted for the shell. Unquoted, the
    # shell expands ``$2`` itself and hands awk the broken program ``{print``,
    # so no PID ever reaches ``kill``.
    # ``grep -v grep`` drops the helper processes of this very pipeline (their
    # command lines also contain "gdb"), and ``xargs -r`` skips ``kill``
    # entirely when nothing matched.
    fpga = (
        'ps -ef | grep ' + getpass.getuser() + ' | grep "gdb" | grep -v grep'
        " | awk '{print $2}' | xargs -r kill -9"
    )
    status, result = subprocess.getstatusoutput(fpga)
    if status != 0:
        return -1, result
    return 0, result
def get_fpga_ouput():
    """Run ``bash ./rollback.sh`` and return its captured output.

    The captured text is echoed to stdout as a debug log.

    Returns:
        tuple: ``(0, output)`` if the command exited with status 0,
        ``(-1, output)`` for any other exit status.
    """
    exit_code, captured = subprocess.getstatusoutput('bash ./rollback.sh')
    # debug output log
    print(captured)
    normalized = 0 if exit_code == 0 else -1
    return normalized, captured
def read_and_eval_elf(elf_name):
    """Load a Python-literal sequence stored as text and return it as a list.

    Despite the name, the file is read as text and parsed, not as an ELF
    binary.

    The content is parsed with ``ast.literal_eval`` instead of ``eval`` so a
    tampered file cannot execute arbitrary code; only literals (lists, tuples,
    numbers, strings, ...) are accepted.

    Args:
        elf_name: Path of the text file to read.

    Returns:
        list | None: The parsed content converted to a list, or ``None`` when
        the file does not exist, cannot be read, or does not hold a literal
        iterable. An error message is printed in those cases.
    """
    import ast  # local import so this block does not depend on the file header

    # Make sure the file exists
    if not os.path.isfile(elf_name):
        print(f"Error: File {elf_name} does not exist.")
        return None
    try:
        # Read the file content
        with open(elf_name, 'r') as file:
            content = file.read()
        # Turn the text into a real list
        return list(ast.literal_eval(content))
    except (OSError, ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print(f"Error occurred while reading or evaluating the file: {e}")
        return None
# NOTE(review): from here on the listing repeats the argument-parsing and setup code
# that already appears earlier in this script; it looks like a paste duplication --
# confirm and remove one copy.
# NOTE(review): indentation was lost in this listing. The nesting of the statements
# after `if args.mode == 0:` has to be restored before this can run.
# Measurement window bounds; they stay None unless the mode-0 setup below assigns them.
start = None
upper_limit = None
# Parse command-line arguments
parser = argparse.ArgumentParser(description='Cycle comparison analysis script')
parser.add_argument('--elf', type=str, required=True, help='Path to the first ELF file')
parser.add_argument('--type', type=str, required=True, help='Analysis type')
parser.add_argument('--mode', type=int, required=True, help='Analysis mode')
parser.add_argument('--ipaddress', type=str, required=True, help='ipaddress')
# if mode = 0
parser.add_argument('--start', type=float, required=False, default=0.31619, help='Start value for the analysis')
parser.add_argument('--upper_limit', type=float, required=False, default=1.4, help='Upper limit value for the analysis')
# if mode = 1
# NOTE(review): --scop is consumed by the mode-0 scop_cut computation below, so the
# "mode = 1" label above looks stale -- confirm.
parser.add_argument('--scop', type=float, required=False, default=1.4, help='scop')
parser.add_argument('--max_rollback_time', type=int, required=False, default=100, help='max rollback_time')
args = parser.parse_args()
if args.mode == 0:
# Window taken from the command line; scop_cut is the window width divided by --scop.
start = args.start
upper_limit = args.upper_limit
scop_cut = float(args.upper_limit - args.start) / args.scop
print(scop_cut)
# If reduction.txt exists, its two values replace the window bounds.
file_path ="reduction.txt"
if os.path.exists(file_path):
# NOTE(review): read_and_eval_elf returns None on failure, which would make gescop[0] raise TypeError.
gescop =read_and_eval_elf(file_path)
print(gescop )
start =gescop[0]
# A start of 0.2 or less is pushed forward by 0.2 (reason not visible here -- TODO confirm).
if start <=0.2 :
start = start +0.2
upper_limit = gescop[1]
# Recompute the step for the replaced window.
scop_cut = float(upper_limit - start) / args.scop
print("update new scop")
print(scop_cut)
# Render gdb_script_ex.gdb from the gdb_script.gdb template by textual substitution.
gdb_script_path = 'gdb_script.gdb'
with open(gdb_script_path, 'r') as template_file:
    gdb_script_content = template_file.read()

# gdb command that the type placeholder expands to for each known --type value.
# Any other value leaves the placeholder untouched.
type_to_source_cmd = {
    "lm": "so ./test_eembc-V5_LM.gdb",
    "cache": "so ./test_eembc-V5_cache.gdb",
}

gdb_script_content = gdb_script_content.replace('{ipaddress}', args.ipaddress)
if args.type in type_to_source_cmd:
    gdb_script_content = gdb_script_content.replace('{type}', type_to_source_cmd[args.type])
gdb_script_content = gdb_script_content.replace('{elf_name}', args.elf)

# The output name is derived from the template name: gdb_script -> gdb_script_ex.
rendered_path = gdb_script_path.replace("gdb_script", "gdb_script_ex")
with open(rendered_path, 'w') as rendered_file:
    rendered_file.write(gdb_script_content)
################
# Upper bound on the number of measurement passes.
rollbacktime = 100
# The --mode value selects how the next measurement time is chosen:
#   decrease == 0: start -> start+1 .... upper_limit
#   decrease == 1: 10 -> 5 -> 2.5 -> 1.25 -> 0.625 ... until Counter == 0
#                  or another termination condition
decrease = args.mode
# Other termination conditions
stop_conditions = 0
################
# init
measurementcount_path = 'lastmeasurementcount.txt'
expectedtime_path = 'expectedtime.txt'

# Both modes begin by discarding the state files of a previous run.
for stale_path in (measurementcount_path, expectedtime_path):
    if os.path.isfile(stale_path):
        os.remove(stale_path)

# Every mode other than 1 seeds the files: the first expected time is `start`
# and the previous counter value is 0.
if decrease != 1:
    with open(expectedtime_path, "a") as seed_file:
        seed_file.write(str(start))
    with open(measurementcount_path, "a") as seed_file:
        seed_file.write(str(0))
```
# rollback.sh
```sh
# Start the RISC-V gdb and let it execute the generated command file.
riscv64-unknown-elf-gdb --command='gdb_script_ex.gdb'
```
# gdb_script.gdb
以這份 gdb script 作為 template,由 rollback.py 填入 `{ipaddress}`、`{elf_name}`、`{type}` 後生成 gdb_script_ex.gdb。
```
# Template: rollback.py replaces the three brace placeholders below textually
# and writes the result to gdb_script_ex.gdb.
target remote {ipaddress}
file {elf_name}
set pagination off
{type}
source ./BTB_miss_rate_V5.gdb
# Start the timer thread that interrupts the target after the expected time.
python import gdb; gdb.execute("source rollbacktimer.py")
continue
continue
source ./print_V5_BTB_embench.gdb
```
# gdb_script_ex.gdb
細部的 gdb script 就看各人怎麼設計,取決於你想印出哪些硬體 counter 來進行夾擠(逐步縮小區間)。
```
# Example of the file rollback.py generates from the gdb_script.gdb template
# (with --type lm). The rollbacktimer.py line is part of the template and is
# therefore present in every generated file.
target remote ---------:1111
file blacks-sml-n500v20-sp_test2.elf
set pagination off
so ./test_eembc-V5_LM.gdb
so ./BTB_miss_rate_V5.gdb
python import gdb; gdb.execute("source rollbacktimer.py")
cont
cont
so ./print_V5_BTB_embench.gdb
```
# rollbacktimer.py
這支腳本由 gdb 載入,負責讀取 rollback.py 寫入 expectedtime.txt 的時間,並在等待該時間後中斷 gdb,藉此控制每次量測的中斷時間。
```python
import time
import gdb
import os
import sched
import signal
import asyncio
from threading import Timer
import threading
# Seconds to wait before interrupting the target; stays 0.0 when no file is present.
expectedtime = 0.0
first = 0
file_path = 'expectedtime.txt'


def send_signal():
    """Sleep for ``expectedtime`` seconds, then ask gdb to interrupt the target.

    Runs on a worker thread. The "last expectedtime" line printed here is part
    of the output that the driving script parses, so its format must not change.
    """
    print("thread")
    delay = round(float(expectedtime), 5)
    time.sleep(delay)
    print("\nlast expectedtime : " + str(delay))
    # gdb is not thread-safe: gdb functions may only be called from gdb's own
    # thread. post_event queues the call so gdb runs it from its event loop
    # instead of from this worker thread.
    gdb.post_event(lambda: gdb.execute("interrupt"))


# Pick up the wait time from the file, if there is one.
if os.path.exists(file_path):
    with open(file_path, 'r') as f:
        expectedtime = float(f.read())

t = threading.Thread(target=send_signal)
t.start()
print("hello word")
```
# draw2.py
這邊要注意:此腳本會讀取 rollback.py 產生的兩個 `.log` 檔,且 addr2line 的路徑是寫死的,使用前需改成自己的工具鏈路徑。
```python
import subprocess
import matplotlib.pyplot as plt
import itertools
from collections import defaultdict
import os
import argparse
def read_and_eval_elf(elf_name):
    """Load a Python-literal sequence stored as text and return it as a list.

    Despite the name, the file is read as text and parsed, not as an ELF
    binary.

    The content is parsed with ``ast.literal_eval`` instead of ``eval`` so a
    tampered file cannot execute arbitrary code; only literals (lists, tuples,
    numbers, strings, ...) are accepted.

    Args:
        elf_name: Path of the text file to read.

    Returns:
        list | None: The parsed content converted to a list, or ``None`` when
        the file does not exist, cannot be read, or does not hold a literal
        iterable. An error message is printed in those cases.
    """
    import ast  # local import so this block does not depend on the file header

    # Make sure the file exists
    if not os.path.isfile(elf_name):
        print(f"Error: File {elf_name} does not exist.")
        return None
    try:
        # Read the file content
        with open(elf_name, 'r') as file:
            content = file.read()
        # Turn the text into a real list
        return list(ast.literal_eval(content))
    except (OSError, ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print(f"Error occurred while reading or evaluating the file: {e}")
        return None
parser = argparse.ArgumentParser(description='Cycle comparison analysis script')
parser.add_argument('--elf', type=str, required=True, help='Path to the first ELF file')
parser.add_argument('--elf2', type=str, required=True, help='Path to the second ELF file')
parser.add_argument('--outputname', type=str, required=True, help='outputname')
args = parser.parse_args()

# Paths of the two ELF files being compared
elf_name = args.elf
elf_name2 = args.elf2

# Load the measurement records stored next to each ELF: same name, with the
# ".elf" part replaced by ".log".
data1 = read_and_eval_elf(elf_name.replace(".elf", ".log"))
data2 = read_and_eval_elf(elf_name2.replace(".elf", ".log"))
print(elf_name.replace(".elf", ".log"))

# read_and_eval_elf returns None (after printing the reason) when a log is
# missing or unreadable; stop here with a clear message instead of failing
# later inside sorted().
if data1 is None or data2 is None:
    raise SystemExit("Error: both measurement logs are required to compare the ELF files.")

# Order the records of each ELF by their second field.
data1 = sorted(data1, key=lambda x: x[1])
data2 = sorted(data2, key=lambda x: x[1])
# addr2line binary used when the caller does not supply one.
_DEFAULT_ADDR2LINE = '/NOBACKUP/sqa3/NFSTest/build-astversion2/build-toolchain/linux/nds64le-elf-mculib-v5/bin/riscv64-elf-addr2line'


def get_function_name(elf_file, address, addr2line=_DEFAULT_ADDR2LINE):
    """Resolve an address to a function/source location by running addr2line.

    Args:
        elf_file: Path of the ELF file passed to addr2line with ``-e``.
        address: Address string to look up.
        addr2line: addr2line executable to run. Defaults to the toolchain
            path this script was written against, so existing two-argument
            calls behave as before.

    Returns:
        str: The stripped stdout of ``addr2line -e <elf> <address> -f -p``
        when it exits with status 0, ``'Unknown'`` when it exits with any
        other status, or ``'addr2line not found'`` when the executable does
        not exist.
    """
    command = [addr2line, '-e', elf_file, address, '-f', '-p']
    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    except FileNotFoundError:
        return 'addr2line not found'
    if result.returncode == 0:
        return result.stdout.strip()
    return 'Unknown'
def extract_function_names(elf_file, data):
    """Resolve the address of every record and keep the fields used downstream.

    Args:
        elf_file: ELF file handed to ``get_function_name`` for each lookup.
        data: Iterable of indexable records; index 0 is the address, and
            indices 1 and 4 are copied through unchanged.

    Returns:
        list: One ``(function_name, record[1], address, record[4])`` tuple
        per record, in input order.
    """
    return [
        (get_function_name(elf_file, row[0]), row[1], row[0], row[4])
        for row in data
    ]
# Resolve the function name for every recorded address of both ELF files.
functions_version1 = extract_function_names(elf_name, data1)
functions_version2 = extract_function_names(elf_name2, data2)

# Print the resolved records of each ELF under its own heading.
listings = (
    ("\nFunctions from version1 ELF:", functions_version1),
    ("\nFunctions from version2 ELF:", functions_version2),
)
for heading, rows in listings:
    print(heading)
    for name, cycles, pc_address, seconds in rows:
        print(f"Function: {name}, Cycles: {cycles}, PC Address: {pc_address}, Seconds: {seconds}")
# Find the divergence points between the two runs.
# Each entry of functions_versionN is (function_name, cycles, address, seconds).
# Each stored point is (version1 function name, version1 seconds, version2 seconds).
# NOTE(review): indentation was lost in this listing; the nesting below has to be
# restored before this can run. In particular, confirm whether the
# `last_discrepancy_index = i` line belongs inside the inner `if`.
end_early_points = []
discrepancy_points = []
last_discrepancy_index = -1
for i in range(min(len(functions_version1), len(functions_version2))):
cycle_version1 = functions_version1[i][1]
cycle_version2 = functions_version2[i][1]
# Identical cycle counts are collected separately.
if cycle_version1 == cycle_version2:
end_early_points.append((functions_version1[i][0], functions_version1[i][3], functions_version2[i][3]))
# A relative difference of at least 1.5% counts as a divergence.
elif abs(cycle_version1 - cycle_version2) / max(cycle_version1, cycle_version2) >= 0.015:
# NOTE(review): this compares the stored version1 *seconds* of the last point with a
# *cycle* count, so it looks like it is almost always true -- confirm the intent.
if not discrepancy_points or discrepancy_points[-1][1] != cycle_version1:
discrepancy_points.append((functions_version1[i][0], functions_version1[i][3], functions_version2[i][3]))
last_discrepancy_index = i
# Reduce the divergence points to just the first and the last one.
if discrepancy_points:
first_discrepancy_point = discrepancy_points[0]
last_discrepancy_point = discrepancy_points[-1]
# Starting at the last divergence index, walk forward while both versions report the
# same cycle count, moving the last point to each such entry; stop at the first mismatch.
# NOTE(review): the range uses len(functions_version1) only, so functions_version2[i]
# can raise IndexError when version2 has fewer entries.
for i in range(last_discrepancy_index, len(functions_version1)):
if functions_version1[i][1] == functions_version2[i][1]:
last_discrepancy_point = (functions_version1[i][0], functions_version1[i][3], functions_version2[i][3])
else:
break
discrepancy_points = [first_discrepancy_point, last_discrepancy_point]
# Print the time interval covered by the divergence.
if discrepancy_points:
# Interval = [version1 seconds of the first point, version2 seconds of the last point].
first_discrepancy_time = discrepancy_points[0][1]
last_discrepancy_time = discrepancy_points[-1][2]
print(f"\nTime Interval of Discrepancy: [{first_discrepancy_time}, {last_discrepancy_time}]")
# Save the interval as a two-element list in reduction.txt.
f = open("reduction.txt", "w")
f.write(str([float(first_discrepancy_time), float(last_discrepancy_time)]))
f.close()
# Draw the comparison chart: one line segment per consecutive pair of records
# for each version, coloured by the function the segment starts in.
plt.figure(figsize=(16, 10))

# Give every function name one colour. The names are sorted first so the
# name-to-colour mapping is the same on every run (iterating a set of strings
# is not stable between interpreter runs).
color_cycle = itertools.cycle(['red', 'orange', 'green', 'blue', 'purple', 'cyan', 'magenta', 'lime', 'pink', 'teal', 'violet'])
all_functions = sorted({f[0] for f in functions_version1} | {f[0] for f in functions_version2})
function_colors = dict(zip(all_functions, color_cycle))

min_length = min(len(functions_version1), len(functions_version2))
for i in range(min_length - 1):
    name1, cycles1, pc1, seconds1 = functions_version1[i]
    name2, cycles2, pc2, seconds2 = functions_version2[i]

    plt.plot([i, i + 1], [cycles1, functions_version1[i + 1][1]], color=function_colors[name1])
    plt.plot([i, i + 1], [cycles2, functions_version2[i + 1][1]], color=function_colors[name2])

    # Label a point only where the function changes (or at the very first point).
    if i == 0 or name1 != functions_version1[i - 1][0]:
        if seconds1 != 'N/A':
            plt.text(i, cycles1, f"{name1} ({pc1}, {seconds1}s)", fontsize=8, color='black', rotation=45, verticalalignment='top')
    if i == 0 or name2 != functions_version2[i - 1][0]:
        plt.text(i, cycles2, f"{name2} ({pc2}, {seconds2}s)", fontsize=8, color='black', rotation=45, verticalalignment='bottom')

    # Mark divergence points: relative difference of at least 15%.
    # A peak of 0 means both counts are 0, so there is nothing to mark and the
    # division must be skipped.
    # NOTE(review): the divergence search earlier in this script uses 0.015,
    # not 0.15 -- confirm which threshold is intended.
    peak = max(cycles1, cycles2)
    if peak != 0 and abs(cycles1 - cycles2) / peak >= 0.15:
        plt.scatter(i, peak, color='red', marker='x', s=100)

plt.xlabel('Function Index')
plt.ylabel('Cycle Count')
plt.title('Function Cycle Count Comparison Between version1 and version2')
plt.grid(True)
plt.tight_layout()
plt.savefig(f'{args.outputname}.png', format='png', bbox_inches='tight')
plt.close()
```