<h1>data analysis with graphing and saved results</h1>
<ul>
<li>上方存資料的路徑有改:<b>processed_data_to_be_trained -> processed_data_for_ML</b></li>
<li>同一次執行中每一萬筆(10000 筆)結果分一檔,以 <b>檔名(數字).csv</b> 區隔</li>
<li>其他詳見註解</li>
</ul>
``` python=
import csv ######################
import pandas ## only for makeNTU ##
from scatterPlot import * ######################
# Length of one analysis window (presumably in seconds); scatterAnalysis()
# emits one feature vector every timeSlice * recordRate samples.
timeSlice = 10

# Number of samples per unit of time (presumably Hz); also scales the slope.
recordRate = 100

# Base path of the output file; saveData() appends ".csv" to it.
# NOTE: change the file name before every run if earlier results must be kept,
# otherwise a file with the same name is overwritten (harmless when the data
# is identical).
whereDataGoes = "processed_data_for_ML/dataToBeTrained"

# Feature vectors accumulated by every scatterAnalysis() call in this run.
attributeVectorList = []
def scatterAnalysis(filePath, situa):
    """Append one feature vector per completed time slice of a raw-data CSV.

    The file is read with pandas; for every row the magnitude of columns 1-3
    is computed (column 0 is ignored -- presumably a timestamp, TODO confirm).
    Every ``timeSlice * recordRate`` samples a tuple

        (maxAmpl, avg, maxSlope, peakAboveRate, stdDiviation, str(situa))

    is appended to the module-level ``attributeVectorList`` and all running
    statistics are reset.  Samples left over after the last complete slice
    are discarded.

    Args:
        filePath: path of the CSV file to analyse (converted with ``str``).
        situa: label stored, as a string, in the last field of each vector.

    Returns:
        None.  If the file cannot be read or parsed, a message is printed and
        nothing is appended.
    """
    try:
        threeAxisesAccData = pandas.read_csv(str(filePath)).values
    except (OSError, ValueError):
        # OSError: missing/unreadable file.  ValueError: pandas' parser and
        # empty-data errors (and decoding errors) derive from it.  Anything
        # else, e.g. KeyboardInterrupt, is no longer swallowed.
        print("Cannot open the raw data file.")
        return

    samplesPerSlice = timeSlice*recordRate
    sumOfMagSq = 0          # running sum of mag**2, used for the variance
    magSum = 0              # running sum of mag, used for the mean
    maxAmpl = 0
    maxSlope = 0
    latestTwentyData = []   # sliding window holding at most 20 magnitudes
    peakList = []
    counter = 0             # samples consumed in the current slice

    for singleData in threeAxisesAccData:
        mag = sum(singleData[i]*singleData[i] for i in range(1, 4))**0.5
        sumOfMagSq += mag**2
        magSum += mag
        if mag > maxAmpl:
            maxAmpl = mag

        # Peak detection: once the window holds 20 samples, the sample at
        # index 9 is a peak when it equals the window maximum.
        latestTwentyData.append(mag)
        if len(latestTwentyData) >= 20:
            if latestTwentyData[9] == max(latestTwentyData):
                # Record the peak itself; the original appended ``mag``,
                # i.e. the newest sample, 10 samples after the peak.
                peakList.append(latestTwentyData[9])
            latestTwentyData.pop(0)

        # Slope over the last four sample intervals.  From the fifth sample of
        # a slice onwards the window always holds at least five values.
        if counter > 3:
            slope = 0.25*recordRate*abs(latestTwentyData[-1] - latestTwentyData[-5])
            if slope > maxSlope:
                maxSlope = slope

        counter += 1

        # ---------------- one time slice has been consumed ----------------
        if counter >= samplesPerSlice:
            if peakList:
                peakAvg = sum(peakList)/len(peakList)
                peaksAbove = sum(1 for peak in peakList if peak >= peakAvg)
                peakAboveRate = peaksAbove/len(peakList)
            else:
                # A slice without any detected peak used to raise
                # ZeroDivisionError; report a rate of zero instead.
                peakAboveRate = 0.0
            avg = magSum/counter
            # NOTE(review): despite its name this is the variance
            # (E[x^2] - E[x]^2), not its square root.  Kept as is so the
            # emitted feature keeps its scale.
            stdDiviation = sumOfMagSq/counter - avg**2
            # The list is mutated in place, so no ``global`` statement is needed.
            attributeVectorList.append(
                (maxAmpl, avg, maxSlope, peakAboveRate, stdDiviation, str(situa))
            )
            sumOfMagSq, magSum, maxAmpl, maxSlope = 0, 0, 0, 0
            latestTwentyData, peakList = [], []
            counter = 0
def saveData(fileName, fieldnames, dataList, maxRowsPerFile=10000):
    """Write *dataList* to one or more CSV files, each starting with a header.

    The first file is ``<fileName>.csv``.  Whenever a file already holds
    ``maxRowsPerFile`` data rows, the next row starts a new file named
    ``<fileName>(1).csv``, ``<fileName>(2).csv``, ...  Existing files with the
    same names are overwritten.  Nothing is created when *dataList* is empty.

    The original never incremented its row counter, so the split never
    happened, and it never closed the file it wrote to.  Both are fixed here.

    Args:
        fileName: base path of the output, without the ``.csv`` suffix.
        fieldnames: column names; row item ``i`` is stored under
            ``fieldnames[i]``.
        dataList: iterable of indexable rows with at least
            ``len(fieldnames)`` items each.
        maxRowsPerFile: maximum number of data rows per file (default 10000,
            the threshold used by the original code).

    Raises:
        ValueError: if *maxRowsPerFile* is smaller than 1.
        OSError: if an output file cannot be created or written.
        IndexError: if a row has fewer items than *fieldnames*.
    """
    if maxRowsPerFile < 1:
        raise ValueError("maxRowsPerFile must be at least 1")

    fieldnames = list(fieldnames)
    csvfile = None
    writer = None
    rowsInFile = 0
    fileCount = 0
    try:
        for data in dataList:
            if csvfile is None or rowsInFile >= maxRowsPerFile:
                if csvfile is None:
                    renewedFileName = str(fileName) + ".csv"
                else:
                    # Current file is full: close it before starting the next.
                    csvfile.close()
                    fileCount += 1
                    renewedFileName = str(fileName) + "({})".format(fileCount) + ".csv"
                # Header and rows go through the same handle, so each file is
                # opened exactly once.
                csvfile = open(renewedFileName, 'w', newline='')
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                rowsInFile = 0
            writer.writerow({name: data[i] for i, name in enumerate(fieldnames)})
            rowsInFile += 1
    finally:
        # Always flush and release the last file, even if a row failed.
        if csvfile is not None:
            csvfile.close()
def createNewFile(fileName, fieldnames):
    """Create (or truncate) *fileName* and write a single CSV header row.

    The original caught every exception and then spun forever in a
    ``while True: pass`` loop, hanging the process at full CPU.  The failure
    is now reported and re-raised so the caller can stop or recover.

    Args:
        fileName: path of the CSV file to create.
        fieldnames: column names written as the header row.

    Raises:
        OSError: if the file cannot be created or written.
    """
    try:
        with open(fileName, 'w', newline='') as csvfile:
            csv.DictWriter(csvfile, fieldnames=fieldnames).writeheader()
    except OSError as err:
        print("Cannot create the file {!r}: {}".format(fileName, err))
        raise
# Uncomment the lines below according to the step you want to run.
if __name__ == "__main__":
    # Names of the numeric features, in the order scatterAnalysis() emits them.
    dimentionName = [
        "maxAmpl",
        "avgAmpl",
        "maxSlope",
        "peakAboveRate",
        "stdDiviation",
    ]

    # Step 1 - analyse: one call per raw-data file and its label.
    #scatterAnalysis("raw_data/_rest.xlsx - Raw Data.csv", "rest")
    #scatterAnalysis("raw_data/_walk.xlsx - Raw Data.csv", "walk")
    #scatterAnalysis("raw_data/_active.xlsx - Raw Data.csv", "active")
    #scatterAnalysis("raw_data/_fall.xlsx - Raw Data.csv", "fall")
    #scatterAnalysis("raw_data/110秒已知測資.xls - Raw Data.csv", "100s_unknown")
    #scatterAnalysis("raw_data/500秒測資.xls - Raw Data.csv", "500s_unknown")

    # Step 2 - plot the collected feature vectors.
    #plotScatter(attributeVectorList, dimentionName)

    # Step 3 - save as .csv; the label column is added to the header first.
    #dimentionName.append("situation")
    #saveData(fileName =whereDataGoes, fieldnames =dimentionName, dataList =attributeVectorList)