###### tags: `計算智慧與規劃` `期末專題`
# 推測某年 PM2.5 程式
### 檔案
- 2019 原始資料（檔名：`<name>_2019.csv`，`<name>` 為程式執行時輸入的字串）
- 2020 原始資料（檔名：`<name>_2020.csv`）
- Weight_2019（檔名：`Weight<name>_2019_2.csv`）
```python=
import csv
from collections import defaultdict
# Per-feature running minimum and maximum (11 slots each); MinMax() updates
# them in place, norm()/score()/notNorm() read them.
MinList = [1000] * 11
MaxList = [0] * 11
# Nested mapping filled by score():
# {model index: {date string: [one estimated PM2.5 value per hourly row]}}
library = defaultdict(dict)
def pick(data):
    """Return the rows of *data* that describe one of the wanted measurements.

    The measurement name is read from column 2 of each row and compared after
    stripping surrounding whitespace, so the amount of padding in the raw
    file does not matter.  Row order is preserved; the rows themselves are
    not copied.

    Args:
        data: list of rows (lists of strings) as produced by ``csv.reader``.

    Returns:
        A new list holding only the rows whose item name is one of the eleven
        names below.
    """
    wanted = frozenset((
        "PM2.5", "AMB_TEMP", "CO", "NO", "NO2", "NOx",
        "O3", "PM10", "RAINFALL", "RH", "SO2",
    ))
    # Rows with fewer than three fields (csv.reader yields [] for a blank
    # line) cannot carry an item name; skip them instead of raising IndexError.
    return [row for row in data if len(row) > 2 and row[2].strip() in wanted]
def changeType(data):
    """Convert the readings of every row to ``float``, in place.

    Columns 0-2 are left untouched.  From column 3 onward each cell is
    replaced by its float value, or by the marker string ``"empty"`` when it
    cannot be parsed as a number.

    Args:
        data: list of rows (mutable lists); modified in place.

    Returns:
        None.
    """
    for row in data:
        for col in range(3, len(row)):
            # Parse once and fall back to the marker, rather than testing
            # convertibility first and converting a second time.
            try:
                row[col] = float(row[col])
            except (TypeError, ValueError):
                row[col] = "empty"
def is_number(s):
    """Return True when *s* can be converted with ``float()``, else False.

    Values that ``float()`` rejects with TypeError (for example ``None``) are
    reported as not convertible instead of letting the exception escape.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
def turn(data):
    """Pivot per-day measurement rows into one row per hour.

    *data* is read in consecutive groups of 11 rows (one group per day, one
    row per measurement).  For every group and every hour 0-23 an output row
    of 12 entries is built: entry 0 is column 1 of the group's first row with
    the hour number appended as text, and entries 1-11 are the 11
    measurements' values for that hour (taken from column ``hour + 3``).

    Args:
        data: list of rows; any trailing rows beyond a multiple of 11 are
            ignored.

    Returns:
        A new list with 24 rows per complete group.
    """
    # The number of complete groups tells whether the year has 365 or 366 days.
    day_count = len(data) // 11
    hourly_rows = []
    for day_index in range(day_count):
        first = day_index * 11
        stamp = data[first][1]
        for hour in range(24):
            record = [stamp + str(hour)]
            for offset in range(11):
                record.append(data[first + offset][hour + 3])
            hourly_rows.append(record)
    return hourly_rows
def check(data):
    """Drop every row that contains a missing value.

    A row is kept only when exactly 12 of its entries differ from the marker
    string ``"empty"``.

    Args:
        data: list of rows.

    Returns:
        A new list with the surviving rows, in their original order.
    """
    def filled(row):
        # Count the entries that hold a real value.
        return sum(1 for cell in row if cell != "empty")

    return [row for row in data if filled(row) == 12]
def changeSpace(data):
    """Swap the last column with the column three places before it, in place.

    The column positions are derived from the length of the first row and
    applied to every row.  main() uses this to move the PM2.5 value to the
    end of each hourly row.

    Args:
        data: list of rows (mutable lists); modified in place.  An empty list
            is accepted and left untouched.

    Returns:
        None.
    """
    if not data:
        # Nothing to swap; avoids IndexError on data[0].
        return
    width = len(data[0])
    last, other = width - 1, width - 4
    for row in data:
        row[last], row[other] = row[other], row[last]
def MinMax(data):
    """Fold the values of *data* into the module-level MinList / MaxList.

    For each row, columns 1-11 are compared with the stored extremes in slots
    0-10, and a slot is replaced whenever a smaller (MinList) or larger
    (MaxList) value is seen.  Both lists are printed once all rows have been
    processed.

    Args:
        data: list of rows with numeric values in columns 1-11.

    Returns:
        None.
    """
    for row in data:
        for slot in range(11):
            value = row[slot + 1]
            MaxList[slot] = max(MaxList[slot], value)
            MinList[slot] = min(MinList[slot], value)
    print("MinList", MinList, "MaxList", MaxList)
def norm(data):
    """Min-max scale columns 1-11 of every row, in place.

    Each value becomes ``(value - MinList[k]) / (MaxList[k] - MinList[k])``
    using the module-level MinList / MaxList for slot ``k = column - 1``.

    Args:
        data: list of rows with numeric values in columns 1-11; modified in
            place.

    Returns:
        The same list object that was passed in.

    Raises:
        ZeroDivisionError: if a slot's maximum equals its minimum and *data*
            is not empty.
    """
    # Offset and range of each slot do not change while scaling.
    bounds = [(MinList[k], MaxList[k] - MinList[k]) for k in range(11)]
    for row in data:
        for k, (low, span) in enumerate(bounds):
            row[k + 1] = (row[k + 1] - low) / span
    return data
def change(weight):
    """Extract the numeric weights from the raw weight rows.

    For every row, the text in column 1 is split on commas, the last piece is
    discarded, and the remaining pieces are converted to ``float``.  Each row
    is converted over its own full length, so rows of different lengths are
    handled correctly.

    Args:
        weight: list of rows as read from the weight CSV; column 1 holds the
            comma-separated weight text.

    Returns:
        A new list with one list of floats per input row.

    Raises:
        ValueError: if a remaining piece is not a valid number.
    """
    parsed = []
    for row in weight:
        pieces = row[1].split(",")
        # The final piece is always dropped -- presumably the text ends with a
        # trailing comma; verify against the weight file format.
        pieces.pop()
        parsed.append([float(piece) for piece in pieces])
    return parsed
def difScore(data, weight):
    """Score every weight vector against *data* and print the results.

    score() is called once per entry of *weight*; the collected scores are
    then rescaled in place by notNorm() (back from the normalised scale) and
    printed.

    Args:
        data: hourly rows passed through to score().
        weight: list of weight vectors.

    Returns:
        None.
    """
    scores = [score(data, weight, index) for index in range(len(weight))]
    # Undo the normalisation before reporting.
    notNorm(scores)
    print(scores)
def score(data, weight, num):
    """Return the mean absolute error of model *num* over *data*.

    For each row the estimate is the weighted sum of columns 1-10 using
    ``weight[num][0..9]``, minus ``weight[num][10]``.  The error is measured
    against the row's last column, which is where changeSpace() places the
    observed PM2.5 value.  (The previous code indexed that column with
    ``len(weight) - 1``, i.e. the number of models, which only points at the
    last column when there happen to be 12 models.)

    Side effect: every row's estimate, converted back to the original scale
    with MinList[10] / MaxList[10] (or 0 when the normalised estimate is
    negative), is appended to ``library[num][date]``, where *date* is the part
    of column 0 before the first space.

    Args:
        data: normalised hourly rows (column 0 timestamp text, columns 1-10
            inputs, last column observed value).
        weight: list of weight vectors with at least 11 numbers each.
        num: index of the model in *weight*.

    Returns:
        The average absolute difference, still on the normalised scale.

    Raises:
        ZeroDivisionError: if *data* is empty.
    """
    coeffs = weight[num]
    span = MaxList[10] - MinList[10]
    per_day = library[num]
    total = 0
    for row in data:
        guess = sum(float(coeffs[i]) * float(row[i + 1]) for i in range(10))
        guess = guess - coeffs[10]

        # Value stored per day: clamp negative estimates to 0, otherwise undo
        # the min-max scaling.  `guess` itself stays normalised for the error.
        if guess < 0:
            estimate = 0
        else:
            estimate = guess * span + MinList[10]
        date = row[0].split(' ')[0]
        # A day has several hourly rows, so estimates are grouped in a list.
        per_day.setdefault(date, []).append(estimate)

        total += abs(guess - row[-1])
    return total / len(data)
def notNorm(scores):
    """Rescale *scores* in place by the range stored in slot 10.

    Every entry is multiplied by ``MaxList[10] - MinList[10]``.

    Args:
        scores: list of numbers; modified in place.

    Returns:
        None.
    """
    span = MaxList[10] - MinList[10]
    for index, value in enumerate(scores):
        scores[index] = value * span
def _read_csv_rows(path, encoding=None):
    """Return every row of the comma-delimited CSV file at *path* as a list."""
    with open(path, newline='', encoding=encoding) as csvfile:
        return list(csv.reader(csvfile, delimiter=','))


def _load_hourly(path):
    """Read one raw yearly file and return complete hourly rows, PM2.5 last."""
    rows = pick(_read_csv_rows(path))   # keep only the needed measurements
    changeType(rows)                    # strings -> floats / "empty"
    rows = check(turn(rows))            # one row per hour, drop incomplete rows
    changeSpace(rows)                   # move PM2.5 to the last column
    return rows


def main():
    """Estimate 2020 daily PM2.5 with the 2019 weights and write it to CSV.

    Reads a name from standard input, then:

    1. loads ``<name>_2019.csv`` and ``<name>_2020.csv``;
    2. takes the per-column minimum/maximum from the 2019 rows and uses them
       to normalise the 2020 rows;
    3. loads the weight vectors from ``Weight<name>_2019_2.csv`` (UTF-8);
    4. scores every weight vector on the 2020 rows (prints the scores);
    5. writes ``Models_Guess_<name>_2020.csv``: the first row holds the dates
       recorded for model 0, followed by one row per model with the mean of
       that model's estimates for each date.

    One output row is written per loaded weight vector, instead of a fixed
    count of 8.
    """
    name = input()

    olddata = _load_hourly(name + '_2019.csv')
    newdata = _load_hourly(name + '_2020.csv')

    # The 2020 data is normalised with the 2019 extremes.
    MinMax(olddata)
    newdata = norm(newdata)

    # Keep only the numeric weight part of each row.
    weight = change(_read_csv_rows("Weight" + name + '_2019_2.csv', encoding="utf-8"))

    difScore(newdata, weight)

    url = 'Models_Guess_' + name + '_2020.csv'
    with open(url, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(library[0].keys())  # header row: the dates
        for modelNum in range(len(weight)):
            # Daily mean = sum of that day's estimates / their count.
            daily_means = [
                str(sum(hours) / len(hours))
                for hours in library[modelNum].values()
            ]
            writer.writerow(daily_means)    # one row per model
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__" :
    main()
```