# 計算機概論-分析財報-建立模型

# 散佈圖

```python=
import pandas as pd
import matplotlib.pyplot as plt

database1 = pd.read_excel("money.xlsx")
x = database1["股東權益總額"].values
y = database1["常續性稅後淨利"].values

# Scatter plot of the "常續性稅後淨利" column against the "股東權益總額" column.
plt.scatter(x, y)
plt.ylabel("net income")
plt.xlabel("total")
plt.show()
```

# 數學方式求出MSE

```python=
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split  # split data into training / test sets

# Load the data.
database1 = pd.read_excel("money.xlsx")

# Without a library model no intercept term (w0 * x0) is added automatically,
# so a column of ones is prepended to play the role of x0.
# reshape(-1, 1) turns a 1-D array into a single column,
# e.g. [1, 2, 3] -> [[1], [2], [3]].
dirty_x = database1["股東權益總額"].values.reshape(-1, 1)
x_0 = np.ones((dirty_x.shape[0], 1))
x = np.concatenate([x_0, dirty_x], axis=1)
y = database1["常續性稅後淨利"].values.reshape(-1, 1)

# Split into training and test sets (seed 10, 67% train / 33% test).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=10
)

# Normal equation, using the training set only:
#     w = (X^T X)^-1 X^T y
# np.linalg.solve solves (X^T X) w = X^T y directly, which gives the same w
# without forming the explicit inverse (more accurate for large-valued X).
w = np.linalg.solve(x_train.T.dot(x_train), x_train.T.dot(y_train))
print(w)

# Compute the MSE by hand.
# Once w is known the MSE follows directly, because the normal equation for w
# is itself derived by minimising the MSE.
# First run the test set through the model: y = [x0, x1] . [w0, w1].
y_pred = x_test.dot(w)

# MSE = sum((y_pred - y_test) ** 2) / n.
# (Skipping the division would give the un-averaged sum of squared errors.)
mse = np.mean((y_pred - y_test) ** 2)
print(mse)
```

# 若用數學方式求出線性方程組係數

```python=
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split  # split data into training / test sets

# Load the data.
database1 = pd.read_excel("money.xlsx")

# Without a library model no intercept term (w0 * x0) is added automatically,
# so a column of ones is prepended to play the role of x0.
# reshape(-1, 1) turns a 1-D array into a single column,
# e.g. [1, 2, 3] -> [[1], [2], [3]].
dirty_x = database1["股東權益總額"].values.reshape(-1, 1)
x_0 = np.ones((dirty_x.shape[0], 1))
x = np.concatenate([x_0, dirty_x], axis=1)
y = database1["常續性稅後淨利"].values.reshape(-1, 1)

# Split into training and test sets (seed 10, 67% train / 33% test).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=10
)

# Normal equation, using the training set only:
#     w = (X^T X)^-1 X^T y
# np.linalg.solve solves (X^T X) w = X^T y directly, which gives the same w
# without forming the explicit inverse (more accurate for large-valued X).
w = np.linalg.solve(x_train.T.dot(x_train), x_train.T.dot(y_train))
print(w)
```

# test預測分析-步驟1-設定參數及函式

```python=
# Degree of the polynomial features.
des_degree = 6


def clear_row(x):
    """Return a copy of the 2-D array *x* with its first column removed.

    Despite the name, this drops column 0 (axis=1), not a row. It is used to
    strip the constant bias column that PolynomialFeatures puts first.
    """
    return np.delete(x, 0, axis=1)
```

# test預測分析-步驟2-選擇套件及函式

```python=
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split  # split data into training / test sets
from sklearn.linear_model import LinearRegression  # linear model
from sklearn.metrics import mean_squared_error  # MSE (loss function)
from sklearn.preprocessing import PolynomialFeatures  # raise the feature to higher powers
```

# test預測分析-步驟3-載入資料以及整理

```python=
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures

database1 = pd.read_excel("money.xlsx")

# Pull the two columns out as (n, 1) column arrays (convenient for matrix operations).
dirty_x = database1["股東權益總額"].values.reshape(-1, 1)
y = database1["常續性稅後淨利"].values.reshape(-1, 1)
print(dirty_x)
print(y)
```

![](https://i.imgur.com/z5f7JUt.png)

# test預測分析-步驟4-次方化並切分資料

```python=
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split  # split data into training / test sets
from sklearn.linear_model import LinearRegression  # linear model
from sklearn.metrics import mean_squared_error  # MSE (loss function)
from sklearn.preprocessing import PolynomialFeatures  # raise the feature to higher powers

# Degree of the polynomial features.
des_degree = 6


def clear_row(x):
    """Return a copy of the 2-D array *x* with its first column removed.

    Despite the name, this drops column 0 (axis=1), not a row. It is used to
    strip the constant bias column that PolynomialFeatures puts first.
    """
    return np.delete(x, 0, axis=1)


# Load the data.
database1 = pd.read_excel("money.xlsx")

# Pull the two columns out as (n, 1) column arrays (convenient for matrix operations).
dirty_x = database1["股東權益總額"].values.reshape(-1, 1)
y = database1["常續性稅後淨利"].values.reshape(-1, 1)

# PolynomialFeatures: expand x into [1, x, x^2, ..., x^des_degree].
poly = PolynomialFeatures(degree=des_degree)

# Drop the bias column; LinearRegression (step 5) adds its own intercept.
x = clear_row(poly.fit_transform(dirty_x))

# Split into training and test sets.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=13
)
print("x_test:", x_test)
print("x_train:", x_train)
print("y_test:", y_test)
print("y_train:", y_train)
```

![](https://i.imgur.com/2iQutDb.png)

# test預測分析-步驟5-製作模型並呈現數線係數

```python=
# Build the model (LinearRegression() adds the intercept term automatically).
lr = LinearRegression()
lr.fit(x_train, y_train)

# Goodness-of-fit score (R^2) on each split.
print("測試集資料分數:", lr.score(x_test, y_test))
print("訓練集資料分數:", lr.score(x_train, y_train))

# Show the fitted weights as one column: intercept first, then the polynomial coefficients.
w = np.hstack([lr.intercept_, lr.coef_[0]]).reshape(-1, 1)
print(w)
```

![](https://i.imgur.com/nfvbljI.png)

# test預測分析-步驟6-畫圖

```python=
# Evenly spaced x values used to draw the fitted curve.
# 500 samples are plenty for a smooth line.
x_pred = np.linspace(1700000, 2321000, 500).reshape(-1, 1)

# Reuse the transformer fitted in step 4 (transform, not fit_transform),
# then drop the bias column exactly as was done for the training data.
y_pred = lr.predict(clear_row(poly.transform(x_pred)))

# Column 0 of x_test is the original (degree-1) feature; the remaining columns
# are its higher powers, which are not needed for the scatter plot.
del_x = x_test[:, [0]]
print(y_test)

# Plot the test points together with the fitted curve.
plt.scatter(del_x, y_test, label="test point")
plt.plot(x_pred, y_pred, label="h", c="red")
plt.legend()
plt.show()
```

![](https://i.imgur.com/Ii9KUy1.png)
![](https://i.imgur.com/J1hDcR6.png)

###### 資料來源: http://chur.chu.edu.tw/bitstream/987654321/1590/1/GE096100190.pdf