# Scikit Learn

## 線性回歸

### 程式碼

```python=
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

x = np.linspace(0, 5, 200)
y = 1.2 * x + 0.8 + 0.5 * np.random.randn(200)
x_train, x_test, y_train, y_test = train_test_split(x, y,test_size = 0.2,random_state = 0)
x_train = x_train.reshape(len(x_train), 1)
x_test = x_test.reshape(len(x_test), 1)
model = LinearRegression()
model.fit(x_train, y_train)
y_predict = model.predict(x_test)
model.predict([[4.3],[2.5]])
plt.scatter(x_test, y_test)
plt.plot(x_test, y_predict,c = 'r')
plt.show()
```

### 函數圖形

![](https://i.imgur.com/4wMzbZe.png)

### 程式介紹

首先引入一些第三方套件,numpy和matplotlib幾乎是必備,今後不多做贅述,接著從sklearn.model_selection引入train_test_split,train_test_split主要功能是劃分資料,最後從sklearn.linear_model引入LinearRegression方便稍後做線性回歸訓練。

接著建造假數據,x為0\~5的數字,共200個,y為將x乘以1.2再加上0.8,且還須加上一個隨機雜訊,使數據更像真實數據。然後我們將x和y資料做切割,分別代表x訓練資料,x測試資料,y訓練資料,y測試資料,在train_test_split()參數裡先加入要切割的資料,第三個參數test_size是測試資料所佔的比例,範圍在0\~1之間,而random_state是我們的亂數種子,可以固定我們切割資料的結果。然後我將x的訓練資料和測試資料變成n\*1的矩陣,代表共有n筆資料,每筆資料只有一個特徵。

接著我們的model是線性回歸,而我們使用model.fit()訓練此model,意思是使此model找到一條最能代表所有資料點的直線,也就是線性回歸在做的事,找尋最佳直線。接著進行預測,我們將一開始切割好的測試資料放入,此時為了測試效果,我在後面又多預測兩個資料,記得自己增加資料時,要保持n\*1的矩陣,最後將測試資料的點畫出來,並畫出回歸直線即完成。

## 波士頓房價預測(真實數據)

### 程式碼

```python=
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston

sns.set()
boston_dataset = load_boston()
# print(boston_dataset.DESCR) # description
# print(boston_dataset.data[:5])
boston = pd.DataFrame(boston_dataset.data,columns = boston_dataset.feature_names)
# boston.head()
boston['MEDV'] = boston_dataset.target
boston.head()
sns.distplot(boston.MEDV, bins = 30)
corr_matrix = boston.corr().round(2)
plt.figure(figsize = (11.7, 8.27)) # 寬先寫
sns.heatmap(corr_matrix, annot = True) # draw
plt.figure(3)
boston.iloc[0]
x = boston.loc[:, "CRIM" : "LSTAT"].values
y = boston.MEDV.values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
model = LinearRegression()
model.fit(x_train, y_train)
y_predict = model.predict(x_test)
plt.scatter(y_test, y_predict)
plt.xlim(0, 55)
plt.ylim(0, 55)
plt.plot([0, 55], [0, 55], c = 'r')
```

### 輸出圖形

![](https://i.imgur.com/tD3Znzi.png)

![](https://i.imgur.com/IaL2nZo.png)

![](https://i.imgur.com/PpCLUHW.png)

### 程式說明

這邊我們要畫的圖單靠matplotlib不容易畫出來,所以這邊引用seaborn來做,在Scikit Learn裡的dataset非常豐富,這邊使用boston的房價,是真實數據(注意:load_boston已在scikit-learn 1.2版移除,需使用較舊的版本才能執行此範例),再來使用sns.set()套用seaborn的預設繪圖樣式,並將boston的資料提取存在boston_dataset,接著運用DataFrame將資料存成像excel的表格,更容易觀看,然後我們要預測房價,所以我們多出一個項目"MEDV",代表房價,資料從boston_dataset.target中取出,且用boston.head()可以看到前五筆資料,再來我們用直方圖顯現出boston房價的分布,其中bins代表直方圖要分成幾個區間(這裡是30個)。

再來建造相關係數矩陣,這邊取到小數點後兩位,再調整我們的figure視窗大小,並把相關係數矩陣畫出來,這邊會用顏色深淺代表相關係數的大小,再來將我們要取的資料取出來,從"CRIM" 到 "LSTAT"所有的資料,再將此轉成numpy array的形式,y就是boston房價,也就是說我們利用"CRIM" 到 "LSTAT"的資料來預測y的房價。

再來就是切割資料和訓練資料,這邊和前面一樣,這邊就不多做贅述,最後做預測,並將圖做出來就完成了。

## SVM

### 程式碼

```python=
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#from sklearn.datasets.samples_generator import make_blobs
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

x, y = make_blobs(n_samples = 200, centers = 3,n_features = 2,random_state = 8)
plt.scatter(x[:,0],x[:,1], alpha = 0.5, s = 100, c = y)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
clf = SVC()
clf.fit(x_train, y_train)
y_predict = clf.predict(x_test)
print(y_predict-y_test)
plt.scatter(x_test[:,0], x_test[:,1], c = y_predict-y_test)
x0 = np.arange(-10, 11, 0.02)
y0 = np.arange(-15, 15, 0.02)
X, Y = np.meshgrid(x0, y0)
P = np.c_[X.ravel(), Y.ravel()]
z = clf.predict(P)
Z = z.reshape(X.shape)
plt.contourf(X, Y, Z, alpha = 0.3)
plt.scatter(x[:,0],x[:,1], c = y)
plt.show()
```

### 輸出圖片

![](https://i.imgur.com/tEO8Aqv.png)

### 程式介紹

## 鳶尾花(iris)

### 程式碼

```python=
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.datasets import load_iris

iris = load_iris()
x = iris.data
y = iris.target
x = x[:,2:]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
clf = SVC()
clf.fit(x_train, y_train)
y_predict = clf.predict(x_test)
x0 = np.linspace(0, 8, 500)
y0 = np.linspace(0, 3, 500)
X, Y = np.meshgrid(x0, y0)
P = np.c_[X.ravel(), Y.ravel()]
z = clf.predict(P)
Z = z.reshape(X.shape)
plt.contourf(X, Y, Z, alpha = 0.3)
plt.scatter(x[:,0], x[:, 1], c = y)
plt.show()
```

### 輸出圖片

![](https://i.imgur.com/HEz3cIb.png)

### PCA

### 程式碼

```python=
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

iris = load_iris()
x = iris.data
y = iris.target
pca = PCA(n_components = 2)
pca.fit(x)
x = pca.transform(x)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
clf = SVC()
clf.fit(x_train, y_train)
y_predict = clf.predict(x_test)
x0 = np.linspace(-4, 8, 500)
y0 = np.linspace(-2, 3, 500)
X, Y = np.meshgrid(x0, y0)
P = np.c_[X.ravel(), Y.ravel()]
z = clf.predict(P)
Z = z.reshape(X.shape)
plt.contourf(X, Y, Z, alpha = 0.3)
plt.scatter(x[:,0], x[:, 1], c = y)
plt.show()
```

### 輸出圖片

![](https://i.imgur.com/2Ry3Gk3.png)