### Loading a local dataset
```
import pandas as pd
iris = pd.read_csv("iris.csv")  # path to the CSV file
print(iris)
```
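The later blocks index this DataFrame both by position (`iloc`) and by column name, so it helps to confirm the layout right after loading. A minimal check, assuming the Kaggle-style `iris.csv` with the columns `Id, SepalLengthCm, SepalWidthCm, PetalLengthCm, PetalWidthCm, Species`:
```
# quick sanity check on the loaded CSV (the column layout is an assumption here;
# the later blocks rely on it when using iloc[:, [3,4]] and the *Cm column names)
print(iris.shape)    # (150, 6) for the Kaggle version of the dataset
print(iris.columns)  # confirm the column names before indexing by position
print(iris.head())
```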
#### Analyzing iris with SVM
```
import numpy as np
import matplotlib.pyplot as plt
# the sklearn package ships the real-world iris dataset: sepal and petal length/width measurements
from sklearn.datasets import load_iris
iris = load_iris()
# you can print this to inspect the Iris Plants Database description
#print(iris.DESCR)
# X is the raw data: sepal and petal lengths and widths
# Y is the correct class label for each flower
X = iris.data
Y = iris.target
# taking the first sample as an example, the column order is:
# sepal length, sepal width, petal length, petal width
# the slice below keeps only the petal length and petal width
X = X[:,2:]
# for machine learning, the data has to be split into training data and test data
from sklearn.model_selection import train_test_split
# train_test_split automatically splits the data into x_train, x_test, y_train, y_test
# the test set is set to 20% of the data, hence test_size=0.2
# random_state can be any fixed number (it just makes the split reproducible)
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=87
)
# petal length: x_train[:,0]
# petal width: x_train[:,1]
# c = the array of class labels, used to colour the points
# the plot will show the petals in three colours, one per class
plt.scatter(x_train[:,0],x_train[:,1], c=y_train)
# import SVC, the classifier from the support vector machine module
from sklearn.svm import SVC
# create an SVC instance
clf = SVC()
# start training: clf.fit(input data, correct answers)
clf.fit(x_train,y_train)
# feed in the test data and check the predicted results
y_test_predict = clf.predict(x_test)
# plot the predictions on the test data
# judging from the plot, the predictions look quite successful
plt.scatter(x_test[:,0],x_test[:,1], c=y_test_predict)
# to see more precisely how many points were misclassified, subtract the true answers y_test: c=y_test_predict - y_test
# when a prediction is correct, y_test_predict - y_test is 0; when it is wrong, the difference is nonzero
# from this we can see that only one point was misclassified (its colour differs from all the other points)
plt.scatter(x_test[:,0],x_test[:,1], c=y_test_predict - y_test)
```
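The last scatter plot only shows misclassification visually. To put a number on it, the test-set accuracy can be checked as well; a minimal sketch using `sklearn.metrics.accuracy_score` (not part of the original notes), continuing from the block above:
```
from sklearn.metrics import accuracy_score

# fraction of test samples classified correctly
print(accuracy_score(y_test, y_test_predict))
# the classifier can also score the test set directly
print(clf.score(x_test, y_test))
# number of misclassified test points (should match the single odd-coloured dot noted above)
print(np.sum(y_test_predict != y_test))
```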
### Parsing a local CSV file for analysis
```
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import preprocessing
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
df = pd.read_csv('iris.csv')
X = df.iloc[:, [3,4]]  # the columns at index 3 and 4, i.e. 'PetalLengthCm' and 'PetalWidthCm'
y = df.iloc[:, 5]      # the label column, i.e. 'Species'
# encode the species names as integers 0/1/2 so they can be used as scatter colours
le = preprocessing.LabelEncoder()
y = le.fit_transform(y)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47)
clf = SVC()
clf.fit(x_train, y_train)
y_test_predict = clf.predict(x_test)
print(X)
colormap = np.array(['y', 'g', 'r'])
plt.scatter(x_train['PetalLengthCm'], x_train['PetalWidthCm'], c=y_train)
plt.scatter(x_test['PetalLengthCm'], x_test['PetalWidthCm'], c=y_test_predict)
# colour by the label difference: a correct prediction gives index 0 ('y'), any error gives another colour
plt.scatter(x_test['PetalLengthCm'], x_test['PetalWidthCm'], c=colormap[y_test_predict - y_test])
```
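Indexing `colormap` with the raw difference works because a correct prediction gives index 0 (`'y'`) while any error gives a nonzero (possibly negative) index, which numpy wraps around. An arguably clearer variant, just as a sketch, is to colour by a correct/incorrect mask:
```
# colour by correctness instead of by the raw label difference:
# index 0 ('y') = correct, index 1 ('r') = misclassified
wrong = (y_test_predict != y_test).astype(int)
plt.scatter(x_test['PetalLengthCm'], x_test['PetalWidthCm'], c=np.array(['y', 'r'])[wrong])
plt.show()
```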
### Using to_numpy so the final plot can use x_train[:,0]
```
df = pd.read_csv('iris.csv')
new = df[['PetalLengthCm','PetalWidthCm']].to_numpy()
X = new                                                        # data
y = preprocessing.LabelEncoder().fit_transform(df['Species'])  # target, encoded as 0/1/2 so it can colour the scatter plot
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47)
clf = SVC(kernel='linear', C=1, gamma='auto')
clf.fit(x_train, y_train)
y_test_predict = clf.predict(x_test)
print(x_train[:, 0])
colormap = np.array(['g', 'r'])
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train)
```
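The point of `.to_numpy()` here is that the result supports positional slicing like `x_train[:, 0]`, whereas the DataFrame in the previous section had to be indexed by column name. A small comparison (just an illustration, using the same `iris.csv`):
```
# the same petal-length column, accessed two ways
cols = df[['PetalLengthCm', 'PetalWidthCm']]
print(cols['PetalLengthCm'].head())  # DataFrame: index by column name
print(cols.to_numpy()[:5, 0])        # ndarray: index by position
```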
### Plotting the decision boundary
https://pyecontech.com/2020/04/11/python_svm/
https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
https://ccliao.github.io/2017/06/24/python-svm/
```
def plot_estimator(estimator, X, y, title):
    # pad the ranges by 1 only to leave some blank margin when plotting
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # meshgrid builds coordinate matrices from the two coordinate vectors
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))
    # ravel() returns a contiguous flattened (1-D) array;
    # np.c_ stacks the two flattened grids column-wise, one (x, y) pair per grid point
    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])  # 1-D, 3476 predictions in this example
    Z = Z.reshape(xx.shape)  # back to the 2-D grid shape, (44, 79) in this example
    plt.plot()
    # contours can be explained simply as curves joining all the continuous points (along the boundary) having the same colour or intensity
    plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.RdYlBu)  # cmap: colormap; alpha: transparency, 0~1, smaller = more transparent
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.brg)     # c: colour the points by class label
    plt.title(title)
    plt.xlabel('Petal.Length')
    plt.ylabel('Petal.Width')
    plt.show()
df = pd.read_csv('iris.csv')
new = df[['PetalLengthCm','PetalWidthCm']].to_numpy()
X = new
y = preprocessing.LabelEncoder().fit_transform(df['Species'])  # encode the labels so they can be used as plot colours
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47)
clf = SVC(kernel='linear', C=1, gamma='auto')
clf.fit(x_train, y_train)
y_test_predict = clf.predict(x_test)
print(x_train[:, 0])
colormap = np.array(['g', 'r'])
plot_estimator(clf, X, y, "svm")
```
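The references above also compare how the decision boundary changes with the SVC kernel. As a possible follow-up (a sketch, not part of the original notes), `plot_estimator` can simply be reused with different kernels:
```
# train one classifier per kernel and plot its decision regions
for kernel in ['linear', 'rbf', 'poly']:
    clf_k = SVC(kernel=kernel, C=1, gamma='auto')
    clf_k.fit(x_train, y_train)
    plot_estimator(clf_k, X, y, "svm (kernel=%s)" % kernel)
```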