#### Загрузим датасет из библиотеки sklearn

```python=3.9
iris = datasets.load_iris()
X = iris.data

print("Dataset shape -", X.shape)
print("Dataset sample example -", *X[:1])
```

```protobuf=1
Dataset shape - (150, 4)
Dataset sample example - [5.1 3.5 1.4 0.2]
```

#### Разбиение выборки на тестовую и обучающую

```python=3.9
Y = iris.target
# Train size default = .25
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, random_state=222)

print(f'Train sample shape (source), (result) - {X_train.shape}, {Y_train.shape}')
print(f'Test sample shape (source), (result) - {X_test.shape}, {Y_test.shape}')
```

```protobuf=1
Train sample shape (source), (result) - (112, 4), (112,)
Test sample shape (source), (result) - (38, 4), (38,)
```

#### RBF

```python=3.9
C_array = np.logspace(-3, 3, num=7)
gamma_array = np.logspace(-5, 2, num=8)
svc = SVC(kernel='rbf')
grid = model_selection.GridSearchCV(svc, param_grid={'C': C_array, 'gamma': gamma_array})
grid.fit(X_train, Y_train)

info(name='radial', grid_search_cv=grid, gamma=True)

svc = SVC(kernel='rbf', C=grid.best_estimator_.C, gamma=grid.best_estimator_.gamma)
svc.fit(X_train, Y_train)

pfm(Y_test, svc.predict(X_test))
```

```protobuf=1
best radial
C = 1.0
CV error = 0.009090909090909038
gamma = 1.0

accuracy_score: 0.9210526315789473

confusion_matrix:
[[15  0  0]
 [ 0  9  1]
 [ 0  2 11]]

classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.82      0.90      0.86        10
           2       0.92      0.85      0.88        13

    accuracy                           0.92        38
   macro avg       0.91      0.92      0.91        38
weighted avg       0.92      0.92      0.92        38
```

#### Линейное

```python=3.9
# Linear
C_array = np.logspace(-3, 3, num=7)
svc = SVC(kernel='linear')
grid = model_selection.GridSearchCV(svc, param_grid={'C': C_array})
grid.fit(X_train, Y_train)

info(name='linear', grid_search_cv=grid)

svc = SVC(kernel='linear', C=grid.best_estimator_.C)
svc.fit(X_train, Y_train)

pfm(Y_test, svc.predict(X_test))
# Linear ^===
```

```protobuf=1
best linear
C = 1.0
CV error = 0.009090909090909038

accuracy_score:
0.9473684210526315

confusion_matrix:
[[15  0  0]
 [ 0  9  1]
 [ 0  1 12]]

classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.90      0.90      0.90        10
           2       0.92      0.92      0.92        13

    accuracy                           0.95        38
   macro avg       0.94      0.94      0.94        38
weighted avg       0.95      0.95      0.95        38
```

#### Полиномиальное

```python=3.9
# Poly
C_array = np.logspace(-5, 2, num=8)
gamma_array = np.logspace(-5, 2, num=8)
degree_array = [2, 3, 4]
svc = SVC(kernel='poly')
grid = model_selection.GridSearchCV(svc, param_grid={'C': C_array, 'gamma': gamma_array, 'degree': degree_array})
grid.fit(X_train, Y_train)

info(name='poly', grid_search_cv=grid, gamma=True, degree=True)

svc = SVC(kernel='poly', C=grid.best_estimator_.C, gamma=grid.best_estimator_.gamma, degree=grid.best_estimator_.degree)
svc.fit(X_train, Y_train)

pfm(Y_test, svc.predict(X_test))
# Poly ^===
```

```protobuf=1
best poly
C = 1e-05
CV error = 0.009090909090909038
gamma = 100.0
degree = 2

accuracy_score: 0.9210526315789473

confusion_matrix:
[[15  0  0]
 [ 0  9  1]
 [ 0  2 11]]

classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.82      0.90      0.86        10
           2       0.92      0.85      0.88        13

    accuracy                           0.92        38
   macro avg       0.91      0.92      0.91        38
weighted avg       0.92      0.92      0.92        38
```

#### Сигмоидное

```python=3.9
# Sigmoid
C_array = np.logspace(-3, 3, num=7)
gamma_array = np.logspace(-5, 2, num=8)
svc = SVC(kernel='sigmoid')
grid = model_selection.GridSearchCV(svc, param_grid={'C': C_array, 'gamma': gamma_array})
grid.fit(X_train, Y_train)

# Print the best-parameter summary once via the shared helper, matching the
# other kernel sections (the redundant manual print() block was removed — it
# emitted the same four lines a second time).
info(name='sigmoid', grid_search_cv=grid, gamma=True)

svc = SVC(kernel='sigmoid', C=grid.best_estimator_.C, gamma=grid.best_estimator_.gamma)
svc.fit(X_train, Y_train)

pfm(Y_test, svc.predict(X_test))
# Sigmoid ^===
```

```protobuf=1
best sigmoid
C = 1000.0
CV error = 0.009090909090909038
gamma = 0.001

accuracy_score: 0.9473684210526315

confusion_matrix:
[[15  0  0]
 [ 0  9  1]
 [ 0  1 12]]

classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.90      0.90      0.90        10
           2       0.92      0.92      0.92        13

    accuracy                           0.95        38
   macro avg       0.94      0.94      0.94        38
weighted avg       0.95      0.95      0.95        38
```

#### Визуализация данных, используя PCA

```python=3.9
# 3D graph
# Size in inch
fig_size = (10, 10)
fig = plt.figure(1, figsize=fig_size)
axes = Axes3D(fig, elev=10, azim=75, auto_add_to_figure=False)
x_reduced = PCA(n_components=3).fit_transform(X)
axes.scatter(x_reduced[:, 0], x_reduced[:, 1], x_reduced[:, 2], c=Y, cmap=plt.cm.Set1, edgecolor='k', s=40)
axes.set_title("PCA")
axes.set_xlabel("PCA 1-vector")
axes.w_xaxis.set_ticklabels([])
axes.set_ylabel("PCA 2-vector")
axes.w_yaxis.set_ticklabels([])
axes.set_zlabel("PCA 3-vector")
axes.w_zaxis.set_ticklabels([])
fig.add_axes(axes)
plt.show()
```

![](https://i.imgur.com/YtDH1oy.png)

##### Вспомогательные функции

```python=3.9
def pfm(y_true, y_pred):
    """Print accuracy, confusion matrix and classification report for y_pred vs y_true."""
    print("accuracy_score:", metrics.accuracy_score(y_true, y_pred), end='\n\n')
    print("confusion_matrix:", metrics.confusion_matrix(y_true, y_pred), end='\n\n', sep='\n')
    print("classification_report:", metrics.classification_report(y_true, y_pred), sep='\n')


def info(name: str, grid_search_cv: model_selection.GridSearchCV, gamma=None, degree=None):
    """Print the best hyper-parameters found by grid_search_cv.

    name          -- kernel label used in the 'best {name}' header line.
    gamma, degree -- truthy: additionally print that best parameter.
    """
    if gamma is None:
        gamma = False
    if degree is None:
        degree = False
    print(f'best {name}')
    print(f'C = {grid_search_cv.best_estimator_.C}')
    print(f'CV error = {1 - grid_search_cv.best_score_}')
    if gamma:
        print(f'gamma = {grid_search_cv.best_estimator_.gamma}')
    if degree:
        # Fixed: previously read the *global* `grid` here instead of the
        # grid_search_cv argument, silently coupling the helper to outer state.
        print(f'degree = {grid_search_cv.best_estimator_.degree}')
```