# Machine Learning Temporary Code
###### tags: `Study`
:::spoiler
```python=
from sklearn.tree import DecisionTreeClassifier

# Fit the tree on the training split (X_train, y_train from a prior train_test_split)
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)
```
:::
This can later be inspected with confusion_matrix (see the sketch at the end of this note).

---

:::spoiler Decision Tree
```python=
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn import tree

# Load the car evaluation dataset and inspect it
data = pd.read_csv('car_evaluation.csv')
data.columns
data.info()

# Class balance and a few categorical distributions
data['Class_Values'].value_counts()
data['Class_Values'].value_counts().plot(kind='bar')
plt.show()
data['safety'].value_counts().plot(kind='bar')
plt.show()
sns.countplot(x='buying', hue='Class_Values', data=data)
plt.show()

# Features / target
X = data.drop(['Class_Values'], axis=1)
y = data['Class_Values']

# Encode the ordinal categorical features in their natural order
buying_price_category = ['low', 'med', 'high', 'vhigh']
maint_cost_category = ['low', 'med', 'high', 'vhigh']
doors_category = ['2', '3', '4', '5more']
person_capacity_category = ['2', '4', 'more']
lug_boot_category = ['small', 'med', 'big']
safety_category = ['low', 'med', 'high']
all_categories = [buying_price_category, maint_cost_category, doors_category,
                  person_capacity_category, lug_boot_category, safety_category]
oe = OrdinalEncoder(categories=all_categories)
X = oe.fit_transform(data[['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']])

# Train/test split, fit, and evaluate
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
DT_classifier = DecisionTreeClassifier(criterion='entropy', random_state=0,
                                       max_depth=3, min_samples_split=10)
DT_classifier.fit(X_train, y_train)
y_pred = DT_classifier.predict(X_test)
confusion_matrix(y_test, y_pred)

# Plot the fitted tree
fig = plt.figure(figsize=(15, 12))
_ = tree.plot_tree(DT_classifier, feature_names=data.columns[:-1],
                   class_names=DT_classifier.classes_, filled=True)
plt.show()
```
:::

:::spoiler K-means
```python=
# K-Means Clustering

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans

# Importing the dataset (annual income and spending score columns)
dataset = pd.read_csv('Mall_Customers.csv')
X = dataset.iloc[:, 3:5].values

# Using the elbow method to find the optimal number of clusters
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, max_iter=300, n_init=10, init='k-means++', random_state=0)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.show()

# Applying k-means to the mall dataset
kmeans = KMeans(n_clusters=5, max_iter=300, n_init=10, init='k-means++', random_state=0)
y_kmeans = kmeans.fit_predict(X)

# Visualizing the clusters
plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s=100, c='red', label='Careful')
plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s=100, c='blue', label='Standard')
plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s=100, c='green', label='Target')
plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s=100, c='cyan', label='Careless')
plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s=100, c='magenta', label='Sensible')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=300, c='yellow', label='Centroids')
plt.title('Clusters of clients')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
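
# Optional sketch (not in the original snippet): summarize the clustering numerically
# before showing the plot, using the cluster labels and the fitted model's WCSS.
print('Cluster sizes:', np.bincount(y_kmeans))
print('WCSS (inertia) for k = 5:', kmeans.inertia_)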
plt.legend()
plt.show()
```
:::
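
---

Following up on the note under the first snippet: a minimal sketch of how the fitted tree could be checked with `confusion_matrix` and `classification_report`. It assumes `X_train`, `X_test`, `y_train`, `y_test` already exist (for example from the `train_test_split` call in the Decision Tree section); the variable names are illustrative, not part of the original note.
:::spoiler Confusion matrix sketch
```python=
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier

# Assumes X_train, X_test, y_train, y_test come from an earlier train_test_split.
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

# Compare held-out predictions against the true labels:
# rows of the confusion matrix are true classes, columns are predicted classes.
y_pred = classifier.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))  # per-class precision, recall, F1
```
:::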