數據分析
機器學習
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
from sklearn import preprocessing, linear_model, neighbors
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
# Supervised-learning template: load -> clean -> split -> scale -> fit -> report.
# Mind the file format.
# If the file is not a standard CSV, delim_whitespace=True can be used.
# If the file has no header row, header=None can be used.
# df = pd.read_csv('path', header = None, delim_whitespace=True)
df = pd.read_csv('path')
# NOTE(review): `index` is not assigned anywhere in the visible code; it looks
# like a placeholder for the column label(s) to keep -- confirm before running.
# dropna(axis=0, how='any') drops every row that contains a missing value.
df = df[index].dropna(axis = 0, how = 'any')
# x = df.drop(index, axis = 1)
# axis = 1 refers to columns
# axis = 0 refers to rows
# Column 13 is used as the target; every other column is a feature.
y = df[13]
x = df.drop(13, axis = 1)
# Generate degree-2 polynomial features
poly = PolynomialFeatures(degree = 2).fit(x)
x = poly.transform(x)
# test_size is usually smaller than 0.5
# random_state = 1 fixes the shuffle so the split is identical on every run;
# handy while debugging
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 1)
# Fit the scaler on the training split only, then apply it to both splits
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
# NOTE(review): the LinearRegression instance is replaced by the very next
# line, so only LogisticRegression is actually fitted -- keep just the model
# you need.
model = linear_model.LinearRegression()
model = LogisticRegression()
model.fit(x_train, y_train)
# Linear-regression style report
y_pred = model.predict(x_test)
print('Cofficient : {}'.format(model.coef_))
print('Mean squared error : {}'.format(mean_squared_error(y_test, y_pred)))
print('Variance score : {}'.format(r2_score(y_test, y_pred)))
# Logistic-regression style report
print(model.coef_)
print(model.intercept_)
y_pred = model.predict(x_test)
print(y_pred)
accuracy = model.score(x_test, y_test)
print(accuracy)
from sklearn import preprocessing, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
# Load the data: the file is read as whitespace-separated with no header row.
# sep=r'\s+' is the documented replacement for delim_whitespace=True, which
# pandas deprecated in 2.2.
df = pd.read_csv('./dataset/housing.csv', header = None, sep = r'\s+')
# Separate the target (column 13) from the feature columns
y = df[13]
x = df.drop(13, axis = 1)
# Split data: hold out 10% of the rows for testing
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1)
# Normalization: fit the scaler on the training split only, apply to both
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
# Model Select
model = linear_model.LinearRegression()
model.fit(x_train, y_train)
# Predict
y_pred = model.predict(x_test)
print('Cofficient : {}'.format(model.coef_))
print('Mean squared error : {}'.format(mean_squared_error(y_test, y_pred)))
print('Variance score : {}'.format(r2_score(y_test, y_pred)))
# import numpy as np
import pandas as pd
from sklearn import linear_model, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
# Load the red-wine quality dataset
df = pd.read_csv('./dataset/winequality-red.csv')
# Separate the feature columns from the 'quality' target column
x = df.drop('quality', axis=1)
y = df['quality']
# Expand the features with degree-2 polynomial terms
poly = PolynomialFeatures(degree=2)
x = poly.fit_transform(x)
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1
)
# Standardize with statistics learned from the training split only
scaler = preprocessing.StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# Fit a linear-regression model on the training split
model = linear_model.LinearRegression().fit(x_train, y_train)
# Predict on the held-out rows
y_pred = model.predict(x_test)
# Show the coefficients, then the evaluation metrics
print(f'The coefficient : {model.coef_}\n')
print(f'Mean squared error : {mean_squared_error(y_test, y_pred)}')
print(f'Variance score : {r2_score(y_test, y_pred)}')
import numpy as np
import pandas as pd
from sklearn import preprocessing, linear_model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Load the Pima Indians diabetes dataset
pima = pd.read_csv('./dataset/pima-indians-diabetes.csv')
# Alternative: restrict the features to a hand-picked subset of columns
# x = pima[['pregnant', 'insulin', 'bmi', 'age']]
# Every column except 'label' is a feature; 'label' is the target
x = pima.drop(columns=['label'])
y = pima['label']
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1
)
# Standardize with statistics learned from the training split only
scaler = preprocessing.StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# Fit the classifier and show its learned parameters
model = LogisticRegression().fit(x_train, y_train)
print(model.coef_)
print(model.intercept_)
# Predicted labels for the held-out rows
y_pred = model.predict(x_test)
print(y_pred)
# Mean accuracy on the held-out rows
accuracy = model.score(x_test, y_test)
print(accuracy)
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
from sklearn import preprocessing, linear_model, neighbors
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
### 匯入檔案
## df = pd.read_csv('path')
## df = pd.read_csv('path', header = None)
## df = pd.read_csv('path', delim_whitespace = True)
## missing value
#df = df[index].dropna(axis = 0, how = 'any')
## 分割answer and data
## x = df.drop(index, axis = 1)
## axis = 1 代表直列,column
## axis = 0 代表橫列,row
# y = df[index]
# x = df.drop(index, axis = 1)
### Poly(視情況使用)
### 產生degree 為 2 的feature
## poly = PolynomialFeatures(degree = 2).fit(x)
## x = poly.transform(x)
## Split data
## test_size通常小於0.5
## random_state = 1,使shuffle機制停止,固定切割資料,debug可以用
# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 1)
## Normalization
# scaler = preprocessing.StandardScaler().fit(x_train)
# x_train = scaler.transform(x_train)
# x_test = scaler.transform(x_test)
## Model Select
### Linear Regression
## model = linear_model.LinearRegression()
## model = LogisticRegression()
### KNN
## model = neighbors.KNeighborsClassifier()
## model = neighbors.KNeighborsClassifier(n_neighbors=3)
### Decision Tree
## model = DecisionTreeClassifier(max_depth=3)
## model = DecisionTreeClassifier()
### Random Forest
## model = RandomForestClassifier(max_depth=7, random_state=0)
### SVC
## model = SVC(kernel='rbf')
### Naive Bayes
## model = GaussianNB()
## Fit
# model.fit(x_train, y_train)
## Predict
# y_pred = model.predict(x_test)
### Metric
## print('Cofficient : {}'.format(model.coef_))
## print('Mean squared error : {}'.format(mean_squared_error(y_test, y_pred)))
## print('Variance score : {}'.format(r2_score(y_test, y_pred)))
## print('Intercept of Model : {}'.format(model.intercept_))
## print('準確率 : {}'.format(model.score(x_test, y_test)))
## print('accuracy : {}'.format(accuracy_score(y_test, y_pred)))
## print('number of correct sample : {}'.format(accuracy_score(y_test, y_pred, normalize = False)))
## print('con_matrix: {}'.format(confusion_matrix(y_test, y_pred)))
## 匯出CSV
# submission = pd.DataFrame({
# "PassengerId": test_df["PassengerId"],
# "Survived": Y_pred
# })
# submission.to_csv('submission.csv', index=False)
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
from sklearn import preprocessing, linear_model, neighbors
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
### 匯入檔案
## df = pd.read_csv('path')
## df = pd.read_csv('path', header = None)
## df = pd.read_csv('path', delim_whitespace = True)
## missing value
#df = df[index].dropna(axis = 0, how = 'any')
### 合併資料
## f1 = data['V1'].values
## f2 = data['V2'].values
## X = np.array(list(zip(f1, f2)))
### Model select
## model = KMeans(n_clusters=3).fit(x)
## model = DBSCAN(eps=0.3, min_samples=5).fit(x)
## model = mixture.GaussianMixture(n_components=3).fit(X)
## Predict
# labels = model.predict(x)
# labels = model.labels_
# X_pred = gmm.predict(x)
#### Metric
### Kmeans
## centroids = kmeans.cluster_centers_
## print('centroids: {}'.format(centroids))
## print('prediction on each data: {}'.format(labels))
## labels = kmeans.predict(np.array([[12.0,14.0]]))
## print('prediction on data point (12.0, 14.0): {}'.format(labels))
### DBSCAN
### Note that -1 are noisy points
## print('cluster on X {}'.format(labels))
### Number of clusters in labels, ignoring noise if present.
## n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
## print('number of clusters: {}'.format(n_clusters))
## 匯出CSV
# submission = pd.DataFrame({
# "PassengerId": test_df["PassengerId"],
# "Survived": Y_pred
# })
# submission.to_csv('submission.csv', index=False)
輸入資料有答案(label)
May 17, 2023持續更新中 程式 書籍 競賽程式Apcs 325 演算法學習資源 資管營簡報與筆記 尋找程式之路裡面有很多各種競賽的介紹,還有學習資源 Leetcode
Feb 23, 2023聯絡方式 學涯引路人|FB粉專:https://www.facebook.com/UnfilteredCollegeLife 學涯引路人|IG粉專:https://www.instagram.com/unfiltered_college_life/ 叛逆青年看社會|FB粉專:https://www.facebook.com/RBYSS 叛逆青年看社會|IG粉專:https://www.instagram.com/rebellious_youth_see_society/ 我的FB:https://www.facebook.com/profile.php?id=100004486146747 大學可能跟你想的不一樣 Q&A
Jun 6, 2022考試制度 APCS組 考試內容 考試時間 觀念題一節60分鐘,共兩節 實作題考150分鐘
Apr 28, 2022or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up