# 數據不好不能怪程式
###### tags: `dataset` `6187`
:::info
**請開始你的表演**
:::
``` python
# 導入函式庫
import numpy as np
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from matplotlib import pyplot as plt
import sklearn.datasets as datasets
import pandas as pd
from sklearn.utils import shuffle
from sklearn.externals.six import StringIO
from IPython.display import Image
from sklearn.tree import export_graphviz
from xgboost import XGBClassifier
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import cross_val_score
import pydotplus
import os
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split
# Columns fed to the neural network, grouped by the prefix they share.
# NOTE(review): 'stie_*' / 'stieavg_*' look like a misspelling of 'site', but
# they are used as DataFrame column keys, so they presumably mirror the CSV
# header -- confirm before renaming.
_ANN_FEATURE_GROUPS = (
    ('gender',),
    ('stie_FS', 'stie_MS', 'stie_RS'),
    ('stieavg_FS', 'stieavg_MS', 'stieavg_RS'),
    ('TMA_FS', 'TMA_MS', 'TMA_RS', 'TMA_NO'),
    ('CMA_FS', 'CMA_MS', 'CMA_RS', 'CMA_NO'),
    ('region', 'highest_education', 'age_band'),
)
feature_namesANN = [
    column for group in _ANN_FEATURE_GROUPS for column in group
]

# Columns fed to the decision tree.
# Wider candidate list kept for reference (currently unused):
#   'gender','site','siteavg','TMA','CMA','region','highest_education','age_band'
feature_namesDTree = ['gender', 'site', 'siteavg', 'TMA', 'CMA']

# Name of the label column and the human-readable class names (grade outcome).
target_string = 'final'
target_names = ['Pass', 'Fail']

# One output unit per class name.
outputnum = len(target_names)

# Echo the configuration so a run's log records which columns were used.
print("feature_namesANN : ")
print(", ".join(feature_namesANN))
print("feature_namesDTree : ")
print(", ".join(feature_namesDTree))
print("target : {}".format(target_string))
print(", ".join(target_names))
# Read the two CSV datasets (one per model) and randomise their row order.
# shuffle() is called without a random_state argument.
ann_csv_path = 'read data/20200420/opendata ann/opendata ann.csv'
dtree_csv_path = 'read data/20200420/opendata dtree/opendata dtree.csv'

dfann = shuffle(pd.read_csv(ann_csv_path))
dfdtree = shuffle(pd.read_csv(dtree_csv_path))
from sklearn.metrics import classification_report

# --- Neural network --------------------------------------------------------
# One hidden layer of 15 ReLU units followed by a softmax layer with one unit
# per class in target_names.
model = Sequential()
model.add(Dense(units=15, input_dim=len(feature_namesANN), activation='relu'))
model.add(Dense(outputnum, kernel_initializer='uniform', activation='softmax'))
# sparse_categorical_crossentropy expects integer class labels in the target
# column rather than one-hot rows.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit() is called with its default epoch count and batch size.
model.fit(x=dfann[feature_namesANN], y=dfann[target_string])
# Disabled experiment, kept for reference:
#scores = cross_val_score(model,dfann[feature_namesANN],dfann[target_string],cv=10,scoring='precision')

# NOTE: both reports below are computed on the same rows the models were
# trained on, so they describe training fit, not performance on unseen data.
# Sequential.predict_classes() was removed in TensorFlow 2.6; taking the
# argmax over the softmax probabilities yields the same class ids.
ann_predictions = np.argmax(model.predict(dfann[feature_namesANN]), axis=-1)
print(classification_report(dfann[target_string], ann_predictions))

# --- Decision tree ---------------------------------------------------------
# A node may only be split if it holds at least 10% of the tree's OWN training
# rows (dfdtree, not dfann). The floor of 2 keeps the value valid for
# scikit-learn when the dataset has fewer than 20 rows.
min_rows_to_split = max(2, int(len(dfdtree) * 0.1))
dtree = DecisionTreeClassifier(
    criterion="entropy",
    splitter="best",
    min_samples_split=min_rows_to_split,
)
dtree.fit(dfdtree[feature_namesDTree], dfdtree[target_string])
print(classification_report(dfdtree[target_string], dtree.predict(dfdtree[feature_namesDTree])))
```
```text
feature_namesANN :
gender, stie_FS, stie_MS, stie_RS, stieavg_FS, stieavg_MS, stieavg_RS, TMA_FS, TMA_MS, TMA_RS, TMA_NO, CMA_FS, CMA_MS, CMA_RS, CMA_NO, region, highest_education, age_band
feature_namesDTree :
gender, site, siteavg, TMA, CMA
target : final
Pass, Fail
Epoch 1/1
19412/19412 [==============================] - 1s 34us/step - loss: 0.4785 - acc: 0.7707
             precision    recall  f1-score   support

          0       0.83      0.93      0.87     12360
          1       0.84      0.66      0.74      7052

avg / total       0.83      0.83      0.82     19412

             precision    recall  f1-score   support

          0       0.79      0.97      0.87     12360
          1       0.91      0.56      0.69      7052

avg / total       0.84      0.82      0.81     19412
```
**這個(實際標籤 `dfdtree[target_string]`)**
```python
print(dfdtree[target_string])
8019 1
13765 0
16299 0
2799 0
16340 0
..
2303 0
9566 1
6052 1
4035 0
18806 0
type(dfdtree[target_string])
#<class 'pandas.core.series.Series'>
a=dfdtree[target_string].tolist()
#<class 'list'>
```
**跟這個(決策樹的預測結果 `dtree.predict(...)`)**
```python
print(dtree.predict(dfdtree[feature_namesDTree]))
[0 0 0 ... 0 0 0]
```
## 我不敢相信黑洞是資工系畢業的