2019deeplearn hw2
DeeplearnHW02 410535020 資管四 葉松
###### tags: `Fundamental Deep Learning Assignments 2019`
# Hand Posture Recognition with CNN
---
<!-- 


-->
# 目錄
[TOC]
# 1.問題
---
In this assignment, you will practice the design of a Convolutional Neural Network to recognize the three different hand postures illustrated below:
You will be given 5 data sets captured from different persons under different photo
shooting conditions. The samples in each data set are stored in 9 separate directories.
The directories 0000~0002, 0003~0005, and 0006~0008 contain samples of Posture 1, 2, and 3, respectively. Each directory has 20 samples. Each sample is a gray image of 32x32 pixels
# 2.CNN神經網絡介紹及實現過程
---
這邊先簡單介紹兩個會使用到的應用層
**Convolution Layer卷積層**
能將原始圖片與特定的filter做卷積運算,用以萃取出影像特徵
**Pooling Layer 池化層**
能將影像縮減取樣,減少需要處理的資料,且讓影像中手勢位置的差異變小
下面是解決問題的步驟
**資料預處理->建立模型->訓練模型->評估模型的準確率**
### 資料預處理
首先安裝跟匯入需要用到的模組
```python=
# Install OpenCV and its system libraries (Colab shell command)
!apt-get -qq install -y libsm6 libxext6 && pip install -q -U opencv-python
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.utils import np_utils, plot_model
from keras.utils import to_categorical
```
讀取壓縮檔手勢圖片資料集(讀取 image set 的 method 請看完整 code)
```python=
# Load the training (Set1-3) and test (Set4-5) data sets
train_sets = ['Set1', 'Set2', 'Set3']
test_sets = ['Set4', 'Set5']
trn_array, trn_labels = read_datasets(train_sets)
tst_array, tst_labels = read_datasets(test_sets)
```
標準化訓練跟測試資料(這樣可以提高模型預測的準確度,且達到更快的收斂效果)
```python=
# Normalize: reshape to (540, 32, 32, 1) and scale pixel values to [0, 1]
# (original comment said "load data" — this line actually normalizes it)
trn_array4D=trn_array.reshape((540,32,32,1)).astype('float32')/255
```
使用 keras.utils 的 to_categorical 把訓練及測試資料的標籤,進行 Onehot encoding 轉換
```python=
# One-hot encode the integer class labels for categorical cross-entropy
y_TrainOneHot = to_categorical(trn_labels)
y_TestOneHot = to_categorical(tst_labels)
```
### 建立模型
```python=
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Conv2D,MaxPooling2D
# Build a Sequential (linear-stack) model
model = Sequential()
# Conv layer: 16 filters of 3x3 over the 32x32x1 input, ReLU activation
# (default 'valid' padding, so the feature maps come out 30x30, not 32x32)
model.add(Conv2D(16,(3,3),input_shape=(32,32,1), activation='relu'))
# Max-pooling, pool size 3 / stride 3: 30x30 -> 10x10
model.add(MaxPooling2D(3, 3))
# Second conv layer: 32 filters of 2x2, 'same' padding keeps 10x10
model.add(Conv2D(32,(2,2),padding='same',activation='relu'))
# Max-pooling, pool size 2 / stride 2: 10x10 -> 5x5
model.add(MaxPooling2D(2, 2))
# Randomly drop 30% of the units during training to reduce overfitting
model.add(Dropout(0.3))
# Flatten the feature maps into a 1-D vector
model.add(Flatten())
# Fully connected hidden layer with 100 units
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
# Output layer: 3 units, softmax turns activations into class probabilities
model.add(Dense(3,activation='softmax'))
```
### 訓練模型
```python=
# Compile: categorical cross-entropy loss, Adam optimizer, track accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
# Train for 25 epochs with batch size 54
train_history=model.fit(x=trn_array4D, y=y_TrainOneHot,epochs=25, batch_size=54,verbose=2)
```
### 評估模型的準確率
```python=
# Evaluate the model on the held-out test data
scores = model.evaluate(tst_array4D, y_TestOneHot)
# scores[1] is the test accuracy (displayed by the notebook cell)
scores[1]
```
# 3.效能評估
嘗試過好幾組不同參數的組合,也調整過批次處理的大小
當中需要注意的是,沒有使用Dropout()且沒有其他特別處理訓練出來的模型,容易產生過擬合的狀況,測試資料丟進去得到的準確率相較來說會比較低
最後使用上面介紹所建立的模型結果是測試幾遍中較佳的一個,投入訓練資料得到的準確率約在0.93 ~ 1.00間浮動,測試資料的準確率則約在0.89 ~ 0.96之間浮動
下圖為訓練的過程

訓練過程中準確率變化

訓練過程中loss變化

# 4.完整Code
```python=
#安裝opencv
!apt-get -qq install -y libsm6 libxext6 && pip install -q -U opencv-python
#匯入library
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.utils import np_utils, plot_model
#自行上傳壓縮檔
from google.colab import files
uploaded = files.upload()
for fn in uploaded.keys():
print('User uploaded file "{name}" with length {length} bytes'.format(
name=fn, length=len(uploaded[fn])))
#解壓縮RAR DATA SET
!apt-get install unrar
#!unrar e 'All_gray_1_32_32.rar'
!unrar x 'All_gray_1_32_32.rar'
# Build parallel lists of image file names and their pose labels
def enumerate_files(dirs, path="All_gray_1_32_32/", n_poses=3, n_samples=20):
    """Collect every .jpg file under the given data-set directories.

    dirs      -- data-set names, e.g. ['Set1', 'Set2']
    path      -- root directory holding the data sets (with trailing slash)
    n_poses   -- number of posture classes; class n owns the three
                 directories 000(3n) .. 000(3n+2)
    n_samples -- number of 4-digit sample sub-directories per class dir

    Returns (filenames, targets): a list of file paths and the matching
    list of integer class labels in [0, n_poses).
    """
    # Colab keeps the extracted archive under /content; skip the chdir
    # when that directory does not exist so the function also works
    # outside Colab (e.g. with an absolute `path`).
    if os.path.isdir("/content"):
        os.chdir("/content")
    filenames, targets = [], []
    for p in dirs:
        for n in range(n_poses):
            # Each posture owns three consecutive 000x directories.
            for j in range(3):
                dir_name = path + p + '/000' + str(n * 3 + j) + '/'
                for s in range(n_samples):
                    d = dir_name + '%04d/' % s
                    for f in os.listdir(d):
                        if f.endswith('jpg'):
                            filenames += [d + f]
                            targets.append(n)
    return filenames, targets
def read_images(files):
    """Read every path in *files* as a grayscale OpenCV image.

    Returns a list of 2-D arrays in the same order as *files*.
    """
    return [cv2.imread(name, cv2.IMREAD_GRAYSCALE) for name in files]
def read_datasets(datasets):
    """Load all images and labels for the given data-set names.

    Returns (images, labels): a numpy array stacking every sample image
    and the matching list of integer pose classes.
    """
    names, labels = enumerate_files(datasets)
    return np.array(read_images(names)), labels
# Load the training (Set1-3) and test (Set4-5) data sets
train_sets = ['Set1', 'Set2', 'Set3']
test_sets = ['Set4', 'Set5']
trn_array, trn_labels = read_datasets(train_sets)
tst_array, tst_labels = read_datasets(test_sets)
# Reshape to (n_samples, 32, 32, 1) and scale pixel values to [0, 1];
# -1 lets NumPy infer the sample count instead of hard-coding 540/360,
# so the code keeps working if the number of samples changes.
trn_array4D = trn_array.reshape((-1, 32, 32, 1)).astype('float32') / 255
tst_array4D = tst_array.reshape((-1, 32, 32, 1)).astype('float32') / 255
print(trn_array4D)
print(trn_array4D.shape)
# Helper to display a single sample image
import matplotlib.pyplot as plt
def plot_image(image):
    """Show *image* (a 2-D array) as a small 2x2-inch grayscale figure."""
    fig = plt.gcf()
    fig.set_size_inches(2, 2)
    plt.imshow(image, cmap='binary')
    plt.show()
# Sanity check: display one training sample
plot_image(trn_array[1])
# One-hot encode the integer class labels
from keras.utils import to_categorical
y_TrainOneHot = to_categorical(trn_labels)
y_TestOneHot = to_categorical(tst_labels)
print(trn_labels)
print(y_TrainOneHot)
# Build the CNN model as a Sequential (linear) stack of layers
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Conv2D,MaxPooling2D
model = Sequential()
# Conv: 16 3x3 filters over the 32x32x1 input, ReLU ('valid' padding -> 30x30)
model.add(Conv2D(16,(3,3),input_shape=(32,32,1), activation='relu'))
# Max-pool, size 3 / stride 3: 30x30 -> 10x10
model.add(MaxPooling2D(3, 3))
# Conv: 32 2x2 filters, 'same' padding keeps the 10x10 size
model.add(Conv2D(32,(2,2),padding='same',activation='relu'))
# Max-pool, size 2 / stride 2: 10x10 -> 5x5
model.add(MaxPooling2D(2, 2))
# Drop 30% of units during training to reduce overfitting
model.add(Dropout(0.3))
# Flatten feature maps to a 1-D vector for the dense layers
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
# 3-way softmax output: one probability per hand posture
model.add(Dense(3,activation='softmax'))
# Compile and train: categorical cross-entropy, Adam, 25 epochs, batch 54
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
train_history=model.fit(x=trn_array4D, y=y_TrainOneHot,epochs=25, batch_size=54,verbose=2)
# Evaluate accuracy on the held-out test data; scores[1] is the accuracy
scores = model.evaluate(tst_array4D, y_TestOneHot)
scores[1]
# Plot curves from the recorded training history
import matplotlib.pyplot as plt
def show_train_history(train_acc):
    """Plot the history entry named *train_acc* ('acc' or 'loss').

    NOTE(review): reads the module-level `train_history` returned by
    model.fit; Keras >= 2.3 records accuracy under the key 'accuracy'
    rather than 'acc' — confirm against the installed version.
    """
    plt.plot(train_history.history[train_acc])
    plt.title('Train History')
    # Label the y-axis according to which metric is plotted
    if(train_acc=='loss'):
        plt.ylabel('Loss')
    else:
        plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['train'], loc='upper left')
    plt.show()
# Accuracy over the training epochs
show_train_history('acc')
# Loss over the training epochs
show_train_history('loss')