2019deeplearn hw2 DeeplearnHW02 410535020 Information Management, Year 4, 葉松
###### tags: `Fundamental Deep Learning Assignments 2019`

# Hand Posture Recognition with CNN
---

# Table of Contents
[TOC]

# 1. Problem
---
In this assignment, you will practice the design of a Convolutional Neural Network to recognize three different hand postures.

You will be given 5 data sets captured from different people under different shooting conditions. The samples in each data set are stored in 9 separate directories: directories 0000~0002, 0003~0005, and 0006~0008 contain samples of Postures 1, 2, and 3, respectively. Each directory has 20 samples, and each sample is a grayscale image of 32x32 pixels.

# 2. CNN Overview and Implementation
---
A brief introduction to the two layer types used here:

**Convolution Layer**
Convolves the input image with a set of filters to extract visual features.

**Pooling Layer**
Downsamples the feature maps, reducing the amount of data to process and making the network less sensitive to small shifts in the hand's position.

The solution follows these steps:

**Data preprocessing -> Build the model -> Train the model -> Evaluate model accuracy**

### Data Preprocessing
First, install and import the required modules:
```python=
# Install OpenCV
!apt-get -qq install -y libsm6 libxext6 && pip install -q -U opencv-python
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.utils import np_utils, plot_model
from keras.utils import to_categorical
```
Read the hand-posture image data sets from the extracted archive (see the complete code below for the `read_datasets` helper):
```python=
# Load training and test data
train_sets = ['Set1', 'Set2', 'Set3']
test_sets = ['Set4', 'Set5']
trn_array, trn_labels = read_datasets(train_sets)
tst_array, tst_labels = read_datasets(test_sets)
```
Normalize the training and test data (this improves prediction accuracy and helps the model converge faster):
```python=
# Reshape to 4D and scale pixel values to [0, 1]
trn_array4D = trn_array.reshape((540, 32, 32, 1)).astype('float32') / 255
tst_array4D = tst_array.reshape((360, 32, 32, 1)).astype('float32') / 255
```
Use `to_categorical` to one-hot encode the training and test labels:
```python=
y_TrainOneHot = to_categorical(trn_labels)
y_TestOneHot = to_categorical(tst_labels)
```
### Building the Model
```python=
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

# Build a Sequential (linear stack) model
model = Sequential()
# Convolution layer: 32x32 input, 16 filters of size 3x3 ('valid' padding),
# producing 16 feature maps of 30x30; ReLU activation
model.add(Conv2D(16, (3, 3), input_shape=(32, 32, 1), activation='relu'))
# Pooling layer: 3x3 max pooling shrinks the 16 maps from 30x30 to 10x10
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Conv2D(32, (2, 2), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Randomly drop 30% of the units during training to reduce overfitting
model.add(Dropout(0.3))
# Flatten layer: convert the multi-dimensional output to a 1D vector
model.add(Flatten())
# Hidden layer with 100 units
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
# Output layer with 3 units; softmax turns the outputs into a
# probability for each posture
model.add(Dense(3, activation='softmax'))
```
### Training the Model
```python=
# Define the training setup, with categorical_crossentropy as the loss
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])
# Start training
train_history = model.fit(x=trn_array4D, y=y_TrainOneHot,
                          epochs=25, batch_size=54, verbose=2)
```
### Evaluating Model Accuracy
```python=
# Evaluate on the test data
scores = model.evaluate(tst_array4D, y_TestOneHot)
# Display the test accuracy
scores[1]
```
# 3. Performance Evaluation
I tried several combinations of parameters and batch sizes. One point worth noting: models trained without Dropout() or any other regularization tended to overfit, and their accuracy on the test data was noticeably lower.

The model described above gave the best results among the configurations tried: training accuracy fluctuates between about 0.93 and 1.00, and test accuracy between about 0.89 and 0.96.

The figure below shows the training log:
![](https://i.imgur.com/EHCQ2GQ.png)
Accuracy during training:
![](https://i.imgur.com/eGzGbEO.png)
Loss during training:
![](https://i.imgur.com/pd9EMIg.png)

# 4. Complete Code
```python=
# Install OpenCV
!apt-get -qq install -y libsm6 libxext6 && pip install -q -U opencv-python

# Import libraries
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.utils import np_utils, plot_model

# Upload the archive (Google Colab)
from google.colab import files
uploaded = files.upload()
for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))

# Extract the RAR data set
!apt-get install unrar
#!unrar e 'All_gray_1_32_32.rar'
!unrar x 'All_gray_1_32_32.rar'

# Enumerate image files and their posture labels
def enumerate_files(dirs, 
                    path="All_gray_1_32_32/", n_poses=3, n_samples=20):
    os.chdir("/content")
    filenames, targets = [], []
    for p in dirs:
        for n in range(n_poses):
            for j in range(3):
                dir_name = path + p + '/000' + str(n*3+j) + '/'
                for s in range(n_samples):
                    d = dir_name + '%04d/' % s
                    for f in os.listdir(d):
                        if f.endswith('jpg'):
                            filenames += [d+f]
                            targets.append(n)
    return filenames, targets

def read_images(files):
    imgs = []
    for f in files:
        img = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
        imgs.append(img)
    return imgs

def read_datasets(datasets):
    files, labels = enumerate_files(datasets)
    list_of_arrays = read_images(files)
    return np.array(list_of_arrays), labels

# Load training and test data
train_sets = ['Set1', 'Set2', 'Set3']
test_sets = ['Set4', 'Set5']
trn_array, trn_labels = read_datasets(train_sets)
tst_array, tst_labels = read_datasets(test_sets)

trn_array4D = trn_array.reshape((540, 32, 32, 1)).astype('float32') / 255
tst_array4D = tst_array.reshape((360, 32, 32, 1)).astype('float32') / 255
print(trn_array4D)
print(trn_array4D.shape)

# Helper to display an image
import matplotlib.pyplot as plt
def plot_image(image):
    fig = plt.gcf()
    fig.set_size_inches(2, 2)
    plt.imshow(image, cmap='binary')
    plt.show()

# Quick check
plot_image(trn_array[1])

# One-hot encode the labels
from keras.utils import to_categorical
y_TrainOneHot = to_categorical(trn_labels)
y_TestOneHot = to_categorical(tst_labels)
print(trn_labels)
print(y_TrainOneHot)

# Build the model
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
model = Sequential()
model.add(Conv2D(16, (3, 3), input_shape=(32, 32, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Conv2D(32, (2, 2), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

# Compile and train the model
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])
train_history = model.fit(x=trn_array4D, y=y_TrainOneHot, epochs=25,
                          batch_size=54, verbose=2)

# Evaluate accuracy on the test data
scores = model.evaluate(tst_array4D, y_TestOneHot)
scores[1]

# Plot the training history
import matplotlib.pyplot as plt
def show_train_history(train_acc):
    plt.plot(train_history.history[train_acc])
    plt.title('Train History')
    if train_acc == 'loss':
        plt.ylabel('Loss')
    else:
        plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['train'], loc='upper left')
    plt.show()

# Show the accuracy curve (the history key may be 'accuracy' in newer Keras)
show_train_history('acc')
# Show the loss curve
show_train_history('loss')
```
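As a sanity check on the layer sizes used above, the spatial dimensions at each stage can be computed by hand. The sketch below assumes the Keras defaults that the model relies on: 'valid' convolution with stride 1, 'same' convolution preserving spatial size, and max pooling with stride equal to the pool size:

```python
def conv_out(size, kernel, stride=1, padding=0):
    # Output size of a convolution: floor((size + 2*padding - kernel) / stride) + 1
    return (size + 2 * padding - kernel) // stride + 1

def pool_out(size, pool):
    # Max pooling with stride == pool size floors the division
    return size // pool

s = 32               # input image is 32x32
s = conv_out(s, 3)   # Conv2D(16, (3,3)), 'valid' padding -> 30x30, 16 maps
s = pool_out(s, 3)   # MaxPooling2D((3,3))                -> 10x10
                     # Conv2D(32, (2,2), padding='same') keeps 10x10, 32 maps
s = pool_out(s, 2)   # MaxPooling2D((2,2))                -> 5x5
flat = s * s * 32    # Flatten -> 800 units feeding Dense(100)
print(s, flat)       # 5 800
```

This is why the Flatten layer hands 5*5*32 = 800 values to the first Dense layer, which `model.summary()` would confirm.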