---
tags: DeepLearning_HW1
title: Learning 8-bit parity checking problem with MLP
---

# Learning 8-bit parity checking problem with MLP

## Table of Contents
###### [TOC]

## Problem
##### Given a number made up of eight '1's and '0's, the output should be '1' if it contains an odd number of '1's, and '0' if it contains an even number. The goal is to use machine learning to train a model that automatically determines the correct output for any 8-bit input.

## Solution

### Generating the training data
##### Convert the decimal numbers 0 to 255 into binary to obtain all 256 possible 8-bit patterns.
##### Use `count` and `% 2` to determine whether each pattern contains an odd or even number of '1's, and produce the corresponding label '1' or '0'.
##### This way, a single loop generates the whole training set automatically.
```python
import numpy as np

input_list = []
label = []

for i in range(0, 256):
    # 8-bit binary representation of i, e.g. 5 -> '00000101'
    bits = '{0:08b}'.format(i)
    input_list.append([int(x) for x in bits])
    # parity label: 1 if the number of '1's is odd, 0 otherwise
    label.append([bits.count('1') % 2])

input_list = np.asarray(input_list)
label = np.asarray(label)
```

### Building the model
##### First, implement each layer and activation as a class (**Linear**, **ReLU**, **Tanh**, **Sigmoid**, **Loss**) so they can be reused later.
```python
# fully connected (linear) layer
class Linear:
    def __init__(self, m, n):
        self.W, self.b = np.random.randn(m, n), np.random.randn(1, n)
        self.dW, self.db = None, None

    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return dx

# ReLU activation
class ReLU:
    def __init__(self):
        pass

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()          # copy so the input array is not modified in place
        out[self.mask] = 0
        return out

    def backward(self, dout):
        dx = dout
        dx[self.mask] = 0
        return dx

# sigmoid activation
class Sigmoid:
    def __init__(self):
        pass

    def forward(self, x):
        out = 1.0 / (1.0 + np.exp(-x))
        self.o = out
        return out

    def backward(self, dout):
        dx = dout * self.o * (1 - self.o)
        return dx

# tanh activation
class Tanh:
    def __init__(self):
        pass

    def forward(self, x):
        # tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
        out = (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        # derivative of tanh: dx = dout * (1 - out**2)
        dx = dout * (1.0 - (self.out) ** 2)
        return dx

# loss function (sum of squared errors)
class Loss:
    def __init__(self):
        pass

    def forward(self, y, ybar):
        self.ybar = ybar
        self.y = y
        return np.sum((y - ybar) ** 2)

    def backward(self, dout):
        dy = -(2 * (self.y - self.ybar))
        return dy
```
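##### Before wiring the classes into a full network, the backward methods above can be sanity-checked against a numerical finite-difference gradient. The snippet below is an optional check, not part of the original assignment code; the test shapes, random seed, and tolerance are arbitrary assumptions.
```python
# Optional sanity check (not in the original code): compare the analytic gradient
# of a Linear+Sigmoid stack against a central finite-difference estimate.
np.random.seed(0)

lin, sig, loss = Linear(8, 3), Sigmoid(), Loss()
x = np.random.randn(5, 8)                          # 5 arbitrary 8-dimensional inputs
y = np.random.randint(0, 2, (5, 3)).astype(float)  # arbitrary 0/1 targets

# analytic gradient via the classes' backward methods
out = sig.forward(lin.forward(x))
loss.forward(y, out)
lin.backward(sig.backward(loss.backward(1)))
analytic = lin.dW.copy()

# numerical gradient: perturb each weight and re-evaluate the loss
eps = 1e-6
numeric = np.zeros_like(lin.W)
for i in range(lin.W.shape[0]):
    for j in range(lin.W.shape[1]):
        lin.W[i, j] += eps
        plus = loss.forward(y, sig.forward(lin.forward(x)))
        lin.W[i, j] -= 2 * eps
        minus = loss.forward(y, sig.forward(lin.forward(x)))
        lin.W[i, j] += eps
        numeric[i, j] = (plus - minus) / (2 * eps)

# expected to be close to zero if the backward passes are correct
print('max |analytic - numeric| =', np.abs(analytic - numeric).max())
```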
##### Below are the implementations of the two-layer and three-layer networks.
```python
class TwoLayer:
    def __init__(self, m, n, o):
        self.linear1, self.linear2 = Linear(m, n), Linear(n, o)
        self.act1, self.act2 = Tanh(), Sigmoid()
        self.loss = Loss()
        self.last_dW1, self.last_dW2 = 0, 0
        self.last_db1, self.last_db2 = 0, 0

    def forward(self, x):
        # -- L1 ---------------------
        x = self.linear1.forward(x)
        x = self.act1.forward(x)
        # -- output -----------------
        x = self.linear2.forward(x)
        self.ybar = self.act2.forward(x)
        return self.ybar

    def backward(self, y):
        # --output--
        self.L = self.loss.forward(y, self.ybar)
        g = self.loss.backward(1)
        # --L2--
        g = self.act2.backward(g)
        g = self.linear2.backward(g)
        # --input--
        g = self.act1.backward(g)
        g = self.linear1.backward(g)

    def update(self, eta, alpha):
        # --L1--
        self.linear1.W = self.linear1.W - eta*self.linear1.dW + alpha*self.last_dW1
        self.linear1.b = self.linear1.b - eta*self.linear1.db + alpha*self.last_db1
        self.last_dW1 = eta*self.linear1.dW
        self.last_db1 = eta*self.linear1.db
        # --L2--
        self.linear2.W = self.linear2.W - eta*self.linear2.dW + alpha*self.last_dW2
        self.linear2.b = self.linear2.b - eta*self.linear2.db + alpha*self.last_db2
        self.last_dW2 = eta*self.linear2.dW
        self.last_db2 = eta*self.linear2.db
```
```python
class ThreeLayer:
    def __init__(self, m, n, o, p):
        self.linear1, self.linear2, self.linear3 = Linear(m, n), Linear(n, o), Linear(o, p)
        self.act1, self.act2, self.act3 = ReLU(), Tanh(), Sigmoid()
        self.loss = Loss()
        self.last_dW1, self.last_dW2, self.last_dW3 = 0.0, 0.0, 0.0
        self.last_db1, self.last_db2, self.last_db3 = 0.0, 0.0, 0.0

    def forward(self, x):
        # -- L1 -----------------------
        x = self.linear1.forward(x)
        x = self.act1.forward(x)
        # -- L2 -----------------------
        x = self.linear2.forward(x)
        x = self.act2.forward(x)
        # -- output -------------------
        x = self.linear3.forward(x)
        self.ybar = self.act3.forward(x)
        return self.ybar

    def backward(self, y):
        # --output--
        self.L = self.loss.forward(y, self.ybar)
        g = self.loss.backward(1)
        # --L3--
        g = self.act3.backward(g)
        g = self.linear3.backward(g)
        # --L2--
        g = self.act2.backward(g)
        g = self.linear2.backward(g)
        # --input--
        g = self.act1.backward(g)
        g = self.linear1.backward(g)

    def update(self, eta, alpha):
        # --L1--
        self.linear1.W = self.linear1.W - eta*self.linear1.dW + alpha*self.last_dW1
        self.linear1.b = self.linear1.b - eta*self.linear1.db + alpha*self.last_db1
        self.last_dW1 = eta*self.linear1.dW
        self.last_db1 = eta*self.linear1.db
        # --L2--
        self.linear2.W = self.linear2.W - eta*self.linear2.dW + alpha*self.last_dW2
        self.linear2.b = self.linear2.b - eta*self.linear2.db + alpha*self.last_db2
        self.last_dW2 = eta*self.linear2.dW
        self.last_db2 = eta*self.linear2.db
        # --L3--
        self.linear3.W = self.linear3.W - eta*self.linear3.dW + alpha*self.last_dW3
        self.linear3.b = self.linear3.b - eta*self.linear3.db + alpha*self.last_db3
        self.last_dW3 = eta*self.linear3.dW
        self.last_db3 = eta*self.linear3.db
```

### Training the network
##### The model is initialised as a two-layer network whose activations are **tanh** and **sigmoid**.
##### It is then trained on the 256 training samples for 15,000 epochs with eta = 0.003 and alpha = 0.001. The run is repeated from scratch until the best loss falls below a chosen threshold, keeping the loss curve and the outputs of the best run.
```python
# initialize the hyperparameters and the best-loss tracker
max_epochs, chk_epochs = 15000, 1000
last_dW, last_db = 0.0, 0.0
eta, alpha = 0.003, 0.001
minLoss = float('inf')     # best loss seen over all retraining runs
keepRetraining = 0.05      # retrain until the best loss is below this (threshold value assumed; not given in the original)
loss_min = []
epoch_min = []

while minLoss >= keepRetraining:
    loss = []
    epoch = []
    model = TwoLayer(8, 10, 1)
    for e in range(max_epochs):
        model.forward(input_list)
        model.backward(label)
        model.update(eta, alpha)
        loss.append(model.L)
        epoch.append(e)
        if (e+1) % chk_epochs == 0:
            print(model.ybar.T)
            print('Epoch %3d: loss=%.6f' % (e+1, model.L))
    # keep the run with the lowest final loss
    if model.L < minLoss:
        minLoss = model.L
        loss_min = loss
        epoch_min = epoch
        minOutput = model.ybar.T
```

### Saving the lowest loss and the 256 outputs
```python
fp = open("D:\\Justin\'s_University\\大四上\\深度學習\\HW1\\output\\"+str(minLoss)+".txt", "a")
fp.write(str(minLoss))
fp.close()

fp = open("D:\\Justin\'s_University\\大四上\\深度學習\\HW1\\output\\"+str(minLoss)+"_output.txt", "a")
fp.write(str(minOutput))
fp.close()
```

### Plotting the training error
##### Use the matplotlib library to plot the training error.
```python
import matplotlib.pyplot as plt

plt.style.use("ggplot")
plt.figure()
plt.plot(epoch_min, loss_min, label="Training_Error")
plt.xlabel("Epochs")
plt.ylabel("Training Error")
plt.legend()
picname = str(minLoss)
plt.savefig("D:\\Justin\'s_University\\大四上\\深度學習\\HW1\\output\\"+picname+".jpg")
print(picname+".jpg is saved")
plt.show()
```
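##### Because the final sigmoid outputs a value in (0, 1), the predicted parity can be read off by thresholding at 0.5. The snippet below is a small optional add-on, not part of the original script, for measuring how many of the 256 patterns the trained model classifies correctly; it assumes `model`, `input_list`, and `label` from above.
```python
# Optional add-on (not in the original script): convert the sigmoid outputs
# into hard 0/1 predictions and compute the classification accuracy.
pred = (model.forward(input_list) >= 0.5).astype(int)   # threshold at 0.5
correct = int((pred == label).sum())
print('correct: %d / 256, accuracy = %.4f' % (correct, correct / 256.0))
```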
### Results after training

<font size="6">__**2 layers**__ </font>

##### max_epochs, chk_epochs = 15000, 1000
##### last_dW, last_db = 0.0, 0.0
##### eta, alpha = 0.003, 0.001
##### Tanh(10)->Sigmoid(1)

![](https://i.imgur.com/btXNZwz.jpg)

<font size="4">__**Loss = 1.3197356837354326**__ </font>

$\Rightarrow$ [2-Layers output Table](https://hackmd.io/7oyBFKe7SeiM00REiN43QA)$\Leftarrow$

----------------------------------------------------

<font size="6">__**3 layers**__ </font>

##### max_epochs, chk_epochs = 15000, 1000
##### last_dW, last_db = 0.0, 0.0
##### eta, alpha = 0.003, 0.001
##### ReLU(30)->Tanh(4)->Sigmoid(1)

![](https://i.imgur.com/Qtk2qos.jpg)

<font size="4">__**Loss = 1.0003163010187124**__ </font>

$\Rightarrow$ [3-Layers output Table](https://hackmd.io/nxHiQZoST8yB6aR5WWBYaQ)$\Leftarrow$

----------------------------------------------------

<font size="6">__**4 layers**__ </font>

##### max_epochs, chk_epochs = 15000, 1000
##### last_dW, last_db = 0.0, 0.0
##### eta, alpha = 0.003, 0.001
##### ReLU(128)->Tanh(53)->Sigmoid(28)->Sigmoid(1)

![](https://i.imgur.com/ZafT58m.jpg)

<font size="4">__**Loss = 0.03333781335491803**__ </font>

$\Rightarrow$ [4-Layers output Table](https://hackmd.io/r-gNGcykQ9OLcm06ESNr4Q)$\Leftarrow$

##### max_epochs, chk_epochs = 25000, 1000
##### last_dW, last_db = 0.0, 0.0
##### eta, alpha = 0.003, 0.001
##### ReLU(128)->Tanh(53)->Sigmoid(28)->Sigmoid(1)

![](https://i.imgur.com/De86AZL.jpg)

<font size="4">__**Loss = 0.018032712342026344**__ </font>

$\Rightarrow$ [4-Layers output Table](https://hackmd.io/25yWTKMjSRq1JDN46_m1Iw)$\Leftarrow$

----------------------------------------------------

<font size="6">__**5 layers**__ </font>

##### max_epochs, chk_epochs = 15000, 1000
##### last_dW, last_db = 0.0, 0.0
##### eta, alpha = 0.003, 0.001
##### ReLU(128)->Tanh(64)->Sigmoid(32)->ReLU(16)->Sigmoid(1)

![](https://i.imgur.com/1C2zrds.jpg)

<font size="4">__**Loss = 0.0025766262860609504**__ </font>

$\Rightarrow$ [5-Layers output Table](https://hackmd.io/WQpSsj8qTcuLL3rLoXPrFg?both)$\Leftarrow$
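##### The 4-layer and 5-layer results above were produced with classes built in the same way as `TwoLayer` and `ThreeLayer`; their code is not shown in this write-up. As an illustration only, a generic class like the sketch below could assemble any such stack of `Linear` layers and activations. The name `MultiLayer` and its interface are assumptions for this sketch, not part of the original assignment code.
```python
# Illustrative sketch (not from the original code): a generic MLP that chains any
# number of Linear layers and activations, mirroring TwoLayer/ThreeLayer.
class MultiLayer:
    def __init__(self, sizes, activations):
        # sizes:       layer widths, e.g. [8, 128, 53, 28, 1]
        # activations: one activation object per Linear, e.g. [ReLU(), Tanh(), Sigmoid(), Sigmoid()]
        self.linears = [Linear(m, n) for m, n in zip(sizes[:-1], sizes[1:])]
        self.acts = activations
        self.loss = Loss()
        self.last_dW = [0.0] * len(self.linears)
        self.last_db = [0.0] * len(self.linears)

    def forward(self, x):
        for lin, act in zip(self.linears, self.acts):
            x = act.forward(lin.forward(x))
        self.ybar = x
        return self.ybar

    def backward(self, y):
        self.L = self.loss.forward(y, self.ybar)
        g = self.loss.backward(1)
        # propagate the gradient back through the layers in reverse order
        for lin, act in zip(reversed(self.linears), reversed(self.acts)):
            g = lin.backward(act.backward(g))

    def update(self, eta, alpha):
        # gradient step with the same momentum-style term as TwoLayer/ThreeLayer
        for i, lin in enumerate(self.linears):
            lin.W = lin.W - eta*lin.dW + alpha*self.last_dW[i]
            lin.b = lin.b - eta*lin.db + alpha*self.last_db[i]
            self.last_dW[i] = eta*lin.dW
            self.last_db[i] = eta*lin.db

# For example, the 4-layer configuration from the results could be written as:
# model = MultiLayer([8, 128, 53, 28, 1], [ReLU(), Tanh(), Sigmoid(), Sigmoid()])
```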