# Learning the 8-bit parity checking problem with an MLP

---

# Table of Contents

[TOC]

# Problem Description

Design a multi-layer perceptron (MLP) to learn the 8-bit parity checking problem (8BPC): given an 8-bit input, output 1 if the input contains an odd number of 1 bits, and 0 otherwise.

---

# Solution

The workflow is: generate training data ⇨ build the model ⇨ train the neural network ⇨ plot the training error.

---

# Generating the Training Data

An 8-bit input has exactly 256 possible patterns, i.e. the decimal numbers 0 to 255, so we can generate all the training data we need by converting each decimal number to binary.

We use the list method `count` to find the number of 1s in each pattern (which also determines the number of 0s). Wrapping this in a loop produces the full training set.

```python=
import numpy as np
import matplotlib.pyplot as plt

def ParityGenerator(n):
    array1 = []
    array2 = []
    for i in range(n):
        num = list(str("{0:08b}".format(i)))   # 8-bit binary string of i
        num = list(map(int, num))
        array1.append(num)
        # parity label: 1 if the number of 1 bits is odd, 0 otherwise
        array2.append([1 if num.count(1) % 2 == 1 else 0])
    return np.array(array1), np.array(array2)
```

# Building the Model

### Defining the Activation Functions

Before building the model, we define the building blocks needed for training, using one class each for **Linear, ReLU, Sigmoid, Tanh, and Loss**.

Note that in `Linear` the weights and biases are initialized randomly; the values are divided by 15 to avoid overly extreme starting points.

```python=
# Fully connected (linear) layer
class Linear:
    def __init__(self, m, n):
        # divide by 15 to keep the random initial values small
        self.W, self.b = np.random.randn(m, n) / 15, np.random.rand(1, n) / 15
        self.dW, self.db = None, None

    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return dx

# ReLU activation
class ReLU:
    def __init__(self):
        pass

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()              # copy so the input is not modified in place
        out[out <= 0] = 0
        return out

    def backward(self, dout):
        dx = dout
        dx[self.mask] = 0
        return dx

# Sigmoid activation
class Sigmoid:
    def __init__(self):
        pass

    def forward(self, x):
        out = 1.0 / (1 + np.exp(-x))
        self.o = out
        return out

    def backward(self, dout):
        dx = dout * self.o * (1 - self.o)
        return dx

# Tanh activation
class Tanh:
    def __init__(self):
        pass

    def forward(self, x):
        # tanh(x) = (1 - e^(-2x)) / (1 + e^(-2x)), matching the
        # derivative 1 - tanh(x)^2 used in backward
        out = np.tanh(x)
        self.o = out
        return out

    def backward(self, dout):
        dx = dout * (1 - self.o ** 2)
        return dx

# Sum-of-squares loss
class Loss:
    def __init__(self):
        pass

    def forward(self, y, ybar):
        self.y, self.ybar = y, ybar   # keep y as well; backward needs it
        return np.sum((y - self.ybar) ** 2)

    def backward(self, dout):
        dy = -(2 * (self.y - self.ybar))
        return dy
```

### Choosing the Activation Functions

The model initially has three layers: ReLU, Tanh, and Sigmoid. The first layer uses ReLU to speed up convergence, but ReLU clamps all negative values to 0, so its output is not zero-centered. The second layer therefore uses Tanh to compensate, but Tanh's output range does not match the required output, so the final layer uses Sigmoid to map the output back into the correct interval.

```python=
class ThreeLayer:
    def __init__(self, m, n, o, p):
        self.linear1 = Linear(m, n)
        self.act1 = ReLU()
        self.linear2 = Linear(n, o)
        self.act2 = Tanh()
        self.linear3 = Linear(o, p)
        self.act3 = Sigmoid()
        self.loss = Loss()
        # previous weight updates, kept for the momentum term
        self.last_dW1, self.last_db1 = 0, 0
        self.last_dW2, self.last_db2 = 0, 0
        self.last_dW3, self.last_db3 = 0, 0
```

### Training the Neural Network

We train the model with eta = 0.001, alpha = 0.001, and 20000 epochs. The network takes the 8 input bits, passes them through hidden layers of 256 and 128 neurons, and produces a single output neuron. (The `MLP` class used here is the generalized version built in the bonus section below.)

```python=
X, y = ParityGenerator(256)     # all 256 patterns and their parity labels
model = MLP(8, ['ReLU', 'Tanh', 'Sigmoid'], [256, 128, 1])
max_epochs, chk_epochs = 20000, 1000
eta, alpha = 0.001, 0.001
LossSplt = []
EpochSplt = []
for epoch in range(max_epochs):
    model.forward(X)
    model.backward(y)
    model.update(eta, alpha)
    if (epoch + 1) % chk_epochs == 0:
        #print(model.ybar.T)
        print('Epoch %3d: loss = %.6f' % (epoch + 1, model.L))
        LossSplt.append(model.L)
        EpochSplt.append(epoch + 1)
```

### Plotting the Training Error

We use matplotlib:

```python=
plt.style.use("ggplot")
plt.figure()
plt.plot(EpochSplt, LossSplt, label = "Training_Error")
plt.xlabel("Epochs")
plt.ylabel("Training Error")
plt.legend()
plt.show()
```

![](https://i.imgur.com/HmAkNib.jpg)
![](https://i.imgur.com/LfX3X09.jpg)

# Comparing Different Numbers of Layers

**Two layers**
ReLU(256) ⇨ Sigmoid(1)
![](https://i.imgur.com/7rERizV.jpg)
Epoch 20000: loss = 0.240388

**Four layers**
ReLU(256) ⇨ Tanh(128) ⇨ ReLU(256) ⇨ Sigmoid(1)
![](https://i.imgur.com/nlDKCY5.jpg)
Epoch 20000: loss = 0.027128

# Bonus: A workable MLP class with the API
Looking at the three-layer and four-layer code, it is clear that each method contains many variables that differ only by an index. To make this more flexible, we can store those variables in lists and loop over them to run the forward pass, backward pass, loss evaluation, and parameter update for every layer. Two short sanity-check sketches follow the class below.

```python=
class MLP:
    def __init__(self, InputShape, ActivationList, LayerList):
        # map names to classes (not instances) so every layer gets its own
        # activation object; repeated names must not share internal state
        ActivationDic = {
            'ReLU': ReLU,
            'Sigmoid': Sigmoid,
            'Tanh': Tanh
        }
        self.Linear = []
        self.Activation = []
        self.ActivationList = ActivationList
        self.Layer = [InputShape]
        self.Layer += LayerList
        self.Length = len(LayerList)
        self.last_dW = []
        self.last_db = []
        for i in range(self.Length):
            self.Linear.append(Linear(self.Layer[i], self.Layer[i + 1]))
            self.Activation.append(ActivationDic[self.ActivationList[i]]())
            self.last_dW.append(0)
            self.last_db.append(0)
        self.loss = Loss()

    def forward(self, x):
        # every layer is a Linear step followed by its activation
        for j in range(self.Length):
            x = self.Linear[j].forward(x)
            x = self.Activation[j].forward(x)
        self.ybar = x
        return self.ybar

    def backward(self, y):
        self.L = self.loss.forward(y, self.ybar)
        g = self.loss.backward(1)
        for k in range(self.Length - 1, -1, -1):
            g = self.Activation[k].backward(g)
            g = self.Linear[k].backward(g)

    def update(self, eta, alpha):
        for l in range(self.Length):
            # gradient descent with momentum: the new update is the gradient
            # step plus a fraction alpha of the previous update
            self.last_dW[l] = -eta * self.Linear[l].dW + alpha * self.last_dW[l]
            self.last_db[l] = -eta * self.Linear[l].db + alpha * self.last_db[l]
            self.Linear[l].W += self.last_dW[l]
            self.Linear[l].b += self.last_db[l]
```
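As a quick sanity check on the generalized class, here is a minimal evaluation sketch. It assumes the `ParityGenerator` and `MLP` definitions above are in scope and that `model` has already been trained as in the training section; thresholding the sigmoid output at 0.5 and reporting accuracy are additions for illustration, not part of the original write-up.

```python=
# Evaluate the trained network on all 256 patterns: threshold the
# sigmoid output at 0.5 and compare against the true parity labels.
X, y = ParityGenerator(256)
pred = (model.forward(X) >= 0.5).astype(int)
correct = (pred == y).sum()
print('accuracy = %.4f (%d / 256 correct)' % (correct / 256, correct))
```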
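Hand-written backward passes are easy to get wrong, so a numerical gradient check is a useful companion to the class. The sketch below compares the backpropagated gradient of the first `Linear` layer against a central-difference estimate; the tiny network shape `[4, 1]`, the 16-pattern subset, and the step size `h = 1e-5` are arbitrary choices for illustration.

```python=
# Gradient check: dL/dW ~ (L(W + h) - L(W - h)) / (2h) for each entry
# of the first layer's weight matrix, compared against backprop.
X, y = ParityGenerator(16)          # a small subset keeps this fast
net = MLP(8, ['Tanh', 'Sigmoid'], [4, 1])

net.forward(X)
net.backward(y)                     # fills net.Linear[0].dW via backprop
analytic = net.Linear[0].dW.copy()

h = 1e-5
numeric = np.zeros_like(analytic)
W = net.Linear[0].W
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        old = W[i, j]
        W[i, j] = old + h
        Lp = net.loss.forward(y, net.forward(X))
        W[i, j] = old - h
        Lm = net.loss.forward(y, net.forward(X))
        W[i, j] = old               # restore the original weight
        numeric[i, j] = (Lp - Lm) / (2 * h)

print('max abs difference:', np.abs(analytic - numeric).max())
```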