# Learning the 8-bit parity checking problem with an MLP
---
# Table of Contents
[TOC]
# Problem Description
Design a multilayer perceptron (MLP) to learn the 8-bit parity checking problem (8BPC): given an 8-bit input, output 1 when the number of 1s is odd, and 0 otherwise. For example, 00000011 contains two 1s (even), so its target output is 0.
---
# Solution
The workflow is: generate training data ⇨ build the model ⇨ train the neural network ⇨ plot the training error.
---
# Generating the training data
8 bits give exactly 256 combinations, i.e., decimal 0 through 255, so the training inputs can be generated by converting each decimal number to binary.
The list `count` method counts the number of 1s in each sample (which in turn determines the number of 0s); looping over all 256 values produces the training data we need.
```python=
import numpy as np
import matplotlib.pyplot as plt

def ParityGenerator(n):
    array1 = []
    array2 = []
    for i in range(n):
        num = list("{0:08b}".format(i))   # decimal -> 8-bit binary string
        num = list(map(int, num))
        array1.append(num)
        # Parity label: 1 if the number of 1s is odd, 0 otherwise
        array2.append([1 if num.count(1) % 2 == 1 else 0])
    return np.array(array1), np.array(array2)
```
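As a quick sanity check of the generator (a small addition, not part of the original write-up): index 3 is 00000011, which has two 1s, so its label should be 0.
```python=
X, y = ParityGenerator(256)
print(X.shape, y.shape)   # (256, 8) (256, 1)
print(X[3], y[3])         # [0 0 0 0 0 0 1 1] [0]
```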
# Building the model
### Defining the activation functions
Before building the model, we define the building blocks needed for training; classes are used to separate **Linear, ReLU, Sigmoid, Tanh, and Loss**.
Note that the weights and biases in Linear are initialized randomly; the values are divided by 15 to avoid overly extreme starting values.
```python=
# Linear (fully connected) layer
class Linear:
    def __init__(self, m, n):
        # Divide by 15 so the random initial values are not too extreme
        self.W, self.b = np.random.randn(m, n) / 15, np.random.rand(1, n) / 15
        self.dW, self.db = None, None
    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out
    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return dx

# ReLU activation
class ReLU:
    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()            # copy so the input array is not mutated in place
        out[self.mask] = 0
        return out
    def backward(self, dout):
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

# Sigmoid activation
class Sigmoid:
    def forward(self, x):
        out = 1.0 / (1 + np.exp(-x))
        self.o = out
        return out
    def backward(self, dout):
        dx = dout * self.o * (1 - self.o)
        return dx

# Tanh activation
class Tanh:
    def forward(self, x):
        out = np.tanh(x)          # (e^x - e^-x) / (e^x + e^-x)
        self.o = out
        return out
    def backward(self, dout):
        dx = dout * (1 - self.o ** 2)
        return dx

# Sum-of-squared-errors loss
class Loss:
    def forward(self, y, ybar):
        self.y, self.ybar = y, ybar
        return np.sum((y - self.ybar) ** 2)
    def backward(self, dout):
        dy = -2 * (self.y - self.ybar) * dout
        return dy
```
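To gain confidence in the hand-written backward passes, a finite-difference check can be run against any of the layers; the sketch below (an addition, using only the Sigmoid class defined above) compares the analytic gradient with a numerical one.
```python=
# Finite-difference check of Sigmoid.backward (illustrative sketch)
np.random.seed(0)
x = np.random.randn(4, 3)
layer = Sigmoid()
out = layer.forward(x)
analytic = layer.backward(np.ones_like(out))

eps = 1e-6
numeric = np.zeros_like(x)
for idx in np.ndindex(*x.shape):
    x_plus, x_minus = x.copy(), x.copy()
    x_plus[idx] += eps
    x_minus[idx] -= eps
    numeric[idx] = (Sigmoid().forward(x_plus).sum()
                    - Sigmoid().forward(x_minus).sum()) / (2 * eps)
print(np.max(np.abs(analytic - numeric)))   # should be ~1e-9 or smaller
```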
### Choosing the activation functions
The model starts with three layers: ReLU, Tanh, and Sigmoid. The first layer uses ReLU to speed up convergence, but ReLU maps all negative values to 0, so its output is not zero-centered. The second layer therefore uses Tanh to compensate, but Tanh's output range (-1, 1) does not match the required targets, so the final layer uses Sigmoid to map the output back into the correct interval (0, 1).
```python=
class ThreeLayer:
    def __init__(self, m, n, o, p):
        self.linear1 = Linear(m, n)
        self.act1 = ReLU()
        self.linear2 = Linear(n, o)
        self.act2 = Tanh()
        self.linear3 = Linear(o, p)
        self.act3 = Sigmoid()
        self.loss = Loss()
        self.last_dW1, self.last_db1 = 0, 0
        self.last_dW2, self.last_db2 = 0, 0
        self.last_dW3, self.last_db3 = 0, 0
```
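The write-up shows only `__init__` for this fixed model; as an illustrative sketch (not from the original), a matching `forward` could chain the layers like this, with `backward`/`update` mirroring the generic MLP class in the bonus section below.
```python=
def forward(self, x):
    h = self.act1.forward(self.linear1.forward(x))
    h = self.act2.forward(self.linear2.forward(h))
    self.ybar = self.act3.forward(self.linear3.forward(h))
    return self.ybar

ThreeLayer.forward = forward   # attach the sketch to the class
```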
### Training the network
Train the model with eta = 0.001, alpha = 0.001, and 20000 epochs.
The input has 8 dimensions; the hidden layers have 256 and 128 neurons, and the output is a single neuron.
```python=
X, y = ParityGenerator(256)   # the full 8-bit training set
# MLP is the generic class defined in the bonus section below
model = MLP(8, ['ReLU', 'Tanh', 'Sigmoid'], [256, 128, 1])
max_epochs, chk_epochs = 20000, 1000
eta, alpha = 0.001, 0.001
LossSplt = []
EpochSplt = []
for epoch in range(max_epochs):
    model.forward(X)
    model.backward(y)
    model.update(eta, alpha)
    if (epoch + 1) % chk_epochs == 0:
        #print(model.ybar.T)
        print('Epoch %3d: loss = %.6f' % (epoch + 1, model.L))
        LossSplt.append(model.L)
        EpochSplt.append(epoch + 1)
```
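After training, one quick way to verify that the network actually learned parity (an addition to the original steps) is to threshold the sigmoid output at 0.5 and compare against the labels:
```python=
# Classify each of the 256 patterns and measure training accuracy
pred = (model.forward(X) >= 0.5).astype(int)
print('Training accuracy: %.2f%%' % (100 * np.mean(pred == y)))
```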
### Plotting the training error
Plot the recorded losses with matplotlib:
```python=
plt.style.use("ggplot")
plt.figure()
plt.plot(EpochSplt, LossSplt, label = "Training_Error")
plt.xlabel("Epochs")
plt.ylabel("Training Error")
plt.legend()
plt.show()
```


# Comparing different numbers of layers
**Two layers**
ReLU(256) ⇨ Sigmoid(1)

Epoch 20000: loss = 0.240388
**Four layers**
ReLU(256) ⇨ Tanh(128) ⇨ ReLU(256) ⇨ Sigmoid(1)

Epoch 20000: loss = 0.027128
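With the generic MLP class from the bonus section below, both comparison models can be built by changing only the constructor arguments (a sketch; training then proceeds exactly as in the loop above):
```python=
# Two-layer model:  ReLU(256) -> Sigmoid(1)
model2 = MLP(8, ['ReLU', 'Sigmoid'], [256, 1])
# Four-layer model: ReLU(256) -> Tanh(128) -> ReLU(256) -> Sigmoid(1)
model4 = MLP(8, ['ReLU', 'Tanh', 'ReLU', 'Sigmoid'], [256, 128, 256, 1])
```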
# Bonus: A workable MLP class with the API
Looking at the three- and four-layer code, each method contains many variables that differ only in their index. To make the model more flexible, these variables can be stored in lists, and a loop over the layers can then run the forward pass, the backward pass, the loss evaluation, and the parameter update for every layer.
```python=
class MLP:
    def __init__(self, InputShape, ActivationList, LayerList):
        # Map names to classes (not instances) so each layer gets its own
        # activation object; a shared instance would overwrite the state
        # saved in forward() whenever the same activation appears twice
        ActivationDic = {
            'ReLU': ReLU,
            'Sigmoid': Sigmoid,
            'Tanh': Tanh
        }
        self.Linear = []
        self.Activation = []
        self.ActivationList = ActivationList
        self.Layer = [InputShape]
        self.Layer += LayerList
        self.Length = len(LayerList)
        self.last_dW = []
        self.last_db = []
        for i in range(self.Length):
            self.Linear.append(Linear(self.Layer[i], self.Layer[i + 1]))
            self.Activation.append(ActivationDic[self.ActivationList[i]]())
            self.last_dW.append(0)
            self.last_db.append(0)
        self.loss = Loss()
    def forward(self, x):
        for j in range(self.Length - 1):
            x = self.Linear[j].forward(x)
            x = self.Activation[j].forward(x)
        x = self.Linear[self.Length - 1].forward(x)
        self.ybar = self.Activation[self.Length - 1].forward(x)
        return self.ybar
    def backward(self, y):
        self.L = self.loss.forward(y, self.ybar)
        g = self.loss.backward(1)
        for k in range(self.Length - 1, -1, -1):
            g = self.Activation[k].backward(g)
            g = self.Linear[k].backward(g)
    def update(self, eta, alpha):
        # Gradient descent with a momentum term: the previous update
        # (not the raw gradient) is re-applied scaled by alpha
        for l in range(self.Length):
            dW = -eta * self.Linear[l].dW + alpha * self.last_dW[l]
            db = -eta * self.Linear[l].db + alpha * self.last_db[l]
            self.Linear[l].W += dW
            self.Linear[l].b += db
            self.last_dW[l] = dW
            self.last_db[l] = db
```
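As a short smoke test of the class (assuming `ParityGenerator` and the layer classes above are already defined), a brief run should show the loss decreasing:
```python=
X, y = ParityGenerator(256)
net = MLP(8, ['ReLU', 'Tanh', 'Sigmoid'], [256, 128, 1])
net.forward(X)
net.backward(y)               # sets net.L to the initial loss
first_loss = net.L
for _ in range(500):
    net.forward(X)
    net.backward(y)
    net.update(0.001, 0.001)
print('loss: %.4f -> %.4f' % (first_loss, net.L))
```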