
Learning the 8-bit parity checking problem with an MLP


Problem

Given a number made of eight '1's and '0's, output '1' if it contains an odd number of '1's, and '0' if it contains an even number. For example, 00000111 has three '1's, so the output is '1'. The task is to use machine learning to train a model that automatically determines the correct output for any 8-bit input.

Solution

Generating the training data

Generate all 8-bit numbers by converting the decimals 0–255 to binary, giving 256 patterns in total.
For each pattern, use count and % 2 to determine whether it contains an odd or even number of '1's, and produce the corresponding label '1' or '0'.
This way a simple loop generates the entire training set.
import numpy as np

input_list = []
label = []
for i in range(256):
    bits = '{0:08b}'.format(i)                 # 8-bit binary string, e.g. '00000101'
    input_list.append([int(x) for x in bits])  # the 8 input bits
    label.append([bits.count('1') % 2])        # parity label: 1 if an odd number of '1's

input_list = np.asarray(input_list)
label = np.asarray(label)
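
A quick spot check (a minimal sketch using the arrays just built) confirms that each label encodes the parity of its 8-bit pattern:

# print a few (input, label) pairs generated above
for i in (1, 3, 7, 255):
    print(input_list[i], '->', label[i])
# e.g. [0 0 0 0 0 0 0 1] -> [1], [0 0 0 0 0 0 1 1] -> [0]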

Building the model

First, each building block is implemented as its own class (Linear, ReLU, Tanh, Sigmoid, and Loss) so they can be composed later.
#linear (fully connected) layer: out = xW + b
class Linear:
    def __init__(self, m, n):
        # m inputs, n outputs; weights and bias drawn from a standard normal
        self.W, self.b = np.random.randn(m, n), np.random.randn(1, n)
        self.dW, self.db = None, None
        
    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W)+self.b
        
        return out
    
    def backward(self, dout):
        dx = np.dot(dout, self.W.T)        # gradient w.r.t. the input
        self.dW = np.dot(self.x.T, dout)   # gradient w.r.t. the weights
        self.db = np.sum(dout, axis=0)     # gradient w.r.t. the bias
        
        return dx
#relu function
class ReLU:
    def __init__(self):
        pass
    
    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()        # copy so the caller's array is not modified in place
        out[self.mask] = 0
        
        return out
        
    def backward(self, dout):
        dx = dout
        dx[self.mask] = 0
        return dx
#sigmoid function
class Sigmoid:
    def __init__(self):
        pass
    
    def forward(self, x):
        out = 1.0/(1.0+np.exp(-x))
        self.o = out
        return out
        
    def backward(self, dout):
        dx = dout*self.o*(1-self.o)
        return dx
#tanh function
class Tanh:
    def __init__(self):
        pass
    
    def forward(self, x):
        # tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
        out = (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        # derivative of tanh: dx = dout * (1 - tanh(x)**2)
        dx = dout*(1.0-(self.out)**2)
        return dx
#loss function
class Loss:
    def __init__(self):
        pass
    
    # loss: sum of squared errors (SSE) over all samples
    def forward(self, y, ybar):
        self.ybar = ybar
        self.y = y
        return np.sum((y-ybar)**2)
    
    def backward(self, dout):
        dy = -(2*(self.y-self.ybar))
        return dy
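
Before wiring the layers together, the analytic gradients above can be verified numerically. Below is a minimal sketch of a gradient check on a single Linear layer; the test shapes, seed, and epsilon are arbitrary choices, not part of the original assignment.

# numerical gradient check for Linear.backward, using sum(outputs) as a scalar "loss"
np.random.seed(0)
layer = Linear(8, 3)
x = np.random.randn(4, 8)

layer.forward(x)
layer.backward(np.ones((4, 3)))            # analytic dW for loss = sum(outputs)

eps = 1e-6
num_dW = np.zeros_like(layer.W)
for i in range(layer.W.shape[0]):
    for j in range(layer.W.shape[1]):
        layer.W[i, j] += eps
        plus = np.sum(layer.forward(x))
        layer.W[i, j] -= 2*eps
        minus = np.sum(layer.forward(x))
        layer.W[i, j] += eps
        num_dW[i, j] = (plus - minus) / (2*eps)

print(np.max(np.abs(num_dW - layer.dW)))   # should be close to 0 (around 1e-9)
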
Below are the implementations of the two-layer and three-layer networks.
class TwoLayer:
    def __init__(self,m,n,o):
        self.linear1, self.linear2 = Linear(m, n), Linear(n, o) 
        self.act1, self.act2 = Tanh(), Sigmoid()
        self.loss = Loss()
        self.last_dW1 , self.last_dW2 = 0, 0
        self.last_db1 , self.last_db2 = 0, 0
        
    def forward(self, x):
#       -- L1 ---------------------       
        x = self.linear1.forward(x)
        x = self.act1.forward(x)
#       -- output ---------------------
        x = self.linear2.forward(x)
        self.ybar = self.act2.forward(x)

        
        return self.ybar
        
    def backward(self, y):
#       --output-------------
        self.L = self.loss.forward(y, self.ybar)
        g = self.loss.backward(1)
#       --L2-------------
        g = self.act2.backward(g)
        g = self.linear2.backward(g)
#       --input-------------
        g = self.act1.backward(g)
        g = self.linear1.backward(g)

        
    def update(self, eta, alpha):
#       --L1--  
        self.linear1.W = self.linear1.W - eta*self.linear1.dW + alpha*self.last_dW1
        self.linear1.b = self.linear1.b - eta*self.linear1.db + alpha*self.last_db1
        self.last_dW1 = eta*self.linear1.dW
        self.last_db1 = eta*self.linear1.db
#       --L2--        
        self.linear2.W = self.linear2.W - eta*self.linear2.dW + alpha*self.last_dW2
        self.linear2.b = self.linear2.b - eta*self.linear2.db + alpha*self.last_db2
        self.last_dW2 = eta*self.linear2.dW
        self.last_db2 = eta*self.linear2.db
class ThreeLayer:
    def __init__(self,m,n,o,p):
        self.linear1, self.linear2, self.linear3 = Linear(m, n), Linear(n, o), Linear(o, p)
        self.act1, self.act2, self.act3=ReLU(), Tanh(), Sigmoid()
        self.loss = Loss()
        self.last_dW1, self.last_dW2, self.last_dW3 = 0.0, 0.0, 0.0
        self.last_db1, self.last_db2, self.last_db3 = 0.0, 0.0, 0.0

    def forward(self, x):
#       -- L1 -----------------------
        x = self.linear1.forward(x)
        x = self.act1.forward(x)
#       -- L2 -----------------------
        x = self.linear2.forward(x)
        x = self.act2.forward(x)
#       -- output -----------------------
        x = self.linear3.forward(x)
        self.ybar = self.act3.forward(x)

        return self.ybar
    
    
    def backward(self, y):
#       --output--        
        self.L = self.loss.forward(y, self.ybar)
        g = self.loss.backward(1)
#       --L3--
        g = self.act3.backward(g)
        g = self.linear3.backward(g)
#       --L2--
        g = self.act2.backward(g)
        g = self.linear2.backward(g)
#       --input--
        g = self.act1.backward(g)
        g = self.linear1.backward(g)


    
    def update(self, eta, alpha):
#       --L1--        
        self.linear1.W = self.linear1.W - eta*self.linear1.dW + alpha*self.last_dW1
        self.linear1.b = self.linear1.b - eta*self.linear1.db + alpha*self.last_db1
        self.last_dW1 = eta*self.linear1.dW
        self.last_db1 = eta*self.linear1.db  
#       --L2--        
        self.linear2.W = self.linear2.W - eta*self.linear2.dW + alpha*self.last_dW2
        self.linear2.b = self.linear2.b - eta*self.linear2.db + alpha*self.last_db2
        self.last_dW2 = eta*self.linear2.dW
        self.last_db2 = eta*self.linear2.db  
#       --L3--        
        self.linear3.W = self.linear3.W - eta*self.linear3.dW + alpha*self.last_dW3
        self.linear3.b = self.linear3.b - eta*self.linear3.db + alpha*self.last_db3
        self.last_dW3 = eta*self.linear3.dW
        self.last_db3 = eta*self.linear3.db  
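
The 4-layer and 5-layer results reported below follow exactly the same pattern, just with more (Linear, activation) pairs. As a sketch only (not the code actually used for those runs), the pattern can be generalized to arbitrary depth:

# Sketch: a generic stack of (Linear, activation) pairs built from the classes above.
# The deeper results below were produced with hand-written classes in the same style
# as TwoLayer/ThreeLayer; this generalization is an assumption made for brevity.
class MLP:
    def __init__(self, sizes, activations):
        # sizes, e.g. [8, 128, 53, 28, 1]; activations: one object per Linear layer
        self.linears = [Linear(m, n) for m, n in zip(sizes[:-1], sizes[1:])]
        self.acts = activations
        self.loss = Loss()
        self.last_dW = [0.0]*len(self.linears)
        self.last_db = [0.0]*len(self.linears)

    def forward(self, x):
        for lin, act in zip(self.linears, self.acts):
            x = act.forward(lin.forward(x))
        self.ybar = x
        return x

    def backward(self, y):
        self.L = self.loss.forward(y, self.ybar)
        g = self.loss.backward(1)
        for lin, act in zip(reversed(self.linears), reversed(self.acts)):
            g = lin.backward(act.backward(g))

    def update(self, eta, alpha):
        # same per-layer update rule as in TwoLayer/ThreeLayer
        for i, lin in enumerate(self.linears):
            lin.W = lin.W - eta*lin.dW + alpha*self.last_dW[i]
            lin.b = lin.b - eta*lin.db + alpha*self.last_db[i]
            self.last_dW[i] = eta*lin.dW
            self.last_db[i] = eta*lin.db

# e.g. the 4-layer configuration reported below:
# model = MLP([8, 128, 53, 28, 1], [ReLU(), Tanh(), Sigmoid(), Sigmoid()])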
        

Training the network

The model is initialized as a 2-layer network with Tanh and Sigmoid activations.
It is then trained on the 256 training samples for 15,000 epochs with eta = 0.003 and alpha = 0.001; training is restarted from scratch until the final loss is small enough, keeping the run with the smallest loss.
#initialize the training settings
max_epochs, chk_epochs = 15000, 1000
eta, alpha = 0.003, 0.001

loss_min = []
epoch_min = []
minLoss = np.inf            # smallest final loss seen so far
targetLoss = 1.0            # retrain until the best loss drops below this (assumed threshold)

while minLoss >= targetLoss:
    loss = []
    epoch = []
    model = TwoLayer(8, 10, 1)   # 8 inputs -> 10 hidden units (Tanh) -> 1 output (Sigmoid)
    for e in range(max_epochs):
        model.forward(input_list)
        model.backward(label)
        model.update(eta, alpha)
        loss.append(model.L)
        epoch.append(e)
        if (e+1) % chk_epochs == 0:
            print(model.ybar.T)
            print('Epoch %3d: loss=%.6f' % (e+1, model.L))
    if model.L < minLoss:        # keep the best run so far
        minLoss = model.L
        loss_min = loss
        epoch_min = epoch
        minOutput = model.ybar.T
        

Saving the minimum loss and the 256 outputs


fp = open("D:\\Justin\'s_University\\大四上\\深度學習\\HW1\\output\\"+str(minLoss)+".txt", "a")
fp.write(str(minLoss))
fp.close()
fp = open("D:\\Justin\'s_University\\大四上\\深度學習\\HW1\\output\\"+str(minLoss)+"_output.txt", "a")
fp.write(str(minOutput))
fp.close()

Plotting the training error

The training error is plotted with the matplotlib library.
import matplotlib.pyplot as plt

plt.style.use("ggplot")
plt.figure()
plt.plot(epoch_min, loss_min, label="Training_Error")
plt.xlabel("Epochs")
plt.ylabel("Training Error")
plt.legend()
picname = str(minLoss)
plt.savefig("D:\\Justin\'s_University\\大四上\\深度學習\\HW1\\output\\"+picname+".jpg")
print(picname + ".jpg is saved")
plt.show()

Results after training

2 layers

max_epochs, chk_epochs = 15000, 1000
last_dW, last_db = 0.0, 0.0
eta, alpha = 0.003, 0.001
Tanh(10)->Sigmoid(1)


Loss = 1.3197356837354326
2-Layers output Table


3 layers

max_epochs, chk_epochs = 15000, 1000
last_dW, last_db = 0.0, 0.0
eta, alpha = 0.003, 0.001
ReLU(30)->Tanh(4)->Sigmoid(1)


Loss = 1.0003163010187124
3-Layers output Table
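
For reference, this configuration maps onto the ThreeLayer class above as follows (a hedged reading of the settings listed: 8 inputs, hidden sizes 30 and 4, one output):

# assumed instantiation matching ReLU(30)->Tanh(4)->Sigmoid(1)
model = ThreeLayer(8, 30, 4, 1)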


4 layers

max_epochs, chk_epochs = 15000, 1000
last_dW, last_db = 0.0, 0.0
eta, alpha = 0.003, 0.001
ReLU(128)->Tanh(53)->Sigmoid(28)->Sigmoid(1)


Loss = 0.03333781335491803
4-Layers output Table

max_epochs, chk_epochs = 25000, 1000
last_dW, last_db = 0.0, 0.0
eta, alpha = 0.003, 0.001
ReLU(128)->Tanh(53)->Sigmoid(28)->Sigmoid(1)


Loss = 0.018032712342026344
4-Layers output Table


5 layers

max_epochs, chk_epochs = 15000, 1000
last_dW, last_db = 0.0, 0.0
eta, alpha = 0.003, 0.001

ReLU(128)->Tanh(64)->Sigmoid(32)->ReLU(16)->Sigmoid(1)


Loss = 0.0025766262860609504
5-Layers output Table