# 7.9.2~7.10.1
## 拆分權重
為了分別計算,將權重及偏置向量拆成兩項\
例如:$h^{'} = \tanh(W_{ih}x + b_{ih} + W_{hh}h + b_{hh})$
## 多層循環神經網路
:::info
第一層隱含層接收資料,產生隱狀態$H^{(1)}$,這個隱狀態又繼續作為第二個隱含層的輸入... 而最後一層可以做為整個網路的輸出層,也可以繼續接一個/多個非循環神經網路
:::
簡單來說可以表示成
$H^{(l)}_t = f_l(H^{(l-1)}_t, H^{(l)}_{t-1})$
### RNNBase
```python=
from Layers import *
class RNNBase(Layer):
    """Multi-layer recurrent network built by stacking single recurrent cells.

    `mode` selects the cell type: 'RNN_TANH', 'RNN_RELU', 'LSTM' or 'GRU'.
    The first cell maps input_size -> hidden_size; the remaining
    n_layers - 1 cells map hidden_size -> hidden_size.  Each layer's h at
    every time step is the next layer's input sequence.
    """

    def __init__(self, mode, input_size, hidden_size, n_layers, bias=True):
        # BUG FIX: original wrote super(RNBase, ...) -- a NameError.
        super(RNNBase, self).__init__()
        self.mode = mode
        if mode == 'RNN_TANH':
            self.cells = [RNNCell(input_size, hidden_size, bias, nonlinearity="tanh")]
            self.cells += [RNNCell(hidden_size, hidden_size, bias, nonlinearity="tanh")
                           for _ in range(n_layers - 1)]
        elif mode == 'RNN_RELU':
            self.cells = [RNNCell(input_size, hidden_size, bias, nonlinearity="relu")]
            self.cells += [RNNCell(hidden_size, hidden_size, bias, nonlinearity="relu")
                           for _ in range(n_layers - 1)]
        elif mode == 'LSTM':
            self.cells = [LSTMCell(input_size, hidden_size, bias)]
            self.cells += [LSTMCell(hidden_size, hidden_size, bias)
                           for _ in range(n_layers - 1)]
        elif mode == 'GRU':
            self.cells = [GRUCell(input_size, hidden_size, bias)]
            self.cells += [GRUCell(hidden_size, hidden_size, bias)
                           for _ in range(n_layers - 1)]
        else:
            # original silently left self.cells unset for unknown modes
            raise ValueError("Unknown mode '{}'".format(mode))
        self.input_size, self.hidden_size = input_size, hidden_size
        self.n_layers = n_layers
        self.flatten_parameters()
        # BUG FIX: original set self.params = None here, discarding the
        # flat parameter list flatten_parameters() just built.
        self.hs = None  # forward-pass hidden states, filled by forward()
        self.zs = None  # forward-pass cell intermediates, filled by forward()

    def flatten_parameters(self):
        """Collect every cell's params and grads into flat lists on self."""
        self.params = []
        self.grads = []
        for cell in self.cells:
            for p, g in zip(cell.params, cell.grads):
                self.params.append(p)
                self.grads.append(g)

    def forward(self, x, h=None):
        """Run the whole stack over an input sequence.

        x: array of shape (seq_len, batch, input_size).
        h: optional initial hidden state, (h0, c0) pair for LSTM; zeros
           from init_hidden() when omitted.
        Returns (output, hn): the last layer's h at every time step, and
        every layer's state at the final step.
        """
        seq_len, batch_size = x.shape[0], x.shape[1]
        n_layers = self.n_layers
        mode = self.mode
        # BUG FIX: original allocated zeros of shape (batch, hidden) per
        # layer, losing the time dimension it then indexed with hs[i][t].
        # Plain lists so LSTM layers can store (h, c) pairs per step.
        hs = [[None] * seq_len for _ in range(n_layers)]
        zs = [[] for _ in range(n_layers)]
        if h is None:
            h = self.init_hidden(batch_size)
        if mode == 'LSTM':
            self.h = (h[0].copy(), h[1].copy())
        else:
            self.h = h.copy()
        for i in range(n_layers):
            cell = self.cells[i]
            if i != 0:
                x = hs[i - 1]  # previous layer's outputs, one entry per step
                if mode == 'LSTM':
                    # only the h part of each (h, c) pair feeds the next layer
                    x = np.array([h_t for h_t, c_t in x])
            if mode == 'LSTM':
                # BUG FIX: original read an undefined name `c`; layer i's
                # initial state is the i-th slice of the (h0, c0) pair.
                hi = (h[0][i], h[1][i])
            else:
                # BUG FIX: original used hs[i] (always zeros), ignoring a
                # caller-supplied initial state.
                hi = h[i]
            for t in range(seq_len):
                hi = cell(x[t], hi)
                if isinstance(hi, tuple):
                    # cell returned (state, z); z holds the intermediates
                    # backward() needs.  For LSTM, state is itself the
                    # (h, c) pair -- TODO confirm against Layers cells.
                    hi, z = hi[0], hi[1]
                    zs[i].append(z)
                hs[i][t] = hi
            # BUG FIX: original appended the final z a second time here for
            # LSTM/GRU, desynchronizing zs from the time axis.
        # (layers, seq, batch, hidden); (layers, seq, 2, batch, hidden) for LSTM
        self.hs = np.array(hs)
        self.zs = np.array(zs) if len(zs[0]) > 0 else None
        output = hs[-1]  # h_t of the last layer for every t
        if mode == 'LSTM':
            output = [h_t for h_t, c_t in output]
        hn = self.hs[:, -1]  # every layer's state at t = seq_len
        return np.array(output), hn

    def call(self, x, h=None):
        """Alias for forward()."""
        return self.forward(x, h)

    def init_hidden(self, batch_size):
        """Create (and cache on self.h) an all-zero initial hidden state."""
        zeros = np.zeros((self.n_layers, batch_size, self.hidden_size))
        if self.mode == 'LSTM':
            self.h = (zeros.copy(), zeros.copy())
        else:
            self.h = zeros.copy()
        return self.h

    def backward(self, dhs, input):
        """Backpropagate through time over the cached forward pass.

        dhs: gradient w.r.t. the outputs, shape (seq_len, batch, hidden),
             or (batch, hidden) meaning "last time step only".
        input: the sequence that was fed to forward().
        Returns (dinput, dhidden): gradients w.r.t. the input sequence and
        each layer's initial hidden state.
        """
        if self.hs is None:
            # no cached forward pass yet: run one to populate hs/zs
            self.forward(input)
        hs = self.hs
        cache = self.zs if self.zs is not None else hs
        seq_len, batch_size = input.shape[0], input.shape[1]
        dinput = [None for _ in range(seq_len)]
        if len(dhs.shape) == 2:  # gradient given only at the last step
            # BUG FIX: original rebound `dhs` to the zero list before
            # reading it, then stored that list into its own last slot.
            dh_last = dhs
            dhs = np.zeros((seq_len,) + dh_last.shape, dtype=dh_last.dtype)
            dhs[-1] = dh_last
        elif dhs.shape[0] != seq_len:
            raise RuntimeError("dhs has inconsistent seq_len: got {}, expected {}".format(
                dhs.shape[0], seq_len))
        dhidden = [None for _ in range(self.n_layers)]
        for layer in reversed(range(self.n_layers)):
            layer_hs = hs[layer]
            layer_zs = cache[layer]
            cell = self.cells[layer]
            if layer == 0:
                layer_input = input
            else:
                if self.mode == 'LSTM':
                    # BUG FIX: original took hs[layer-1][0] (time step 0);
                    # we need the h part of every step: axis layout is
                    # (seq, 2, batch, hidden) for LSTM layers.
                    layer_input = hs[layer - 1][:, 0]
                else:
                    layer_input = hs[layer - 1]
            dh = np.zeros_like(dhs[0])
            if self.mode == 'LSTM':
                h_0 = (self.h[0][layer], self.h[1][layer])
                dc = np.zeros_like(dhs[0])
            else:
                h_0 = self.h[layer]
            for t in reversed(range(seq_len)):
                dh = dh + dhs[t]  # gradient from above plus from step t+1
                h_pre = h_0 if t == 0 else layer_hs[t - 1]
                if self.mode == 'LSTM':
                    dx, (dh, dc) = cell.backward((dh, dc), layer_zs[t],
                                                 layer_input[t], h_pre)
                else:
                    dx, dh, _ = cell.backward(dh, layer_zs[t],
                                              layer_input[t], h_pre)
                if layer > 0:
                    dhs[t] = dx  # becomes the per-step gradient for layer-1
                else:
                    dinput[t] = dx
            dhidden[layer] = (dh, dc) if self.mode == 'LSTM' else dh
        return np.array(dinput), np.array(dhidden)

    def parameters(self):
        """Return [param, grad] pairs for every trainable parameter."""
        # BUG FIX: original appended pairs onto self.params while iterating
        # it (after __init__ had nulled it), corrupting the flat list;
        # build a fresh pairing instead.
        if self.params is None:
            self.flatten_parameters()
        return [[p, g] for p, g in zip(self.params, self.grads)]
```
```python=
class RNN(RNNBase):
    """Plain recurrent network.

    Accepts an optional `nonlinearity` keyword ('tanh', the default, or
    'relu') and maps it onto RNNBase's 'RNN_TANH'/'RNN_RELU' modes.
    """

    def __init__(self, *args, **kwargs):
        nonlinearity = kwargs.pop('nonlinearity', 'tanh')
        if nonlinearity == 'tanh':
            mode = 'RNN_TANH'
        elif nonlinearity == 'relu':
            mode = 'RNN_RELU'
        else:
            raise ValueError("Unknown nonlinearity '{}'".format(nonlinearity))
        super(RNN, self).__init__(mode, *args, **kwargs)
class LSTM(RNNBase):
    """Long short-term memory network: RNNBase with mode fixed to 'LSTM'."""

    def __init__(self, *args, **kwargs):
        RNNBase.__init__(self, 'LSTM', *args, **kwargs)
class GRU(RNNBase):
    """Gated recurrent unit network: RNNBase with mode fixed to 'GRU'."""

    def __init__(self, *args, **kwargs):
        RNNBase.__init__(self, 'GRU', *args, **kwargs)
```
問題:
* 這些處理方式可以應用在哪些方面?