# 7.4.5~7.4.7
## 7.4.5 Gradient Verification
Compare the analytical gradient of the RNN loss with a numerical gradient estimate.
``` python
def rnn_loss_grad(Fs, Ys, loss_fn=loss_gradient_softmax_crossentropy, flatten=True):
    """Sum the loss over all time steps and collect dL/dF for each step."""
    # loss_fn is assumed to be defined earlier in the chapter: a softmax
    # cross-entropy that returns (loss, gradient) for one time step.
    loss = 0
    dFs = {}
    for t in range(len(Fs)):
        F = Fs[t]  # RNN output at time step t
        Y = Ys[t]  # target at time step t
        # Flatten 2-D targets (e.g. column vectors) into 1-D label arrays
        if flatten and Y.ndim == 2:
            Y = Y.flatten()
        loss_t, dF_t = loss_fn(F, Y)
        loss += loss_t
        dFs[t] = dF_t
    return loss, dFs
```
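The analytical gradients `dFs` can be checked against a central-difference estimate. The checker below is a minimal sketch, not from the source; `numerical_gradient` and the closure passed to it are names introduced here for illustration.

``` python
import numpy as np

def numerical_gradient(f, x, eps=1e-6):
    """Central-difference estimate of df/dx for a scalar-valued f()."""
    grad = np.zeros_like(x, dtype=float)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = x[idx]
        x[idx] = old + eps   # perturb one element upward
        f_plus = f()
        x[idx] = old - eps   # and downward
        f_minus = f()
        x[idx] = old         # restore the original value
        grad[idx] = (f_plus - f_minus) / (2 * eps)
        it.iternext()
    return grad

# Assumed usage: Fs, Ys come from an RNN forward pass defined elsewhere.
# loss, dFs = rnn_loss_grad(Fs, Ys)
# num_dF0 = numerical_gradient(lambda: rnn_loss_grad(Fs, Ys)[0], Fs[0])
# print(np.max(np.abs(num_dF0 - dFs[0])))  # should be close to 0
```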
## 7.4.6 Training with Gradient Descent
Training uses stochastic gradient descent with momentum:
``` python
import numpy as np

class SGD():
    """Stochastic gradient descent with momentum."""
    def __init__(self, params, learning_rate=0.01, momentum=0.9):
        self.params = params
        self.lr = learning_rate
        self.momentum = momentum
        # One velocity buffer per parameter tensor
        self.vs = [np.zeros_like(p) for p in self.params]

    def step(self, grads):
        for i in range(len(self.params)):
            # v <- momentum * v + lr * grad; the learning rate is folded
            # into the velocity, so the parameter update uses v directly
            self.vs[i] = self.momentum * self.vs[i] + self.lr * grads[i]
            self.params[i] -= self.vs[i]

    def scale_learning_rate(self, scale):
        self.lr *= scale
```
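A minimal usage sketch; the parameter shapes and gradients below are made up for illustration:

``` python
W = np.random.randn(4, 4) * 0.01
b = np.zeros(4)
opt = SGD([W, b], learning_rate=0.1)

# Pretend these gradients came from backpropagation
grads = [np.random.randn(4, 4), np.random.randn(4)]
opt.step(grads)               # updates W and b in place
opt.scale_learning_rate(0.5)  # e.g. decay the learning rate mid-training
```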
## 7.4.7 Sampling Sequence Data
There are two sampling strategies:
- Sequential (consecutive) sampling: subsequences follow the order of the original data, e.g. [1,2,3], [2,3,4], [3,4,5], ...
- Random sampling: samples are drawn from random positions, e.g. [1,10,20], [3,50,9], ...

The function below implements sequential sampling; a random-sampling sketch follows after it.
``` python
def data_iter_consecutive(data, batch_size, seq_len, start_range=10, to_3D=True):
    # Random offset so each epoch can split the sequence slightly differently
    start = np.random.randint(0, start_range)
    block_len = (len(data) - start - 1) // batch_size
    Xs = data[start:start + block_len * batch_size]
    Ys = data[start + 1:start + block_len * batch_size + 1]  # targets = inputs shifted by one
    Xs = Xs.reshape(batch_size, -1)
    Ys = Ys.reshape(batch_size, -1)
    reset = True  # the RNN hidden state is reset before the first batch only
    num_batches = Xs.shape[1] // seq_len
    for i in range(0, num_batches * seq_len, seq_len):
        X = Xs[:, i:(i + seq_len)]
        Y = Ys[:, i:(i + seq_len)]
        if to_3D:
            # change (batch_size, time_length) to (time_length, batch_size, features)
            X = np.swapaxes(X, 0, 1)
            X = X.reshape(X.shape[0], X.shape[1], -1)
            Y = np.swapaxes(Y, 0, 1)
            Y = Y.reshape(Y.shape[0], Y.shape[1], -1)
        else:
            X = np.swapaxes(X, 0, 1)
            Y = np.swapaxes(Y, 0, 1)
        yield X, Y, reset
        reset = False
```
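A quick sanity check on a toy sequence (shapes assume `to_3D=True`):

``` python
data = np.arange(100)  # toy sequence 0, 1, ..., 99
for X, Y, reset in data_iter_consecutive(data, batch_size=2, seq_len=5):
    print(X.shape, Y.shape, reset)  # (5, 2, 1) (5, 2, 1); reset is True only on the first batch
```

The source only shows the sequential version. For contrast, a random-sampling iterator might look like the sketch below; `data_iter_random` is a hypothetical name introduced here, not from the source.

``` python
def data_iter_random(data, batch_size, seq_len, to_3D=True):
    # Cut the data into non-overlapping subsequences, then shuffle their order
    num_subseqs = (len(data) - 1) // seq_len
    starts = np.arange(0, num_subseqs * seq_len, seq_len)
    np.random.shuffle(starts)
    for i in range(0, len(starts) - batch_size + 1, batch_size):
        batch_starts = starts[i:i + batch_size]
        X = np.stack([data[s:s + seq_len] for s in batch_starts])
        Y = np.stack([data[s + 1:s + seq_len + 1] for s in batch_starts])
        if to_3D:
            X = np.swapaxes(X, 0, 1).reshape(seq_len, batch_size, -1)
            Y = np.swapaxes(Y, 0, 1).reshape(seq_len, batch_size, -1)
        else:
            X = np.swapaxes(X, 0, 1)
            Y = np.swapaxes(Y, 0, 1)
        # Batches are not contiguous in time, so the hidden state
        # should be reset before every batch
        yield X, Y, True
```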
Questions:
1. What are the two ways of sampling sequence data?
2. What characterizes sequential sampling?