# Deep Learning and Optoelectronic Applications HW2
contributed by <[`tintinjian12999`](https://github.com/tintinjian12999)>
## 1. (100%) Using MATLAB or Python code, build a fully connected neural network to recognize the MNIST handwritten-digit dataset (train on the training dataset, then classify the test dataset). Compute the accuracy, plot the first 25 misclassified images together with their predictions and labels, and report the total number of learnable parameters. (Neither the MATLAB nor the Python program needs a validation set; all 60000 training samples are used for training.)
### (a) Re-run the program provided with the course
```python
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plt
```
Import the required modules.
```python
PATH_DATASETS = "" # 預設路徑
BATCH_SIZE = 512 # 批量
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
```
Set the path where the data is stored, the number of samples read per batch, and the device used for computation (cuda means GPU).
```python
# Download the MNIST handwritten-digit training data
train_ds = MNIST(PATH_DATASETS, train=True, download=True,
                 transform=transforms.ToTensor())
# Download the test data
test_ds = MNIST(PATH_DATASETS, train=False, download=True,
                transform=transforms.ToTensor())
# Shapes of the training/test data
print(train_ds.data.shape, test_ds.data.shape)
```
Download the training and test data.
```python
# Labels of the first 9 training images
print(train_ds.targets[:9])
fig = plt.figure(figsize=(8, 9))
for i in range(9):
    plt.subplot(3, 3, i + 1)  # 3x3 grid; subplot indices start from 1, hence i + 1
    plt.imshow(train_ds.data[i].numpy(), cmap='gray')
    plt.axis('off')
    plt.title('%i' % train_ds.targets[i])
plt.show()
```

Show the first nine training images. (Here you can see that train_ds and test_ds each provide data and targets, i.e. the images and their corresponding labels.)
```python
print(test_ds.targets[:9])
fig = plt.figure(figsize=(8, 9))
for i in range(9):
    plt.subplot(3, 3, i + 1)  # 3x3 grid; subplot indices start from 1, hence i + 1
    plt.imshow(test_ds.data[i].numpy(), cmap='gray')
    plt.axis('off')
    plt.title('%i' % test_ds.targets[i])
plt.show()
```

Do the same for the test data.
```python
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(28 * 28, 256),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(256, 10),
    # With nn.CrossEntropyLoss() the output must NOT be passed through a softmax layer,
    # otherwise the computed loss is wrong
    # torch.nn.Softmax(dim=1)
).to(device)
print(model)
```
Define the model used for this training run: the image is first flattened into a 1-D tensor and then passed through two linear layers. The Dropout layer in between randomly zeroes 20% of the activations to reduce overfitting, and the final .to(device) moves the model to the device we selected.
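As a quick, standalone illustration of what nn.Dropout(0.2) does (a minimal sketch, independent of the homework code): in training mode it zeroes roughly 20% of its inputs and scales the survivors by 1/(1 - 0.2), while in evaluation mode it is an identity operation.
```python
import torch

torch.manual_seed(0)
drop = torch.nn.Dropout(0.2)
x = torch.ones(10)

drop.train()    # training mode: ~20% of elements are zeroed, the rest scaled by 1/(1 - 0.2) = 1.25
print(drop(x))  # e.g. tensor([1.25, 1.25, 0.00, ...]) -- which positions are zeroed is random

drop.eval()     # evaluation mode: dropout does nothing
print(drop(x))  # tensor([1., 1., ..., 1.])
```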
```python
epochs = 5
lr = 0.1
# Build the DataLoader
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE)
# Set up the optimizer
# optimizer = torch.optim.Adam(model.parameters(), lr=lr)
optimizer = torch.optim.Adadelta(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
model.train()
loss_list = []
for epoch in range(1, epochs + 1):
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        # if batch_idx == 0 and epoch == 1: print(data[0])
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            loss_list.append(loss.item())
            batch = batch_idx * len(data)
            data_count = len(train_loader.dataset)
            percentage = (100. * batch_idx / len(train_loader))
            print(f'Epoch {epoch}: [{batch:5d} / {data_count}] ({percentage:.0f} %)' +
                  f' Loss: {loss.item():.6f}')
plt.plot(loss_list, 'r')
```
Train the model: each epoch runs through all 60000 samples (the entire training dataset), for five epochs in total. Finally, the loss is plotted against the training iterations (within each epoch the loss is recorded once every ten batches).

> Note that this cell cannot simply be re-run as-is: after one run the model has already been trained, so to start over you also have to re-run the cell above that defines the model.
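One way to avoid that pitfall is to wrap the model definition in a small helper and call it before every training run, so each experiment starts from freshly initialized weights. A minimal sketch (build_model is a hypothetical helper, not part of the course code):
```python
def build_model():
    # hypothetical helper: returns a freshly initialized copy of the network defined above
    return torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(28 * 28, 256),
        torch.nn.Dropout(0.2),
        torch.nn.Linear(256, 10),
    ).to(device)

model = build_model()
optimizer = torch.optim.Adadelta(model.parameters(), lr=lr)  # the optimizer must be rebuilt as well
```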
```python
# Build the DataLoader
test_loader = DataLoader(test_ds, shuffle=False, batch_size=BATCH_SIZE)
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # sum up batch loss
        test_loss += criterion(output, target).item()
        # prediction
        pred = output.argmax(dim=1, keepdim=True)
        # number of correct predictions
        correct += pred.eq(target.view_as(pred)).sum().item()
# average loss
test_loss /= len(test_loader.dataset)
# show the test results
data_count = len(test_loader.dataset)
percentage = 100. * correct / data_count
print(f'Average loss: {test_loss:.4f}, Accuracy: {correct}/{data_count}' +
      f' ({percentage:.0f}%)\n')
```
```
Average loss: 0.0007, Accuracy: 9080/10000 (91%)
```
Run the trained model on the test dataset and evaluate its accuracy.
```python
# Predict the first 20 test samples
predictions = []
with torch.no_grad():
    for i in range(20):
        data, target = test_ds[i][0], test_ds[i][1]
        data = data.reshape(1, *data.shape).to(device)
        output = torch.argmax(model(data), axis=-1)
        predictions.append(str(output.item()))
# Compare with the ground truth
print('actual :', test_ds.targets[0:20].numpy())
print('prediction: ', ' '.join(predictions[0:20]))
```
```
actual : [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
prediction: 7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4
```
Same as above, but this time the predictions are printed for comparison with the labels.
```python
# Show the class probabilities for the 10th test sample
import numpy as np
i = 9
data = test_ds[i][0]
data = data.reshape(1, *data.shape).to(device)
# print(data.shape)
predictions = torch.softmax(model(data), dim=1)
print(f'Probabilities for digits 0-9: {np.around(predictions.cpu().detach().numpy(), 2)}')
print(f'Predicted digit: {np.argmax(predictions.cpu().detach().numpy(), axis=-1)}')
```
```
Probabilities for digits 0-9: [[0. 0. 0. 0. 0.02 0. 0. 0.06 0. 0.92]]
Predicted digit: [9]
```
Use the model to predict a single, specific sample.
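This is also why the model definition leaves out the Softmax layer: nn.CrossEntropyLoss already applies log-softmax internally, so the network outputs raw logits during training, and softmax is only applied here, when we want probabilities to display. A self-contained sketch with made-up logits illustrating the equivalence:
```python
import torch
from torch.nn import functional as F

logits = torch.randn(4, 10)          # fake model outputs for 4 samples
target = torch.tensor([3, 1, 7, 0])  # fake labels

# cross_entropy == log_softmax followed by negative log-likelihood
loss_a = F.cross_entropy(logits, target)
loss_b = F.nll_loss(F.log_softmax(logits, dim=1), target)
print(torch.allclose(loss_a, loss_b))  # True

# softmax is only needed when we want probabilities for display; each row sums to 1
print(torch.softmax(logits, dim=1).sum(dim=1))
```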
```python
predictions = []
count = 0
i = 0
wrong_predicted_data = []
wrong_predicted_target = []
wrong_predicted_img = []
with torch.no_grad():
    while count < 25:
        data, targets = test_ds[i][0], test_ds[i][1]
        data1 = data.reshape(1, *data.shape).to(device)
        output = torch.argmax(model(data1), axis=-1)
        if output.item() != int(targets):
            wrong_predicted_data.append(int(output.item()))
            wrong_predicted_target.append(int(targets))
            wrong_predicted_img.append(test_ds.data[i].numpy())
            count = count + 1
        i = i + 1
```
Collect the first 25 misclassified samples.
```python
print(wrong_predicted_target)
fig = plt.figure(figsize=(20, 9))
for i in range(25):
    plt.subplot(5, 5, i + 1)  # 5x5 grid; subplot indices start from 1, hence i + 1
    plt.imshow(wrong_predicted_img[i], cmap='gray')
    plt.axis('off')
    plt.title('Target:%i Guess:%i' % (wrong_predicted_target[i], wrong_predicted_data[i]))
plt.show()
```

Plot the first 25 misclassified samples.
```python
def count_params(model):
    # sum the element counts of all trainable parameter tensors
    return sum([np.prod(layer.size()) for layer in model.parameters() if layer.requires_grad])

print('Model {}, Number of parameters {}'.format(model, count_params(model)))
```
```
Model Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=256, bias=True)
(2): Dropout(p=0.2, inplace=False)
(3): Linear(in_features=256, out_features=10, bias=True)
), Number of parameters 203530
```
Print the total number of learnable parameters.
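As a sanity check, the 203530 figure can be reproduced by hand from the layer sizes (each Linear layer contributes in_features × out_features weights plus out_features biases; Flatten and Dropout have no parameters):
```python
# 784-256-10 network: weights + biases of the two Linear layers
fc1 = 28 * 28 * 256 + 256   # 200960
fc2 = 256 * 10 + 10         # 2570
print(fc1 + fc2)            # 203530
```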
### (b) Compare the prediction results of adding a ReLU layer vs. a Tanh layer after the hidden layer, with the dropout layer removed
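For reference, a tiny standalone sketch of how the two activations being compared behave (not part of the homework code): ReLU clips negative values to zero, while tanh squashes its input smoothly into the range (-1, 1).
```python
import torch

x = torch.linspace(-3, 3, 7)
print(torch.relu(x))   # max(0, x): negative inputs become 0, positive inputs pass through
print(torch.tanh(x))   # smooth squashing into (-1, 1)
```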
#### ReLU
```python
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(28 * 28, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 10),
    # With nn.CrossEntropyLoss() the output must NOT be passed through a softmax layer,
    # otherwise the computed loss is wrong
    # torch.nn.Softmax(dim=1)
).to(device)
print(model)
```
The resulting loss curve is shown below.

It is about the same as the loss of the original model.
```
Average loss: 0.0006, Accuracy: 9096/10000 (91%)
Probabilities for digits 0-9: [[0. 0. 0. 0. 0.02 0. 0. 0.06 0. 0.91]]
Predicted digit: [9]
```
The accuracy is likewise 91%, and the sample is again predicted correctly.

```
Model Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=256, bias=True)
(2): ReLU()
(3): Linear(in_features=256, out_features=10, bias=True)
), Number of parameters 203530
```
#### Tanh
```python
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(28 * 28, 256),
    torch.nn.Tanh(),
    torch.nn.Linear(256, 10),
    # With nn.CrossEntropyLoss() the output must NOT be passed through a softmax layer,
    # otherwise the computed loss is wrong
    # torch.nn.Softmax(dim=1)
).to(device)
print(model)
```

```
Average loss: 0.0007, Accuracy: 9103/10000 (91%)
Probabilities for digits 0-9: [[0. 0. 0. 0. 0.02 0. 0. 0.08 0. 0.9 ]]
Predicted digit: [9]
```

```
Model Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=256, bias=True)
(2): Tanh()
(3): Linear(in_features=256, out_features=10, bias=True)
), Number of parameters 203530
```
Essentially the same as the ReLU case.
### (c) Compare the prediction results for dropout rates of 0.1, 0.2, 0.3, and 0.5
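The results below were obtained by repeating the training and evaluation from part (a), changing only the Dropout probability. A minimal sketch of how such a sweep could be set up (each model would then be trained and evaluated with exactly the same loops as in part (a)):
```python
# Build one copy of the network per dropout rate
models = {
    p: torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(28 * 28, 256),
        torch.nn.Dropout(p),
        torch.nn.Linear(256, 10),
    ).to(device)
    for p in (0.1, 0.2, 0.3, 0.5)
}
```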
#### 0.1

```
Average loss: 0.0007, Accuracy: 9087/10000 (91%)
Probabilities for digits 0-9: [[0. 0. 0. 0. 0.02 0. 0. 0.06 0. 0.91]]
Predicted digit: [9]
```

```
Model Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=256, bias=True)
(2): Dropout(p=0.1, inplace=False)
(3): Linear(in_features=256, out_features=10, bias=True)
), Number of parameters 203530
```
#### 0.2
Same as part (a) above (the original model already uses a dropout rate of 0.2).
#### 0.3

```
Average loss: 0.0007, Accuracy: 9082/10000 (91%)
Probabilities for digits 0-9: [[0. 0. 0. 0. 0.03 0. 0. 0.07 0. 0.9 ]]
Predicted digit: [9]
```

```
Model Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=256, bias=True)
(2): Dropout(p=0.3, inplace=False)
(3): Linear(in_features=256, out_features=10, bias=True)
), Number of parameters 203530
```
#### 0.5

```
Average loss: 0.0007, Accuracy: 9082/10000 (91%)
Probabilities for digits 0-9: [[0. 0. 0. 0. 0.03 0. 0. 0.07 0. 0.9 ]]
Predicted digit: [9]
```

```
Model Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=256, bias=True)
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=256, out_features=10, bias=True)
), Number of parameters 203530
```
### (d) Use two hidden layers (each followed by dropout(0.2); three layers of neurons in total, 64-64-10 and 128-64-10) and compare the prediction results
#### 64-64-10
```python
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(28 * 28, 64),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(64, 64),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(64, 10),
    # With nn.CrossEntropyLoss() the output must NOT be passed through a softmax layer,
    # otherwise the computed loss is wrong
    # torch.nn.Softmax(dim=1)
).to(device)
print(model)
```

```
Average loss: 0.0007, Accuracy: 9054/10000 (91%)
Probabilities for digits 0-9: [[0. 0. 0. 0. 0.03 0. 0. 0.07 0. 0.89]]
Predicted digit: [9]
```

```
Model Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=64, bias=True)
(2): Dropout(p=0.2, inplace=False)
(3): Linear(in_features=64, out_features=64, bias=True)
(4): Dropout(p=0.2, inplace=False)
(5): Linear(in_features=64, out_features=10, bias=True)
), Number of parameters 55050
```
#### 128-64-10
```python
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(28 * 28, 128),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(128, 64),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(64, 10),
    # With nn.CrossEntropyLoss() the output must NOT be passed through a softmax layer,
    # otherwise the computed loss is wrong
    # torch.nn.Softmax(dim=1)
).to(device)
print(model)
```

```
Average loss: 0.0007, Accuracy: 9077/10000 (91%)
Probabilities for digits 0-9: [[0. 0. 0. 0. 0.02 0. 0. 0.09 0. 0.88]]
Predicted digit: [9]
```

```
Model Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=128, bias=True)
(2): Dropout(p=0.2, inplace=False)
(3): Linear(in_features=128, out_features=64, bias=True)
(4): Dropout(p=0.2, inplace=False)
(5): Linear(in_features=64, out_features=10, bias=True)
), Number of parameters 109386
```
Apart from the different numbers of trainable parameters, the results are essentially the same as the original ones.
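The two parameter counts can again be checked by hand from the layer sizes (weights plus biases for each Linear layer):
```python
# 64-64-10 network
print(28 * 28 * 64 + 64 + 64 * 64 + 64 + 64 * 10 + 10)     # 55050
# 128-64-10 network
print(28 * 28 * 128 + 128 + 128 * 64 + 64 + 64 * 10 + 10)  # 109386
```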