PyTorch 自學筆記

# 環境配置

安裝 PyTorch 2.1.2 Windows 版本

> pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Quick Start

## 引入函式庫

```python
import torch  # PyTorch core library
from torch import nn  # neural-network building blocks
from torch.utils.data import DataLoader  # dataset batching / loading utilities
from torchvision import datasets  # built-in datasets
from torchvision.transforms import ToTensor  # converts images to tensors
import matplotlib.pyplot as plt  # plotting
```

## 下載內建的 FashionMNIST 資料集

```python
# Download the training split; training_data is a FashionMNIST dataset object.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,  # select the training split
    download=True,
    transform=ToTensor(),
)

# Download the test split; test_data is a FashionMNIST dataset object.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,  # select the test split
    download=True,
    transform=ToTensor(),
)
```

## 使用 DataLoader 打包資料

* DataLoader 可以幫我們打包 dataset，以更有效的載入和管理
* 功能包含了 batch 處理、打亂資料集等等
* 傳入的 dataset 須為 dataset 型態，同時可以指定 batch size (不指定的話預設為 1)

```python
batch_size = 64  # batch size shared by both loaders

# DataLoader over the training split.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
# DataLoader over the test split.
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Each iteration yields one batch: X holds the samples, y the matching labels.
for X, y in test_dataloader:
    print(f"Shape of X [batch size, channel , height, width]: {X.shape}")  # sample tensor shape
    print(f"Shape of y: {y.shape}")  # label tensor shape
    break

for X, y in test_dataloader:
    plt.imshow(X[0][0],cmap="gray")  # show the first image of the first batch
    plt.show()
    print("label: ",y[0])  # label of the first image of the first batch
    break
```

## 建立模型

* override basic model 中的 \__init__ 跟 forward 兩個 method 後即可使用
* 在疊加神經網路層的時候，更像是疊上權重及想要的 activation function，而不是像 tensorflow 一樣疊加神經元

```python
# Pick the device used for training: CUDA (NVIDIA GPU) if available,
# otherwise MPS, otherwise fall back to the CPU.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")


# PyTorch models subclass nn.Module and override __init__ and forward.
class NeuralNetwork(nn.Module):
    """Three-layer fully connected classifier: 28*28 inputs, 10 output logits."""

    def __init__(self):
        super().__init__()  # run the nn.Module constructor
        self.flatten = nn.Flatten()  # flattens every sample into a 1-D vector
        self.linear_relu_stack = nn.Sequential(  # layers are applied in the listed order
            nn.Linear(28*28, 512),  # fully connected: 28*28 input features -> 512 output features
            nn.ReLU(),  # ReLU activation
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Return the logits produced for the input batch ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


model = NeuralNetwork().to(device)  # create the model and move it to the chosen device
print(model)
```

## 定義訓練模型的函式

* 需要實作訓練模型時的各種細節 (計算 loss、反向傳播等)

```python
train_loss_history = []  # average training loss of every epoch
train_accuracy_history = []  # training accuracy of every epoch


def train(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: DataLoader yielding (samples, labels) batches.
        model: the network to optimise.
        loss_fn: callable computing the loss from (predictions, labels).
        optimizer: optimizer that updates the parameters of ``model``.

    Uses the module-level ``device`` and appends the epoch's average loss and
    accuracy to ``train_loss_history`` / ``train_accuracy_history``.
    """
    size = len(dataloader.dataset)  # number of samples, used for progress and accuracy
    num_batches = len(dataloader)  # number of batches, used to average the loss
    # Training mode changes the behaviour of layers such as Dropout/BatchNorm;
    # it does not by itself enable or disable weight updates.
    model.train()
    train_loss, train_correct = 0, 0
    for batch, (X, y) in enumerate(dataloader):  # batch is the index of the current batch
        X, y = X.to(device), y.to(device)  # move the batch to the chosen device

        # Forward pass and loss.
        pred = model(X)
        loss = loss_fn(pred, y)
        loss_value = loss.item()  # plain float; keeps `loss` bound to the tensor
        train_loss += loss_value
        train_correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # Backward pass.
        loss.backward()  # compute the gradient of the loss w.r.t. every parameter
        optimizer.step()  # update the parameters from those gradients
        optimizer.zero_grad()  # reset gradients so they do not accumulate across batches

        if batch % 100 == 0:  # report progress every 100 batches
            current = (batch + 1) * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")

    train_loss /= num_batches
    train_correct /= size
    train_loss_history.append(train_loss)
    train_accuracy_history.append(train_correct)
```

## 定義測試模型的函式

* 需要實作測試模型時的各種細節 (計算 loss、計算預測正確的資料數量等)
* 用於測試的資料集單純用來衡量模型，並不會影響到模型的權重

```python
test_loss_history = []  # average test loss of every epoch
test_accuracy_history = []  # test accuracy of every epoch


def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy / average loss.

    Uses the module-level ``device`` and appends the results to
    ``test_loss_history`` / ``test_accuracy_history``.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    # Evaluation mode changes the behaviour of layers such as Dropout/BatchNorm;
    # gradient tracking is turned off separately by torch.no_grad() below.
    model.eval()
    test_loss, test_correct = 0, 0
    with torch.no_grad():  # no gradients are recorded inside this block
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()  # accumulate the batch loss
            test_correct += (pred.argmax(1) == y).type(torch.float).sum().item()  # count correct predictions
    test_loss /= num_batches  # average loss per batch
    test_correct /= size  # accuracy
    print(f"Test Error: \n Accuracy: {(100*test_correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    test_loss_history.append(test_loss)
    test_accuracy_history.append(test_correct)
```

## 開始執行訓練及測試

* 訓練前需先定義損失函數 (loss function) 與 optimizer
* 呼叫剛剛定義好，用來訓練及測試的函式

```python
# The loss function and optimizer must exist before train()/test() are called.
loss_fn = nn.CrossEntropyLoss()  # classification loss computed from logits and class indices
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)  # plain SGD over the model parameters

epochs = 5  # number of training epochs
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)  # train on the training split
    test(test_dataloader, model, loss_fn)  # evaluate on the test split
print("Done!")
```

## 分析訓練期間的 loss

```python
plt.plot(train_loss_history, label='Train loss')
plt.plot(test_loss_history, label='Test loss')
plt.xlabel('Epoch')
plt.ylabel('loss')
plt.legend()
plt.show()
```

## 分析訓練期間的 accuracy

```python
plt.plot(train_accuracy_history, label='Train accuracy')
plt.plot(test_accuracy_history, label='Test accuracy')
plt.xlabel('Epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()
```

## 儲存模型

* PyTorch 的模型其副檔名為 .pth

```python
import os

os.makedirs("saved_model", exist_ok=True)  # torch.save does not create a missing parent directory
torch.save(model.state_dict(), "saved_model/course0_model.pth")  # serialise the model parameters
print("Saved PyTorch Model Successfully!")
```

## 載入模型

```python
model = NeuralNetwork().to(device)  # create an untrained model with the same architecture
# map_location lets a checkpoint saved on one device (e.g. CUDA) load on another (e.g. CPU);
# weights_only=True restricts unpickling to tensors and plain containers.
state_dict = torch.load(
    "saved_model/course0_model.pth",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)  # copy the saved parameters into the model
```

## 使用模型預測資料

```python
model.eval()  # evaluation mode (affects layers such as Dropout/BatchNorm)
with torch.no_grad():  # no gradients are recorded inside this block
    for X, y in test_dataloader:  # use the test split as prediction input
        plt.imshow(X[0][0],cmap="gray")  # show the first image of the first batch
        plt.show()
        X, y = X.to(device), y.to(device)
        pred = model(X)
        print("predict: ",pred.argmax(1)[0])  # predicted class of that image
        print("answer: ",y[0])  # ground-truth class of that image
        break
```

## 分析混淆矩陣

```python
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import numpy as np

y_pred = []
y_true = []

# Collect predictions and ground truth over the whole test split.
model.eval()  # evaluation mode (affects layers such as Dropout/BatchNorm)
with torch.no_grad():  # no gradients are recorded inside this block
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        # argmax over the class dimension gives the predicted class directly;
        # exponentiating the logits first does not change which index is largest.
        y_pred.extend(pred.argmax(1).cpu().numpy())  # save predictions
        y_true.extend(y.cpu().numpy())  # save ground truth

print("橫軸: 預測結果")
print("縱軸: 正確答案")

# Build the confusion matrix and normalise every row by its number of true samples.
cf_matrix = confusion_matrix(y_true, y_pred)
df_cm = pd.DataFrame(cf_matrix / np.sum(cf_matrix, axis=1)[:, None])
plt.figure(figsize = (10,6))
sn.heatmap(df_cm, annot=True)
plt.show()
```

# Tensor

* Tensor 的本質跟 numpy 的 ndarray 非常像，但多了一些深度學習特化功能
    * 可以在 GPU 上執行
    * 微分速度快

# Datasets & DataLoaders

* 基本的使用方法在 Quick Start 中有所介紹

# Transforms

* 提供了許多方法，可以對資料進行預處理 (翻轉、裁切等等)
* 可以提升訓練資料集的數量

# Neural Network

* PyTorch 中提供了許多神經網路種類，以下介紹其使用方式