在import torch之後再用pyplot畫圖,可能會導致kernel當掉(kernel died);解決方法是在import torch之前先設定下面的環境變數
import os

# Workaround for the kernel dying when plotting with pyplot after importing
# torch; the variable is set before torch is imported.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import torch
import torch.nn.functional as F  # functional API (activations, losses, ...)
from torch import nn  # model layers and loss functions
from torch import optim  # optimizers
# TensorDataset and DataLoader live in torch.utils.data, not in the top-level
# torch package.
from torch.utils.data import TensorDataset  # bundles tensors as (data, label, ...) samples
from torch.utils.data import DataLoader  # produces batched / shuffled training data
在做資料轉換時,建議都要指定變數型別,不然可能會報錯
將某個data轉為tensor,並指定變數型別(此為浮點數)
# Convert `data` to a tensor with an explicit dtype (float32 here).
tensor_data = torch.tensor(data, dtype=torch.float32)
可以做到變數型態轉型、cpu和cuda的移動
# `.to()` handles both dtype casts and device moves.
tensor_float32 = tensor_data.to(dtype=torch.float32)  # cast to float32
tensor_cuda = tensor_float32.to(device='cuda')  # move to the GPU
tensor_cpu = tensor_cuda.to(device='cpu')  # move to the CPU
參考
直接將變數強制轉型成tensor的某個變數型別
# Build a float tensor directly from `data` with the typed constructor.
float_data = torch.FloatTensor(data)
# Same, but the float tensor is created on the GPU.
# NOTE(review): the typed `torch.cuda.*Tensor` constructors look like the legacy
# API — confirm; `torch.tensor(data, dtype=torch.float32, device='cuda')` may be
# the preferred spelling.
float_data_cuda = torch.cuda.FloatTensor(data)
將tensor在CPU、GPU之間移動
tensor_cuda = tensor_float32.cuda()  # move to the GPU
tensor_cpu = tensor_cuda.cpu()  # move to the CPU
# Convert a tensor to a NumPy array.
numpy_array = torch_tensor.numpy()
將numpy_array轉成torch_array:torch_array = torch.from_numpy(numpy_array)
(注意,轉換之後的torch array共用numpy array的記憶體,修改其中一個會同時改變另一個)
# Adding and removing tensor dimensions
a = torch.ones(2, 2, 3)
g = a.unsqueeze(1)  # insert a size-1 dimension at position 1
h = g.squeeze()  # drop every size-1 dimension again
print(g.shape, h.shape)  # (2, 1, 2, 3), (2, 2, 3)
# Shuffle samples and labels with one shared permutation so that every sample
# stays paired with its label. The permutation length is taken from the data
# being indexed rather than a hard-coded `2 * len(x)`, so it covers every
# sample whatever size `train_data` has.
# NOTE(review): assumes `train_label` has the same length as `train_data` — confirm.
random_indice = torch.randperm(len(train_data))
train_data = train_data[random_indice]
train_label = train_label[random_indice]
from torch.utils.data import DataLoader, SubsetRandomSampler

# Split `mnist` into disjoint 90% / 5% / 5% train / validation / test subsets.
# SubsetRandomSampler draws from exactly the indices it is given, whereas
# RandomSampler(seq) only uses len(seq) and yields positions 0..len(seq)-1,
# which would make the three splits overlap.
indice = torch.randperm(len(mnist))
train_end = int(0.9 * len(indice))
val_end = int(0.95 * len(indice))

# .tolist() hands the samplers plain Python ints to index the dataset with.
train_sampler = SubsetRandomSampler(indice[:train_end].tolist())
val_sampler = SubsetRandomSampler(indice[train_end:val_end].tolist())
test_sampler = SubsetRandomSampler(indice[val_end:].tolist())

train_dataloader = DataLoader(mnist, batch_size=batch, sampler=train_sampler)
val_dataloader = DataLoader(mnist, batch_size=batch, sampler=val_sampler)
test_dataloader = DataLoader(mnist, batch_size=batch, sampler=test_sampler)
torch.sum(input, dim, keepdim=False):對input的某個維度求和,並回傳結果
keepdim:如果使用True,求和的維度就不會被squeeze(會保留為長度1的維度)
torch.topk(input, k, dim, largest=True, sorted=True):找到前k大或前k小的數據及其對應的index
k:要找到前k個數據
dim:指定在哪個維度尋找
largest:指定為True就取最大的k個,False則取最小的k個
sorted:返回的結果是否按照順序回傳
torch.gather(tensor1, dim, tensor2):
tensor1裡面放value
tensor2裡面放index(型別必須是LongTensor)
下面的例子在dim = 1上操作
可以取得tensor2的index所對應到tensor1上的value值,EX:
# Values to pick from: one row per sample (float32).
inputs = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
# Column to take from each row; the indices are int64.
index = torch.tensor([[1], [0], [1]], dtype=torch.long)
# Along dim 1, row i of the result is inputs[i, index[i]].
output = inputs.gather(1, index)
print(output)
"""
tensor([[2.],
[3.],
[6.]])
"""
# Concatenate `x` and `fake_mnist` into a single tensor.
train_data = torch.cat((x, fake_mnist))
# Bundle several tensors into one dataset: sample i is (a[i], b[i], c[i]).
a = torch.tensor([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]])
b = torch.tensor([1, 2, 3, 4])
c = torch.tensor([1, 2, 3, 4])
train_data = TensorDataset(a, b, c)
for t in train_data:
    print(t)
# Expected output:
# (tensor([1, 2, 3]), tensor(1), tensor(1))
# (tensor([1, 2, 3]), tensor(2), tensor(2))
# (tensor([1, 2, 3]), tensor(3), tensor(3))
# (tensor([1, 2, 3]), tensor(4), tensor(4))

# Iterate the dataset in order, two samples per batch.
train_loader = DataLoader(train_data, shuffle=False, batch_size=2)
counter = 0
for x, y, z in train_loader:
    counter += 1
    print(f"=========================\nbatch {counter}:")
    print(x)
    print(y)
    print(z)
# Expected output:
# =========================
# batch 1:
# tensor([[1, 2, 3],
#         [1, 2, 3]])
# tensor([1, 2])
# tensor([1, 2])
# =========================
# batch 2:
# tensor([[1, 2, 3],
#         [1, 2, 3]])
# tensor([3, 4])
# tensor([3, 4])
將已經random過順序的list轉成sampler
# Neither name is imported at the top of the file, so bring them in here to
# keep the snippet runnable on its own.
import numpy as np
from torch.utils.data import DataLoader, SubsetRandomSampler

indices = list(range(len(dataset)))  # every index of the dataset
np.random.shuffle(indices)  # shuffle the index list in place
sample = SubsetRandomSampler(indices)  # sampler restricted to these indices
train_data_loader = DataLoader(dataset, batch_size=32, sampler=sample)
# Fully connected classifier: 784 inputs -> 128 -> 64 -> 10 log-probabilities.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),  # normalise over the class dimension of each sample
)
class NetWork(nn.Module):
    """Two-layer fully connected classifier with dropout.

    Args:
        input_: number of input features after flattening.
        h1: width of the hidden layer.
        output_: number of output classes.
    """

    def __init__(self, input_, h1, output_):
        # Required: lets nn.Module register the layers created below.
        super().__init__()
        self.fc1 = nn.Linear(input_, h1)
        self.fc2 = nn.Linear(h1, output_)
        self.drop = nn.Dropout(0.5)

    def forward(self, x, batch=None):
        """Return per-class probabilities for a batch of inputs.

        Args:
            x: input tensor; everything after the first dimension is flattened.
            batch: batch size used for flattening. Optional: when omitted it
                is read from ``x`` so that ``model(x)`` works.

        Returns:
            Tensor of shape ``(batch, output_)`` whose rows sum to 1.
        """
        if batch is None:
            batch = x.size(0)
        # Flatten each sample so it matches the first linear layer.
        x = x.view(batch, -1)
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        # NOTE(review): the output is already softmax-normalised; if this model
        # is trained with nn.CrossEntropyLoss (which applies its own softmax),
        # consider returning the raw fc2 output instead — confirm.
        return F.softmax(x, dim=1)
# nn.Linear accepts no `weight` argument and its `bias` argument is only an
# on/off flag, so create the layer first and then install the custom tensors
# as parameters.
self.f1 = nn.Linear(in_features=10, out_features=5)
self.f1.weight = nn.Parameter(custom_weight)  # expected shape: (5, 10)
self.f1.bias = nn.Parameter(custom_bias)  # expected shape: (5,)
x = self.f1(x)  # shapes must match: the last dimension of x must be 10
print(self.f1.weight)
print(self.f1.bias)
# Functional form: apply ReLU directly.
x = F.relu(x)
# Module form: create the layer once, then call it.
relu_layer = nn.ReLU()
x = relu_layer(x)
使用批次優化時,dim=1
# Softmax over dim 1.
# NOTE(review): the original comment paired this with CrossEntropyLoss, but the
# loss notes in this file say CrossEntropyLoss already applies softmax itself —
# confirm, and feed that loss the un-normalised outputs instead.
output = F.softmax(x, dim=1)
output = F.log_softmax(x, dim=1)  # pair with NLLLoss
# Dropout layer with rate 0.5, applied to x.
drop = nn.Dropout(0.5)
x = drop(x)
# Adam optimizer with every argument spelled out.
optimizer = optim.Adam(
    model.parameters(),  # parameters to update (required)
    lr=0.001,  # learning rate
    betas=(0.9, 0.999),  # Adam coefficients
    eps=1e-08,  # added to the denominator to avoid division by zero
    weight_decay=0,  # regularisation
    amsgrad=False,  # True switches to the alternative update rule
)
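params(model.parameters()):要修改的模型參數,這個一定要有
lr:學習率
betas:adam的係數,分別是計算「梯度」與「梯度平方」移動平均時所用的衰減係數
eps:避免分母為0,會添加到方差的分母上
weight_decay:正規化(L2 penalty)
amsgrad:改成True會用新的更新規則(AMSGrad),有時候會有助於穩定優化過程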
# Other optimizers are constructed the same way.
optim.SGD(model.parameters(), lr = 0.003)  # keeps a fixed learning rate
optim.RMSprop(model.parameters(), lr=0.01)
optim.Adagrad(model.parameters(), lr=0.01)
# NOTE(review): `nn.loss_function` looks like a placeholder for a concrete loss
# class (e.g. the `nn.CrossEntropyLoss` used later in this file) — confirm.
loss_function = nn.loss_function()
# Compare the model output with the labels to obtain the loss.
loss = loss_function(model(input_data),labels)
nn.NLLLoss():crossEntropy中「加負號」和「把對應到正確答案的loss加總」的步驟
要搭配LogSoftmax()
nn.CrossEntropyLoss():顧名思義,crossEntropy
但pytorch很奇怪
你用這個除了做crossEntropy還幫你做softmax(所以模型的輸出不需要再自己做softmax)
nn.BCELoss():二元CrossEntropy
nn.MSELoss():均方誤差
# Set up the model, loss and optimizer.
# NOTE(review): pass the constructor arguments your NetWork definition requires.
model = NetWork()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move the model and loss to the GPU when one is available.
use_gpu = torch.cuda.is_available()
if use_gpu:
    model.to(dtype=torch.float32, device='cuda')
    loss_function.to(device='cuda')

for i in range(epoch):
    model.train()  # training mode
    train_loss = 0
    valid_loss = 0
    # Walk the inputs and labels batch by batch, in lockstep.
    for x, y in zip(batch_train_x, batch_train_y):
        optimizer.zero_grad()  # reset gradients
        if use_gpu:
            x = x.to(dtype=torch.float32, device='cuda')
            # NOTE(review): labels are cast to float32 here; confirm this is
            # the target dtype CrossEntropyLoss should receive for your labels.
            y = y.to(dtype=torch.float32, device='cuda')
        prediction = model(x)
        loss = loss_function(prediction, y)
        loss.backward()  # backpropagate to compute gradients
        optimizer.step()  # update the model parameters
        train_loss += loss.item()
要使用GPU優化前,先做確認
torch.cuda.is_available()  # check this before trying to use the GPU
PyTorch不會自動把已快取但沒在使用的GPU記憶體還給系統,如果能把不必要的清掉能減少許多GPU ram的浪費
torch.cuda.empty_cache()  # free GPU memory that is no longer needed
# One optimisation step: forward pass, loss, backward pass, parameter update.
loss = loss_function(model(x), label)
loss_num = loss.item()  # loss value as a plain number
optimizer.zero_grad()  # reset gradients
loss.backward()  # backpropagate to compute gradients
optimizer.step()  # update the model parameters
backward這個函數會取得產生loss的過程中所參與到的所有tensor的微分,optimizer.step()則會利用過程中算出的微分去修改tensor
啟用dropout
model.train()  # training mode: dropout is enabled
model.eval()  # evaluation mode
# Gradients are not needed while evaluating. `no_grad` lives in the torch
# namespace; the bare name was never imported.
with torch.no_grad():
    # NOTE(review): this iterates the training batches; swap in the validation
    # batches if they are stored separately — confirm.
    for x, y in zip(batch_train_x, batch_train_y):
        if use_gpu:
            x = x.to(dtype=torch.float32, device='cuda')
            y = y.to(dtype=torch.float32, device='cuda')
        prediction = model(x)
        loss = loss_function(prediction, y)
        valid_loss += loss.item()
關閉梯度計算能減少負擔(測試的時候不用算梯度),增加運算速度
with torch.no_grad():
    # Everything inside this block runs without gradient tracking.
    ...  # placeholder body so the template is valid Python; put evaluation code here
關閉dropout
model.eval() # switch the model to evaluation mode
# Option 1: save / load the whole model object.
torch.save(model ,"model.pth" )
# NOTE(review): torch.load unpickles the file, so only load files you trust;
# newer PyTorch releases may need `weights_only=False` to load a whole model —
# confirm against the installed version.
model = torch.load("model.pth")
# Option 2: save only the model's parameters (state dict).
torch.save(model.state_dict() ,"checkpoint.pth" )
訓練過後,模型的各種參數
# Load the saved parameters into a freshly built model.
check_point = torch.load("checkpoint.pth")
model = NetWork()  # must have the same structure as the saved model
model.load_state_dict(check_point)
pytorch本身有提供許多圖像辨識的pre_train模型
提供很多開源的大型模型
教學
使用Densenet121這個已經訓練好的模型
# NOTE(review): `models` is not imported in the visible part of this file —
# presumably `torchvision.models`; confirm. `pretrained=True` may be deprecated
# in newer releases in favour of a `weights=` argument — confirm.
model = models.densenet121(pretrained=True)
# Load VGG16 the same way and print its structure.
model = models.vgg16(pretrained=True)
print(model)
"""
VGG (
(features): Sequential (
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU (inplace)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU (inplace)
(4): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
接下來還是一堆的捲積、池化、Relu
)
(classifier): Sequential (
(0): Linear (25088 -> 4096)
(1): ReLU (inplace)
(2): Dropout (p = 0.5)
(3): Linear (4096 -> 4096)
(4): ReLU (inplace)
(5): Dropout (p = 0.5)
(6): Linear (4096 -> 1000)
)
)
"""
這個方法的問題,是有可能因為pretrain的模型太大導致GPU ram塞不下
# Replace the classifier head: same layout as the printed VGG classifier,
# except the last layer has 2 outputs instead of 1000.
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(25088, 4096),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(4096, 4096),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(4096, 2),
)
# Build a feature extractor from the first five child modules of the pretrained model.
feature_extractor = torch.nn.Sequential(*list(pretrained_model.children())[:5])
features = feature_extractor(input_data)  # extract features
"""
略過很多步驟,包含丟給GPU
"""
# Feed the extracted features to the downstream model.
output = model(features)