# 8.8 深度卷積對抗網路

DCGAN是一個使用深度卷積為主架構的GAN模型。它比普通的GAN更能提取圖片的特徵等細節,故生成能力會比使用普通全連接層 (Dense)的GAN要來的優秀。另外也因為CNN訓練參數比單純Dense層來的少一點,所以訓練速度基本上會比較快。

1. 取消所有pooling層。G網路中使用轉置卷積(transposed convolutional layer)進行上取樣,D網路中用加入stride的卷積代替pooling。
2. 在 D 和 G 中均使用 batch normalization
3. 去掉 FC 層,使網路變為 全卷積網路 (Conv2D)
4. G網路中使用 ReLU 作為啟用函式,最後一層使用 tanh
5. D網路中使用 LeakyReLU 作為啟用函式

![螢幕擷取畫面 2023-12-23 204642](https://hackmd.io/_uploads/HJcRLLEDp.png)

```python=
import torch
from torch import nn
import numpy as np
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.optim as optim
from torchvision.utils import save_image
import matplotlib.pyplot as plt
import matplotlib.animation as animation  # stitch several frames into an animation : So cool
import torchvision.utils as vutils

"""
workflow:
1. Download MNIST
2. Pre-process the data (normalization)
3. Build the generator and the discriminator
4. Train D first so it gains some discriminative ability (G frozen)
5. Then train G and D together
6. Inspect the predicted results
"""


class Discriminator(nn.Module):
    """DCGAN discriminator.

    Strided convolutions replace pooling, BatchNorm + LeakyReLU follow
    the DCGAN guidelines, and a final Sigmoid outputs a real/fake score.
    Input: (B, 1, 28, 28)  ->  Output: (B, 1, 1, 1) in [0, 1].
    """

    def __init__(self):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            # input is (1) x 28 x 28
            nn.Conv2d(1, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (64) x 14 x 14
            nn.Conv2d(64, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (128) x 7 x 7
            nn.Conv2d(128, 256, 3, 2, 1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (256) x 4 x 4
            nn.Conv2d(256, 1, 4, 1, 0, bias=False),
            # state size. (1) x 1 x 1
            nn.Sigmoid()
        )

    def forward(self, input):
        return self.main(input)


class Generator(nn.Module):
    """DCGAN generator.

    Transposed convolutions up-sample the latent vector, BatchNorm + ReLU
    stabilize training, and the final Tanh maps outputs to [-1, 1]
    (matching the Normalize((0.5,), (0.5,)) applied to the real data).
    Input: (B, z_dim, 1, 1)  ->  Output: (B, 1, 28, 28).
    """

    def __init__(self, z_dim):
        super(Generator, self).__init__()
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(z_dim, 256, 4, 1, 0, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            # state size. (256) x 4 x 4
            nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            # state size. (128) x 8 x 8
            nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            # state size. (64) x 16 x 16
            nn.ConvTranspose2d(64, 1, 4, 2, 3, bias=False),
            # state size. (1) x 28 x 28
            nn.Tanh()
        )

    def forward(self, input):
        return self.main(input)


class main():
    """Owns the models, optimizers and the DCGAN training loop on MNIST."""

    def __init__(self, batch_size=64, z_dim=100, epochs=100, lr=0.001):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # gpu device
        self.batch_size = batch_size
        self.z_dim = z_dim
        self.epochs = epochs
        self.lr = lr

        self.G = Generator(self.z_dim).to(self.device)
        self.D = Discriminator().to(self.device)

        self.criterion = nn.BCELoss()  # loss
        # DCGAN paper recommends Adam with beta1 = 0.5 for stable GAN training
        self.G_optimizer = optim.Adam(self.G.parameters(), lr=self.lr, betas=(0.5, 0.999))
        self.D_optimizer = optim.Adam(self.D.parameters(), lr=self.lr, betas=(0.5, 0.999))

        self.G_losses = []
        self.D_losses = []
        self.img_list = []

    def get_dataloader(self):
        """Build the MNIST train/test DataLoaders.

        Images are normalized to [-1, 1] so the data range matches the
        generator's Tanh output.
        """
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5,), std=(0.5,)),
        ])
        # Fetch the MNIST datasets
        train_set = dset.MNIST(root='./mnist_data/', train=True, transform=transform, download=True)
        test_set = dset.MNIST(root='./mnist_data/', train=False, transform=transform, download=False)
        # drop_last=True: discard the final incomplete batch so that every
        # batch has exactly self.batch_size samples (the fake-label tensors
        # built in D_train / G_train rely on a fixed batch size).
        train_loader = torch.utils.data.DataLoader(
            dataset=train_set,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True
        )
        test_loader = torch.utils.data.DataLoader(
            dataset=test_set,
            batch_size=self.batch_size,
            shuffle=False
        )
        return train_loader, test_loader

    def D_train(self, x):
        """One discriminator step: learn to separate real images from fakes.

        Computes a real loss (label 1) and a fake loss (label 0), then takes
        one optimizer step. Returns the summed loss as a Python float.
        """
        # Clear gradients, otherwise they accumulate across steps
        self.D.zero_grad()

        # Train on real images:
        x_real = x.to(self.device)                                  # [B, 1, 28, 28]
        y_real = torch.ones(x_real.size(0), device=self.device)     # B ones
        x_real_predict = self.D(x_real)                             # [B, 1, 1, 1]
        D_real_loss = self.criterion(x_real_predict.view(-1), y_real)  # flatten to [B] and compute loss
        D_real_loss.backward()

        # Train on fake images:
        noise = torch.randn(self.batch_size, self.z_dim, 1, 1, device=self.device)
        y_fake = torch.zeros(self.batch_size, device=self.device)   # B zeros
        x_fake = self.G(noise)                                      # [B, 1, 28, 28]
        # detach(): do not build/backprop the graph into G during the D step
        x_fake_predict = self.D(x_fake.detach())                    # [B, 1, 1, 1]
        D_fake_loss = self.criterion(x_fake_predict.view(-1), y_fake)
        D_fake_loss.backward()

        # Combined loss is kept only for logging/inspection
        D_total_loss = D_real_loss + D_fake_loss
        self.D_optimizer.step()
        return D_total_loss.item()

    def G_train(self):
        """One generator step.

        Generate fakes from noise and label them 1, i.e. try to fool D into
        calling them real; early on D is not fooled, so this loss starts high.
        Returns the loss as a Python float.
        """
        # Clear gradients, otherwise they accumulate across steps
        self.G.zero_grad()

        noise = torch.randn(self.batch_size, self.z_dim, 1, 1, device=self.device)
        y_target = torch.ones(self.batch_size, device=self.device)  # target label: real (1)

        x_fake = self.G(noise)      # generate fake images
        y_fake = self.D(x_fake)     # get D's score for them

        # Only G's loss is computed here
        G_loss = self.criterion(y_fake.view(-1), y_target)
        G_loss.backward()
        self.G_optimizer.step()
        return G_loss.item()

    def Draw_plot(self):
        """Plot the recorded D/G losses against the training iteration."""
        plt.figure()
        plt.title(" D Loss, G Loss / Iteration ")
        plt.plot(self.G_losses, label='G')
        plt.plot(self.D_losses, label='D')
        plt.xlabel("Iteration")
        plt.ylabel("Loss")
        plt.legend()
        plt.show()

    def Draw_Anim_Image(self):
        """Turn the saved fake-image grids into a matplotlib animation."""
        fig = plt.figure(figsize=(8, 8))
        plt.axis("off")
        # make_grid tensors are CHW; imshow expects HWC, hence the transpose
        ims = [[plt.imshow(np.transpose(i, (1, 2, 0)), animated=True)] for i in self.img_list]
        ani = animation.ArtistAnimation(fig, ims, interval=200, repeat_delay=1000, blit=True)
        # ani.save('test.gif', writer='imagemagick', dpi=100)
        plt.show()

    def Train(self):
        """Run the full training loop, then show the loss curve and animation."""
        print("\nBasic GAN Implement\n")
        print("Start Training\t", end='')
        if self.device.type == 'cuda':
            print("\tUse {}\n".format(torch.cuda.get_device_name(0)))
        else:
            print("\tUse CPU\n")

        train_loader, test_loader = self.get_dataloader()

        for epoch in range(self.epochs):
            # Each train_x is [batch_size, 1, 28, 28]; labels are unused.
            # (drop_last=True in get_dataloader guarantees full batches.)
            for id, (train_x, _) in enumerate(train_loader):
                # D_train and G_train return the current losses; store them
                self.D_losses.append(self.D_train(train_x))
                self.G_losses.append(self.G_train())

                # Every 50 batches print the running mean of both losses
                if id % 50 == 0:
                    print('[{:03}/{:03}]\t[{:03}/{:03}]\t Loss D: {:.4f} \tLoss G: {:.4f}'.format(
                        epoch + 1, self.epochs, id, len(train_loader),
                        np.mean(self.D_losses), np.mean(self.G_losses)))

                # Every 10 batches keep a grid of fakes for the animation
                if id % 10 == 0:
                    with torch.no_grad():
                        noise = torch.randn(self.batch_size, self.z_dim, 1, 1, device=self.device)
                        # no_grad + cpu(): grab the tensor without touching
                        # backprop state and move it off the GPU
                        fake = self.G(noise).cpu()
                    self.img_list.append(vutils.make_grid(fake, padding=0, normalize=True))

        self.Draw_plot()        # draw the training loss curves
        self.Draw_Anim_Image()  # turn the saved grids into an animation


if __name__ == '__main__':
    # batch_size=64, z_dim=100, epochs=1, lr=0.001
    main(64, 100, 1, 0.001).Train()
```

![DCGAN_loss_curve](https://hackmd.io/_uploads/HkCMDINP6.png)