# Unet HW Implementation [toc] ## File - input_feature ```shell= 140.116.245.120/home/nfs_share/Unet/data ``` - quantized_weight ```shell= 140.116.245.120/home/nfs_share/Unet/quantized # channel-wise 140.116.245.120/home/nfs_share/Unet/quantized/channel_wise ``` - unet model ```shell= # unet_q.ckpt 140.116.245.120/home/nfs_share/Unet/quantized/model # unet network 140.116.245.120/home/nfs_share/Unet/quantized/utils ``` ## Unet diagram ![](https://i.imgur.com/OsykedE.png) ## Unet Model :::spoiler UNet_multiple_output_4.py ```python= import torch from torch import nn class UNET(nn.Module): def __init__(self, in_channels, out_channels, base_kernal = 4): """ base_kernal(int) = 4: power(2, base_kernal) is the base number of kernal in each layer """ super().__init__() self.conv1 = self.conv_block(in_channels, 2**(base_kernal), 3, 1) self.maxpool1 = self.max_pool_layer() self.conv2 = self.conv_block(2**(base_kernal), 2**(base_kernal+1), 3, 1) self.maxpool2 = self.max_pool_layer() self.conv3 = self.conv_block(2**(base_kernal+1), 2**(base_kernal+2), 3, 1) self.maxpool3 = self.max_pool_layer() self.conv4 = self.conv_block(2**(base_kernal+2), 2**(base_kernal+3), 3, 1) self.maxpool4 = self.max_pool_layer() self.upconv4 = self.conv_block(2**(base_kernal+3), 2**(base_kernal+4), 3, 1) #(in_channels, out_channels, kernel_size, padding) self.ConvT4 = self.expansive_layer(2**(base_kernal+4), 2**(base_kernal+3)) # (in_channels, out_channels) self.upconv3 = self.conv_block(2**(base_kernal+4), 2**(base_kernal+3), 3, 1) self.ConvT3 = self.expansive_layer(2**(base_kernal+3), 2**(base_kernal+2)) self.upconv2 = self.conv_block(2**(base_kernal+3), 2**(base_kernal+2), 3, 1) self.ConvT2 = self.expansive_layer(2**(base_kernal+2), 2**(base_kernal+1)) self.upconv1 = self.conv_block(2**(base_kernal+2), 2**(base_kernal+1), 3, 1) self.ConvT1 = self.expansive_layer(2**(base_kernal+1), 2**(base_kernal)) self.upconv0 = self.conv_block(2**(base_kernal+1), 2**(base_kernal), 3, 1) self.output_1 = self.output_block(2**(base_kernal), out_channels, 3, 1) self.output_2 = self.output_block(2**(base_kernal), out_channels, 3, 1) def __call__(self, x): # contracting path conv1 = self.conv1(x) conv1_pool = self.maxpool1(conv1) conv2 = self.conv2(conv1_pool) conv2_pool = self.maxpool2(conv2) conv3 = self.conv3(conv2_pool) conv3_pool = self.maxpool3(conv3) conv4 = self.conv4(conv3_pool) conv4_pool = self.maxpool4(conv4) # expansive path upconv4 = self.upconv4(conv4_pool) ConvT4 = self.ConvT4(upconv4) upconv3 = self.upconv3(torch.cat([ConvT4, conv4], 1)) ConvT3 = self.ConvT3(upconv3) upconv2 = self.upconv2(torch.cat([ConvT3, conv3], 1)) ConvT2 = self.ConvT2(upconv2) upconv1 = self.upconv1(torch.cat([ConvT2, conv2], 1)) ConvT1 = self.ConvT1(upconv1) upconv0 = self.upconv0(torch.cat([ConvT1, conv1], 1)) output_1 = self.output_1(upconv0) output_2 = self.output_2(upconv0) return output_1, output_2 def conv_block(self, in_channels, out_channels, kernel_size, padding): conv = nn.Sequential( nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=1, padding=padding), nn.BatchNorm2d(out_channels), nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=kernel_size, stride=1, padding=padding), nn.BatchNorm2d(out_channels), nn.ReLU() ) return conv def max_pool_layer(self): max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) return max_pool def expansive_layer(self, in_channels, out_channels): up_conv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1) return up_conv def output_block(self, in_channels, out_channels, kernel_size, padding): output_conv = nn.Sequential( nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=1, padding=padding), ) return output_conv ``` ::: ## Model stat :::spoiler torchstat ```shell= module name input shape output shape params memory(MB) MAdd Flops MemRead(B) MemWrite(B) duration[%] MemR+W(B) 0 conv1.0 3 128 128 16 128 128 448.0 1.00 14,155,776.0 7,340,032.0 198400.0 1048576.0 6.86% 1246976.0 1 conv1.1 16 128 128 16 128 128 32.0 1.00 1,048,576.0 524,288.0 1048704.0 1048576.0 0.92% 2097280.0 2 conv1.2 16 128 128 16 128 128 0.0 1.00 262,144.0 262,144.0 1048576.0 1048576.0 1.00% 2097152.0 3 conv1.3 16 128 128 16 128 128 2320.0 1.00 75,497,472.0 38,010,880.0 1057856.0 1048576.0 3.80% 2106432.0 4 conv1.4 16 128 128 16 128 128 32.0 1.00 1,048,576.0 524,288.0 1048704.0 1048576.0 0.25% 2097280.0 5 conv1.5 16 128 128 16 128 128 0.0 1.00 262,144.0 262,144.0 1048576.0 1048576.0 0.55% 2097152.0 6 maxpool1 16 128 128 16 64 64 0.0 0.25 524,288.0 262,144.0 1048576.0 262144.0 2.35% 1310720.0 7 conv2.0 16 64 64 32 64 64 4640.0 0.50 37,748,736.0 19,005,440.0 280704.0 524288.0 2.21% 804992.0 8 conv2.1 32 64 64 32 64 64 64.0 0.50 524,288.0 262,144.0 524544.0 524288.0 0.20% 1048832.0 9 conv2.2 32 64 64 32 64 64 0.0 0.50 131,072.0 131,072.0 524288.0 524288.0 0.35% 1048576.0 10 conv2.3 32 64 64 32 64 64 9248.0 0.50 75,497,472.0 37,879,808.0 561280.0 524288.0 3.03% 1085568.0 11 conv2.4 32 64 64 32 64 64 64.0 0.50 524,288.0 262,144.0 524544.0 524288.0 0.19% 1048832.0 12 conv2.5 32 64 64 32 64 64 0.0 0.50 131,072.0 131,072.0 524288.0 524288.0 0.36% 1048576.0 13 maxpool2 32 64 64 32 32 32 0.0 0.12 262,144.0 131,072.0 524288.0 131072.0 0.96% 655360.0 14 conv3.0 32 32 32 64 32 32 18496.0 0.25 37,748,736.0 18,939,904.0 205056.0 262144.0 2.13% 467200.0 15 conv3.1 64 32 32 64 32 32 128.0 0.25 262,144.0 131,072.0 262656.0 262144.0 0.18% 524800.0 16 conv3.2 64 32 32 64 32 32 0.0 0.25 65,536.0 65,536.0 262144.0 262144.0 0.34% 524288.0 17 conv3.3 64 32 32 64 32 32 36928.0 0.25 75,497,472.0 37,814,272.0 409856.0 262144.0 2.96% 672000.0 18 conv3.4 64 32 32 64 32 32 128.0 0.25 262,144.0 131,072.0 262656.0 262144.0 0.17% 524800.0 19 conv3.5 64 32 32 64 32 32 0.0 0.25 65,536.0 65,536.0 262144.0 262144.0 0.35% 524288.0 20 maxpool3 64 32 32 64 16 16 0.0 0.06 131,072.0 65,536.0 262144.0 65536.0 0.55% 327680.0 21 conv4.0 64 16 16 128 16 16 73856.0 0.12 37,748,736.0 18,907,136.0 360960.0 131072.0 3.85% 492032.0 22 conv4.1 128 16 16 128 16 16 256.0 0.12 131,072.0 65,536.0 132096.0 131072.0 0.17% 263168.0 23 conv4.2 128 16 16 128 16 16 0.0 0.12 32,768.0 32,768.0 131072.0 131072.0 0.15% 262144.0 24 conv4.3 128 16 16 128 16 16 147584.0 0.12 75,497,472.0 37,781,504.0 721408.0 131072.0 3.11% 852480.0 25 conv4.4 128 16 16 128 16 16 256.0 0.12 131,072.0 65,536.0 132096.0 131072.0 0.17% 263168.0 26 conv4.5 128 16 16 128 16 16 0.0 0.12 32,768.0 32,768.0 131072.0 131072.0 0.14% 262144.0 27 maxpool4 128 16 16 128 8 8 0.0 0.03 65,536.0 32,768.0 131072.0 32768.0 0.33% 163840.0 28 upconv4.0 128 8 8 256 8 8 295168.0 0.06 37,748,736.0 18,890,752.0 1213440.0 65536.0 1.34% 1278976.0 29 upconv4.1 256 8 8 256 8 8 512.0 0.06 65,536.0 32,768.0 67584.0 65536.0 0.16% 133120.0 30 upconv4.2 256 8 8 256 8 8 0.0 0.06 16,384.0 16,384.0 65536.0 65536.0 0.11% 131072.0 31 upconv4.3 256 8 8 256 8 8 590080.0 0.06 75,497,472.0 37,765,120.0 2425856.0 65536.0 4.16% 2491392.0 32 upconv4.4 256 8 8 256 8 8 512.0 0.06 65,536.0 32,768.0 67584.0 65536.0 0.18% 133120.0 33 upconv4.5 256 8 8 256 8 8 0.0 0.06 16,384.0 16,384.0 65536.0 65536.0 0.12% 131072.0 34 ConvT4 256 8 8 128 16 16 295040.0 0.12 37,748,736.0 0.0 0.0 0.0 3.79% 0.0 35 upconv3.0 256 16 16 128 16 16 295040.0 0.12 150,994,944.0 75,530,240.0 1442304.0 131072.0 4.89% 1573376.0 36 upconv3.1 128 16 16 128 16 16 256.0 0.12 131,072.0 65,536.0 132096.0 131072.0 0.19% 263168.0 37 upconv3.2 128 16 16 128 16 16 0.0 0.12 32,768.0 32,768.0 131072.0 131072.0 0.14% 262144.0 38 upconv3.3 128 16 16 128 16 16 147584.0 0.12 75,497,472.0 37,781,504.0 721408.0 131072.0 1.76% 852480.0 39 upconv3.4 128 16 16 128 16 16 256.0 0.12 131,072.0 65,536.0 132096.0 131072.0 0.17% 263168.0 40 upconv3.5 128 16 16 128 16 16 0.0 0.12 32,768.0 32,768.0 131072.0 131072.0 0.13% 262144.0 41 ConvT3 128 16 16 64 32 32 73792.0 0.25 37,748,736.0 0.0 0.0 0.0 2.33% 0.0 42 upconv2.0 128 32 32 64 32 32 73792.0 0.25 150,994,944.0 75,563,008.0 819456.0 262144.0 4.44% 1081600.0 43 upconv2.1 64 32 32 64 32 32 128.0 0.25 262,144.0 131,072.0 262656.0 262144.0 0.19% 524800.0 44 upconv2.2 64 32 32 64 32 32 0.0 0.25 65,536.0 65,536.0 262144.0 262144.0 0.14% 524288.0 45 upconv2.3 64 32 32 64 32 32 36928.0 0.25 75,497,472.0 37,814,272.0 409856.0 262144.0 1.64% 672000.0 46 upconv2.4 64 32 32 64 32 32 128.0 0.25 262,144.0 131,072.0 262656.0 262144.0 0.18% 524800.0 47 upconv2.5 64 32 32 64 32 32 0.0 0.25 65,536.0 65,536.0 262144.0 262144.0 0.29% 524288.0 48 ConvT2 64 32 32 32 64 64 18464.0 0.50 37,748,736.0 0.0 0.0 0.0 4.63% 0.0 49 upconv1.0 64 64 64 32 64 64 18464.0 0.50 150,994,944.0 75,628,544.0 1122432.0 524288.0 4.29% 1646720.0 50 upconv1.1 32 64 64 32 64 64 64.0 0.50 524,288.0 262,144.0 524544.0 524288.0 0.22% 1048832.0 51 upconv1.2 32 64 64 32 64 64 0.0 0.50 131,072.0 131,072.0 524288.0 524288.0 0.13% 1048576.0 52 upconv1.3 32 64 64 32 64 64 9248.0 0.50 75,497,472.0 37,879,808.0 561280.0 524288.0 2.16% 1085568.0 53 upconv1.4 32 64 64 32 64 64 64.0 0.50 524,288.0 262,144.0 524544.0 524288.0 0.41% 1048832.0 54 upconv1.5 32 64 64 32 64 64 0.0 0.50 131,072.0 131,072.0 524288.0 524288.0 0.34% 1048576.0 55 ConvT1 32 64 64 16 128 128 4624.0 1.00 37,748,736.0 0.0 0.0 0.0 9.24% 0.0 56 upconv0.0 32 128 128 16 128 128 4624.0 1.00 150,994,944.0 75,759,616.0 2115648.0 1048576.0 5.45% 3164224.0 57 upconv0.1 16 128 128 16 128 128 32.0 1.00 1,048,576.0 524,288.0 1048704.0 1048576.0 0.31% 2097280.0 58 upconv0.2 16 128 128 16 128 128 0.0 1.00 262,144.0 262,144.0 1048576.0 1048576.0 0.54% 2097152.0 59 upconv0.3 16 128 128 16 128 128 2320.0 1.00 75,497,472.0 38,010,880.0 1057856.0 1048576.0 2.93% 2106432.0 60 upconv0.4 16 128 128 16 128 128 32.0 1.00 1,048,576.0 524,288.0 1048704.0 1048576.0 0.67% 2097280.0 61 upconv0.5 16 128 128 16 128 128 0.0 1.00 262,144.0 262,144.0 1048576.0 1048576.0 0.55% 2097152.0 62 output_1.0 16 128 128 1 128 128 145.0 0.06 4,718,592.0 2,375,680.0 1049156.0 65536.0 2.78% 1114692.0 63 output_2.0 16 128 128 1 128 128 145.0 0.06 4,718,592.0 2,375,680.0 1049156.0 65536.0 1.35% 1114692.0 total 2161922.0 25.34 1,620,017,152.0 737,542,144.0 1049156.0 65536.0 100.00% 60360776.0 ================================================================================================================================================== Total params: 2,161,922 -------------------------------------------------------------------------------------------------------------------------------------------------- Total memory: 25.34MB Total MAdd: 1.62GMAdd Total Flops: 737.54MFlops Total MemR+W: 57.56MB ``` ::: ## System diagram ![](https://i.imgur.com/y9QfyZR.png) ## Input/Output | Signal Name | I/O | Width | | -------- | -------- | -------- | | clk | I | 1 | | rst | I | 1 | | pixel_addr | O | 9 | | pixel_input | I | 128 | | weight_addr | O | 22 | | weight_input | I | 8 | | WE | O | 1 | | conv_addr | O | 18 | | conv_data | I | 8 | | write_data | O | 8 | | done | O | 1 | ## Memory - input_feature memory size : 384*128-bit - weight memory size : 2166272*8-bit - conv1 memory size : 262144*8-bit - conv2 memory size : 131072*8-bit - conv3 memory size : 65536*8-bit - conv4 memory size : 32768*8-bit