# CNN Theory

### Image Data Structure

CNNs usually work on grayscale images (brightness values 0~255).

![](https://i.imgur.com/Z2Ao0G2.png)

https://ming-lian.github.io/2019/05/30/Mathmatic-Principle-in-CNN/

### Convolution

Convolution kernel (operator mask): an N×N matrix that is slid across the original image to convolve it.

e.g. a 3×3 kernel:

| 0 | 0 | 0 |
|---|---|---|
| 1 | 1 | 1 |
| 0 | 0 | 0 |

Filtered image = convolution kernel \* original image (here \* denotes the convolution operation).

During convolution, at each position the kernel entries are multiplied element-wise with the image pixels they cover and the products are summed, as shown below:

![](https://i.imgur.com/D4bBzWE.png)
![](https://i.imgur.com/11GlozF.png)

The output therefore shrinks from M×M to (M−N+1)×(M−N+1); for example, a 28×28 image convolved with a 3×3 kernel yields a 26×26 feature map.

Note: the above assumes a stride of 1; other stride values are possible (the stride is the step size by which the kernel moves).

https://chih-sheng-huang821.medium.com/%E5%8D%B7%E7%A9%8D%E7%A5%9E%E7%B6%93%E7%B6%B2%E8%B7%AF-convolutional-neural-network-cnn-%E5%8D%B7%E7%A9%8D%E9%81%8B%E7%AE%97-%E6%B1%A0%E5%8C%96%E9%81%8B%E7%AE%97-856330c2b703

```go=
package cnn

import (
	"image"
	"image/color"
)

// convolve applies every kernel in allKernels to img and returns one feature
// map per kernel. Out-of-bounds pixels are simply skipped, so each feature
// map keeps the original image size (unlike the "valid" convolution described
// above, which shrinks to (M-N+1) x (M-N+1)).
func convolve(img image.Image, allKernels [][][]int) [][][]int {
	// image bounds
	maxImgX := img.Bounds().Max.X
	maxImgY := img.Bounds().Max.Y

	// allocate one feature map per kernel
	featureMap := make([][][]int, len(allKernels))
	for i := range featureMap {
		featureMap[i] = make([][]int, maxImgY)
		for y := range featureMap[i] {
			featureMap[i][y] = make([]int, maxImgX)
		}
	}

	// visit every pixel
	for y := img.Bounds().Min.Y; y < maxImgY; y++ {
		for x := img.Bounds().Min.X; x < maxImgX; x++ {
			// apply every kernel at this position
			for i, subKernel := range allKernels {
				featureMap[i][y][x] = applyKernel(x, y, img, subKernel)
			}
		}
	}
	return featureMap
}

// applyKernel computes the weighted sum of the pixels covered by subKernel
// when it is centred on (x, y).
func applyKernel(x int, y int, img image.Image, subKernel [][]int) int {
	// v accumulates the weighted sum
	var v int
	// walk over the kernel
	for kernelY := -len(subKernel) / 2; kernelY <= len(subKernel)/2; kernelY++ {
		for kernelX := -len(subKernel[0]) / 2; kernelX <= len(subKernel[0])/2; kernelX++ {
			// skip positions that fall outside the image
			if x+kernelX < 0 || x+kernelX > img.Bounds().Max.X-1 ||
				y+kernelY < 0 || y+kernelY > img.Bounds().Max.Y-1 {
				continue
			}
			// convert the pixel to grayscale and multiply by the kernel weight
			v += int(color.GrayModel.Convert(img.At(x+kernelX, y+kernelY)).(color.Gray).Y) *
				subKernel[kernelY+len(subKernel)/2][kernelX+len(subKernel[0])/2]
		}
	}
	return v
}
```

---

### Pooling

Pooling down-samples the data to reduce its size without losing the important information. The most common variants are max pooling and average pooling.

Max pooling:

![](https://i.imgur.com/wI3fJcf.png)

https://chih-sheng-huang821.medium.com/%E5%8D%B7%E7%A9%8D%E7%A5%9E%E7%B6%93%E7%B6%B2%E8%B7%AF-convolutional-neural-network-cnn-%E5%8D%B7%E7%A9%8D%E9%81%8B%E7%AE%97-%E6%B1%A0%E5%8C%96%E9%81%8B%E7%AE%97-856330c2b703

Average pooling:

![](https://i.imgur.com/vWtvOT9.png)

https://blog.csdn.net/u013289254/article/details/99080916

```go=
// pooling performs 2x2 max pooling on every feature map and returns the
// down-sampled maps (half the original width and height).
func pooling(featureMap [][][]int) [][][]int {
	poolMap := make([][][]int, len(featureMap))
	for i, subFeatureMap := range featureMap {
		rows := len(subFeatureMap) / 2
		cols := len(subFeatureMap[0]) / 2
		poolMap[i] = make([][]int, rows)
		for y := 0; y < rows; y++ {
			poolMap[i][y] = make([]int, cols)
			for x := 0; x < cols; x++ {
				// maximum of the 2x2 window covering rows 2y..2y+1 and columns 2x..2x+1
				poolMap[i][y][x] = max(
					max(subFeatureMap[2*y][2*x:2*x+2]...),
					max(subFeatureMap[2*y+1][2*x:2*x+2]...))
			}
		}
	}
	return poolMap
}

// max returns the largest of its arguments.
func max(input ...int) int {
	m := input[0]
	for _, number := range input {
		if number > m {
			m = number
		}
	}
	return m
}
```

### Fully Connected

Step 1: flatten the pooled feature maps, i.e. reshape them from 2-D into a 1-D vector (if the maps are stored in one contiguous buffer, this can be done in O(1) time by reinterpreting the pointer instead of copying); see the sketch below.

Step 2: feed the flattened data into an ANN (artificial neural network) for training.
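A minimal Go sketch of step 1, assuming the `[][][]int` feature maps produced by the pooling code above; the helper name `flatten` is illustrative and not part of the original code. It copies the values for clarity, whereas the O(1) trick only applies when the data already lives in one contiguous buffer.

```go=
// flatten reshapes the pooled feature maps into a single 1-D slice that can
// be fed to the fully connected layer. (Hypothetical helper: this version
// copies the values rather than reinterpreting a contiguous buffer.)
func flatten(poolMap [][][]int) []int {
	var flat []int
	for _, featureMap := range poolMap {
		for _, row := range featureMap {
			flat = append(flat, row...)
		}
	}
	return flat
}
```

Because a `[][][]int` is not one contiguous allocation in Go, copying is the simple option here; the O(1) reinterpretation would require storing the maps in a flat slice from the start.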
Below is a work-in-progress skeleton for step 2: training the fully connected layer across multiple GPUs, with one OpenMP CPU thread driving each CUDA device. The host data setup and the `kernelAddConstant` kernel follow the CUDA `cudaOpenMP` sample that this code is based on.

```cpp=
#include <helper_cuda.h>
#include <omp.h>
#include <stdio.h>
#include <vector>

#define precision float

struct nerve {
    std::vector<precision> weight;
};

struct layer {
    int indegree = 0;
    int outdegree = 0;
    std::vector<nerve> nerves;
};

// Weighted sum for one nerve: g_a points to the inputs, g_w to that nerve's
// weights (copied to the device, since std::vector cannot be used in device
// code), n is the in-degree.
__device__ precision calculate_nerve(const int* g_a, const precision* g_w, int n) {
    precision reduce_a = 0;
    for (int i = 0; i < n; i++)
        reduce_a += g_a[i] * g_w[i];
    // All of a nerve's calculation, including the activation function,
    // should eventually be done here.
    return reduce_a;
}

// Add the constant b to every element of g_a, one element per thread
// (taken from the CUDA "cudaOpenMP" sample this skeleton is based on).
__global__ void kernelAddConstant(int* g_a, const int b) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    g_a[idx] += b;
}

layer init_layer(int indegree, int outdegree) {
    layer content;
    nerve initnerve;
    content.indegree = indegree;
    content.outdegree = outdegree;
    initnerve.weight.assign(indegree, 0);        // indegree weights, initialised to 0
    content.nerves.assign(outdegree, initnerve); // one nerve per output
    return content;
}

int main() {
    // work in progress: remember to buffer the data before executing

    int num_gpus = 0;  // number of CUDA GPUs
    cudaGetDeviceCount(&num_gpus);
    if (num_gpus < 1) {
        printf("no CUDA capable devices were detected\n");
        return 1;
    }
    omp_set_num_threads(num_gpus);  // create as many CPU threads as there are CUDA devices

    // placeholder host data: n integers split evenly between the CPU threads
    const unsigned int n = num_gpus * 8192;
    const unsigned int nbytes = n * sizeof(int);
    const int b = 3;  // constant added by kernelAddConstant
    std::vector<int> host_a(n, 0);
    int* a = host_a.data();

#pragma omp parallel
    {
        unsigned int cpu_thread_id = omp_get_thread_num();
        unsigned int num_cpu_threads = omp_get_num_threads();

        // set and check the CUDA device for this CPU thread
        int gpu_id = -1;
        checkCudaErrors(cudaSetDevice(cpu_thread_id % num_gpus));  // "% num_gpus" allows more CPU threads than GPU devices
        checkCudaErrors(cudaGetDevice(&gpu_id));
        printf("CPU thread %d (of %d) uses CUDA device %d\n", cpu_thread_id, num_cpu_threads, gpu_id);

        int* d_a = 0;  // pointer to memory on the device associated with this CPU thread
        int* sub_a = a + cpu_thread_id * n / num_cpu_threads;  // this CPU thread's portion of the data
        unsigned int nbytes_per_kernel = nbytes / num_cpu_threads;
        dim3 gpu_threads(128);  // 128 threads per block
        dim3 gpu_blocks(n / (gpu_threads.x * num_cpu_threads));

        checkCudaErrors(cudaMalloc((void**)&d_a, nbytes_per_kernel));
        checkCudaErrors(cudaMemset(d_a, 0, nbytes_per_kernel));
        checkCudaErrors(cudaMemcpy(d_a, sub_a, nbytes_per_kernel, cudaMemcpyHostToDevice));
        kernelAddConstant<<<gpu_blocks, gpu_threads>>>(d_a, b);
        checkCudaErrors(cudaMemcpy(sub_a, d_a, nbytes_per_kernel, cudaMemcpyDeviceToHost));
        checkCudaErrors(cudaFree(d_a));
    }

    return 0;
}
```

### Overall Flow

$\text{convolution} \rightarrow \text{pooling} \rightarrow \text{fully connected} \rightarrow \text{feed into the ANN}$

![](https://i.imgur.com/S2KlFBH.png)

https://medium.com/jameslearningnote/%E8%B3%87%E6%96%99%E5%88%86%E6%9E%90-%E6%A9%9F%E5%99%A8%E5%AD%B8%E7%BF%92-%E7%AC%AC5-1%E8%AC%9B-%E5%8D%B7%E7%A9%8D%E7%A5%9E%E7%B6%93%E7%B6%B2%E7%B5%A1%E4%BB%8B%E7%B4%B9-convolutional-neural-network-4f8249d65d4f
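To tie the sections together, here is a rough Go sketch of that flow, reusing the `convolve`, `pooling`, and `flatten` functions defined above. The helper name `cnnFeatures` is hypothetical, and the single 3×3 kernel is just the example from the convolution section.

```go=
// cnnFeatures runs the pipeline from the sections above on one image:
// convolution -> max pooling -> flattening. The resulting 1-D vector is
// what would be handed to the ANN for training.
func cnnFeatures(img image.Image) []int {
	allKernels := [][][]int{
		{
			{0, 0, 0},
			{1, 1, 1},
			{0, 0, 0},
		},
	}
	featureMaps := convolve(img, allKernels) // convolution
	pooled := pooling(featureMaps)           // 2x2 max pooling
	return flatten(pooled)                   // input vector for the ANN
}
```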