Try   HackMD

Parallel Programming HW-6

// OpenCL convolution kernel.
//
// Each work-item computes one output pixel at
// (get_global_id(0), get_global_id(1)). The image dimensions are taken from
// the global work size, so the kernel must be launched with a 2D NDRange of
// exactly imageWidth x imageHeight work-items.
//
// Filter taps that would fall outside the image are skipped, which is
// equivalent to treating out-of-image pixels as zero.
//
// inputImage  : source pixels, indexed as row * imageWidth + column
// outputImage : destination pixels, same indexing as inputImage
// filter      : filterWidth x filterWidth coefficients, indexed row-major
// filterWidth : side length of the filter
//               NOTE(review): the indexing assumes filterWidth is odd -
//               confirm against the host code.
__kernel void convolution(__global const float *restrict inputImage,
                          __global float *restrict outputImage,
                          __constant const float *restrict filter,
                          const int filterWidth)
{
    // Number of taps on each side of the filter centre.
    const int half_filter_width = filterWidth >> 1;

    // Image size is implied by the launch geometry.
    const int imageWidth = (int)get_global_size(0);
    const int imageHeight = (int)get_global_size(1);

    // Pixel handled by this work-item.
    const int x = (int)get_global_id(0);
    const int y = (int)get_global_id(1);

    // Range of filter offsets that stay inside the image. Each bound is the
    // distance to the corresponding image edge, limited to half_filter_width.
    // The distance is passed as the first clamp() argument so that
    // minval (0) <= maxval (half_filter_width) always holds; clamp() results
    // are undefined otherwise.
    const int row_offset_min = -clamp(y, 0, half_filter_width);
    const int row_offset_max = clamp(imageHeight - 1 - y, 0, half_filter_width);
    const int col_offset_min = -clamp(x, 0, half_filter_width);
    const int col_offset_max = clamp(imageWidth - 1 - x, 0, half_filter_width);

    float sum = 0.0f;

    for (int row_offset = row_offset_min; row_offset <= row_offset_max; row_offset++) {
        const int imageRow = y + row_offset;
        const int filterRow = half_filter_width + row_offset;

        // Index of the centre column in this image row / filter row; the
        // column offset is added to both inside the inner loop.
        const int imageBase = imageRow * imageWidth + x;
        const int filterBase = filterRow * filterWidth + half_filter_width;

        for (int col_offset = col_offset_min; col_offset <= col_offset_max; col_offset++) {
            const float filterValue = filter[filterBase + col_offset];

            // Skip zero coefficients to avoid the global-memory read of the
            // corresponding input pixel.
            if (filterValue != 0) {
                sum = mad(filterValue, inputImage[imageBase + col_offset], sum);
            }
        }
    }

    // Store the convolution result for this pixel.
    outputImage[y * imageWidth + x] = sum;
}