/*
 * Component-wise difference of two 3-element vectors:
 * out[k] = a[k] - b[k] for k = 0, 1, 2.
 *
 * a, b: operands; elements 0..2 of each are read.
 * out:  destination; elements 0..2 are written, in index order.
 *
 * The three components are written out by hand rather than in a loop.
 * The function is declared `static inline` so that always_inline is applied
 * to a function that is actually declared inline (GCC otherwise warns under
 * -Wattributes that it "might not be inlinable") and so that no external
 * symbol is emitted for this helper.
 */
static inline __attribute__((always_inline))
void subtract_vector(const double *a, const double *b, double *out)
{
    out[0] = a[0] - b[0];
    out[1] = a[1] - b[1];
    out[2] = a[2] - b[2];
}
/*
 * Render every pixel of the width x height image in parallel.
 *
 * The j/i loops are collapsed into one iteration space and handed out
 * dynamically to a hard-coded team of 64 threads. stk, d and object_color
 * are listed as private, so each thread works on its own copy; all three are
 * (re)filled inside the sample loop before being read. r, g and b are
 * declared inside the loop body and are therefore local to each iteration.
 * Each (i, j) iteration writes only its own three consecutive entries of
 * pixels.
 */
#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(64) \
    private(stk, d, object_color)
for (int j = 0; j < height; j++) {
    for (int i = 0; i < width; i++) {
        /* Running sum of the colour over all samples of this pixel. */
        double r = 0, g = 0, b = 0;

        /*
         * MSAA: rays are built against a grid `factor` times finer than the
         * output image in each dimension; sample s is offset by
         * (s / factor, s % factor) within the pixel.
         * NOTE(review): presumably SAMPLES == factor * factor -- confirm.
         */
        for (int s = 0; s < SAMPLES; s++) {
            idx_stack_init(&stk);
            rayConstruction(d, u, v, w,
                            i * factor + s / factor,
                            j * factor + s % factor,
                            view,
                            width * factor, height * factor);
            if (ray_color(view->vrp, 0.0, d, &stk, rectangulars, spheres,
                          lights, object_color,
                          MAX_REFLECTION_BOUNCES)) {
                r += object_color[0];
                g += object_color[1];
                b += object_color[2];
            } else {
                /* ray_color returned zero: use the background colour. */
                r += background_color[0];
                g += background_color[1];
                b += background_color[2];
            }
        }

        /*
         * Store the averaged colour once per pixel, after all samples have
         * been accumulated, instead of overwriting it on every sample.
         * The store divides by SAMPLES, so SAMPLES must be >= 1.
         */
        pixels[((i + (j * width)) * 3) + 0] = r * 255 / SAMPLES;
        pixels[((i + (j * width)) * 3) + 1] = g * 255 / SAMPLES;
        pixels[((i + (j * width)) * 3) + 2] = b * 255 / SAMPLES;
    }
}
loop_unrolling
Force Inline + loop_unrolling
執行5次的平均時間
OpenMP
OpenMP + Force Inline + loop_unrolling