## Week 3
Working on understanding the OpenCL FFT code and optimizing `do_fft_core`.
```rust!
let deg = cmp::min(max_deg, log_n - log_p);
let n = 1u32 << log_n;
let local_work_size = 1 << cmp::min(deg - 1, max_log2_local_work_size);
let global_work_size = n >> deg;
let kernel_name = format!("{}_radix_fft", "Bn256_Fr");
let kernel = program.create_kernel(
&kernel_name,
global_work_size as usize,
local_work_size as usize,
)?;
kernel
.arg(&src_buffer) // Source buffer
.arg(&dst_buffer) // Destination buffer
.arg(pq_buffer) // Precalculated twiddle factors
.arg(omegas_buffer) // [omega, omega^2, omega^4, ...]
.arg(&LocalBuffer::<F>::new(1 << deg)) // Local buffer to store intermediary values
.arg(&n) // Number of elements
.arg(&log_p) // Log2 of `p` (Read more in the link above)
.arg(&deg) // 1=>radix2, 2=>radix4, 3=>radix8, ...
.arg(&max_deg) // Maximum degree supported, according to `pq` and `omegas`
.run()?;
```
Minor testing of changes to `max_log2_radix` for `domain.extended_k() = 22`:
```
# ---------------------------------------------------------------------------
with manually set max_log2_radix = 4:
··········Start: ----------- max_deg=4, max_log2_radix=4, log_n=22
············Start: fft round (log_p)=0 of (log_n)=22, deg=4
············End: fft round (log_p)=0 of (log_n)=22, deg=4 ......................2.698ms
············Start: fft round (log_p)=4 of (log_n)=22, deg=4
············End: fft round (log_p)=4 of (log_n)=22, deg=4 ......................3.066ms
············Start: fft round (log_p)=8 of (log_n)=22, deg=4
············End: fft round (log_p)=8 of (log_n)=22, deg=4 ......................3.431ms
············Start: fft round (log_p)=12 of (log_n)=22, deg=4
············End: fft round (log_p)=12 of (log_n)=22, deg=4 .....................3.795ms
············Start: fft round (log_p)=16 of (log_n)=22, deg=4
············End: fft round (log_p)=16 of (log_n)=22, deg=4 .....................4.158ms
············Start: fft round (log_p)=20 of (log_n)=22, deg=2
············End: fft round (log_p)=20 of (log_n)=22, deg=2 .....................13.511ms
··········End: ----------- max_deg=4, max_log2_radix=4, log_n=22 ...............30.715ms
··End: h_poly ..................................................................24.879s
End: creating proof ... ........................................................37.903s
Total ms for do fft pure : 24808.277
average time for fft_pure: 19.261085ms
number of fft_pure's : 1288
# ---------------------------------------------------------------------------
with manually set max_log2_radix = 5:
··········Start: ----------- max_deg=5, max_log2_radix=5, log_n=22
············Start: fft round (log_p)=0 of (log_n)=22, deg=5
············End: fft round (log_p)=0 of (log_n)=22, deg=5 ......................1.635ms
············Start: fft round (log_p)=5 of (log_n)=22, deg=5
············End: fft round (log_p)=5 of (log_n)=22, deg=5 ......................1.870ms
············Start: fft round (log_p)=10 of (log_n)=22, deg=5
············End: fft round (log_p)=10 of (log_n)=22, deg=5 .....................2.102ms
············Start: fft round (log_p)=15 of (log_n)=22, deg=5
············End: fft round (log_p)=15 of (log_n)=22, deg=5 .....................2.331ms
············Start: fft round (log_p)=20 of (log_n)=22, deg=2
············End: fft round (log_p)=20 of (log_n)=22, deg=2 .....................13.517ms
··········End: ----------- max_deg=5, max_log2_radix=5, log_n=22 ...............21.495ms
········End: do_fft_pure() --> do_fft_core(), domain.extended_k() = 22 .........21.691ms
··End: h_poly ..................................................................18.694s
End: creating proof ... ........................................................31.487s
Total ms for do fft pure : 14029.687
average time for fft_pure: 21.550978ms
number of fft_pure's : 651
# ---------------------------------------------------------------------------
with manually set max_log2_radix = 6:
············Start: fft round (log_p)=0 of (log_n)=22, deg=6
············End: fft round (log_p)=0 of (log_n)=22, deg=6 ......................971.615µs
············Start: fft round (log_p)=6 of (log_n)=22, deg=6
············End: fft round (log_p)=6 of (log_n)=22, deg=6 ......................1.146ms
············Start: fft round (log_p)=12 of (log_n)=22, deg=6
············End: fft round (log_p)=12 of (log_n)=22, deg=6 .....................1.280ms
············Start: fft round (log_p)=18 of (log_n)=22, deg=4
············End: fft round (log_p)=18 of (log_n)=22, deg=4 .....................4.338ms
··········End: ----------- max_deg=6, max_log2_radix=6, log_n=22 ...............7.773ms
········End: do_fft_pure() --> do_fft_core(), domain.extended_k() = 22 .........7.964ms
··End: h_poly ..................................................................9.663s
End: creating proof ... ........................................................22.488s
Total ms for do fft pure : 5061.548
average time for fft_pure: 7.835214ms
number of fft_pure's : 646
# ---------------------------------------------------------------------------
with manually set max_log2_radix = 7:
········Start: do_fft_pure() --> do_fft_core(), domain.extended_k() = 22
··········Start: ----------- max_deg=7, max_log2_radix=7, log_n=22
············Start: fft round (log_p)=0 of (log_n)=22, deg=7
············End: fft round (log_p)=0 of (log_n)=22, deg=7 ......................1.105ms
············Start: fft round (log_p)=7 of (log_n)=22, deg=7
············End: fft round (log_p)=7 of (log_n)=22, deg=7 ......................1.288ms
············Start: fft round (log_p)=14 of (log_n)=22, deg=7
············End: fft round (log_p)=14 of (log_n)=22, deg=7 .....................1.789ms
············Start: fft round (log_p)=21 of (log_n)=22, deg=1
············End: fft round (log_p)=21 of (log_n)=22, deg=1 .....................21.773ms
··········End: ----------- max_deg=7, max_log2_radix=7, log_n=22 ...............25.991ms
··End: h_poly ..................................................................21.432s
End: creating proof ... ........................................................34.165s
# ---------------------------------------------------------------------------
with manually set max_log2_radix = 8:
··········Start: ----------- max_deg=8, max_log2_radix=8, log_n=22
············Start: fft round (log_p)=0 of (log_n)=22, deg=8
············End: fft round (log_p)=0 of (log_n)=22, deg=8 ......................1.137ms
············Start: fft round (log_p)=8 of (log_n)=22, deg=8
············End: fft round (log_p)=8 of (log_n)=22, deg=8 ......................1.338ms
············Start: fft round (log_p)=16 of (log_n)=22, deg=6
············End: fft round (log_p)=16 of (log_n)=22, deg=6 .....................1.908ms
··········End: ----------- max_deg=8, max_log2_radix=8, log_n=22 ...............4.412ms
··End: h_poly ..................................................................7.475s
End: creating proof ... ........................................................20.370s
Total ms for do fft pure : 2894.262
average time for fft_pure: 4.480282ms
number of fft_pure's : 646
# ---------------------------------------------------------------------------
with manually set max_log2_radix = 9:
········Start: ----------- max_deg=9, max_log2_radix=9, log_n=22
··········Start: fft round (log_p)=0 of (log_n)=22, deg=9
··········End: fft round (log_p)=0 of (log_n)=22, deg=9 ........................1.921ms
··········Start: fft round (log_p)=9 of (log_n)=22, deg=9
··········End: fft round (log_p)=9 of (log_n)=22, deg=9 ........................1.941ms
··········Start: fft round (log_p)=18 of (log_n)=22, deg=4
··········End: fft round (log_p)=18 of (log_n)=22, deg=4 .......................4.332ms
········End: ----------- max_deg=9, max_log2_radix=9, log_n=22 .................8.220ms
······End: do_fft_pure() --> do_fft_core(), domain.extended_k() = 22 ...........8.433ms
··End: h_poly ..................................................................10.003s
End: creating proof ... ........................................................22.839s
Total ms for do fft pure : 5365.554
average time for fft_pure: 8.292974ms
number of fft_pure's : 647
# ---------------------------------------------------------------------------
with manually set max_log2_radix = 10:
········Start: do_fft_pure() --> do_fft_core(), domain.extended_k() = 22
··········Start: ----------- max_deg=10, max_log2_radix=10, log_n=22
············Start: fft round (log_p)=0 of (log_n)=22, deg=10
············End: fft round (log_p)=0 of (log_n)=22, deg=10 .....................2.386ms
············Start: fft round (log_p)=10 of (log_n)=22, deg=10
············End: fft round (log_p)=10 of (log_n)=22, deg=10 ....................2.424ms
············Start: fft round (log_p)=20 of (log_n)=22, deg=2
············End: fft round (log_p)=20 of (log_n)=22, deg=2 .....................13.508ms
··········End: ----------- max_deg=10, max_log2_radix=10, log_n=22 .............18.344ms
··End: h_poly ..................................................................16.576s
End: creating proof ... ........................................................29.399s
Total ms for do fft pure : 20785.449
average time for fft_pure: 18.377939ms
number of fft_pure's : 1131
```
Within the `fft_pure` loop, `local_work_size` is 2 raised to the smaller of `deg - 1` and `max_log2_local_work_size`, i.e. `2^min(deg - 1, max_log2_local_work_size)`:
```rust
// 2^log_n
// e.g., 2^22 = 4194304
let n = 1u32 << log_n;
// local_work_size = 2^min(deg - 1, max_log2_local_work_size)
// e.g., if deg = 8 and max_log2_local_work_size = 8:
// cmp::min(8 - 1, 8) = 7
// 2^7 = 128
let local_work_size = 1 << cmp::min(deg - 1, max_log2_local_work_size);
// global_work_size = n / 2^deg (n shifted right by deg bits)
// e.g., for a round with n = 4194304 and deg = 8: 2^22 >> 8 = 16384
// e.g., for a round with n = 4194304 and deg = 6: 2^22 >> 6 = 65536
let global_work_size = n >> deg;
```