## Week 49 Debugging cuda/fft field code: Using BN256 Scalar field for fft calculations, (changes from lastweek). Still trying to get the cuda code to return values that are consistent with teh cpu code. Unfortunately this week has mostly been spent debugging and going through the code. ```rust! #[derive(Clone, Debug, Hash, Copy)] pub struct MontgomeryConfigBN256PrimeField; impl IsModulus<U256> for MontgomeryConfigBN256PrimeField { const MODULUS: U256 = // U256::from_hex_unchecked("30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"); // Base U256::from_hex_unchecked("30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001"); // Scalar } pub type BN252PrimeField = U256PrimeField<MontgomeryConfigBN256PrimeField>; impl IsFFTField for BN252PrimeField { const TWO_ADICITY: u64 = 28; // Fr // const TWO_ADIC_ROOT_OF_UNITY: BigInteger = BigInteger([ // 7164790868263648668u64, // 11685701338293206998u64, // 6216421865291908056u64, // 1756667274303109607u64, // ]); // hex: 0x1860ef942963f9e756452ac01eb203d8a22bf3742445ffd6636e735580d13d9c // dec: 11026779196025039675543067535165575398706865421176733435921293210460577938844 const TWO_ADIC_PRIMITVE_ROOT_OF_UNITY: U256 = UnsignedInteger::from_hex_unchecked( "1860ef942963f9e756452ac01eb203d8a22bf3742445ffd6636e735580d13d9c", ); fn field_name() -> &'static str { "bn256" } } ``` looking at calcualtion of root: revision of calcualtion/theory: A Two-Adic field is a type of finite field whose order is of the form $2^n k+1$, where $n$ is the two-adicity of the field. Two-Adic Primitive Root of Unity: A primitive root of unity in a field is an element that, when raised to certain powers, generates all the non-zero elements of the field. A Two-Adic primitive root of unity, denoted as $\omega$, satisfies the property $\omega^{2^n}$ and $ \omega^{j} \eq 1$ for every $j$ below $2^n$. This property is crucial for FFT algorithms because FFT heavily relies on roots of unity. Fast Fourier Transform (FFT): FFT is an algorithm for efficiently computing the Discrete Fourier Transform (DFT) and its inverse. It exploits the properties of roots of unity, allowing a more efficient computation of the DFT compared to a direct computation. ```rust! // root = two_adic_primitive_root_of_unity^(log_power + 1) let root = (0..log_power).fold(two_adic_primitive_root_of_unity.clone(), |acc, _| acc.square() ); ``` basically: $$ \textrm{root} = \textrm{two_adic_primitive_root_of_unity}^{(\textrm{log_power + 1})} $$ calculation by fft (cpu) ``` omega = 0x1860ef942963f9e756452ac01eb203d8a22bf3742445ffd6636e735580d13d9c a.len() = 16 n bin bit-rev n r 0 0000 0000 0 1 0001 1000 8 2 0010 0100 4 3 0011 1100 12 4 0100 0010 2 5 0101 1010 10 6 0110 0110 6 7 0111 1110 14 8 1000 0001 1 9 1001 1001 9 10 1010 0101 5 11 1011 1101 13 12 1100 0011 3 13 1101 1011 11 14 1110 0111 7 15 1111 1111 15 Twiddles: t[0] = 0x0000000000000000000000000000000000000000000000000000000000000001 t[1] = 0x1860ef942963f9e756452ac01eb203d8a22bf3742445ffd6636e735580d13d9c t[2] = 0x0e8e5dc131de33e6dcdebdc5fd779eabf80b19d3f6e66b11181deae3fafdc0c2 t[3] = 0x1896fee423139383e278503fb5d268c4b01191c6e71e71c41af397fb90af0965 t[4] = 0x13f3b69292d8745bfec553cd6d8df8af725d131eca6a5059a1be47ce9dab637c t[5] = 0x1426031e97e4ca82a1e1292999bd0f1cf5813737d8feebedd029eaf311fe62ec t[6] = 0x232829cdaae6be2b1e01ec2efdccd04efc45d2b9bfabc4009f68334e7a30f097 t[7] = 0x0b450f3b3ef2c5334f970deecaca76c77383d3688e9bf09074f3661ef859e046 -----------------------------------------------------------------. fft_res = () mutatted input data: input[0] = 0x0000000000000000000000000000000000000000000000000000000000000cf8 input[1] = 0x27029cd77abd188fd756f5b8ead288a54aa6b574367bde8288ef0d42f594f830 input[2] = 0x1902921bcfc951a4e1bcef3bb60471527b350246ccec251cdf6da2ad46d0a6a5 input[3] = 0x04ab05334b48d15efab6ca924f144b72b602050f9a33f38d7e0b66956c41dd05 input[4] = 0x0e70c93bf32ec262cd39cd47e7ebbc641a4ce01ce6263119424c5db91d5b1be5 input[5] = 0x19ba327643a7090123e44f76eda49ebd010d8e10f9696eba7649e8b51e8e733d input[6] = 0x19ebbd44319c6284641cac5fd71f988d9d88a15e6da139cfd663ee795ae87fcc input[7] = 0x1dd078f72d780a6a156c7d62a3df6f9a058d584d7dfb096eb833b2b2bed5af15 input[8] = 0x0000000000000000000000000000000000000000000000000000000000000008 input[9] = 0x0b02876024d58ab9ec22e8be813659ffabc34f19635c6bb735ec3817980c551a input[10] = 0x03df005c16943320b8b6ab5419d30775b964bdf2ff603d15a52b18c4f3e59125 input[11] = 0x2f2c755517eff3a9cd828e2d5f2ae5a8850f3dad410e80122ebc765d498f2293 input[12] = 0x21f38536ee02ddc6eb16786e99959bf90de7082b93933f78019597dad2a4e42c input[13] = 0x1e681cb4cab2fd6a05d94ce7c782c88b99efd61d7f107c0217ee3e68b93caf0e input[14] = 0x29fb4d29aa6959097210447d5c0b9f647e456ef8b98545202cc7413c4a61488c input[15] = 0x05c1d2e94629077f1663c5e192b676d1cec99d5b7b5610405d78da31e5ece202 ------------------------------------ ``` against cuda / fft sequential, ther is some bug ``` ------------------------------------ cuda fft result: OK cuda_fft len: 16 cuda fft: value: 0x1a485ecdcf2a38bbacfabe737dbf1d3b9ed52fb0e83a868931c700f9dde6e3b5 value: 0x2611f6aa264596dd37affbb5e308f2c6fe1ef6d71cff0e710d17ba1ad0398f47 value: 0x12d2c2776bf65f1048cae90dcafea207c4ecca379b9261bd38adcc1c0bb2e683 value: 0x2424f59874161f1b8ed219517cf62d951d6f678f46371e550800fe1de8af5787 value: 0x27647dffc0edd6024c6d24674d26ecba5685253a47a3484daaf6e022be25f6e6 value: 0x5940d5bd0590f6a28e2829e39dac24267186a0d89479ba9ce6de5ca91aadeee value: 0x206c6f61b47cd6dea864ab0118cee0f7e23991f321038217cb215dcf92ecd209 value: 0x573b70bf2a3debb2e5ee1f057b3354efba1431c882e35844e73f43860c8967a value: 0x1cf91910ee7f56ae64847f762cf80259737962ae3fb5a02602db177c0b043ee8 value: 0x1ba42aa751f4368847d61bc82949a0b1360bff16dd4919cab06700c29cc232e3 value: 0x29ff34beecdcac1ef2d6137e2332d644829e8d2c0f4b634055604a645723a43d value: 0x11eb3d404a44a4f8d30a332e8afe985118358e9cb396337d19353efd687ae504 value: 0x299d6eee2db3f776334b51b29670caef77d64fe25a2fdf6cdfbe98ae8f7b967e value: 0x27cd8836c1ff06a132bea84a70b3e508e3486bc931e445831e4c2d34bbeb14ed value: 0x1f91db772d2dbe3d62a10d8e58162d19ad50836438aafe5b128a5f0eb4357450 value: 0x3d77a6548651e0937919974899d87b10241b14841c5d0821dfd3e5d22f60355 sequential fft: value: 0xcf8 value: 0x217b562a1fc95e3700ce3ae3e82c88e8b8d57ab39b8539d965d0a82d919a005 value: 0x12b9920aeca4412fa6cf348642a987820d793a9165c970c37d81f4e032a4b094 value: 0x2e23b8869649468dd189d3893c2171eadaff9e66c5fab261829256195ff9d50f value: 0xbd8f12c8af610745bb37a6781382455cd4014027a79adc914812e8e89c53d23 value: 0x14d4612d03078a90de0069dd81abdac34e4dd267ed1d0a5ceb4460f59d148ddc value: 0x2d635a7dc5c2a71cb62dcafabdfbb45858ff481f0d8617bd37716e8770a73fc6 value: 0x1ea6938bd5b57976235cbbd30ba08adda7edd696702774b2b1078eb9364a3926 value: 0x8 value: 0x235b6eb3374bec7bdd91855e46d046758f651d7791da8de964a6df3d8c2fc123 value: 0x4f8504e2947dfb91097c048bfc6c1298d06ed738f29eaceab80683ce0e5c9b2 value: 0x2ca2fc74f53c07ab9ad1c26c3fd5f6c5d6fef1d755117d18ec5086f58154aa7d value: 0x248b5d46563b8fb55c9ccb4f004934075af3d445ff3fc2c82f60c705663ac2ee value: 0x25808de230b9d50afb7f5c6a7f62ffecf3f3f0c7aaf03bd1af9e651814b70589 value: 0x1bb3600ee6b4784e030bcba34296b3b65ce8606cf0f96dd327501f835bce4616 value: 0x18c02c91f7b37725e2badb73798cdc8f11e2ea4371cb66f39d98b04d81525306 ------------------------------------ ``` GHaving to resort to testing this on p = 18446744069414584321 $$ \textrm{2-adicity} = 32 \\ $$ $\textrm{log_power} = \textrm{2-adicity} - order = 28$ $$ \textrm{root} = 1753635133440165772^{(28 + 1)} $$ ``` ------------------------------------ get_powers_of_primitive_root() order : 4 ----------------------------------------------------------- get_primitive_root_of_unity() order = 4 TWO_ADIC_PRIMITVE_ROOT_OF_UNITY --> 1753635133440165772 FE two_adic_primitive_root_of_unity --> FieldElement { value: 1753635133440165772 } FE new 2-adic primitive root of unity : FieldElement { value: 1753635133440165772 } --> FieldElement { value: 1753635133440165772 } F TWO_ADICITY : 32 log_power : 28 ----------------------------------------------------------- get_primitive_root_of_unity() END. root : FieldElement { value: 17293822564807737345 } up_to = 8 state = FieldElement { value: 17293822564807737345 } res = FieldElement { value: 1 } state = FieldElement { value: 18446744069397807105 } res = FieldElement { value: 17293822564807737345 } state = FieldElement { value: 4503599626321920 } res = FieldElement { value: 18446744069397807105 } state = FieldElement { value: 281474976710656 } res = FieldElement { value: 4503599626321920 } state = FieldElement { value: 4096 } res = FieldElement { value: 281474976710656 } state = FieldElement { value: 18446742969902956801 } res = FieldElement { value: 4096 } state = FieldElement { value: 18446744000695107585 } res = FieldElement { value: 18446742969902956801 } state = FieldElement { value: 18446744069414584320 } res = FieldElement { value: 18446744000695107585 } ------------------------------------ get_powers_of_primitive_root() END. twiddles len : 8 twiddle[0] : FieldElement { value: 1 } twiddle[0] : FieldElement { value: 281474976710656 } twiddle[0] : FieldElement { value: 18446744069397807105 } twiddle[0] : FieldElement { value: 18446742969902956801 } twiddle[0] : FieldElement { value: 17293822564807737345 } twiddle[0] : FieldElement { value: 4096 } twiddle[0] : FieldElement { value: 4503599626321920 } twiddle[0] : FieldElement { value: 18446744000695107585 } ------------------------------------ ```