第二次討論 - HackMD

## Q: Assume x[i] is a single floating pointing number (“float”) complying with IEEE 754. Calculate the geomean of x[i] without FPU. Ans. int geomean(float *x, int n) // n is the amount of x[i] {} $$ \text{GM} = \sqrt[n]{x_1 \cdot x_2 \cdots x_n} $$ ### 解題想法我的想法是，將輸入的浮點數轉換成定點數，使用定點數進行所有乘法運算，計算整體乘積。最後使用牛頓法求出乘積的 $n$ 次方根，得到近似的幾何平均值。在計算 geomean 之前，需先檢查乘積 $P=\prod x_i$。若 $P<0$ 且 $n$ 為偶數，則實數域中不存在對應的 $n$ 次方根，因此無實數解；只有在 $n$ 為奇數時才可得到實數結果。 ```c int is_geomean_defined(float *x, int n) { int negative_count = 0; int has_zero = 0; for (int i = 0; i < n; i++) { uint32_t bits; memcpy(&bits, &x[i], sizeof(float)); if ((bits >> 31) == 1) { negative_count++; } else if (x[i] == 0.0f) { has_zero = 1; break; } } if (has_zero) return 2; if (negative_count & 1) return (n & 1); return 1; } ``` 以下是我自己設計的 test case : * 一般情形：例如 Case 1，數值皆為正，計算幾何平均的標準情境。 * 負數乘積為正：如 Case 2，負數個數為偶數，結果仍為正數，幾何平均合法。 * 負數乘積為負，且開偶數次方根：如 Case 3，結果為虛數，在實數下不合法。 * 負數乘積為負，但開奇數次方根：如 Case 4，結果為負實數，合法。 * 乘積為 0：如 Case 5，包含 0，定義上 GM 為 0，視為合法，且掠過計算幾何平均。 * 整數完美開根測試：如 Case 6，用來檢驗演算法是否能精確算出 GM。 * 一般浮點數誤差測試：如 Case 7、8，對應實際運算中的誤差情境。 ```c float x[] = {1.0f, 2.0f, 3.0f, 4.0f}; float x2[] = {-2.0f, -3.0f}; float x3[] = {-2.0f, 3.0f}; float x4[] = {-2.0f, 3.0f, 1.0f}; float x5[] = {0.0f, 3.0f, 2.0f}; float x6[] = {2.0f, 8.0f}; float x7[] = {1.5f, 2.0f, 3.0f, 4.0f, -1.57f}; float x8[] = {0.5f, 0.5f, 0.5f}; ``` 先計算有 FPU 的情況下的正確答案： ``` Case 1: Geomean ≈ 2.213364 (FPU) Case 2: Geomean ≈ 2.449490 (FPU) Case 3: Geomean undefined (invalid negatives) Case 4: Geomean ≈ -1.817121 (FPU) Case 5: Geomean = 0.0 (contains zero) Case 6: Geomean ≈ 4.000000 (FPU) Case 7: Geomean ≈ -2.240993 (FPU) Case 8: Geomean ≈ 0.500000 (FPU) ``` Case 3 是負數且開偶數次方根，所以不能做 geomean. ### 整數版本的幾何平均計算（無 FPU）在不具浮點硬體支援的環境下，無法使用 float 或 double 型別進行運算。因此若直接將浮點數近似為整數後處理，會犧牲小數精度，導致結果與實際值偏差。以下為整數版的幾何平均計算流程，使用位元操作解析 IEEE 754 格式，取出 exponent 部分來計算每個值。乘積計算後，再透過牛頓法開 $n$ 次方根。 Reference: [牛頓法](https://hackmd.io/@TonyLinX/r1QzzVbmgl) ```c int int_X_pow(int X, int n) { int result = 1; for (int i = 0; i < n; i++) { result *= X; } if (result==0) { return 1; // 避免除以0 } return result; } int geomean(float *x, int n) { int product = 1; int negative_count = 0; int eps = 1; int MAX_ITR = 30; for (int i = 0; i < n; i++){ uint32_t bits; int xi = 1; memcpy(&bits, &x[i], sizeof(float)); int exponent = ((bits >> 23) & 0xFF) - 127; int sign = (bits >> 31) & 1; if (sign) negative_count++; if (exponent < 0) xi = 1; else xi = 1 << exponent; product *= xi; } int X = product; // 初始值 for (int i = 0; i < MAX_ITR; i++) { int X_next = ((n - 1) * X + (product/ int_X_pow(X, n-1) )) / n; if(llabs(X - X_next) < eps) { break; // 如果 X 已經收斂，則退出迴圈 } X = X_next; } if (negative_count & 1) X = -X; return X; } ``` 實驗結果（與 FPU 計算對比） : ``` Case 1: Geomean ≈ 2.213364 (FPU), 2 (Int), error: 9.639802 % Case 2: Geomean ≈ 2.449490 (FPU), 2 (Int), error: 18.350346 % Case 3: Geomean undefined (invalid negatives) Case 4: Geomean ≈ -1.817121 (FPU), -1 (Int), error: 44.967880 % Case 5: Geomean = 0.0 (contains zero) Case 6: Geomean ≈ 4.000000 (FPU), 4 (Int), error: 0.000000 % Case 7: Geomean ≈ -2.240993 (FPU), -3 (Int), error: 33.869240 % Case 8: Geomean ≈ 0.500000 (FPU), 1 (Int), error: 100.000000 % ``` 可以觀察到，若僅使用整數運算處理幾何平均，一旦實際結果無法開出整數根，都會有誤差。 ### 定點數版本的幾何平均計算（無 FPU） #### 定點數版本需調整以下兩個部分：在牛頓法中，每次迭代會用到 $X^{k-1}_n$。與整數版本不同，我們需要確保每次乘法後仍保持在 Q8.8 格式。這可以透過在每次乘積後進行右移操作來完成： ```diff -int int_X_pow(int X, int n) { - int result = 1; +int64_t int_X_pow_fixed_q8_8(int64_t X, int n) { + + int64_t result = 1LL << 8; + for (int i = 0; i < n; i++) { - result *= X; + result = (result * X) >> 8; } - if (result==0) { - return 1; // 避免除以0 - } - + return result; -} ``` 整數版本僅透過 1 << exponent 取得數值的整數，未考慮 $1.\text{mantissa}$ 的小數部分，因此在浮點數轉換時會產生明顯誤差。相對地，在定點數版本中，將 $1.\text{mantissa}$ 精確地轉換為 Q8.8 格式，可以保留了小數的資訊。這樣的處理方式可顯著降低誤差，使整體結果更接近真實浮點數計算。 ```diff -int geomean(float *x, int n) +float geomean_fixed_q8_8(float *x, int n) { - int product = 1; + int64_t product = 1LL << 8; int negative_count = 0; - int eps = 1; - int MAX_ITR = 30; for (int i = 0; i < n; i++){ uint32_t bits; - int xi = 1; memcpy(&bits, &x[i], sizeof(float)); - int exponent = ((bits >> 23) & 0xFF) - 127; - int sign = (bits >> 31) & 1; + uint8_t sign = (bits >> 31) & 1; + uint8_t exponent = (bits >>23) & 0xFF; + uint32_t mantissa = bits & 0x7FFFFF; + uint32_t value = (1<<8) + (mantissa>>15); - if (sign) negative_count++; - if (exponent < 0) - xi = 1; - else - xi = 1 << exponent; - - product *= xi; - } - int X = product; // 初始值 - for (int i = 0; i < MAX_ITR; i++) { - int X_next = ((n - 1) * X + (product/ int_X_pow(X, n-1) )) / n; - if(llabs(X - X_next) < eps) { - break; + if (sign) negative_count++; + if (expnent > 127) { + value <<= (expnent - 127); + } else if (expnent < 127) { + value >>= (127 - expnent); } - X = X_next; + product = (product * value) >> 8; } - if (negative_count & 1) X = -X; - return X; + int64_t X = product; // 初始值 + const uint8_t MAX_ITR = 30; + const int32_t EPS = 1; + + for(int i = 0; i < MAX_ITR; i++){ + int64_t X_next = ((n - 1) * X + ((product<<8) / int_X_pow_fixed_q8_8(X, n-1))) / n; + if(llabs(X_next - X) < EPS) { + X = X_next; + break; + } + X = X_next; + } + + if (negative_count & 1) + return -(X / 256.0f); + else + return X/256.0f; } ``` 實驗結果（與 FPU 計算對比） : ``` Case 1: Geomean ≈ 2.213364 (FPU), 2 (Int), error: 9.639802 %, Fixed: 2.210938, error_fixed: 0.109624 % Case 2: Geomean ≈ 2.449490 (FPU), 2 (Int), error: 18.350346 %, Fixed: 2.449219, error_fixed: 0.011067 % Case 3: Geomean undefined (invalid negatives) Case 4: Geomean ≈ -1.817121 (FPU), -1 (Int), error: 44.967880 %, Fixed: -1.816406, error_fixed: 0.039316 % Case 5: Geomean = 0.0 (contains zero) Case 6: Geomean ≈ 4.000000 (FPU), 4 (Int), error: 0.000000 %, Fixed: 4.000000, error_fixed: 0.000000 % Case 7: Geomean ≈ -2.240993 (FPU), -3 (Int), error: 33.869240 %, Fixed: -2.238281, error_fixed: 0.120997 % Case 8: Geomean ≈ 0.500000 (FPU), 1 (Int), error: 100.000000 %, Fixed: 0.500000, error_fixed: 0.000000 % ``` 可以觀察到，在使用 Q8.8 格式的定點數實作後，除了整數根的 Case6 之外，其他 Case 都有顯著的改善，誤差都低於 1%，有些甚至達到 0.01% 以下。相較於整數版本因無法保留小數部分導致大幅誤差，定點數能保有部分小數精度，使乘積與開根操作更加穩定且接近真實值。 ![image](https://hackmd.io/_uploads/BJ-Moy3Xel.png) ### 討論 overflow 之前定點數的版本，我是先將所有浮點數轉成 Q8.8 的定點數後，全部相乘在一起得到乘積 $prdouct$ 再去做 geomean 運算，但沒有在每一次相乘過程中限制 $product$ 的大小，所以當 $n$ 很大或 $x[i]$ 很大的時候，在計算 $product$ 的過程就很容易 overflow。這是我新增的 test case 用來測試方法是否會 overflow : * $x_{9}$ 與 $x_{10}$ 是模擬 array 裡的浮點數很大的情況。 * $x_{11}$ 與 $x_{12}$ 是模擬 n 很大的情況。 * $x_{13}$, $x_{14}$, $x_{15}$ 是模擬 $x[i]$ 不都是 $2$ 的冪次以及 n 越來越大的情況下，是否能不 overflow 且保持小數點第六位的精度。 ```c static float x9[] = {1e8f, 1e8f, 1e8f}; static float x10[] = {512.0f, 512.0f, 512.0f, 512.0f, 512.0f, 512.0f, 512.0f}; static float x11[100]; static float x12[1000]; static float x13[10]; static float x14[100]; static float x15[512]; void init_overflow_arrays (void) { for (int i = 0; i < 100; i++) x11[i] = 32.0f; for (int i = 0; i < 1000; i++) x12[i] = 2.0f; for (int i = 0; i < 10; i++) x13[i] = 1.0f + i; for (int i = 0; i < 100; i++) x14[i] = 256.0f + i; for (int i = 0; i < 512; i++) x15[i] = 256.0f + i; } ``` #### 實際遇到 overflow 的部份以下想要討論，哪裡會遇到 overflow 以及具體我改善的想法。 ##### 計算 product 的部份 $$ \text{GM} = \sqrt[n]{x_1 \cdot x_2 \cdots x_n} = \sqrt[n]{product} = \sqrt[n]{P \times 2^{E}} = P^{\frac{1}{n}} \times 2^{\frac{E}{n}} = P^{\frac{1}{n}} \times (2^{a} \times 2^{\frac{r}{n}}) $$ 不再等到計算完所有乘積的結果後，才做幾何平均，而是將原本的 $product$ 分成了 $P$ 跟 $2^{E}$ 分開進行幾何運算。具體來說，在計算 $proudct$ 的過程會多兩次檢查，第一次檢查是檢查 $x[i]$ 是否落在我們控制的範圍內，如果超過，就右移 1 bit 以及 `E++`，這樣可以確保不會因為中途有超大的 $x[i]$，導致 overflow。而第二次的檢查是每一次 $P$ 乘完 $x[i]$ 之後，會檢查 $P$ 是否落在我們控制的範圍內，如果超過，就右移 1 bit 以及 `E++`，這樣可以確保 $P$ 不會越來越大，所以最後會得到一個值為 $P \times 2^{E}$，再拿這個值去做牛頓開 n 次方根。當你的 $n$ 或 $x[i]$ 夠大的情況下，你的 $E$ 也會很大，所以我們無法直接去做 `1 << E` ，於是拆分成整數的部份 $2^{a}$ 以及小數的部份 $2^{\frac{r}{n}}$，例如：$2^{\frac{513}{512}} = 2^{1} + 2^{\frac{1}{512}}$。 ``` Case 9: Value: 4093640704, Product: 4093640704 Value: 4093640704, Product: -6597069766656000 Value: 4093640704, Product: 0 Case 10: Value[0]: 131072, Product: 131072 Value[1]: 131072, Product: 67108864 Value[2]: 131072, Product: 34359738368 Value[3]: 131072, Product: 17592186044416 Value[4]: 131072, Product: 9007199254740992 Value[5]: 131072, Product: 0 Value[6]: 131072, Product: 0 Case 11: Value[0]: 8192, Product: 8192 Value[1]: 8192, Product: 262144 Value[2]: 8192, Product: 8388608 Value[3]: 8192, Product: 268435456 Value[4]: 8192, Product: 8589934592 Value[5]: 8192, Product: 274877906944 Value[6]: 8192, Product: 8796093022208 Value[7]: 8192, Product: 281474976710656 Value[8]: 8192, Product: 9007199254740992 Value[9]: 8192, Product: 0 ``` ##### 計算 $2^{\frac{r}{n}}$ 的部份理論上，可以直接透過 `1 << r` 得到根號內的數值，再去做牛頓法開 $n$ 次方根，但在 Case 15 ，會有 overflow 的問題。例如：在 Case 15 中，$n$ 會是 512 ，而 $r$ 是 463，`1 << 463` 會超過 int64_t 可表達的範圍。可以先透過直式除法將 ${\frac{r}{n}}$ 拆成許多可表達的數值，例如：$2^{\frac{7}{8}} = 2^{\frac{1}{2}} \times 2^{\frac{1}{4}} \times 2^{\frac{1}{8}}$，轉成二進制會變成 $0.111_{2}$，將這些許多可表達的數字相乘可以得到近似於 $2^{\frac{r}{n}}$ 的值。 $$ \frac{r}{n} = 0.b_1b_2b_3\dots_2=\sum \frac{b_i}{2^i} , b_i\in\{0,1\} $$ 先看過去的結果，可以觀察到過去的寫法都會 overflow，無法完成 Case9 ~ Case12 : ``` Case 1: Geomean ≈ 2.213364 (FPU), 2 (Int), error: 9.639802 %, Fixed: 2.210938, error_fixed: 0.109624 % Case 2: Geomean ≈ 2.449490 (FPU), 2 (Int), error: 18.350346 %, Fixed: 2.449219, error_fixed: 0.011067 % Case 3: Geomean undefined (invalid negatives) Case 4: Geomean ≈ -1.817121 (FPU), -1 (Int), error: 44.967876 %, Fixed: -1.816406, error_fixed: 0.039310 % Case 5: Geomean = 0.0 (contains zero) Case 6: Geomean ≈ 4.000000 (FPU), 4 (Int), error: 0.000000 %, Fixed: 4.000000, error_fixed: 0.000000 % Case 7: Geomean ≈ -2.240993 (FPU), -3 (Int), error: 33.869255 %, Fixed: -2.238281, error_fixed: 0.120986 % Case 8: Geomean ≈ 0.500000 (FPU), 1 (Int), error: 100.000000 %, Fixed: 0.500000, error_fixed: 0.000000 % Case 9: Geomean ≈ 100000000.000000 (FPU), 0 (Int), error: 100.000000 %, Fixed: 0.000000, error_fixed: 100.000000 % Case 10: Geomean ≈ 512.000000 (FPU), 0 (Int), error: 100.000000 %, Fixed: 0.000000, error_fixed: 100.000000 % Case 11: Geomean ≈ 32.000000 (FPU), 0 (Int), error: 100.000000 %, Fixed: 0.691406, error_fixed: 97.839355 % Case 12: Geomean ≈ 2.000000 (FPU), 0 (Int), error: 100.000000 %, Fixed: 0.882812, error_fixed: 55.859375 % Case 13: Geomean ≈ 4.528728 (FPU), 524288 (Int), error: 11576836.000000 %, Fixed: 17092.140625, error_fixed: 377315.875000 % Case 14: Geomean ≈ 304.128235 (FPU), 0 (Int), error: 100.000000 %, Fixed: 0.691406, error_fixed: 99.772659 % Case 15: Geomean ≈ 488.833832 (FPU), 0 (Int), error: 100.000000 %, Fixed: 0.882812, error_fixed: 99.819405 % ``` #### 具體修改的地方這裡是討論實際實做的內容 ##### 修改 product 的部份 $$ \text{GM} = \sqrt[n]{x_1 \cdot x_2 \cdots x_n} = \sqrt[n]{product} = \sqrt[n]{P \times 2^{E}} = P^{\frac{1}{n}} \times 2^{\frac{E}{n}} = P^{\frac{1}{n}} \times (2^{a} \times 2^{\frac{r}{n}}) $$ 過去定點數的版本，會先計算出最後的乘積 $product$，再去做牛頓法開 n 次方根，但這樣容易 overflow，所以作了以下調整 : * 將 $prodcut$ 分成 $P \times 2^{E}$ * 檢查 $x[i]$ `(value)` 是否落在我們控制的範圍內 `(Q8_SCALE_FACTOR * 2)`，避免突然的超大值，造成 overflow。 * 相乘完得到新的 $P$，會再檢查新的 $P$ 是否落在控制範圍內 `(63 - Q8_SCALE_FACTOR_BITS - 1)`，確保乘上 $x[i+1]$ 不會 overflow。 ```diff +#define SCALE_BITS 8 +#define SCALE_FACTOR (1 << SCALE_BITS) -float geomean_fixed_q8_8(float *x, int n) +float geomean_precise(float *x, int n) { - int64_t product = 1LL << 8; + int64_t P = 1LL << SCALE_BITS; // P 代表是 product + int64_t R= 0; + + int E = 0; int negative_count = 0; + for (int i = 0; i < n; i++){ uint32_t bits; uint8_t sign = (bits >> 31) & 1; uint8_t expnent = (bits >>23) & 0xFF; uint32_t mantissa = bits & 0x7FFFFF; - uint32_t value = (1<<8) + (mantissa>>15); - + uint32_t value = (1 << SCALE_BITS) + (mantissa >> ((SCALE_BITS < 23) ? (23 - SCALE_BITS) : 0)); + int32_t val = value; + int64_t Pb = P; + + if (sign) negative_count++; - if (expnent > 127) { - value <<= (expnent - 127); - } else if (expnent < 127) { - value >>= (127 - expnent); + E += expnent - 127; + + while(value >= (SCALE_FACTOR * 2)) { + value >>= 1; + E++; } - product = (product * value) >> 8; - - } + P = (P * value) >> SCALE_BITS; + + while(P >= (1LL << (63 - SCALE_BITS - 2))) { + P >>= 1; + E += 1; + } + + } ``` ##### 修改 $2^{\frac{r}{n}}$ 的部份計算完所有的 $x[i]$ 後，將 $P$ 與 $2^{E}$ 分開做牛頓法得到 $n$ 次方根。在計算 $2^{E}$ 的牛頓法，由於在 $n$ 很大的情況下，$E$ 也很大，不能直接 `1 << E`，所以將 $E$ 的部分拆成整數的部份 $2^{a}$ 以及小數的部分 $2^{\frac{r}{n}}$，例如 : $2^{\frac{975}{512}} = 2^{1} \times 2^{\frac{463}{512}}$。整數的部分只需 shift bit 即可完成，但小數的部份須透過直式除法將 ${\frac{r}{n}}$ 拆成許多可表達的數值，例如：$2^{\frac{7}{8}} = 2^{\frac{1}{2}} \times 2^{\frac{1}{4}} \times 2^{\frac{1}{8}}$，轉成二進制會變成 $0.111_{2}$，將這些許多可表達的數字相乘可以得到近似於 $2^{\frac{r}{n}}$ 的值。那這些數值 $2^{\frac{1}{2}}$, $2^{\frac{1}{4}}$, $2^{\frac{1}{8}}$ 可以先建立表格存起來，在計算過程只要透過查表即可完成。 ```diff +int64_t X = 1LL; +int64_t P_X; //product 的初始值 x +int64_t E_X = 1; // exponent 的初始值 x +int64_t a = E / n; +int64_t r = E % n; +int64_t X_r = 0; +int count = r/20; -int64_t X; +P_X = newton_precise(P, n); -X = newton_q8_8(product, n); +if (r) { + E_X = pow2_frac_q23(r, n, 18, SCALE_BITS); + X = (P_X * E_X); + X >>= SCALE_BITS; +} +else + X = P_X; + +if (a > 0) X = X << a; +else X = X >> -a; if (negative_count & 1) X = -X; -return X / 256.0f; +return (X) / (float)SCALE_FACTOR; ``` 以下是我建立的表格： ```c /* ---- Q24.8 (scale = 1<<8) ---- */ static const int64_t k_tbl_8[18] = { 362, 304, 279, 267, 262, 259, 257, 257, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256 }; int64_t pow2_frac_q23(uint32_t p, uint32_t q, int B, int SCALE_BIT) { int64_t res = 1 << SCALE_BIT; for (int i = 0; i < B; ++i) { p <<= 1; if (p >= q) { p -= q; if (SCALE_BIT == 23) res = (res * k_tbl_23[i]) >> SCALE_BIT; else if (SCALE_BIT == 16) res = (res * k_tbl_16[i]) >> SCALE_BIT; else if (SCALE_BIT == 8) res = (res * k_tbl_16[i]) >> SCALE_BIT; else if (SCALE_BIT == 4) res = (res * k_tbl_16[i]) >> SCALE_BIT; } if (p == 0) break; } return res; } ``` 實驗結果 : ``` Case 1: Geomean ≈ 2.213364 (FPU), 2 (Int), Precise: 2.2109375, error_precise: 0.1096243 % Case 2: Geomean ≈ 2.449490 (FPU), 2 (Int), Precise: -1.8046875, error_precise: 0.6842172 % Case 5: Geomean = 0.0 (contains zero) Case 4: Geomean ≈ -1.817121 (FPU), -1 (Int), Precise: -1.8171167, error_precise: 0.0002099 % Case 6: Geomean ≈ 4.000000 (FPU), 4 (Int), Precise: 4.0000000, error_precise: 0.0000000 % Case 7: Geomean ≈ -2.240993 (FPU), -3 (Int), Precise: -2.2265625, error_precise: 0.6439131 % Case 8: Geomean ≈ 0.500000 (FPU), 1 (Int), Precise: 0.5000000, error_precise: 0.0000000 % Case 9: Geomean ≈ 100000000.000000 (FPU), 0 (Int), Precise: 99876864.0000000, error_precise: 0.1231360 % Case 10: Geomean ≈ 512.000000 (FPU), 0 (Int), Precise: 512.0000000, error_precise: 0.0000000 % Case 11: Geomean ≈ 32.000000 (FPU), 0 (Int), Precise: 32.0000000, error_precise: 0.0000000 % Case 12: Geomean ≈ 2.000000 (FPU), 0 (Int), Precise: 2.0000000, error_precise: 0.0000000 % Case 13: Geomean ≈ 4.528728 (FPU), 524288 (Int), Precise: 4.4843750, error_precise: 0.9793805 % Case 14: Geomean ≈ 304.128235 (FPU), 0 (Int), Precise: 303.0000000, error_precise: 0.3709734 % Case 15: Geomean ≈ 488.833832 (FPU), 0 (Int), Precise: 487.0000000, error_precise: 0.3751442 % ``` 由此圖可以觀察到，定點數的版本 (Fixed) 在 Case 1 ~ Case 8 表現很好，但在 Case 9 ~ Case 15 都會 oveflow。經過上述的改善後 (Precise)，不但不會 overflow，而且誤差律都可以降到 $1\%$ 以下。 ![image](https://hackmd.io/_uploads/HyeTWX0CXle.png) 關於精細度的問題可以使用更多位數的定點數使結果更佳精準，例如 Q16 或 Q23。可以觀察到使用 Q23 的誤差率遠低於 Q8。 ![image](https://hackmd.io/_uploads/rydBMAR7xl.png)