2016q3 Homework1 (clz) === contributed by `<vic85821>` ## 開發環境 * Ubuntu 16.04.1 LTS * Linux version 4.4.0-38-generic * CPU : Intel® Core™ i5-3230M CPU @ 2.60GHz * MEM : 8GB * L1d cache : 32K * L1i cache : 32K * L2 cache : 256K * L3 cache : 3072K ## 案例分析 ### iteration ```c int clz_iteration(uint32_t x) { int n = 32, c = 16; do { uint32_t y = x >> c; if (y) { n -= c; x = y; } c >>= 1; } while (c); return (n - x); } ``` ### binary search ```c int clz_bst(uint32_t x) { if (x == 0) return 32; int n = 0; if (x <= 0x0000FFFF) { n += 16; x <<= 16; } if (x <= 0x00FFFFFF) { n += 8; x <<= 8; } if (x <= 0x0FFFFFFF) { n += 4; x <<= 4; } if (x <= 0x3FFFFFFF) { n += 2; x <<= 2; } if (x <= 0x7FFFFFFF) { n += 1; x <<= 1; } return n; } ``` 將32bit的數字一直砍半並對照,對照次數 = log~2~N 32 bit 需要比對5次、64bit需要比對6次 ### byte shift ```c int clz_byteshift(uint32_t x) { if (x == 0) return 32; int n = 1; if ((x >> 16) == 0) { n += 16; x <<= 16; } if ((x >> 24) == 0) { n += 8; x <<= 8; } if ((x >> 28) == 0) { n += 4; x <<= 4; } if ((x >> 30) == 0) { n += 2; x <<= 2; } n = n - (x >> 31); return n; } ``` 與binary search類似的概念 ### recursive ```c int clz_recursive(uint32_t x,int len) { if(len == 0) return 0; /* shift upper half down, rest is filled up with 0s */ uint16_t upper = (x >> len); // mask upper half away uint16_t lower = (x & (0xFFFF >> (16 - len))); return upper ? clz_recursive(upper,len>>1) : len + clz_recursive(lower,len>>1); } ``` tail recursive : 一邊計算一邊將結果的值記下來,可以減少遞迴的次數,但需要額外的記憶體空間 ### Harley algorithm ```c int clz_Harley(uint32_t x) { static prog_uint8_t const Table[] = { 0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF, 16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF, 0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF, 0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF, 14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF, 18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13, 26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25, 0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF, }; /* Propagate leftmost 1-bit to the right */ x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); /* x = x * 0x6EB14F9 */ x = (x << 3) - x; /* Multiply by 7. */ x = (x << 8) - x; /* Multiply by 255. */ x = (x << 8) - x; /* Again. */ x = (x << 8) - x; /* Again. */ return Table[x >> 26]; } ``` 還在找資料Orz!! ## 正確性&效能分析 ### 1.正確性 ```c for(uint32_t i = 0; i < UINT_MAX; i++) { if(__builtin_clz(i) != clz_iteration(i)) printf("%d\n",i); if(__builtin_clz(i) != clz_bst(i)) printf("%d\n",i); if(__builtin_clz(i) != clz_byteshift(i)) printf("%d\n",i); if(__builtin_clz(i) != clz_recursive(i,16)) printf("%d\n",i); if(__builtin_clz(i) != clz_harley(i)) printf("%d\n",i); } ``` 驗完都正確,除了i = 0時結果不同 ### 2.時間 **iteration** ![](https://i.imgur.com/Ap6f7Nr.png) **binary search** ![](https://i.imgur.com/2LksZAY.png) **byte sift** ![](https://i.imgur.com/Mopenve.png) **recursive** ![](https://i.imgur.com/tbAFo0V.png) **harley** ![](https://i.imgur.com/cPoTZyP.png) **total** ![](https://i.imgur.com/Z8aZEBe.png) 畫出來有點奇怪,數序有點太整齊了 ## 參考資料 [TempoJiJi HackMD](https://hackmd.io/s/ryiig7YT) [harley](http://www.hackersdelight.org/hdcodetxt/nlz.c.txt) [ktvexe HackMD](https://hackmd.io/GYJghsCsDMDGCmBaekQA5EBZrxIs0ADHtAOyYCM0mI8AbCKbEA==)