# 2017q1 Homework1 (clz) contributed by <`Weinux`> ###### tags: `sysprog2017` `w1` `clz` `Weinux` ## 開發環境 * OS:Lubuntu 16.04 LTS * Linux 系統版本: 4.4.0-63-generic * Architecture: x86_64 * CPU 作業模式: 32-bit, 64-bit * Byte Order: Little Endian * CPU: Intel(R) Core(TM) i5-2410M CPU @ 2.30GHz * Model name: Intel(R) Core(TM) i5-2410M CPU @ 2.30GHz * Memory: 6GB (5870MB) * Cache: * L1d cache: 32K * L1i cache: 32K * L2 cache: 256K * L3 cache: 3072K ## CLZ 比較 32-bit 數值對以下實做的效能差異: - 原始效能比較 ```text= $ make $ make plot ``` ![](https://i.imgur.com/wPxJ9hs.png) ### recursive version ```clike= #include "clz2.h" static const int mask[]={0,8,12,14}; static const int magic[]={2,1,0,0}; unsigned clz2(uint32_t x,int c) { if (!x && !c) return 32; uint32_t upper = (x >> (16>>c)); uint32_t lower = (x & (0xFFFF>>mask[c])); if (c == 3) return upper ? magic[upper] : 2 + magic[lower]; return upper ? clz2(upper, c + 1) : (16 >> (c)) + clz2(lower, c + 1); } ``` ### iteration version ```clike= #include "clz.h" static inline __attribute((always_inline)) unsigned clz(uint32_t x) { int n = 32, c = 16; do { uint32_t y = x >> c; if (y) { n -= c; x = y; } c >>= 1; } while (c); return (n - x); } ``` ### binary search technique ```clike= #include "clz.h" static inline __attribute((always_inline)) unsigned clz(uint32_t x) { if (x == 0) return 32; int n = 0; if (x <= 0x0000FFFF) { n += 16; x <<= 16; } if (x <= 0x00FFFFFF) { n += 8; x <<= 8; } if (x <= 0x0FFFFFFF) { n += 4; x <<= 4; } if (x <= 0x3FFFFFFF) { n += 2; x <<= 2; } if (x <= 0x7FFFFFFF) { n += 1; x <<= 1; } return n; } ``` ### byte-shift version ```clike= #include "clz.h" static inline __attribute((always_inline)) unsigned clz(uint32_t x) { if (x == 0) return 32; int n = 1; if ((x >> 16) == 0) { n += 16; x <<= 16; } if ((x >> 24) == 0) { n += 8; x <<= 8; } if ((x >> 28) == 0) { n += 4; x <<= 4; } if ((x >> 30) == 0) { n += 2; x <<= 2; } n = n - (x >> 31); return n; } ``` ### Harley’s algorithm ```clike= #include "clz.h" static inline __attribute((always_inline)) unsigned clz(uint32_t x) { // CTZ table #ifdef CTZ static uint8_t const Table[] = { 0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF, 16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF, 0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF, 0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF, 14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF, 18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13, 26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25, 0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF, }; // CLZ table #else static uint8_t const Table[] ={ 32,31, 0,16, 0,30, 3, 0,15, 0, 0, 0,29,10, 2, 0, 0, 0,12,14,21, 0,19, 0, 0,28, 0,25, 0, 9, 1, 0, 17, 0, 4, 0, 0, 0,11, 0,13,22,20, 0,26, 0, 0,18, 5, 0, 0,23, 0,27, 0, 6,0,24, 7, 0, 8, 0, 0, 0 }; #endif /* Propagate leftmost 1-bit to the right */ x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); /* x = x * 0x6EB14F9 */ x = (x << 3) - x; /* Multiply by 7. */ x = (x << 8) - x; /* Multiply by 255. */ x = (x << 8) - x; /* Again. */ x = (x << 8) - x; /* Again. */ return Table[(x >> 26)]; } ``` ## 參考資料