# 2016q3 Homework1 (clz) contributed by <`jeff60907`> [作業說明](https://hackmd.io/s/B1LHefQ6) [閱讀重新理解數值](https://hackmd.io/s/SkKZBXZT) * 比較效能差異 - [ ]recursive version - [x]iteration version - [x]binary search technique - [x]byte-shift version - [ ]Harley’s algorithm #### iteration version ```C int clz(uint32_t x) { int n = 32, c = 16; do { uint32_t y = x >> c; if (y) { n -= c; x = y; } c >>= 1; } while (c); return (n - x); } ``` > 1bit 比對 #### binary search technique ```C int clz(uint32_t x) { if (x == 0) return 32; int n = 0; if (x <= 0x0000FFFF) { n += 16; x <<= 16; } if (x <= 0x00FFFFFF) { n += 8; x <<= 8; } if (x <= 0x0FFFFFFF) { n += 4; x <<= 4; } if (x <= 0x3FFFFFFF) { n += 2; x <<= 2; } if (x <= 0x7FFFFFFF) { n += 1; x <<= 1; } return n; } ``` > 上次上課有介紹過先將32bit拆成對半比較,然後接著依序對照, > 32bit最多比對5次,64bit最多6次 執行次數為 log~2~N #### byte-shift version ```C int clz(uint32_t x) { if (x == 0) return 32; int n = 1; if ((x >> 16) == 0) { n += 16; x <<= 16; } if ((x >> 24) == 0) { n += 8; x <<= 8; } if ((x >> 28) == 0) { n += 4; x <<= 4; } if ((x >> 30) == 0) { n += 2; x <<= 2; } n = n - (x >> 31); return n; } ``` > 先shift bits數再進行比對,可以少了一次 if 判別 #### recursive version ```c= uint8_t clz(uint32_t x) { /* shift upper half down, rest is filled up with 0s */ uint16_t upper = (x >> 16); // mask upper half away uint16_t lower = (x & 0xFFFF); return upper ? clz(upper) : 16 + clz(lower); } ``` > 未修改版 先了解運作模式 > 參考多位同學的recursive,使用一個shift紀錄切割的bit位數 ```c= uint8_t clz_recursive(uint32_t x, int shift) { if(shift == 0) return 0; /* shift upper half down, rest is filled up with 0s */ uint16_t upper = (x >> shift); // mask upper half away uint16_t lower = (x & 0xFFFF); return upper ? clz_recursive(upper, shift>>1) : shift + clz_recursive(lower, shift>>1); } ``` #### Harley’s algorithm ```c= uint8_t clz(uint32_t x) { static prog_uint8_t const Table[] = { 0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF, 16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF, 0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF, 0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF, 14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF, 18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13, 26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25, 0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF, }; /* Propagate leftmost 1-bit to the right */ x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); /* x = x * 0x6EB14F9 */ x = (x << 3) - x; /* Multiply by 7. */ x = (x << 8) - x; /* Multiply by 255. */ x = (x << 8) - x; /* Again. */ x = (x << 8) - x; /* Again. */ return pgm_read_byte(&Table[x >> 26]); } ``` > 未修改版 > [參考<heathcliffYang>, <janetwei>共筆](https://hackmd.io/s/B18nZp1Jl#),了解harly運作 ```c= uint8_t clz_harley(uint32_t x) { const char table[64] = {32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u, u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u, 17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18, 5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u}; /* Propagate leftmost 1-bit to the right */ x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); /* x = x * 0x6EB14F9 */ x = (x << 3) - x; /* Multiply by 7. */ x = (x << 8) - x; /* Multiply by 255. */ x = (x << 8) - x; /* Again. */ x = (x << 8) - x; /* Again. */ return table[x >> 26]; } ``` ## 測試比較效能並驗證 test.c 驗證演算法資料正確 [參考Tempo JiJi](https://hackmd.io/s/ryiig7YT#) ```c= for(int i=1; i<UINT_MAX; i++) { if( __builtin_clz(i) != clz_iteration(i)) printf("%d\n",i); if( __builtin_clz(i) != clz_binary_search(i)) printf("%d\n",i); if( __builtin_clz(i) != clz_binary_shift(i)) printf("%d\n",i); if( __builtin_clz(i) != clz_recursive(i,16)) printf("%d\n",i); if( __builtin_clz(i) != clz_harley(i)) printf("%d\n",i); } printf("all right\n"); ``` 範圍 [1000000 : 5000000] 每次加100 #### iteration version ![](https://i.imgur.com/aN9s6ap.png) #### binary search ![](https://i.imgur.com/aMFx0eb.png) #### byte-shift ![](https://i.imgur.com/wAyVktk.png) #### recursive ![](https://i.imgur.com/U28yUGz.png) #### harley ![](https://i.imgur.com/xH9Tw1L.png) #### 效能比較 ![](https://i.imgur.com/U5RPeKx.png)