2016q3 Homework1 (clz) === # 目標 - [x] iteration version - [x] recursive version - [x] Harley’s algorithm - [x] binary search technique - [x] byte-shift version - [ ] benchmark suit gnuplot # 程式解說 * 定義unsigned int uint8_t為0~2^8-1 (0x00~0xFF) uint16_t為0~2^16-1 (0x0000~0xFFFF) uint32_t為0~2^32-1 (0x00000000~0xFFFFFFFF) uint64_t為0~2^64-1 (0x0000000000000000~0xFFFFFFFFFFFFFFFF) * 遞迴 :::info 原式 ```javascript uint8_t clz(uint32_t x) { /* shift upper half down, rest is filled up with 0s */ uint16_t upper = (x >> 16); // mask upper half away uint16_t lower = (x & 0xFFFF); return upper ? clz(upper) : 16 + clz(lower); } ``` 修改過後的程式碼,決定多一個參數 count 來計算目前切割的 bit 位數 ```javascript uint8_t clz_recursive(uint32_t x,int count) { count = count / 2; if (count < 1) return 0; uint16_t upper = (x >> count); uint16_t lower = (x & 0xFFFF); return upper ? clz_recursive(upper,count) : count + clz_recursive(lower,count); } ``` ::: * Harley’s algorithm :::info 原式 ```javascript static prog_uint8_t const Table[] = { 0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF, 16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF, 0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF, 0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF, 14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF, 18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13, 26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25, 0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF, }; /* Propagate leftmost 1-bit to the right */ x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); /* x = x * 0x6EB14F9 */ x = (x << 3) - x; /* Multiply by 7. */ x = (x << 8) - x; /* Multiply by 255. */ x = (x << 8) - x; /* Again. */ x = (x << 8) - x; /* Again. */ return pgm_read_byte(&Table[x >> 26]); ``` - [ ] 研究pgm_read_byte ```javascript uint8_t clz_Harley(uint32_t x) { static uint8_t const Table[] = { 0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF, 16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF, 0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF, 0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF, 14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF, 18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13, 26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25, 0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF, }; /* Propagate leftmost 1-bit to the right */ x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); /* x = x * 0x6EB14F9 */ x = (x << 3) - x; /* Multiply by 7. */ x = (x << 8) - x; /* Multiply by 255. */ x = (x << 8) - x; /* Again. */ x = (x << 8) - x; /* Again. */ return Table[x >> 26]; } ``` ::: * 這是loop 10000次的結果 `0.000434 0.0003200.000280 0.000241 0.000229`