2017q3 Homework1 (clz)

# 2017q3 Homework1 (clz) contributed by <`ZixinYang`> ## De Bruijn 數列給定 m 個字母，以及字串長度 k, 這個數列向左做旋轉後, 取出前 k 個字母。恰好可以湊出所有這 k 個字母可能的組合。 ## CLZ ### Recursive Version ```clike= int clz(uint32_t x) { uint32_t upper = (x >> 16); uint32_t lower = (x & 0x0000FFFF); return upper ? -16 + clz(upper) : lower ? -1 + clz(lower >> 1) : 32; } ``` 先將前16個位元和後16個分別存起來, 如果 upper 有值, 則呼叫 clz, 呼叫之後原本存在 upper 的值, 會存在 lower, 然後一直右移, 直到 upper, lower 皆為0, 即回傳32, 並不斷回傳值, 直到產生結果。 ### Iteration Version ```clike= int clz(uint32_t x) { int n = 32, c = 16; do { uint32_t y = x >> c; if (y) { n -= c; x = y; } c >>= 1; } while (c); return (n - x); } ``` 計算要左移幾次才會變 0。算完之後再用這個數有幾位去減。 ### Binary Search Version ```clike= int clz(uint32_t x) { if (x == 0) return 32; int n = 0; if (x <= 0x0000FFFF) { n += 16; x <<= 16; } if (x <= 0x00FFFFFF) { n += 8; x <<= 8; } if (x <= 0x0FFFFFFF) { n += 4; x <<= 4; } if (x <= 0x3FFFFFFF) { n += 2; x <<= 2; } if (x <= 0x7FFFFFFF) { n += 1; x <<= 1; } return n; } ``` 用 bit mask 做 binary search。一個二進位的數可以表示成： $$d_{1} \cdot 16 + d_{2} \cdot 8 + d_{3} \cdot 4 + d_{4} \cdot 2 + d_{5} \cdot 1$$ `if` 只是在判斷 $d_{1}$ ~ $d_{5}$ 哪些為 0, 哪些不為 0 ### Byte-Shift Version ```clike= int clz(uint32_t x) { if (x == 0) return 32; int n = 1; if ((x >> 16) == 0) { n += 16; x <<= 16; } if ((x >> 24) == 0) { n += 8; x <<= 8; } if ((x >> 28) == 0) { n += 4; x <<= 4; } if ((x >> 30) == 0) { n += 2; x <<= 2; } n = n - (x >> 31); return n; } ``` 同理 binary search 的方法 ### Harley's Algorithm ```clike= int clz(uint32_t x) { static char table[64] = { 32, 31, u, 16, u, 30, 3, u, 15, u, u, u, 29, 10, 2, u, u, u, 12, 14, 21, u, 19, u, u, 28, u, 25, u, 9, 1, u, 17, u, 4, u, u, u, 11, u, 13, 22, 20, u, 26, u, u, 18, 5, u, u, 23, u, 27, u, 6, u,24, 7, u, 8, u, 0, u }; x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); x = (x << 3) - x; x = (x << 8) - x; x = (x << 8) - x; x = (x << 8) - x; return table[x >> 26]; } ``` u 不會用到, 可為任意數, 參考[這篇](https://hackmd.io/s/B1p3-506#)的做法 ## 效能測試 ### Output: ``` 100000000 0.000002 100000001 0.000001 100000002 0.000000 100000003 0.000001 100000004 0.000000 100000005 0.000000 100000006 0.000000 100000007 0.000000 100000008 0.000000 100000009 0.000000 100000010 0.000000 100000011 0.000001 100000012 0.000000 100000013 0.000000 100000014 0.000000 100000015 0.000000 100000016 0.000001 100000017 0.000000 100000018 0.000001 100000019 0.000000 100000020 0.000000 100000021 0.000001 100000022 0.000000 100000023 0.000000 ``` ### Gnuplot ![](https://i.imgur.com/WKWtQ7D.png) 每種方法執行時間都是超小的數, 不知道是不是用錯計時的函式了... ```clike= #include <stdio.h> #include <stdint.h> #include <time.h> int clz(uint32_t x) { uint32_t upper = (x >> 16); uint32_t lower = (x & 0x0000FFFF); return upper ? -16 + clz(upper) : lower ? -1 + clz(lower >> 1) : 32; } int main(){ clock_t start, end; FILE *fp; fp = fopen("clz_recursive.txt", "w+"); for(int i = 100000000; i <= 105000000; i++){ start = clock(); int CLZ = clz(i); end = clock(); fprintf(fp, "%d %lf\n", i, (end-start)/(double)(CLOCKS_PER_SEC)); } fclose(fp); return 0; } ```