contributed by <abba123
>
OS : ubuntu 16.04.1 LTS
我們這次要對計算 clz 的不同種方法做比較
這邊是我們這次的主角
uint8_t clz_iteration(uint32_t x)
{
uint32_t n = 32, c = 16;
do {
uint32_t y = x >> c;
if (y) {
n -= c;
x = y;
}
c >>= 1;
} while (c);
return (n - x);
}
uint8_t clz_binary_search(uint32_t x)
{
if (x == 0) return 32;
uint32_t n = 0;
if (x <= 0x0000FFFF) {
n += 16;
x <<= 16;
}
if (x <= 0x00FFFFFF) {
n += 8;
x <<= 8;
}
if (x <= 0x0FFFFFFF) {
n += 4;
x <<= 4;
}
if (x <= 0x3FFFFFFF) {
n += 2;
x <<= 2;
}
if (x <= 0x7FFFFFFF) {
n += 1;
x <<= 1;
}
return n;
}
uint8_t clz_byte_shift(uint32_t x)
{
if (x == 0) return 32;
uint32_t n = 1;
if ((x >> 16) == 0) {
n += 16;
x <<= 16;
}
if ((x >> 24) == 0) {
n += 8;
x <<= 8;
}
if ((x >> 28) == 0) {
n += 4;
x <<= 4;
}
if ((x >> 30) == 0) {
n += 2;
x <<= 2;
}
n = n - (x >> 31);
return n;
}
uint8_t clz_recursive(uint32_t x,int i)
{
if(i==0) return 0;
if (x==(x<<i)>>i)
return i+clz_recursive(x<<i,i>>1);
else
return clz_recursive(x,i>>1);
}
uint8_t clz(uint32_t x,int i)
{
/* shift upper half down, rest is filled up with 0s */
if(i==0) return 0;
uint16_t upper = (x >> i);
// mask upper half away
uint16_t lower = (x & 0xFFFF);
return upper ? clz(upper,i/2) : i + clz(lower,i/2);
}
uint8_t clz_harley(uint32_t x)
{
static unsigned char Table[] = {
0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF,
16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF,
0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF,
0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF,
14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF,
18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13,
26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25,
0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF,
};
/* Propagate leftmost 1-bit to the right */
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
/* x = x * 0x6EB14F9 */
x = (x << 3) - x; /* Multiply by 7. */
x = (x << 8) - x; /* Multiply by 255. */
x = (x << 8) - x; /* Again. */
x = (x << 8) - x; /* Again. */
return 31-Table[x >> 26];
}
接下來我們針對不同的數字
帶入進行計算 clz 實驗
由於跑一次 clz 的時間太短,所以我決定做 1000000 次然後加總,這樣算一筆數據,我們總共做了 50 筆
這是我們這次畫圖的 gnuplot 腳本
reset
set xlabel 'function'
set xtics add ("iteration" 1)
set xtics add ("binary search" 2)
set xtics add ("byte shift" 3)
set xtics add ("recursive" 4)
set xtics add ("harley" 5)
set ylabel 'sec'
set style fill solid
set title 'perfomance comparison'
set term png enhanced font 'Verdana,10'
set output 'runtime.png'
plot[0:6][:] 'output.txt' using (1):1 title "iteration" with points, \
'output.txt' using (2):2 title "binary search" with points, \
'output.txt' using (3):3 title "byte shift" with points, \
'output.txt' using (4):4 title "recursive" with points, \
'output.txt' using (5):5 title "harley" with points
N=1
N=1000
N=1000000
N=123456789
從這邊我們可以發現到
對於 iteration 來說 N 越大,也就是 clz 越小,計算量比較龐大 binary、byte_shift、recursive 則相反 harley 則是沒什麼變動
有些數值看得出來不合理,嘗試用信賴區間去除不合理數值,會是個比較好的統計
我們把他們合併成一張 gif 比較容易觀察