contributed by <twzjwang>
作業說明: B04: clz
github: twzjwang/clz-tests
Ubuntu 16.04.2 LTS
Linux 4.8.0-36-generic
cpu
version: Intel® Core™ i5-3337U CPU @ 1.80GHz
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 4
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 3072K
memory
size: 4GiB
計算第一個 1 前面(左邊)有幾個 0
執行make run
make plot
結果
recursive version
/* mask[c]: right shift applied to 0xFFFF so the low-half mask at depth c
 * is 16, 8, 4 or 2 bits wide. */
static const int mask[] = {0, 8, 12, 14};

/* magic[v]: number of leading zero bits in the 2-bit value v. */
static const int magic[] = {2, 1, 0, 0};

/*
 * Recursive count-leading-zeros.
 *
 * x: value to examine at the current depth.
 * c: recursion depth; the top-level call passes 0 and the recursion
 *    stops at depth 3.
 *
 * At each depth the value is split into an upper and a lower half of
 * (16 >> c) bits.  If the upper half is non-zero the answer lies there;
 * otherwise the whole upper half counts as zeros and the search goes on
 * in the lower half.  Returns 32 for x == 0 at depth 0.
 */
unsigned clz2(uint32_t x, int c)
{
    if (x == 0 && c == 0)
        return 32;

    const int half = 16 >> c;
    const uint32_t hi = x >> half;
    const uint32_t lo = x & (0xFFFF >> mask[c]);

    /* Deepest level: both halves are 2 bits wide, so look them up. */
    if (c == 3) {
        if (hi)
            return magic[hi];
        return 2 + magic[lo];
    }

    if (hi)
        return clz2(hi, c + 1);

    /* Upper half is empty: it contributes `half` leading zeros. */
    return half + clz2(lo, c + 1);
}
呼叫 clz2(x,0) 開始計算
若 upper 不為 0 則遞迴呼叫 clz2(upper, c+1),否則 clz2(lower, c+1)
以 mask[] 及 magic[] 協助計算
iteration version
/*
 * Iterative count-leading-zeros.
 *
 * Starts from 32 and tries shift widths 16, 8, 4, 2, 1.  Whenever the
 * bits above the current shift are non-zero, those positions cannot be
 * leading zeros: subtract the shift and keep only the upper part.
 * After the loop x is 0 or 1, and subtracting it gives the final count
 * (32 for x == 0).
 */
static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
    int zeros = 32;

    for (int shift = 16; shift != 0; shift >>= 1) {
        uint32_t top = x >> shift;
        if (top != 0) {
            zeros -= shift;
            x = top;
        }
    }

    return zeros - x;
}
呼叫 clz(x)
與 clz2 類似,每次將 x 分為兩部分
binary search technique
/*
 * Count leading zeros by binary search on the value.
 *
 * Each step compares x against the largest value whose top 16/8/4/2/1
 * bits are all zero.  If x does not exceed it, that many leading zeros
 * are counted and x is shifted left so the next step inspects the
 * following bits.  Returns 32 for x == 0.
 */
static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
    if (!x)
        return 32;

    unsigned count = 0;

    if (x <= 0x0000FFFF) {
        count += 16;
        x <<= 16;
    }
    if (x <= 0x00FFFFFF) {
        count += 8;
        x <<= 8;
    }
    if (x <= 0x0FFFFFFF) {
        count += 4;
        x <<= 4;
    }
    if (x <= 0x3FFFFFFF) {
        count += 2;
        x <<= 2;
    }
    if (x <= 0x7FFFFFFF) {
        /* Last step: x is not read again, so no further shift is needed. */
        count += 1;
    }

    return count;
}
byte-shift version
/*
 * Count leading zeros using shift tests.
 *
 * The counter starts at 1 so the final step needs no branch: after the
 * 16/8/4/2-bit steps have moved the leading one into the top two bits,
 * the top bit of x (0 or 1) is subtracted from the count.
 * Returns 32 for x == 0.
 */
static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
    if (!x)
        return 32;

    unsigned count = 1;

    if (!(x >> 16)) {
        count += 16;
        x <<= 16;
    }
    if (!(x >> 24)) {
        count += 8;
        x <<= 8;
    }
    if (!(x >> 28)) {
        count += 4;
        x <<= 4;
    }
    if (!(x >> 30)) {
        count += 2;
        x <<= 2;
    }

    /* Top bit set means the initial 1 was one too many. */
    return count - (x >> 31);
}
Harley’s algorithm