contributed by <carolc0708
>
uint8_t clz(uint32_t x)
{
/* shift upper half down, rest is filled up with 0s */
uint16_t upper = (x >> 16);
// mask upper half away
uint16_t lower = (x & 0xFFFF);
return upper ? clz(upper) : 16 + clz(lower);
}
uint8_t clz_binary_recursive(uint32_t x, int shift)
{
if(x == 0)
return 32;
if(shift == 0)
return 0;
if(x <= (0xFFFFFFFF >> shift))
return shift + clz_binary_recursive(x << shift, shift / 2);
else
return clz_binary_recursive(x, shift / 2);
}
uint8_t clz_recursive(uint32_t x)
{
return x ? clz_recursive(x>>1) -1 : 32;
}
int clz(uint32_t x) {
int n = 32, c = 16;
do {
uint32_t y = x >> c;
if (y) { n -= c; x = y; }
c >>= 1;
} while (c);
return (n - x);
}
檢查 leading zero 是否在前 16 bit,若是則擠掉後 16 bit 繼續比。接著依序比 8、4、2…
uint8_t clz_iteration(uint32_t x)
{
int n = 0;
while(x)
{
x >>= 1;
n += 1;
}
return 32-n;
}
uint8_t clz(uint32_t x)
{
static prog_uint8_t const Table[] = {
0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF,
16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF,
0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF,
0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF,
14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF,
18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13,
26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25,
0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF,
};
/* Propagate leftmost 1-bit to the right */
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
/* x = x * 0x6EB14F9 */
x = (x << 3) - x; /* Multiply by 7. */
x = (x << 8) - x; /* Multiply by 255. */
x = (x << 8) - x; /* Again. */
x = (x << 8) - x; /* Again. */
return pgm_read_byte(&Table[x >> 26]);
}
修正版:
將 table 改為以下,u 可以為任意值。
const char table[64] =
{32,31, u,16, u,30, 3, u,15, u, u, u,29,10, 2, u,
u, u,12,14,21, u,19, u,u,28, u,25, u, 9, 1, u,
17, u, 4, u, u, u,11, u,13,22,20, u,26, u, u,18,
5, u, u,23, u,27, u, 6,u,24, 7, u, 8, u, 0, u};
int clz(uint32_t x) {
if (x == 0) return 32;
int n = 0;
if (x <= 0x0000FFFF) { n += 16; x <<= 16; }
if (x <= 0x00FFFFFF) { n += 8; x <<= 8; }
if (x <= 0x0FFFFFFF) { n += 4; x <<= 4; }
if (x <= 0x3FFFFFFF) { n += 2; x <<= 2; }
if (x <= 0x7FFFFFFF) { n += 1; x <<= 1; }
return n;
}
先看 leading zero 可能若在哪個範圍,再進去把前面不需要判斷的 bit 都左移擠掉,再縮小範圍去看。
int clz(uint32_t x) {
if (x == 0) return 32;
int n = 1;
if ((x >> 16) == 0) { n += 16; x <<= 16; }
if ((x >> 24) == 0) { n += 8; x <<= 8; }
if ((x >> 28) == 0) { n += 4; x <<= 4; }
if ((x >> 30) == 0) { n += 2; x <<= 2; }
n = n - (x >> 31);
return n;
}
先判斷 x 的 leading zero 是在哪個範圍,若為後半部,則將前半部 bit 左移擠掉繼續判斷。
n = n - (x >> 31)
為判斷最後 1 bit 用。
寫 verify.c
做程式驗證。
使用 Assertion function 做比對驗證
參考 Assertion 用法,在驗證程式中使用 assertion function 比較 __builtin_clz()
和各個 clz 版本的結果是否相同。
Assertions in C
In C, assertions are implemented with the standard assert macro. The argument to assert must be true when the macro is executed, otherwise the program aborts and prints an error message. For example, the assertion
assert( size <= LIMIT );
will abort the program and print an error message like this:
Assertion violation: file tripe.c, line 34: size <= LIMIT
if size is greater than LIMIT.
使用 __builtin_clz(x)
來驗證計算是否正確
剛開始一直出現錯誤,把值一個個印出來才發現 __builtin_clz(0)
是 31,但應該是 32 才正確。
Built-in Function: int __builtin_clz (unsigned int x)
Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
發現 __builtin_clz(0)
的結果是 undefined。
為避免造成錯誤就先不使用,僅檢查 x =1~UINT_MAX 間所有數值結果。
參考 Hacker's Delight這本書當中所提及之應用: