contributed by <ChiuYiTang
>
Ubuntu 16.04.5
Linux 4.4.0-96-generic
gcc version 5.4.0 20160609
$ lscpu
static const int mask[] = { 0, 8, 12,14 };
static const int magic[] = { 2, 1, 0, 0 };
unsigned clz2(uint32_t x,int c)
{
if (!x && !c) return 32;
uint32_t upper = (x >> (16 >> c));
uint32_t lower = (x & (0xFFFF >> mask[c]));
if (c == 3) return upper ? magic[upper] : 2 + magic[lower];
return upper ? clz2(upper, c + 1) : (16 >> (c)) + clz2(lower, c + 1);
}
clz2(x,0)
clz2(upper, c + 1)
,否則計算clz,並呼叫clz2(lower, c + 1)
mask[]
方便計算lower bitsmagic[]
紀錄各階段clz
static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
int n = 32, c = 16;
do {
uint32_t y = x >> c;
if (y) {
n -= c;
x = y;
}
c >>= 1;
} while (c);
return (n - x);
}
clz(x)
n - x
static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
if (x == 0) return 32;
int n = 0;
if (x <= 0x0000FFFF) {
n += 16;
x <<= 16;
}
if (x <= 0x00FFFFFF) {
n += 8;
x <<= 8;
}
if (x <= 0x0FFFFFFF) {
n += 4;
x <<= 4;
}
if (x <= 0x3FFFFFFF) {
n += 2;
x <<= 2;
}
if (x <= 0x7FFFFFFF) {
n += 1;
x <<= 1;
}
return n;
}
clz(x)
,初始 counter n = 0
。counter
static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
if (x == 0) return 32;
int n = 1;
if ((x >> 16) == 0) {
n += 16;
x <<= 16;
}
if ((x >> 24) == 0) {
n += 8;
x <<= 8;
}
if ((x >> 28) == 0) {
n += 4;
x <<= 4;
}
if ((x >> 30) == 0) {
n += 2;
x <<= 2;
}
n = n - (x >> 31);
return n;
}
clz(x)
,初始 counter n = 1
。counter - lowest bit
static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
#ifdef CTZ
static uint8_t const Table[] = {
0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF,
16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF,
0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF,
0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF,
14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF,
18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13,
26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25,
0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF,
};
#else
static uint8_t const Table[] = {
32,31, 0,16, 0,30, 3, 0,15, 0, 0, 0,29,10, 2, 0,
0, 0,12,14,21, 0,19, 0, 0,28, 0,25, 0, 9, 1, 0,
17, 0, 4, 0, 0, 0,11, 0,13,22,20, 0,26, 0, 0,18,
5, 0, 0,23, 0,27, 0, 6,0,24, 7, 0, 8, 0, 0, 0
};
#endif
/* Propagate leftmost 1-bit to the right */
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
/* x = x * 0x6EB14F9 */
x = (x << 3) - x; /* Multiply by 7. */
x = (x << 8) - x; /* Multiply by 255. */
x = (x << 8) - x; /* Again. */
x = (x << 8) - x; /* Again. */
return Table[(x >> 26)];
}
int highest_bit_position(int x)
{
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return x;
}
input:
00000000 00000000 10000000 00000000
00000000 00000000 11000000 00000000
00000000 00000000 11110000 00000000
00000000 00000000 11111111 00000000
00000000 00000000 11111111 11111111
00000000 00000000 11111111 11111111
input => ctz[index]=> result
1 => ctz[ 1] => 0
2 => ctz[ 5] => 1
4 => ctz[12] => 2
8 => ctz[25] => 3
16 => ctz[53] => 4
32 => ctz[44] => 5
64 => ctz[27] => 6
128 => ctz[57] => 7
256 => ctz[51] => 8
512 => ctz[41] => 9
1024 => ctz[20] => 10
2048 => ctz[42] => 11
4096 => ctz[22] => 12
8192 => ctz[47] => 13
16384 => ctz[32] => 14
32768 => ctz[ 3] => 15
65536 => ctz[ 8] => 16
131072 => ctz[19] => 17
262144 => ctz[40] => 18
524288 => ctz[18] => 19
1048576 => ctz[38] => 20
2097152 => ctz[13] => 21
4194304 => ctz[29] => 22
8388608 => ctz[60] => 23
16777216 => ctz[58] => 24
33554432 => ctz[55] => 25
67108864 => ctz[48] => 26
134217728 => ctz[34] => 27
268435456 => ctz[ 6] => 28
536870912 => ctz[14] => 29
1073741824 => ctz[30] => 30
2147483648 => ctz[62] => 31
input => clz[index]=> result
1 => clz[ 1] => 31
2 => clz[ 5] => 30
4 => clz[12] => 29
8 => clz[25] => 28
16 => clz[53] => 27
32 => clz[44] => 26
64 => clz[27] => 25
128 => clz[57] => 24
256 => clz[51] => 23
512 => clz[41] => 22
1024 => clz[20] => 21
2048 => clz[42] => 20
4096 => clz[22] => 19
8192 => clz[47] => 18
16384 => clz[32] => 17
32768 => clz[ 3] => 16
65536 => clz[ 8] => 15
131072 => clz[19] => 14
262144 => clz[40] => 13
524288 => clz[18] => 12
1048576 => clz[38] => 11
2097152 => clz[13] => 10
4194304 => clz[29] => 9
8388608 => clz[60] => 8
16777216 => clz[58] => 7
33554432 => clz[55] => 6
67108864 => clz[48] => 5
134217728 => clz[34] => 4
268435456 => clz[ 6] => 3
536870912 => clz[14] => 2
1073741824 => clz[30] => 1
2147483648 => clz[62] => 0
感覺可用稀疏矩陣操作進一步簡化空間使用量,但需與時間開銷作取捨。ChiuYiTang
int __builtin_clz (unsigned int x)
gnuplot
來觀察效能差異
int clock_settime(clockid_t clk_id, const struct timespec *tp);
{0 0 0 0};
搬移第一個盤子放到空的柱子上 {0 0 0 1};
搬移第二個盤子放到空的柱子上 {0 0 1 1};
將第一個盤子放到第二個盤子上 {0 0 1 0};
將第三個盤子放到空的柱子上 {0 1 1 0};
將第一個盤子放到第四個盤子上 {0 1 1 1};
將第二個盤子放到第三個盤子上 {0 1 0 1};
將第一個盤子放到第二個盤子上 {0 1 0 0}:
搬移第四個盤子放到空的柱子上 {1 1 0 0};
將第一個盤子放到第四個盤子上 {1 1 0 1};
將第二個盤子放到空的柱子上 {1 1 1 1};
將第一個盤子放到第二個盤子上 {1 1 1 0};
將第三個盤子放到第四個盤子上 {1 0 1 0};
將第一個盤子放到空的柱子上 {1 0 1 1};
將第二個盤子放到第三個盤子上 {1 0 0 1};
將第一個盤子放到第二個盤子上 {1 0 0 0};
重新理解數值
wikipedia:Find first set
How to use assertions in C
wikipedia:Tower of Hanoi
Gray code encoder disc
Manipulating Probability Distribution Functions