contributed by <RayPan
>
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 4
On-line CPU(s) list: 0-3
Thread(s) per core: 2
Core(s) per socket: 2
Socket(s): 1
NUMA node(s): 1
Vendor ID: GenuineIntel
CPU family: 6
Model: 60
Model name: Intel(R) Core(TM) i5-4200H CPU @ 2.80GHz
Stepping: 3
CPU MHz: 2869.015
CPU max MHz: 3400.0000
CPU min MHz: 800.0000
BogoMIPS: 5587.49
Virtualization: VT-x
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 3072K
NUMA node0 CPU(s): 0-3
比較 32-bit 數值對以下實做的效能差異:
uint8_t clz_recursive(uint32_t x, int shift_len)
{
if(shift_len == 0) return 0;
/* shift upper half down, rest is filled up with 0s */
uint16_t upper = (x >> shift_len);
// mask upper half away
uint16_t lower = (x & (0xFFFF >> (16 - shift_len)));
return upper ? clz_recursive(upper, shift_len >> 1) : shift_len + clz_recursive(lower, shift_len >> 1);
}
uint8_t clz_iteration(uint32_t x)
{
int n = 32, c = 16;
do {
uint32_t y = x >> c;
if (y) {
n -= c;
x = y;
}
c >>= 1;
} while (c);
return (n - x);
}
uint8_t clz_binary_search(uint32_t x)
{
int n = 0;
if (x == 0) return 32;
if (x <= 0x0000FFFF){
n += 16;
x <<= 16;
}
if (x <= 0x00FFFFFF){
n += 8;
x <<= 8;
}
if (x <= 0x0FFFFFFF){
n += 4;
x <<= 4;
}
if (x <= 0x3FFFFFFF){
n += 2;
x <<= 2;
}
if (x <= 0x7FFFFFFF){
n += 1;
x <<= 1;
}
return n;
}
uint8_t clz_byte_shift(uint32_t x)
{
int n = 1;
if (x == 0) return 32;
if ((x >> 16) == 0) {
n += 16;
x <<= 16;
}
if ((x >> 24) == 0) {
n += 8;
x <<= 8;
}
if ((x >> 28) == 0) {
n += 4;
x <<= 4;
}
if ((x >> 30) == 0) {
n += 2;
x <<= 2;
}
n = n - (x >> 31);
return n;
}
int clz_harley(uint32_t x)
{
static uint8_t const Table[] = {
32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u,
u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u,
17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18,
5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u
};
/* Propagate leftmost 1-bit to the right */
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
/* x = x * 0x6EB14F9 */
x = (x << 3) - x; /* Multiply by 7. */
x = (x << 8) - x; /* Multiply by 255. */
x = (x << 8) - x; /* Again. */
x = (x << 8) - x; /* Again. */
return Table[x >> 26];
}
加工中
參考共筆: TempoJiJi
作業詳細資訊: A04:clz
RayPan
A04 Clz