Try   HackMD

2016q3 Homework1 (clz)

contributed by <vic85821>

開發環境

  • Ubuntu 16.04.1 LTS
  • Linux version 4.4.0-38-generic
  • CPU : Intel® Core™ i5-3230M CPU @ 2.60GHz
  • MEM : 8GB
  • L1d cache : 32K
  • L1i cache : 32K
  • L2 cache : 256K
  • L3 cache : 3072K

案例分析

iteration

int clz_iteration(uint32_t x) {
   int n = 32, c = 16;
   do {
        uint32_t y = x >> c;
        if (y) { n -= c; x = y; }
        c >>= 1;
    } while (c);
    return (n - x);
}
int clz_bst(uint32_t x) {
	if (x == 0) return 32;
    int n = 0;
    if (x <= 0x0000FFFF) { n += 16; x <<= 16; }
    if (x <= 0x00FFFFFF) { n += 8; x <<= 8; }
    if (x <= 0x0FFFFFFF) { n += 4; x <<= 4; }
    if (x <= 0x3FFFFFFF) { n += 2; x <<= 2; }
    if (x <= 0x7FFFFFFF) { n += 1; x <<= 1; }
    return n;
} 

將32bit的數字一直砍半並對照,對照次數 = log2N
32 bit 需要比對5次、64bit需要比對6次

byte shift

int clz_byteshift(uint32_t x) {
	if (x == 0) return 32;
    int n = 1;
    if ((x >> 16) == 0) { n += 16; x <<= 16; }
    if ((x >> 24) == 0) { n += 8; x <<= 8; }
    if ((x >> 28) == 0) { n += 4; x <<= 4; }
    if ((x >> 30) == 0) { n += 2; x <<= 2; }
    n = n - (x >> 31);
    return n;
}

與binary search類似的概念

recursive

int clz_recursive(uint32_t x,int len) {

    if(len == 0)
        return 0;

/* shift upper half down, rest is filled up with 0s */
    uint16_t upper = (x >> len); 
	
// mask upper half away
    uint16_t lower = (x & (0xFFFF >> (16 - len)));
	
    return upper ? clz_recursive(upper,len>>1) : len + clz_recursive(lower,len>>1);
}

tail recursive : 一邊計算一邊將結果的值記下來,可以減少遞迴的次數,但需要額外的記憶體空間

Harley algorithm

int clz_Harley(uint32_t x) {
    
    static prog_uint8_t const Table[] = {
      0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF,
      16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF,
      0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF,
      0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF,
      14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF,
      18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13,
      26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25,
      0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF,
    };

    /* Propagate leftmost 1-bit to the right */
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >> 16);
 
    /* x = x * 0x6EB14F9 */
    x = (x << 3) - x;   /* Multiply by 7. */
    x = (x << 8) - x;   /* Multiply by 255. */
    x = (x << 8) - x;   /* Again. */
    x = (x << 8) - x;   /* Again. */

    return Table[x >> 26];
}

還在找資料Orz!!

正確性&效能分析

1.正確性

    for(uint32_t  i = 0; i < UINT_MAX; i++)
    {
        if(__builtin_clz(i) != clz_iteration(i))
            printf("%d\n",i);
        if(__builtin_clz(i) != clz_bst(i))
            printf("%d\n",i);
        if(__builtin_clz(i) != clz_byteshift(i))
            printf("%d\n",i);
        if(__builtin_clz(i) != clz_recursive(i,16))
            printf("%d\n",i);
        if(__builtin_clz(i) != clz_harley(i))
            printf("%d\n",i);
    }

驗完都正確,除了i = 0時結果不同

2.時間

iteration

binary search

byte sift

recursive

harley

total

畫出來有點奇怪,數序有點太整齊了

參考資料

TempoJiJi HackMD
harley
ktvexe HackMD