2017q3 Homework1 (clz)

contributed by <maskashura>

Reviewed by `jcyztw`

程式執行結果段落怎麼沒有作分析呢？有空趕快補齊內容吧。
在各種 CLZ ( Count Leading Zeros ) 寫法段落，binary.c (binary search) 部份的程式碼中，可以請你解釋為什麼 if statements 會用那些數值(例如：0x0000FFFF)來做判斷？(提示：可與 binary search 作聯想。這個問題我去找老師討論作業時有被問過，當下我整個人愣住，直到老師給我提示才知道怎麼回答XD)

執行環境

lscpu

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                4
On-line CPU(s) list:   0-3
Thread(s) per core:    2
Core(s) per socket:    2
Socket(s):             1
NUMA node(s):          1
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 142
Model name:            Intel(R) Core(TM) i5-7300U CPU @ 2.60GHz
Stepping:              9
CPU MHz:               1067.541
CPU max MHz:           3500.0000
CPU min MHz:           400.0000
BogoMIPS:              5424.00
Virtualization:        VT-x
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              3072K
NUMA node0 CPU(s):     0-3

各種 CLZ ( Count Leading Zeros ) 寫法

iteration.c

n 為輸入值的位元數中, c 為移動的位元數,會一次依序向右移動16,8,4,2,1 bits , 用類似 binary search 方式做檢查, 若位移後檢查為0則回傳 (n-x),即得二進位值開頭有幾個零

static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
    int n = 32, c = 16;
    do {
        uint32_t y = x >> c;
        if (y) {
            n -= c;
            x = y;
        }
        c >>= 1;
    } while (c);
    return (n - x);
}

e.g.
輸入 x=0x12345678, 即為二進位值 00010010001101000101011001111000

當 c=16
y=00000000000000000001001000110100
判斷y不為0, 執行下一次位移
當 c=8
y=00000000000000000000000000010010
判斷y不為0, 執行下一次位移
當 c=4
y=00000000000000000000000000000001
判斷y不為0, 執行下一次位移
當 c=2
y=00000000000000000000000000000000
判斷y為0, 執行

if (y) {
n -= c;
x = y;
}

當 c=1
y=00000000000000000000000000000000
判斷y為0, 再次執行

if (y) {
n -= c;
x = y;
}

最後回傳(n - x) = 3 (二進位值開頭有3個零)

binary.c (binary search)

操作手法同 "iteration.c" , 只是把 while 打散成5組 if ,判斸是否在右半邊, 再將 n 值做回傳

static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
    if (x == 0) return 32;
    int n = 0;
    if (x <= 0x0000FFFF) {
        n += 16;
        x <<= 16;
    }
    if (x <= 0x00FFFFFF) {
        n += 8;
        x <<= 8;
    }
    if (x <= 0x0FFFFFFF) {
        n += 4;
        x <<= 4;
    }
    if (x <= 0x3FFFFFFF) {
        n += 2;
        x <<= 2;
    }
    if (x <= 0x7FFFFFFF) {
        n += 1;
        x <<= 1;
    }
    return n;
}

byte.c (byte shift)

操作方式仍和 "iteration.c" 一樣, 和 binary search 差別只有條件改用 == 判斷

static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
    if (x == 0) return 32;
    int n = 1;
    if ((x >> 16) == 0) {
        n += 16;
        x <<= 16;
    }
    if ((x >> 24) == 0) {
        n += 8;
        x <<= 8;
    }
    if ((x >> 28) == 0) {
        n += 4;
        x <<= 4;
    }
    if ((x >> 30) == 0) {
        n += 2;
        x <<= 2;
    }
    n = n - (x >> 31);
    return n;
}

recursive.c

為 iterative 的遞迴版本, c 為右移 bits 數

static const int mask[] =  { 0, 8, 12,14 };
static const int magic[] = { 2, 1,  0, 0 };

unsigned clz2(uint32_t x,int c)
{
    if (!x && !c) return 32;

    uint32_t upper = (x >> (16 >> c));
    uint32_t lower = (x & (0xFFFF >> mask[c]));
    if (c == 3) return upper ? magic[upper] : 2 + magic[lower];
    return upper ? clz2(upper, c + 1) : (16 >> (c)) + clz2(lower, c + 1);
/* shift成功代表leading zero的數量不足count，shift之後繼續用更小的count去檢查 shift不成功代表leading zero的數量足夠count，用shift前的值繼續檢查，而count<<1則把該次檢查計算的leading zero數量加上*/
}

harley.c

為一種 Hash table查表法

把最高不為零位之後的位元全部補為1可以算是一種第一次的分類，歸類相同clz數值的數。
之後用一些運算讓各個數產生一對一的對應，之後做表格 -> hash function 的概念應用。
shift 26 bits，也就是用剩下的 6 bits 來表示0~32的 clz 可能值，但總數是64。根據程式碼的觀察結果，我們認為他是用 output 的結果去做 table

static inline __attribute((always_inline))
unsigned clz(uint32_t x)
{
#ifdef CTZ
    static uint8_t const Table[] = {
        0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF,
        16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF,
        0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF,
        0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF,
        14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF,
        18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13,
        26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25,
        0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF,
    };

#else
    static uint8_t const Table[] = {
        32,31, 0,16, 0,30, 3, 0,15, 0, 0, 0,29,10, 2, 0,
        0, 0,12,14,21, 0,19, 0, 0,28, 0,25, 0, 9, 1, 0,
        17, 0, 4, 0, 0, 0,11, 0,13,22,20, 0,26, 0, 0,18,
        5, 0, 0,23, 0,27, 0, 6,0,24, 7, 0, 8, 0, 0, 0
    };
#endif

    /* Propagate leftmost 1-bit to the right */
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >> 16);
    /*把最高位後的位元全部補為1*/
    
    /* x = x * 0x6EB14F9 */
    x = (x << 3) - x;   /* Multiply by 7. */
    x = (x << 8) - x;   /* Multiply by 255. */
    x = (x << 8) - x;   /* Again. */
    x = (x << 8) - x;   /* Again. */

    return Table[(x >> 26)];
    /*用前面6個位元表示ｌｅａｄｉｎｇ　ｚｅｒｏ有幾個*/
}

程式執行結果

參考資料