2024q1 Homework2 (quiz1+2)

contributed by < 404allen404 >

開發環境

$ gcc --version
gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
Copyright (C) 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

$ lscpu
Architecture:            x86_64
  CPU op-mode(s):        32-bit, 64-bit
  Address sizes:         39 bits physical, 48 bits virtual
  Byte Order:            Little Endian
CPU(s):                  12
  On-line CPU(s) list:   0-11
Vendor ID:               GenuineIntel
  Model name:            Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz
    CPU family:          6
    Model:               165
    Thread(s) per core:  2
    Core(s) per socket:  6
    Socket(s):           1
    Stepping:            2
    CPU max MHz:         5000.0000
    CPU min MHz:         800.0000
    BogoMIPS:            5199.98

quiz1

quiz2

測驗 3

測驗 3 的程式碼先定義了許多巨集和函數，在此先說明它們的功能。

BIT_MASK(nr) 這個巨集用來產生第 nr 個位元為 1 的 mask ， mask 的長度為 BITS_PER_LONG 個位元。要加上 % 的原因是因為要避免產生的 mask 變為 0 。

#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))

BIT_WORD(nr) 這個巨集用來換算 nr 是幾個 word ，所以直接把 nr 除以 BITS_PER_LONG 就好。

#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)

small_const_nbits(nbits) 這個巨集透過 __buildin_constant_p 確認 nbits 是一個常數而不是變數，並且確認 nbits 是一個大於零的數字且小於等於 BITS_PER_LONG 。

想問一個問題，為什麼需要使用 __buildin_constant_p 去確認 nbits 是否為常數呢？這樣會幫助編譯器去最佳化我們的程式嗎？

#define small_const_nbits(nbits) \
    (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)

GENMASK(h, l) 這個巨集可以分成兩部份來解釋，第一個部份是 ((~0UL) - (1UL << (l)) + 1) ，這一個部份是用來確保第 0 個位元到第 l - 1 個位元是 0 ，而另外一個部份是 (~0UL >> (BITS_PER_LONG - 1 - (h))) ，這個部份用來確保第 l + 1 個位元到最後一個位元的值都是 0 ，最後把這兩個部份做 & 運算，就可以得到從第 l 個位元到第 h 個位元的值都是 1 ，且剩餘的位元都是 0 的 mask 。

#define GENMASK(h, l) \
    (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))

__const_hweight8(w) 這個巨集是用來計算 w 的低 8 個位元有幾個 1 。

#define __const_hweight8(w)                                              \
    ((unsigned int) ((!!((w) & (1ULL << 0))) + (!!((w) & (1ULL << 1))) + \
                     (!!((w) & (1ULL << 2))) + (!!((w) & (1ULL << 3))) + \
                     (!!((w) & (1ULL << 4))) + (!!((w) & (1ULL << 5))) + \
                     (!!((w) & (1ULL << 6))) + (!!((w) & (1ULL << 7)))))

__const_hweight16(w) 這個巨集使用了 __const_hweight8(w) 這個巨集去算出 w 的低 16 個位元有幾個 1 。

#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8))

__const_hweight32(w) 這個巨集使用了 __const_hweight16(w) 這個巨集去算出 w 的低 32 個位元有幾個 1 。

#define __const_hweight32(w) \
    (__const_hweight16(w) + __const_hweight16((w) >> 16))

__const_hweight64(w) 這個巨集使用了 __const_hweight32(w) 這個巨集用來計算 w 總共有幾個位元是 1 。

#define __const_hweight64(w) \
    (__const_hweight32(w) + __const_hweight32((w) >> 32))

hweight_long 這個函數用來計算 w 總共有幾個位元是 1 。

static inline unsigned long hweight_long(unsigned long w)
{
    return __const_hweight64(w);
}

min(x, y) 這個巨集用來得到 x 和 y 之中比較小的那個值。

#define min(x, y) (x) < (y) ? (x) : (y)

BITMAP_LAST_WORD_MASK(nbits) 這個巨集主要是用來得到低 nbits 位元都是 1 的 mask ，剩餘的位元都是 0 。接著做更詳細的討論，假設 nbits 是非負整數，當

64 \geq n b i t s \geq 0

，時，可以得到低 nbits 位元都是 1 的 mask ，而當

n b i t s > 64

時，可以得到低 nbits % 64 個位元都是 1 的 mask 。

#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))

FIND_NTH_BIT 這個巨集用來找從 FETCH 這個位置開始尋找第 num 個位元是 1 的位置。首先先來解釋 for 迴圈的作用，當 size 大於 BITS_PER_LONG 的時候，代表有可能改變 idx 用來得到更後面記憶體中的值，所以才需要這個 for 迴圈找到合適的 idx 值。在 for 迴圈內，透過 hweight_long 得到 tmp 有幾個位元是 1 ，就可以透過 w 是否大於 nr ，知道是否要讓 for 迴圈繼續下一回合，因為如果 w 小於等於 nr 就代表在 FETCH[idx] 的值已經可以找到第 num 的 1 了，可以直接用 goto 跳至 found 這個標籤開始尋找。最後解釋一下 for 迴圈出來後的 if ，這個 if 是用來檢查 sz 是不是 BITS_PER_LONG 的倍數，如果不是的話，就產生一個低 sz % 64 位元都是 1 的 mask ，讓 tmp 只保留低 sz % 64 位元的值，剩下的位元都變成 0 ，這樣才能確保 fns 不會檢查超過 size 的範圍。

#define FIND_NTH_BIT(FETCH, size, num)                          \
    ({                                                          \
        unsigned long sz = (size), nr = (num), idx, w, tmp;     \
                                                                \
        for (idx = 0; (idx + 1) * BITS_PER_LONG <= sz; idx++) { \
            if (idx * BITS_PER_LONG + nr >= sz)                 \
                goto out;                                       \
                                                                \
            tmp = (FETCH);                                      \
            w = hweight_long(tmp);                              \
            if (w > nr)                                         \
                goto found;                                     \
                                                                \
            nr -= w;                                            \
        }                                                       \
                                                                \
        if (sz % BITS_PER_LONG)                              \
            tmp = (FETCH) & BITMAP_LAST_WORD_MASK(sz);          \
    found:                                                      \
        sz = min(idx * BITS_PER_LONG + fns(tmp, nr), sz);       \
    out:                                                        \
        sz;                                                     \
    })

__ffs 這個函數是用來找到一個第一個值為 1 的位元是哪一個。如果 if 裡面檢查到非零，代表在這個檢查的範圍裡面就有 1 出現了，不用再把值往右移檢查，只需要縮小檢查的範圍為當前檢查的範圍的一半就好。所以 AAAA 要填入 0xffffffff 。

/* find first bit in word.
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
{
    int num = 0;

#if BITS_PER_LONG == 64
    if ((word & 0xffffffff) == 0) {
        num += 32;
        word >>= 32;
    }
#endif
    if ((word & 0xffff) == 0) {
        num += 16;
        word >>= 16;
    }
    if ((word & 0xff) == 0) {
        num += 8;
        word >>= 8;
    }
    if ((word & 0xf) == 0) {
        num += 4;
        word >>= 4;
    }
    if ((word & 0x3) == 0) {
        num += 2;
        word >>= 2;
    }
    if ((word & 0x1) == 0)
        num += 1;
    return num;
}

__clear_bit 這個函數用來把第 nr 個位元變成 0 ，所以 BBBB 要是 ~mask 才對，這樣才能確保只有第 nr 個位元會變成 0 ，剩下的位元都維持原樣。而第 4 行後面要加 BIT_WORD(nr) 是因為如果 nr 大於等於 BITS_PER_LONG ，就需要加 offset ，因為 p 這個指標變數一次只能操作寬度為 BITS_PER_LONG 位元的值。







static inline void __clear_bit(unsigned long nr, volatile unsigned long *addr)
{
    unsigned long mask = BIT_MASK(nr);
    unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr);

    *p &= ~mask;
}

fns 這個函數用來找到一個 word 裡面第 n 個位元是 1 的位置，透過 __ffs 和 __clear_bit 這兩個函數不斷的尋找和清除，直到 n 變成 0 ，就找到第 n 個位元是 1 的位置了。

/* find N'th set bit in a word
 * @word: The word to search
 * @n: Bit to find
 */
static inline unsigned long fns(unsigned long word, unsigned int n)
{
    while (word) {
        unsigned int bit = __ffs(word);
        if (n-- == 0)
            return bit;
        __clear_bit(bit, &word);
    }

    return BITS_PER_LONG;
}

find_nth_bit 這個函數從 addr 這個位置開始尋找第 n 個位元是 1 的位置，而 size 代表要搜尋的最大範圍。當

n \geq s i z e

的時候，代表不可能找到，所以直接 return size 。接著透過 small_const_nbits 確認 size 的範圍是

0 < s i z e \leq B I T S_P E R_L O N G

，這樣就可以透過 GENMASK 產生一個只有要搜尋的範圍的位元是 1 的 mask ，將 mask 和 *addr 做 & 運算，就可以得到一個把要搜尋的範圍以外的位元都變成 0 的 val ，才可以透過 fns 尋找我們要的位置，如果沒找到，一樣 return size ，如果找到了，就 return fns 的結果。會用到 FIND_NTH_BIT 的狀況是當

s i z e > B I T S_P E R_L O N G

的時候，因為上面的方法只能處理

s i z e \leq B I T S_P E R_L O N G

的狀況。

/* find N'th set bit in a memory region
 * @addr: The address to start the search at
 * @size: The maximum number of bits to search
 * @n: The number of set bit, which position is needed, counting from 0
 *
 * The following is semantically equivalent:
 *      idx = find_nth_bit(addr, size, 0);
 *      idx = find_first_bit(addr, size);
 *
 * Returns the bit number of the N'th set bit.
 * If no such, returns @size.
 */
static inline unsigned long find_nth_bit(const unsigned long *addr,
                                           unsigned long size,
                                           unsigned long n)
{
    if (n >= size)
        return size;

    if (small_const_nbits(size)) {
        unsigned long val = *addr & GENMASK(size - 1, 0);

        return val ? fns(val, n) : size;
    }

    return FIND_NTH_BIT(addr[idx], size, n);
}

了解了 find_nth_bit 這個函數的作用後，我開始尋找在 linux 核心的程式碼哪裡有應用到這個函數。

在 include/linux/cpumask.h 內的 cpumask_nth 函數有使用到 find_nth_bit 這個函數。

/**
 * cpumask_nth - get the Nth cpu in a cpumask
 * @srcp: the cpumask pointer
 * @cpu: the Nth cpu to find, starting from 0
 *
 * Return: >= nr_cpu_ids if such cpu doesn't exist.
 */
static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
{
	return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu));
}

2024q1 Homework2 (quiz1+2)

開發環境

quiz1

quiz2

測驗 3

Read more

2024q1 Homework1 (lab0)