quiz8

2022-04-04
scottxxxabc

測驗 1

在 Linux 核心原始程式碼中,lib/string.c 具備 memchr 的實作:

/**
 * memchr - Locate the first occurrence of a byte in a memory region.
 * @s: Start of the region to scan
 * @c: Value to look for; it is compared after conversion to unsigned char
 * @n: Number of bytes in the region
 *
 * Returns a pointer to the first byte equal to @c, or %NULL when none of
 * the first @n bytes match.
 */
void *memchr(const void *s, int c, size_t n)
{
    const unsigned char *bytes = s;
    const unsigned char target = (unsigned char)c;

    for (size_t i = 0; i < n; i++) {
        if (bytes[i] == target)
            return (void *)(bytes + i);
    }
    return NULL;
}

利用上述 SIMD within a register (SWAR) 的技巧,我們可改寫為以下 memchr_opt 函式:

#include <stddef.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>

/* Nonzero if X is not aligned on a "long" boundary */
#define UNALIGNED(X) ((long) X & (sizeof(long) - 1))

/* How many bytes are loaded each iteration of the word copy loop */
#define LBLOCKSIZE (sizeof(long))

/* Threshold for punting to the bytewise iterator */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)

#if LONG_MAX == 2147483647L
#define DETECT_NULL(X) (((X) -0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) -0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif

/* @return nonzero if (long)X contains the byte used to fill MASK. */
#define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK))
/* Word-at-a-time memchr: first byte equal to (unsigned char) c, or NULL. */
void *memchr_opt(const void *src_void, int c, size_t length)
{
    const unsigned char *src = (const unsigned char *) src_void;
    unsigned char d = c;

    while (UNALIGNED(src)) {
        if (!length--)
            return NULL;
        if (*src == d)
            return (void *) src;
        src++;
    }

    if (!TOO_SMALL(length)) {
        /* If we get this far, we know that length is large and
         * src is word-aligned.
         */

        /* The fast code reads the source one word at a time and only performs
         * the bytewise search on word-sized segments if they contain the search
         * character, which is detected by XORing the word-sized segment with a
         * word-sized block of the search character and then detecting for the
         * presence of NULL in the result.
         */
        unsigned long *asrc = (unsigned long *) src;
        unsigned long mask = d << 8 | d;
        mask = mask << 16 | mask;
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
            mask = (mask << i) | mask;

        while (length >= LBLOCKSIZE) {
            /* XXXXX: Your implementation should appear here */
            /* NOTE(review): quiz placeholder -- with an empty body this loop
             * never advances asrc or shrinks length, so it cannot terminate.
             */
        }

        /* If there are fewer than LBLOCKSIZE characters left, then we resort to
         * the bytewise loop.
         */

        src = (unsigned char *) asrc;
    }

    while (length--) {
        if (*src == d)
            return (void *) src;
        src++;
    }

    return NULL;
}
/* Skip one whole word per iteration until a word contains the target byte;
 * the bytewise loop that follows in memchr_opt then pinpoints its position.
 */
while (length >= LBLOCKSIZE) {
    if (DETECT_CHAR(*asrc, mask))
        break;
    asrc++;
    length -= LBLOCKSIZE;
}

想法

  • 根據題目提示得知需使用到 DETECT_CHAR 巨集,DETECT_CHAR 巨集的註解寫到此巨集會在參數 X 含有填滿 MASK 的 byte 時回傳非零。

    • 檢查第 20 行的 DETECT_NULL 會在 X 含有 NULL byte 時回傳非零。
    • 兩個輸入相同時, XOR 的輸出為 0。將 MASK 的每個 byte 都以想要搜尋的字元填滿後,如果 X 的第 n 個 byte 等於想要搜尋的字元, X ^ MASK 的第 n 個 byte 就會是 0,可以用剛才的 DETECT_NULL 巨集偵測。
    • memchr_opt 在第 54 ~ 57 行設定 MASK
    • 若是想要搜尋的字元為 a,以二進位表示為 0110 0001,就要把 MASK 的每個 byte 都設為 0110 0001。在我的電腦上 long 為 64 bits,MASK 應為
      01100001 01100001 01100001 01100001 01100001 01100001 01100001 01100001
  • 觀察題目的第 59 行,可以發現 while 迴圈每次迭代應該要使用 DETECT_CHAR 來檢查 asrc 所指向的 word 是否包含指定字元。

  • 若該 word 不含指定字元,迴圈迭代應該要將 asrc 指標往後移動 long 的長度,並將 length 減少 long 的長度;若含有指定字元則跳出迴圈,交由後面的逐 byte 迴圈找出確切位置。