Try   HackMD

2022q1 Homework8 (quiz8)

tags: 2022-04-04StevenChou499

測驗 1

利用上述 SIMD within a register (SWAR) 的技巧,我們可改寫為以下 memchr_opt 函式:

#include <stddef.h> #include <stdint.h> #include <limits.h> #include <string.h> /* Nonzero if either X or Y is not aligned on a "long" boundary */ #define UNALIGNED(X) ((long) X & (sizeof(long) - 1)) /* How many bytes are loaded each iteration of the word copy loop */ #define LBLOCKSIZE (sizeof(long)) /* Threshhold for punting to the bytewise iterator */ #define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE) #if LONG_MAX == 2147483647L #define DETECT_NULL(X) (((X) -0x01010101) & ~(X) & 0x80808080) #else #if LONG_MAX == 9223372036854775807L /* Nonzero if X (a long int) contains a NULL byte. */ #define DETECT_NULL(X) (((X) -0x0101010101010101) & ~(X) & 0x8080808080808080) #else #error long int is not a 32bit or 64bit type. #endif #endif /* @return nonzero if (long)X contains the byte used to fill MASK. */ #define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK)) void *memchr_opt(const void *src_void, int c, size_t length) { const unsigned char *src = (const unsigned char *) src_void; unsigned char d = c; while (UNALIGNED(src)) { if (!length--) return NULL; if (*src == d) return (void *) src; src++; } if (!TOO_SMALL(length)) { /* If we get this far, we know that length is large and * src is word-aligned. */ /* The fast code reads the source one word at a time and only performs * the bytewise search on word-sized segments if they contain the search * character, which is detected by XORing the word-sized segment with a * word-sized block of the search character and then detecting for the * presence of NULL in the result. */ unsigned long *asrc = (unsigned long *) src; unsigned long mask = d << 8 | d; mask = mask << 16 | mask; for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1) mask = (mask << i) | mask; while (length >= LBLOCKSIZE) { /* XXXXX: Your implementation should appear here */ } /* If there are fewer than LBLOCKSIZE characters left, then we resort to * the bytewise loop. */ src = (unsigned char *) asrc; } while (length--) { if (*src == d) return (void *) src; src++; } return NULL; }

請補完程式碼,使上述 memchr_opt 的實作符合 memchr 行為,作答規範:

  1. 列出 memchr_opt 函式完整程式碼,儘量撰寫程式註解
  2. XXXX 所在的 scope 應該利用 DETECT_CHAR 巨集
  3. 儘量以最精簡的程式碼撰寫

Trace code

為了知道 XXXXX 需要填什麼,我們需要先了解程式碼的內容與用意。

#define UNALIGNED(X) ((long) X & (sizeof(long) - 1))

UNALIGNED() 的作用是確認該字串的地址是否有以 long 的長度來做對齊,若有則會回傳 0 ,反之則會回傳其他整數。

#define LBLOCKSIZE (sizeof(long))

根據不同的 Data Model , long 所需的字節數也不同,因此需要以巨集來定義。

#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)

比較其大小是否超過 long 的長度。

#if LONG_MAX == 2147483647L #define DETECT_NULL(X) (((X) -0x01010101) & ~(X) & 0x80808080) #else #if LONG_MAX == 9223372036854775807L /* Nonzero if X (a long int) contains a NULL byte. */ #define DETECT_NULL(X) (((X) -0x0101010101010101) & ~(X) & 0x8080808080808080) #else #error long int is not a 32bit or 64bit type. #endif #endif

此處的用意為區分 32-bit 與 64-bit 在數字上表現的極限差異,並且其中需要的 DETECT_NULL() 也會因此需要調整。

#define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK))

透過 DETECT_NULL() 去檢查是否擁有該字節,若沒有則會回傳 0 ,反之則不會。

const unsigned char *src = (const unsigned char *) src_void; unsigned char d = c; while (UNALIGNED(src)) { if (!length--) return NULL; if (*src == d) return (void *) src; src++; }

此處為判斷 src 是否有對齊成功,若有則不會進入,若無則會使用例題之方法檢查並回傳地址或是回傳  NULL

if (!TOO_SMALL(length)) { /* If we get this far, we know that length is large and * src is word-aligned. */ /* The fast code reads the source one word at a time and only performs * the bytewise search on word-sized segments if they contain the search * character, which is detected by XORing the word-sized segment with a * word-sized block of the search character and then detecting for the * presence of NULL in the result. */ unsigned long *asrc = (unsigned long *) src; unsigned long mask = d << 8 | d; mask = mask << 16 | mask; for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1) mask = (mask << i) | mask; while (length >= LBLOCKSIZE) { /* XXXXX: Your implementation should appear here */ } /* If there are fewer than LBLOCKSIZE characters left, then we resort to * the bytewise loop. */ src = (unsigned char *) asrc; }

此處是先確認字傳長度夠長,接著定義一個指向 unsigned longasrc ,並且使用想要找到的 d 製作一個 mask ,後面的 for 迴圈是因為會有不同的 Data Model 導致 long 所佔的 bytes 不一樣。

XXXXX 填答:

while 迴圈中就是正式尋找字元 c 的範圍了。因為是需要依照 LBLOCKSIZE 的長度去做尋找,所以我們需要透過 DETECT_CHAR() ,再加上 maskasrc 來尋找,但是由於 asrc 為一指標,所以需要加上 * 。若 DETECT_CHAR(mask, *asrc) 回傳非 0 值,代表在這 8 個 byte 中有找到所需字元 c ,這時必須停止尋找,並跳出迴圈,因此要加上 break 。反之若回傳 0 則代表還沒有找到,則需要跳往下 8 個 byte 繼續尋找,此時必須將 asrc++ 便會一次跳躍 8 個 byte ,並且 length 要減去 LBLOCKSIZE ,這樣才不會已經檢查完了但確還沒有跳出迴圈。

完整程式碼註解

#include <stddef.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>

/* Nonzero if either X or Y is not aligned on a "long" boundary */
#define UNALIGNED(X) ((long) X & (sizeof(long) - 1))

/* How many bytes are loaded each iteration of the word copy loop */
#define LBLOCKSIZE (sizeof(long))

/* Threshhold for punting to the bytewise iterator */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)

#if LONG_MAX == 2147483647L // computer is 32-bit
#define DETECT_NULL(X) (((X) -0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L // computer is 64-bit
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) -0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif

/* @return nonzero if (long)X contains the byte used to fill MASK. */
#define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK))

void *memchr_opt(const void *src_void, int c, size_t length)
{
    const unsigned char *src = (const unsigned char *) src_void;
    unsigned char d = c;

    while (UNALIGNED(src)) { // use normal way to find character
        if (!length--)
            return NULL;
        if (*src == d)
            return (void *) src;
        src++;
    }

    if (!TOO_SMALL(length)) { // the length si long enough
        /* If we get this far, we know that length is large and
         * src is word-aligned.
         */

        /* The fast code reads the source one word at a time and only performs
         * the bytewise search on word-sized segments if they contain the search
         * character, which is detected by XORing the word-sized segment with a
         * word-sized block of the search character and then detecting for the
         * presence of NULL in the result.
         */
        unsigned long *asrc = (unsigned long *) src;
        unsigned long mask = d << 8 | d; // create a mask full of character d
        mask = mask << 16 | mask;
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
            mask = (mask << i) | mask; // the mask is whether 4 or 8 bytes long

        while (length >= LBLOCKSIZE) {
            if(DETECT_CHAR(mask, *asrc)){ // return non-zero if detect d in *asrc
                break; // if detected, exit the while loop
            }
            asrc++; // if not detected, jump to the next 8 bytes
            length -= LBLOCKSIZE; // subtract length by LBLOCKSIZE
        }

        /* If there are fewer than LBLOCKSIZE characters left, then we resort to
         * the bytewise loop.
         */
        src = (unsigned char *) asrc;
    }

    while (length--) { // continue searching for character d
        if (*src == d)
            return (void *) src; // if found, return the address
        src++; // if not found, jump to the next character
    }

    return NULL; // if all not found, return NULL
}

解答:

while (length >= LBLOCKSIZE) {
    /* XXXXX: Your implementation should appear here */
    if(DETECT_CHAR(mask, *asrc)){
        break;
    }
    asrc++;
    length -= LBLOCKSIZE;
}

測驗 2