2022q1 Homework8 (quiz8)

tags: `2022-04-04` 、 `StevenChou499`

測驗 `1`

利用上述 SIMD within a register (SWAR) 的技巧，我們可改寫為以下 memchr_opt 函式:












































































#include <stddef.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>

/* Nonzero if either X or Y is not aligned on a "long" boundary */
#define UNALIGNED(X) ((long) X & (sizeof(long) - 1))

/* How many bytes are loaded each iteration of the word copy loop */
#define LBLOCKSIZE (sizeof(long))

/* Threshhold for punting to the bytewise iterator */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)

#if LONG_MAX == 2147483647L
#define DETECT_NULL(X) (((X) -0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) -0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif

/* @return nonzero if (long)X contains the byte used to fill MASK. */
#define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK))

void *memchr_opt(const void *src_void, int c, size_t length)
{
    const unsigned char *src = (const unsigned char *) src_void;
    unsigned char d = c;

    while (UNALIGNED(src)) {
        if (!length--)
            return NULL;
        if (*src == d)
            return (void *) src;
        src++;
    }

    if (!TOO_SMALL(length)) {
        /* If we get this far, we know that length is large and
         * src is word-aligned.
         */

        /* The fast code reads the source one word at a time and only performs
         * the bytewise search on word-sized segments if they contain the search
         * character, which is detected by XORing the word-sized segment with a
         * word-sized block of the search character and then detecting for the
         * presence of NULL in the result.
         */
        unsigned long *asrc = (unsigned long *) src;
        unsigned long mask = d << 8 | d;
        mask = mask << 16 | mask;
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
            mask = (mask << i) | mask;

        while (length >= LBLOCKSIZE) {
            /* XXXXX: Your implementation should appear here */
        }

        /* If there are fewer than LBLOCKSIZE characters left, then we resort to
         * the bytewise loop.
         */
        src = (unsigned char *) asrc;
    }

    while (length--) {
        if (*src == d)
            return (void *) src;
        src++;
    }

    return NULL;
}

請補完程式碼，使上述 memchr_opt 的實作符合 memchr 行為，作答規範:

列出 memchr_opt 函式完整程式碼，儘量撰寫程式註解
XXXX 所在的 scope 應該利用 DETECT_CHAR 巨集
儘量以最精簡的程式碼撰寫

Trace code

為了知道 XXXXX 需要填什麼，我們需要先了解程式碼的內容與用意。


#define UNALIGNED(X) ((long) X & (sizeof(long) - 1))

UNALIGNED() 的作用是確認該字串的地址是否有以 long 的長度來做對齊，若有則會回傳 0 ，反之則會回傳其他整數。


#define LBLOCKSIZE (sizeof(long))

根據不同的 Data Model ， long 所需的字節數也不同，因此需要以巨集來定義。


#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)

比較其大小是否超過 long 的長度。










#if LONG_MAX == 2147483647L
#define DETECT_NULL(X) (((X) -0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) -0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif

此處的用意為區分 32-bit 與 64-bit 在數字上表現的極限差異，並且其中需要的 DETECT_NULL() 也會因此需要調整。


#define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK))

透過 DETECT_NULL() 去檢查是否擁有該字節，若沒有則會回傳 0 ，反之則不會。










    const unsigned char *src = (const unsigned char *) src_void;
    unsigned char d = c;

    while (UNALIGNED(src)) {
        if (!length--)
            return NULL;
        if (*src == d)
            return (void *) src;
        src++;
    }

此處為判斷 src 是否有對齊成功，若有則不會進入，若無則會使用例題之方法檢查並回傳地址或是回傳　 NULL 。


























    if (!TOO_SMALL(length)) {
        /* If we get this far, we know that length is large and
         * src is word-aligned.
         */

        /* The fast code reads the source one word at a time and only performs
         * the bytewise search on word-sized segments if they contain the search
         * character, which is detected by XORing the word-sized segment with a
         * word-sized block of the search character and then detecting for the
         * presence of NULL in the result.
         */
        unsigned long *asrc = (unsigned long *) src;
        unsigned long mask = d << 8 | d;
        mask = mask << 16 | mask;
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
            mask = (mask << i) | mask;

        while (length >= LBLOCKSIZE) {
            /* XXXXX: Your implementation should appear here */
        }

        /* If there are fewer than LBLOCKSIZE characters left, then we resort to
         * the bytewise loop.
         */
        src = (unsigned char *) asrc;
    }

此處是先確認字傳長度夠長，接著定義一個指向 unsigned long 的 asrc ，並且使用想要找到的 d 製作一個 mask ，後面的 for 迴圈是因為會有不同的 Data Model 導致 long 所佔的 bytes 不一樣。

`XXXXX` 填答：

在 while 迴圈中就是正式尋找字元 c 的範圍了。因為是需要依照 LBLOCKSIZE 的長度去做尋找，所以我們需要透過 DETECT_CHAR() ，再加上 mask 與 asrc 來尋找，但是由於 asrc 為一指標，所以需要加上 * 。若 DETECT_CHAR(mask, *asrc) 回傳非 0 值，代表在這 8 個 byte 中有找到所需字元 c ，這時必須停止尋找，並跳出迴圈，因此要加上 break 。反之若回傳 0 則代表還沒有找到，則需要跳往下 8 個 byte 繼續尋找，此時必須將 asrc++ 便會一次跳躍 8 個 byte ，並且 length 要減去 LBLOCKSIZE ，這樣才不會已經檢查完了但確還沒有跳出迴圈。

完整程式碼註解

#include <stddef.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>

/* Nonzero if either X or Y is not aligned on a "long" boundary */
#define UNALIGNED(X) ((long) X & (sizeof(long) - 1))

/* How many bytes are loaded each iteration of the word copy loop */
#define LBLOCKSIZE (sizeof(long))

/* Threshhold for punting to the bytewise iterator */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)

#if LONG_MAX == 2147483647L // computer is 32-bit
#define DETECT_NULL(X) (((X) -0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L // computer is 64-bit
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) -0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif

/* @return nonzero if (long)X contains the byte used to fill MASK. */
#define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK))

void *memchr_opt(const void *src_void, int c, size_t length)
{
    const unsigned char *src = (const unsigned char *) src_void;
    unsigned char d = c;

    while (UNALIGNED(src)) { // use normal way to find character
        if (!length--)
            return NULL;
        if (*src == d)
            return (void *) src;
        src++;
    }

    if (!TOO_SMALL(length)) { // the length si long enough
        /* If we get this far, we know that length is large and
         * src is word-aligned.
         */

        /* The fast code reads the source one word at a time and only performs
         * the bytewise search on word-sized segments if they contain the search
         * character, which is detected by XORing the word-sized segment with a
         * word-sized block of the search character and then detecting for the
         * presence of NULL in the result.
         */
        unsigned long *asrc = (unsigned long *) src;
        unsigned long mask = d << 8 | d; // create a mask full of character d
        mask = mask << 16 | mask;
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
            mask = (mask << i) | mask; // the mask is whether 4 or 8 bytes long

        while (length >= LBLOCKSIZE) {
            if(DETECT_CHAR(mask, *asrc)){ // return non-zero if detect d in *asrc
                break; // if detected, exit the while loop
            }
            asrc++; // if not detected, jump to the next 8 bytes
            length -= LBLOCKSIZE; // subtract length by LBLOCKSIZE
        }

        /* If there are fewer than LBLOCKSIZE characters left, then we resort to
         * the bytewise loop.
         */
        src = (unsigned char *) asrc;
    }

    while (length--) { // continue searching for character d
        if (*src == d)
            return (void *) src; // if found, return the address
        src++; // if not found, jump to the next character
    }

    return NULL; // if all not found, return NULL
}

解答：

while (length >= LBLOCKSIZE) {
    /* XXXXX: Your implementation should appear here */
    if(DETECT_CHAR(mask, *asrc)){
        break;
    }
    asrc++;
    length -= LBLOCKSIZE;
}

2022q1 Homework8 (quiz8)

tags: 2022-04-04 、 StevenChou499

測驗 1

Trace code

XXXXX 填答：

完整程式碼註解

測驗 2

Read more

Ring buffer 期末報告

數位 IC 設計

HW4 ATCONV

HDLBits 習題

tags: `2022-04-04` 、 `StevenChou499`

測驗 `1`

`XXXXX` 填答：

測驗 `2`