Try   HackMD

2022-04-04 NOVBobLee

測驗一

使用 SWAR 技巧最佳化 memchr 。

void *memchr_opt(const void *src_void, int c, size_t length)
{
    const unsigned char *src = (const unsigned char *) src_void;
    unsigned char d = c;

    /* adjust the start address to be aligned,
     * and search the target char at the same time
     */
    while (UNALIGNED(src)) {
        if (!length--)
            return NULL;
        if (*src == d)
            return (void *) src;
        src++;
    }

    if (!TOO_SMALL(length)) {
        /* If we get this far, we know that length is large and
         * src is word-aligned.
         */

        /* The fast code reads the source one word at a time and only performs
         * the bytewise search on word-sized segments if they contain the search
         * character, which is detected by XORing the word-sized segment with a
         * word-sized block of the search character and then detecting for the
         * presence of NULL in the result.
         */
        unsigned long *asrc = (unsigned long *) src;
        
        /* Produce a mask with the form
         * | d | d | d | d | d | d | d | d | if it's 64 bits
         * | d | d | d | d |                 if it's 32 bits
         */
        unsigned long mask = d << 8 | d;
        mask = mask << 16 | mask;
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
            mask = (mask << i) | mask;

        /* if the scan range is still in the string */
        while (length >= LBLOCKSIZE) {
            /* XXXXX: Your implementation should appear here */

            /* scan */
            unsigned long pos = DETECT_CHAR(*asrc, mask);
            
            /* if found (nonzero), calculate the position of the first 0x80 met.
             *
             * Suppose it's little endian (e.g. x86), it needs to count the tailing
             * zeros, and turns the bits into bytes, then return the shifted address. 
             */
            if (pos)
                return (void *)((char *) asrc + (__builtin_ctzl(pos) >> 3));
            
            /* if not found (pos is zero), the remained length decreases, and
             * update asrc to the next scan address
             */
            length -= LBLOCKSIZE;
            ++asrc;
        }

        /* If there are fewer than LBLOCKSIZE characters left, then we resort to
         * the bytewise loop.
         */
        src = (unsigned char *) asrc;
    }

    /* search in the remained string */
    while (length--) {
        if (*src == d)
            return (void *) src;
        src++;
    }

    return NULL;
}

執行結果:

$ ./memchr_example
str = "I am the bone of my sword."
Found 'f' at "f my sword."