# 2022-04-04 Quiz8

contributed by [jimmy-liu1021](https://github.com/jimmy-liu1021)

## 測驗1

```c
#include <stddef.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>

/* Nonzero if X is not aligned on a "long" boundary */
#define UNALIGNED(X) ((uintptr_t) (X) & (sizeof(long) - 1))

/* How many bytes are examined each iteration of the word search loop */
#define LBLOCKSIZE (sizeof(long))

/* Threshold for punting to the bytewise iterator */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)

/* Nonzero if X (an unsigned long) contains a NULL byte. */
#if LONG_MAX == 2147483647L
#define DETECT_NULL(X) (((X) - 0x01010101UL) & ~(X) & 0x80808080UL)
#elif LONG_MAX == 9223372036854775807L
#define DETECT_NULL(X) \
    (((X) - 0x0101010101010101UL) & ~(X) & 0x8080808080808080UL)
#else
#error long int is not a 32bit or 64bit type.
#endif

/* @return nonzero if (long)X contains the byte used to fill MASK. */
#define DETECT_CHAR(X, MASK) (DETECT_NULL((X) ^ (MASK)))

/**
 * memchr_opt() - find the first occurrence of a byte in a memory area
 * @src_void: start of the area to search
 * @c: value to look for; converted to unsigned char before comparing
 * @length: number of bytes to examine
 *
 * Return: pointer to the first byte equal to (unsigned char) c within the
 * first @length bytes of @src_void, or NULL if there is no such byte.
 */
void *memchr_opt(const void *src_void, int c, size_t length)
{
    const unsigned char *src = (const unsigned char *) src_void;
    unsigned char d = c;

    /* Compare byte by byte until src reaches a word boundary. */
    while (UNALIGNED(src)) {
        if (!length--) {
            return NULL;
        }
        if (*src == d) {
            return (void *) src;
        }
        src++;
    }

    if (!TOO_SMALL(length)) {
        /* If we get this far, we know that length is large and
         * src is word-aligned.
         */

        /* The fast code reads the source one word at a time and only performs
         * the bytewise search on word-sized segments if they contain the search
         * character, which is detected by XORing the word-sized segment with a
         * word-sized block of the search character and then detecting for the
         * presence of NULL in the result.
         */
        const unsigned long *asrc = (const unsigned long *) src;

        /* Replicate the search byte into every byte of a word. The loop
         * body only runs when long is wider than 32 bits, so the shift
         * count always stays below the width of the type.
         */
        unsigned long mask = (unsigned long) d << 8 | d;
        mask = mask << 16 | mask;
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1) {
            mask = (mask << i) | mask;
        }

        while (length >= LBLOCKSIZE) {
            /* Stop at the first word holding the search byte; the bytewise
             * loop below pinpoints its exact position. length has not been
             * reduced for this word yet, so that loop still covers it.
             */
            if (DETECT_CHAR(*asrc, mask)) {
                break;
            }
            length -= LBLOCKSIZE;
            asrc++;
        }

        /* Either a word containing the byte was found, or fewer than
         * LBLOCKSIZE characters are left; in both cases we resort to the
         * bytewise loop.
         */
        src = (const unsigned char *) asrc;
    }

    while (length--) {
        if (*src == d) {
            return (void *) src;
        }
        src++;
    }

    return NULL;
}

int main(void)
{
    const char str[] = "http://wiki.csie.ncku.edu.tw";
    const char ch = 'h';

    const char *ret = memchr_opt(str, ch, strlen(str));

    /* Passing NULL to %s is undefined, so guard the not-found case. */
    printf("String after |%c| is - |%s|\n", ch, ret ? ret : "(not found)");
    return 0;
}
```

此段程式碼主要想透過一次比對多個 bytes 達到提升尋找字元的效率,以下是其中細節:

- 變數 asrc 一次取出 long-size 的資料,並使用 DETECT_CHAR 去找出在此 long-size 的資料中每一個 byte,是否有想要被尋找的字元。
- 將被尋找的字元作 bit-compound,將該字元自我複合成 long-size 大小,如此一來可利用 XOR 運算兩筆 long-size 的資料以提升效率。(此部分在 DETECT_CHAR 中)
- 若在 long-size 的 asrc 內找到有相符合的字元時,採取 bytewise 的比對,並回傳對應的字串。
- 若 asrc 已不足 long 大小,也要回歸 bytewise 的比對,並回傳。