---
tags: linux2022
---

# `2022-04-04` cwl0429

## 測驗 `1`

這題目的目標是將 memchr 改寫成 SWAR 版本

```c
void *memchr_opt(const void *src_void, int c, size_t length)
{
    const unsigned char *src = (const unsigned char *) src_void;
    unsigned char d = c;

    /* Search byte by byte until src is word-aligned. */
    while (UNALIGNED(src)) {
        if (!length--)
            return NULL;
        if (*src == d)
            return (void *) src;
        src++;
    }

    if (!TOO_SMALL(length)) {
        /* If we get this far, we know that length is large and
         * src is word-aligned.
         */

        /* The fast code reads the source one word at a time and only performs
         * the bytewise search on word-sized segments if they contain the search
         * character, which is detected by XORing the word-sized segment with a
         * word-sized block of the search character and then detecting for the
         * presence of NULL in the result.
         */
        unsigned long *asrc = (unsigned long *) src;
        unsigned long mask = d << 8 | d; // 16 bits mask (include two character)
        mask = mask << 16 | mask;        // 32 bits mask
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1) // 64 bits mask
            mask = (mask << i) | mask;

        while (length >= LBLOCKSIZE) {
            /* XXXXX: Your implementation should appear here */
            /* DETECT_CHAR is non-zero when some byte of the word equals d.
             * Which bit is set depends on the byte order, so the offset is
             * not derived from the bit position; leave the loop and let the
             * bytewise loop below locate the exact byte.
             */
            if (DETECT_CHAR(*asrc, mask))
                break;
            length -= LBLOCKSIZE;
            asrc += 1;
        }

        /* Either the current word contains the character or there are fewer
         * than LBLOCKSIZE characters left; in both cases we resort to the
         * bytewise loop.
         */
        src = (unsigned char *) asrc;
    }

    while (length--) {
        if (*src == d)
            return (void *) src;
        src++;
    }

    return NULL;
}
```

主要新增此段程式碼,想法是利用老師提供的 `DETECT_CHAR` 巨集找出此 64 bits 或 32 bits 內是否存在目標 character

- 若是有則跳出迴圈,由後面的逐 byte 迴圈找出目標 character 所在的地址並回傳(此時 `length >= LBLOCKSIZE`,所以一定會在這個 word 內找到)
- 否則將 `length` 更新並將 `asrc` 指到下一個 unsigned long 所在地址

> 注意:不能用 `(__builtin_clzl(cmp) >> 3) - 1` 計算位移。以 little-endian 為例,目標在第 0 個 byte 時 `cmp` 為 `0x80`,算出來的位移會是 6;目標在最高的 byte 時則會得到 -1。

```c
while (length >= LBLOCKSIZE) {
    /* XXXXX: Your implementation should appear here */
    if (DETECT_CHAR(*asrc, mask))
        break;
    length -= LBLOCKSIZE;
    asrc += 1;
}
```

## 測驗 `2`

此 function 的用途是 reload idx

- 若是 `fresh` 領先 `idx` 則將 `idx` 替換成 `fresh`
- 否則就使用接續的下一個 `idx`

```c
static inline ringidx_t cond_reload(ringidx_t idx, const ringidx_t *loc)
{
    ringidx_t fresh = __atomic_load_n(loc, __ATOMIC_RELAXED);
    if (before(idx, fresh)) {
        /* fresh is after idx, use this instead */
        idx = fresh;
    } else {
        /* Continue with next slot */
        /* XXXXX */ idx++;
    }
    return idx;
}
```

這邊需要填入 `KKK` 及 `TTT`

- `KKK` 需要填入正確的 `size`:ring 是用 `& mask` 取索引,所以大小為 `mask + 1`
- `TTT` 是 `__atomic_load_n` 的參數,載入 ring 中 `tail` 所對應 slot 的 `idx`,並和 `tail` 做比對;相等代表該 slot 已被寫入但 `lfr->tail` 尚未更新,因此可以繼續往後掃描

```c
static inline ringidx_t find_tail(lfring_t *lfr, ringidx_t head, ringidx_t tail)
{
    if (lfr->flags & LFRING_FLAG_SP) /* single-producer enqueue */
        return __atomic_load_n(&lfr->tail, __ATOMIC_ACQUIRE);

    /* Multi-producer enqueue.
     * Scan ring for new elements that have been written but not released.
     */
    ringidx_t mask = lfr->mask;
    ringidx_t size = /* XXXXX KKK*/ mask + 1;
    /* NOTE(review): assumes each ring slot has an `idx` field that the
     * producer stores (with release semantics) after writing `ptr`, as in the
     * upstream lfring implementation - confirm against the struct definition.
     */
    while (before(tail, head + size) &&
           __atomic_load_n(/* XXXXX TTT*/ &lfr->ring[tail & mask].idx,
                           __ATOMIC_ACQUIRE) == tail)
        tail++;
    tail = cond_update(&lfr->tail, tail);
    return tail;
}
```

此處要填入 `HHH`

- `HHH` 是 __atomic_compare_exchange_n 的 `desired` 部份,可以看出此處需要更新 `head` 的數值;因為這次取出了 `actual` 個元素,所以新的 `head` 應為 `head + actual`(和 single-consumer 分支的 `__atomic_store_n` 一致)

```c
uint32_t lfring_dequeue(lfring_t *lfr,
                        void **restrict elems,
                        uint32_t n_elems,
                        uint32_t *index)
{
    ringidx_t mask = lfr->mask;
    intptr_t actual;
    ringidx_t head = __atomic_load_n(&lfr->head, __ATOMIC_RELAXED);
    ringidx_t tail = __atomic_load_n(&lfr->tail, __ATOMIC_ACQUIRE);
    do {
        actual = MIN((intptr_t)(tail - head), (intptr_t) n_elems);
        if (UNLIKELY(actual <= 0)) {
            /* Ring buffer is empty, scan for new but unreleased elements */
            tail = find_tail(lfr, head, tail);
            actual = MIN((intptr_t)(tail - head), (intptr_t) n_elems);
            if (actual <= 0)
                return 0;
        }
        for (uint32_t i = 0; i < (uint32_t) actual; i++)
            elems[i] = lfr->ring[(head + i) & mask].ptr;
        smp_fence(LoadStore);  // Order loads only
        if (UNLIKELY(lfr->flags & LFRING_FLAG_SC)) { /* Single-consumer */
            __atomic_store_n(&lfr->head, head + actual, __ATOMIC_RELAXED);
            break;
        }
        /* else: lock-free multi-consumer */
    } while (!__atomic_compare_exchange_n(
        &lfr->head, &head, /* Updated on failure */
        /* XXXXX HHH*/ head + actual, /* claim all `actual` slots just read */
        /* weak */ false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
    //__atomic_compare_exchange_n (ptr, expected, desired, ...)
    *index = (uint32_t) head;
    return (uint32_t) actual;
}
```

## 測驗 `3`

研讀中..