# `2022-04-04` by `Kevin-Shih` ## 測驗 `1` `memchr_opt` 函式完整程式碼: ```c #include <stddef.h> #include <stdint.h> #include <limits.h> #include <string.h> /* Nonzero if X is not aligned on a "long" boundary */ #define UNALIGNED(X) ((long) X & (sizeof(long) - 1)) /* How many bytes are loaded each iteration of the word copy loop */ #define LBLOCKSIZE (sizeof(long)) /* Threshhold for punting to the bytewise iterator */ #define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE) #if LONG_MAX == 2147483647L #define DETECT_NULL(X) (((X) -0x01010101) & ~(X) & 0x80808080) #else #if LONG_MAX == 9223372036854775807L /* Nonzero if X (a long int) contains a NULL byte. */ #define DETECT_NULL(X) (((X) -0x0101010101010101) & ~(X) & 0x8080808080808080) #else #error long int is not a 32bit or 64bit type. #endif #endif /* @return nonzero if (long)X contains the byte used to fill MASK. */ #define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK)) void *memchr_opt(const void *src_void, int c, size_t length) { const unsigned char *src = (const unsigned char *) src_void; unsigned char d = c; /* Use bytewise loop to search till src is aligned on a "long" boundary * or reach the end of src. */ while (UNALIGNED(src)) { if (!length--) return NULL; if (*src == d) return (void *) src; src++; } if (!TOO_SMALL(length)) { /* If we get this far, we know that length is large and * src is word-aligned. */ /* The fast code reads the source one word at a time and only performs * the bytewise search on word-sized segments if they contain the search * character, which is detected by XORing the word-sized segment with a * word-sized block of the search character and then detecting for the * presence of NULL in the result. */ unsigned long *asrc = (unsigned long *) src; unsigned long mask = d << 8 | d; mask = mask << 16 | mask; for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1) mask = (mask << i) | mask; while (length >= LBLOCKSIZE) { /* XXXXX: Your implementation should appear here */ /* If this word-sized segments contain the search character, * then break the loop and resort to the bytewise loop. */ if (DETECT_CHAR(*asrc, mask)) break; /* If this word-sized segments does not contain the target character, * then `asrc` point to next word-sized segments. */ asrc = (void *)asrc + LBLOCKSIZE; length -= LBLOCKSIZE; } /* If there are fewer than LBLOCKSIZE characters left, then we resort to * the bytewise loop. */ src = (unsigned char *) asrc; } while (length--) { if (*src == d) return (void *) src; src++; } return NULL; } ``` ## 測驗 `2` - `DDD` : idx++ next slot, so let idx = idx + 1 - `KKK` : mask + 1 因前面設定 lfr->mask = ringsz - 1 故推論 size = mask +1 - `TTT` : &lfr->ring[tail & mask].idx, __ATOMIC_RELAXED 因前面的 `lfring_enqueue` 有 `lfr->ring[tail & mask].idx = tail;` 的片段,因此推論要檢驗 lfr->ring[tail & mask].idx 與 tail 是否相等。 - `HHH` : head + actual 由 do while loop 中的 Single-consumer 的部份 ```c if (UNLIKELY(lfr->flags & LFRING_FLAG_SC)) { /* Single-consumer */ __atomic_store_n(&lfr->head, head + actual, __ATOMIC_RELAXED); break; } ``` 可知在跳出迴圈前要將 `head + actual` 存入 `&lfr->head`,又依據 `__atomic_compare_exchange_n` 在將 desire (`HHH`) 寫入*ptr (`&lfr->head`) 後會回傳 true ,再加上 `NOT` 後就會跳出 loop,因此 `HHH`= `head + actual`。 > __atomic_compare_exchange_n (type *ptr, type *expected, type desired, bool weak, int success_memorder, int failure_memorder) > If desired is written into *ptr then true is returned > --[gcc](https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html) 修改後的程式碼: ```c static inline ringidx_t cond_reload(ringidx_t idx, const ringidx_t *loc) { ringidx_t fresh = __atomic_load_n(loc, __ATOMIC_RELAXED); if (before(idx, fresh)) { /* fresh is after idx, use this instead */ idx = fresh; } else { /* Continue with next slot */ /* XXXXX */ idx++; } return idx; } static inline ringidx_t find_tail(lfring_t *lfr, ringidx_t head, ringidx_t tail) { if (lfr->flags & LFRING_FLAG_SP) /* single-producer enqueue */ return __atomic_load_n(&lfr->tail, __ATOMIC_ACQUIRE); /* Multi-producer enqueue. * Scan ring for new elements that have been written but not released. */ ringidx_t mask = lfr->mask; ringidx_t size = /* XXXXX */ mask + 1; /* Since we got `lfr->ring[tail & mask].idx = tail;` in the `lfring_enqueue` * function which enqueue elements at tail, so load * `&lfr->ring[tail & mask].idx` which should equal to tail */ while (before(tail, head + size) && /* XXXXX */ __atomic_load_n(&lfr->ring[tail & mask].idx, __ATOMIC_RELAXED) == tail) tail++; tail = cond_update(&lfr->tail, tail); return tail; } uint32_t lfring_dequeue(lfring_t *lfr, void **restrict elems, uint32_t n_elems, uint32_t *index) { ringidx_t mask = lfr->mask; intptr_t actual; ringidx_t head = __atomic_load_n(&lfr->head, __ATOMIC_RELAXED); ringidx_t tail = __atomic_load_n(&lfr->tail, __ATOMIC_ACQUIRE); do { actual = MIN((intptr_t)(tail - head), (intptr_t) n_elems); if (UNLIKELY(actual <= 0)) { /* Ring buffer is empty, scan for new but unreleased elements */ tail = find_tail(lfr, head, tail); actual = MIN((intptr_t)(tail - head), (intptr_t) n_elems); if (actual <= 0) return 0; } for (uint32_t i = 0; i < (uint32_t) actual; i++) elems[i] = lfr->ring[(head + i) & mask].ptr; smp_fence(LoadStore); // Order loads only if (UNLIKELY(lfr->flags & LFRING_FLAG_SC)) { /* Single-consumer */ __atomic_store_n(&lfr->head, head + actual, __ATOMIC_RELAXED); break; } /* else: lock-free multi-consumer */ } while (!__atomic_compare_exchange_n( &lfr->head, &head, /* Updated on failure */ /* If &lfr->head == &head, then &lfr->head = head + actual * else &head = &lfr->head. */ /* XXXXX */ head + actual, /* weak */ false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)); *index = (uint32_t) head; return (uint32_t) actual; } ``` ## 測驗 `3` 修改後的 `periodic_routine` 程式碼: ```c static void periodic_routine(struct work_struct *ws) { if (likely(ws)) // 當 ws 不為 null check(); queue_delayed_work(wq, &dont_trace_task, JIFFIES_DELAY); /* 因 dont_trace_init 只呼叫一次 queue_delayed_work 因此每次執行 periodic_routine * 都呼叫一次 queue_delayed_work 來實現重複檢查是否有 process 在 ptracing */ ```