# `2022-04-04` by `Kevin-Shih`
## 測驗 `1`
`memchr_opt` 函式完整程式碼:
```c
#include <stddef.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>
/* Nonzero if X is not aligned on a "long" boundary */
#define UNALIGNED(X) ((long) X & (sizeof(long) - 1))
/* How many bytes are loaded each iteration of the word copy loop */
#define LBLOCKSIZE (sizeof(long))
/* Threshhold for punting to the bytewise iterator */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
#if LONG_MAX == 2147483647L
#define DETECT_NULL(X) (((X) -0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) -0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif
/* @return nonzero if (long)X contains the byte used to fill MASK. */
#define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK))
void *memchr_opt(const void *src_void, int c, size_t length)
{
const unsigned char *src = (const unsigned char *) src_void;
unsigned char d = c;
/* Use bytewise loop to search till src is aligned on a "long" boundary
* or reach the end of src.
*/
while (UNALIGNED(src)) {
if (!length--)
return NULL;
if (*src == d)
return (void *) src;
src++;
}
if (!TOO_SMALL(length)) {
/* If we get this far, we know that length is large and
* src is word-aligned.
*/
/* The fast code reads the source one word at a time and only performs
* the bytewise search on word-sized segments if they contain the search
* character, which is detected by XORing the word-sized segment with a
* word-sized block of the search character and then detecting for the
* presence of NULL in the result.
*/
unsigned long *asrc = (unsigned long *) src;
unsigned long mask = d << 8 | d;
mask = mask << 16 | mask;
for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
mask = (mask << i) | mask;
while (length >= LBLOCKSIZE) {
/* XXXXX: Your implementation should appear here */
/* If this word-sized segments contain the search character,
* then break the loop and resort to the bytewise loop.
*/
if (DETECT_CHAR(*asrc, mask))
break;
/* If this word-sized segments does not contain the target character,
* then `asrc` point to next word-sized segments.
*/
asrc = (void *)asrc + LBLOCKSIZE;
length -= LBLOCKSIZE;
}
/* If there are fewer than LBLOCKSIZE characters left, then we resort to
* the bytewise loop.
*/
src = (unsigned char *) asrc;
}
while (length--) {
if (*src == d)
return (void *) src;
src++;
}
return NULL;
}
```
## 測驗 `2`
- `DDD` : idx++
next slot, so let idx = idx + 1
- `KKK` : mask + 1
因前面設定 lfr->mask = ringsz - 1 故推論 size = mask +1
- `TTT` : &lfr->ring[tail & mask].idx, __ATOMIC_RELAXED
因前面的 `lfring_enqueue` 有 `lfr->ring[tail & mask].idx = tail;` 的片段,因此推論要檢驗 lfr->ring[tail & mask].idx 與 tail 是否相等。
- `HHH` : head + actual
由 do while loop 中的 Single-consumer 的部份
```c
if (UNLIKELY(lfr->flags & LFRING_FLAG_SC)) { /* Single-consumer */
__atomic_store_n(&lfr->head, head + actual, __ATOMIC_RELAXED);
break;
}
```
可知在跳出迴圈前要將 `head + actual` 存入 `&lfr->head`,又依據 `__atomic_compare_exchange_n` 在將 desire (`HHH`) 寫入*ptr (`&lfr->head`) 後會回傳 true ,再加上 `NOT` 後就會跳出 loop,因此 `HHH`= `head + actual`。
> __atomic_compare_exchange_n (type *ptr, type *expected, type desired, bool weak, int success_memorder, int failure_memorder)
> If desired is written into *ptr then true is returned
> --[gcc](https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html)
修改後的程式碼:
```c
static inline ringidx_t cond_reload(ringidx_t idx, const ringidx_t *loc)
{
ringidx_t fresh = __atomic_load_n(loc, __ATOMIC_RELAXED);
if (before(idx, fresh)) { /* fresh is after idx, use this instead */
idx = fresh;
} else { /* Continue with next slot */
/* XXXXX */ idx++;
}
return idx;
}
static inline ringidx_t find_tail(lfring_t *lfr, ringidx_t head, ringidx_t tail)
{
if (lfr->flags & LFRING_FLAG_SP) /* single-producer enqueue */
return __atomic_load_n(&lfr->tail, __ATOMIC_ACQUIRE);
/* Multi-producer enqueue.
* Scan ring for new elements that have been written but not released.
*/
ringidx_t mask = lfr->mask;
ringidx_t size = /* XXXXX */ mask + 1;
/* Since we got `lfr->ring[tail & mask].idx = tail;` in the `lfring_enqueue`
* function which enqueue elements at tail, so load
* `&lfr->ring[tail & mask].idx` which should equal to tail
*/
while (before(tail, head + size) &&
/* XXXXX */ __atomic_load_n(&lfr->ring[tail & mask].idx, __ATOMIC_RELAXED) ==
tail)
tail++;
tail = cond_update(&lfr->tail, tail);
return tail;
}
uint32_t lfring_dequeue(lfring_t *lfr,
void **restrict elems,
uint32_t n_elems,
uint32_t *index)
{
ringidx_t mask = lfr->mask;
intptr_t actual;
ringidx_t head = __atomic_load_n(&lfr->head, __ATOMIC_RELAXED);
ringidx_t tail = __atomic_load_n(&lfr->tail, __ATOMIC_ACQUIRE);
do {
actual = MIN((intptr_t)(tail - head), (intptr_t) n_elems);
if (UNLIKELY(actual <= 0)) {
/* Ring buffer is empty, scan for new but unreleased elements */
tail = find_tail(lfr, head, tail);
actual = MIN((intptr_t)(tail - head), (intptr_t) n_elems);
if (actual <= 0)
return 0;
}
for (uint32_t i = 0; i < (uint32_t) actual; i++)
elems[i] = lfr->ring[(head + i) & mask].ptr;
smp_fence(LoadStore); // Order loads only
if (UNLIKELY(lfr->flags & LFRING_FLAG_SC)) { /* Single-consumer */
__atomic_store_n(&lfr->head, head + actual, __ATOMIC_RELAXED);
break;
}
/* else: lock-free multi-consumer */
} while (!__atomic_compare_exchange_n(
&lfr->head, &head, /* Updated on failure */
/* If &lfr->head == &head, then &lfr->head = head + actual
* else &head = &lfr->head.
*/
/* XXXXX */ head + actual,
/* weak */ false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
*index = (uint32_t) head;
return (uint32_t) actual;
}
```
## 測驗 `3`
修改後的 `periodic_routine` 程式碼:
```c
static void periodic_routine(struct work_struct *ws)
{
if (likely(ws)) // 當 ws 不為 null
check();
queue_delayed_work(wq, &dont_trace_task, JIFFIES_DELAY);
/* 因 dont_trace_init 只呼叫一次 queue_delayed_work 因此每次執行 periodic_routine
* 都呼叫一次 queue_delayed_work 來實現重複檢查是否有 process 在 ptracing
*/
```