void *memchr_opt(const void *src_void, int c, size_t length)
{
    /* Drop-in replacement for memchr(): locate the first occurrence of
     * (unsigned char) c within the first `length` bytes of src_void.
     * Returns a pointer to the matching byte, or NULL if absent.
     *
     * Strategy: scan byte-wise until the pointer is word-aligned, then
     * scan one unsigned long per iteration, and finish the final partial
     * word with a byte-wise tail loop.
     */
    const unsigned char *src = (const unsigned char *) src_void;
    unsigned char d = c; /* truncate to a byte, as memchr() specifies */

    /* Byte-wise until src reaches word alignment (or bytes run out). */
    while (UNALIGNED(src)) {
        if (!length--)
            return NULL;
        if (*src == d)
            return (void *) src;
        src++;
    }

    if (!TOO_SMALL(length)) {
        /* If we get this far, length is large and src is word-aligned.
         *
         * The fast path reads the source one word at a time and only
         * performs the byte-wise search on a word if it may contain the
         * search character, which is detected by XOR-ing the word with a
         * word-sized block of the search character and then testing the
         * result for a zero byte.
         */
        unsigned long *asrc = (unsigned long *) src;
        /* Replicate d into every byte of mask (2, 4, then 8+ bytes). */
        unsigned long mask = d << 8 | d;
        mask = mask << 16 | mask;
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
            mask = (mask << i) | mask;

        while (length >= LBLOCKSIZE) {
            /* DETECT_CHAR returns nonzero iff *asrc contains the byte d:
             * the high bit of each matching byte is set in the result. */
            unsigned long check = DETECT_CHAR(*asrc, mask);
            if (check) {
                /* ffsl() gives the 1-based index of the lowest set bit;
                 * >> 3 converts it to a byte offset within the word
                 * (assumes little-endian layout — TODO confirm for this
                 * port) and -1 compensates for the 1-based index.
                 * Cast through unsigned char * first: pointer arithmetic
                 * on void * is a GCC extension, not ISO C. */
                return (unsigned char *) asrc + (ffsl(check) >> 3) - 1;
            }
            asrc++;
            length -= LBLOCKSIZE;
        }

        /* Fewer than LBLOCKSIZE bytes left: resume byte-wise scanning. */
        src = (const unsigned char *) asrc;
    }

    /* Byte-wise tail (also the whole search when length was small). */
    while (length--) {
        if (*src == d)
            return (void *) src;
        src++;
    }
    return NULL;
}
static inline ringidx_t cond_reload(ringidx_t idx, const ringidx_t *loc)
{
    /* Re-read the shared index at *loc.  If it has advanced past our
     * snapshot, resume from the fresher value; otherwise the current
     * slot is taken, so step to the next one. */
    ringidx_t latest = __atomic_load_n(loc, __ATOMIC_RELAXED);
    return before(idx, latest) ? latest : idx + 1;
}
static inline ringidx_t find_tail(lfring_t *lfr, ringidx_t head, ringidx_t tail)
{
    /* Return an up-to-date tail index, discovering elements that have
     * been written into the ring but whose tail update has not yet been
     * released by the producer. */
    if (lfr->flags & LFRING_FLAG_SP) /* single-producer enqueue */
        return __atomic_load_n(&lfr->tail, __ATOMIC_ACQUIRE);

    /* Multi-producer enqueue.
     * Scan ring for new elements that have been written but not released.
     */
    ringidx_t mask = lfr->mask;
    /* Ring size is a power of two, so mask == size - 1. */
    ringidx_t size = mask + 1;
    /* A slot holds a fully written element iff its stored idx equals the
     * candidate tail; advance past each such slot, stopping at the first
     * slot that does not match or after scanning one full ring
     * (head + size). */
    while (before(tail, head + size) &&
           __atomic_load_n(&lfr->ring[tail & mask].idx, __ATOMIC_ACQUIRE) ==
               tail)
        tail++;
    /* Publish the discovered tail unless another thread already advanced
     * lfr->tail further (cond_update keeps the later of the two). */
    tail = cond_update(&lfr->tail, tail);
    return tail;
}
uint32_t lfring_dequeue(lfring_t *lfr,
                        void **restrict elems,
                        uint32_t n_elems,
                        uint32_t *index)
{
    /* Dequeue up to n_elems element pointers from the ring into elems[].
     * Returns the number actually dequeued (0 when the ring is empty)
     * and stores the ring index of the first dequeued element in *index.
     */
    ringidx_t mask = lfr->mask;
    intptr_t actual;
    ringidx_t head = __atomic_load_n(&lfr->head, __ATOMIC_RELAXED);
    ringidx_t tail = __atomic_load_n(&lfr->tail, __ATOMIC_ACQUIRE);
    do {
        actual = MIN((intptr_t) (tail - head), (intptr_t) n_elems);
        if (UNLIKELY(actual <= 0)) {
            /* Ring buffer looks empty; scan for elements that have been
             * written but whose tail release has not been observed. */
            tail = find_tail(lfr, head, tail);
            actual = MIN((intptr_t) (tail - head), (intptr_t) n_elems);
            if (actual <= 0)
                return 0;
        }
        for (uint32_t i = 0; i < (uint32_t) actual; i++)
            elems[i] = lfr->ring[(head + i) & mask].ptr;
        /* Order the loads of the slots above before the store/CAS that
         * releases head below. */
        smp_fence(LoadStore);
        if (UNLIKELY(lfr->flags & LFRING_FLAG_SC)) { /* single-consumer */
            __atomic_store_n(&lfr->head, head + actual, __ATOMIC_RELAXED);
            break;
        }
        /* Lock-free multi-consumer: publish the new head with CAS.
         * The desired value must be derived from the expected value
         * `head`, not from a fresh load of lfr->head (the original
         * re-load was a redundant atomic access inside the retry loop
         * and races with other consumers).  On CAS failure, `head` is
         * refreshed with the current value and the loop retries. */
    } while (!__atomic_compare_exchange_n(
        &lfr->head, &head, /* updated on failure */
        head + actual,
        /* weak */ false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
    *index = (uint32_t) head;
    return (uint32_t) actual;
}
static void periodic_routine(struct work_struct *ws)
{
    /* Delayed-work callback: runs periodically on workqueue `wq` and
     * re-arms itself, forming a self-perpetuating timer. */
    /* While the module is loaded, run check() — presumably it scans for
     * tracer/tracee tasks to act on; confirm against check()'s
     * definition, which is outside this view. */
    if (likely(loaded))
        check();
    /* Re-arm: queue this same work on wq again after JIFFIES_DELAY
     * jiffies. */
    queue_delayed_work(wq, &dont_trace_task, JIFFIES_DELAY);
}
contributed by < Korin777 > 開發環境 $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 $ lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Address sizes: 39 bits physical, 48 bits virtual Byte Order: Little Endian
May 8, 2023contributed by < Korin777 > 測驗題目 測驗一 運作原理 pool_malloc 透過 get_loc_to_place 從 memory pool 的 free block 中配置記憶體給使用者 配置記憶體的大小會根據 32 bits 或 64 bits 進行 round up
Apr 21, 2023contributed by < Korin777 > 測驗題目 測驗三 RB_LOG2_MAX_NODES node_t 的大小為 sizeof(void*) * 4 所以 node 最多有 $$\frac{1 \ll (\mathrm{sizeof(void^*)} \times 8)}{\mathrm{sizeof(void^*)} \times 4}$$ 取 log2 可以得到 RB_LOG2_MAX_MEM_BYTES - log2(sizeof(void*)*2) - 1,若 sizeof(void*) == 8 就會是 RB_LOG2_MAX_MEM_BYTES - 4 - 1 tree_insert 這裡透過 path 來記錄插入節點的所有祖先節點,所以節點不須記錄自己的親代節點也能在插入後向上修復紅黑樹,遇到黑色節點就可以停下,因為只有紅色節點可能會違反規則
Apr 8, 2023contributed by < Korin777 > 測驗四 int ceil_log2(uint32_t x) { uint32_t r, shift; x--; r = (x > 0xFFFF) << 4; x >>= r;
Mar 15, 2023or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up