# 2025q1 Homework4 (quiz3+4) contributed by <`alanhc`> ## quiz 3 - https://hackmd.io/@sysprog/linux2025-quiz3#sigaction2 ### 1 ```cpp= /* ceiling division without needing floating-point operations. */ static size_t ceil_div(size_t n, size_t d) { return AAAA; //(n + d - 1) / d } #define INTMAX BBBB // ((1U << 31) - 1) void mpi_set_u64(mpi_t rop, uint64_t op) { size_t capacity = ceil_div(64, 31); mpi_enlarge(rop, capacity); for (size_t n = 0; n < capacity; ++n) { rop->data[n] = op & INTMAX; op >>= 31; } for (size_t n = capacity; n < rop->capacity; ++n) rop->data[n] = 0; } ``` - AAAA 往上整數 - BBBB 要使用1U否則sign左移有可能是undefine behavior ```cpp void mpi_fdiv_qr(mpi_t q, mpi_t r, const mpi_t n, const mpi_t d) { mpi_t n0, d0; mpi_init(n0); mpi_init(d0); mpi_set(n0, n); mpi_set(d0, d); if (mpi_cmp_u32(d0, 0) == 0) { fprintf(stderr, "Division by zero\n"); abort(); } mpi_set_u32(q, 0); mpi_set_u32(r, 0); size_t start = mpi_sizeinbase(n0, 2) - 1; for (size_t i = start; i != (size_t) -1; --i) { mpi_mul_2exp(r, r, DDDD); // mpi_mul_2exp(r, r, 1); if (mpi_testbit(n0, i) != 0) mpi_setbit(r, 0); if (mpi_cmp(r, d0) >= 0) { mpi_sub(r, r, d0); mpi_setbit(q, i); } } mpi_clear(n0); mpi_clear(d0); } ``` - DDDD 是1,因為要逐位處理 每一個 bit ``` void mpi_gcd(mpi_t rop, const mpi_t op1, const mpi_t op2) { if (mpi_cmp_u32(op2, 0) == 0) { mpi_set(rop, op1); return; } mpi_t q, r; mpi_init(q); mpi_init(r); mpi_fdiv_qr(q, r, op1, op2); EEEE; // mpi_gcd(rop, op2, r); mpi_clear(q); mpi_clear(r); } ``` - EEEE 是 mpi_gcd(rop, op2, r); - 因為輾轉相除法是 gcd(a, b) = gcd(b, a mod b) - 這行是做:op1 = op2 * q + r,得到餘數 r。 - 之後是繼續計算 gcd(op2, r)。 #### 延伸 解釋上述程式碼運作原理 這項實作依賴遞迴呼叫,請避免使用並降低記憶體開銷 改進最大公因數的運算效率 ### 2 ``` #include <limits.h> #include <stddef.h> #include <stdint.h> #include <string.h> /* Nonzero if X is not aligned on a "long" boundary */ #define UNALIGNED(X) ((long) X & (sizeof(long) - 1)) /* How many bytes are loaded each iteration of the word copy loop */ #define LBLOCKSIZE (sizeof(long)) /* Threshhold for punting to the bytewise iterator */ #define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE) #if LONG_MAX == 2147483647L #define DETECT_NULL(X) (((X) - (0x01010101)) & ~(X) & (0x80808080)) #else #if LONG_MAX == 9223372036854775807L /* Nonzero if X (a long int) contains a NULL byte. */ #define DETECT_NULL(X) \ (((X) - (0x0101010101010101)) & ~(X) & (0x8080808080808080)) #else #error long int is not a 32bit or 64bit type. #endif #endif /* @return nonzero if (long)X contains the byte used to fill MASK. */ #define DETECT_CHAR(X, mask) DETECT_NULL(AAAA) // #define DETECT_CHAR(X, mask) DETECT_NULL((X) ^ (mask)) void *memchr_opt(const void *str, int c, size_t len) { const unsigned char *src = (const unsigned char *) str; unsigned char d = c; while (UNALIGNED(src)) { if (!len--) return NULL; if (*src == d) return (void *) src; src++; } if (!TOO_SMALL(len)) { /* If we get this far, len is large and src is word-aligned. */ /* The fast code reads the source one word at a time and only performs * a bytewise search on word-sized segments if they contain the search * character. This is detected by XORing the word-sized segment with a * word-sized block of the search character, and then checking for the * presence of NULL in the result. */ unsigned long *asrc = (unsigned long *) src; unsigned long mask = d << 8 | d; mask |= mask << 16; for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1) mask |= BBBB; // mask |= mask << i; while (len >= LBLOCKSIZE) { if (DETECT_CHAR(CCCC, mask)) // *asrc break; asrc++; len -= LBLOCKSIZE; } /* If there are fewer than LBLOCKSIZE characters left, then we resort to * the bytewise loop. */ src = (unsigned char *) asrc; } while (len--) { if (*src == d) return (void *) src; src++; } return NULL; } ``` - AAAA 用xor 看是否一樣 - BBBB 用or擴展 - CCCC *asrc ### 3 ``` int coro_create(coro_entry entry, void *args) { /* Search for an available environment slot */ int env; for (env = 0; env < NENV; env++) { if (envs[env].status == ENV_UNUSED) /* Found a free environment */ break; } if (env == NENV) /* No free environments available */ return -1; envs[env].status = AAAA; //ENV_RUNNABLE /* Initialize the context for the new user-level thread */ getcontext(&envs[env].state); make_stack(&envs[env].state); envs[env].state.uc_link = &exiter; makecontext(&envs[env].state, (void (*)(void)) entry, 1, args); /* Return the identifier of the newly created user-level thread */ return env; } ``` ``` static void coro_schedule(void) { int attempts = 0; while (attempts < NENV) { int candidate = (curenv + BBBB) % NENV; // 1 if (envs[candidate].status == ENV_RUNNABLE) { curenv = candidate; /* Request delivery of TIMERSIG after 10 ms */ timer_settime(timer, 0, &ts, NULL); setcontext(&envs[curenv].state); } attempts++; } exit(0); } ``` ``` void coro_yield(void) { envs[curenv].state_reentered = 0; getcontext(&envs[curenv].state); if (envs[curenv].state_reentered++ == 0) { /* Context successfully saved; schedule the next user-level thread to * run. */ CCCC(); // coro_schedule(); } /* Upon re-entry, simply resume execution */ } ``` ``` static void preempt(int signum UNUSED, siginfo_t *si UNUSED, void *context UNUSED) { DDDD(); // coro_yield(); } ``` ## quiz 4 ### 1 ``` uint32_t crc32_u8(uint32_t crc, uint8_t v) { crc ^= v; static const uint32_t crc32_table[] = { 0x00000000, 0x105ec76f, 0x20bd8ede, 0x30e349b1, 0x417b1dbc, 0x5125dad3, 0x61c69362, 0x7198540d, 0x82f63b78, 0x92a8fc17, 0xa24bb5a6, 0xb21572c9, AAAA, BBBB, CCCC, DDDD, // 0xc56bcb63, 0xd5350c0c, 0xe5d645bd, 0xf58882d2 }; crc = (crc >> 4) ^ crc32_table[crc & 0x0F]; crc = (crc >> 4) ^ crc32_table[crc & 0x0F]; return crc; } ``` - 延伸 延伸問題: 在 Linux 核心原始程式碼中找到 CRC32 的應用案例至少 3 處,並探討其原始程式碼 設計實驗來分析 Fast CRC32 提出的手法 (要涵蓋 branchless 和查表的變形),並與硬體的 CRC32 指令效能進行比較。 以 x86-64 或 Arm64 為探討標的,說明 Linux 核心如何使用硬體加速的 CRC32 指令 ### 2 ```cpp case SYNTH_NODE_OSCILLATOR: /* Increment the phase by the base value and adjust for any * detuning. */ node->state += *node->osc.phase_incr; if (node->osc.detune) node->state += *node->osc.detune; node->state &= AAAA; /* wrap around q15 */ // 0xFFFF break; ``` ```cpp case SYNTH_NODE_ENVELOPE: /* The highest bit in the state indicates decay mode * (post-attack). */ if (voice->gate) { /* When the gate is active, process attack followed by decay */ int mode_bit = node->state & BBBB; // 0x800000 int32_t value = node->state & CCCC; //0x7FFFFF ``` ```cpp case SYNTH_NODE_ENVELOPE: /* The highest bit in the state indicates decay mode * (post-attack). */ if (voice->gate) { /* When the gate is active, process attack followed by decay */ int mode_bit = node->state & BBBB; int32_t value = node->state & CCCC; if (mode_bit) { /* Decay phase: decrement the state by the decay rate */ value -= node->env.decay; const q15_t sustainAbs = node->env.sustain < 0 ? -node->env.sustain : node->env.sustain; if (value < sustainAbs << 4) value = sustainAbs << 4; } else { /* Attack phase: increment the state by the attack rate */ value += node->env.attack; if (value > (int32_t) Q15_MAX << 4) { value = (int32_t) Q15_MAX << 4; /* Switch to decay mode once the peak is reached */ mode_bit = DDDD; // 0x800000 } } node->state = value | mode_bit; } else { /* When the gate is inactive, perform the release phase */ /* Clear the decay mode bit so that the next gate triggers a * fresh attack. */ node->state &= EEEE; // 0x7FFFFF node->state -= node->env.release; if (node->state < 0) node->state = 0; } ``` ```cpp q15_t sine_wave(q15_t input) { int index = (input >> 8) & 0x7F; /* Scale by 258 to span the full output range */ q15_t res = sine_lut[index] * 258; /* Perform interpolation between adjacent lookup table values */ q15_t next = sine_lut[index + 1] * 258; res += ((next - res) * (FFFF) >> 8); // (input & 0xFF) return res; } ``` ``` q15_t square_wave(q15_t input) { return input < Q15_MAX / 2 ? GGGG : HHHH; // -Q15_MAX, Q15_MAX } ``` - 延伸問題: 解釋上述程式碼運作原理,搭配「信號與系統」教材 探討定點數的使用和其精確度 改進 MIDI 一閃一閃亮晶晶的音效輸出 ### 3 - 這段程式蠻有趣的:https://gist.github.com/jserv/3294f32b2824076ac17683fdac2daf24 ```cpp /* Main I/O loop: * Call select() to monitor the set of descriptors. After select() returns, * only the descriptors with activity will remain set in rfds. */ while (select(IIII, JJJJ, NULL, NULL, NULL) >= 0) { // max_fd, &rfds /* Check if the server socket has become readable, which indicates an * incoming connection */ if (FD_ISSET(server_fd, &rfds)) { /* Prepare a structure to store the client's address */ struct sockaddr_in sin; socklen_t sinlen = sizeof(sin); ```