2025q1 Homework4 (quiz3+4)

contributed by <alanhc>

quiz 3

https://hackmd.io/@sysprog/linux2025-quiz3#sigaction2

1






















/* ceiling division without needing floating-point operations. */
static size_t ceil_div(size_t n, size_t d)
{
    return AAAA; //(n + d - 1) / d
}

#define INTMAX BBBB // ((1U << 31) - 1)

void mpi_set_u64(mpi_t rop, uint64_t op)
{
    size_t capacity = ceil_div(64, 31);

    mpi_enlarge(rop, capacity);

    for (size_t n = 0; n < capacity; ++n) {
        rop->data[n] = op & INTMAX;
        op >>= 31;
    }

    for (size_t n = capacity; n < rop->capacity; ++n)
        rop->data[n] = 0;
}

AAAA 往上整數
BBBB 要使用1U否則sign左移有可能是undefine behavior

void mpi_fdiv_qr(mpi_t q, mpi_t r, const mpi_t n, const mpi_t d)
{
    mpi_t n0, d0;
    mpi_init(n0);
    mpi_init(d0);
    mpi_set(n0, n);
    mpi_set(d0, d);                                                                                  
    if (mpi_cmp_u32(d0, 0) == 0) {
        fprintf(stderr, "Division by zero\n");
        abort();
    }

    mpi_set_u32(q, 0);
    mpi_set_u32(r, 0);

    size_t start = mpi_sizeinbase(n0, 2) - 1;

    for (size_t i = start; i != (size_t) -1; --i) {
        mpi_mul_2exp(r, r, DDDD);  // mpi_mul_2exp(r, r, 1);
        if (mpi_testbit(n0, i) != 0)
            mpi_setbit(r, 0);
        if (mpi_cmp(r, d0) >= 0) {
            mpi_sub(r, r, d0);
            mpi_setbit(q, i);
        }
    }

    mpi_clear(n0);
    mpi_clear(d0);
}

DDDD 是1，因為要逐位處理每一個 bit

void mpi_gcd(mpi_t rop, const mpi_t op1, const mpi_t op2)
{
    if (mpi_cmp_u32(op2, 0) == 0) {
        mpi_set(rop, op1);
        return;
    }
        
    mpi_t q, r;
    mpi_init(q);
    mpi_init(r);

    mpi_fdiv_qr(q, r, op1, op2);

    EEEE; //  mpi_gcd(rop, op2, r);                                    

    mpi_clear(q);
    mpi_clear(r);
}

EEEE 是 mpi_gcd(rop, op2, r);
因為輾轉相除法是 gcd(a, b) = gcd(b, a mod b)
這行是做：op1 = op2 * q + r，得到餘數 r。
之後是繼續計算 gcd(op2, r)。

延伸

解釋上述程式碼運作原理
這項實作依賴遞迴呼叫，請避免使用並降低記憶體開銷
改進最大公因數的運算效率

2

#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Nonzero if X is not aligned on a "long" boundary */
#define UNALIGNED(X) ((long) X & (sizeof(long) - 1))

/* How many bytes are loaded each iteration of the word copy loop */
#define LBLOCKSIZE (sizeof(long))

/* Threshhold for punting to the bytewise iterator */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)

#if LONG_MAX == 2147483647L
#define DETECT_NULL(X) (((X) - (0x01010101)) & ~(X) & (0x80808080))
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) \
    (((X) - (0x0101010101010101)) & ~(X) & (0x8080808080808080))
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif

/* @return nonzero if (long)X contains the byte used to fill MASK. */
#define DETECT_CHAR(X, mask) DETECT_NULL(AAAA) // #define DETECT_CHAR(X, mask) DETECT_NULL((X) ^ (mask))

void *memchr_opt(const void *str, int c, size_t len)
{
    const unsigned char *src = (const unsigned char *) str;
    unsigned char d = c;

    while (UNALIGNED(src)) {
        if (!len--)
            return NULL;
        if (*src == d)
            return (void *) src;
        src++;
    }

    if (!TOO_SMALL(len)) {
        /* If we get this far, len is large and src is word-aligned. */

        /* The fast code reads the source one word at a time and only performs
         * a bytewise search on word-sized segments if they contain the search
         * character. This is detected by XORing the word-sized segment with a
         * word-sized block of the search character, and then checking for the
         * presence of NULL in the result.
         */
        unsigned long *asrc = (unsigned long *) src;
        unsigned long mask = d << 8 | d;
        mask |= mask << 16;
        for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
            mask |= BBBB; // mask |= mask << i;

        while (len >= LBLOCKSIZE) {
            if (DETECT_CHAR(CCCC, mask)) // *asrc
                break;
            asrc++;
            len -= LBLOCKSIZE;
        }

        /* If there are fewer than LBLOCKSIZE characters left, then we resort to
         * the bytewise loop.
         */
        src = (unsigned char *) asrc;
    }

    while (len--) {
        if (*src == d)
            return (void *) src;
        src++;
    }

    return NULL;
}

AAAA 用xor 看是否一樣
BBBB 用or擴展
CCCC *asrc

3

int coro_create(coro_entry entry, void *args)
{
    /* Search for an available environment slot */
    int env;
    for (env = 0; env < NENV; env++) {
        if (envs[env].status == ENV_UNUSED) /* Found a free environment */
            break;
    }
    if (env == NENV) /* No free environments available */
        return -1;
    envs[env].status = AAAA; //ENV_RUNNABLE

    /* Initialize the context for the new user-level thread */
    getcontext(&envs[env].state);
    make_stack(&envs[env].state);
    envs[env].state.uc_link = &exiter;
    makecontext(&envs[env].state, (void (*)(void)) entry, 1, args);

    /* Return the identifier of the newly created user-level thread */
    return env;
}

static void coro_schedule(void)
{
    int attempts = 0;
    while (attempts < NENV) {
        int candidate = (curenv + BBBB) % NENV; // 1
        if (envs[candidate].status == ENV_RUNNABLE) {
            curenv = candidate;
            /* Request delivery of TIMERSIG after 10 ms */
            timer_settime(timer, 0, &ts, NULL);
            setcontext(&envs[curenv].state);
        }
        attempts++;
    }
    exit(0);
}

void coro_yield(void)
{
    envs[curenv].state_reentered = 0;
    getcontext(&envs[curenv].state);
    if (envs[curenv].state_reentered++ == 0) {
        /* Context successfully saved; schedule the next user-level thread to
         * run.
	 */
        CCCC(); // coro_schedule();
    }
    /* Upon re-entry, simply resume execution */
}

static void preempt(int signum UNUSED,
                    siginfo_t *si UNUSED,
                    void *context UNUSED)
{
    DDDD(); // coro_yield();
}

quiz 4

1

uint32_t crc32_u8(uint32_t crc, uint8_t v)
{
    crc ^= v;
    static const uint32_t crc32_table[] = {
        0x00000000, 0x105ec76f, 0x20bd8ede, 0x30e349b1, 
        0x417b1dbc, 0x5125dad3, 0x61c69362, 0x7198540d, 
        0x82f63b78, 0x92a8fc17, 0xa24bb5a6, 0xb21572c9, 
        AAAA,       BBBB,       CCCC,       DDDD, // 0xc56bcb63, 0xd5350c0c, 0xe5d645bd, 0xf58882d2
    };
    
    crc = (crc >> 4) ^ crc32_table[crc & 0x0F];
    crc = (crc >> 4) ^ crc32_table[crc & 0x0F];
    return crc;
}

延伸
延伸問題:

在 Linux 核心原始程式碼中找到 CRC32 的應用案例至少 3 處，並探討其原始程式碼
設計實驗來分析 Fast CRC32 提出的手法 (要涵蓋 branchless 和查表的變形)，並與硬體的 CRC32 指令效能進行比較。
以 x86-64 或 Arm64 為探討標的，說明 Linux 核心如何使用硬體加速的 CRC32 指令

2

case SYNTH_NODE_OSCILLATOR:
                /* Increment the phase by the base value and adjust for any
                 * detuning.
                 */
                node->state += *node->osc.phase_incr;
                if (node->osc.detune)
                    node->state += *node->osc.detune;
                node->state &= AAAA;  /* wrap around q15 */ // 0xFFFF 
                break;

case SYNTH_NODE_ENVELOPE:
                /* The highest bit in the state indicates decay mode
                 * (post-attack).
                 */
                if (voice->gate) {
                    /* When the gate is active, process attack followed by decay
                     */
                    int mode_bit = node->state & BBBB; // 0x800000
                    int32_t value = node->state & CCCC; //0x7FFFFF

case SYNTH_NODE_ENVELOPE:
                /* The highest bit in the state indicates decay mode
                 * (post-attack).
                 */
                if (voice->gate) {
                    /* When the gate is active, process attack followed by decay
                     */
                    int mode_bit = node->state & BBBB;
                    int32_t value = node->state & CCCC;
                    if (mode_bit) {
                        /* Decay phase: decrement the state by the decay rate */
                        value -= node->env.decay;
                        const q15_t sustainAbs = node->env.sustain < 0
                                                     ? -node->env.sustain
                                                     : node->env.sustain;
                        if (value < sustainAbs << 4)
                            value = sustainAbs << 4;
                    } else {
                        /* Attack phase: increment the state by the attack rate
                         */
                        value += node->env.attack;
                        if (value > (int32_t) Q15_MAX << 4) {
                            value = (int32_t) Q15_MAX << 4;
                            /* Switch to decay mode once the peak is reached */
                            mode_bit = DDDD; // 0x800000
                        }
                    }
                    node->state = value | mode_bit;
                }  else {
                    /* When the gate is inactive, perform the release phase */
                    /* Clear the decay mode bit so that the next gate triggers a
                     * fresh attack.
                     */
                    node->state &= EEEE; // 0x7FFFFF
                    node->state -= node->env.release;
                    if (node->state < 0)
                        node->state = 0;
                }

q15_t sine_wave(q15_t input)
{
    int index = (input >> 8) & 0x7F;
    /* Scale by 258 to span the full output range */
    q15_t res = sine_lut[index] * 258;
    /* Perform interpolation between adjacent lookup table values */
    q15_t next = sine_lut[index + 1] * 258;
    res += ((next - res) * (FFFF) >> 8); // (input & 0xFF)

    return res;
}

q15_t square_wave(q15_t input)
{
    return input < Q15_MAX / 2 ? GGGG : HHHH; //  -Q15_MAX,  Q15_MAX
}

延伸問題:

解釋上述程式碼運作原理，搭配「信號與系統」教材
探討定點數的使用和其精確度
改進 MIDI 一閃一閃亮晶晶的音效輸出

3

這段程式蠻有趣的：https://gist.github.com/jserv/3294f32b2824076ac17683fdac2daf24

    /* Main I/O loop:
     * Call select() to monitor the set of descriptors. After select() returns,
     * only the descriptors with activity will remain set in rfds.
     */
    while (select(IIII, JJJJ, NULL, NULL, NULL) >= 0) { // max_fd, &rfds
        /* Check if the server socket has become readable, which indicates an
         * incoming connection */
        if (FD_ISSET(server_fd, &rfds)) {
            /* Prepare a structure to store the client's address */
            struct sockaddr_in sin;
            socklen_t sinlen = sizeof(sin);

2025q1 Homework4 (quiz3+4)

quiz 3

1

延伸

2

3

quiz 4

1

2

3

Read more

Alan Tseng

til

memory

llama.cpp