contributed by <alanhc
>
/* ceiling division without needing floating-point operations. */
static size_t ceil_div(size_t n, size_t d)
{
return AAAA; //(n + d - 1) / d
}
#define INTMAX BBBB // ((1U << 31) - 1)
void mpi_set_u64(mpi_t rop, uint64_t op)
{
size_t capacity = ceil_div(64, 31);
mpi_enlarge(rop, capacity);
for (size_t n = 0; n < capacity; ++n) {
rop->data[n] = op & INTMAX;
op >>= 31;
}
for (size_t n = capacity; n < rop->capacity; ++n)
rop->data[n] = 0;
}
void mpi_fdiv_qr(mpi_t q, mpi_t r, const mpi_t n, const mpi_t d)
{
mpi_t n0, d0;
mpi_init(n0);
mpi_init(d0);
mpi_set(n0, n);
mpi_set(d0, d);
if (mpi_cmp_u32(d0, 0) == 0) {
fprintf(stderr, "Division by zero\n");
abort();
}
mpi_set_u32(q, 0);
mpi_set_u32(r, 0);
size_t start = mpi_sizeinbase(n0, 2) - 1;
for (size_t i = start; i != (size_t) -1; --i) {
mpi_mul_2exp(r, r, DDDD); // mpi_mul_2exp(r, r, 1);
if (mpi_testbit(n0, i) != 0)
mpi_setbit(r, 0);
if (mpi_cmp(r, d0) >= 0) {
mpi_sub(r, r, d0);
mpi_setbit(q, i);
}
}
mpi_clear(n0);
mpi_clear(d0);
}
void mpi_gcd(mpi_t rop, const mpi_t op1, const mpi_t op2)
{
if (mpi_cmp_u32(op2, 0) == 0) {
mpi_set(rop, op1);
return;
}
mpi_t q, r;
mpi_init(q);
mpi_init(r);
mpi_fdiv_qr(q, r, op1, op2);
EEEE; // mpi_gcd(rop, op2, r);
mpi_clear(q);
mpi_clear(r);
}
解釋上述程式碼運作原理
這項實作依賴遞迴呼叫,請避免使用並降低記憶體開銷
改進最大公因數的運算效率
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
/* Nonzero if X is not aligned on a "long" boundary */
#define UNALIGNED(X) ((long) X & (sizeof(long) - 1))
/* How many bytes are loaded each iteration of the word copy loop */
#define LBLOCKSIZE (sizeof(long))
/* Threshhold for punting to the bytewise iterator */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
#if LONG_MAX == 2147483647L
#define DETECT_NULL(X) (((X) - (0x01010101)) & ~(X) & (0x80808080))
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) \
(((X) - (0x0101010101010101)) & ~(X) & (0x8080808080808080))
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif
/* @return nonzero if (long)X contains the byte used to fill MASK. */
#define DETECT_CHAR(X, mask) DETECT_NULL(AAAA) // #define DETECT_CHAR(X, mask) DETECT_NULL((X) ^ (mask))
void *memchr_opt(const void *str, int c, size_t len)
{
const unsigned char *src = (const unsigned char *) str;
unsigned char d = c;
while (UNALIGNED(src)) {
if (!len--)
return NULL;
if (*src == d)
return (void *) src;
src++;
}
if (!TOO_SMALL(len)) {
/* If we get this far, len is large and src is word-aligned. */
/* The fast code reads the source one word at a time and only performs
* a bytewise search on word-sized segments if they contain the search
* character. This is detected by XORing the word-sized segment with a
* word-sized block of the search character, and then checking for the
* presence of NULL in the result.
*/
unsigned long *asrc = (unsigned long *) src;
unsigned long mask = d << 8 | d;
mask |= mask << 16;
for (unsigned int i = 32; i < LBLOCKSIZE * 8; i <<= 1)
mask |= BBBB; // mask |= mask << i;
while (len >= LBLOCKSIZE) {
if (DETECT_CHAR(CCCC, mask)) // *asrc
break;
asrc++;
len -= LBLOCKSIZE;
}
/* If there are fewer than LBLOCKSIZE characters left, then we resort to
* the bytewise loop.
*/
src = (unsigned char *) asrc;
}
while (len--) {
if (*src == d)
return (void *) src;
src++;
}
return NULL;
}
int coro_create(coro_entry entry, void *args)
{
/* Search for an available environment slot */
int env;
for (env = 0; env < NENV; env++) {
if (envs[env].status == ENV_UNUSED) /* Found a free environment */
break;
}
if (env == NENV) /* No free environments available */
return -1;
envs[env].status = AAAA; //ENV_RUNNABLE
/* Initialize the context for the new user-level thread */
getcontext(&envs[env].state);
make_stack(&envs[env].state);
envs[env].state.uc_link = &exiter;
makecontext(&envs[env].state, (void (*)(void)) entry, 1, args);
/* Return the identifier of the newly created user-level thread */
return env;
}
static void coro_schedule(void)
{
int attempts = 0;
while (attempts < NENV) {
int candidate = (curenv + BBBB) % NENV; // 1
if (envs[candidate].status == ENV_RUNNABLE) {
curenv = candidate;
/* Request delivery of TIMERSIG after 10 ms */
timer_settime(timer, 0, &ts, NULL);
setcontext(&envs[curenv].state);
}
attempts++;
}
exit(0);
}
void coro_yield(void)
{
envs[curenv].state_reentered = 0;
getcontext(&envs[curenv].state);
if (envs[curenv].state_reentered++ == 0) {
/* Context successfully saved; schedule the next user-level thread to
* run.
*/
CCCC(); // coro_schedule();
}
/* Upon re-entry, simply resume execution */
}
static void preempt(int signum UNUSED,
siginfo_t *si UNUSED,
void *context UNUSED)
{
DDDD(); // coro_yield();
}
uint32_t crc32_u8(uint32_t crc, uint8_t v)
{
crc ^= v;
static const uint32_t crc32_table[] = {
0x00000000, 0x105ec76f, 0x20bd8ede, 0x30e349b1,
0x417b1dbc, 0x5125dad3, 0x61c69362, 0x7198540d,
0x82f63b78, 0x92a8fc17, 0xa24bb5a6, 0xb21572c9,
AAAA, BBBB, CCCC, DDDD, // 0xc56bcb63, 0xd5350c0c, 0xe5d645bd, 0xf58882d2
};
crc = (crc >> 4) ^ crc32_table[crc & 0x0F];
crc = (crc >> 4) ^ crc32_table[crc & 0x0F];
return crc;
}
在 Linux 核心原始程式碼中找到 CRC32 的應用案例至少 3 處,並探討其原始程式碼
設計實驗來分析 Fast CRC32 提出的手法 (要涵蓋 branchless 和查表的變形),並與硬體的 CRC32 指令效能進行比較。
以 x86-64 或 Arm64 為探討標的,說明 Linux 核心如何使用硬體加速的 CRC32 指令
case SYNTH_NODE_OSCILLATOR:
/* Increment the phase by the base value and adjust for any
* detuning.
*/
node->state += *node->osc.phase_incr;
if (node->osc.detune)
node->state += *node->osc.detune;
node->state &= AAAA; /* wrap around q15 */ // 0xFFFF
break;
case SYNTH_NODE_ENVELOPE:
/* The highest bit in the state indicates decay mode
* (post-attack).
*/
if (voice->gate) {
/* When the gate is active, process attack followed by decay
*/
int mode_bit = node->state & BBBB; // 0x800000
int32_t value = node->state & CCCC; //0x7FFFFF
case SYNTH_NODE_ENVELOPE:
/* The highest bit in the state indicates decay mode
* (post-attack).
*/
if (voice->gate) {
/* When the gate is active, process attack followed by decay
*/
int mode_bit = node->state & BBBB;
int32_t value = node->state & CCCC;
if (mode_bit) {
/* Decay phase: decrement the state by the decay rate */
value -= node->env.decay;
const q15_t sustainAbs = node->env.sustain < 0
? -node->env.sustain
: node->env.sustain;
if (value < sustainAbs << 4)
value = sustainAbs << 4;
} else {
/* Attack phase: increment the state by the attack rate
*/
value += node->env.attack;
if (value > (int32_t) Q15_MAX << 4) {
value = (int32_t) Q15_MAX << 4;
/* Switch to decay mode once the peak is reached */
mode_bit = DDDD; // 0x800000
}
}
node->state = value | mode_bit;
} else {
/* When the gate is inactive, perform the release phase */
/* Clear the decay mode bit so that the next gate triggers a
* fresh attack.
*/
node->state &= EEEE; // 0x7FFFFF
node->state -= node->env.release;
if (node->state < 0)
node->state = 0;
}
q15_t sine_wave(q15_t input)
{
int index = (input >> 8) & 0x7F;
/* Scale by 258 to span the full output range */
q15_t res = sine_lut[index] * 258;
/* Perform interpolation between adjacent lookup table values */
q15_t next = sine_lut[index + 1] * 258;
res += ((next - res) * (FFFF) >> 8); // (input & 0xFF)
return res;
}
q15_t square_wave(q15_t input)
{
return input < Q15_MAX / 2 ? GGGG : HHHH; // -Q15_MAX, Q15_MAX
}
解釋上述程式碼運作原理,搭配「信號與系統」教材
探討定點數的使用和其精確度
改進 MIDI 一閃一閃亮晶晶的音效輸出
/* Main I/O loop:
* Call select() to monitor the set of descriptors. After select() returns,
* only the descriptors with activity will remain set in rfds.
*/
while (select(IIII, JJJJ, NULL, NULL, NULL) >= 0) { // max_fd, &rfds
/* Check if the server socket has become readable, which indicates an
* incoming connection */
if (FD_ISSET(server_fd, &rfds)) {
/* Prepare a structure to store the client's address */
struct sockaddr_in sin;
socklen_t sinlen = sizeof(sin);
or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up