contributed by <bevmmf
>
補完程式碼
AAAA
:(n + d - 1) / d
BBBB
:0x7fffffff
CCCC
:n + m
DDDD
:1
EEEE
:mpi_gcd(rop, op2, r)
解釋上述程式碼運作原理
#include <inttypes.h> //support int32_t, uint64_t
#include <stdint.h> //support uint32_t, int64_t
/* mpi: Multi-Precision Integers */
typedef struct {
uint32_t *data;
size_t capacity;
} mpi_t[1];
typedef size_t mp_bitcnt_t;
size_t
: 一個專為表示「大小」或「數量」設計的unsignd int類型
此為宣告本題MPI的結構,由 capacity
段 uint32_t 類型 data
組合成多精度的整數進而能描述規模大的int
mpi_t[1]
表示單元素array,目的是傳遞pointermpi_init
: 初始化一mpi_t使capacity為0,data指向NULL
mpi_clear
: free 釋放一mpi_t之data
void mpi_enlarge(mpi_t rop, size_t capacity)
{
if (capacity > rop->capacity) {
size_t min = rop->capacity;
rop->capacity = capacity;
rop->data = realloc(rop->data, capacity * 4);
if (!rop->data && capacity != 0) {
fprintf(stderr, "Out of memory (%zu words requested)\n", capacity);
abort();
}
for (size_t n = min; n < capacity; ++n)
rop->data[n] = 0;
}
}
mpi_enlarge
: 擴充一mpi_t的data段數,若傳參capacity > 該mpi之capacity才運作。運作時由realloc重新分配(傳參capacity*4)bytes,然後將新擴充的段數data都設為0。若分配失敗則輸出錯誤資訊然後abort。
void mpi_compact(mpi_t rop)
{
size_t capacity = rop->capacity;
if (rop->capacity == 0)
return;
for (size_t i = rop->capacity - 1; i != (size_t) -1; --i) {
if (rop->data[i])
break;
capacity--;
}
assert(capacity != (size_t) -1);
rop->data = realloc(rop->data, capacity * 4);
rop->capacity = capacity;
if (!rop->data && capacity != 0) {
fprintf(stderr, "Out of memory (%zu words requested)\n", capacity);
abort();
}
}
mpi_compact
: 壓縮一mpit的記憶體使用,去除末尾的零元素,調整capacity。從data array末尾倒序檢查是否為0,為0則使capcity-1,直到不為0退出。此時退出後之capacity即為皆非0值之data段數。最後利用realloc分配該capacity段數4 bytes的空間。分配失敗則終止。
(size_t) -1
?
%zu
專為用來輸出 size_t 的值
/* ceiling division without needing floating-point operations. */
static size_t ceil_div(size_t n, size_t d)
{
return (n + d - 1) / d;
}
#define INTMAX 0x7fffffff
void mpi_set_u64(mpi_t rop, uint64_t op)
{
size_t capacity = ceil_div(64, 31);
mpi_enlarge(rop, capacity);
for (size_t n = 0; n < capacity; ++n) {
rop->data[n] = op & INTMAX;
op >>= 31;
}
for (size_t n = capacity; n < rop->capacity; ++n)
rop->data[n] = 0;
}
ceil_div
: 取天花板數(向上取整)
mpi_set_u64
: 將 uint64_t 轉成 mpi_t
為啥用 31 bit 作為一段長度?
因為若用 32 bit 加法或乘法進位(carry)處理會變複雜
static void mpi_mul_naive(mpi_t rop, const mpi_t op1, const mpi_t op2)
{
size_t capacity = op1->capacity + op2->capacity;
mpi_t tmp;
mpi_init(tmp);
mpi_enlarge(tmp, capacity);
for (size_t n = 0; n < tmp->capacity; ++n)
tmp->data[n] = 0;
for (size_t n = 0; n < op1->capacity; ++n) {
for (size_t m = 0; m < op2->capacity; ++m) {
uint64_t r = (uint64_t) op1->data[n] * op2->data[m];
uint64_t c = 0;
for (size_t k = n + m; c || r; ++k) {
if (k >= tmp->capacity)
mpi_enlarge(tmp, tmp->capacity + 1);
tmp->data[k] += (r & INTMAX) + c;
r >>= 31;
c = tmp->data[k] >> 31;
tmp->data[k] &= INTMAX;
}
}
}
mpi_set(rop, tmp);
mpi_compact(rop);
mpi_clear(tmp);
}
mpi_mul_naive
: mpi乘法
是否需要以下部分?
1.
if (k >= tmp->capacity)
mpi_enlarge(tmp, tmp->capacity + 1);
不需要 => n段*m段不會大於n+m段
2.
tmp->data[k] &= INTMAX;
不需要 => 因data大小即為INTMAX
//計算多精度整數 n 除以 d 的商 q 和餘數 r (即 n = d * q + r)
void mpi_fdiv_qr(mpi_t q, mpi_t r, const mpi_t n, const mpi_t d)
{
mpi_t n0, d0;
mpi_init(n0);
mpi_init(d0);
mpi_set(n0, n);//copy
mpi_set(d0, d);
if (mpi_cmp_u32(d0, 0) == 0) {
fprintf(stderr, "Division by zero\n");
abort();
}
mpi_set_u32(q, 0);
mpi_set_u32(r, 0);
size_t start = mpi_sizeinbase(n0, 2) - 1;
for (size_t i = start; i != (size_t) -1; --i) {
mpi_mul_2exp(r, r, 1);
if (mpi_testbit(n0, i) != 0)
mpi_setbit(r, 0);
if (mpi_cmp(r, d0) >= 0) {
mpi_sub(r, r, d0);
mpi_setbit(q, i);
}
}
mpi_clear(n0);
mpi_clear(d0);
}
使用二進制長除法
start = mpi_sizeinbase(n0, 2)
- 1 = 4 - 1 = 3(最高位索引)從最高位(i = 3)到最低位(i = 0)逐一處理被除數的每一位。
驗證結果:
13 = 3 × 4 + 1
void mpi_gcd(mpi_t rop, const mpi_t op1, const mpi_t op2)
{
if (mpi_cmp_u32(op2, 0) == 0) {
mpi_set(rop, op1);
return;
}
mpi_t q, r;
mpi_init(q);
mpi_init(r);
mpi_fdiv_qr(q, r, op1, op2);
mpi_gcd(rop, op2, r);
mpi_clear(q);
mpi_clear(r);
}
使用輾轉相除法
當發生整除時(下一次r=0,guard clause發生return),將輸出rop設為當時被除數op1
這項實作依賴遞迴呼叫,請避免使用並降低記憶體開銷
我將原來遞迴修改成以迭代實作
void mpi_gcd(mpi_t rop, const mpi_t op1, const mpi_t op2)
{
if (mpi_cmp_u32(op1, 0) == 0 || mpi_cmp_u32(op2, 0) == 0) {
fprintf(stderr, "Both operands are zero, GCD undefined\n");
return;
}
mpi_t opa, opb, q, r;
mpi_init(opa); // Dividend
mpi_init(opb); // Divisor
mpi_init(q);
mpi_init(r);
mpi_set(opa,op1);
mpi_set(opb,op2);
while(mpi_cmp_u32(opb, 0) != 0){
mpi_fdiv_qr(q, r, opa, opb);
mpi_set(opa,opb);
mpi_set(opb,r);
}
mpi_set(rop,opa);
mpi_clear(q);
mpi_clear(r);
mpi_clear(opa);
mpi_clear(opb);
}
改進最大公因數的運算效率
AAAA
:(x) ^ (mask)
BBBB
:mask << i
CCCC
:*asrc
AAAA
:ENV_RUNNABLE
BBBB
:attempts + 1
CCCC
:coro_shedule
DDDD
:yield
AAAA
:0xc38d26c4
BBBB
:0xd3d3e1ab
CCCC
:0xe330a81a
DDDD
:0xf36e6f75
AAAA
:0x7FFF
BBBB
:0x80000000
CCCC
:0x7FFFFFFF
DDDD
:0x80000000
EEEE
:0x7FFFFFFF
FFFF
:input & 0xFF
GGGG
:Q15_MIN
HHHH
:Q15_MAX
IIII
:max_fd
JJJJ
:&rfds