contributed by <blake11235
>
好東西分享,很完整的HackMD功能介紹
考慮以下程式碼:
#include <stdlib.h>
int isMultN(unsigned int n) {
int odd_c = 0, even_c = 0; /* variables to count odd and even SET bits */
if (n == 0) // return true if difference is 0.
return 1;
if (n == 1) // return false if the difference is not 0.
return 0;
while (n) {
if (n & 1) // odd bit is SET, increment odd_C
odd_c++;
n >>= 1;
if (n & 1) // even bit is SET, increment even_c
even_c++;
n = n >> 1;
}
/* Recursive call till you get 0/1 */
return(isMultN(abs(odd_c - even_c)));
}
其作用為檢查輸入整數是否為 N 的倍數
首先先觀察程式碼
while (n) {
if (n & 1) // odd bit is SET, increment odd_C
odd_c++;
n >>= 1;
if (n & 1) // even bit is SET, increment even_c
even_c++;
n = n >> 1;
}
從這段可以得知他是在計算 n 用 binary 表示的話,奇數位與偶數位的 1 的數目。先是判斷最右邊的那位數,看是否為 1 ,接著向右位移再判斷是否為 1 ,一直操作到 n 為 0 ,代表全部計算完畢。
return(isMultN(abs(odd_c - even_c)));
最後將 odd bit count 和 even bit count 的差值再次輸入函式做遞迴,直到數值為 0 或 1 。也就是最終的判斷依據,是看奇數位 1 的數與偶數位 1 的數兩者要一樣。
既然這段程式碼是依據各 bit 在判斷,那麼就把數值列出來啦~
數值 | binary | 奇偶位 1 的數的差值 |
---|---|---|
1 | 0001 | 1 |
2 | 0010 | 1 |
3 | 0011 | 0 |
5 | 0101 | 2 |
7 | 0111 | 1 |
11 | 1011 | 1 |
13 | 1101 | 1 |
搭啦~ 答案很明顯的就是 3 啦~ | ||
因為只有 3 他的奇位 1 的數目和偶位 1 的數目一樣,所以只能選他了。 | ||
若差值是 1 以上的,會再次進入函式遞迴,直到最後的數值是 1 或者 0 。 |
但事情有那麼順利嗎?會不會有除了 3 以外的質數,他的奇位 1 的數目和偶位 1 的數目也一樣,是不是就爆炸了?那麼我們就來證證看囉!
首先,這個題目讓我想到了一個經典個國中數學題目:如何判斷一個數 3 的倍數?
只需要把每個位數的值都加起來,如果為 3 的倍數的話,那麼此數就是 3 的倍數。
推導方法很簡單,這裡就有點了的講…
那麼就從那個觀念延伸,來這提證明。
假設一個二進位的數值 ABCDEFGH ,代表的是 \(A\cdot2^7+B\cdot2^6+C\cdot2^5+D\cdot2^4+E\cdot2^3+F\cdot2^2+G\cdot2^1+H\cdot2^0 \\=A\cdot128+B\cdot64+C\cdot32+D\cdot16+E\cdot8+F\cdot4+G\cdot2+H\cdot1 \\=(A\cdot129-A)+(B\cdot63+B)+(C\cdot33-C)+(D\cdot15+D)+(E\cdot9-E)+(F\cdot3+F)+(G\cdot3-G)+(H\cdot0+H) \\=(A\cdot129+B\cdot63+C\cdot33+D\cdot15+E\cdot9+F\cdot3+G\cdot3+H\cdot0)+(-A+B-C+D-E+F-G+H)\)
\((A\cdot129+B\cdot63+C\cdot33+D\cdot15+E\cdot9+F\cdot3+G\cdot3+H\cdot0)\) 是 3 的倍數
所以只要判斷\((-A+B-C+D-E+F-G+H)\) 也要是 3 的倍數就好了
可能是 3、6、9 …等等,再把數值代進迴圈,直到最後奇偶位數相差為 0 ,便知道此數是 3 的倍數
針對其他質數判斷是否為其倍數,2 和 5 較為簡單,就不多說了,那麼就想針對 7、11、13 做研究
再次回想起國中數學~~
目標,只使用加減法位移與迴圈
針對一個數於乘於 7 直式乘法,可以觀察到一個有趣現象
\(ABCDEFG\cdot7 = ABCDEFG\cdot0111\)
A B C D E F G
A B C D E F G
+ A B C D E F G
-----------------------
\((A), (A+B), (A+B+C), (B+C+D), (C+D+E), (D+E+F), (E+F+G), (F+G), (G)\)
也就是說,把一個數除以 7 ,就是把最後一位數拿去減倒數第三位、倒數第二位和倒數第一位,減完之後數值向右位移,然後回傳給函式繼續進行,直到除了後三位的其他位數都是 0 ,最後再判斷數值是否為 7 。
int isMultN(unsigned int n)
{
if(n>>3 == 0) {
return (n==7);//判斷是否餘七
}
else{
int temp;
temp = (n&1);//取最後一位數值
n = n - temp - (temp<<1) - (temp<<2);//除於七
return(isMultN(n>>1));
}
}
既然都打出程式了,那麼就來比賽一下,看看是否有比 mod 運算還快,不然而外想到的這種作法沒有比較快,內心會很挫折。
所以阿我用了 clock() 來算一下時間,跟 mod 來比賽看看
#include <stdlib.h>
#include <time.h>
static double diff_in_second(struct timespec t1, struct timespec t2)
{
struct timespec diff;
if (t2.tv_nsec-t1.tv_nsec < 0) {
diff.tv_sec = t2.tv_sec - t1.tv_sec - 1;
diff.tv_nsec = t2.tv_nsec - t1.tv_nsec + 1000000000;
} else {
diff.tv_sec = t2.tv_sec - t1.tv_sec;
diff.tv_nsec = t2.tv_nsec - t1.tv_nsec;
}
return (diff.tv_sec + diff.tv_nsec / 1000000000.0);
}
int isMultN(unsigned int n)
{
if(n>>3 == 0) {
return (n==7);
}
else{
int temp;
temp = (n&1);
n = n - temp - (temp<<1) - (temp<<2);
return(isMultN(n>>1));
}
}
int main()
{
unsigned int x;
scanf("%u", &x);
struct timespec start1, end1, start2, end2;
clock_gettime(CLOCK_REALTIME, &start1);
printf("%s\n", isMultN(x)?"yes":"no");
clock_gettime(CLOCK_REALTIME, &end1);
printf("my time: %.10f sec\n\n", diff_in_second(start1, end1));
clock_gettime(CLOCK_REALTIME, &start2);
printf("%s\n", (x%7)?"no":"yes");
clock_gettime(CLOCK_REALTIME, &end2);
printf("mod time: %.10f sec\n\n", diff_in_second(start2, end2));
return 0;
}
結論是…
我輸了
1354684745
no
my time: 0.0000273730 sec
no
mod time: 0.0000087500 sec
好吧…先做 11 的倍數,不然做不完啦…
11 的倍數也可以用類似於 7 的倍數的方法,只是改變了一點數值。
int isMultN(unsigned int n)
{
if(n>>4 == 0) {
return (n==11);
}
else{
int temp;
temp = (n&1);
n = n - temp - (temp<<1) - (temp<<3);
return(isMultN(n>>1));
}
}
但是,這個方法他的運算時間,比 mod 慢更多,差到了100倍左右…
6541213
no
my time: 0.0000271370 sec
no
mod time: 0.0000034600 sec
想一個比 mod 慢的程式有屁用<bye94046165
>
也是可以用一樣的手法但是速度一樣慢…
65
yes
my time: 0.0000257650 sec
yes
mod time: 0.0000036420 sec
所以,強者我室友e94046165
看了我的 code 叫我把函式遞迴改成 while() 迴圈,順便嘴我的資料結構…
結果,速度快的幾乎跟 mod 一樣了!!
以類似 CS:APP 3/e 第 80 頁針對 8 位元浮點格式的示例,擴充至單精度浮點數
位表示(b x x b) | 指數 | 小數 | 值 | 描述 |
---|---|---|---|---|
bin hex hex bin | e, E, 2E | f, M | V, 十進位 | |
0 00 00000 000 | 0, -126, 2-126 | 0, 0 | 0, 0.0 | 零 |
0 00 00000 001 | 0, -126, 2-126 | 2-23, 2-23 | 2-149, 0.00.. | 最小 denormalized |
… | … | … | … | denormalized |
0 00 11111 111 | 0, -126, 2-126 | 2-1+2-2…+2-23, 2-1+2-2…+2-23 | … | 最大 denormalized |
0 01 00000 000 | 1, -126, 2-126 | 0, 1 | 2-126, 0.00… | 最小 normalized |
0 01 00000 001 | 1, -126, 2-126 | 2-23, 1+2-23 | 2-126+2149, 0.00… | normalized |
… | … | … | … | normalized |
0 fe fffff 111 | 254, 127, 2127 | 2-1+2-2…+2-23, 1+2-1+2-2…+2-23 | … | 最大 normalized |
0 ff 00000 000 | … | … | … | 正無限大 |
在來看題目:
#include <math.h>
static inline float u2f(unsigned int x) { return *(float *) &x; }
float exp2_fp(int x) {
unsigned int exp /* exponent */, frac /* fraction */;
/* too small */
if (x < 2 - pow(2, Y0) - 23) {
exp = 0;
frac = 0;
/* denormalize */
} else if (x < Y1 - pow(2, Y2)) {
exp = 0;
frac = 1 << (unsigned) (x - (2 - pow(2, Y3) - Y4));
/* normalized */
} else if (x < pow(2, Y5)) {
exp = pow(2, Y6) - 1 + x;
frac = 0;
/* too large */
} else {
exp = Y7;
frac = 0;
}
/* pack exp and frac into 32 bits */
return u2f((unsigned) exp << 23 | frac);
}
(x < 2 - pow(2, 7) - 23)
(x < 2 - pow(2, 7))
1 << (x - (2 - pow(2, 7) - 23))
(x < pow(2, 7))
exp = pow(2, 7) - 1 + x
0xff
給定一個單精度的浮點數值,輸出較大且最接近的 2x 值,需要充分考慮到邊界
原來是針對原本的題目做反向操作阿,研究了一下,就用 bits 來操作吧
首先針對所得到的浮點數做分類:
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
unsigned int flo2bin(float f);
int main(){
float x;
int ans = 0;
printf("input a float number:");
scanf("%f", &x);
if(!x){ //0
printf("0\n");
}
else if(x == 0xffffffff){ //infinity
printf("INFINITY\n");
}
else{
if(x >> 23 == 0){ //denormalized
x = x << 9;
while(!(x&80000000)){
x = x << 1;
ans++;
}
if(x << 1)
ans = -ans;
else
ans = -ans-1;
}
else{ //normalized
if(x << 9 == 0) //M=(1+0)=2^0
ans = (x >> 23) - 127;
else //M=(1+f)=2^1
ans = (x >> 23) - 127 + 1;
}
printf("2 e%d", ans);
}
return 0;
}
完成了…嗎?
float.c: In function ‘main’:
float.c:23:8: error: invalid operands to binary >> (have ‘float’ and ‘int’)
if(x >> 23 == 0){
原來, float 不能做 bitwise 的運算阿!我真的現在才知道
于是乎,我想到了一招,用 pointer
意思就是透過轉換位址的型態,將位址傳給 type size 一樣的 unsigned int ,在對它取值,這樣就可以對 unsigned int 做 bitwise 運算了
unsigned int flo2bin(float f){
unsigned int i = *(unsigned int*)&f;
return i;
}
最後我再多計算輸出 2x 與原本浮點數的差值
再啦幹
e94046165…謝謝…
blake11235
input a float number:0.25
2 e-2
diff: 0.000000
input a float number:0.0035
2 e-8
diff: 0.000406
input a float number:5646513.154
2 e23
diff: 2742095.000000
延續 從 \(\sqrt{2}\) 的運算談浮點數,假設 double 為 32-bit 寬度,考慮以下符合 IEEE 754 規範的平方根程式,請補上對應數值,使其得以運作:
#include <stdint.h>
/* A union allowing us to convert between a double and two 32-bit integers.
* Little-endian representation
*/
typedef union {
double value;
struct {
uint32_t lsw;
uint32_t msw;
} parts;
} ieee_double_shape_type;
/* Set a double from two 32 bit ints. */
#define INSERT_WORDS(d, ix0, ix1) \
do { \
ieee_double_shape_type iw_u = { \
.parts.msw = ix0, \
.parts.lsw = ix1, \
}; \
(d) = iw_u.value; \
} while (0)
/* Get two 32 bit ints from a double. */
#define EXTRACT_WORDS(ix0, ix1, d) \
do { \
ieee_double_shape_type ew_u; \
ew_u.value = (d); \
(ix0) = ew_u.parts.msw; \
(ix1) = ew_u.parts.lsw; \
} while (0)
static const double one = 1.0, tiny = 1.0e-300;
double ieee754_sqrt(double x)
{
double z;
int32_t sign = 0x80000000;
uint32_t r, t1, s1, ix1, q1;
int32_t ix0, s0, q, m, t, i;
EXTRACT_WORDS(ix0, ix1, x);
/* take care of INF and NaN */
if ((ix0 & KK1) == KK2) {
/* sqrt(NaN) = NaN, sqrt(+INF) = +INF, sqrt(-INF) = sNaN */
return x * x + x;
}
/* take care of zero */
if (ix0 <= 0) {
if (((ix0 & (~sign)) | ix1) == 0)
return x; /* sqrt(+-0) = +-0 */
if (ix0 < 0)
return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
}
/* normalize x */
m = (ix0 >> 20);
if (m == 0) { /* subnormal x */
while (ix0 == 0) {
m -= 21;
ix0 |= (ix1 >> 11);
ix1 <<= 21;
}
for (i = 0; (ix0 & 0x00100000) == 0; i++)
ix0 <<= 1;
m -= i - 1;
ix0 |= (ix1 >> (32 - i));
ix1 <<= i;
}
m -= KK3; /* unbias exponent */
ix0 = (ix0 & 0x000fffff) | 0x00100000;
if (m & 1) { /* odd m, double x to make it even */
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
}
m >>= 1; /* m = [m/2] */
/* generate sqrt(x) bit by bit */
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
r = 0x00200000; /* r = moving bit from right to left */
while (r != 0) {
t = s0 + r;
if (t <= ix0) {
s0 = t + r;
ix0 -= t;
q += r;
}
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
r >>= 1;
}
r = sign;
while (r != 0) {
t1 = s1 + r;
t = s0;
if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) {
s1 = t1 + r;
if (((t1 & sign) == sign) && (s1 & sign) == 0)
s0 += 1;
ix0 -= t;
if (ix1 < t1)
ix0 -= 1;
ix1 -= t1;
q1 += r;
}
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
r >>= 1;
}
/* use floating add to find out rounding direction */
if ((ix0 | ix1) != 0) {
z = one - tiny; /* trigger inexact flag */
if (z >= one) {
z = one + tiny;
if (q1 == (uint32_t) 0xffffffff) {
q1 = 0;
q += 1;
} else if (z > one) {
if (q1 == (uint32_t) KK4)
q += 1;
q1 += 2;
} else
q1 += (q1 & 1);
}
}
ix0 = (q >> 1) + 0x3fe00000;
ix1 = q1 >> 1;
if ((q & 1) == 1)
ix1 |= sign;
ix0 += (m << KK5);
INSERT_WORDS(z, ix0, ix1);
return z;
}
乾這題好難,本來想偷偷看有誰有做這題的,結果沒有…
首先先解決那五格填空該填入什麼
/* take care of INF and NaN */
if ((ix0 & KK1) == KK2) {
/* sqrt(NaN) = NaN, sqrt(+INF) = +INF, sqrt(-INF) = sNaN */
return x * x + x;
}
可以觀察出此段程式碼要判斷是否為 INF 或 NAN ,而判斷標準就是數字 exp 的部份皆為 1 ,因此 KK2 為 0x7ff00000 , mask 的部份就是取 ix0 的 62~52 位數,所以 KK1 也是 0x7ff00000
m -= KK3; /* unbias exponent */
程式碼後面的註解都說了要 unbias 也就是將偏置移動回來,對 double 雙精度而言 Bias = 2k-1 - 1 = 1023
/* use floating add to find out rounding direction */
if ((ix0 | ix1) != 0) {
z = one - tiny; /* trigger inexact flag */
if (z >= one) {
z = one + tiny;
if (q1 == (uint32_t) 0xffffffff) {
q1 = 0;
q += 1;
} else if (z > one) {
if (q1 == (uint32_t) KK4)
q += 1;
q1 += 2;
} else
q1 += (q1 & 1);
}
}
其實這題我不太懂…
ix0 += (m << KK5);
最後這段,是要把 m exponent 放入 ix0 裡面,而針對 double 他的 exp 是在 62~52 位元,所以需要向左位移 51-32+1=20 位
typedef union {
double value;
struct {
uint32_t lsw;
uint32_t msw;
} parts;
} ieee_double_shape_type;
/* Set a double from two 32 bit ints. */
#define INSERT_WORDS(d, ix0, ix1) \
do { \
ieee_double_shape_type iw_u = { \
.parts.msw = ix0, \
.parts.lsw = ix1, \
}; \
(d) = iw_u.value; \
} while (0)
/* Get two 32 bit ints from a double. */
#define EXTRACT_WORDS(ix0, ix1, d) \
do { \
ieee_double_shape_type ew_u; \
ew_u.value = (d); \
(ix0) = ew_u.parts.msw; \
(ix1) = ew_u.parts.lsw; \
} while (0)
/* take care of INF and NaN */
if ((ix0 & 0x7ff00000) == 0x7ff00000) {
/* sqrt(NaN) = NaN, sqrt(+INF) = +INF, sqrt(-INF) = sNaN */
return x * x + x;
}
/* take care of zero */
if (ix0 <= 0) {
if (((ix0 & (~sign)) | ix1) == 0)
return x; /* sqrt(+-0) = +-0 */
if (ix0 < 0)
return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
}
sqrt(NaN) = NaN, sqrt(+INF) = +INF, sqrt(-INF) = sNaN
因此使用 x * x + x 來表示,參考這裡不太清楚為何能讓 透過 (x - x) / (x - x); 使得 sqrt(-ve) = -NaN ,是否因為產生了 -0/-0 ?
/* normalize x */
m = (ix0 >> 20);
if (m == 0) { // subnormal x
while (ix0 == 0) {
m -= 21;
ix0 |= (ix1 >> 11);
ix1 <<= 21;
}
for (i = 0; (ix0 & 0x00100000) == 0; i++)
ix0 <<= 1;
m -= i - 1;
ix0 |= (ix1 >> (32 - i));
ix1 <<= i;
}
m -= 1023; /* unbias exponent E = e - Bias */
ix0 = (ix0 & 0x000fffff) | 0x00100000;
if (m & 1) {
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
}
m >>= 1;
/* generate sqrt(x) bit by bit */
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
r = 0x00200000; /* r = moving bit from right to left */
while (r != 0) {
t = s0 + r;
if (t <= ix0) {
s0 = t + r;
ix0 -= t;
q += r;
}
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
r >>= 1;//r one time / 2
}
r = sign;
while (r != 0) {
t1 = s1 + r;
t = s0;
if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) {
s1 = t1 + r;
if (((t1 & sign) == sign) && (s1 & sign) == 0)
s0 += 1;
ix0 -= t;
if (ix1 < t1)
ix0 -= 1;
ix1 -= t1;
q1 += r;
}
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
r >>= 1;
}
/* use floating add to find out rounding direction */
if ((ix0 | ix1) != 0) {
z = one - tiny; /* trigger inexact flag */
if (z >= one) {
z = one + tiny;
if (q1 == (uint32_t) 0xffffffff) {
q1 = 0;
q += 1;
} else if (z > one) {
if (q1 == (uint32_t) 0xfffffffe)
q += 1;
q1 += 2;
} else
q1 += (q1 & 1);
}
}
ix0 = (q >> 1) + 0x3fe00000;
ix1 = q1 >> 1;
if ((q & 1) == 1)
ix1 |= sign;
ix0 += (m << KK5);
INSERT_WORDS(z, ix0, ix1);
return z;
#include <stdint.h>
/* A union allowing us to convert between a double and two 32-bit integers.
* Little-endian representation
*/
typedef union {
float value;
struct {
uint16_t lsw;
uint16_t msw;
} parts;
} ieee_float_shape_type;
/* Set a float from two 16 bit ints. */
#define INSERT_WORDS(f, ix0, ix1) \
do { \
ieee_float_shape_type iw_u = { \
.parts.msw = ix0, \
.parts.lsw = ix1, \
}; \
(f) = iw_u.value; \
} while (0)
/* Get two 16 bit ints from a float. */
#define EXTRACT_WORDS(ix0, ix1, f) \
do { \
ieee_float_shape_type ew_u; \
ew_u.value = (f); \
(ix0) = ew_u.parts.msw; \
(ix1) = ew_u.parts.lsw; \
} while (0)
static const float one = 1.0, tiny = 1.0e-300;//TODO
double ieee754_sqrt(float x)
{
float z;
int16_t sign = 0x8000;
uint16_t r, t1, s1, ix1, q1;
int16_t ix0, s0, q, m, t, i;
EXTRACT_WORDS(ix0, ix1, x);
/* take care of INF and NaN */
if ((ix0 & 0x7f80) == 0x7f80) {
/* sqrt(NaN) = NaN, sqrt(+INF) = +INF, sqrt(-INF) = sNaN */
return x * x + x;
}
/* take care of zero */
if (ix0 <= 0) {
if (((ix0 & (~sign)) | ix1) == 0)
return x; /* sqrt(+-0) = +-0 */
if (ix0 < 0)
return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
}
// normalize x
m = (ix0 >> 7);
/* if (m == 0) { // subnormal x
while (ix0 == 0) {
m -= 21;
ix0 |= (ix1 >> 11);
ix1 <<= 21;
}
for (i = 0; (ix0 & 0x00100000) == 0; i++)
ix0 <<= 1;
m -= i - 1;
ix0 |= (ix1 >> (32 - i));
ix1 <<= i;
}
*/
m -= 127; /* unbias exponent E = e - Bias */
ix0 = (ix0 & 0x007f) | 0x0080; /* M = 1 + f = (2^0 == |00100000) + f */
if (m & 1) { /*if m is odd , double x to make it even , in order to make 2^E--->2^(E/2) , E need to be even 2^(E+1)*M = 2^(E)*M*2 */
ix0 += ix0 + ((ix1 & sign) >> 15);/* if need to carry in, ix1 & sign will be 1, ix1*2 overflow is OK */
ix1 += ix1;
}
m >>= 1; /* m = [m/2] 2^(E+1)--->2^(E) */
/* generate sqrt(x) bit by bit */
ix0 += ix0 + ((ix1 & sign) >> 15);
ix1 += ix1;
q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
r = 0x0100; /* r = moving bit from right to left */
while (r != 0) {
t = s0 + r;
if (t <= ix0) {
s0 = t + r;
ix0 -= t;
q += r;
}
ix0 += ix0 + ((ix1 & sign) >> 15);
ix1 += ix1;
r >>= 1;//r one time / 2
}
r = sign;
while (r != 0) {
t1 = s1 + r;
t = s0;
if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) {
s1 = t1 + r;
if (((t1 & sign) == sign) && (s1 & sign) == 0)
s0 += 1;
ix0 -= t;
if (ix1 < t1)
ix0 -= 1;
ix1 -= t1;
q1 += r;
}
ix0 += ix0 + ((ix1 & sign) >> 15);
ix1 += ix1;
r >>= 1;
}
/* use floating add to find out rounding direction beacuse we have a bit that need to be rounding*/
if ((ix0 | ix1) != 0) {
z = one - tiny; /* trigger inexact flag */
if (z >= one) {
z = one + tiny;
if (q1 == (uint16_t) 0xffff) {
q1 = 0;
q += 1;
} else if (z > one) {
if (q1 == (uint16_t) 0xfffe)
q += 1;
q1 += 2;//make q1 == 0
} else
q1 += (q1 & 1);
}
}
ix0 = (q >> 1) + 0x3f00;//Bias exponent E = e - 1023
ix1 = q1 >> 1;
if ((q & 1) == 1)
ix1 |= sign;
ix0 += (m << 7);
INSERT_WORDS(z, ix0, ix1);
return z;
}
int main(){
float f;
scanf("%f", &f);
printf("%f\n", ieee754_sqrt(f));
}
感覺不難,只需要把數值改成對應 float 格式就好。