# 2020q3 Homework6 (quiz6)
[toc]
## 測驗 1
觀察幾位同學的成果後,我覺得本題有幾點特別要注意的地方
1. 雖然 fp32tobf16 是將 32 bit floating point 轉換為 bfloat16 的 function 但回傳值仍為 32 bit floating point ,而 bfloat16 則是放在靠近 MSB 的 16 bits
2. 對於零、正負無限大、NaN 都保持值不變,對 normalized 進行 rounding ,對 denormalized 無條件捨去
```c=
float fp32tobf16(float x) { /* returns a float; on the rounding path its low 16 bits are cleared */
float y = x;
int *py = (int *) &y; /* view y's storage as an int so bitwise operators can be applied */
unsigned int exp, man;
exp = *py & 0x7F800000u; /* bits 30..23, left in place (not shifted down) */
man = *py & 0x007FFFFFu; /* bits 22..0 */
if (!exp && !man) /* zero */
return x;
if (exp == 0x7F800000u) /* infinity or NaN */
return x; /* returned untouched: the low 16 bits are NOT cleared on this path */
/* Normalized number. round to nearest */
float r = x;
int *pr = (int *) &r;
*pr &= 0xff800000; /* keep only the sign bit and bits 30..23 of r */
r /= 256; /* scale r by 2^-8 */
y = x + r; /* for a normalized x this adds 1 at fraction bit 15, the highest bit the mask below drops */
*py &= 0xffff0000; /* clear the low 16 bits */
return y;
}
```
### 第 3 行
"c 語言中,floating point 不能做bitwise 運算" 從 sammer1107 同學得知
經測試會出現 ```error: invalid operands to binary << (have ‘float’ and ‘unsigned int’)``` 訊息,```<<```可為其他 bitwise 運算子
因此先轉型成 int 才操作 (使用 int 指標來操作此記憶體位置)
### 第 5,6 行
將上述的值透過 bitwise and 來分別取得 exponent 與 fraction(似乎又稱為 mantissa)
### 第 7 行
因為非零的值做 not 皆為零,因此只有 exp 與 man 皆為零才符合條件
若原始值為零,則直接回傳,不須處理
### 第 9 行
當 exponent 全為 1 ,直接回傳
這麼做可能會出錯,雖然正負無限大和部分 NaN 可以直接去除低位的 16 bits 而不改變,但部分 NaN 的 fraction 會因此變成 0 ,NaN 就變成正負無限大了
我認為應該改成
```c=
if (exp == 0x7F800000u){ /* infinity or NaN */
    /*
     * OR the 16 fraction bits that are about to be dropped (15..0) into the
     * 7 surviving ones (22..16).  `|=` keeps whatever was already set in
     * bits 22..16, so a NaN such as 0x7FC00000 is not turned into infinity.
     * Infinity has man == 0 and therefore stays infinity.
     */
    man |= (man << 7) | (man << 14) | (man << 21);
    man &= 0x007F0000u;
    /* Rebuild the word with the original sign bit preserved. */
    *py = (*py & 0x80000000u) | exp | man;
    return y;
}
```
這是我能想到效率最好的方法
將會被捨去的 bits 的內容透過 bitwise or 移到剩下的 7 bits fraction
不過 sammer1107同學的
```c=
if (exp == 0x7F800000u) { /* infinity or NaN */
// make sure the mantissa is non-zero if NaN
*py |= ((man != 0) << 16); /* sets bit 16 only when some fraction bit was set; infinity (man == 0) is unchanged */
*py &= 0xFFFF0000; /* clear the low 16 bits */
return y;
}
```
感覺能再省下一些計算
### 第 15 行
尚未處理的數值剩下 normalized 與 denormalized
將輸入值 and 0xff800000
對於 normalized 會留下相同 exponent 但 fraction 為 0 的值,也就是 $1 \times 2^{exponent-bias}$ ,將其除以 256 也會將 exponent 減 8;當 exponent 欄位小於等於 8 時,結果會落入 denormalized 範圍(在未啟用 flush-to-zero 的預設環境下仍可精確表示,並不會變成 0)
對於 denormalized,由於 exponent 欄位全為 0,and 0xff800000 之後只會留下 sign bit,因此 r 為 ±0(denormalized 的隱含整數位是 0 而不是 1)
將處理過的 r 加回去時,那些 exponent 被減 8 的值會剛好使原始值 fraction 第 8 bit 被加 1 ,達到四捨五入的效果
為何 denormalized 就不做 rounding ?
我覺得應該加上
```c=
*pr &= 0x80000000; /* keep only the sign bit of r */
*pr |= ((exp == 0) << 15); /* when the exponent field is zero, set bit 15 of r */
y += r; /* add the signed rounding increment to y */
但之後我在參考連結中看到設計的團隊提及 denormals 的處理方式為直接轉為 0
似乎是為了轉換速度而再犧牲一些精度
DP to SP 轉換 有處理 denormals
```c=
/*
 * Repack a double into a single-precision bit layout stored in the upper
 * 32 bits of the returned double (the lower 32 bits are cleared on every
 * path except exact zero, which is returned unchanged).
 *
 * The low 8 bits of the 11-bit exponent field are kept as-is; the exponent
 * is not re-biased.  Any input whose exponent field has one of bits 62..60
 * set is returned as a signed infinity pattern.
 * NOTE(review): because of that, ordinary values such as 1.0 map to the
 * infinity pattern -- confirm this is the intended analogue of fp32tobf16.
 *
 * Bit access goes through a union with unsigned long long instead of
 * casting to `long *`, so the code does not depend on `long` being 64 bits
 * and never shifts a signed value.
 */
double fp64tofp32(double x) {
    union {
        double d;
        unsigned long long u;
    } y, r;

    y.d = x;
    const unsigned long long exp_hi3 = y.u & 0x7000000000000000ull;
    const unsigned long long exp = y.u & 0x7FF0000000000000ull;
    const unsigned long long man = y.u & 0x000FFFFFFFFFFFFFull;

    if (!exp && !man) { /* zero */
        return x;
    }

    if (exp == 0x7FF0000000000000ull) { /* infinity or NaN */
        y.u &= 0xFF80000000000000ull;
        /* Bit 32 becomes the lowest kept fraction bit, so a NaN stays a NaN. */
        y.u |= (unsigned long long) (man != 0) << 32;
        return y.d;
    }

    if (exp_hi3) { /* exponent does not fit in 8 bits: signed infinity */
        y.u = (y.u & 0x8000000000000000ull) | 0x7F80000000000000ull;
        return y.d;
    }

    /* Normalized rounding: add 2^-21 of the value's power of two. */
    r.d = x;
    r.u &= 0xFFF0000000000000ull;
    r.d /= 2097152;
    y.d = x + r.d;

    /* Denormalized rounding: add 1 at bit 28 when the exponent field is 0. */
    r.u &= 0x8000000000000000ull;
    r.u |= (unsigned long long) (exp == 0) << 28;
    y.d += r.d;

    /*
     * Rounding can carry into exponent bit 60.  Shifting left by 3 would
     * move that bit into the sign position, so report signed infinity.
     */
    if (y.u & 0x7000000000000000ull) {
        y.u = (y.u & 0x8000000000000000ull) | 0x7F80000000000000ull;
        return y.d;
    }

    /* The shift discards the sign bit; r.u still holds it and restores it. */
    y.u = (y.u << 3) | r.u;
    y.u &= 0xFFFFFFFF00000000ull;
    return y.d;
}
```
測試程式
```c=
/*
 * Print the raw 32-bit pattern of a float as 8 zero-padded hex digits,
 * without a trailing newline.  The bits are read through a union and passed
 * as unsigned int, which is the type "%x" expects.
 */
void print_hex(float x) {
    union {
        float f;
        unsigned int u;
    } bits;

    bits.f = x;
    printf("%08x", bits.u);
}
/*
 * Print the raw 64-bit pattern of a double as 16 zero-padded hex digits,
 * without a trailing newline.  unsigned long long with "%llx" is used so the
 * output does not depend on `long` being 64 bits wide.
 */
void print_long_hex(double x) {
    union {
        double d;
        unsigned long long u;
    } bits;

    bits.d = x;
    printf("%016llx", bits.u);
}
int main() {
unsigned int sign[] = {0x00000000, 0x80000000};
unsigned int exp[]={0x00000000, 0x07000000, 0x00800000, 0x7f800000};
unsigned int man[]={0x00000000, 0x00400000, 0x00000001, 0x007fffff};
unsigned int data=0;
printf("fp32tobf16\n");
for (int i = 0; i < sizeof(sign) / sizeof(sign[0]); i++) {
for (int j = 0; j < sizeof(exp) / sizeof(exp[0]); j++){
for (int k = 0; k < sizeof(man) / sizeof(man[0]); k++) {
data=0;
data|=sign[i];
data|=exp[j];
data|=man[k];
float *a=(float *)&data;
printf("sign is %08x\n",sign[i]);
printf("exp is %08x\n",exp[j]);
printf("man is %08x\n",man[k]);
printf("fp32 %f = ", *a);
print_hex(*a);
printf("\n");
float bf_a = fp32tobf16(*a);
printf("bfloat \t\t");
print_hex(bf_a);
printf("\n\n");
}
}
}
printf("fp64tofp32\n");
unsigned long lsign[] = {0x0000000000000000, 0x8000000000000000};
unsigned long lexp[]={0x0000000000000000, 0x7000000000000000, 0x0010000000000000, 0x7ff0000000000000};
unsigned long lman[]={0x0000000000000000, 0x0008000000000000, 0x0000000010000000, 0x0000000000000001, 0x000fffffffffffff};
unsigned long ldata=0;
for (int i = 0; i < sizeof(lsign) / sizeof(lsign[0]); i++) {
for (int j = 0; j < sizeof(lexp) / sizeof(lexp[0]); j++){
for (int k = 0; k < sizeof(lman) / sizeof(lman[0]); k++) {
ldata=0;
ldata|=lsign[i];
ldata|=lexp[j];
ldata|=lman[k];
double *a=(double *)&ldata;
printf("sign is %016lx\n",lsign[i]);
printf("exp is %016lx\n",lexp[j]);
printf("man is %016lx\n",lman[k]);
printf("fp64 %lf = ", *a);
print_long_hex(*a);
printf("\n");
*a = fp64tofp32(*a);
ldata = ldata >> 32;
unsigned int data = (unsigned int)ldata;
float *fp32 = (float *)&data;
printf("fp32 %f = 00000000", *fp32);
print_hex(*fp32);
printf("\n\n");
}
}
}
return 0;
}
```
主要是針對特殊值來做測試
正負號、 exponent 全為 0 或 1、 exponent 數值中等或極小、 mantissa 全為 0 或 1、 mantissa MSB or LSB or 被捨棄部分的 MSB 為 1 等等的排列組合
其中值得注意的是
```
sign = 0x0000000000000000
exp = 0x0010000000000000
man = 0x000fffffffffffff
output 0x01000002
```
這筆測資
原始值 `0x001fffffffffffff` 在加上 rounding 值 `0x0000000080000000` 後,原先的 mantissa 由`1.ffffffffffffffff` 變成 `2.000000007fffffff` ,因為要維持小數點左側為 1 因此向右 shift 1,此時就先做了一次 rounding 導致結果從 `1.000000003fffffff8`(對 8 rounding) 變成 `1.0000000040000000`
而`0x0020000040000000`向右 shift 3 (exp 少 3 bits) 轉換為 fp32 後變成 `0x01000002`
### 批次處理
為了批次處理先將原始的轉換 function 去除條件判斷,並調整呼叫及回傳的形式,也許光是這樣執行效能就有差別
```c=
/*
 * Branch-free conversion of `size` floats from fp32[] to bfloat16 precision.
 * Each result is written to ans[] as a float whose low 16 bits are zero.
 *
 *   fp32  input array, at least `size` elements
 *   ans   output array, at least `size` elements
 *   size  number of elements to convert
 *
 * Bit access goes through a union rather than pointer casts.
 */
void fp32tobf16_noif(float* fp32, float* ans, int size) {
    for (int i = 0; i < size; i++) {
        union {
            float f;
            unsigned int u;
        } y, r;

        y.f = fp32[i];
        r.f = fp32[i];

        const unsigned int exp = y.u & 0x7F800000u;
        const unsigned int man = y.u & 0x007FFFFFu;
        const unsigned int is_nan = (exp == 0x7F800000u && man != 0);
        const unsigned int is_denormal = (exp == 0 && man != 0);

        /* Normals rounding: add 2^-8 of the value's power of two. */
        r.u &= 0xff800000u;
        r.f /= 256;
        y.f = fp32[i] + r.f;

        /* Denormals rounding: add 1 at bit 15 when the exponent field is 0. */
        r.u &= 0x80000000u;
        r.u |= is_denormal << 15;
        y.f += r.f;

        /*
         * NaN: force a kept fraction bit on.  This has to come after the
         * additions, because they reassign y and would discard an earlier
         * write to its bits.
         */
        y.u |= is_nan << 16;

        y.u &= 0xffff0000u;
        ans[i] = y.f;
    }
}
```
對應的批次轉換 function
```c=
/* Broadcast one 32-bit pattern into all four lanes of an SSE register. */
static __m128 bf16_splat_bits(unsigned int bits)
{
    union {
        unsigned int u[4];
        float f[4];
    } lanes = { { bits, bits, bits, bits } };

    return _mm_loadu_ps(lanes.f);
}

/*
 * SSE version of the branch-free fp32 -> bfloat16 conversion, four floats
 * per iteration.
 *
 *   fp32  input array, at least `size` elements
 *   ans   output array, at least `size` elements
 *   size  number of elements to convert (any non-negative value)
 *
 * Unaligned loads/stores are used, so neither array needs 16-byte
 * alignment.  When `size` is not a multiple of 4 the last group goes
 * through a zero-padded temporary, so nothing past `size` is read or
 * written.
 */
void fp32tobf16_batch(float* fp32, float* ans, int size) {
    /* Loop-invariant masks and constants. */
    const __m128 cons_sign = bf16_splat_bits(0x80000000u);
    const __m128 cons_exp = bf16_splat_bits(0x7f800000u);
    const __m128 cons_man = bf16_splat_bits(0x007FFFFFu);
    const __m128 cons_zero = _mm_setzero_ps();
    const __m128 nrd = _mm_set1_ps(256.0f);
    const __m128 dr = bf16_splat_bits(0x00008000u);
    const __m128 mbit16 = bf16_splat_bits(0xffff0000u);

    for (int i = 0; i < size; i += 4) {
        const int lanes = (size - i < 4) ? (size - i) : 4;
        float tail_in[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
        float tail_out[4];
        __m128 y;

        if (lanes == 4) {
            y = _mm_loadu_ps(&fp32[i]);
        } else {
            for (int k = 0; k < lanes; k++)
                tail_in[k] = fp32[i + k];
            y = _mm_loadu_ps(tail_in);
        }

        __m128 r = y;
        const __m128 m_exp = _mm_and_ps(y, cons_exp);
        const __m128 m_man = _mm_and_ps(y, cons_man);

        /* NaN: set every fraction bit so the value is still NaN after masking. */
        const __m128 man_not_0 = _mm_cmpneq_ps(m_man, cons_zero);
        const __m128 is_nan = _mm_and_ps(_mm_cmpeq_ps(m_exp, cons_exp), man_not_0);
        y = _mm_or_ps(y, _mm_and_ps(is_nan, cons_man));

        /* Normals rounding: r = (sign | exponent of x) / 256, then y += r. */
        r = _mm_and_ps(r, _mm_or_ps(cons_sign, cons_exp));
        r = _mm_div_ps(r, nrd);
        y = _mm_add_ps(y, r);

        /* Denormals rounding: *pr |= ((exp == 0 && man != 0) << 15); */
        const __m128 is_denormal = _mm_and_ps(_mm_cmpeq_ps(m_exp, cons_zero), man_not_0);
        r = _mm_and_ps(r, cons_sign);
        r = _mm_or_ps(r, _mm_and_ps(is_denormal, dr));
        y = _mm_add_ps(y, r);

        /* Clear the low 16 bits of every lane. */
        y = _mm_and_ps(y, mbit16);

        if (lanes == 4) {
            _mm_storeu_ps(&ans[i], y);
        } else {
            _mm_storeu_ps(tail_out, y);
            for (int k = 0; k < lanes; k++)
                ans[i + k] = tail_out[k];
        }
    }
}
```
結果
![](https://i.imgur.com/Zf43bIy.png)
![](https://i.imgur.com/VS35eqe.png)
批次處理一次將 4 個 fp32 一起轉換,但是花費時間卻低於原本的 $\dfrac{1}{4}$ ,還沒找出原因
## 測驗 2
```c=
/*
 * Declare a ring-buffer struct type named NAME whose storage is an array of T.
 * Fields, in order: size, start, end, elements.
 */
#define RINGBUF_DECL(T, NAME) \
    typedef struct {          \
        int size;             \
        int start;            \
        int end;              \
        T *elements;          \
    } NAME
```
這樣設計可以使這個 struct 更具彈性,可調整 ring buffer 使用的資料型態
題目中使用的為 int
```c=
/*
 * Give BUF a static backing array of S + 1 elements of type T and reset it
 * to the empty state (size = S, start = end = 0).
 *
 * BUF and S are parenthesised so that arguments such as `*p` or `a & b`
 * expand as intended.
 *
 * The expansion is a block followed by three statements and already ends in
 * a semicolon, so use it as a statement of its own, not as the unbraced body
 * of an if/else.
 */
#define RINGBUF_INIT(BUF, S, T)               \
    {                                         \
        static T static_ringbuf_mem[(S) + 1]; \
        (BUF).elements = static_ringbuf_mem;  \
    }                                         \
    (BUF).size = (S);                         \
    (BUF).start = 0;                          \
    (BUF).end = 0;
```
依照設定初始化 my_buf 的起、終點和容量,並宣告 buffer 的記憶體空間
將 my_buf.elements 指標設定到該記憶體空間起始位置
```c=
/* Index that follows `start`: 0 when `start` equals `size`, otherwise start + 1. */
#define NEXT_START_INDEX(BUF) \
    (((BUF)->start == (BUF)->size) ? 0 : ((BUF)->start + 1))
```
算出 start 的下一個位置,若超出範圍則設回 buffer 起始位置
```c=
/* Index that follows `end`: 0 when `end` equals `size`, otherwise end + 1. */
#define NEXT_END_INDEX(BUF) \
    (((BUF)->end == (BUF)->size) ? 0 : ((BUF)->end + 1))
```
算出 end 的下一個位置,若超出範圍則設回 buffer 起始位置
```c=
/* Non-zero when the read index and the write index coincide. */
#define is_ringbuf_empty(BUF) ((BUF)->start == (BUF)->end)
```
藉由判斷 end 與 start 是否重疊來知道 buffer 是否為空
```c=
/* Non-zero when advancing `end` by one slot would land on `start`. */
#define is_ringbuf_full(BUF) ((BUF)->start == NEXT_END_INDEX(BUF))
```
若 end 的下一個位置為 start 就表示 buffer 是滿的
```c=
/* The element slot at index `end`; usable as an lvalue. */
#define ringbuf_write_peek(BUF) (*((BUF)->elements + (BUF)->end))
```
取得 end 所指位置的元素(可作為 lvalue 寫入)
```c=
/*
 * Advance `end` by one slot.  If it then coincides with `start`, advance
 * `start` as well.
 */
#define ringbuf_write_skip(BUF)                   \
    do {                                          \
        (BUF)->end = NEXT_END_INDEX(BUF);         \
        if (is_ringbuf_empty(BUF)) {              \
            (BUF)->start = NEXT_START_INDEX(BUF); \
        }                                         \
    } while (0)
```
將 end 往下一格移動,若重疊到 start 則 start 也往下一格移動
```c=
/* The element slot at index `start`. */
#define ringbuf_read_peek(BUF) (*((BUF)->elements + (BUF)->start))
```
取得 start 所指位置的元素
```c=
/*
 * Advance `start` by one slot.
 *
 * The expansion is a single parenthesised expression with no trailing
 * semicolon, so `if (c) ringbuf_read_skip(b); else ...` parses correctly.
 * Callers supply the semicolon.
 */
#define ringbuf_read_skip(BUF) ((BUF)->start = NEXT_START_INDEX(BUF))
```
將 start 往下一格移動
```c=
/*
 * Store ELEMENT in the slot at `end`, then advance the write position with
 * ringbuf_write_skip.
 */
#define ringbuf_write(BUF, ELEMENT) \
do { \
ringbuf_write_peek(BUF) = ELEMENT; \
ringbuf_write_skip(BUF); \
} while (0)
```
在 end 寫入資料並調整 end 位置
```c=
/*
 * Copy the element at `start` into ELEMENT (which must be assignable), then
 * advance the read position with ringbuf_read_skip.
 */
#define ringbuf_read(BUF, ELEMENT) \
do { \
ELEMENT = ringbuf_read_peek(BUF); \
ringbuf_read_skip(BUF); \
} while (0)
```
讀取 start 所在的 buffer 的值並調整 start 的位置
### do { ... } while (0) 使用的考量
為了解決使用時會發生 dangling else 的問題,同時避免 statements 被分開到不同 scope
```c=
#define add_two_num(x,y) statement1;statement2
if(True)
add_two_num(a,b);
```
展開成
```c=
if(True)
statement1;
statement2;
```
```c=
#define add_two_num(x,y) {statement1;statement2;}
if(True)
add_two_num(a,b);
else
statement3;
```
展開成
```c=
if(True){
statement1;
statement2;
};
else
statement3;
```
## 測驗 3