# 2018q3 第 17 週測驗題 (下) ### 測驗 1 考慮以下將整數轉換為浮點數的 C 程式: clike #include <stdint.h> #define I2F_MAX_BITS 15 #define I2F_MAX_INPUT ((1 << I2F_MAX_BITS) - 1) #define I2F_SHIFT (24 - I2F_MAX_BITS) uint32_t u2f(uint32_t input) { if ((input & I2F_MAX_INPUT) == 0) return 0; uint32_t exponent = 126 + I2F_MAX_BITS; uint32_t fraction = (input & I2F_MAX_INPUT) << I2F_SHIFT; for (int i = 0; i < I2F_MAX_BITS; i++) { if (fraction & 0x800000) break; fraction = fraction << 1; exponent = exponent - 1; } return exponent << 23 | (fraction & 0x7fffff); }  注意上述有效的 input 範圍僅在 0 到 2^15^ - 1 (請思考為何如此)，以下用 bitwse 改寫得到更快的實作： clike #include <stdint.h> static inline uint32_t ror32(uint32_t word, unsigned int shift) { return (word >> shift) | (word << (32 - shift)); } /** * __fls - find last (most-significant) set bit in a long word * @word: the word to search */ #ifdef CONFIG_64BIT #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif static inline unsigned long __fls(unsigned long word) { int num = BITS_PER_LONG - 1; #if BITS_PER_LONG == 64 if (!(word & (~0ul << 32))) { num -= 32; word <<= 32; } #endif if (!(word & (~0ul << (BITS_PER_LONG - 16)))) { num -= 16; word <<= 16; } if (!(word & (~0ul << (BITS_PER_LONG - 8)))) { num -= 8; word <<= 8; } if (!(word & (~0ul << (BITS_PER_LONG - 4)))) { num -= 4; word <<= 4; } if (!(word & (~0ul << (BITS_PER_LONG - 2)))) { num -= 2; word <<= 2; } if (!(word & (~0ul << (BITS_PER_LONG - 1)))) num -= 1; return num; } #define I2F_FRAC_BITS 23 #define I2F_MASK ((1 << I2F_FRAC_BITS) - 1) uint32_t u2f(uint32_t x) { if (!x) return 0; uint32_t msb = __fls(x); uint32_t fraction = ror32(x, (msb - I2F_FRAC_BITS) & Q1) & I2F_MASK; uint32_t exponent = (127 + msb) << I2F_FRAC_BITS; return Q2; }  補上以上程式碼。 ==作答區== Q1 = ? * (a) 0xff * (b) 0x7f * (c) 0x3f * (d) 0x2f * (e) 0x1f * (f) 0x0f * (g) 0x1 Q2 = ? * (a) fraction | exponent * (b) fraction + exponent * (c) fraction ^ exponent * (d) exponent << 23 | (fraction & 0x7f) * (e) exponent << 23 | (fraction & 0x3f) * (f) exponent << 23 | (fraction & 0x1f) :::success 延伸問題: 在 Linux 核心的 RAID 程式碼找到 i2f 的實作並且解說，需要涵蓋 x86_64 加速的 __fls 和相關效能分析 ::: --- 測驗 2 考慮以下 [pthread_barrier](https://www.unix.com/man-page/All/3/PTHREAD_BARRIER/) 的替代實作: clike #include <pthread.h> #if 0 /* redefine pthread_barrier_* */ #define pthread_barrier_t barrier_t #define pthread_barrier_init(B, A, N) (barrier_init((B), (N)), 0) #define pthread_barrier_destroy(B) (barrier_destroy(B), 0) #define pthread_barrier_wait barrier_wait #endif typedef struct barrier_t { unsigned count; unsigned total; pthread_mutex_t m; pthread_cond_t cv; } barrier_t; #define BARRIER_FLAG (1UL << 31) void barrier_destroy(barrier_t *b) { pthread_mutex_lock(&b->m); while (b->total > BARRIER_FLAG) { /* Wait until everyone exits the barrier */ pthread_cond_wait(&b->cv, &b->m); } pthread_mutex_unlock(&b->m); pthread_cond_destroy(&b->cv); pthread_mutex_destroy(&b->m); } void barrier_init(barrier_t *b, unsigned count) { pthread_mutex_init(&b->m, NULL); pthread_cond_init(&b->cv, NULL); b->count = count; b->total = BARRIER_FLAG; } int barrier_wait(barrier_t *b) { pthread_mutex_lock(&b->m); while (b->total > BARRIER_FLAG) { /* Wait until everyone exits the barrier */ pthread_cond_wait(&b->cv, &b->m); } /* Are we the first to enter? */ if (b->total == BARRIER_FLAG) b->total = 0; B1; if (b->total == b->count) { b->total += BARRIER_FLAG - 1; pthread_cond_broadcast(&b->cv); pthread_mutex_unlock(&b->m); return PTHREAD_BARRIER_SERIAL_THREAD; } else { while (b->total < BARRIER_FLAG) { /* Wait until enough threads enter the barrier */ pthread_cond_wait(&b->cv, &b->m); } B2; /* Get entering threads to wake up */ if (b->total == BARRIER_FLAG) pthread_cond_broadcast(&b->cv); pthread_mutex_unlock(&b->m); return 0; } }  pthread_barrier_ 系列函式其實只做而且只能做一件事：將先後到達的多個執行緒擋在同一個 barrier 前，直到所有執行緒到齊，然後撤下 barrier 同時放行。重要操作有: 1. init 函數: 負責指定要等待的執行緒個數; 2. wait 函數: 由每個執行緒主動呼叫，它通知 barreir 說「我到起跑線前了」。在 wait 執行尾聲，barrier 會檢查是否所有人都到 barrier 前了，若是，barrier 就消失，所有執行緒繼續執行下一行程式碼；如果不是，則所有已到 wait 的執行緒持續等待，剩下沒執行到 wait 的執行緒可繼續執行; 3. destroy: 釋放 init 申請的資源; 補完程式碼。 ==作答區== B1 = ? * (a) b->total-- * (b) b->total++ B2 = ? * (a) b->total-- * (b) b->total++ :::success 延伸問題: 比較上述 barrier 和內建 pthread_barrier_ 的實作效能差異，並且用 atomics 和 [futex](http://man7.org/linux/man-pages/man2/futex.2.html) 改寫 :::