# 2018q3 第 17 週測驗題 (下) ### 測驗 `1` 考慮以下將整數轉換為浮點數的 C 程式: ```clike #include <stdint.h> #define I2F_MAX_BITS 15 #define I2F_MAX_INPUT ((1 << I2F_MAX_BITS) - 1) #define I2F_SHIFT (24 - I2F_MAX_BITS) uint32_t u2f(uint32_t input) { if ((input & I2F_MAX_INPUT) == 0) return 0; uint32_t exponent = 126 + I2F_MAX_BITS; uint32_t fraction = (input & I2F_MAX_INPUT) << I2F_SHIFT; for (int i = 0; i < I2F_MAX_BITS; i++) { if (fraction & 0x800000) break; fraction = fraction << 1; exponent = exponent - 1; } return exponent << 23 | (fraction & 0x7fffff); } ``` 注意上述有效的 `input` 範圍僅在 0 到 2^15^ - 1 (請思考為何如此),以下用 bitwse 改寫得到更快的實作: ```clike #include <stdint.h> static inline uint32_t ror32(uint32_t word, unsigned int shift) { return (word >> shift) | (word << (32 - shift)); } /** * __fls - find last (most-significant) set bit in a long word * @word: the word to search */ #ifdef CONFIG_64BIT #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif static inline unsigned long __fls(unsigned long word) { int num = BITS_PER_LONG - 1; #if BITS_PER_LONG == 64 if (!(word & (~0ul << 32))) { num -= 32; word <<= 32; } #endif if (!(word & (~0ul << (BITS_PER_LONG - 16)))) { num -= 16; word <<= 16; } if (!(word & (~0ul << (BITS_PER_LONG - 8)))) { num -= 8; word <<= 8; } if (!(word & (~0ul << (BITS_PER_LONG - 4)))) { num -= 4; word <<= 4; } if (!(word & (~0ul << (BITS_PER_LONG - 2)))) { num -= 2; word <<= 2; } if (!(word & (~0ul << (BITS_PER_LONG - 1)))) num -= 1; return num; } #define I2F_FRAC_BITS 23 #define I2F_MASK ((1 << I2F_FRAC_BITS) - 1) uint32_t u2f(uint32_t x) { if (!x) return 0; uint32_t msb = __fls(x); uint32_t fraction = ror32(x, (msb - I2F_FRAC_BITS) & Q1) & I2F_MASK; uint32_t exponent = (127 + msb) << I2F_FRAC_BITS; return Q2; } ``` 補上以上程式碼。 ==作答區== Q1 = ? * `(a)` 0xff * `(b)` 0x7f * `(c)` 0x3f * `(d)` 0x2f * `(e)` 0x1f * `(f)` 0x0f * `(g)` 0x1 Q2 = ? * `(a)` fraction | exponent * `(b)` fraction + exponent * `(c)` fraction ^ exponent * `(d)` exponent << 23 | (fraction & 0x7f) * `(e)` exponent << 23 | (fraction & 0x3f) * `(f)` exponent << 23 | (fraction & 0x1f) :::success 延伸問題: 在 Linux 核心的 RAID 程式碼找到 `i2f` 的實作並且解說,需要涵蓋 x86_64 加速的 `__fls` 和相關效能分析 ::: --- 測驗 `2` 考慮以下 [pthread_barrier](https://www.unix.com/man-page/All/3/PTHREAD_BARRIER/) 的替代實作: ```clike #include <pthread.h> #if 0 /* redefine pthread_barrier_* */ #define pthread_barrier_t barrier_t #define pthread_barrier_init(B, A, N) (barrier_init((B), (N)), 0) #define pthread_barrier_destroy(B) (barrier_destroy(B), 0) #define pthread_barrier_wait barrier_wait #endif typedef struct barrier_t { unsigned count; unsigned total; pthread_mutex_t m; pthread_cond_t cv; } barrier_t; #define BARRIER_FLAG (1UL << 31) void barrier_destroy(barrier_t *b) { pthread_mutex_lock(&b->m); while (b->total > BARRIER_FLAG) { /* Wait until everyone exits the barrier */ pthread_cond_wait(&b->cv, &b->m); } pthread_mutex_unlock(&b->m); pthread_cond_destroy(&b->cv); pthread_mutex_destroy(&b->m); } void barrier_init(barrier_t *b, unsigned count) { pthread_mutex_init(&b->m, NULL); pthread_cond_init(&b->cv, NULL); b->count = count; b->total = BARRIER_FLAG; } int barrier_wait(barrier_t *b) { pthread_mutex_lock(&b->m); while (b->total > BARRIER_FLAG) { /* Wait until everyone exits the barrier */ pthread_cond_wait(&b->cv, &b->m); } /* Are we the first to enter? */ if (b->total == BARRIER_FLAG) b->total = 0; B1; if (b->total == b->count) { b->total += BARRIER_FLAG - 1; pthread_cond_broadcast(&b->cv); pthread_mutex_unlock(&b->m); return PTHREAD_BARRIER_SERIAL_THREAD; } else { while (b->total < BARRIER_FLAG) { /* Wait until enough threads enter the barrier */ pthread_cond_wait(&b->cv, &b->m); } B2; /* Get entering threads to wake up */ if (b->total == BARRIER_FLAG) pthread_cond_broadcast(&b->cv); pthread_mutex_unlock(&b->m); return 0; } } ``` `pthread_barrier_` 系列函式其實只做而且只能做一件事:將先後到達的多個執行緒擋在同一個 barrier 前,直到所有執行緒到齊,然後撤下 barrier 同時放行。重要操作有: 1. `init` 函數: 負責指定要等待的執行緒個數; 2. `wait` 函數: 由每個執行緒主動呼叫,它通知 barreir 說「我到起跑線前了」。在 `wait` 執行尾聲,barrier 會檢查是否所有人都到 barrier 前了,若是,barrier 就消失,所有執行緒繼續執行下一行程式碼;如果不是,則所有已到 `wait` 的執行緒持續等待,剩下沒執行到 `wait` 的執行緒可繼續執行; 3. `destroy`: 釋放 `init` 申請的資源; 補完程式碼。 ==作答區== B1 = ? * `(a)` b->total-- * `(b)` b->total++ B2 = ? * `(a)` b->total-- * `(b)` b->total++ :::success 延伸問題: 比較上述 barrier 和內建 `pthread_barrier_` 的實作效能差異,並且用 atomics 和 [futex](http://man7.org/linux/man-pages/man2/futex.2.html) 改寫 :::