# [2020q1](http://wiki.csie.ncku.edu.tw/linux/schedule) 第 9 週測驗題 ###### tags: `linux2020` :::info 目的: 檢驗學員對 [Linux: 淺談同步機制](https://hackmd.io/s/SJpp-bN0m) 和 [Concurrent Programming](https://hackmd.io/s/Skh_AaVix) 的認知 ::: ==[作答表單](https://docs.google.com/forms/d/e/1FAIpQLSfUft0TQ6N9NsbLvevpduQMXCBgLGSeXfeTcr8hgiU-dOF-2w/viewform)== --- ### 測驗 `1` 以下是個分析 mutex lock/unlock 操作的執行成本的程式: (檔名 `mutexperf.c`,以 `$ gcc -o mutexperf mutexperf.c -lpthread` 編譯) ```cpp #define _GNU_SOURCE #include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <sys/time.h> struct test_results { int reps; int64_t start, stop; }; static int64_t now_usecs(void) { struct timeval tv; gettimeofday(&tv, NULL); return (int64_t) tv.tv_sec * 1000000 + tv.tv_usec; } /* Simply acquiring and releasing a lock, without any contention. */ static void lock_unlock(struct test_results *res) { pthread_mutex_t mutex; pthread_mutex_init(&mutex, NULL); res->start = now_usecs(); for (int i = res->reps; i--;) { pthread_mutex_lock(&mutex); pthread_mutex_unlock(&mutex); } res->stop = now_usecs(); pthread_mutex_destroy(&mutex); } static int cmp_int64(const void *ap, const void *bp) { int64_t a = *(int64_t *) ap, b = *(int64_t *) bp; if (a < b) return -1; if (a > b) return 1; return 0; } #define SETS 10 static void measure(void (*test)(struct test_results *res), const char *name, int reps) { printf("Measuring %s: ", name); fflush(stdout); struct test_results res = { .reps = reps }; int64_t times[SETS]; for (int i = 0; i < SETS; i++) { test(&res); times[i] = res.stop - res.start; } qsort(times, SETS, sizeof(int64_t), cmp_int64); printf("best %d ns, 50%%ile %d ns\n", (int) (times[0] * 1000 / reps), (int) (times[SETS / 2] * 1000 / reps)); } int main(void) { measure(lock_unlock, "Locking and unlocking without contention", 10000000); return 0; } ``` 參考執行輸出: ``` Measuring Locking and unlocking without contention: best 31 ns, 50%ile 32 ns ``` 其中 `50%ile` (不要看成 `idle`) 表示第 50 [百分位數](https://en.wikipedia.org/wiki/Percentile)。 考慮到 [lock contention](https://en.wikipedia.org/wiki/Lock_(computer_science)),在 `cmp_int64` 函式之前,增加以下程式碼: ```cpp #define CONTENTION_THREAD_COUNT 4 #define CONTENTION_MUTEX_COUNT (CONTENTION_THREAD_COUNT + 1) struct contention_info { pthread_mutex_t mutexes[CONTENTION_MUTEX_COUNT]; pthread_mutex_t ready_mutex; pthread_cond_t ready_cond; int ready_count; int thread_reps; }; struct contention_thread_info { struct contention_info *info; pthread_mutex_t start_mutex; int thread_index; int64_t start, stop; }; static void *contention_thread(void *v_thread_info) { struct contention_thread_info *thread_info = v_thread_info; struct contention_info *info = thread_info->info; int i = thread_info->thread_index; int reps = info->thread_reps; /* Lock our first mutex */ pthread_mutex_lock(&info->mutexes[i]); /* Indicate that we are ready for the test. */ pthread_mutex_lock(&info->ready_mutex); if (++info->ready_count == CONTENTION_THREAD_COUNT) AAA; pthread_mutex_unlock(&info->ready_mutex); /* Line up to start */ pthread_mutex_lock(&thread_info->start_mutex); pthread_mutex_unlock(&thread_info->start_mutex); thread_info->start = now_usecs(); for (int j = 1; j < reps; j++) { int next = (i + 1) % CONTENTION_MUTEX_COUNT; pthread_mutex_lock(&info->mutexes[next]); BBB; i = next; } thread_info->stop = now_usecs(); pthread_mutex_unlock(&info->mutexes[i]); return NULL; } static void contention(struct test_results *res) { struct contention_info info; struct contention_thread_info thread_infos[CONTENTION_THREAD_COUNT]; pthread_t threads[CONTENTION_THREAD_COUNT]; for (int i = 0; i < CONTENTION_MUTEX_COUNT; i++) pthread_mutex_init(&info.mutexes[i], NULL); pthread_mutex_init(&info.ready_mutex, NULL); pthread_cond_init(&info.ready_cond, NULL); info.ready_count = 0; info.thread_reps = res->reps / CONTENTION_THREAD_COUNT; for (int i = 0; i < CONTENTION_THREAD_COUNT; i++) { thread_infos[i].info = &info; thread_infos[i].thread_index = i; pthread_mutex_init(&thread_infos[i].start_mutex, NULL); pthread_mutex_lock(&thread_infos[i].start_mutex); pthread_create(&threads[i], NULL, contention_thread, &thread_infos[i]); } pthread_mutex_lock(&info.ready_mutex); while (info.ready_count < CONTENTION_THREAD_COUNT) CCC; pthread_mutex_unlock(&info.ready_mutex); for (int i = 0; i < CONTENTION_THREAD_COUNT; i++) pthread_mutex_unlock(&thread_infos[i].start_mutex); for (int i = 0; i < CONTENTION_THREAD_COUNT; i++) { pthread_join(threads[i], NULL); pthread_mutex_destroy(&thread_infos[i].start_mutex); } for (int i = 0; i < CONTENTION_MUTEX_COUNT; i++) pthread_mutex_destroy(&info.mutexes[i]); pthread_mutex_destroy(&info.ready_mutex); pthread_cond_destroy(&info.ready_cond); res->start = thread_infos[0].start; res->stop = thread_infos[0].stop; for (int i = 1; i < CONTENTION_THREAD_COUNT; i++) { if (thread_infos[i].start < res->start) res->start = thread_infos[i].start; if (thread_infos[i].stop > res->stop) res->stop = thread_infos[i].stop; } } ``` 並做了以下調整: ```diff int main(void) { measure(lock_unlock, "Locking and unlocking without contention", 10000000); + measure(contention, "Locking and unlocking with contention", 100000); return 0; } ``` 參考的執行輸出: ``` Measuring Locking and unlocking without contention: best 31 ns, 50%ile 33 ns Measuring Locking and unlocking with contention: best 5269 ns, 50%ile 5895 ns ``` 基於效能的測試的目標,請補完程式碼。 ==作答區== AAA = ? * `(a)` `pthread_cond_wait(&info.ready_cond, &info.ready_mutex)` * `(b)` `pthread_cond_signal(&info->ready_cond)` * `(c)` `pthread_mutex_unlock(&info->ready_mutex)` BBB = ? * `(a)` `pthread_mutex_unlock(&info->mutexes[next])` * `(b)` `pthread_mutex_unlock(&info->mutexes[j])` * `(c)` `pthread_mutex_unlock(&info->mutexes[i])` CCC = ? * `(a)` `pthread_cond_wait(&info.ready_cond, &info.ready_mutex)` * `(b)` `pthread_cond_signal(&info->ready_cond)` * `(c)` `pthread_mutex_unlock(&info->ready_mutex)` :::success 延伸問題: 1. 解釋上述 `mutexperf.c` 效能分析工具的原始程式碼,指出其不足並改善 2. 研讀論文 [A Review of Lightweight Thread Approaches for High Performance Computing](https://www.mcs.anl.gov/papers/P6006-0516.pdf) 並學習其中效能分析的手法,探討 Linux NPTL (即 native thread) 和 lightweight ULT 的表現 :::