# [2020q1](http://wiki.csie.ncku.edu.tw/linux/schedule) 第 9 週測驗題
###### tags: `linux2020`
:::info
目的: 檢驗學員對 [Linux: 淺談同步機制](https://hackmd.io/s/SJpp-bN0m) 和 [Concurrent Programming](https://hackmd.io/s/Skh_AaVix) 的認知
:::
==[作答表單](https://docs.google.com/forms/d/e/1FAIpQLSfUft0TQ6N9NsbLvevpduQMXCBgLGSeXfeTcr8hgiU-dOF-2w/viewform)==
---
### 測驗 `1`
以下是個分析 mutex lock/unlock 操作的執行成本的程式: (檔名 `mutexperf.c`,以 `$ gcc -o mutexperf mutexperf.c -lpthread` 編譯)
```cpp
#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/time.h>
struct test_results {
int reps;
int64_t start, stop;
};
static int64_t now_usecs(void) {
struct timeval tv;
gettimeofday(&tv, NULL);
return (int64_t) tv.tv_sec * 1000000 + tv.tv_usec;
}
/* Simply acquiring and releasing a lock, without any contention. */
static void lock_unlock(struct test_results *res) {
pthread_mutex_t mutex;
pthread_mutex_init(&mutex, NULL);
res->start = now_usecs();
for (int i = res->reps; i--;) {
pthread_mutex_lock(&mutex);
pthread_mutex_unlock(&mutex);
}
res->stop = now_usecs();
pthread_mutex_destroy(&mutex);
}
static int cmp_int64(const void *ap, const void *bp) {
int64_t a = *(int64_t *) ap, b = *(int64_t *) bp;
if (a < b) return -1;
if (a > b) return 1;
return 0;
}
#define SETS 10
static void measure(void (*test)(struct test_results *res),
const char *name,
int reps) {
printf("Measuring %s: ", name);
fflush(stdout);
struct test_results res = { .reps = reps };
int64_t times[SETS];
for (int i = 0; i < SETS; i++) {
test(&res);
times[i] = res.stop - res.start;
}
qsort(times, SETS, sizeof(int64_t), cmp_int64);
printf("best %d ns, 50%%ile %d ns\n", (int) (times[0] * 1000 / reps),
(int) (times[SETS / 2] * 1000 / reps));
}
int main(void) {
measure(lock_unlock, "Locking and unlocking without contention", 10000000);
return 0;
}
```
參考執行輸出:
```
Measuring Locking and unlocking without contention: best 31 ns, 50%ile 32 ns
```
其中 `50%ile` (不要看成 `idle`) 表示第 50 [百分位數](https://en.wikipedia.org/wiki/Percentile)。
考慮到 [lock contention](https://en.wikipedia.org/wiki/Lock_(computer_science)),在 `cmp_int64` 函式之前,增加以下程式碼:
```cpp
#define CONTENTION_THREAD_COUNT 4
#define CONTENTION_MUTEX_COUNT (CONTENTION_THREAD_COUNT + 1)
struct contention_info {
pthread_mutex_t mutexes[CONTENTION_MUTEX_COUNT];
pthread_mutex_t ready_mutex;
pthread_cond_t ready_cond;
int ready_count;
int thread_reps;
};
struct contention_thread_info {
struct contention_info *info;
pthread_mutex_t start_mutex;
int thread_index;
int64_t start, stop;
};
static void *contention_thread(void *v_thread_info)
{
struct contention_thread_info *thread_info = v_thread_info;
struct contention_info *info = thread_info->info;
int i = thread_info->thread_index;
int reps = info->thread_reps;
/* Lock our first mutex */
pthread_mutex_lock(&info->mutexes[i]);
/* Indicate that we are ready for the test. */
pthread_mutex_lock(&info->ready_mutex);
if (++info->ready_count == CONTENTION_THREAD_COUNT)
AAA;
pthread_mutex_unlock(&info->ready_mutex);
/* Line up to start */
pthread_mutex_lock(&thread_info->start_mutex);
pthread_mutex_unlock(&thread_info->start_mutex);
thread_info->start = now_usecs();
for (int j = 1; j < reps; j++) {
int next = (i + 1) % CONTENTION_MUTEX_COUNT;
pthread_mutex_lock(&info->mutexes[next]);
BBB;
i = next;
}
thread_info->stop = now_usecs();
pthread_mutex_unlock(&info->mutexes[i]);
return NULL;
}
static void contention(struct test_results *res)
{
struct contention_info info;
struct contention_thread_info thread_infos[CONTENTION_THREAD_COUNT];
pthread_t threads[CONTENTION_THREAD_COUNT];
for (int i = 0; i < CONTENTION_MUTEX_COUNT; i++)
pthread_mutex_init(&info.mutexes[i], NULL);
pthread_mutex_init(&info.ready_mutex, NULL);
pthread_cond_init(&info.ready_cond, NULL);
info.ready_count = 0;
info.thread_reps = res->reps / CONTENTION_THREAD_COUNT;
for (int i = 0; i < CONTENTION_THREAD_COUNT; i++) {
thread_infos[i].info = &info;
thread_infos[i].thread_index = i;
pthread_mutex_init(&thread_infos[i].start_mutex, NULL);
pthread_mutex_lock(&thread_infos[i].start_mutex);
pthread_create(&threads[i], NULL, contention_thread, &thread_infos[i]);
}
pthread_mutex_lock(&info.ready_mutex);
while (info.ready_count < CONTENTION_THREAD_COUNT)
CCC;
pthread_mutex_unlock(&info.ready_mutex);
for (int i = 0; i < CONTENTION_THREAD_COUNT; i++)
pthread_mutex_unlock(&thread_infos[i].start_mutex);
for (int i = 0; i < CONTENTION_THREAD_COUNT; i++) {
pthread_join(threads[i], NULL);
pthread_mutex_destroy(&thread_infos[i].start_mutex);
}
for (int i = 0; i < CONTENTION_MUTEX_COUNT; i++)
pthread_mutex_destroy(&info.mutexes[i]);
pthread_mutex_destroy(&info.ready_mutex);
pthread_cond_destroy(&info.ready_cond);
res->start = thread_infos[0].start;
res->stop = thread_infos[0].stop;
for (int i = 1; i < CONTENTION_THREAD_COUNT; i++) {
if (thread_infos[i].start < res->start)
res->start = thread_infos[i].start;
if (thread_infos[i].stop > res->stop)
res->stop = thread_infos[i].stop;
}
}
```
並做了以下調整:
```diff
int main(void)
{
measure(lock_unlock, "Locking and unlocking without contention", 10000000);
+ measure(contention, "Locking and unlocking with contention", 100000);
return 0;
}
```
參考的執行輸出:
```
Measuring Locking and unlocking without contention: best 31 ns, 50%ile 33 ns
Measuring Locking and unlocking with contention: best 5269 ns, 50%ile 5895 ns
```
基於效能的測試的目標,請補完程式碼。
==作答區==
AAA = ?
* `(a)` `pthread_cond_wait(&info.ready_cond, &info.ready_mutex)`
* `(b)` `pthread_cond_signal(&info->ready_cond)`
* `(c)` `pthread_mutex_unlock(&info->ready_mutex)`
BBB = ?
* `(a)` `pthread_mutex_unlock(&info->mutexes[next])`
* `(b)` `pthread_mutex_unlock(&info->mutexes[j])`
* `(c)` `pthread_mutex_unlock(&info->mutexes[i])`
CCC = ?
* `(a)` `pthread_cond_wait(&info.ready_cond, &info.ready_mutex)`
* `(b)` `pthread_cond_signal(&info->ready_cond)`
* `(c)` `pthread_mutex_unlock(&info->ready_mutex)`
:::success
延伸問題:
1. 解釋上述 `mutexperf.c` 效能分析工具的原始程式碼,指出其不足並改善
2. 研讀論文 [A Review of Lightweight Thread Approaches for High Performance Computing](https://www.mcs.anl.gov/papers/P6006-0516.pdf) 並學習其中效能分析的手法,探討 Linux NPTL (即 native thread) 和 lightweight ULT 的表現
:::