linux2020
目的: 檢驗學員對 Linux: 淺談同步機制 和 Concurrent Programming 的認知
1
以下是個分析 mutex lock/unlock 操作的執行成本的程式: (檔名 mutexperf.c
,以 $ gcc -o mutexperf mutexperf.c -lpthread
編譯)
#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/time.h>
struct test_results {
int reps;
int64_t start, stop;
};
static int64_t now_usecs(void) {
struct timeval tv;
gettimeofday(&tv, NULL);
return (int64_t) tv.tv_sec * 1000000 + tv.tv_usec;
}
/* Simply acquiring and releasing a lock, without any contention. */
static void lock_unlock(struct test_results *res) {
pthread_mutex_t mutex;
pthread_mutex_init(&mutex, NULL);
res->start = now_usecs();
for (int i = res->reps; i--;) {
pthread_mutex_lock(&mutex);
pthread_mutex_unlock(&mutex);
}
res->stop = now_usecs();
pthread_mutex_destroy(&mutex);
}
static int cmp_int64(const void *ap, const void *bp) {
int64_t a = *(int64_t *) ap, b = *(int64_t *) bp;
if (a < b) return -1;
if (a > b) return 1;
return 0;
}
#define SETS 10
static void measure(void (*test)(struct test_results *res),
const char *name,
int reps) {
printf("Measuring %s: ", name);
fflush(stdout);
struct test_results res = { .reps = reps };
int64_t times[SETS];
for (int i = 0; i < SETS; i++) {
test(&res);
times[i] = res.stop - res.start;
}
qsort(times, SETS, sizeof(int64_t), cmp_int64);
printf("best %d ns, 50%%ile %d ns\n", (int) (times[0] * 1000 / reps),
(int) (times[SETS / 2] * 1000 / reps));
}
int main(void) {
measure(lock_unlock, "Locking and unlocking without contention", 10000000);
return 0;
}
參考執行輸出:
Measuring Locking and unlocking without contention: best 31 ns, 50%ile 32 ns
其中 50%ile
(不要看成 idle
) 表示第 50 百分位數。
考慮到 lock contention,在 cmp_int64
函式之前,增加以下程式碼:
#define CONTENTION_THREAD_COUNT 4
#define CONTENTION_MUTEX_COUNT (CONTENTION_THREAD_COUNT + 1)
struct contention_info {
pthread_mutex_t mutexes[CONTENTION_MUTEX_COUNT];
pthread_mutex_t ready_mutex;
pthread_cond_t ready_cond;
int ready_count;
int thread_reps;
};
struct contention_thread_info {
struct contention_info *info;
pthread_mutex_t start_mutex;
int thread_index;
int64_t start, stop;
};
static void *contention_thread(void *v_thread_info)
{
struct contention_thread_info *thread_info = v_thread_info;
struct contention_info *info = thread_info->info;
int i = thread_info->thread_index;
int reps = info->thread_reps;
/* Lock our first mutex */
pthread_mutex_lock(&info->mutexes[i]);
/* Indicate that we are ready for the test. */
pthread_mutex_lock(&info->ready_mutex);
if (++info->ready_count == CONTENTION_THREAD_COUNT)
AAA;
pthread_mutex_unlock(&info->ready_mutex);
/* Line up to start */
pthread_mutex_lock(&thread_info->start_mutex);
pthread_mutex_unlock(&thread_info->start_mutex);
thread_info->start = now_usecs();
for (int j = 1; j < reps; j++) {
int next = (i + 1) % CONTENTION_MUTEX_COUNT;
pthread_mutex_lock(&info->mutexes[next]);
BBB;
i = next;
}
thread_info->stop = now_usecs();
pthread_mutex_unlock(&info->mutexes[i]);
return NULL;
}
static void contention(struct test_results *res)
{
struct contention_info info;
struct contention_thread_info thread_infos[CONTENTION_THREAD_COUNT];
pthread_t threads[CONTENTION_THREAD_COUNT];
for (int i = 0; i < CONTENTION_MUTEX_COUNT; i++)
pthread_mutex_init(&info.mutexes[i], NULL);
pthread_mutex_init(&info.ready_mutex, NULL);
pthread_cond_init(&info.ready_cond, NULL);
info.ready_count = 0;
info.thread_reps = res->reps / CONTENTION_THREAD_COUNT;
for (int i = 0; i < CONTENTION_THREAD_COUNT; i++) {
thread_infos[i].info = &info;
thread_infos[i].thread_index = i;
pthread_mutex_init(&thread_infos[i].start_mutex, NULL);
pthread_mutex_lock(&thread_infos[i].start_mutex);
pthread_create(&threads[i], NULL, contention_thread, &thread_infos[i]);
}
pthread_mutex_lock(&info.ready_mutex);
while (info.ready_count < CONTENTION_THREAD_COUNT)
CCC;
pthread_mutex_unlock(&info.ready_mutex);
for (int i = 0; i < CONTENTION_THREAD_COUNT; i++)
pthread_mutex_unlock(&thread_infos[i].start_mutex);
for (int i = 0; i < CONTENTION_THREAD_COUNT; i++) {
pthread_join(threads[i], NULL);
pthread_mutex_destroy(&thread_infos[i].start_mutex);
}
for (int i = 0; i < CONTENTION_MUTEX_COUNT; i++)
pthread_mutex_destroy(&info.mutexes[i]);
pthread_mutex_destroy(&info.ready_mutex);
pthread_cond_destroy(&info.ready_cond);
res->start = thread_infos[0].start;
res->stop = thread_infos[0].stop;
for (int i = 1; i < CONTENTION_THREAD_COUNT; i++) {
if (thread_infos[i].start < res->start)
res->start = thread_infos[i].start;
if (thread_infos[i].stop > res->stop)
res->stop = thread_infos[i].stop;
}
}
並做了以下調整:
int main(void)
{
measure(lock_unlock, "Locking and unlocking without contention", 10000000);
+ measure(contention, "Locking and unlocking with contention", 100000);
return 0;
}
參考的執行輸出:
Measuring Locking and unlocking without contention: best 31 ns, 50%ile 33 ns
Measuring Locking and unlocking with contention: best 5269 ns, 50%ile 5895 ns
基於效能的測試的目標,請補完程式碼。
作答區
AAA = ?
(a)
pthread_cond_wait(&info.ready_cond, &info.ready_mutex)
(b)
pthread_cond_signal(&info->ready_cond)
(c)
pthread_mutex_unlock(&info->ready_mutex)
BBB = ?
(a)
pthread_mutex_unlock(&info->mutexes[next])
(b)
pthread_mutex_unlock(&info->mutexes[j])
(c)
pthread_mutex_unlock(&info->mutexes[i])
CCC = ?
(a)
pthread_cond_wait(&info.ready_cond, &info.ready_mutex)
(b)
pthread_cond_signal(&info->ready_cond)
(c)
pthread_mutex_unlock(&info->ready_mutex)
延伸問題:
mutexperf.c
效能分析工具的原始程式碼,指出其不足並改善改寫自 Linux /dev/mem 的新玩法 一文,採用 CC BY-SA 授權
Jun 19, 2025Copyright (慣C) 2019 宅色夫
Jun 16, 2025現代處理器設計:原理和關鍵特徵 ==直播錄影(上)== ==直播錄影(下)== 導讀短片 How A CPU Works | A Basic Guide On Processor Stages & Functionality 多核處理器有前途嗎? [ ] 〈The Evolution Of CPU Processing Power〉系列 ==^必看^==
Jun 15, 2025系統的 throughput 是評斷排程器優劣的重要效能
Jun 14, 2025or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up