contributed by < ambersun1234
>
$ uname -a
Linux station 5.8.0-55-generic #62~20.04.1-Ubuntu SMP Wed Jun 2 08:55:04 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
$ gcc --version
gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
本次實驗程式碼是以 C 語言 atomic operation 以及 futex 模擬 go routine 以及 go channel
首先先來看看什麼是 futex
context switch
到 kernel space),因此 futex
的誕生可以讓 locking 這件事在 user space
之中完成,所以理論上會比一般的 mutex
, semaphore
還要來的快速futex
的出現僅是為了減少 system call 的消耗,並非完全由 user space 控制。根據 futex(7) 所述
Futex operation occurs entirely in user space for the
noncontended case. The kernel is involved only to arbitrate the contended case. As any sane design will strive for
noncontention, futexes are also optimized for this situation.
不難發現到實做中有許多用到 futex
的地方
static int chan_recv_buf(struct chan *ch, void **data)
{
while (chan_tryrecv_buf(ch, data) == -1) {
if (errno != EAGAIN) return -1;
uint32_t v = 1;
while (!atomic_compare_exchange_weak_explicit(&ch->recv_ftx, &v, v - 1,
memory_order_acq_rel,
memory_order_acquire)) {
if (v == 0) {
atomic_fetch_add_explicit(&ch->recv_waiters, 1,
memory_order_acq_rel);
futex_wait(&ch->recv_ftx, 0);
atomic_fetch_sub_explicit(&ch->recv_waiters, 1,
memory_order_acq_rel);
v = 1;
}
}
}
atomic_fetch_add_explicit(&ch->send_ftx, 1, memory_order_acq_rel);
if (atomic_load_explicit(&ch->send_waiters, memory_order_relaxed) > 0)
futex_wake(&ch->send_ftx, 1);
return 0;
}
go routine
以及 go channel
,自然就會有多個執行緒存在的情況。以上述程式碼為例,就是要從 ring buffer 中獲取資料,可以看到 futex_wake
以及 futex_wait
的存在4 bytes
長的整數,用法如下
up a futex
0 變 1
代表沒有任何的 waiterfutex_wake
)wait a futex
futex_wait
)ring buffer
裡面寫資料
static int chan_trysend_buf(struct chan *ch, void *data)
{
if (atomic_load_explicit(&ch->closed, memory_order_relaxed)) {
errno = EPIPE;
return -1;
}
uint64_t tail, new_tail;
struct chan_item *item;
do {
tail = atomic_load_explicit(&ch->tail, memory_order_acquire);
uint32_t pos = tail, lap = tail >> 32;
item = ch->ring + pos;
if (atomic_load_explicit(&item->lap, memory_order_acquire) != lap) {
errno = EAGAIN;
return -1;
}
if (pos + 1 == ch->cap)
new_tail = (uint64_t)(lap + 2) << 32;
else
new_tail = tail + 1;
} while (!atomic_compare_exchange_weak_explicit(&ch->tail, &tail, new_tail,
memory_order_acq_rel,
memory_order_acquire));
item->data = data;
atomic_fetch_add_explicit(&item->lap, 1, memory_order_release);
return 0;
}
atomic_load_explicit
) 以及比較並交換 (CAS, atomic_compare_exchange_weak_explicit
) 都必須使用 atomic operation 以保證其的正確性
if (memcmp(obj, expected, sizeof *obj) == 0) {
memcpy(obj, &desired, sizeof *obj);
return true;
} else {
memcpy(expected, obj, sizeof *obj);
return false;
}
explicit
,該位置是定義 memory order
memory order
,考慮以下官網範例 cppreference/memory_order
// Thread 1:
r1 = atomic_load_explicit(y, memory_order_relaxed); // A
atomic_store_explicit(x, r1, memory_order_relaxed); // B
// Thread 2:
r2 = atomic_load_explicit(x, memory_order_relaxed); // C
atomic_store_explicit(y, 42, memory_order_relaxed); // D
以上結果有可能會是 r1 == r2 == 42
或其他,為什麼說可能呢?因為編譯器優化會微調 instruction 位置,有可能導致非預期行為
本程式用了兩種 order 方式
memory_order_relaxed
memory_order_release
A store operation with this memory order performs the release operation: no reads or writes in the current thread can be reordered after this store. All writes in the current thread are visible in other threads that acquire the same atomic variable (see Release-Acquire ordering below) and writes that carry a dependency into the atomic variable become visible in other threads that consume the same atomic (see Release-Consume ordering below).
memory_order_acquire
memory_order_release
同樣概念,套用在 load operation 上應提及 lock-free programming 的設計考量,如下方的 lap
unbuffer
以及 ring bufferlap
以及 data
data
: 即資料存放位置lap
: 類似一個計數器(用以紀錄)滿了
還是 空的
實際上是有一點困難的。單純的判斷 head == tail
就會有上述問題lap
用以紀錄讀寫狀態,值得注意的是為了節省資料空間,lap
以及 pos
是放在 uint64_t
底下,在需要讀取的時候分別用 bitwise
運算取出;而此作法很剛好的滿足了 atomic function 的參數需求(uint64_t
)
/* Ring buffer */
size_t cap;
_Atomic uint64_t head, tail;
struct chan_item ring[0];
tail = atomic_load_explicit(&ch->tail, memory_order_acquire);
uint32_t pos = tail, lap = tail >> 32;
item = ch->ring + pos;
lap
的數值而定,假設 lap
數值相同即代表可以將資料寫入 ring buffer 當中,若不相同則代表 ring buffer 已經滿了
if (atomic_load_explicit(&item->lap, memory_order_acquire) != lap) {
errno = EAGAIN;
return -1;
}
// the final element of ring buffer(tail)
if (pos + 1 == ch->cap)
new_tail = (uint64_t)(lap + 2) << 32;
else
new_tail = tail + 1;
} while (!atomic_compare_exchange_weak_explicit(&ch->tail, &tail, new_tail, memory_order_acq_rel, memory_order_acquire));
channel->tail
會被改為 (uint64_t)(lap+2) << 32
pos
? 因為重新一輪的位置一定是從0開始,只要紀錄新 lap 即可head, tail
的 lap 不會跟 item
的 lap 相同,由上述規律我們可以得知
2n
的時候 2n + 1
的時候 -fsanitize=thread --ggdb -fPIE -pie
即可開啟檢查,會有以下錯誤==================
WARNING: ThreadSanitizer: data race (pid=71386)
Read of size 8 at 0x7f3cf01bd278 by thread T2 (mutexes: write M1):
#0 reader main.c:64 (croutine+0x40a0)
#1 <null> <null> (libtsan.so.0+0x2d1af)
Previous write of size 8 at 0x7f3cf01bd278 by thread T1:
#0 chan_send_unbuf chan.c:221 (croutine+0x3747)
#1 chan_send chan.c:302 (croutine+0x3d67)
#2 writer main.c:43 (croutine+0x3e93)
#3 <null> <null> (libtsan.so.0+0x2d1af)
Location is stack of thread T2.
Mutex M1 (0x55687a6a6588) created at:
#0 pthread_mutex_init <null> (libtsan.so.0+0x4a636)
#1 init_mutex main.c:180 (croutine+0x4c63)
#2 main main.c:192 (croutine+0x4cf1)
Thread T2 (tid=71389, running) created by main thread at:
#0 pthread_create <null> (libtsan.so.0+0x5ea99)
#1 create_threads main.c:125 (croutine+0x4631)
#2 test_chan main.c:161 (croutine+0x4a73)
#3 main main.c:195 (croutine+0x4d23)
Thread T1 (tid=71388, running) created by main thread at:
#0 pthread_create <null> (libtsan.so.0+0x5ea99)
#1 create_threads main.c:125 (croutine+0x4631)
#2 test_chan main.c:160 (croutine+0x4a48)
#3 main main.c:195 (croutine+0x4d23)
SUMMARY: ThreadSanitizer: data race main.c:64 in reader
==================
static int chan_send_unbuf(struct chan *ch, void *data)
{
if (atomic_load_explicit(&ch->closed, memory_order_relaxed)) {
errno = EPIPE;
return -1;
}
mutex_lock(&ch->send_mtx);
void **ptr = NULL;
if (!atomic_compare_exchange_strong_explicit(&ch->datap, &ptr, &data,
memory_order_acq_rel,
memory_order_acquire)) {
*ptr = data;
atomic_store_explicit(&ch->datap, NULL, memory_order_release);
if (atomic_fetch_sub_explicit(&ch->recv_ftx, 1, memory_order_acquire) ==
CHAN_WAITING)
futex_wake(&ch->recv_ftx, 1); // YYY
} else {
unbuffered channel
中,於是我嘗試使用 atomic_exchange_explicit
, atomic_store_explicit
保護資料,結果都是以失敗告終send_unbuf
以及 recv_unbuf
來看可以發現到一開始都是先檢查 data pointer 是否為 NULL,兩者分別做存取以及寫入的動作data
這個變數導致的,即使我加上了 atomic 進行存取還是會導致 data race(詳見底下測試)
// chan_send_unbuf
...
if (!atomic_compare_exchange_strong_explicit(&ch->datap, &ptr, &data,
memory_order_acq_rel,
memory_order_acquire)) {
// *ptr = data;
printf("send: %p %p\n", ch->datap, ptr);
atomic_store_explicit(ptr, data, memory_order_release);
atomic_store_explicit(&ch->datap, NULL, memory_order_release);
if (atomic_fetch_sub_explicit(&ch->recv_ftx, 1, memory_order_acquire) ==
CHAN_WAITING)
futex_wake(&ch->recv_ftx, 1); // YYY
} else {
...
---
// chan_recv_unbuf
...
if (!atomic_compare_exchange_strong_explicit(&ch->datap, &ptr, data,
memory_order_acq_rel,
memory_order_acquire)) {
// ptr = ch->datap
// retrieve data from ch->datap(ptr) and put it into data
// *data = *ptr;
printf("recv: %p %p\n", ptr, data);
atomic_store_explicit(data, *ptr, memory_order_release);
atomic_store_explicit(&ch->datap, NULL, memory_order_release);
if (atomic_fetch_sub_explicit(&ch->send_ftx, 1, memory_order_acquire) ==
CHAN_WAITING)
futex_wake(&ch->send_ftx, 1);
} else {
...
recv main: 1 0x7f6e6bbbd278
recv: 0x7f6e6c3be1c0 0x7f6e6bbbd278
recv main: 2 0x7f6e6bbbd278
send main: 3 0x7f6e6c3be288
send: 0x7f6e6bbbd278 0x7f6e6bbbd278
send main: 4 0x7f6e6c3be288
recv main: 3 0x7f6e6bbbd278
recv: 0x7f6e6c3be1c0 0x7f6e6bbbd278
recv main: 4 0x7f6e6bbbd278
send main: 5 0x7f6e6c3be288
send: 0x7f6e6bbbd278 0x7f6e6bbbd278
send main: 6 0x7f6e6c3be288
recv main: 5 0x7f6e6bbbd278
recv: 0x7f6e6c3be1c0 0x7f6e6bbbd278
recv main: 6 0x7f6e6bbbd278
send main: 7 0x7f6e6c3be288
send: 0x7f6e6bbbd278 0x7f6e6bbbd278
send main: 8 0x7f6e6c3be288
recv main: 7 0x7f6e6bbbd278
recv: 0x7f6e6c3be1c0 0x7f6e6bbbd278
recv main: 8 0x7f6e6bbbd278
send main: 9 0x7f6e6c3be288
send: 0x7f6e6bbbd278 0x7f6e6bbbd278
recv main: 9 0x7f6e6bbbd278
0x7f6e6bbbd278
這個位置linux2021