# I. Introduction - This note serves as a document on thread safety analysis and implementation of mutex locks and atomic operations in the C standard library. - Invironment setting: - Ubuntu Linux version: 24.04.1 LTS - gcc version: 13.3.0 # II. Experiments ## 1. Race condition case ### 1.1 Implementation - Both threads in the following `.c` file access the static variable `counter` without any protection against race conditions. ```C= #include <pthread.h> #include <stdio.h> #include <stdlib.h> #define LOOPS 10000000 static int counter = 0; static void *thread(void *unused) { for (int i=0; i< LOOPS; ++i) { ++counter; } return NULL; } int main() { pthread_t t1, t2; pthread_create(&t1, NULL, thread, NULL); pthread_create(&t2, NULL, thread, NULL); pthread_join(t1, NULL); pthread_join(t2, NULL); printf("Counter is %d by thread\n", counter); return 0; } ``` - Compile the `threads_counter.c` file into an executable. ```terminal $ gcc -g -Wall -v threads_counter.c -o threads_counter ``` - Run the executable file. ```Terminal ./threads_counter ``` - The expected value of `counter` variable is to be 20,000,000 but the actual value of is significantly lower. ```cmd 'Counter is 11769728 by thread' # expected output is # 'Counter is 20000000 by thread' ``` ### 1.2 Analysis - To understand what the process actually does. Disassemble the executable into assembly code which is closer to machine code. There are two ways to obtain the assembly code. - Solution 1: - Use the following command to disassemble the executable. ```terminal= $ objdump -d threads_counter > disassebly.txt $ vim disassembly.txt ``` - The extracted assembly code is shown as follows: ```assembly= 00000000000011a9 <thread>: 11a9: f3 0f 1e fa endbr64 11ad: 55 push %rbp 11ae: 48 89 e5 mov %rsp,%rbp 11b1: 48 89 7d e8 mov %rdi,-0x18(%rbp) 11b5: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%rbp) 11bc: eb 13 jmp 11d1 <thread+0x28> 11be: 8b 05 50 2e 00 00 mov 0x2e50(%rip),%eax # 4014 <counter> 11c4: 83 c0 01 add $0x1,%eax 11c7: 89 05 47 2e 00 00 mov %eax,0x2e47(%rip) # 4014 <counter> 11cd: 83 45 fc 01 addl $0x1,-0x4(%rbp) 11d1: 81 7d fc 7f 96 98 00 cmpl $0x98967f,-0x4(%rbp) 11d8: 7e e4 jle 11be <thread+0x15> 11da: b8 00 00 00 00 mov $0x0,%eax 11df: 5d pop %rbp 11e0: c3 ret ``` - Solution 2: - Use GDB to obtain the assembly code. The steps are shown below. ```terminal= # Activate GDB $ gdb threads_counter # Set the break point $ break thread # Execute the process $ run # Generate the assembly code as the thread is paused $ disassemble thread ``` <!-- ![image](https://hackmd.io/_uploads/HJuQWOVc1e.png) --> - The `counter` variable is modified by the following three assembly instructions. This is a typical race condition problem: both threads access `counter` almost simutaneously, only the last completed the operation is stored into the 'counter'. ```assembly= 11be: 8b 05 50 2e 00 00 mov 0x2e50(%rip),%eax # 4014 <counter> 11c4: 83 c0 01 add $0x1,%eax 11c7: 89 05 47 2e 00 00 mov %eax,0x2e47(%rip) # 4014 <counter> ``` ## 2. Mutex lock case ### 2.1 Implementation - In this case, I use the mutex provided by the C standard library to manage the critical section of the `counter` variable. ```C= #include <pthread.h> #include <stdio.h> #include <stdlib.h> #define LOOPS 10000000 static int counter = 0; pthread_mutex_t lock; /* Declare a mutex lock */ static void *thread(void *unused) { for (int i = 0; i < LOOPS; ++i) { pthread_mutex_lock(&lock); /* Obtain the lock */ ++counter; pthread_mutex_unlock(&lock); /* release the lock */ } return NULL; } int main() { pthread_t t1, t2; pthread_mutex_init(&lock, NULL); /* Initialize the mutex lock */ pthread_create(&t1, NULL, thread, NULL); pthread_create(&t2, NULL, thread, NULL); pthread_join(t1, NULL); pthread_join(t2, NULL); printf("Counter is %d by thread\n", counter); pthread_mutex_destroy(&lock); /* Destroy the lock */ return 0; } ``` - the output in this case is correct. ```Terminal= 'Counter is 20000000 by thread' ``` ### 2.2 Analysis - The assembly code using a mutex lock is shown below. ```assembly= 0000000000001229 <thread>: 1229: f3 0f 1e fa endbr64 122d: 55 push %rbp 122e: 48 89 e5 mov %rsp,%rbp 1231: 48 83 ec 20 sub $0x20,%rsp 1235: 48 89 7d e8 mov %rdi,-0x18(%rbp) 1239: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%rbp) 1240: eb 31 jmp 1273 <thread+0x4a> 1242: 48 8d 05 f7 2d 00 00 lea 0x2df7(%rip),%rax # 4040 <lock> 1249: 48 89 c7 mov %rax,%rdi 124c: e8 df fe ff ff call 1130 <pthread_mutex_lock@plt> 1251: 8b 05 11 2e 00 00 mov 0x2e11(%rip),%eax # 4068 <counter> 1257: 83 c0 01 add $0x1,%eax 125a: 89 05 08 2e 00 00 mov %eax,0x2e08(%rip) # 4068 <counter> 1260: 48 8d 05 d9 2d 00 00 lea 0x2dd9(%rip),%rax # 4040 <lock> 1267: 48 89 c7 mov %rax,%rdi 126a: e8 81 fe ff ff call 10f0 <pthread_mutex_unlock@plt> 126f: 83 45 fc 01 addl $0x1,-0x4(%rbp) 1273: 81 7d fc 7f 96 98 00 cmpl $0x98967f,-0x4(%rbp) 127a: 7e c6 jle 1242 <thread+0x19> 127c: b8 00 00 00 00 mov $0x0,%eax 1281: c9 leave 1282: c3 ret ``` - The key difference in the assembly code when using a mutex lock is shown below. ```assembly= 1242: 48 8d 05 f7 2d 00 00 lea 0x2df7(%rip),%rax # 4040 <lock> 1249: 48 89 c7 mov %rax,%rdi 124c: e8 df fe ff ff call 1130 <pthread_mutex_lock@plt> ``` - The `lea` instruction loads the address of the variable `lock` into `rax`. - The `mov` instruction stores `rax` into `rid`. - The `call` instruction invokes `pthread_mutex_lock()`. If the mutex `lock` is held by another threads, the current thread must stall until it acquires the `lock`. ```assembly= 1251: 8b 05 11 2e 00 00 mov 0x2e11(%rip),%eax # 4068 <counter> 1257: 83 c0 01 add $0x1,%eax 125a: 89 05 08 2e 00 00 mov %eax,0x2e08(%rip) ``` - These three instructions execute the code `++counter`. ```assembly= # 4068 <counter> 1260: 48 8d 05 d9 2d 00 00 lea 0x2dd9(%rip),%rax # 4040 <lock> 1267: 48 89 c7 mov %rax,%rdi 126a: e8 81 fe ff ff call 10f0 <pthread_mutex_unlock@plt> ``` - The `lea` instruction stores the address of `lock` into `rax`. - The `mov` instruction stores the value of `rax` into `rdi`. - The `call` instruction invokes `pthread_mutex_unlock()` to release the `lock` allowing other threads can acquire it. ## 3. Atomic operation version ### 3.1 Implementation - In this case, I use atomic operations to ensure the access to shared variable `counter` is not interrupted by other threads. ```C= #include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <stdatomic.h> #define LOOPS 10000000 static atomic_int counter = 0; static void *thread(void *unused) { for (int i = 0; i < LOOPS; ++i) { ++counter; } return NULL; } int main() { pthread_t t1, t2; pthread_create(&t1, NULL, thread, NULL); pthread_create(&t2, NULL, thread, NULL); pthread_join(t1, NULL); pthread_join(t2, NULL); printf("Counter is %d by thread\n", counter); return 0; } ``` - The output of this case is correct. ```Terminal= 'Counter is 20000000 by thread' ``` ### 3.2 Analysis - The assembly code using atomic operation is shown below. ```assembly= 00000000000011a9 <thread>: 11a9: f3 0f 1e fa endbr64 11ad: 55 push %rbp 11ae: 48 89 e5 mov %rsp,%rbp 11b1: 48 83 ec 30 sub $0x30,%rsp 11b5: 48 89 7d d8 mov %rdi,-0x28(%rbp) 11b9: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax 11c0: 00 00 11c2: 48 89 45 f8 mov %rax,-0x8(%rbp) 11c6: 31 c0 xor %eax,%eax 11c8: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%rbp) 11cf: eb 1f jmp 11f0 <thread+0x47> 11d1: c7 45 ec 01 00 00 00 movl $0x1,-0x14(%rbp) 11d8: 8b 45 ec mov -0x14(%rbp),%eax 11db: 89 c2 mov %eax,%edx 11dd: 89 d0 mov %edx,%eax 11df: f0 0f c1 05 2d 2e 00 lock xadd %eax,0x2e2d(%rip) # 4014 <counter> 11e6: 00 11e7: 01 d0 add %edx,%eax 11e9: 89 45 f0 mov %eax,-0x10(%rbp) 11ec: 83 45 f4 01 addl $0x1,-0xc(%rbp) 11f0: 81 7d f4 7f 96 98 00 cmpl $0x98967f,-0xc(%rbp) 11f7: 7e d8 jle 11d1 <thread+0x28> 11f9: b8 00 00 00 00 mov $0x0,%eax 11fe: 48 8b 55 f8 mov -0x8(%rbp),%rdx 1202: 64 48 2b 14 25 28 00 sub %fs:0x28,%rdx 1209: 00 00 120b: 74 05 je 1212 <thread+0x69> 120d: e8 6e fe ff ff call 1080 <__stack_chk_fail@plt> 1212: c9 leave 1213: c3 ret ``` - The key differences in the assembly code are shown below. ```assembly= 11d1: c7 45 ec 01 00 00 00 movl $0x1,-0x14(%rbp) 11d8: 8b 45 ec mov -0x14(%rbp),%eax 11db: 89 c2 mov %eax,%edx 11dd: 89 d0 mov %edx,%eax ``` - These instructions stores `1` into the register `eax` to increment `counter`. ```assembly= 11df: f0 0f c1 05 2d 2e 00 lock xadd %eax,0x2e2d(%rip) # 4014 <counter> ``` - The prefix `lock` in this instruction ensures its execution is atomic. - The `xadd` instruction completes to imcrement operation on the variable `counter`. # III. Reference - Linux 程式設計 從應用到核心 - [gfreewind](https://github.com/gfreewind/aple_codes/blob/master/chapter0/0.4.3/0_4_3_threads_cnt.c) <!-- comments --> <!-- - race condition case - ![image](https://hackmd.io/_uploads/rkTwe_Eckx.png) - ![image](https://hackmd.io/_uploads/S16zZ_N91l.png) - ![image](https://hackmd.io/_uploads/rkmBZ_491e.png) - ![image](https://hackmd.io/_uploads/Sk1u-dNcJl.png) --> <!-- - Mutex lock case - ![image](https://hackmd.io/_uploads/S1GUdY4c1g.png) - ![image](https://hackmd.io/_uploads/SJmt_F491l.png) --> <!-- - atomic case - ![image](https://hackmd.io/_uploads/rJpOIKNqkg.png) - ![image](https://hackmd.io/_uploads/HJVJPKV9ke.png) -->