Try   HackMD

Mutex and Semaphore

Mutex、Semaphore 跟 Binary Semaphore 的差別網路上講解的非常清楚。Google 搜尋的答覆不外乎以下幾種:

  1. Ownership 的概念
  2. 使用上本質的差異(資料保護或執行緒同步)
  3. Mutex 可以,但 Semaphore 所不能解決的問題 (e.g. priority inversion,recursive deadlock)

但,你用的 Mutex 真的是你知道的 Mutex 嗎?

實驗設計

考慮三條 thread A, B, C

Thread A, B 想辦法去取得 critical section 的操作後並 lock 住,但不釋放 lock。因此當 A 取得資源後,B 便會 waiting,反之亦然。而 Thread C 負責從第三方去實現 unlock。

預期行為

mutex 因為有 ownership 的概念,上述的操作是不被允許。解鎖將會失敗 (mutex 的所有權在 A,但 C 想去解鎖),然後未取得資源的 thread 會 deadlock。

而 binary semaphore 則會成功。

考慮下方兩個實作的比較: Binary Semaphore v.s. Mutex

$ gcc file.c -o file -lpthread
  • pthread binary Semaphore
#include <stdio.h> 
#include <pthread.h> 
#include <semaphore.h> 
#include <unistd.h> 
  
sem_t bin_sem; 
  
void* lock_thread(void* arg)  { 
    sem_wait(&bin_sem); 
    printf("thread %d Entered..\n", (int) pthread_self());
    return NULL;
}

void* unlock_thread(void* arg) {
    printf("thread %d try to unlock\n", (int) pthread_self());
    sem_post(&bin_sem);
    return NULL;
}

int main() { 
    sem_init(&bin_sem, 0, 1); 
    pthread_t t1, t2, t3; 
    pthread_create(&t1, NULL, lock_thread, NULL);  
    pthread_create(&t2, NULL, lock_thread, NULL);
    sleep(2);
    pthread_create(&t3, NULL, unlock_thread, NULL); 

    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    pthread_join(t3, NULL);
    sem_destroy(&bin_sem); 
    return 0; 
} 

執行結果:

thread 778938112 Entered..
thread 762152704 try to unlock
thread 770545408 Entered..
  • Mutex
#define _GNU_SOURCE
#include <stdio.h> 
#include <pthread.h> 
#include <semaphore.h> 
#include <unistd.h>
#include <errno.h> 
#include <string.h>

pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  
void* lock_thread(void* arg) { 
    pthread_mutex_lock(&mutex); 
    printf("thread %d Entered..\n", (int) pthread_self());
    return NULL;
}

void* unlock_thread(void* arg) {
    printf("thread %d try to unlock\n", (int) pthread_self());
    int err = pthread_mutex_unlock(&mutex);
    if (err)
        fprintf(stderr, "Value of errno: %s\n", strerror(err));
    return NULL;
}

int main() {
    pthread_t t1, t2, t3; 
    pthread_create(&t1, NULL, lock_thread, NULL);  
    pthread_create(&t2, NULL, lock_thread, NULL);
    sleep(2);
    pthread_create(&t3, NULL, unlock_thread, NULL); 

    pthread_join(t1, NULL); 
    pthread_join(t2, NULL); 
    pthread_join(t3, NULL);
    pthread_mutex_destroy(&mutex); 
    return 0; 
} 

執行結果:

thread 1699403520 Entered..
thread 1610610432 try to unlock
thread 1691010816 Entered..

哪尼,Mutex 就給我這樣解鎖了?? 太沒潔操了吧

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

事情並不單純,這樣怎麼跟面試官交代。

參考 man page pthread_mutex_unlock() 裏面有提到

If the mutex type is PTHREAD_MUTEX_DEFAULT, attempting to recursively lock the mutex results in undefined behavior. Attempting to unlock the mutex if it was not locked by the calling thread results in undefined behavior. Attempting to unlock the mutex if it is not locked results in undefined behavior.

原來是 undefined behavior (UB) 的部份阿,但從實驗結果來看,UB 還是將資源釋放出來了,這樣不是跟 binary semaphore 一樣了嗎?
ShaoHua Wang

如果有仔細地看 man page pthread_mutex_unlock() 會發現

If the mutex type is PTHREAD_MUTEX_ERRORCHECK, then error checking shall be provided. If a thread attempts to relock a mutex that it has already locked, an error shall be returned. If a thread attempts to unlock a mutex that it has not locked or a mutex which is unlocked, an error shall be returned.

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

再參考以下實作:

Mutex w/ Error Check

#define _GNU_SOURCE
#include <stdio.h> 
#include <pthread.h> 
#include <semaphore.h> 
#include <unistd.h>
#include <errno.h> 
#include <string.h>

// Error Check Type Mutex
pthread_mutex_t mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
  
void* lock_thread(void* arg) { 
    //wait 
    pthread_mutex_lock(&mutex); 
    printf("thread %d Entered..\n", (int) pthread_self());
    return NULL;
}

void* unlock_thread(void* arg) {
    printf("thread %d try to unlock\n", (int) pthread_self());
    int err = pthread_mutex_unlock(&mutex);
    if (err)
        fprintf(stderr, "Value of errno: %s\n", strerror(err));
    return NULL;
}

int main() {
    pthread_t t1, t2, t3; 
    pthread_create(&t1, NULL, lock_thread, NULL);  
    pthread_create(&t2, NULL, lock_thread, NULL);
    sleep(2);
    pthread_create(&t3, NULL, unlock_thread, NULL); 

    pthread_join(t1, NULL); 
    pthread_join(t2, NULL); 
    pthread_join(t3, NULL);
    pthread_mutex_destroy(&mutex); 
    return 0; 
}

執行結果:

thread 885999360 Entered..
thread 869213952 try to unlock
Value of errno: Operation not permitted
(deadlock)

果然行為才如預期一般!

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

那這樣不就代表 defualt mutex = binary semaphore 了嗎? 不行,我不能接受

其實不然,我們從 pthread mutex 實作來看為什麼預設的 Mutex 會這樣做

  • glibc pthread.h 其中 PTHREAD_MUTEX_TIMED_NP 是預設的 mutex 的屬性
enum
{
  PTHREAD_MUTEX_TIMED_NP,
  PTHREAD_MUTEX_RECURSIVE_NP,
  PTHREAD_MUTEX_ERRORCHECK_NP,
  PTHREAD_MUTEX_ADAPTIVE_NP
  ....
};
int __pthread_mutex_lock (pthread_mutex_t *mutex)
{
  unsigned int type = PTHREAD_MUTEX_TYPE_ELISION (mutex);
  /*
   * check mutex type code section
   */
 
  pid_t id = THREAD_GETMEM (THREAD_SELF, tid);

  /* Record the ownership.*/
  mutex->__data.__owner = id;

  /*
   * other code section
   */
  return 0;
}

  /*
   * other code section
   */

#ifndef __pthread_mutex_lock
weak_alias (__pthread_mutex_lock, pthread_mutex_lock)
hidden_def (__pthread_mutex_lock)
#endif
int attribute_hidden
__pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr)
{
  int type = PTHREAD_MUTEX_TYPE_ELISION (mutex);
  /*
   * other code section
   */

  /* 檢查 Mutex 的 type */
  /* Default mutex 並不會檢查 id 是否為原本 owner 的 */
  if (__builtin_expect (type, PTHREAD_MUTEX_TIMED_NP)
      == PTHREAD_MUTEX_TIMED_NP) {
      /* Always reset the owner field.  */
    normal:
      mutex->__data.__owner = 0;
      if (decr)
        /* One less user.  */
        --mutex->__data.__nusers;
      /* Unlock.  */
      lll_unlock (mutex->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex));
      LIBC_PROBE (mutex_release, 1, mutex);
      return 0;
  } 
  /*
   * other code section
   */

  else {
    /* Error checking mutex. 會檢查解鎖的 thread id 是否為 owner id */
    assert (type == PTHREAD_MUTEX_ERRORCHECK_NP);
    if (mutex->__data.__owner != THREAD_GETMEM (THREAD_SELF, tid)
        || ! lll_islocked (mutex->__data.__lock))
      return EPERM;
    goto normal;
  }
}

  /*
   * other code section
   */ 

int __pthread_mutex_unlock (pthread_mutex_t *mutex)
{
  return __pthread_mutex_unlock_usercnt (mutex, 1);
}
weak_alias (__pthread_mutex_unlock, pthread_mutex_unlock)
hidden_def (__pthread_mutex_unlock)

儘管 default mutex 跟 binary semaphore 在特定行為上是相同的,但 mutex 本身還是遵守著紀錄持有者的行為,也就是與 semaphore 最大的差異。

依據 POSIX 標準,mutex 有以下四種型態:

  • NORMAL
  • ERRORCHECK
  • RECURSIVE
  • DEFAULT

其中,在 Pthread 中,DEFAULT 對應於 PTHREAD_MUTEX_NORMAL,但在其他實作中可能會有截然不同的行為。使用 lock 時,應該明確指定是 ERRORCHECK 或 RECURSIVE,以確保行為符合預期。

你今天用對 mutex 了嗎?