Try   HackMD

mimalloc 原始碼分析

以最新版本 2.0.1 為例

  • dev: development branch for mimalloc v1.
  • dev-slice: development branch for mimalloc v2 with a new algorithm for managing internal mimalloc pages.

Types

mi_block

// The free lists use encoded next fields
// (Only actually encodes when MI_ENCODED_FREELIST is defined.)
typedef uintptr_t mi_encoded_t;

// free lists contain blocks
typedef struct mi_block_s {
    mi_encoded_t next;   // next free block in the same page; stored encoded when MI_ENCODED_FREELIST is defined
} mi_block_t;

用來回傳使用者真正需要的記憶體空間位置,不需要紀錄 block 的大小是因為 mi_block_t 都由 mi_page_t 來管理,而每個 mi_page_t 都只對應一個固定的大小 xblock_size 來決定 mi_block_t 可使用的範圍。

每個尚未分配給使用者的 mi_block_t 皆為一個 linked list,其使用 next 欄位紀錄在同個 mi_page_t 內的下個未使用的 mi_block_t。特別的是當定義 MI_ENCODED_FREELIST 時,next 不是紀錄記憶體位置,而是會透過編碼轉換 (類似 XOR) 來增加安全性。

故使用者透過 mi_malloc(size_t size) 獲得的空間,一般情況下,內部為該 block 在 mi_page_t 時存放的 next 之內容,而當 MI_SECURE 開啟時,則會特別將 next 清空,防止資料洩漏:

extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
    ...
#elif (MI_SECURE!=0)
    block->next = 0;  // don't leak internal data
#endif
    ...
}

mi_page

typedef struct mi_page_s {
    // "owned" by the segment
    uint32_t              slice_count;       // slices in this page (0 if not a page)
    uint32_t              slice_offset;      // distance from the actual page data slice (0 if a page)
    uint8_t               is_reset : 1;        // `true` if the page memory was reset
    uint8_t               is_committed : 1;    // `true` if the page virtual memory is committed
    uint8_t               is_zero_init : 1;    // `true` if the page was zero initialized

    // layout like this to optimize access in `mi_malloc` and `mi_free`
    uint16_t              capacity;          // number of blocks committed, must be the first field, see `segment.c:page_clear`
    uint16_t              reserved;          // number of blocks reserved in memory
    mi_page_flags_t       flags;             // `in_full` and `has_aligned` flags (8 bits)
    uint8_t               is_zero : 1;         // `true` if the blocks in the free list are zero initialized
    uint8_t               retire_expire : 7;   // expiration count for retired blocks

    mi_block_t*           free;              // list of available free blocks (`malloc` allocates from this list)
    #ifdef MI_ENCODE_FREELIST
    uintptr_t             keys[2];           // two random keys to encode the free lists (see `_mi_block_next`)
    #endif
    uint32_t              used;              // number of blocks in use (including blocks in `local_free` and `thread_free`)
    uint32_t              xblock_size;       // size available in each block (always `>0`)

    mi_block_t* local_free;                  // list of deferred free blocks by this thread (migrates to `free`)
    _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
    _Atomic(uintptr_t)        xheap;         // NOTE(review): appears to encode the owning heap pointer, read through an accessor -- confirm against `mi_page_heap`

    struct mi_page_s* next;                  // next page owned by this thread with the same `block_size`
    struct mi_page_s* prev;                  // previous page owned by this thread with the same `block_size`

    // 64-bit 9 words, 32-bit 12 words, (+2 for secure)
    #if MI_INTPTR_SIZE==8
    uintptr_t padding[1];
    #endif
} mi_page_t;
  • mi_block_t* free:此 page 中,尚未被使用者使用的 block
  • mi_block_t* local_free:對應論文中的 local free list
  • mi_thread_free_t* xthread_free:對應論文中的 thread free list

另外 xblock_size 為此 page 內每個 block 可佔有的空間,每個存有相同大小的 block 之 page 也會由 next 與 prev 欄位連結成 doubly linked list。

mi_segment

typedef mi_page_t  mi_slice_t;   // a slice shares the layout of a page (see `slice_count`/`slice_offset`)

typedef struct mi_segment_s {
    size_t            memid;              // memory id for arena allocation
    bool              mem_is_pinned;      // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
    bool              mem_is_large;       // in large/huge os pages?
    bool              mem_is_committed;   // `true` if the whole segment is eagerly committed

    bool              allow_decommit;     // may memory in this segment be decommitted?
    mi_msecs_t        decommit_expire;    // expiration time for a delayed decommit (see `mi_segment_delayed_decommit`)
    mi_commit_mask_t  decommit_mask;      // slices scheduled for (delayed) decommit
    mi_commit_mask_t  commit_mask;        // which parts of the segment are currently committed

    _Atomic(struct mi_segment_s*) abandoned_next;  // next segment in the global abandoned list

    // from here is zero initialized
    struct mi_segment_s* next;            // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)

    size_t            abandoned;          // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
    size_t            abandoned_visits;   // count how often this segment is visited in the abandoned list (to force reclaim if it is too long)
    size_t            used;               // count of pages in use
    uintptr_t         cookie;             // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`

    size_t            segment_slices;      // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
    size_t            segment_info_slices; // initial slices we are using segment info and possible guard pages.

    // layout like this to optimize access in `mi_free`
    mi_segment_kind_t kind;                // segment kind; `MI_SEGMENT_HUGE` segments contain just one page
    _Atomic(uintptr_t) thread_id;          // unique id of the thread owning this segment
    size_t            slice_entries;       // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
    mi_slice_t        slices[MI_SLICES_PER_SEGMENT];  // per-slice metadata describing the pages carved from this segment
} mi_segment_t;

mi_segment 為實際上 mimalloc 主要從 OS 上獲得的空間,管理著所需要的 page (mi_slice_t slices[MI_SLICES_PER_SEGMENT])。其中 slices[0] 被用來儲存 segment 中的額外資訊。

mi_heap

// A heap owns a set of pages.
// A heap owns a set of pages.
struct mi_heap_s {
    mi_tld_t*             tld;                                 // thread-local data of the owning thread
    mi_page_t*            pages_free_direct[MI_PAGES_DIRECT];  // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
    mi_page_queue_t       pages[MI_BIN_FULL + 1];              // queue of pages for each size class (or "bin")
    _Atomic(mi_block_t*)  thread_delayed_free;                 // blocks freed by other threads; drained by `_mi_heap_delayed_free`
    uintptr_t             thread_id;                           // thread this heap belongs to
    uintptr_t             cookie;                              // random cookie to verify pointers (see `_mi_ptr_cookie`)
    uintptr_t             keys[2];                             // two random keys used to encode the `thread_delayed_free` list
    mi_random_ctx_t       random;                              // random number context used for secure allocation
    size_t                page_count;                          // total number of pages in the `pages` queues.
    size_t                page_retired_min;                    // smallest retired index (retired pages are fully free, but still in the page queues)
    size_t                page_retired_max;                    // largest retired index into the `pages` array.
    mi_heap_t*            next;                                // list of heaps per thread
    bool                  no_reclaim;                          // `true` if this heap should not reclaim abandoned pages
};

每個執行緒上都有一個 mi_heap,用來紀錄目前該執行緒所儲存的 page,放入在 mi_page_queue_t pages 當中,其中存放相同 block 大小的 page 被放入在同個 queue 之中,而 pages[MI_BIN_FULL - 1] 特別被用作放置存有特別大的 block 之 page,pages[MI_BIN_FULL] 則用來當作存放論文中提到的 full list。

mi_tld

// Thread local data
struct mi_tld_s {
    unsigned long long  heartbeat;     // monotonic heartbeat count
    bool                recurse;       // true if deferred was called; used to prevent infinite recursion.
    mi_heap_t*          heap_backing;  // backing heap of this thread (cannot be deleted)
    mi_heap_t*          heaps;         // list of heaps in this thread (so we can abandon all when the thread terminates)
    mi_segments_tld_t   segments;      // segment tld (holds the free span queues searched during page allocation)
    mi_os_tld_t         os;            // os tld
    mi_stats_t          stats;         // statistics
};

mi_tld 存放此執行緒的相關運行資料,包括 mi_heap 的資料、作業系統的資料等等,mi_segments_tld_t segments 為目前此執行緒中與 segment 相關的資料,其內部 mi_tld_t->segments.spans[MI_SEGMENT_BIN_MAX+1] 成員存放此執行緒中 segment 內可再利用的空間 (span),用來加快分配效率。

Function

mi_malloc

// Allocate `size` bytes from the calling thread's default heap.
extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
    return mi_heap_malloc(mi_get_default_heap(), size);
}

透過 mi_malloc 分配記憶體,其中 mi_get_default_heap 會回傳 _mi_heap_default,取得目前執行緒所管理的 heap。

  • 如果是主執行緒 (main thread),則執行緒管理的 _mi_heap_default 在程式執行時已被設定為 _mi_heap_main
  • 如果是非主執行緒,則執行緒管理的 _mi_heap_default 初始化為 _mi_heap_empty,表示此 heap 尚未分配空間,在其後 mi_malloc 的過程中分配新的空間給予 _mi_heap_default 來當作此執行緒可用的 heap。
// Allocate from a specific heap: small requests (<= MI_SMALL_SIZE_MAX) take
// the fast path, everything else goes through the generic slow path.
extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
    if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
        return mi_heap_malloc_small(heap, size);
    }
    else {
        void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE);      // note: size can overflow but it is detected in malloc_generic
        return p;
    }
}

依照需要的 block 大小與 MI_SMALL_SIZE_MAX (128) 決定使用 mi_malloc 的 fast path 還是 slow path。

  • fast path

    ​​​​static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
    ​​​​    const size_t idx = _mi_wsize_from_size(size);
    ​​​​    return heap->pages_free_direct[idx];
    ​​​​}
    
    ​​​​// allocate a small block
    ​​​​extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
    ​​​​      mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
    ​​​​      void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE);
    ​​​​      return p;
    ​​​​}
    

    heap->pages_free_direct 為存有 130 個 mi_page_t 指標的陣列,儲存 size 較小的 page,fast path 會從 heap->pages_free_direct 內直接取得需要配置空間的 page,而不需要逐一走訪 page queue 中尋找可用的 page。

    ​​​​// Fast allocation in a page: just pop from the free list.
    ​​​​// Fall back to generic allocation only if the list is empty.
    ​​​​extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
    ​​​​    mi_block_t* const block = page->free;
    ​​​​    if (mi_unlikely(block == NULL)) {
    ​​​​        return _mi_malloc_generic(heap, size); 
    ​​​​    }
    
    ​​​​    // pop from the free list
    ​​​​    page->used++;
    ​​​​    page->free = mi_block_next(page, block);
    
    ​​​​    return block;
    ​​​​}
    

    page->free 指向此 page 中尚未被用到的 block 空間,故分配時只需從該空間取出可用的 block,當 page 內沒有可用的 block 時,呼叫 _mi_malloc_generic 轉移到 slow path 做 malloc。

  • slow path

    當分配所需空間 > MI_SMALL_SIZE_MAX 、page 為空 (page == _mi_page_empty) 或是 page 沒有可用空間 (page->free == NULL) 時,呼叫 _mi_malloc_generic 函式尋找新的 page:

    ​​​​// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
    ​​​​// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
    ​​​​void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
    ​​​​{
    ​​​​    // initialize if necessary
    ​​​​    if (mi_unlikely(!mi_heap_is_initialized(heap))) {
    ​​​​        mi_thread_init(); // calls `_mi_heap_init` in turn
    ​​​​        heap = mi_get_default_heap();
    ​​​​        if (mi_unlikely(!mi_heap_is_initialized(heap))) { 
    ​​​​            return NULL; 
    ​​​​        }
    ​​​​    }
    ​​​​
    ​​​​    // call potential deferred free routines
    ​​​​    _mi_deferred_free(heap, false);
    
    ​​​​    // free delayed frees from other threads
    ​​​​    _mi_heap_delayed_free(heap);
    
    ​​​​    // find (or allocate) a page of the right size
    ​​​​    mi_page_t* page = mi_find_page(heap, size);
    ​​​​    if (mi_unlikely(page == NULL)) { // first time out of memory, try to collect and retry the allocation once more
    ​​​​        mi_heap_collect(heap, true /* force */);
    ​​​​        page = mi_find_page(heap, size);
    ​​​​    }
    
    ​​​​    if (mi_unlikely(page == NULL)) { // out of memory
    ​​​​        const size_t req_size = size - MI_PADDING_SIZE;  // correct for padding_size in case of an overflow on `size`  
    ​​​​        _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size);
    ​​​​        return NULL;
    ​​​​    }
    
    ​​​​    // and try again, this time succeeding! (i.e. this should never recurse)
    ​​​​    return _mi_page_malloc(heap, page, size);
    ​​​​}
    

    _mi_malloc_generic 會先檢查 heap 是否初始化,如果呼叫此函式的執行緒為第一次呼叫 mi_malloc 且不為主執行緒時,mi_heap_is_initialized 會回傳 false 進入 mi_thread_init 做初始化。

    _mi_deferred_free 呼叫使用者自訂的釋放函式,_mi_heap_delayed_free 則釋放 thread delayed free list,其下方再介紹。

    mi_find_page 會嘗試尋找可用的 page,可用的 page 可能是重新分配一個 segment 來取得,或是從已分配的 segment 中尋找。當 mi_find_page 回傳 NULL 時,表示 out of memory,透過 mi_heap_collect 嘗試回收更多的空間並再試一次。最後再重新進入 _mi_page_malloc 從取得的 page 回傳可使用的 block 空間。

    ​​​​// Allocate a page
    ​​​​// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
    ​​​​static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept {
    ​​​​    // huge allocation?
    ​​​​    const size_t req_size = size - MI_PADDING_SIZE;  // correct for padding_size in case of an overflow on `size`  
    ​​​​    if (mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) )) {
    ​​​​        if (mi_unlikely(req_size > PTRDIFF_MAX)) {  // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
    ​​​​          _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
    ​​​​          return NULL;
    ​​​​        }
    ​​​​        else {
    ​​​​          return mi_large_huge_page_alloc(heap,size);
    ​​​​        }
    ​​​​    }
    ​​​​    else {
    ​​​​        // otherwise find a page with free blocks in our size segregated queues
    ​​​​        return mi_find_free_page(heap, size);
    ​​​​    }
    ​​​​}
    

    mi_find_page 用來尋找或分配 page,依據 MI_MEDIUM_OBJ_SIZE_MAX 分類,當需要分配的空間大於此數值時,此時每個 page 皆只包含一個 block,其大小為需要的空間,故直接執行 mi_large_huge_page_alloc 從 segment 分配一個新的 page,而不需要再從 page queue 中搜尋;當需要分配的空間大小不超過 MI_MEDIUM_OBJ_SIZE_MAX 時,可用的 block 空間可能會在已分配的 page 中找到,故從對應的 page queue 中尋找,如果找不到時,也會跟 mi_large_huge_page_alloc 呼叫路徑相同從 segment 中重新分配一個新的 page。

    ​​​​static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
    ​​​​    return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
    ​​​​}
    ​​​​
    ​​​​// Find a page with free blocks of `size`.
    ​​​​static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
    ​​​​    mi_page_queue_t* pq = mi_page_queue(heap,size);
    ​​​​    mi_page_t* page = pq->first;
    ​​​​    if (page != NULL) {
    ​​​​        _mi_page_free_collect(page,false);
    ​​​​        if (mi_page_immediate_available(page)) {
    ​​​​            page->retire_expire = 0;
    ​​​​            return page; // fast path
    ​​​​        }
    ​​​​    }
    ​​​​  return mi_page_queue_find_free_ex(heap, pq, true);
    ​​​​}
    

    mi_page_queue 會找到 size 對應的 page queue,如果該 page queue 之 first 的 free list 不為空,則表示該 page 可用,直接回傳,否則呼叫 mi_page_queue_find_free_ex 在 page queue 中搜尋 free list 不為空的 page:

    ​​​​// Find a page with free blocks of `page->block_size`.
    ​​​​static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
    ​​​​{
    ​​​​    // search through the pages in "next fit" order
    ​​​​    mi_page_t* page = pq->first;
    ​​​​    while (page != NULL)
    ​​​​    {
    ​​​​        mi_page_t* next = page->next; // remember next
    
    ​​​​        // 0. collect freed blocks by us and other threads
    ​​​​        _mi_page_free_collect(page, false);
    
    ​​​​        // 1. if the page contains free blocks, we are done
    ​​​​        if (mi_page_immediate_available(page)) {
    ​​​​            break;  // pick this one
    ​​​​        }
    
    ​​​​        // 2. Try to extend
    ​​​​        if (page->capacity < page->reserved) {
    ​​​​            mi_page_extend_free(heap, page, heap->tld);
    ​​​​            break;
    ​​​​        }
    
    ​​​​        // 3. If the page is completely full, move it to the `mi_pages_full`
    ​​​​        // queue so we don't visit long-lived pages too often.
    ​​​​        mi_page_to_full(page, pq);
    
    ​​​​        page = next;
    ​​​​    } // for each page
    
    ​​​​    if (page == NULL) {
    ​​​​        _mi_heap_collect_retired(heap, false); // perhaps make a page available?
    ​​​​        page = mi_page_fresh(heap, pq);
    ​​​​        if (page == NULL && first_try) {
    ​​​​            // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
    ​​​​            page = mi_page_queue_find_free_ex(heap, pq, false);      
    ​​​​        }
    ​​​​    }
    ​​​​    else {
    ​​​​        page->retire_expire = 0;
    ​​​​    }
    ​​​​    return page;
    ​​​​}
    

    如果 page queue 不為空,則逐一走訪 queue 並嘗試釋放空間,如果在走訪的過程中找到 free list 不為空的 page 時便直接回傳該 page。當 page queue 中的所有 page 都沒有可用的 block 時,先呼叫 _mi_heap_collect_retired 回收空間,接著 mi_page_fresh 會嘗試從 segment 中獲得一塊新的 page 空間使用。

    ​​​​// Get a fresh page to use
    ​​​​static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
    ​​​​    mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size);
    ​​​​    if (page==NULL) return NULL;
    ​​​​    return page;
    ​​​​}    
    
    ​​​​// allocate a fresh page from a segment
    ​​​​static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) {
    ​​​​    mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os);
    ​​​​    if (page == NULL) {
    ​​​​        // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
    ​​​​        return NULL;
    ​​​​    }
    ​​​​    mi_page_init(heap, page, block_size, heap->tld);
    ​​​​    if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
    ​​​​    return page;
    ​​​​}
    

    _mi_segment_page_alloc 為實際從 segment 分配 page 的函式,當找到可用的 page 時,通過 _mi_page_init 初始化,並將該 page 放到先前所選擇對應大小的 page queue 中。

    ​​​​mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
    ​​​​    mi_page_t* page;
    ​​​​    if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
    ​​​​        page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld,os_tld);
    ​​​​    }
    ​​​​    else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) {
    ​​​​        page = mi_segments_page_alloc(heap,MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,block_size,tld, os_tld);
    ​​​​    }
    ​​​​    else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) {
    ​​​​        page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld);
    ​​​​    }
    ​​​​    else {
    ​​​​        page = mi_segment_huge_page_alloc(block_size,tld,os_tld);
    ​​​​    }
    ​​​​    return page;
    ​​​​}
    

    根據 block_size 決定分配 page 所需的大小,mimalloc 中將 block_size 分成四類,每類配置的空間的方式都不同:

    • Small, [0 KiB, 8 KiB]
      每個 segment 有多個 page,每個 page 有多個 block,每個 block 的 block_size 為 small 大小
    • Medium, (8 KiB, 128 KiB]
      每個 segment 有多個 page,每個 page 有多個 block,每個 block 的 block_size 為 medium 大小
    • Large, (128 KiB, 4 MiB]
      每個 segment 有多個 page,每個 page 有一個 block,每個 block 的 block_size 為 large 大小
    • Huge, (4 MiB, 2^63 Bytes)
      每個 segment 有一個 page,每個 page 有一個 block,每個 block 的 block_size 為 huge 大小
    ​​​​static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_kind, size_t required, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
    ​​​​{
    ​​​​    // find a free page
    ​​​​    size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE));
    ​​​​    size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE;
    ​​​​    mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, tld); 
    ​​​​    if (page==NULL) {
    ​​​​        // no free page, allocate a new segment and try again
    ​​​​        if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) {
    ​​​​            // OOM or reclaimed a good page in the heap
    ​​​​            return NULL;  
    ​​​​        }
    ​​​​        else {
    ​​​​            // otherwise try again
    ​​​​            return mi_segments_page_alloc(heap, page_kind, required, block_size, tld, os_tld);
    ​​​​        }
    ​​​​    }
    ​​​​    mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats);
    ​​​​    return page;
    ​​​​}
    

    mi_segments_page_find_and_allocate 會從已經分配的 segment 中尋找可用的 page,如果找不到則透過 mi_segment_reclaim_or_alloc 重新分配一個新的 segment 來尋找。

    ​​​​static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) 
    ​​​​{		
    ​​​​    // 1. try to get a segment from our cache
    ​​​​    mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld);
    ​​​​    if (segment != NULL) {
    ​​​​        mi_segment_init(segment, 0, tld, os_tld, NULL);
    ​​​​        return segment;
    ​​​​    }
    ​​​​    // 2. try to reclaim an abandoned segment
    ​​​​    bool reclaimed;
    ​​​​    segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld);
    ​​​​    if (reclaimed) {
    ​​​​        // reclaimed the right page right into the heap
    ​​​​        return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks
    ​​​​    }
    ​​​​    else if (segment != NULL) {
    ​​​​        // reclaimed a segment with a large enough empty span in it
    ​​​​        return segment;
    ​​​​    }
    ​​​​    // 3. otherwise allocate a fresh segment
    ​​​​    return mi_segment_alloc(0, tld, os_tld, NULL);  
    ​​​​}
    

    TODO: reclaim usage
    嘗試獲得 segment 空間,在 segment 用完時,會將該 segment 放到 cache 中等待下次分配,這樣可以防止頻繁地從系統中分配空間,如果 cache 以及 reclaim 都失敗的話,呼叫 mi_segment_alloc 從 OS 分配實際的空間出來。

    ​​​​static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segments_tld_t* tld) {
    ​​​​    // search from best fit up
    ​​​​    mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld);
    ​​​​    if (slice_count == 0) slice_count = 1;
    ​​​​    while (sq <= &tld->spans[MI_SEGMENT_BIN_MAX]) {
    ​​​​        for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) {
    ​​​​            if (slice->slice_count >= slice_count) {
    ​​​​                // found one
    ​​​​                mi_span_queue_delete(sq, slice);
    ​​​​                mi_segment_t* segment = _mi_ptr_segment(slice);
    ​​​​                if (slice->slice_count > slice_count) {
    ​​​​                    mi_segment_slice_split(segment, slice, slice_count, tld);
    ​​​​                }
    ​​​​                
    ​​​​                mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld);
    ​​​​                if (page == NULL) {
    ​​​​                    // commit failed; return NULL but first restore the slice
    ​​​​                    mi_segment_span_free_coalesce(slice, tld);
    ​​​​                    return NULL;
    ​​​​                }
    ​​​​                return page;        
    ​​​​            }
    ​​​​        }
    ​​​​        sq++;
    ​​​​    }
    ​​​​    // could not find a page..
    ​​​​    return NULL;
    ​​​​}
    

    mi_segments_tld_t 的 spans 紀錄在該執行緒中,所有 segment 內可用的空間,並以 queue 串接。中間的 for 迴圈就是在從這些 queue 內尋找可用來當成 page 的空間 (這邊命名為 slice),並從該空間取得對應的 segment。mi_segment_slice_split 將此空間分割成目前需要的空間以及用不到的空間,並把用不到的空間放回 queue 中以便之後可以再利用。目前需要的空間則透過 mi_segment_span_allocate 實際設定成 page,以及 page 的相關訊息。mi_segment_span_free_coalesce 則是分配失敗時,把剛剛分割的空間還原。

    segment 使用 mi_segment_t 的 slices 紀錄所分配的 page,每個 slice 內的成員 slice_count 表示此 page 占用 segment 的幾個 slice;slice_offset 不為 0 則表示此 slice 被其他實際的 slice 占用,而 slice_offset 就是該 slice 與實際 slice 間的 byte 數,以圖片表示:

    
    
    
    
    
    
    %0
    
    
    
    values
    
    slices[0]
    {slice_count=2}
    {slice_offset=0}
    
    slices[1]
    {slice_count=0}
    {slice_offset=96}
    
    slices[2]
    {slice_count=1}
    {slice_offset=0}
    
    slices[3]
    {slice_count=3}
    {slice_offset=0}
    
    slices[4]
    {slice_count=0}
    {slice_offset=96}
    
    slices[5]
    {slice_count=0}
    {slice_offset=192}
    
    slices[6]
    {slice_count=1}
    {slice_offset=0}
    
    
    
    

    sizeof(mi_slice_t)=96,故 slice_offset 為 96 的倍數。

    其中 slices[0], slices[2], slices[3], slices[6] 都會回傳並成為 page,而 page 實際大小就為該 page 所占有的所有 slices。

    ​​​​static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) {
    ​​​​    if (slice->slice_count <= slice_count) return;
    ​​​​    size_t next_index = mi_slice_index(slice) + slice_count;
    ​​​​    size_t next_count = slice->slice_count - slice_count;
    ​​​​    mi_segment_span_free(segment, next_index, next_count, tld);
    ​​​​    slice->slice_count = (uint32_t)slice_count;
    ​​​​}
    

    再回頭看 mi_segment_slice_split 便很好理解,next_index 為分割後下個 slice 的位置,next_count 為下個 slice 擁有的 slice 數目,再透過 mi_segment_span_free 把這些數據設定到 segment 中下個 slice 的位置。

    ​​​​// note: can be called on abandoned segments
    ​​​​static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
    ​​​​    mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) 
    ​​​​                          ? NULL : mi_span_queue_for(slice_count,tld));
    ​​​​    if (slice_count==0) slice_count = 1;
    ​​​​    
    ​​​​    // set first and last slice (the intermediates can be undetermined)
    ​​​​    mi_slice_t* slice = &segment->slices[slice_index];
    ​​​​    slice->slice_count = (uint32_t)slice_count;
    ​​​​    slice->slice_offset = 0;
    ​​​​    if (slice_count > 1) {
    ​​​​        mi_slice_t* last = &segment->slices[slice_index + slice_count - 1];
    ​​​​        last->slice_count = 0;
    ​​​​        last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1));
    ​​​​        last->xblock_size = 0;
    ​​​​    }
    
    ​​​​    // perhaps decommit
    ​​​​    mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats);
    
    ​​​​    // and push it on the free page queue (if it was not a huge page)
    ​​​​    if (sq != NULL) mi_span_queue_push( sq, slice );
    ​​​​             else slice->xblock_size = 0; // mark huge page as free anyways
    ​​​​    }
    

    設定 slice_count,以及更新最後一個 slice 的 slice_offset (最後一個 slice 的 slice_offset 為其與此 span 第一個 slice 間的 byte 數),最後 mi_span_queue_push 將這個新分割的 slice 放回 queue 中,以便下次搜尋可以使用。

    ​​​​// Note: may still return NULL if committing the memory failed
    ​​​​static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
    ​​​​    mi_slice_t* slice = &segment->slices[slice_index];
    
    ​​​​    // commit before changing the slice data
    ​​​​    if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) {
    ​​​​        return NULL;  // commit failed!
    ​​​​    }
    
    ​​​​    // convert the slices to a page
    ​​​​    slice->slice_offset = 0;
    ​​​​    slice->slice_count = (uint32_t)slice_count;
    ​​​​    const size_t bsize = slice_count * MI_SEGMENT_SLICE_SIZE;
    ​​​​    slice->xblock_size = (uint32_t)(bsize >= MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : bsize);
    ​​​​    mi_page_t*  page = mi_slice_to_page(slice);
    
    ​​​​    // set slice back pointers for the first MI_MAX_SLICE_OFFSET entries
    ​​​​    size_t extra = slice_count-1;
    ​​​​    if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET;
    ​​​​    if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1;  // huge objects may have more slices than avaiable entries in the segment->slices
    ​​​​    slice++;
    ​​​​    for (size_t i = 1; i <= extra; i++, slice++) {
    ​​​​    	slice->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i);
    ​​​​    	slice->slice_count = 0;
    ​​​​    	slice->xblock_size = 1;
    ​​​​    }
    
    ​​​​    // and also for the last one (if not set already) (the last one is needed for coalescing)
    ​​​​    mi_slice_t* last = &segment->slices[slice_index + slice_count - 1];
    ​​​​    if (last < mi_segment_slices_end(segment) && last >= slice) {
    ​​​​        last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1));
    ​​​​        last->slice_count = 0;
    ​​​​        last->xblock_size = 1;
    ​​​​    }
    
    ​​​​    // and initialize the page
    ​​​​    page->is_reset = false;
    ​​​​    page->is_committed = true;
    ​​​​    segment->used++;
    ​​​​    return page;
    ​​​​}
    

    mi_segment_span_allocate 將 slice 轉成 page 回傳,要注意此時 slice->xblock_size 並非 page 內 block 的大小,而是 page 本身的大小,再之後會呼叫 mi_page_init 將 slice->xblock_size 設定為應儲存的 block 大小。extra 為此 slice 會占用到的所有其他 slice 的數量,需要將這些 slice 的 slice_offset 設定為正確的距離,另外最後一個 slice 沒設置時也會一併設定。

    ​​​​// note: can be called on abandoned segments
    ​​​​static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) {
    ​​​​    mi_segment_t* segment = _mi_ptr_segment(slice);
    ​​​​    bool is_abandoned = mi_segment_is_abandoned(segment);
    
    ​​​​    // for huge pages, just mark as free but don't add to the queues
    ​​​​    if (segment->kind == MI_SEGMENT_HUGE) {
    ​​​​        slice->xblock_size = 0;  // mark as free anyways
    ​​​​        // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to 
    ​​​​        // avoid a possible cache miss (and the segment is about to be freed)
    ​​​​        return slice;
    ​​​​    }
    
    ​​​​    // otherwise coalesce the span and add to the free span queues
    ​​​​    size_t slice_count = slice->slice_count;
    ​​​​    mi_slice_t* next = slice + slice->slice_count;
    ​​​​    if (next < mi_segment_slices_end(segment) && next->xblock_size==0) {
    ​​​​        // free next block -- remove it from free and merge
    ​​​​        slice_count += next->slice_count; // extend
    ​​​​        if (!is_abandoned) {        
    ​​​​            mi_segment_span_remove_from_queue(next, tld); 
    ​​​​        }
    ​​​​    }
    ​​​​    if (slice > segment->slices) {
    ​​​​        mi_slice_t* prev = mi_slice_first(slice - 1);
    ​​​​        if (prev->xblock_size==0) {
    ​​​​          // free previous slice -- remove it from free and merge
    ​​​​          slice_count += prev->slice_count;
    ​​​​          if (!is_abandoned) { 
    ​​​​              mi_segment_span_remove_from_queue(prev, tld); 
    ​​​​          }
    ​​​​          slice = prev;
    ​​​​        }
    ​​​​    }
    
    ​​​​    // and add the new free page
    ​​​​    mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld);
    ​​​​    return slice;
    ​​​​}
    

    mi_segment_span_free_coalesce 將先前分割的 slice 合併,並且加入回 tld 中的 queue,另外對於 huge 類型的 segment (只含有一個 page) 則不須加入到 queue 當中,因其是使用 mi_segment_huge_page_alloc 分配 segment 空間,而不是 mi_segments_page_alloc 從 queue 中尋找。

    如果在 mi_find_page 要分配的 page 的大小超過 MI_MEDIUM_OBJ_SIZE_MAX 時,此時直接呼叫 mi_large_huge_page_alloc 從 segment 中找可用的 page 空間:

    ​​​​// Large and huge page allocation.
    ​​​​// Huge pages are allocated directly without being in a queue.
    ​​​​// Because huge pages contain just one block, and the segment contains
    ​​​​// just that page, we always treat them as abandoned and any thread
    ​​​​// that frees the block can free the whole page and segment directly.
    ​​​​static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
    ​​​​    size_t block_size = _mi_os_good_alloc_size(size);
    ​​​​    bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX);
    ​​​​    mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size));
    ​​​​    mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size);
    ​​​​    if (page != NULL) {
    ​​​​        const size_t bsize = mi_page_block_size(page);  // note: not `mi_page_usable_block_size` as `size` includes padding
    ​​​​        if (pq == NULL) {
    ​​​​            mi_page_set_heap(page, NULL);
    ​​​​        }
    ​​​​    }
    ​​​​    return page;
    ​​​​}
    

    可以看到與 mi_find_free_page 失敗時一樣,皆會呼叫 mi_page_fresh_alloc 來從 segment 中尋找 page。另外 huge 類型因為每個 segment 只有一個 page,所以不用 page queue 儲存。

    mi_page_fresh_alloc 接著會呼叫 _mi_segment_page_alloc 嘗試從 segment 上尋找可用的 page (上方有介紹),在 _mi_segment_page_alloc 中,當 block_size 為 huge 類型時呼叫 mi_segment_huge_page_alloc 直接分配新的 segment:

    ​​​​static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
    ​​​​{
    ​​​​    mi_page_t* page = NULL;
    ​​​​    mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page);
    ​​​​    if (segment == NULL || page==NULL) return NULL;
    ​​​​    segment->thread_id = 0; // huge segments are immediately abandoned
    ​​​​    return page;
    ​​​​}
    

mi_free

// Free a block
void mi_free(void* p) mi_attr_noexcept
{
    const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
    if (mi_unlikely(segment == NULL)) return; 

    const uintptr_t tid = _mi_thread_id();
    mi_page_t* const page = _mi_segment_page_of(segment, p);

    if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) {  // the thread id matches and it is not a full page, nor has aligned blocks
        // local, and not full or aligned
        mi_block_t* block = (mi_block_t*)(p);
        mi_block_set_next(page, block, page->local_free);
        page->local_free = block;
        if (mi_unlikely(--page->used == 0)) {   // using this expression generates better code than: page->used--; if (mi_page_all_free(page))    
            _mi_page_retire(page);
        }
    }
    else {
        // non-local, aligned blocks, or a full page; use the more generic path
        // note: recalc page in generic to improve code generation
        mi_free_generic(segment, tid == segment->thread_id, p);
    }
}

mi_free 與 mi_malloc 一樣分成兩種情況,第一種是執行緒釋放相同執行緒上的變數,此時只需將 block 放入 page->local_free 當中即可,而實際將 page->local_free 移動到 page->free 形成可用空間的時機在於執行 _mi_malloc_generic 中找尋 page 的過程。

如果是不同執行緒間的釋放空間、釋放 full list 上的 page 或是釋放含有 aligned block 的 page (需先將指標 unalign 回 block 起點),則會呼叫 mi_free_generic:

typedef union mi_page_flags_s {
  uint8_t full_aligned;
  struct {
    uint8_t in_full : 1;
    uint8_t has_aligned : 1;
  } x;
} mi_page_flags_t;

page 將 in_full, has_aligned 與 full_aligned 用同個 union 表示,只要 x 中有一個成員不為 0,則 full_aligned 就不為 0。

static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) {
    mi_page_t* const page = _mi_segment_page_of(segment, p);
    mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
    _mi_free_block(page, local, block);
}

// regular free
static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block)
{
    // and push it on the free list
    if (mi_likely(local)) {
        // owning thread can free a block directly
        if (mi_unlikely(mi_check_is_double_free(page, block))) return;
        mi_block_set_next(page, block, page->local_free);
        page->local_free = block;
        page->used--;
        if (mi_unlikely(mi_page_all_free(page))) {
            _mi_page_retire(page);
        }
        else if (mi_unlikely(mi_page_is_in_full(page))) {
            _mi_page_unfull(page);
        }
    }
    else {
        _mi_free_block_mt(page,block);
    }
}

_mi_free_block 前半段同個執行緒下的釋放空間與 mi_free 相同,後方 _mi_free_block_mt 則為不同執行緒間的釋放函式,因為要處理不同執行緒間的變數,故在 _mi_free_block_mt 之中使用到了 atomic operation:

// multi-threaded free
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
    // huge page segments are always abandoned and can be freed immediately
    mi_segment_t* segment = _mi_page_segment(page);
    if (segment->kind==MI_SEGMENT_HUGE) {
        _mi_segment_huge_page_free(segment, page, block);
        return;
    }

    // Try to put the block on either the page-local thread free list, or the heap delayed free list.
    mi_thread_free_t tfreex;
    bool use_delayed;
    mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
    do {
        use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
        if (mi_unlikely(use_delayed)) {
            // unlikely: this only happens on the first concurrent free in a page that is in the full list
            tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
        }
        else {
            // usual: directly add to page thread_free list
            mi_block_set_next(page, block, mi_tf_block(tfree));
            tfreex = mi_tf_set_block(tfree,block);
        }
    } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));

    if (mi_unlikely(use_delayed)) {
        // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
        mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
        if (heap != NULL) {
            // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
            mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
            do {
                mi_block_set_nextx(heap,block,dfree, heap->keys);
            } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
        }

        // and reset the MI_DELAYED_FREEING flag
        tfree = mi_atomic_load_relaxed(&page->xthread_free);
        do {
            tfreex = tfree;
            mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
            tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
        } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
    }
}

_mi_free_block_mt 會將 block 放到兩個可能的地方,一個是 heap 的 thread delayed free list (heap->thread_delayed_free) 當中,另一個為 page 的 thread free list (page->xthread_free) 當中,只有當 thread delayed free list 為空時 (page 第一次分配 block 或呼叫 _mi_heap_delayed_free 後),才會將 block 放到該 list 當中。

thread delayed free list 的用途在於節省 lock 的時間,當某個 page 內的 block 數已滿時,該 page 會透過 mi_page_to_full 函式被放入到 heap->page[MI_BIN_FULL] 中 (論文內的 full list),使其不在 page queue 當中被搜尋以加快節點走訪速度。而當該 page 內的 block 被釋放後,需要將該 page 從 full list 移回 page queue 當中,如果是同個執行緒,則在 mi_free 時就會執行此動作,因為是同個執行緒所以不需要考慮 race condition,但如果釋放的空間是放在 thread free list 的話,檢查 block 是否釋放以及 page 的移動 (可能在移動的過程中 page 內的 block 被其他執行緒釋放) 都必須加上 lock 防止衝突。

在 heap 上增加 thread delayed free list 後,移動 page 的這個動作被延後執行,_mi_heap_delayed_free (在 _mi_malloc_generic 時呼叫) 會透過 atomic operation 取出 thread delayed free list,並把其中的 block 釋放回各自所屬 page 的 local_free 當中,之後由於都在同個執行緒上,便不需要額外的 lock,在 _mi_heap_delayed_free -> _mi_free_delayed_block -> _mi_free_block 中便能無鎖的把 page 從 full list 移回 page queue。

簡單來說,thread delayed free list 將有被不同執行緒釋放 block 的 page 連接,並給予 page 一個固定的時間點檢查該 page 是否需要從 full list 移動到 page queue 當中,同時也能真正釋放這些被不同執行緒釋放的 block。

如果沒有 thread delayed free list 的設計,在 _mi_malloc_generic 的過程中,會先嘗試從 segment 中找可用的 page,而不是從 full list 中找尋有被其他 thread 釋放空間 page,造成記憶體用量增加。

在 mimalloc 中,使用 page->xthread_free LSB 的 2 個 bits (遮罩 0x03) 來當作 delayed free 的 flags,其餘 bits 表示 thread free list 的 block:

// Thread free flag helpers
static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
    return (mi_block_t*)(tf & ~0x03);
}
static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
    return (mi_delayed_t)(tf & 0x03);
}

所以上方 _mi_free_block_mt 在 mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE 時會將 block 放到 thread delayed free list (heap->thread_delayed_free),並在放入完成後把 page->xthread_free 內的 flag 重設為 MI_NO_DELAYED_FREE,因此後續的 free 會直接放到 thread free list (page->xthread_free) 中,不會再使用 thread delayed free list。

_mi_malloc_generic 中有提到會呼叫 _mi_heap_delayed_free,此函式會實際將 heap 中的 thread delayed free list 移動回 page 的 local_free

void _mi_heap_delayed_free(mi_heap_t* heap) {
    // take over the list (note: no atomic exchange since it is often NULL)
    mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
    while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ };

    // and free them all
    while(block != NULL) {
        mi_block_t* next = mi_block_nextx(heap,block, heap->keys);
        // use internal free instead of regular one to keep stats etc correct
        if (!_mi_free_delayed_block(block)) {
            // we might already start delayed freeing while another thread has not yet
            // reset the delayed_freeing flag; in that case delay it further by reinserting.
            mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
            do {
                mi_block_set_nextx(heap, block, dfree, heap->keys);
            } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
        }
        block = next;
    }
}

while 迴圈逐一走訪 thread delayed free list,並對每個 block 執行 _mi_free_delayed_block

bool _mi_free_delayed_block(mi_block_t* block) {
    // get segment and page
    const mi_segment_t* const segment = _mi_ptr_segment(block);
    mi_page_t* const page = _mi_segment_page_of(segment, block);

    // Clear the no-delayed flag so delayed freeing is used again for this page.
    // This must be done before collecting the free lists on this page -- otherwise
    // some blocks may end up in the page `thread_free` list with no blocks in the
    // heap `thread_delayed_free` list which may cause the page to be never freed!
    // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
    _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */);

    // collect all other non-local frees to ensure up-to-date `used` count
    _mi_page_free_collect(page, false);

    // and free the block (possibly freeing the page as well since used is updated)
    _mi_free_block(page, true, block);
    return true;
}

_mi_page_use_delayed_free 很簡單,就是將 page 內的 delayed free list 中的 flag 設定回 MI_USE_DELAYED_FREE,表示 block 可以再放到 thread delayed free list 中;_mi_page_free_collect 將可用空間 thread free list 及 local free list 移動回 free list;_mi_free_block 上方有介紹,將此 block (在 thread delayed free list 中的 block) 移動回 page 的 local free list 中。

void _mi_page_free_collect(mi_page_t* page, bool force) {
    // collect the thread free list
    if (force || mi_page_thread_free(page) != NULL) {  // quick test to avoid an atomic operation
        _mi_page_thread_free_collect(page);
    }

    // and the local free list
    if (page->local_free != NULL) {
        if (mi_likely(page->free == NULL)) {
            // usual case
            page->free = page->local_free;
            page->local_free = NULL;
            page->is_zero = false;
        }
        else if (force) {
            // append -- only on shutdown (force) as this is a linear operation
            mi_block_t* tail = page->local_free;
            mi_block_t* next;
            while ((next = mi_block_next(page, tail)) != NULL) {
                tail = next;
            }
            mi_block_set_next(page, tail, page->free);
            page->free = page->local_free;
            page->local_free = NULL;
            page->is_zero = false;
        }
    }
}

_mi_page_free_collect 是把 local free list 以及 thread free list 移回 free list 的函式,除了在 _mi_free_delayed_block 中被呼叫外,也會在其他需要 page 空間時呼叫來取得可用 page 空間。

其中 _mi_page_thread_free_collect 用來回收 thread free list,而下方則是一般 local free list 的回收,將 page->local_freepage->free 連接,並把 page->local_free 清空。

_mi_thread_done

每個執行緒結束時都要正確回收空間,在 mimalloc 當中註冊了 _mi_thread_done 函式,讓執行緒在結束時能夠呼叫該函式來回收空間。

// Set up handlers so `mi_thread_done` is called automatically
static void mi_process_setup_auto_thread_done(void) {
    static bool tls_initialized = false; // fine if it races
    if (tls_initialized) return;
    tls_initialized = true;
    #if defined(_WIN32) && defined(MI_SHARED_LIB)
        // nothing to do as it is done in DllMain
    #elif defined(_WIN32) && !defined(MI_SHARED_LIB)
        mi_fls_key = FlsAlloc(&mi_fls_done);
    #elif defined(MI_USE_PTHREADS)
        mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
        pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
    #endif
    _mi_heap_set_default_direct(&_mi_heap_main);
}

在 windows 平台上註冊 mi_fls_done;在 POSIX 上則是註冊 mi_pthread_done,兩函式皆會呼叫 _mi_thread_done 來回收空間。

static void _mi_thread_done(mi_heap_t* heap) {
    // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
    if (heap->thread_id != _mi_thread_id()) return;

    // abandon the thread local heap
    if (_mi_heap_done(heap)) return;  // returns true if already ran
}

// Free the thread local default heap (called from `mi_thread_done`)
static bool _mi_heap_done(mi_heap_t* heap) {
    if (!mi_heap_is_initialized(heap)) return true;
    
    // reset default heap
    _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);

    // switch to backing heap
    heap = heap->tld->heap_backing;
    if (!mi_heap_is_initialized(heap)) return false;

    // delete all non-backing heaps in this thread
    mi_heap_t* curr = heap->tld->heaps;
    while (curr != NULL) {
        mi_heap_t* next = curr->next; // save `next` as `curr` will be freed
        if (curr != heap) {
            mi_heap_delete(curr);
        }
        curr = next;
    }

    // collect if not the main thread
    if (heap != &_mi_heap_main) {
        _mi_heap_collect_abandon(heap);
    }
  
    // merge stats
    _mi_stats_done(&heap->tld->stats);  

    // free if not the main thread
    if (heap != &_mi_heap_main) {
        // the following assertion does not always hold for huge segments as those are always treated
        // as abondened: one may allocate it in one thread, but deallocate in another in which case
        // the count can be too large or negative. todo: perhaps not count huge segments? see issue #363
        // mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
        _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main);
    }
    return false;
}

_mi_thread_done 會呼叫 _mi_heap_done 釋放 heap 空間,在 _mi_heap_done 中,會透過 mi_heap_delete 刪除 heap->tld->heaps 中除了 heap->tld->heap_backing 外的其他 heap,這些 heap 是透過 mi_heap_new 產生的,在後方會再說明。接著執行 _mi_heap_collect_abandon,此函式將 heap 內部的 page 回收,最後 _mi_os_free 則實際釋放該 heap 的記憶體空間。

void _mi_heap_collect_abandon(mi_heap_t* heap) {
    mi_heap_collect_ex(heap, MI_ABANDON);
}

static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
    if (heap==NULL || !mi_heap_is_initialized(heap)) return;
    _mi_deferred_free(heap, collect >= MI_FORCE);

    // note: never reclaim on collect but leave it to threads that need storage to reclaim 
    if (
    #ifdef NDEBUG
      collect == MI_FORCE
    #else
      collect >= MI_FORCE
    #endif
    && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim)
    {
        // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
        // if all memory is freed by now, all segments should be freed.
        _mi_abandoned_reclaim_all(heap, &heap->tld->segments);
    }

    // if abandoning, mark all pages to no longer add to delayed_free
    if (collect == MI_ABANDON) {
        mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
    }

    // free thread delayed blocks.
    // (if abandoning, after this there are no more thread-delayed references into the pages.)
    _mi_heap_delayed_free(heap);

    // collect retired pages
    _mi_heap_collect_retired(heap, collect >= MI_FORCE);

    // collect all pages owned by this thread
    mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);

    // collect segment caches
    if (collect >= MI_FORCE) {
        _mi_segment_thread_collect(&heap->tld->segments);
    }
}

_mi_deferred_free 會呼叫使用者註冊的 deferred_free 函式,其下方的 _mi_abandoned_reclaim_all 因不是主執行緒不會運行到,mi_heap_visit_pages 會逐一走訪 page,並對每個 page 執行給定的函式 (此例為 mi_heap_page_never_delayed_free)。

下方 _mi_heap_delayed_free 與 _mi_heap_collect_retired 回收 delayed free list 以及 retired page。並再呼叫一次 mi_heap_visit_pages 回收所有 page。最後 _mi_segment_thread_collect 把存在 cache 中的 segment 釋放。

static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) {
    UNUSED(arg2);
    UNUSED(heap);
    mi_collect_t collect = *((mi_collect_t*)arg_collect);
    _mi_page_free_collect(page, collect >= MI_FORCE);
    if (mi_page_all_free(page)) {
        // no more used blocks, free the page. 
        // note: this will free retired pages as well.
        _mi_page_free(page, pq, collect >= MI_FORCE);
    }
    else if (collect == MI_ABANDON) {
        // still used blocks but the thread is done; abandon the page
        _mi_page_abandon(page, pq);
    }
    return true; // don't break
}

mi_heap_collect_ex 中,mi_heap_visit_pages 會將每個 page 透過 mi_heap_page_collect 來釋放,_mi_page_free_collect 在上方已有介紹,mi_page_all_free 檢查 page 內的 block 是否全部釋放,如果是的話則 _mi_page_free 會將該 page 從 segment 中釋放;如果該 page 還有可用的 block 在其他 thread 中被使用,則會呼叫 _mi_page_abandon 將該 page 設置為 abandon。

// Free a page with no more free blocks
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
    // no more aligned blocks in here
    mi_page_set_has_aligned(page, false);

    mi_heap_t* heap = mi_page_heap(page);
    
    // remove from the page list
    // (no need to do _mi_heap_delayed_free first as all blocks are already free)
    mi_segments_tld_t* segments_tld = &heap->tld->segments;
    mi_page_queue_remove(pq, page);

    // and free it
    mi_page_set_heap(page,NULL);
    _mi_segment_page_free(page, force, segments_tld);
}

_mi_page_free 將 page 從 page queue 中移除,並把 page->heap 設為 NULL,最後 _mi_segment_page_free 將 page 從 segment 中刪除。

void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
{
    mi_segment_t* segment = _mi_page_segment(page);

    // mark it as free now
    mi_segment_page_clear(page, tld);

    if (segment->used == 0) {
        // no more used pages; remove from the free list and free the segment
        mi_segment_free(segment, force, tld);
    }
    else if (segment->used == segment->abandoned) {
        // only abandoned pages; remove from free list and abandon
        mi_segment_abandon(segment,tld);
    }
}

mi_segment_page_clear 將 page 回收回 segment,與 page 相似,當沒有可用的 page 時,透過 mi_segment_free 將 segment 也釋放,否則呼叫 mi_segment_abandon 將該 segment 設為 abandon。

// note: can be called on abandoned pages
static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) {
    mi_segment_t* segment = _mi_ptr_segment(page);

    size_t inuse = page->capacity * mi_page_block_size(page);

    // reset the page memory to reduce memory pressure?
    if (!segment->mem_is_pinned && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
        size_t psize;
        uint8_t* start = _mi_page_start(segment, page, &psize);
        page->is_reset = true;
        _mi_os_reset(start, psize, tld->stats);
    }

    // zero the page data, but not the segment fields
    page->is_zero_init = false;
    ptrdiff_t ofs = offsetof(mi_page_t, capacity);
    memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs);
    page->xblock_size = 1;

    // and free it
    mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld);  
    segment->used--;
    // cannot assert segment valid as it is called during reclaim
    // mi_assert_expensive(mi_segment_is_valid(segment, tld));
    return slice;
}

mi_segment_page_clear 將 page 內的值清空,並透過 mi_segment_span_free_coalesce 把 page 放回 tld->span 這個 queue 中。

static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
    // Remove the free pages
    mi_slice_t* slice = &segment->slices[0];
    const mi_slice_t* end = mi_segment_slices_end(segment);
    size_t page_count = 0;
    while (slice < end) {
        if (slice->xblock_size == 0 && segment->kind != MI_SEGMENT_HUGE) {
            mi_segment_span_remove_from_queue(slice, tld);
        }
        page_count++;
        slice = slice + slice->slice_count;
    }

    if (!force && mi_segment_cache_push(segment, tld)) {
        // it is put in our cache
    }
    else {
        // otherwise return it to the OS
        mi_segment_os_free(segment,  tld);
    }
}

mi_segment_free 檢查 segment 中的所有 slice,將仍位於 span queue 中的 free slice 從 queue 裡移除,要注意當 slice 還未分配出去時,xblock_size 用來表示該 slice 是否位於 span queue 中,0 表示在 queue 當中,1 則表示不在 queue 當中。mi_segment_cache_push 將 segment 放入 cache 中,下次需要 segment 時可以直接從 cache 中取得記憶體。如果必須釋放空間的話 (如 huge segment),則呼叫 mi_segment_os_free 真正釋放開記憶體。

page(slice) 中的 xblock_size 有許多用途,在 slice 階段 (還未要分配給使用者)時,分成兩種:

  • xblock_size == 0 表示此 slice 目前存放在 tld->spans 這個 queue 當中
  • xblock_size == 1 表示此 slice 目前不存放在 tld->spans 這個 queue 當中

當要把 slice 分配給使用者時,分配函式 mi_segment_span_allocate 內的 xblock_size 表示的是此 slice (page) 實際佔有的空間,也就是所有可分配的空間;在分配後,xblock_size 變成這個 page 內的每個 block 擁有的空間。

mi_heap_*

mimalloc 提供建立在執行緒內自行管理的 heap,相關函式可參考連結

mi_heap_t* mi_heap_new(void) 
{
    mi_heap_t* bheap = mi_heap_get_backing();
    mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t);  // todo: OS allocate in secure mode?
    if (heap==NULL) return NULL;
    _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
    heap->tld = bheap->tld;
    heap->thread_id = _mi_thread_id();
    _mi_random_split(&bheap->random, &heap->random);
    heap->cookie  = _mi_heap_random_next(heap) | 1;
    heap->keys[0] = _mi_heap_random_next(heap);
    heap->keys[1] = _mi_heap_random_next(heap);
    heap->no_reclaim = true;  // don't reclaim abandoned pages or otherwise destroy is unsafe
    // push on the thread local heaps list
    heap->next = heap->tld->heaps;
    heap->tld->heaps = heap;
    return heap;
}

mi_heap_new 在目前執行緒中建立一個新的 heap,mi_heap_get_backing 回傳目前執行緒的主要 heap,mi_heap_malloc_tp 實際分配一個新的 heap 空間,要注意的是分配 heap 與一般分配 block 的程式碼相同是使用 mi_heap_malloc 在目前執行緒的主要 heap 的 page 上分配,設置 heap 的過程與 mi_heap_main_init 相似,最後再將新增的 heap 放到 tld->heaps 的 linked list 上。

// Safe delete a heap without freeing any still allocated blocks in that heap.
void mi_heap_delete(mi_heap_t* heap)
{
    if (heap==NULL || !mi_heap_is_initialized(heap)) return;

    if (!mi_heap_is_backing(heap)) {
    	// tranfer still used pages to the backing heap
    	mi_heap_absorb(heap->tld->heap_backing, heap);
    }
    else {
    	// the backing heap abandons its pages
    	_mi_heap_collect_abandon(heap);
    }
    mi_heap_free(heap);
}

mi_heap_delete 將 heap 內的 block 回收,其中在 heap 與執行緒主要 heap 不同時呼叫 mi_heap_absorb,將要被釋放的 heap 內空間轉移給主要 heap 來管理,而如果是執行緒主要的 heap,則改呼叫 _mi_heap_collect_abandon 將內部空間釋放,其作用上方已有解釋,最後 mi_heap_free 會實際把 heap 從 tld->heaps 上移除,並釋放 heap 空間。

(原文此處嵌入之圖片無法顯示)
mi_heap_free 或許可以改為放在 if (!mi_heap_is_backing(heap)) 底下。

// Tranfer the pages from one heap to the other
static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
    if (from==NULL || from->page_count == 0) return;

    // reduce the size of the delayed frees
    _mi_heap_delayed_free(from);

    // transfer all pages by appending the queues; this will set a new heap field 
    // so threads may do delayed frees in either heap for a while.
    // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state
    // so after this only the new heap will get delayed frees
    for (size_t i = 0; i <= MI_BIN_FULL; i++) {
        mi_page_queue_t* pq = &heap->pages[i];
        mi_page_queue_t* append = &from->pages[i];
        size_t pcount = _mi_page_queue_append(heap, pq, append);
        heap->page_count += pcount;
        from->page_count -= pcount;
    }

    // and do outstanding delayed frees in the `from` heap  
    // note: be careful here as the `heap` field in all those pages no longer point to `from`,
    // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a 
    // the regular `_mi_free_delayed_block` which is safe.
    _mi_heap_delayed_free(from);  

    // and reset the `from` heap
    mi_heap_reset_pages(from);  
}

mi_heap_absorb 將有用到的 page queue append 到目標 heap 上,再用 mi_heap_reset_pages 把 heap 內的 page 歸 0。

// called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources.
static void mi_heap_free(mi_heap_t* heap) {
    if (heap==NULL || !mi_heap_is_initialized(heap)) return;
    if (mi_heap_is_backing(heap)) return; // dont free the backing heap

    // reset default
    if (mi_heap_is_default(heap)) {
        _mi_heap_set_default_direct(heap->tld->heap_backing);
    }

    // remove ourselves from the thread local heaps list
    // linear search but we expect the number of heaps to be relatively small
    mi_heap_t* prev = NULL;
    mi_heap_t* curr = heap->tld->heaps; 
    while (curr != heap && curr != NULL) {
        prev = curr;
        curr = curr->next;
    }
    if (curr == heap) {
        if (prev != NULL) { prev->next = heap->next; }
                     else { heap->tld->heaps = heap->next; }
    }

    // and free the used memory
    mi_free(heap);
}

mi_heap_free 設定 default heap 並將 heap 從 tld->heaps 中移除,最後利用 mi_free 釋放 heap 空間,因為 internal heap 與一般的 block 一樣記憶體是存放在 page 當中,故與一般釋放 block 一樣透過 mi_free 即可釋放。

void mi_heap_destroy(mi_heap_t* heap) {
    if (heap==NULL || !mi_heap_is_initialized(heap)) return;
    if (!heap->no_reclaim) {
        // don't free in case it may contain reclaimed pages
        mi_heap_delete(heap);
    }
    else {
        // free all pages
        _mi_heap_destroy_pages(heap);
        mi_heap_free(heap);
    }
}

void _mi_heap_destroy_pages(mi_heap_t* heap) {
    mi_heap_visit_pages(heap, &_mi_heap_page_destroy, NULL, NULL);
    mi_heap_reset_pages(heap);
}

mi_heap_destroy 與 mi_heap_delete 不同的地方在於會把 heap 內使用的空間全部釋放,透過呼叫 _mi_heap_destroy_pages 來釋放每個 page。_mi_heap_destroy_pages 會逐一走訪 heap 上的每個 page,並對每個 page 執行 _mi_heap_page_destroy 函式:

static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
    // ensure no more thread_delayed_free will be added
    _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);

    /// pretend it is all free now
    page->used = 0;

    // and free the page
    // mi_page_free(page,false);
    page->next = NULL;
    page->prev = NULL;
    _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments);

    return true; // keep going
}

_mi_heap_page_destroy 呼叫 _mi_segment_page_free 來將 segment 內的該 page 移除。