--- title: libc-2.27 FILE description: Tracing source code tags: libc-2.27 lang: zh_tw --- [TOC] # libc-2.27 FILE 從 gdb 上可以看到 stdin stdout 躺在 bss 段上: ![](https://i.imgur.com/eOH97VV.png) # `_IO_2_1_stdin_` show 出 stdin 的位址,發現其名為 `_IO_2_1_stdin_` ![](https://i.imgur.com/fx0lcFG.png) 從 source code 找,找到此變數 ```c // libio/bits/libio.h extern struct _IO_FILE_plus _IO_2_1_stdin_; extern struct _IO_FILE_plus _IO_2_1_stdout_; extern struct _IO_FILE_plus _IO_2_1_stderr_; ``` 看一下 struct `_IO_FILE_plus` ```c // libio/libioP.h /* We always allocate an extra word following an _IO_FILE. This contains a pointer to the function jump table used. This is for compatibility with C++ streambuf; the word can be used to smash to a pointer to a virtual function table. */ struct _IO_FILE_plus { _IO_FILE file; const struct _IO_jump_t *vtable; }; ``` 上面的註解說了這樣設計的目的是為了與C++相容 ```c // libio/bits/libio.h struct _IO_FILE { int _flags; /* High-order word is _IO_MAGIC; rest is flags. */ #define _IO_file_flags _flags /* The following pointers correspond to the C++ streambuf protocol. */ /* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */ char* _IO_read_ptr; /* Current read pointer */ char* _IO_read_end; /* End of get area. */ char* _IO_read_base; /* Start of putback+get area. */ char* _IO_write_base; /* Start of put area. */ char* _IO_write_ptr; /* Current put pointer. */ char* _IO_write_end; /* End of put area. */ char* _IO_buf_base; /* Start of reserve area. */ char* _IO_buf_end; /* End of reserve area. */ /* The following fields are used to support backing up and undo. */ char *_IO_save_base; /* Pointer to start of non-current get area. */ char *_IO_backup_base; /* Pointer to first valid character of backup area */ char *_IO_save_end; /* Pointer to end of non-current get area. */ struct _IO_marker *_markers; struct _IO_FILE *_chain; int _fileno; #if 0 int _blksize; #else int _flags2; #endif _IO_off_t _old_offset; /* This used to be _offset but it's too small. 
*/ #define __HAVE_COLUMN /* temporary */ /* 1+column number of pbase(); 0 is unknown. */ unsigned short _cur_column; signed char _vtable_offset; char _shortbuf[1]; /* char* _save_gptr; char* _save_egptr; */ _IO_lock_t *_lock; #ifdef _IO_USE_OLD_IO_FILE }; ``` ```c // libio/libioP.h struct _IO_jump_t { JUMP_FIELD(size_t, __dummy); JUMP_FIELD(size_t, __dummy2); JUMP_FIELD(_IO_finish_t, __finish); JUMP_FIELD(_IO_overflow_t, __overflow); JUMP_FIELD(_IO_underflow_t, __underflow); JUMP_FIELD(_IO_underflow_t, __uflow); JUMP_FIELD(_IO_pbackfail_t, __pbackfail); /* showmany */ JUMP_FIELD(_IO_xsputn_t, __xsputn); JUMP_FIELD(_IO_xsgetn_t, __xsgetn); JUMP_FIELD(_IO_seekoff_t, __seekoff); JUMP_FIELD(_IO_seekpos_t, __seekpos); JUMP_FIELD(_IO_setbuf_t, __setbuf); JUMP_FIELD(_IO_sync_t, __sync); JUMP_FIELD(_IO_doallocate_t, __doallocate); JUMP_FIELD(_IO_read_t, __read); JUMP_FIELD(_IO_write_t, __write); JUMP_FIELD(_IO_seek_t, __seek); JUMP_FIELD(_IO_close_t, __close); JUMP_FIELD(_IO_stat_t, __stat); JUMP_FIELD(_IO_showmanyc_t, __showmanyc); JUMP_FIELD(_IO_imbue_t, __imbue); #if 0 get_column; set_column; #endif }; ``` 大概了解 struct 後,來看看 scanf、gets、fgets 之類會用到 stdin 的 function 是如何運作的。 # scanf ```c // stdio-common/isoc99_scanf.c /* Read formatted input from stdin according to the format string FORMAT. */ /* VARARGS1 */ int __isoc99_scanf (const char *format, ...) 
{ va_list arg; int done; #ifdef _IO_MTSAFE_IO _IO_acquire_lock_clear_flags2 (stdin); #endif stdin->_flags2 |= _IO_FLAGS2_SCANF_STD; va_start (arg, format); done = _IO_vfscanf (stdin, format, arg, NULL); va_end (arg); #ifdef _IO_MTSAFE_IO _IO_release_lock (stdin); #endif return done; } ``` - `_flags2` 加了 `_IO_FLAGS2_SCANF_STD` - 實際用 gdb trace 後,發現進入 `_IO_vfscanf` 的部分是 call `_IO_vfscanf_internal`,隨後也找到了以下 code ```c // stdio-common/vfscanf.c ldbl_strong_alias (_IO_vfscanf_internal, _IO_vfscanf) ldbl_hidden_def (_IO_vfscanf_internal, _IO_vfscanf) ``` ```c // stdio-common/vfscanf.c int _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr, int *errp) #endif { va_list arg; const CHAR_T *f = format; UCHAR_T fc; /* Current character of the format. */ WINT_T done = 0; /* Assignments done. */ size_t read_in = 0; /* Chars read in. */ WINT_T c = 0; /* Last char read. */ int width; /* Maximum field width. */ int flags; /* Modifiers for current format element. */ int errval = 0; #ifndef COMPILE_WSCANF locale_t loc = _NL_CURRENT_LOCALE; struct __locale_data *const curctype = loc->__locales[LC_CTYPE]; #endif /* Errno of last failed inchar call. */ int inchar_errno = 0; /* Status for reading F-P nums. */ char got_digit, got_dot, got_e, negative; /* If a [...] is a [^...]. */ CHAR_T not_in; #define exp_char not_in /* Base for integral numbers. */ int base; /* Decimal point character. */ #ifdef COMPILE_WSCANF wint_t decimal; #else const char *decimal; #endif /* The thousands character of the current locale. */ #ifdef COMPILE_WSCANF wint_t thousands; #else const char *thousands; #endif struct ptrs_to_free *ptrs_to_free = NULL; /* State for the conversions. */ mbstate_t state; /* Integral holding variables. */ union { long long int q; unsigned long long int uq; long int l; unsigned long int ul; } num; /* Character-buffer pointer. 
*/ char *str = NULL; wchar_t *wstr = NULL; char **strptr = NULL; ssize_t strsize = 0; /* We must not react on white spaces immediately because they can possibly be matched even if in the input stream no character is available anymore. */ int skip_space = 0; /* Workspace. */ CHAR_T *tw; /* Temporary pointer. */ struct char_buffer charbuf; scratch_buffer_init (&charbuf.scratch); #ifdef __va_copy __va_copy (arg, argptr); #else arg = (va_list) argptr; #endif #ifdef ORIENT ORIENT; #endif ``` - 設定一大坨區域變數 ```c ARGCHECK (s, format); ``` - 作基本檢查 - 檢查 `(s->_IO_file_flags & _IO_MAGIC_MASK) != _IO_MAGIC` - 也就是 `(s->_flags & 0xffff0000) != 0xFBAD0000`(注意 C 的 `!=` 優先權比 `&` 高,所以要加括號) - 不一樣就直接 return - 檢查 `if (s->_flags & _IO_NO_READS)` - `_IO_NO_READS` 為 `4` - 啟用 `_IO_NO_READS` 表示不能讀,若此條件成立則 return -1 - 檢查 `else if (format == NULL)` - 條件成立也 return -1 :::info 此階段可知道,若要能繼續執行 scanf,則 - `s->_flags` 要 `0xFBADXXXX` 開頭 - `s->_flags & 4` 需要等於 0 - `format` 不能是 `NULL` ::: ```c { #ifndef COMPILE_WSCANF struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC]; #endif /* Figure out the decimal point character. */ #ifdef COMPILE_WSCANF decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC); #else decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string; #endif /* Figure out the thousands separator character. */ #ifdef COMPILE_WSCANF thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC); #else thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string; if (*thousands == '\0') thousands = NULL; #endif } ``` - 從目前的 locale(`LC_NUMERIC`)取得小數點字元 `decimal` 與千分位分隔字元 `thousands`,千分位為空字串時設成 `NULL` - 只跟數字的解析有關,對這次追蹤 stdin 的流程不是很重要 ```c /* Lock the stream. */ LOCK_STREAM (s); ``` ```c // stdio-common/vfscanf.c #define LOCK_STREAM(S) \ __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \ _IO_flockfile (S) ``` ```c // sysdeps/generic/libc-lock.h /* Start a critical region with a cleanup function */ #define __libc_cleanup_region_start(DOIT, FCT, ARG) \ { \ typeof (***(FCT)) *__save_FCT = (DOIT) ? 
(FCT) : 0; \ typeof (ARG) __save_ARG = ARG; \ /* close brace is in __libc_cleanup_region_end below. */ /* End a critical region started with __libc_cleanup_region_start. */ #define __libc_cleanup_region_end(DOIT) \ if ((DOIT) && __save_FCT != 0) \ (*__save_FCT)(__save_ARG); \ } ``` - `__libc_cleanup_region_start(DOIT, FCT, ARG)` 只是開一個 `{` 區塊,把 `FCT` 和 `ARG` 存進 `__save_FCT`、`__save_ARG`,並不會馬上呼叫 - 真正的 `(*__save_FCT)(__save_ARG)` 要等到對應的 `__libc_cleanup_region_end(DOIT)` 才會執行 - 所以 `LOCK_STREAM(S)` 是先把 `_IO_funlockfile(S)` 記成離開 critical region 時要做的 cleanup,接著呼叫 `_IO_flockfile(S)` 上鎖;解鎖是在之後的 `__libc_cleanup_region_end` 才發生 - 目前無法完全理解這邊的 code,但有追查到 `__funlockfile` 有兩個版本 ```c // sysdeps/pthread/funlockfile.c void __funlockfile (FILE *stream) { _IO_lock_unlock (*stream->_lock); } strong_alias (__funlockfile, _IO_funlockfile) weak_alias (__funlockfile, funlockfile) ``` ```c // stdio-common/funlockfile.c void __funlockfile (FILE *stream) { /* Do nothing. Using this version does not do any locking. */ } weak_alias (__funlockfile, _IO_funlockfile) weak_alias (__funlockfile, funlockfile); ``` - 猜測原因是多執行緒時才需要考慮到同步問題,進而使用到 lock 機制,單一 thread 就不需要 lock ```c #ifndef COMPILE_WSCANF /* From now on we use `state' to convert the format string. */ memset (&state, '\0', sizeof (state)); #endif /* Run through the format string. */ while (*f != '\0') { ``` - 接下來就是 parse format,這邊主要關注 string 的部分 ```c ... fc = *f++; if (fc != '%') { /* Remember to skip spaces. */ if (ISSPACE (fc)) { skip_space = 1; continue; } /* Read a character. */ c = inchar (); /* Characters other than format specs must just match. */ if (__glibc_unlikely (c == EOF)) input_error (); /* We saw white space char as the last character in the format string. Now it's time to skip all leading white space. */ if (skip_space) { while (ISSPACE (c)) if (__glibc_unlikely (inchar () == EOF)) input_error (); skip_space = 0; } if (__glibc_unlikely (c != fc)) { ungetc (c, s); conv_error (); } continue; } ``` - 這邊注意到 `inchar()` 就是實際讀取輸入的部分 ```c // stdio-common/vfscanf.c # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \ : ((c = _IO_getc_unlocked (s)), \ (void) (c != EOF \ ? 
++read_in \ : (size_t) (inchar_errno = errno)), c)) ``` - `c` 一開始初始化為 0,不為 EOF,所以第一次 `inchar()` 會走 `_IO_getc_unlocked (s)` 這條分支 ```c // libio/bits/libio.h #if __GNUC__ >= 3 # define _IO_BE(expr, res) __builtin_expect ((expr), res) #else # define _IO_BE(expr, res) (expr) #endif #define _IO_getc_unlocked(_fp) \ (_IO_BE ((_fp)->_IO_read_ptr >= (_fp)->_IO_read_end, 0) \ ? __uflow (_fp) : *(unsigned char *) (_fp)->_IO_read_ptr++) ``` 第一次 scanf 時,`_IO_read_ptr` 和 `_IO_read_end` 皆為 `0`,所以會走 `__uflow (_fp)` ```c // libio/genops.c int __uflow (_IO_FILE *fp) { if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1) return EOF; if (fp->_mode == 0) _IO_fwide (fp, -1); if (_IO_in_put_mode (fp)) if (_IO_switch_to_get_mode (fp) == EOF) return EOF; if (fp->_IO_read_ptr < fp->_IO_read_end) return *(unsigned char *) fp->_IO_read_ptr++; if (_IO_in_backup (fp)) { _IO_switch_to_main_get_area (fp); if (fp->_IO_read_ptr < fp->_IO_read_end) return *(unsigned char *) fp->_IO_read_ptr++; } if (_IO_have_markers (fp)) { if (save_for_backup (fp, fp->_IO_read_end)) return EOF; } else if (_IO_have_backup (fp)) _IO_free_backup_area (fp); return _IO_UFLOW (fp); } ``` ```c #if _IO_JUMPS_OFFSET # define _IO_JUMPS_FUNC(THIS) \ (IO_validate_vtable \ (*(struct _IO_jump_t **) ((void *) &_IO_JUMPS_FILE_plus (THIS) \ + (THIS)->_vtable_offset))) # define _IO_vtable_offset(THIS) (THIS)->_vtable_offset #else # define _IO_JUMPS_FUNC(THIS) (IO_validate_vtable (_IO_JUMPS_FILE_plus (THIS))) # define _IO_vtable_offset(THIS) 0 #endif ``` - 這邊我用 IDA 看是沒有對應這部分的 code,應該是沒啟用 `_IO_JUMPS_OFFSET`,所以 `# define _IO_vtable_offset(THIS) 0` 讓 `_IO_vtable_offset (fp) == 0` 恆成立,編譯器優化後就直接去除這部分的 code - `_IO_fwide (fp, -1) != -1` 檢查這個 stream 是不是 byte-oriented,也就是呼叫後 `fp->_mode` 是否為 -1(就我理解 `_IO_fwide` 在 `_mode == 0` 時會先把它設成 -1 再回傳,所以 `_mode` 原本是 0 應該也能通過,這點待確認;`_mode > 0` 的 wide stream 則會直接 return EOF) :::info 這邊知道了要讓 scanf 繼續下去,`fp->_mode` 不能是 wide(大於 0);設成 `fp->_mode == -1` 一定可以通過 ::: - 一開始還不會啟用 `_IO_CURRENTLY_PUTTING` flag,所以不會進 `if (_IO_in_put_mode (fp))` - `if (fp->_IO_read_ptr < fp->_IO_read_end)` 這邊兩者都是 0,條件不成立 - `if (_IO_in_backup (fp))` 沒啟用 `_IO_IN_BACKUP` flag 也不會進 - `if (_IO_have_markers (fp))` `fp->_markers` 為 0 也不會進 - `else if (_IO_have_backup (fp))` 
`(fp)->_IO_save_base` 為 NULL 也不會進 - 最後 `return _IO_UFLOW (fp);` ```c // libio/libioP.h /* The 'uflow' hook returns the next character in the input stream (cast to unsigned char), and increments the read position; EOF is returned on failure. It matches the streambuf::uflow virtual function, which is not in the cfront implementation, but was added to C++ by the ANSI/ISO committee. */ #define _IO_UFLOW(FP) JUMP0 (__uflow, FP) #define _IO_WUFLOW(FP) WJUMP0 (__uflow, FP) #define JUMP0(FUNC, THIS) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS) ``` - 就是呼叫 `_IO_2_1_stdin_.vtable->__uflow`(`_IO_JUMPS_FUNC` 會先把 vtable 指標交給 `IO_validate_vtable` 檢查) :::success 若能偽造整個 stdin 結構,符合以下: - `_IO_2_1_stdin_.file._flags` 為 `0xFBADXXXX` 開頭 - `_IO_2_1_stdin_.file._flags & 4` 等於 0 - `_IO_2_1_stdin_.file._flags & 0x800` 等於 0 - 不啟用 `_IO_CURRENTLY_PUTTING` - `_IO_2_1_stdin_.file._flags & 0x100` 等於 0 - 不啟用 `_IO_IN_BACKUP` - `_IO_2_1_stdin_.file._IO_save_base` 等於 0 - `_IO_2_1_stdin_.file._markers` 等於 0 - `_IO_2_1_stdin_.file._IO_read_ptr` >= `_IO_2_1_stdin_.file._IO_read_end` - `_IO_2_1_stdin_.file._mode == -1` 那就會呼叫 `_IO_2_1_stdin_.vtable->__uflow`(前提是偽造的 vtable 指標要能通過 `IO_validate_vtable` 的檢查) ::: ```c /* This is the start of the conversion string. */ flags = 0; /* Initialize state of modifiers. */ argpos = 0; /* Prepare temporary buffer. */ char_buffer_rewind (&charbuf); /* Check for a positional parameter specification. */ if (ISDIGIT ((UCHAR_T) *f)) { argpos = read_int ((const UCHAR_T **) &f); if (*f == L_('$')) ++f; else { /* Oops; that was actually the field width. */ width = argpos; argpos = 0; goto got_width; } } ``` - 若是常見的 `scanf("%20s", buf)` 用法的話 - width 變成 20 - goto got_width ```c got_width: if (width == 0) width = -1; /* Check for type modifiers. */ switch (*f++) { ... default: /* Not a recognized modifier. Backup. */ --f; break; } /* End of the format string? */ if (__glibc_unlikely (*f == L_('\0'))) conv_error (); /* Find the conversion specifier. */ fc = *f++; if (skip_space || (fc != L_('[') && fc != L_('c') && fc != L_('C') && fc != L_('n'))) { ... } switch (fc) { ... 
case L_('s'): /* Read a string. */ if (!(flags & LONG)) { STRING_ARG (str, char, 100); c = inchar (); if (__glibc_unlikely (c == EOF)) input_error (); ```