---
title: libc-2.27 FILE
description: Tracing source code
tags: libc-2.27
lang: zh_tw
---
[TOC]
# libc-2.27 FILE
從 gdb 上可以看到 stdin stdout 躺在 bss 段上:

# `_IO_2_1_stdin_`
show 出 stdin 的位址,發現其名為 `_IO_2_1_stdin_`

從 source code 找,找到此變數
```c
// libio/bits/libio.h
extern struct _IO_FILE_plus _IO_2_1_stdin_;
extern struct _IO_FILE_plus _IO_2_1_stdout_;
extern struct _IO_FILE_plus _IO_2_1_stderr_;
```
看一下 struct `_IO_FILE_plus`
```c
// libio/libioP.h
/* We always allocate an extra word following an _IO_FILE.
This contains a pointer to the function jump table used.
This is for compatibility with C++ streambuf; the word can
be used to smash to a pointer to a virtual function table. */
struct _IO_FILE_plus
{
_IO_FILE file;
const struct _IO_jump_t *vtable;
};
```
上面的註解說了這樣設計的目的是為了與C++相容
```c
// libio/bits/libio.h
struct _IO_FILE {
int _flags; /* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags
/* The following pointers correspond to the C++ streambuf protocol. */
/* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */
char* _IO_read_ptr; /* Current read pointer */
char* _IO_read_end; /* End of get area. */
char* _IO_read_base; /* Start of putback+get area. */
char* _IO_write_base; /* Start of put area. */
char* _IO_write_ptr; /* Current put pointer. */
char* _IO_write_end; /* End of put area. */
char* _IO_buf_base; /* Start of reserve area. */
char* _IO_buf_end; /* End of reserve area. */
/* The following fields are used to support backing up and undo. */
char *_IO_save_base; /* Pointer to start of non-current get area. */
char *_IO_backup_base; /* Pointer to first valid character of backup area */
char *_IO_save_end; /* Pointer to end of non-current get area. */
struct _IO_marker *_markers;
struct _IO_FILE *_chain;
int _fileno;
#if 0
int _blksize;
#else
int _flags2;
#endif
_IO_off_t _old_offset; /* This used to be _offset but it's too small. */
#define __HAVE_COLUMN /* temporary */
/* 1+column number of pbase(); 0 is unknown. */
unsigned short _cur_column;
signed char _vtable_offset;
char _shortbuf[1];
/* char* _save_gptr; char* _save_egptr; */
_IO_lock_t *_lock;
#ifdef _IO_USE_OLD_IO_FILE
};
```
```c
// libio/libioP.h
struct _IO_jump_t
{
JUMP_FIELD(size_t, __dummy);
JUMP_FIELD(size_t, __dummy2);
JUMP_FIELD(_IO_finish_t, __finish);
JUMP_FIELD(_IO_overflow_t, __overflow);
JUMP_FIELD(_IO_underflow_t, __underflow);
JUMP_FIELD(_IO_underflow_t, __uflow);
JUMP_FIELD(_IO_pbackfail_t, __pbackfail);
/* showmany */
JUMP_FIELD(_IO_xsputn_t, __xsputn);
JUMP_FIELD(_IO_xsgetn_t, __xsgetn);
JUMP_FIELD(_IO_seekoff_t, __seekoff);
JUMP_FIELD(_IO_seekpos_t, __seekpos);
JUMP_FIELD(_IO_setbuf_t, __setbuf);
JUMP_FIELD(_IO_sync_t, __sync);
JUMP_FIELD(_IO_doallocate_t, __doallocate);
JUMP_FIELD(_IO_read_t, __read);
JUMP_FIELD(_IO_write_t, __write);
JUMP_FIELD(_IO_seek_t, __seek);
JUMP_FIELD(_IO_close_t, __close);
JUMP_FIELD(_IO_stat_t, __stat);
JUMP_FIELD(_IO_showmanyc_t, __showmanyc);
JUMP_FIELD(_IO_imbue_t, __imbue);
#if 0
get_column;
set_column;
#endif
};
```
大概了解 struct 後,來看看 scanf、gets、fgets 之類會用到 stdin 的 function 是如何運作的。
# scanf
```c
// stdio-common/isoc99_scanf.c
/* Read formatted input from stdin according to the format string FORMAT. */
/* VARARGS1 */
int
__isoc99_scanf (const char *format, ...)
{
va_list arg;
int done;
#ifdef _IO_MTSAFE_IO
_IO_acquire_lock_clear_flags2 (stdin);
#endif
stdin->_flags2 |= _IO_FLAGS2_SCANF_STD;
va_start (arg, format);
done = _IO_vfscanf (stdin, format, arg, NULL);
va_end (arg);
#ifdef _IO_MTSAFE_IO
_IO_release_lock (stdin);
#endif
return done;
}
```
- `_flags2` 加了 `_IO_FLAGS2_SCANF_STD`
- 實際用 gdb trace 後,發現進入 `_IO_vfscanf` 的部分是 call `_IO_vfscanf_internal`,隨後也找到了以下 code
```c
// stdio-common/vfscanf.c
ldbl_strong_alias (_IO_vfscanf_internal, _IO_vfscanf)
ldbl_hidden_def (_IO_vfscanf_internal, _IO_vfscanf)
```
```c
// stdio-common/vfscanf.c
int
_IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
int *errp)
#endif
{
va_list arg;
const CHAR_T *f = format;
UCHAR_T fc; /* Current character of the format. */
WINT_T done = 0; /* Assignments done. */
size_t read_in = 0; /* Chars read in. */
WINT_T c = 0; /* Last char read. */
int width; /* Maximum field width. */
int flags; /* Modifiers for current format element. */
int errval = 0;
#ifndef COMPILE_WSCANF
locale_t loc = _NL_CURRENT_LOCALE;
struct __locale_data *const curctype = loc->__locales[LC_CTYPE];
#endif
/* Errno of last failed inchar call. */
int inchar_errno = 0;
/* Status for reading F-P nums. */
char got_digit, got_dot, got_e, negative;
/* If a [...] is a [^...]. */
CHAR_T not_in;
#define exp_char not_in
/* Base for integral numbers. */
int base;
/* Decimal point character. */
#ifdef COMPILE_WSCANF
wint_t decimal;
#else
const char *decimal;
#endif
/* The thousands character of the current locale. */
#ifdef COMPILE_WSCANF
wint_t thousands;
#else
const char *thousands;
#endif
struct ptrs_to_free *ptrs_to_free = NULL;
/* State for the conversions. */
mbstate_t state;
/* Integral holding variables. */
union
{
long long int q;
unsigned long long int uq;
long int l;
unsigned long int ul;
} num;
/* Character-buffer pointer. */
char *str = NULL;
wchar_t *wstr = NULL;
char **strptr = NULL;
ssize_t strsize = 0;
/* We must not react on white spaces immediately because they can
possibly be matched even if in the input stream no character is
available anymore. */
int skip_space = 0;
/* Workspace. */
CHAR_T *tw; /* Temporary pointer. */
struct char_buffer charbuf;
scratch_buffer_init (&charbuf.scratch);
#ifdef __va_copy
__va_copy (arg, argptr);
#else
arg = (va_list) argptr;
#endif
#ifdef ORIENT
ORIENT;
#endif
```
- 設定一大坨區域變數
```c
ARGCHECK (s, format);
```
- 作基本檢查
- 檢查 `(s->_IO_file_flags & _IO_MAGIC_MASK) != _IO_MAGIC`
- 也就是 `(s->_flags & 0xffff0000) != 0xFBAD0000`(C 中 `!=` 的優先權高於 `&`,所以括號不能省略)
- 不一樣就直接 return
- 檢查 `if (s->_flags & _IO_NO_READS)`
- `_IO_NO_READS` 為 `4`
- 啟用 `_IO_NO_READS` 表示不能讀,若此條件成立則 return -1
- 檢查 `else if (format == NULL)`
- 若此條件成立也 return -1
:::info
此階段可知道,若要能繼續執行 scanf,則
- `s->_flags` 要 `0xFBADXXXX` 開頭
- `s->_flags & 4` 需要等於 0
- `format` 不能是 `NULL`
:::
```c
{
#ifndef COMPILE_WSCANF
struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
#endif
/* Figure out the decimal point character. */
#ifdef COMPILE_WSCANF
decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
#else
decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
#endif
/* Figure out the thousands separator character. */
#ifdef COMPILE_WSCANF
thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
#else
thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
if (*thousands == '\0')
thousands = NULL;
#endif
}
```
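- 從目前 locale 的 `LC_NUMERIC` 取出小數點字元 (`decimal`) 與千分位分隔字元 (`thousands`),供之後解析數字時使用
- 非 `COMPILE_WSCANF` 的版本中,若千分位字串為空字串,則把 `thousands` 設為 `NULL`
- 這段與讀取字串 (`%s`) 的流程無關,感覺不是很重要,可以先略過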
```c
/* Lock the stream. */
LOCK_STREAM (s);
```
```c
// stdio-common/vfscanf.c
#define LOCK_STREAM(S) \
__libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
_IO_flockfile (S)
```
```c
// sysdeps/generic/libc-lock.h
/* Start a critical region with a cleanup function */
#define __libc_cleanup_region_start(DOIT, FCT, ARG) \
{ \
typeof (***(FCT)) *__save_FCT = (DOIT) ? (FCT) : 0; \
typeof (ARG) __save_ARG = ARG; \
/* close brace is in __libc_cleanup_region_end below. */
/* End a critical region started with __libc_cleanup_region_start. */
#define __libc_cleanup_region_end(DOIT) \
if ((DOIT) && __save_FCT != 0) \
(*__save_FCT)(__save_ARG); \
}
```
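- `__libc_cleanup_region_start(DOIT, FCT, ARG)` 本身只是開啟一個 `{` 區塊,並把 `FCT`、`ARG` 存進 `__save_FCT`、`__save_ARG`,此時並不會呼叫 `FCT`
- 真正的 `(*__save_FCT)(__save_ARG)` 是寫在與之配對的 `__libc_cleanup_region_end(DOIT)` 裡,而且只有 `DOIT` 為真時才會執行
- 所以 `LOCK_STREAM(S)` 是先把 `_IO_funlockfile` 與 `S` 登記為 cleanup,接著立刻呼叫 `_IO_flockfile(S)` 上鎖;解鎖發生在之後配對的區塊結尾(推測是 `UNLOCK_STREAM`,此處未列出)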
- 目前無法完全理解完這邊的code,但有追查到 `__funlockfile` 有兩個版本
```c
// sysdeps/pthread/funlockfile.c
void
__funlockfile (FILE *stream)
{
_IO_lock_unlock (*stream->_lock);
}
strong_alias (__funlockfile, _IO_funlockfile)
weak_alias (__funlockfile, funlockfile)
```
```c
// stdio-common/funlockfile.c
void
__funlockfile (FILE *stream)
{
/* Do nothing. Using this version does not do any locking. */
}
weak_alias (__funlockfile, _IO_funlockfile)
weak_alias (__funlockfile, funlockfile);
```
- 猜測原因是多執行緒時才需要考慮到同步問題,進而使用到 lock 機制,單一 thread 就不需要 lock
```c
#ifndef COMPILE_WSCANF
/* From now on we use `state' to convert the format string. */
memset (&state, '\0', sizeof (state));
#endif
/* Run through the format string. */
while (*f != '\0')
{
```
- 接下來就是 parse format,這邊主要關注 string 的部分
```c
...
fc = *f++;
if (fc != '%')
{
/* Remember to skip spaces. */
if (ISSPACE (fc))
{
skip_space = 1;
continue;
}
/* Read a character. */
c = inchar ();
/* Characters other than format specs must just match. */
if (__glibc_unlikely (c == EOF))
input_error ();
/* We saw white space char as the last character in the format
string. Now it's time to skip all leading white space. */
if (skip_space)
{
while (ISSPACE (c))
if (__glibc_unlikely (inchar () == EOF))
input_error ();
skip_space = 0;
}
if (__glibc_unlikely (c != fc))
{
ungetc (c, s);
conv_error ();
}
continue;
}
```
- 這邊注意到 inchar() 實際就是輸入的部分
```c
// stdio-common/vfscanf.c
# define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
: ((c = _IO_getc_unlocked (s)), \
(void) (c != EOF \
? ++read_in \
: (size_t) (inchar_errno = errno)), c))
```
- c 一開始初始化為 0,不為 EOF
```c
// libio/bits/libio.h
#if __GNUC__ >= 3
# define _IO_BE(expr, res) __builtin_expect ((expr), res)
#else
# define _IO_BE(expr, res) (expr)
#endif
#define _IO_getc_unlocked(_fp) \
(_IO_BE ((_fp)->_IO_read_ptr >= (_fp)->_IO_read_end, 0) \
? __uflow (_fp) : *(unsigned char *) (_fp)->_IO_read_ptr++)
```
第一次 scanf 時,`_IO_read_ptr` 和 `_IO_read_end` 皆為 `0`,所以會走 `__uflow (_fp)`
```c
// libio/genops.c
int
__uflow (_IO_FILE *fp)
{
if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
return EOF;
if (fp->_mode == 0)
_IO_fwide (fp, -1);
if (_IO_in_put_mode (fp))
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr++;
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr++;
}
if (_IO_have_markers (fp))
{
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
return _IO_UFLOW (fp);
}
```
```c
#if _IO_JUMPS_OFFSET
# define _IO_JUMPS_FUNC(THIS) \
(IO_validate_vtable \
(*(struct _IO_jump_t **) ((void *) &_IO_JUMPS_FILE_plus (THIS) \
+ (THIS)->_vtable_offset)))
# define _IO_vtable_offset(THIS) (THIS)->_vtable_offset
#else
# define _IO_JUMPS_FUNC(THIS) (IO_validate_vtable (_IO_JUMPS_FILE_plus (THIS)))
# define _IO_vtable_offset(THIS) 0
#endif
```
- 這邊我用 IDA 看是沒有對應這部分的 code,應該是沒啟用 `_IO_JUMPS_OFFSET`,所以`# define _IO_vtable_offset(THIS) 0` 讓 `_IO_vtable_offset (fp) == 0` 恆成立,編譯器優化後就直接去除這部分的 code
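- `_IO_fwide (fp, -1) != -1` 成立就會直接 return EOF,也就是要求 `_IO_fwide (fp, -1)` 的結果為 -1:`fp->_mode` 必須是 -1(byte-oriented);若 `fp->_mode` 原本為 0(尚未決定方向),`_IO_fwide` 會先把它設成 -1 再回傳,同樣可以通過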
:::info
這邊知道了要讓 scanf 繼續下去則要滿足
`fp->_mode == -1`
:::
- 一開始還不會啟用 `_IO_CURRENTLY_PUTTING` flag,所以不會進 `if (_IO_in_put_mode (fp))`
- `if (fp->_IO_read_ptr < fp->_IO_read_end)` 這邊兩者都是 0
- `if (_IO_in_backup (fp))` 沒啟用 `_IO_IN_BACKUP` flag 也不會進
- `if (_IO_have_markers (fp))` `fp->_markers` 為 0 也不會進
- `else if (_IO_have_backup (fp))` `(fp)->_IO_save_base` 為 NULL 也不會進
- 最後 `return _IO_UFLOW (fp);`
```c
// libio/libioP.h
/* The 'uflow' hook returns the next character in the input stream
(cast to unsigned char), and increments the read position;
EOF is returned on failure.
It matches the streambuf::uflow virtual function, which is not in the
cfront implementation, but was added to C++ by the ANSI/ISO committee. */
#define _IO_UFLOW(FP) JUMP0 (__uflow, FP)
#define _IO_WUFLOW(FP) WJUMP0 (__uflow, FP)
#define JUMP0(FUNC, THIS) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS)
```
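- 就是呼叫 `_IO_2_1_stdin_.vtable->__uflow`(`_IO_2_1_stdin_` 是 `struct _IO_FILE_plus` 物件而非指標;另外 `vtable` 會先經過 `_IO_JUMPS_FUNC` 裡的 `IO_validate_vtable` 檢查)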
:::success
若能偽造整個 stdin 結構,符合以下:
- `_IO_2_1_stdin_.file._flags` 為 `0xFBADXXXX` 開頭
- `_IO_2_1_stdin_.file._flags & 4` 等於 0
- `_IO_2_1_stdin_.file._flags & 0x800` 等於 0
- 不啟用 `_IO_CURRENTLY_PUTTING`
- `_IO_2_1_stdin_.file._flags & 0x100` 等於 0
- 不啟用 `_IO_IN_BACKUP`
- `_IO_2_1_stdin_.file._IO_save_base` 等於 0
- `_IO_2_1_stdin_.file._markers` 等於 0
- `_IO_2_1_stdin_.file._IO_read_ptr` >= `_IO_2_1_stdin_.file._IO_read_end`
- `_IO_2_1_stdin_.file._mode == -1`

那就會呼叫 `_IO_2_1_stdin_.vtable->__uflow`;但要注意 `_IO_JUMPS_FUNC` 會先用 `IO_validate_vtable` 檢查 `vtable`,偽造的 vtable 指標必須能通過該檢查
:::
```c
/* This is the start of the conversion string. */
flags = 0;
/* Initialize state of modifiers. */
argpos = 0;
/* Prepare temporary buffer. */
char_buffer_rewind (&charbuf);
/* Check for a positional parameter specification. */
if (ISDIGIT ((UCHAR_T) *f))
{
argpos = read_int ((const UCHAR_T **) &f);
if (*f == L_('$'))
++f;
else
{
/* Oops; that was actually the field width. */
width = argpos;
argpos = 0;
goto got_width;
}
}
```
- 若是常見的 `scanf("%20s", buf)` 用法的話
- width 變成 20
- goto got_width
```c
got_width:
if (width == 0)
width = -1;
/* Check for type modifiers. */
switch (*f++)
{
...
default:
/* Not a recognized modifier. Backup. */
--f;
break;
}
/* End of the format string? */
if (__glibc_unlikely (*f == L_('\0')))
conv_error ();
/* Find the conversion specifier. */
fc = *f++;
if (skip_space || (fc != L_('[') && fc != L_('c')
&& fc != L_('C') && fc != L_('n')))
{
...
}
switch (fc)
{
...
case L_('s'): /* Read a string. */
if (!(flags & LONG))
{
STRING_ARG (str, char, 100);
c = inchar ();
if (__glibc_unlikely (c == EOF))
input_error ();
```