# 以 `cpfile.c` 來探討 CSAPP RIO package

:::warning
TODO: Try to explain usrbuf vs. internal buf
:::

# 大綱
[TOC]

* Call relationships among the functions in `cpfile.c`:

```shell
cpfile.c
└── Rio_readlineb()
    └── rio_readlineb()
        └── rio_read()
            └── read()
└── Rio_writen()
    └── rio_writen()
        └── write()
```

### `cpfile.c` 分析
:::info
* purpose:
  * in the while loop
    * robustly read a line through the internal buffer `rio` into the user buffer `buf`
    * write `n` bytes from `buf` to destination `fd` (STDOUT)
:::

```cpp
#include "csapp.h" //P604

int main(int argc, char **argv)
{
    int n;
    rio_t rio;
    char buf[MAXLINE];

    Rio_readinitb(&rio, STDIN_FILENO);
    while((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0)
        Rio_writen(STDOUT_FILENO, buf, n);
}
```

### `Rio_readlineb()` 分析
:::info
wrapper function of `rio_readlineb()`
* just `rio_readlineb()` plus an error check
:::

```cpp
ssize_t Rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen)
{
    ssize_t rc;

    if ((rc = rio_readlineb(rp, usrbuf, maxlen)) < 0)
        unix_error("Rio_readlineb error");
    return rc;
}
```

### `rio_readlineb()` 分析
:::info
* What's the purpose of `rio_readlineb()`?
  * robustly read a text line (buffered)
    * buffered: means we use `rio_t` type structure as buffer

  ```cpp
  /* Persistent state for the robust I/O (Rio) package */
  #define RIO_BUFSIZE 8192
  typedef struct {
      int rio_fd;                /* descriptor for this internal buf */
      int rio_cnt;               /* unread bytes in internal buf */
      char *rio_bufptr;          /* next unread byte in internal buf */
      char rio_buf[RIO_BUFSIZE]; /* internal buffer */
  } rio_t;
  ```
* What are the input parameters?
  * `rp` : pointer to the internal buffer (`rio_t`)
  * `usrbuf` : destination buffer that receives the line
  * `maxlen` : maximum line length including the terminating NUL byte (at most `maxlen - 1` bytes are read)
:::

Code
```cpp=1
/*
 * rio_readlineb - robustly read a text line (buffered)
 */
ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen)
{
    int n, rc;
    char c, *bufp = usrbuf;

    for (n = 1; n < maxlen; n++) {
        if ((rc = rio_read(rp, &c, 1)) == 1) {
            *bufp++ = c;
            if (c == '\n')
                break;
        } else if (rc == 0) {
            if (n == 1)
                return 0; /* EOF, no data read */
            else
                break;    /* EOF, some data was read */
        } else
            return -1;    /* error */
    }
    *bufp = 0;
    return n;
}
```

:::warning
Note: in this listing the returned `n` equals the number of bytes stored in `usrbuf` only when the loop stops on `\n`. When it stops at EOF after some data was read, or because `maxlen - 1` bytes were stored, `n` is one larger than the number of bytes stored. The CS:APP 3e `csapp.c` handles this by doing `n++` before the `break` on `\n` and returning `n - 1`.
:::

---
### `rio_read()` 分析
Code:
* What is `rio_read()`?
  * buffered version of Unix `read()`

```cpp=1
/*
 * rio_read - This is a wrapper for the Unix read() function that
 * transfers min(n, rio_cnt) bytes from an internal buffer to a user
 * buffer, where n is the number of bytes requested by the user and
 * rio_cnt is the number of unread bytes in the internal buffer. On
 * entry, rio_read() refills the internal buffer via a call to
 * read() if the internal buffer is empty.
 */
static ssize_t rio_read(rio_t *rp, char *usrbuf, size_t n)
{
    int cnt;

    while (rp->rio_cnt <= 0) { /* Refill if buf is empty */
        rp->rio_cnt = read(rp->rio_fd, rp->rio_buf, sizeof(rp->rio_buf));
        if (rp->rio_cnt < 0) {
            if (errno != EINTR) /* Interrupted by sig handler return */
                return -1;
        }
        else if (rp->rio_cnt == 0) /* EOF */
            return 0;
        else
            rp->rio_bufptr = rp->rio_buf; /* Reset buffer ptr */
    }

    /* Copy min(n, rp->rio_cnt) bytes from internal buf to user buf */
    cnt = n;
    if (rp->rio_cnt < n)
        cnt = rp->rio_cnt;
    memcpy(usrbuf, rp->rio_bufptr, cnt);
    rp->rio_bufptr += cnt;
    rp->rio_cnt -= cnt;
    return cnt;
}
```

:::warning
如果我們從 stdin 輸入 `hello\n` 這段文字的話
* `rio_read()` 只需 做一次 `read()` system call 來填滿 <br>internal buffer `rp->rio_buf`, <br>但是 `rio_readlineb` 為了判斷何時該停止讀取 (e.g.
遇到 newline character `\n`) 需要呼叫 6 次 `rio_read()`, 所以如果我們要讀取很多個 byte 就會出現效能問題
:::

:::success
TODO: 改善 rio_package 使 rio_readlineb() 能在一次讀完一整個 單字時,就判斷是否該停止讀取,而不是每讀一個 `byte` 就要判斷是否為 `\n` or `EOF`

Maybe 參考 http parser?
:::

### `Rio_writen()` 分析
:::info
* Purpose:
  * wrapper function of `rio_writen()`
  * just `rio_writen()` + an error check
:::

```cpp
void Rio_writen(int fd, void *usrbuf, size_t n)
{
    if (rio_writen(fd, usrbuf, n) != n)
        unix_error("Rio_writen error");
}
```

### `rio_writen()` 分析
* What's the purpose of `rio_writen()`?
  * write `n` bytes from `usrbuf` to destination `fd`
* What are the input parameters?
  * `int fd` : destination file descriptor to write the data to
  * `void *usrbuf` : the source buffer
  * `size_t n` : number of bytes to write
* What's the function behavior?
  * line 11:
    * while `nleft > 0`, line 12 tries to write `nleft` bytes to the destination `fd` and assigns the return value to `nwritten`; if `nwritten <= 0`, no bytes were written or some error occurred,
  * line 13:
    * so we check the `errno`:
      * if errno == EINTR
        * means our system call was interrupted, so `nwritten` is set to 0 and `write()` is called again
      * else
        * there's some other error,
        * return -1
        * and the `errno` is set by `write()`
  * line 18:
    * we update the value of `nleft`
    * and move bufp to the next position in `usrbuf` we haven't written.
* What does it mean: unbuffered?
  * because there is no `rio_t` internal buffer between `usrbuf` and our destination `fd`.
* Note: `EINTR == 4` (its value on Linux)

```cpp=1
/*
 * rio_writen - robustly write n bytes (unbuffered)
 */
/* $begin rio_writen */
ssize_t rio_writen(int fd, void *usrbuf, size_t n)
{
    size_t nleft = n;
    ssize_t nwritten;
    char *bufp = usrbuf;

    while (nleft > 0) {
        if ((nwritten = write(fd, bufp, nleft)) <= 0) {
            if (errno == EINTR) /* interrupted by sig handler return */
                nwritten = 0;   /* and call write() again */
            else
                return -1;      /* errno set by write() */
        }
        nleft -= nwritten;
        bufp += nwritten;
    }
    return n;
}
/* $end rio_writen */
```

## `rio_readnb()` vs.
`rio_readn()` 差別

### `rio_readnb()`:
```cpp
/*
 * rio_readnb - Robustly read n bytes (buffered)
 */
/* $begin rio_readnb */
ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n)
{
    size_t nleft = n;
    ssize_t nread;
    char *bufp = usrbuf;

    while (nleft > 0) {
        if ((nread = rio_read(rp, bufp, nleft)) < 0) {
            if (errno == EINTR) /* interrupted by sig handler return */
                nread = 0;      /* call read() again */
            else
                return -1;      /* errno set by read() */
        }
        else if (nread == 0)
            break;              /* EOF */
        nleft -= nread;
        bufp += nread;
    }
    return (n - nleft);         /* return >= 0 */
}
/* $end rio_readnb */
```

### `rio_readn()`:
```cpp
/*
 * rio_readn - robustly read n bytes (unbuffered)
 */
/* $begin rio_readn */
ssize_t rio_readn(int fd, void *usrbuf, size_t n)
{
    size_t nleft = n;
    ssize_t nread;
    char *bufp = usrbuf;

    while (nleft > 0) {
        if ((nread = read(fd, bufp, nleft)) < 0) {
            if (errno == EINTR) /* interrupted by sig handler return */
                nread = 0;      /* and call read() again */
            else
                return -1;      /* errno set by read() */
        }
        else if (nread == 0)
            break;              /* EOF */
        nleft -= nread;
        bufp += nread;
    }
    return (n - nleft);         /* return >= 0 */
}
/* $end rio_readn */
```

:::info
* What's the difference between `rio_readn()` and `rio_readnb()`:
  * in definition:

  ```cpp
  ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n)
  ssize_t rio_readn(int fd, void *usrbuf, size_t n)
  ```
  * `rio_readnb()` reads n bytes from the internal buffer pointed to by `rp` to `usrbuf`
  * `rio_readn()` reads n bytes directly from source `fd` to `usrbuf`
    * That's why it is called unbuffered
* :question: What's the difference between `rio_readlineb()` and `rio_readnb()`?
  * in definition:

  ```cpp
  ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n)
  ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen)
  ```
  * `rio_readnb()` : read n bytes
  * `rio_readlineb()` : read a text line with `maxlen` specified
:::

# 改寫 cpfile.c
## file structure
```shell
rio_read/
├── Makefile
├── csapp.c
├── csapp.h
├── csapp.o
├── foo.txt
├── read
├── read.c
└── read.o
```

## Makefile
[Makefile 筆記](/zEFzk-5zTw6-YfH8fN-eBQ?both)
```makefile
target := read
objs := read.o csapp.o
deps := $(objs:%.o=.%.o.d)

CC := gcc
CFLAGS := -g -pthread

all: $(target)

$(target): $(objs)
	$(CC) $(CFLAGS) -o $@ $^

%.o: %.c
	$(CC) $(CFLAGS) -o $@ -c -MMD -MF .$@.d $<

.PHONY: generate
generate:
	@echo "generate foo.txt..."
	-rm -f foo.txt
	# download some text from website
	@wget https://randomword.com/paragraph -O foo.txt

.PHONY: clean
clean:
	-rm -f $(objs) $(target) $(deps)

-include $(deps)
```

:::success
TODO: 改寫 read.c 使他能用 arg 來讀取檔案
```shell
$ ./read foo.txt
```
:::

## read.c --argv
```cpp
#include "csapp.h"

int main(int argc, char **argv)
{
    int n;
    rio_t rio;
    char buf[MAXLINE];
    int fd1;

    fd1 = Open(argv[1], O_RDWR, 0);
    Rio_readinitb(&rio, fd1);
    while((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0)
        Rio_writen(STDOUT_FILENO, buf, n);
    Close(fd1);
}
```

# Reference:
* [CMU csapp 書籍資料](http://csapp.cs.cmu.edu/3e/ics3/code/src/csapp.c)