2
linux2021
本題目檢驗學員對 高效 Web 伺服器開發 及 以 sendfile 和 splice 系統呼叫達到 Zero-Copy 的認知
以下程式碼嘗試透過「高效 Web 伺服器開發」提到的 epoll 和「以 sendfile 和 splice 系統呼叫達到 Zero-Copy」提到的 splice 系統呼叫,實作出具體而微的 Port forwarding。考慮一個情境:我們對外有一台防火牆,在 DNS 設定方面,我們設定 ftp.mydomain.com
及 www.mydomain.com
都指向這台防火牆。但我們希望所有 HTTP 連線都重新導向到內部的 192.168.0.2
這台機器上,而所有 FTP 連線都交由 192.168.0.3
來處理。這時候我們就可以使用 port forwarding 的方式來達成。對應的 NAT (Network Address Translation) 的設定如下:
redirect_port tcp 192.168.0.2:80 80
redirect_port tcp 192.168.0.3:20 20
redirect_port tcp 192.168.0.3:21 21
第 1 行的目的就是將 port 80 的 tcp 連線重新導向到 192.168.0.2
的 port 80,而第 2 和第 3 行是將 port 20 及 port 21 的連線交由 192.168.0.3
來處理。在 192.168.0.2
及 192.168.0.3
這二台機器上,我們只要設定它們的 gateway 為防火牆的 IP,例如 192.168.0.1
即可。
使用 splice 系統呼叫,我們有機會在網路介面控制器的支援下,達到 Zero-copy 資料傳輸。
原始程式碼可見 proxy.c
,其 list.h
取自 list.h,改寫自 Linux 核心原始程式碼。
假設本地機器系統 port 8081 已有網頁伺服器在等待連線。proxy
的測試方式為
$ ./proxy 8082 localhost 8081
等程式執行後,在另一個終端機畫面中輸入下列命令:
$ telnet localhost 8082
接著你就可以輸入 HTTP 請求字串,如 GET /index.html。
此外,你還可以把 port 8082 轉向到 Google 首頁:
先找出 www.google.com
的 IP 地址:
$ nslookup www.google.com
得到以下輸出:
Name: www.google.com
Address: 172.217.27.132
修改上述命令:
./proxy 8082 172.217.27.132 80
重複上述 telnet
命令,這時候就會看到 Google 首頁的字串。
以下是 proxy.c
程式碼列表
/* Simple port forwarder
* Uses pipes to splice two sockets together. This should give something
* approaching zero copy, if the NIC driver is capable.
* This method is rather file descriptor intensive (4 fds/connection), so
* make sure you have enough.
*/
#define _GNU_SOURCE 1
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <netdb.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <time.h>
#include <unistd.h>
#include "list.h"
/* Report x through perror(), then terminate the process with exit status 1. */
#define err(x) perror(x), exit(1)
/* Assign to pointer x a zero-filled object of the type it points to (via xmalloc). */
#define NEW(x) ((x) = xmalloc(sizeof(*(x))))
/* Larger of a and b; note that each argument may be evaluated twice. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
/* Added to the current time(NULL) value to compute a connection's expiry. */
static int connection_timeout = 5; /* FIXME: configurable */
/* Report memory exhaustion on stderr and terminate with exit status 1. */
static void oom(void)
{
    fputs("Out of memory\n", stderr);
    exit(1);
}
/*
 * Allocate size bytes of zero-filled memory.
 * A failed allocation is handed to oom(), which ends the process.
 */
void *xmalloc(size_t size)
{
    void *mem = calloc(size, 1);

    if (mem == NULL) {
        oom();
    }
    return mem;
}
/*
 * Resize the allocation old to size bytes and return the new pointer.
 * A failed reallocation is handed to oom(), which ends the process.
 */
void *xrealloc(void *old, size_t size)
{
    void *mem = realloc(old, size);

    if (mem == NULL) {
        oom();
    }
    return mem;
}
/*
 * Look up name/port with getaddrinfo(), using flags as the hint's ai_flags
 * (all other hint fields are zero).
 * Returns the resulting address list. On lookup failure a diagnostic is
 * printed to stderr and the process exits with status 1.
 */
struct addrinfo *resolve(char *name, char *port, int flags)
{
    struct addrinfo hints = {0};
    struct addrinfo *result;

    hints.ai_flags = flags;

    int rc = getaddrinfo(name, port, &hints, &result);
    if (rc == 0) {
        return result;
    }

    fprintf(stderr, "proxy: Cannot resolve %s %s: %s\n", name, port,
            gai_strerror(rc));
    exit(1);
}
/*
 * Turn on O_NONBLOCK for fd.
 *
 * cache may be NULL. When it is non-NULL and holds -1, the flags read with
 * F_GETFL are stored into it; when it holds any other value, that value is
 * used as the base flag set and the F_GETFL call is skipped.
 */
void setnonblock(int fd, int *cache)
{
    int base;

    if (cache != NULL && *cache != -1) {
        base = *cache;
    } else {
        base = fcntl(fd, F_GETFL, 0);
        if (cache != NULL) {
            *cache = base;
        }
    }
    fcntl(fd, F_SETFL, base | O_NONBLOCK);
}
/* Pipe used as the intermediate buffer when splicing data between sockets. */
struct buffer {
/* Pipe descriptors from pipe2(): [0] is spliced out of, [1] is spliced into. */
int pipe[2];
/* Byte count; move_data_in() adds each spliced amount to it. */
int bytes;
};
/* One side of a forwarded connection. */
struct conn {
/* Peer connection on the opposite side; NULL until openconn() links the pair. */
struct conn *other;
/* Socket descriptor of this side. */
int fd;
/* Pipe that data read from fd is spliced into. */
struct buffer buf;
/* Time at which this connection expires (now + connection_timeout). */
time_t expire;
/* Link in expire_list. */
struct list_head expire_node;
};
/* Connections queued for expiry; entries are appended or moved to the tail. */
LIST_HEAD(expire_list);
/* Minimum capacity used when the events array is (re)allocated. */
#define MIN_EVENTS 32
/* Result array handed to epoll_wait(); resized by epoll_add(). */
struct epoll_event *events;
/* num_events: passed to epoll_wait() as maxevents and decremented by epoll_del().
 * max_events: current capacity of the events array. */
int num_events, max_events;
/*
 * Register fd with the epoll instance efd for the event mask revents,
 * storing conn as the event's user data pointer.
 * Returns the result of epoll_ctl(EPOLL_CTL_ADD).
 */
int epoll_add(int efd, int fd, int revents, void *conn)
{
struct epoll_event ev = {.events = revents, .data.ptr = conn};
/* EEE is the blank left for the exercise.
 * NOTE(review): presumably an expression on num_events, since epoll_del()
 * decrements that counter; confirm against the answer key. */
if (EEE >= max_events) {
/* Grow the events array: double the capacity, but never below MIN_EVENTS. */
max_events = MAX(max_events * 2, MIN_EVENTS);
events = xrealloc(events, sizeof(struct epoll_event) * max_events);
}
return epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev);
}
/*
 * Remove fd from the epoll instance efd and decrement num_events.
 * Returns the result of epoll_ctl(EPOLL_CTL_DEL).
 */
int epoll_del(int efd, int fd)
{
    num_events -= 1;
    assert(num_events >= 0);

    /* The event argument is a dummy non-NULL pointer. */
    struct epoll_event *dummy = (void *) 1L;
    return epoll_ctl(efd, EPOLL_CTL_DEL, fd, dummy);
}
/*
 * Create buffer between two connections: opens the non-blocking pipe
 * that backs buf.
 * Returns buf on success; on failure reports through perror() and
 * returns NULL.
 */
struct buffer *newbuffer(struct buffer *buf)
{
    int rc = pipe2(buf->pipe, O_NONBLOCK);

    if (rc >= 0) {
        return buf;
    }
    perror("pipe");
    return NULL;
}
/* Close both ends of the pipe owned by buf, read end first. */
void delbuffer(struct buffer *buf)
{
    for (int end = 0; end < 2; end++) {
        close(buf->pipe[end]);
    }
}
/*
 * Tear down a single connection: unlink it from the expiry list, close its
 * pipe, remove its socket from the epoll instance efd, close the socket and
 * free the structure. The peer (conn->other) is not touched.
 */
void delconn(int efd, struct conn *conn)
{
    int sock = conn->fd;

    list_del(&conn->expire_node);
    delbuffer(&conn->buf);
    /* Deregister from epoll before the descriptor is closed. */
    epoll_del(efd, sock);
    close(sock);
    free(conn);
}
/*
 * Allocate a connection for socket fd, create its pipe buffer, register the
 * socket with the epoll instance efd (edge-triggered, read and write), and
 * append it to expire_list with a deadline of now + connection_timeout.
 *
 * Returns the new connection, or NULL on failure. On failure fd is closed
 * and nothing is left allocated, so callers must not close fd again.
 */
struct conn *newconn(int efd, int fd, time_t now)
{
    struct conn *conn;
    NEW(conn);
    conn->fd = fd;

    /*
     * The failure paths below undo only what has been set up so far.
     * delconn() must not be used here: the connection is not yet linked
     * into expire_list, so list_del() would run on a zeroed node, and
     * before the pipe exists delbuffer() would close descriptor 0.
     */
    if (!newbuffer(&conn->buf)) {
        close(fd);
        free(conn);
        return NULL;
    }
    if (epoll_add(efd, fd, EPOLLIN | EPOLLOUT | EPOLLET, conn) < 0) {
        perror("epoll");
        delbuffer(&conn->buf);
        /* NOTE(review): epoll_del() is kept from the original failure path
         * so the num_events accounting is unchanged; confirm it matches
         * what epoll_add() does when epoll_ctl() fails. */
        epoll_del(efd, fd);
        close(fd);
        free(conn);
        return NULL;
    }
    conn->expire = now + connection_timeout;
    list_add_tail(&conn->expire_node, &expire_list);
    return conn;
}
/*
 * Process incoming connection: accept one client on the listening socket
 * lfd, make it non-blocking (cache is forwarded to setnonblock()) and wrap
 * it in a new connection. An accept() failure is reported through perror().
 */
void new_request(int efd, int lfd, int *cache, time_t now)
{
    int client = accept(lfd, NULL, NULL);

    if (client >= 0) {
        setnonblock(client, cache);
        newconn(efd, client, now);
        return;
    }
    perror("accept");
}
/*
 * Open outgoing connection: create a non-blocking socket, start connecting
 * it to host, wrap it in a new connection and link that connection with
 * other in both directions.
 * Returns the new connection, or NULL if the socket could not be created,
 * connect() failed with something other than EINPROGRESS, or newconn()
 * failed.
 */
struct conn *openconn(int efd,
                      struct addrinfo *host,
                      int *cache,
                      struct conn *other,
                      time_t now)
{
    int sock = socket(host->ai_family, SOCK_STREAM, 0);
    if (sock < 0) {
        return NULL;
    }

    setnonblock(sock, cache);
    if (connect(sock, host->ai_addr, host->ai_addrlen) < 0 &&
        errno != EINPROGRESS) {
        perror("connect");
        close(sock);
        return NULL;
    }

    struct conn *peer = newconn(efd, sock, now);
    if (peer == NULL) {
        return NULL;
    }
    peer->other = other;
    other->other = peer;
    return peer;
}
#define BUFSZ 16384 /* FIXME: configurable */
/*
 * Move from socket to pipe: splice from srcfd into the write end of buf,
 * up to BUFSZ bytes per call, until splice() stops returning data. Every
 * spliced amount is added to buf->bytes.
 * Returns true when splice() reported EAGAIN/EWOULDBLOCK, and false when it
 * returned 0 or failed with any other error.
 */
bool move_data_in(int srcfd, struct buffer *buf)
{
    const unsigned int flags = SPLICE_F_NONBLOCK | SPLICE_F_MOVE;
    int moved;

    while ((moved = splice(srcfd, NULL, buf->pipe[1], NULL, BUFSZ, flags)) > 0) {
        buf->bytes += moved;
    }

    if (moved == 0) {
        return false;
    }
    return errno == EAGAIN || errno == EWOULDBLOCK;
}
/*
 * From pipe to socket: while buf->bytes is positive, splice at most BUFSZ
 * bytes per call from the read end of buf into dstfd.
 * The loop stops when splice() returns 0 or reports EAGAIN/EWOULDBLOCK.
 * Returns false on any other splice() error, true otherwise.
 */
bool move_data_out(struct buffer *buf, int dstfd)
{
while (buf->bytes > 0) {
int bytes = buf->bytes;
if (bytes > BUFSZ) bytes = BUFSZ;
int n = splice(buf->pipe[0], NULL, dstfd, NULL, bytes,
SPLICE_F_NONBLOCK | SPLICE_F_MOVE);
if (n == 0) break;
if (n < 0) {
if (errno == EAGAIN || errno == EWOULDBLOCK) break;
return false;
}
/* FFF is the blank left for the exercise.
 * NOTE(review): presumably it accounts for the n bytes just moved;
 * confirm against the answer key. */
FFF;
}
/* bytes > 0, add dst to epoll set. Otherwise, remove if it was added */
/* NOTE(review): no code implementing the comment above follows here. */
return true;
}
/* Tear down conn together with its peer, if it has one (peer first). */
void closeconn(int efd, struct conn *conn)
{
    struct conn *peer = conn->other;

    if (peer != NULL) {
        delconn(efd, peer);
    }
    delconn(efd, conn);
}
int expire_connections(int efd, time_t now)
{
struct conn *conn, *tmp;
list_for_each_entry_safe (conn, tmp, &expire_list, expire_node) {
if (conn->expire > now) return (conn->expire - now) * 1000;
closeconn(efd, conn);
}
return -1;
}
/*
 * Refresh conn: move it to the tail of expire_list and set its deadline to
 * now + connection_timeout.
 */
void touch_conn(struct conn *conn, time_t now)
{
    struct list_head *node = &conn->expire_node;

    list_del(node);
    list_add_tail(node, &expire_list);
    conn->expire = now + connection_timeout;
}
/*
 * Create the listening socket for address lname and port: resolve the
 * address, set SO_REUSEADDR, bind, listen with a backlog of 20, switch the
 * socket to non-blocking mode and register it with the epoll instance efd
 * for EPOLLIN with a NULL data pointer.
 * Any failing step is reported through err(), which exits the process.
 * Returns the listening descriptor.
 */
int listen_socket(int efd, char *lname, char *port)
{
    struct addrinfo *ai = resolve(lname, port, AI_PASSIVE);

    int sock = socket(ai->ai_family, SOCK_STREAM, 0);
    if (sock < 0) {
        err("socket");
    }

    int reuse = 1;
    if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        err("SO_REUSEADDR");
    }
    if (bind(sock, ai->ai_addr, ai->ai_addrlen) < 0) {
        err("bind");
    }
    if (listen(sock, 20) < 0) {
        err("listen");
    }

    setnonblock(sock, NULL);
    freeaddrinfo(ai);

    if (epoll_add(efd, sock, EPOLLIN, NULL) < 0) {
        err("epoll add listen fd");
    }
    return sock;
}
/*
 * Entry point: proxy inport outhost outport [listenaddr]
 *
 * Listens on listenaddr:inport (default address 0.0.0.0). For every accepted
 * client an outgoing connection to outhost:outport is opened the first time
 * the client becomes readable, and data is then spliced between the two
 * sockets in both directions. Connections whose deadline passes are closed
 * by expire_connections(), whose return value is the next epoll_wait()
 * timeout.
 */
int main(int ac, char **av)
{
    if (ac != 4 && ac != 5) {
        fprintf(stderr, "Usage: proxy inport outhost outport [listenaddr]\n");
        exit(1);
    }
    struct addrinfo *outhost = resolve(av[2], av[3], 0);
    int efd = epoll_create(10);
    if (efd < 0) {
        err("epoll_create");
    }
    /* av[ac] is a null pointer, so av[4] is NULL when listenaddr is absent. */
    int lfd = listen_socket(efd, av[4] ? av[4] : "0.0.0.0", av[1]);
    int cache_in = -1, cache_out = -1;
    int timeo = -1;
    for (;;) {
        int nfds = epoll_wait(efd, events, num_events, timeo);
        if (nfds < 0) {
            perror("epoll");
            continue;
        }
        time_t now = time(NULL);
        /*
         * NOTE(review): a connection freed while handling one event may
         * still be referenced by a later event of the same batch; that case
         * is not handled here.
         */
        for (int i = 0; i < nfds; i++) {
            struct epoll_event *ev = &events[i];
            struct conn *conn = ev->data.ptr;
            /* listen socket */
            if (!conn) {
                if (ev->events & EPOLLIN)
                    new_request(efd, lfd, &cache_in, now);
                continue;
            }
            if (ev->events & (EPOLLERR | EPOLLHUP)) {
                closeconn(efd, conn);
                continue;
            }
            struct conn *other = conn->other;
            /* No attempt for partial close right now */
            if (ev->events & EPOLLIN) {
                touch_conn(conn, now);
                if (!other) {
                    other = openconn(efd, outhost, &cache_out, conn, now);
                    if (!other) {
                        /* The outgoing side could not be opened, so there is
                         * nowhere to forward to: drop this connection rather
                         * than dereference a NULL peer. */
                        closeconn(efd, conn);
                        continue;
                    }
                }
                bool in = move_data_in(conn->fd, &conn->buf);
                bool out = move_data_out(&conn->buf, other->fd);
                if (!in || !out) {
                    closeconn(efd, conn);
                    continue;
                }
                touch_conn(other, now);
            }
            if ((ev->events & EPOLLOUT) && other) {
                if (!move_data_out(&other->buf, conn->fd)) {
                    /* Close both sides: freeing only conn would leave
                     * other->other pointing at freed memory. */
                    closeconn(efd, conn);
                    continue;
                }
                touch_conn(conn, now);
                /* When the pipe filled up could have lost input events.
                 * Unfortunately, splice does not tell us which end was
                 * responsible for 0, so we have to ask explicitly.
                 */
                int len = 0;
                if (ioctl(other->fd, FIONREAD, &len) < 0)
                    perror("ioctl");
                if (len > 0) {
                    if (!move_data_in(other->fd, &other->buf))
                        closeconn(efd, other);
                }
            }
        }
        timeo = expire_connections(efd, now);
    }
    return 0;
}
請補完程式碼。
作答區
EEE = ?
(a)
num_events
(b)
num_events++
(c)
num_events--
(d)
++num_events
(e)
--num_events
FFF = ?
(a)
不需要加入程式碼
(b)
buf->bytes += n
(c)
buf->bytes -= n
(d)
buf->bytes--
(e)
buf->bytes++
延伸問題:
or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up