Try   HackMD

Linux Netfilter

tags: linux netfilter kernel module

Netfilter hooks and packet flow

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →
see: https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/netfilter.h#L42

There are five hook points that you can attach to in order to capture network packets.

  • PRE_ROUTING: sees every incoming packet, regardless of whether it is destined for a local process. As a result, packets that are going to be forwarded can also be captured at this point
  • LOCAL_INPUT: packets delivered to local processes in the higher layers, e.g. SMB packets
  • LOCAL_OUTPUT: packets sent by local processes in the higher layers
  • FORWARDED: packets that are routed through this host but are not destined for it
  • POST_ROUTING: every outgoing packet

你可以決定要在哪個點 hook:PRE_ROUTING 就是進來的所有封包;LOCAL_INPUT 是指只有送給你的上層 application 的封包,不包含 forwarded 封包;FORWARDED 就是指路過但不是給你的封包;LOCAL_OUTPUT 跟 POST_ROUTING 就不用說了吧。 ztex

How does netfilter work? struct nf_hook_ops

see:

see: https://elixir.bootlin.com/linux/v4.4.180/source/include/linux/netfilter.h#L171 — nf_hook_thresh ("call a netfilter hook") is what gets triggered at a hook point; it is in charge of executing the registered hook functions.

  • NF_IP_PRE_ROUTING
    Image Not Showing Possible Reasons
    • The image file may be corrupted
    • The server hosting the image is unavailable
    • The image path is incorrect
    • The image format is not supported
    Learn More →
  • NF_IP_LOCAL_IN
    Image Not Showing Possible Reasons
    • The image file may be corrupted
    • The server hosting the image is unavailable
    • The image path is incorrect
    • The image format is not supported
    Learn More →
  • NF_IP_FORWARD
    Image Not Showing Possible Reasons
    • The image file may be corrupted
    • The server hosting the image is unavailable
    • The image path is incorrect
    • The image format is not supported
    Learn More →
  • NF_IP_LOCAL_OUT
    Image Not Showing Possible Reasons
    • The image file may be corrupted
    • The server hosting the image is unavailable
    • The image path is incorrect
    • The image format is not supported
    Learn More →
  • NF_IP_POST_ROUTING
    Image Not Showing Possible Reasons
    • The image file may be corrupted
    • The server hosting the image is unavailable
    • The image path is incorrect
    • The image format is not supported
    Learn More →

Note that the netfilter mechanism has changed a lot across Linux versions. This article was tested on Linux 4.4.180.

ztex

Make a netfilter that filters out every SMB packet whose payload does not consist entirely of 0xff bytes

see my repo: https://github.com/tony2037/synonetfilter

SMB2 spec

see:

code

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include "xxxxnetfilter.h"
...
/* Hook descriptors; allocated in xxxxNETFILTER_init(), released in xxxxNETFILTER_exit(). */
static struct nf_hook_ops *nfhoIN = NULL;
static struct nf_hook_ops *nfhoOUT = NULL;

/*
 * filterSMB2Payload - check whether a payload consists solely of 0xff bytes.
 * @data: payload bytes to inspect
 * @len:  number of bytes in @data
 *
 * Return: 0 when every byte equals 0xff (including the len == 0 case),
 *         1 as soon as any other byte value is found.
 */
static unsigned int filterSMB2Payload(unsigned char *data, unsigned int len)
{
	unsigned int i;

	for (i = 0; i < len; i++) {
		if (data[i] != 0xff)
			return 1;
	}

	return 0;
}

static void printSignificantPayload(unsigned char *data, unsigned int len, unsigned int range)
{
	...
}

static unsigned int getSMB2Header(struct sk_buff *skb, struct SMB2_HEADER *smbhdr)
{
	...
}

static unsigned int hfuncIN(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
	...
}

/*
 * hfuncOUT - hook callback registered at NF_INET_LOCAL_OUT.
 *
 * Accepts UDP traffic and TCP traffic (TCP segments whose source port is 445
 * get extra SMB2 processing in the elided parts).
 *
 * Return: NF_ACCEPT or NF_DROP.
 */
static unsigned int hfuncOUT(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
	struct iphdr *iph = NULL;
	struct tcphdr *tcph = NULL;
	struct SMB2_HEADER smbhdr = {0};

	if (!skb) {
		return NF_ACCEPT;
	}

	iph = ip_hdr(skb);
	if (iph->protocol == IPPROTO_TCP) {
		tcph = tcp_hdr(skb);
		/* The port is in network byte order; ntohs(x) is __be16_to_cpu(x). */
		if (ntohs(tcph->source) == 445) {
			...
			/* No SMB2 header could be extracted: let the packet through. */
			if (!getSMB2Header(skb, &smbhdr)) {
				goto forward;
			}
			...
		}
		goto forward;
	} else if (iph->protocol == IPPROTO_UDP) {
		goto forward;
	}

	/*
	 * NOTE(review): every IPv4 packet that is neither TCP nor UDP
	 * (e.g. ICMP) ends up here and is dropped - confirm this is intended.
	 */
	return NF_DROP;

forward:
	return NF_ACCEPT;
}

/*
 * xxxxNETFILTER_init - allocate both hook descriptors and register them.
 *
 * Return: 0 on success, -ENOMEM when an allocation fails, or the error
 *         returned by nf_register_net_hook(). On failure nothing stays
 *         registered and all memory is released.
 */
static int __init xxxxNETFILTER_init(void)
{
	int ret = -ENOMEM;

	nfhoIN = kcalloc(1, sizeof(*nfhoIN), GFP_KERNEL);
	nfhoOUT = kcalloc(1, sizeof(*nfhoOUT), GFP_KERNEL);
	/* Both descriptors are dereferenced below, so bail out if either is missing. */
	if (!nfhoIN || !nfhoOUT)
		goto err_free;
	...
	nfhoOUT->hook = hfuncOUT;		/* hook function */
	nfhoOUT->hooknum = NF_INET_LOCAL_OUT;	/* locally generated, outgoing packets */
	nfhoOUT->pf = PF_INET;			/* IPv4 */
	nfhoOUT->priority = NF_IP_PRI_FIRST;	/* run before the other hooks at this point */

	ret = nf_register_net_hook(&init_net, nfhoIN);
	if (ret)
		goto err_free;

	ret = nf_register_net_hook(&init_net, nfhoOUT);
	if (ret)
		goto err_unregister_in;

	return 0;

err_unregister_in:
	nf_unregister_net_hook(&init_net, nfhoIN);
err_free:
	/* kfree(NULL) is a no-op, so this is safe after a partial allocation. */
	kfree(nfhoOUT);
	kfree(nfhoIN);
	nfhoOUT = NULL;
	nfhoIN = NULL;
	return ret;
}

/* xxxxNETFILTER_exit - unregister both hooks and free their descriptors. */
static void __exit xxxxNETFILTER_exit(void)
{
	nf_unregister_net_hook(&init_net, nfhoIN);
	kfree(nfhoIN);
	nf_unregister_net_hook(&init_net, nfhoOUT);
	kfree(nfhoOUT);
}

module_init(xxxxNETFILTER_init);
module_exit(xxxxNETFILTER_exit);

在 linux kernel 中要寫一個 netfilter,一個方便的方式就是寫一支註冊 net hook 的 kernel module。定義 hook operations 的 structure 就是 struct nf_hook_ops:把這個結構體的 hook (hook callback function)、hooknum (hook 的位置)、pf (protocol family)、priority 填好,然後 call nf_register_net_hook (記得在 module exit 時 unregister 並 kfree)。 ztex

這邊有個 dump tcp payload 的小範例 (不考慮 nonlinear) https://stackoverflow.com/questions/29553990/print-tcp-packet-data

struct sk_buff

see: https://people.cs.clemson.edu/~westall/853/notes/skbuff.pdf
also see: https://www.kernel.org/doc/html/v4.14/networking/kapi.html
also see: https://blog.csdn.net/ds1130071727/article/details/96908564
also see: http://www.embeddedlinux.org.cn/linux_net/0596002556/understandlni-CHP-2-SECT-1.html
also see: http://abcdxyzk.github.io/download/kernel/sk_buff详解.pdf
also see: https://eeepage.info/sk_buff-packet/
also see: http://vger.kernel.org/~davem/skb_data.html

hook function 長這個樣子

/*
 * Prototype every netfilter hook callback has to match.
 *   priv  - opaque pointer handed back to the callback (presumably the one
 *           stored in struct nf_hook_ops - confirm against netfilter.h)
 *   skb   - the packet being inspected
 *   state - hook state for this invocation
 * The unsigned int return value is the verdict, e.g. NF_ACCEPT or NF_DROP.
 */
typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state);

其中 struct sk_buff 是一個重要的 structure 定義在 include/linux/skbuff.h

see: http://www.embeddedlinux.org.cn/linux_net/0596002556/understandlni-CHP-2-SECT-1.html

可以看到 struct sk_buff 是個 doubly linked list

這牽扯到 碎片化 (fragment)

圖中顯示幾個重要的 pointer:head 指向頭部;data 指向包括 header 的部份;tail 指向 data 尾部;end 指向整個結構體最後。要拿到 tail 可以這樣:

/* Obtain the tail through the accessor rather than reading the skb field directly. */
tail = skb_tail_pointer(skb);

這牽扯到你的 kernel config 是否定義了 NET_SKBUFF_DATA_USES_OFFSET (tail / end 以 offset 而非指標儲存), see: https://elixir.bootlin.com/linux/v4.4.180/source/include/linux/skbuff.h#L1769

剛提到 data 包括 header, 那要怎麼拿 header ?

#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
...
struct iphdr *iph = NULL;
struct tcphdr *tcph = NULL;
/* Option 1: typed helper accessors. */
iph = ip_hdr(skb);
tcph = tcp_hdr(skb);
/*
 * Option 2: generic header accessors with an explicit cast
 * (presumably equivalent to option 1 - confirm against <linux/ip.h> and <linux/tcp.h>).
 */
iph = (struct iphdr *)skb_network_header(skb);
tcph = (struct tcphdr *)skb_transport_header(skb);
  • Figure 2-4. Before and after: (a)skb_put, (b)skb_push, (c)skb_pull, and (d)skb_reserve
    這邊提到 include/linux/skbuff.h 中定義的幾個重要函式

fragment

我在處理 SMB2 read response 封包時遇到一個奇怪的問題是: skbuff 的 data 包含 ip header + tcp header + NETBIOS + SMB2 header, 但就是沒有看到 payload.

結果我偶然看到 https://elixir.bootlin.com/linux/latest/source/include/linux/skbuff.h#L3285

/**
 *	skb_linearize - convert paged skb to linear one
 *	@skb: buffer to linearize
 *
 *	If there is no free memory -ENOMEM is returned, otherwise zero
 *	is returned and the old skb data released.
 *
 *	An skb that is already linear is left untouched and 0 is returned.
 */
static inline int skb_linearize(struct sk_buff *skb)
{
	return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
}

這個函式會把 fraged skb 變成 linear 的, 直接省去我們的一堆麻煩

往下面追可以看到

/*
 * Pull all skb->data_len bytes through __pskb_pull_tail().
 * Returns 0 when the pull yields a non-NULL pointer, -ENOMEM otherwise.
 */
static inline int __skb_linearize(struct sk_buff *skb)
{
	return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
}
/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 *
 * @skb:   buffer to operate on
 * @delta: number of bytes to move from the fragmented part into the head
 *
 * Returns skb_tail_pointer(skb) after the pull on success, or NULL when
 * expanding the head, cloning a shared fragment, or pulling from a
 * frag_list member fails.
 */
void *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
	/* If skb has not enough free space at tail, get new one
	 * plus 128 bytes for future expansions. If we have enough
	 * room at tail, reallocate without expansion only if skb is cloned.
	 */
	int i, k, eat = (skb->tail + delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
				     GFP_ATOMIC))
			return NULL;
	}

	/* Copy the delta bytes that follow the current head data to the tail. */
	BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
			     skb_tail_pointer(skb), delta));

	/* Optimization: no fragments, no reasons to preestimate
	 * size of pulled pages. Superb.
	 */
	if (!skb_has_frag_list(skb))
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (size >= eat)
			goto pull_pages;
		eat -= size;
	}

	/* If we need update frag list, we are in troubles.
	 * Certainly, it is possible to add an offset to skb data,
	 * but taking into account that pulling is expected to
	 * be very rare operation, it is worth to fight against
	 * further bloating skb head and crucify ourselves here instead.
	 * Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		struct sk_buff *insp = NULL;

		/* Walk the frag_list until the remaining 'eat' bytes are accounted for. */
		do {
			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (!clone)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems.
					 */
					insp = list;
				}
				if (!pskb_pull(list, eat)) {
					kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	eat = delta;
	k = 0;	/* index of the next frags[] slot to keep */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (size <= eat) {
			/* Fragment fully consumed: drop its reference. */
			skb_frag_unref(skb, i);
			eat -= size;
		} else {
			/* Fragment (partly) survives: compact it down to slot k. */
			skb_frag_t *frag = &skb_shinfo(skb)->frags[k];

			*frag = skb_shinfo(skb)->frags[i];
			if (eat) {
				/* Skip the bytes that were already pulled. */
				skb_frag_off_add(frag, eat);
				skb_frag_size_sub(frag, eat);
				if (!i)
					goto end;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

end:
	/* delta bytes moved from the paged part into the head. */
	skb->tail     += delta;
	skb->data_len -= delta;

	if (!skb->data_len)
		skb_zcopy_clear(skb, false);

	return skb_tail_pointer(skb);
}
EXPORT_SYMBOL(__pskb_pull_tail);

可以看到函式幫我們處理 skb_shinfo(skb) 的 frags (這個是拿到 skb 尾部的 shared info), 並!吃!(eat) 進 data_len size