Try   HackMD

2021q3 Homework1 (quiz1)

contributed by < linD026 >

tags: linux2021

2021 年暑期 第 1 週隨堂測驗題目


解釋程式碼運作原理

Character Device Drivers

/*
 * Callback table for the hideproc character device; it is handed to
 * cdev_init() when the device is registered.
 */
static const struct file_operations fops = {
    .owner   = THIS_MODULE,

    /* data path */
    .read    = device_read,
    .write   = device_write,

    /* lifetime of an open file */
    .open    = device_open,
    .release = device_close,
};

/*
 * Used below both as the number of minors requested from
 * alloc_chrdev_region() and as the minor number passed to MKDEV().
 */
#define MINOR_VERSION 1
/* Name given to the chrdev region, the device class and the device node. */
#define DEVICE_NAME "hideproc"

static struct cdev cdev;
static struct class *hideproc_class = NULL;

/*
 * Excerpt of the module's registration sequence (the enclosing function is
 * not shown): allocate a device number, create the class, register the
 * cdev and finally create the device node.
 */
int err, dev_major;
dev_t dev;
printk(KERN_INFO "@ %s\n", __func__);
/* Dynamically allocate the device numbers; the first minor requested is 0. */
err = alloc_chrdev_region(&dev, 0, MINOR_VERSION, DEVICE_NAME);
/* NOTE(review): err is never checked in this excerpt. */
dev_major = MAJOR(dev);

/* NOTE(review): class_create() reports failure via ERR_PTR(); not checked here. */
hideproc_class = class_create(THIS_MODULE, DEVICE_NAME);

cdev_init(&cdev, &fops);
/*
 * NOTE(review): the region requested above starts at minor 0 and holds
 * MINOR_VERSION (1) minors, yet the cdev and device node use minor
 * MINOR_VERSION (1) - presumably outside the reserved range; confirm.
 */
cdev_add(&cdev, MKDEV(dev_major, MINOR_VERSION), 1);
device_create(hideproc_class, NULL, MKDEV(dev_major, MINOR_VERSION), NULL,
              DEVICE_NAME);

In the kernel, a character-type device is represented by struct cdev, a structure used to register it in the system. Most driver operations use three important structures: struct file_operations, struct file and struct inode.

struct file_operations

#include <linux/fs.h>

struct file_operations {
    struct module *owner;
    loff_t (*llseek) (struct file *, loff_t, int);
    ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
    ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
    [...]
    long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
    [...]
    int (*open) (struct inode *, struct file *);
    int (*flush) (struct file *, fl_owner_t id);
    int (*release) (struct inode *, struct file *);
    [...]

Registration and unregistration of character devices

The registration/unregistration of a device is made by specifying the major and minor. The dev_t type is used to keep the identifiers of a device (both major and minor) and can be obtained using the MKDEV macro.

alloc_chrdev_region

#include <linux/fs.h>

/* Reserve `count` device numbers starting at the caller-chosen `first`. */
int register_chrdev_region(dev_t first, unsigned int count, const char *name);
/*
 * Let the kernel choose the major: reserve `count` minors starting at
 * `baseminor` and return the first device number through `dev`.
 * This is the variant the hideproc module calls.
 */
int alloc_chrdev_region(dev_t *dev, unsigned int baseminor, unsigned int count,
                        const char *name);
/* Release a range obtained from either of the functions above. */
void unregister_chrdev_region(dev_t first, unsigned int count);

After assigning the identifiers, the character device will have to be initialized (cdev_init) and the kernel will have to be notified(cdev_add). The cdev_add function must be called only after the device is ready to receive calls. Removing a device is done using the cdev_del function.

#include <linux/cdev.h>

void cdev_init(struct cdev *cdev, struct file_operations *fops);
int cdev_add(struct cdev *dev, dev_t num, unsigned int count);
void cdev_del(struct cdev *dev);

Device Create

class_create

/* This is a #define to keep the compiler from merging different
 * instances of the __key variable */
#define class_create(owner, name)		\
({						\
	static struct lock_class_key __key;	\
	__class_create(owner, name, &__key);	\
})

__class_create

This is used to create a struct class pointer that can then be used in calls to device_create().
Returns struct class pointer on success, or ERR_PTR() on error.
Note, the pointer created here is to be destroyed when finished by making a call to class_destroy().

device_create

This function can be used by char device classes. A struct device will be created in sysfs, registered to the specified class.
A “dev” file will be created, showing the dev_t for the device, if the dev_t is not 0,0. If a pointer to a parent struct device is passed in, the newly created struct device will be a child of that device in sysfs. The pointer to the struct device will be returned from the call. Any further sysfs files that might be required can be created using this pointer.
Returns struct device pointer on success, or ERR_PTR() on error.


Read and Write

$ ps aux | grep cron
$ pidof cron
$ ls -l /dev/hideproc
crw------- 1 root root 242, 1 Jul 24 00:14 /dev/hideproc
$ pidof cron
673
$ echo "add 673" | sudo tee /dev/hideproc
add 673
$ sudo cat /dev/hideproc
pid: 673
$ echo "del 673" | sudo tee /dev/hideproc
del 673
$ sudo cat /dev/hideproc






Read path (diagram):

console: sudo cat /dev/hideproc
  -> read
  -> file_operations.read
  -> device_read (Device)

Device: hidden_proc linked list
  for each entry (safe): read the node->pid

Device -> console: the messages are sent by using copy_to_user









Write path (diagram):

console: echo "add 673" | sudo tee /dev/hideproc
  -> write
  -> file_operations.write
  -> device_write (Device)

Device: add pid 673 into the hidden_proc linked list



kstrtol(const char *s, unsigned int base, long *res)

const char *s

The start of the string. The string must be null-terminated, and may also include a single newline before its terminating null. The first character may also be a plus sign or a minus sign.

unsigned int base

The number base to use. The maximum supported base is 16. If base is given as 0, then the base of the string is automatically detected with the conventional semantics - If it begins with 0x the number will be parsed as a hexadecimal (case insensitive), if it otherwise begins with 0, it will be parsed as an octal number. Otherwise it will be parsed as a decimal.

long * res

Where to write the result of the conversion on success.


ftrace hook

Using ftrace to hook to functions
GitHub - ilammy / ftrace hook

The ftrace infrastructure was originally created to attach callbacks to the beginning of functions in order to record and trace the flow of the kernel. But callbacks to the start of a function can have other use cases. Either for live kernel patching, or for security monitoring. This document describes how to use ftrace to implement your own function callbacks.

ftrace options

struct ftrace_ops ops = {
      .func                    = my_callback_func,
      .flags                   = MY_FTRACE_FLAGS
      .private                 = any_private_data_structure,
};

FTRACE_OPS_FL_SAVE_REGS

If the callback requires reading or modifying the pt_regs passed to the callback, then it must set this flag. Registering a ftrace_ops with this flag set on an architecture that does not support passing of pt_regs to the callback will fail.

FTRACE_OPS_FL_IPMODIFY

Requires FTRACE_OPS_FL_SAVE_REGS set. If the callback is to “hijack” the traced function (have another function called instead of the traced function), it requires setting this flag. This is what live kernel patches uses. Without this flag the pt_regs->ip can not be modified.
Note, only one ftrace_ops with FTRACE_OPS_FL_IPMODIFY set may be registered to any given function at a time.

Setting this flag is what allows hook_find_ge_pid to be called in place of the original function (find_ge_pid).

register and filter

kernel/trace/ftrace.c - register_ftrace_function / ftrace_set_filter_ip

/**
 * register_ftrace_function - register a function for profiling
 * @ops - ops structure that holds the function for profiling.
 *
 * Register a function to be called by all functions in the
 * kernel.
 *
 * Note: @ops->func and all the functions it calls must be labeled
 *       with "notrace", otherwise it will go into a
 *       recursive loop.
 */
int register_ftrace_function(struct ftrace_ops *ops)
{
	int ret = -1;

	ftrace_ops_init(ops);

	mutex_lock(&ftrace_lock);

	ret = ftrace_startup(ops, 0);

	mutex_unlock(&ftrace_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(register_ftrace_function);
/**
 * ftrace_set_filter_ip - set a function to filter on in ftrace by address
 * @ops - the ops to set the filter with
 * @ip - the address to add to or remove from the filter.
 * @remove - non zero to remove the ip from the filter
 * @reset - non zero to reset all filters before applying this filter.
 *
 * Filters denote which functions should be enabled when tracing is enabled
 * If @ip is NULL, it failes to update filter.
 */
int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
			 int remove, int reset)
{
	ftrace_ops_init(ops);
	return ftrace_set_addr(ops, ip, remove, reset, 1);
}
EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);

設定自訂函式

ftrace_set_filter_ip(&hook->ops, hook->address, 0, 0); 設定 hook->address 至 filter hash 裡。大致流程會是:

ftrace_set_filter_ip(&hook->ops, hook->address, 0, 0);
  ftrace_set_addr(ops, ip, remove, reset, 1);
    ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
      orig_hash = &ops->func_hash->filter_hash;
      ftrace_match_addr(hash, ip, remove);
        add_hash_entry(hash, ip);
      ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable);
        ftrace_hash_move(ops, enable, orig_hash, hash);
          new_hash = __ftrace_hash_move(src); // add into hash
          ftrace_hash_ipmodify_update(ops, new_hash); // update ipmodify with hook->address 
          rcu_assign_pointer(*dst, new_hash); // update rcu pointer ( ops->func_hash->filter_hash )
        ftrace_ops_update_code(ops, &old_hash_ops);
struct ftrace_ops {
	ftrace_func_t			func;
	struct ftrace_ops __rcu	*next;
	unsigned long			flags;
	void				*private;
	ftrace_func_t			saved_func;
#ifdef CONFIG_DYNAMIC_FTRACE
	struct ftrace_ops_hash		local_hash;
	struct ftrace_ops_hash		*func_hash;
	struct ftrace_ops_hash		old_hash;
	unsigned long			trampoline;
	unsigned long			trampoline_size;
#endif
};
/* The hash used to know what functions callbacks trace */
struct ftrace_ops_hash {
	struct ftrace_hash __rcu	*notrace_hash;
	struct ftrace_hash __rcu	*filter_hash;
	struct mutex			regex_lock;
};
static int
ftrace_hash_move(struct ftrace_ops *ops, int enable,
		 struct ftrace_hash **dst, struct ftrace_hash *src)
{
	/*
	 * Remove the current set, update the hash and add
	 * them back.
	 */
	ftrace_hash_rec_disable_modify(ops, enable);

	rcu_assign_pointer(*dst, new_hash);

	ftrace_hash_rec_enable_modify(ops, enable);
}

static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops)
{
	struct ftrace_hash *hash = ops->func_hash->filter_hash;

	if (ftrace_hash_empty(hash))
		hash = NULL;

	return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash);
        // ftrace_hash_ipmodify_disable is:
        // __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH);
}

/*
 * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
 * or no-needed to update, -EBUSY if it detects a conflict of the flag
 * on a ftrace_rec, and -EINVAL if the new_hash tries to trace all recs.
 * Note that old_hash and new_hash has below meanings
 *  - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected)
 *  - If the hash is EMPTY_HASH, it hits nothing
 *  - Anything else hits the recs which match the hash entries.
 */
static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
					 struct ftrace_hash *old_hash,
					 struct ftrace_hash *new_hash)

重新設定

kernel/trace/ftrace.c - unregister_ftrace_function

/**
 * unregister_ftrace_function - unregister a function for profiling.
 * @ops - ops structure that holds the function to unregister
 *
 * Unregister a function that was added to be called by ftrace profiling.
 */
int unregister_ftrace_function(struct ftrace_ops *ops)
{
	int ret;

	mutex_lock(&ftrace_lock);
	ret = ftrace_shutdown(ops, 0);
	mutex_unlock(&ftrace_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(unregister_ftrace_function);

解除註冊後,先前被導向 hook_find_ge_pid 的呼叫會恢復為原本的 function ( 在此為 find_ge_pid ) 的地址。

原先的 find_ge_pid 與 struct pid

kernel/pid.c - find_ge_pid / include/linux/pid.h - struct pid

/*
 * Used by proc to find the first pid that is greater than or equal to nr.
 *
 * If there is a pid at nr this function is exactly the same as find_pid_ns.
 */
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{
	return idr_get_next(&ns->idr, &nr);
}
/*
 * struct upid is used to get the id of the struct pid, as it is
 * seen in particular namespace. Later the struct pid is found with
 * find_pid_ns() using the int nr and struct pid_namespace *ns.
 */

struct upid {
	int nr;
	struct pid_namespace *ns;
};

struct pid
{
	refcount_t count;
	unsigned int level;
	spinlock_t lock;
	/* lists of tasks that use this pid */
	struct hlist_head tasks[PIDTYPE_MAX];
	struct hlist_head inodes;
	/* wait queue for pidfd notifications */
	wait_queue_head_t wait_pidfd;
	struct rcu_head rcu;
	struct upid numbers[1];
};

pidof

pidof searches all the PID entries inside the /proc directory.

You can use ls /proc | grep <pid> to check whether the target PID is listed.

pidof source code
pidof manual page


ERROR: modpost: "kallsyms_lookup_name" [hideproc.ko] undefined!

在我的電腦 ubuntu ( 5.8.0-59-generic ) 直接編譯的話會出現上述錯誤資訊。

commit 0bd476e6c67190b5eb7b6e105c8db8ff61103281 修改了 kallsyms_lookup_name 和 kallsyms_on_each_symbol 函式的 export 形式,這是因為 MODULE_LICENSE("GPL") 等相關問題。

kallsyms: unexport kallsyms_lookup_name() and kallsyms_on_each_symbol()
kallsyms_lookup_name() and kallsyms_on_each_symbol() are exported to
modules despite having no in-tree users and being wide open to abuse by
out-of-tree modules that can use them as a method to invoke arbitrary
non-exported kernel functions.

Unexport kallsyms_lookup_name() and kallsyms_on_each_symbol().

而在 lkml.org - Re: [PATCH 0/3] Unexport kallsyms_lookup_name() and kallsyms_on_each_symbol() 中有說明為何要 unexport 此函式:

Despite having just a single modular in-tree user that I could spot,
kallsyms_lookup_name() is exported to modules and provides a mechanism
for out-of-tree modules to access and invoke arbitrary, non-exported
kernel symbols when kallsyms is enabled.

而在 /proc/kallsyms 當中是可以搜尋的到 kallsyms_lookup_name ,並且也確認了 kernel 的 config 。

$ cat /proc/kallsyms | grep kallsyms_lookup_name
0000000000000000 T module_kallsyms_lookup_name
0000000000000000 T kallsyms_lookup_name
$ cat /boot/config-5.8.0-59-generic | grep CONFIG_KALLSYMS
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y
CONFIG_KALLSYMS_BASE_RELATIVE=y

解決辦法除了替換 kernel 版本以及更改原始程式碼並重新編譯以外,也可以利用 VM 直接裝可呼叫之版本, ubuntu 版本約 20.04 以前。

It’s worth noting that, as of writing, the latest kernel available for Ubuntu 20.04 is 5.4.0-60-generic, so these changes won’t actually affect you yet if you’re on an LTS. But it’s nice to be ahead of the curve!

在此使用 Multipass - ubuntu VM ,因為測試過程中 kernel 有可能會被玩壞,利用 VM 可以比較沒顧慮。 VM 版本為:

$ uname -r
5.4.0-80-generic

確定好版本後,用最簡單的方式來測試上述問題。以下示範以 MIT 授權宣告的 module 使用 GPL-only 的函式,編譯時正常會出現:

make -C /lib/modules/5.4.0-80-generic/build M=/home/ubuntu/test modules
make[1]: Entering directory '/usr/src/linux-headers-5.4.0-80-generic'
  Building modules, stage 2.
  MODPOST 1 modules
FATAL: modpost: GPL-incompatible module nol.ko uses GPL-only symbol 'kallsyms_lookup_name'
make[2]: *** [scripts/Makefile.modpost:94: __modpost] Error 1
make[1]: *** [Makefile:1675: modules] Error 2
make[1]: Leaving directory '/usr/src/linux-headers-5.4.0-80-generic'

nol.c

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kallsyms.h>

MODULE_LICENSE("MIT");

int init_module(void)
{
	unsigned long addr;

	addr = (unsigned long) kallsyms_lookup_name("printk");
	printk("printk addr is %ld\n", addr);
	
	return 0;
}

void cleanup_module(void) {}

GPL 本身就是 GNU General Public License 的簡稱,你不需要說 GPL License,直接書寫 GPL
:notes: jserv

而這次先利用擁有 GPL 的 module 中的 kallsyms_lookup_name 輸出 kallsyms_lookup_name 的 va 並想辦法傳給另一個以 MIT license 宣告的 module 。在此用最蠢的方式,以 printk 輸出後手動修改 out.c 的程式碼。
in.c

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kallsyms.h>

/*
 * in.c - GPL-licensed helper that resolves the address of
 * kallsyms_lookup_name() and logs it.
 *
 * Returns 0 unconditionally.
 */
int init_module(void)
{
    unsigned long address;

    pr_info("In the license.\n");
    address = (unsigned long)kallsyms_lookup_name("kallsyms_lookup_name");
    /*
     * Deliberately logged as a signed decimal: that exact value is pasted
     * by hand into out.c. The cast makes the argument match %ld.
     */
    pr_info("address %ld\n", (long)address);

    return 0;
}

/* Log the unload; nothing else to release. */
void cleanup_module(void)
{
    pr_info("close\n");
}

MODULE_LICENSE("GPL");
[ 4469.175017] In the license.
[ 4469.176006] address -1252755248

之後在 out.c 中使用剛剛的 va 結合 function pointer 來呼叫原先不能呼叫的 kallsyms_lookup_name 函式。
out.c

#include <linux/kernel.h>
#include <linux/module.h>

MODULE_LICENSE("MIT");

int init_module(void)
{
	unsigned long addr;
	int (*pkp)(const char *fmt, ...);
	unsigned long (*funcp)(const char *name) = -1252755248;
		

	addr = (unsigned long)funcp("printk");
	pkp = addr;
	pkp("out of license\n");

	return 0;
}

void cleanup_module() {}

因為 out.c 沒有直接引用任何 GPL-only 的函式 ( 只透過函式指標間接呼叫 ),因此編譯會過,sudo insmod out.ko 後會得到:

[ 4508.736207] out of license

這邊也有另一個使用方法:How to use variables or functions that are not exported in the Linux kernel

因此,雖然函式地址會在每次重開機時而有所不同 ( ASLR ) ,但這嚴重的漏洞還是讓開發者們修改了此函式的 export 形式。

The address that the kernel is loaded from is called startup_64 (you can find it in /proc/kallsyms), but kernel address space layout randomization means that this address will change at every boot.

而在 v5.7 之後的版本,則可以參考:Linux Rootkits: New Methods for Kernel 5.7+


PPID 和多個 PID 隱藏

PPID

task_struct 中定義了自己的 pid 以及指向 parent 的指標。

	pid_t				pid;
	pid_t				tgid;

#ifdef CONFIG_STACKPROTECTOR
	/* Canary value for the -fstack-protector GCC feature: */
	unsigned long			stack_canary;
#endif
	/*
	 * Pointers to the (original) parent process, youngest child, younger sibling,
	 * older sibling, respectively.  (p->father can be replaced with
	 * p->real_parent->pid)
	 */

	/* Real parent process: */
	struct task_struct __rcu	*real_parent;

	/* Recipient of SIGCHLD, wait4() reports: */
	struct task_struct __rcu	*parent;

	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head		children;
	struct list_head		sibling;
	struct task_struct		*group_leader;

因此只要利用給予的 pid 找到本身的 task_struct 再利用其中的 real_parent 來得到 parent 的 pid 即可。
main.c 當中新增 get_parent_pid 函式,並在 device_write 中增加對 PPID 的處理:

#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>

static pid_t get_parent_pid(long child_pid)
{
    struct task_struct *child = NULL;
    struct pid* cp = NULL;
    struct task_struct *parent = NULL;
    
    cp = find_get_pid(child_pid);
    child = get_pid_task(cp, PIDTYPE_PID);
    parent = child->real_parent;
    
    return parent->pid;
}

/*
 * device_write - handle "add <pid>" / "del <pid>" written to /dev/hideproc.
 * @filep:  open file (unused).
 * @buffer: user-space buffer holding the command text.
 * @len:    number of bytes in @buffer.
 * @offset: file position, set to @len on success.
 *
 * "add" hides the given PID and its parent; "del" unhides both.
 *
 * Returns @len on success, -EAGAIN for a command that is too short or not
 * recognised, -ENOMEM / -EFAULT when the command cannot be copied in, or
 * the error from kstrtol() when the PID is not a valid decimal number.
 */
static ssize_t device_write(struct file *filep,
                            const char *buffer,
                            size_t len,
                            loff_t *offset)
{
    /* both keywords have the same length, so one size serves for both */
    static const char add_message[] = "add", del_message[] = "del";
    const size_t keyword_len = sizeof(add_message) - 1;
    /* keyword + one separator character */
    const size_t arg_offset = sizeof(add_message);
    ssize_t ret;
    long pid;
    pid_t ppid;
    char *message;

    /*
     * Require the keyword, the separator and at least one more byte;
     * otherwise message + arg_offset would point past the copied data.
     */
    if (len <= arg_offset)
        return -EAGAIN;

    message = kmalloc(len + 1, GFP_KERNEL);
    if (!message)
        return -ENOMEM;

    if (copy_from_user(message, buffer, len)) {
        ret = -EFAULT;
        goto out;
    }
    message[len] = '\0'; /* kstrtol() needs a NUL-terminated string */

    if (!memcmp(message, add_message, keyword_len)) {
        ret = kstrtol(message + arg_offset, 10, &pid);
        if (ret)
            goto out;

        hide_process(pid);
        ppid = get_parent_pid(pid);
        /* skip the parent if the lookup reported an error */
        if (ppid >= 0)
            hide_process(ppid);
    } else if (!memcmp(message, del_message, keyword_len)) {
        ret = kstrtol(message + arg_offset, 10, &pid);
        if (ret)
            goto out;

        unhide_process(pid);
        ppid = get_parent_pid(pid);
        if (ppid >= 0)
            unhide_process(ppid);
    } else {
        ret = -EAGAIN;
        goto out;
    }

    *offset = len;
    ret = len;

out:
    kfree(message);
    return ret;
}

並撰寫 script 來驗證執行結果:

#!/bin/bash
# script.sh - prompt for a child PID and print the PID of its parent
read -r -p "child pid:" pid
echo parent pid is
# "-o ppid=" selects only the PPID column and suppresses its header
ps -p "$pid" -o ppid=

輸出測試:

$ pidof cron
660
$ echo "add 660" | sudo tee /dev/hideproc
add 660
$ pidof cron
$ ./script.sh
child pid:660
parent pid is
$ sudo cat /dev/hideproc
pid: 660
pid: 1
$ echo "del 660" | sudo tee /dev/hideproc
del 660
$ pidof cron
660
$ ./script.sh 
child pid:660
parent pid is
      1

改進

cdev unregistration

example :

/*
 * Module exit: remove every cdev that was added, then give back the
 * device-number range MKDEV(MY_MAJOR, 0) .. MY_MAX_MINORS.
 */
void cleanup_module(void)
{
    int minor = 0;

    while (minor < MY_MAX_MINORS) {
        /* release devs[minor] fields */
        cdev_del(&devs[minor].cdev);
        minor++;
    }

    unregister_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS);
}

Device open / close