# Reading [The Linux Kernel Module Programming Guide](https://sysprog21.github.io/lkmpg/) (2) ## Preliminaries 前面複習一些OS kernel/user space, system call,建議寫kernel module時,變數要設定static避免混用到kernel變數,也可以主動去註冊變數到kernel (`/proc/kallsyms`),由於module是可以在kernel中動態插入或移除的程式碼,所以它共享kernel的程式碼空間,而不是有自己的空間。如果module發生segment fault,kernel也會掛掉。 寫driver時裝置分為兩種 * character devices * block devices 兩者的差異在於block devices有緩衝區。另外可以透過 ls -l 輸出中的第一個字元來判斷一個裝置檔案是用於block devices或character devices。如果是b則是block devices,如果是c則是character devices。 # Character Device drivers ## file_operations structures in "include/linux/fs.h" 定義許多character device drivers需要的操作接口,如果有driver沒用到的接口要設定成NULL。character device會註冊在/dev目錄下面,相同major number使用相同driver,不同minor code指的是不同device。 <!-- ,tty就是character device,我們前面開qemu的時候有指定console=ttyS0(之後會補充到,大概很之後XD) --> 這邊書上說了有關register_chrdev和alloc_chrdev_region,但是看了半天還是看不懂,索性自己改寫,以下是書上的範例: ```clike= /* * chardev.c: Creates a read-only char device that says how many times * you have read from the dev file */ #include <linux/atomic.h> #include <linux/cdev.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> /* for sprintf() */ #include <linux/module.h> #include <linux/printk.h> #include <linux/types.h> #include <linux/uaccess.h> /* for get_user and put_user */ #include <linux/version.h> #include <asm/errno.h> /* Prototypes - this would normally go in a .h file */ static int device_open(struct inode *, struct file *); static int device_release(struct inode *, struct file *); static ssize_t device_read(struct file *, char __user *, size_t, loff_t *); static ssize_t device_write(struct file *, const char __user *, size_t, loff_t *); #define DEVICE_NAME "chardev" /* Dev name as it appears in /proc/devices */ #define BUF_LEN 80 /* Max length of the message from the device */ /* Global variables are declared as static, so are global within the file. 
*/ static int major; /* major number assigned to our device driver */ enum { CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN, }; /* Is device open? Used to prevent multiple access to device */ static atomic_t already_open = ATOMIC_INIT(CDEV_NOT_USED); static char msg[BUF_LEN + 1]; /* The msg the device will give when asked */ static struct class *cls; static struct file_operations chardev_fops = { .read = device_read, .write = device_write, .open = device_open, .release = device_release, }; static int __init chardev_init(void) { major = register_chrdev(0, DEVICE_NAME, &chardev_fops); if (major < 0) { pr_alert("Registering char device failed with %d\n", major); return major; } pr_info("I was assigned major number %d.\n", major); #if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 4, 0) cls = class_create(DEVICE_NAME); #else cls = class_create(THIS_MODULE, DEVICE_NAME); #endif device_create(cls, NULL, MKDEV(major, 0), NULL, DEVICE_NAME); pr_info("Device created on /dev/%s\n", DEVICE_NAME); return 0; } static void __exit chardev_exit(void) { device_destroy(cls, MKDEV(major, 0)); class_destroy(cls); /* Unregister the device */ unregister_chrdev(major, DEVICE_NAME); } /* Methods */ /* Called when a process tries to open the device file, like * "sudo cat /dev/chardev" */ static int device_open(struct inode *inode, struct file *file) { static int counter = 0; if (atomic_cmpxchg(&already_open, CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN)) return -EBUSY; sprintf(msg, "I already told you %d times Hello world!\n", counter++); try_module_get(THIS_MODULE); return 0; } /* Called when a process closes the device file. */ static int device_release(struct inode *inode, struct file *file) { /* We're now ready for our next caller */ atomic_set(&already_open, CDEV_NOT_USED); /* Decrement the usage count, or else once you opened the file, you will * never get rid of the module. */ module_put(THIS_MODULE); return 0; } /* Called when a process, which already opened the dev file, attempts to * read from it. 
*/ static ssize_t device_read(struct file *filp, /* see include/linux/fs.h */ char __user *buffer, /* buffer to fill with data */ size_t length, /* length of the buffer */ loff_t *offset) { /* Number of bytes actually written to the buffer */ int bytes_read = 0; const char *msg_ptr = msg; if (!*(msg_ptr + *offset)) { /* we are at the end of message */ *offset = 0; /* reset the offset */ return 0; /* signify end of file */ } msg_ptr += *offset; /* Actually put the data into the buffer */ while (length && *msg_ptr) { /* The buffer is in the user data segment, not the kernel * segment so "*" assignment won't work. We have to use * put_user which copies data from the kernel data segment to * the user data segment. */ put_user(*(msg_ptr++), buffer++); length--; bytes_read++; } *offset += bytes_read; /* Most read functions return the number of bytes put into the buffer. */ return bytes_read; } /* Called when a process writes to dev file: echo "hi" > /dev/hello */ static ssize_t device_write(struct file *filp, const char __user *buff, size_t len, loff_t *off) { pr_alert("Sorry, this operation is not supported.\n"); return -EINVAL; } module_init(chardev_init); module_exit(chardev_exit); MODULE_LICENSE("GPL"); ``` 基本上就是開一個device,定義open,read,write,release的syscall來時要如何應對,最簡單的測試方式就是跑cat /dev/chardev,整個過程cat會去開這個device file所以會觸發open syscall,接下來會read,最後會close device file。所以會看到`"%s already told you %d times Hello world!\n"`,輸出在螢幕上,現在我要做一些調整,兩個devices共用同一個file operations,這樣我們要做的就是多create一個device file,兩個device file的minor要不一樣。 ```clike= /* * chardev.c: Creates a read-only char device that says how many times * you have read from the dev file */ #include <linux/atomic.h> #include <linux/cdev.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> /* for sprintf() */ #include <linux/module.h> #include <linux/printk.h> #include <linux/types.h> #include <linux/uaccess.h> /* for get_user and put_user */ #include 
<linux/version.h> #include <asm/errno.h> /* Prototypes - this would normally go in a .h file */ static int device_open(struct inode *, struct file *); static int device_release(struct inode *, struct file *); static ssize_t device_read(struct file *, char __user *, size_t, loff_t *); static ssize_t device_write(struct file *, const char __user *, size_t, loff_t *); #define DEVICE_CNT 2 #define DRIVER_NAME "chardev" /* Dev name as it appears in /proc/devices */ #define BUF_LEN 80 /* Max length of the message from the device */ static char *device_name[DEVICE_CNT] = {"chardev0", "chardev1"}; /* Global variables are declared as static, so are global within the file. */ static int major; /* major number assigned to our device driver */ enum { CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN, }; /* Is device open? Used to prevent multiple access to device */ static atomic_t already_open[DEVICE_CNT] = {ATOMIC_INIT(CDEV_NOT_USED), ATOMIC_INIT(CDEV_NOT_USED)}; // static char msg[BUF_LEN + 1]; /* The msg the device will give when asked */ static char msg[DEVICE_CNT][BUF_LEN + 1]; /* The msg the device will give when asked */ static struct class *cls; static struct file_operations chardev_fops = { .read = device_read, .write = device_write, .open = device_open, .release = device_release, }; static int __init chardev_init(void) { major = register_chrdev(0, DRIVER_NAME, &chardev_fops); if (major < 0) { pr_alert("Registering char device failed with %d\n", major); return major; } pr_info("I was assigned major number %d.\n", major); #if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 4, 0) cls = class_create(DRIVER_NAME); #else cls = class_create(THIS_MODULE, DRIVER_NAME); #endif int i = 0; for (; i < DEVICE_CNT; i++) { device_create(cls, NULL, MKDEV(major, i), NULL, device_name[i]); } pr_info("Device created on /dev/%s\n", DRIVER_NAME); return 0; } static void __exit chardev_exit(void) { int i = 0; for (; i < DEVICE_CNT; i++) { device_destroy(cls, MKDEV(major, i)); } class_destroy(cls); 
unregister_chrdev_region(major, DEVICE_CNT); pr_alert("%s driver removed.\n", DRIVER_NAME); } /* Methods */ /* Called when a process tries to open the device file, like * "sudo cat /dev/chardev" */ static int device_open(struct inode *inode, struct file *file) { static int counter[DEVICE_CNT] = {0, 0}; int minor = iminor(inode); if (atomic_cmpxchg(&already_open[minor], CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN)) return -EBUSY; sprintf(msg[minor], "%s already told you %d times Hello world!\n", device_name[minor], counter[minor]++); try_module_get(THIS_MODULE); return 0; } static int device_open_test(struct inode *inode, struct file *file) { static int counter[DEVICE_CNT] = {0, 0}; int minor = iminor(inode); if (atomic_cmpxchg(&already_open[minor], CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN)) return -EBUSY; sprintf(msg[minor], "%s already told you %d times Hello world! (msg from device_open_test)\n", device_name[minor], counter[minor]++); try_module_get(THIS_MODULE); return 0; } /* Called when a process closes the device file. */ static int device_release(struct inode *inode, struct file *file) { int minor = iminor(inode); /* We're now ready for our next caller */ atomic_set(&already_open[minor], CDEV_NOT_USED); /* Decrement the usage count, or else once you opened the file, you will * never get rid of the module. */ module_put(THIS_MODULE); return 0; } /* Called when a process, which already opened the dev file, attempts to * read from it. 
*/
/* Returns the number of bytes copied, 0 at the end of the message, or
 * -EFAULT when the user buffer cannot be written.
 */
static ssize_t device_read(struct file *filp, /* see include/linux/fs.h */
                           char __user *buffer, /* buffer to fill with data */
                           size_t length, /* length of the buffer */
                           loff_t *offset)
{
    int minor = iminor(filp->f_inode);
    /* Number of bytes actually written to the buffer */
    int bytes_read = 0;
    const char *msg_ptr = msg[minor];

    /* The offset is supplied by the caller, so check that it still points
     * inside msg[minor] (BUF_LEN + 1 bytes) before dereferencing it. An
     * out-of-range offset and an offset sitting on the terminating NUL are
     * both treated as "end of message".
     */
    if (*offset < 0 || *offset > BUF_LEN || !msg_ptr[*offset]) {
        *offset = 0; /* reset the offset */
        return 0;    /* signify end of file */
    }

    msg_ptr += *offset;

    /* Actually put the data into the buffer */
    while (length && *msg_ptr) {
        /* The buffer is in the user data segment, not the kernel
         * segment so "*" assignment won't work. We have to use
         * put_user which copies data from the kernel data segment to
         * the user data segment. Its result is checked so that a bad
         * user pointer ends the read with -EFAULT instead of being
         * silently counted as copied.
         */
        if (put_user(*msg_ptr, buffer))
            return -EFAULT;

        msg_ptr++;
        buffer++;
        length--;
        bytes_read++;
    }

    *offset += bytes_read;

    /* Most read functions return the number of bytes put into the buffer. */
    return bytes_read;
}

/* Called when a process writes to dev file: echo "hi" > /dev/chardev0 */
static ssize_t device_write(struct file *filp, const char __user *buff,
                            size_t len, loff_t *off)
{
    pr_alert("Sorry, this operation is not supported.\n");
    return -EINVAL;
}

module_init(chardev_init);
module_exit(chardev_exit);

MODULE_LICENSE("GPL");
```

上面的程式執行起來會是
```
~ # cat dev/chardev1
chardev1 already told you 0 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 0 times Hello world!
~ # cat dev/chardev1
chardev1 already told you 1 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 1 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 2 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 3 times Hello world!
~ # cat dev/chardev1
chardev1 already told you 2 times Hello world!
``` 兩個device的變數用minor隔離開來但執行的函數是相同的(共用fops),在__init裡面做的改動就是將DEVICE_NAME->DRIVER_NAME,畢竟現在有兩個device file同名的話會出問題所以我另外設定變數device_name[]區分他們,下面是產生不同device file的方式,MKDEV是為了產生一對{major,minor},具體的結構是定義為dev_t: ``` for (; i < DEVICE_CNT; i++) { device_create(cls, NULL, MKDEV(major, i), NULL, device_name[i]); } ``` 到這邊似乎是很完美,但是,如果我有一些device的file operations想要和之前的不一樣,例如我想要minor 0\~5的device使用chardev_fops_0,但是6\~10的想要用chardev_fops_1,要怎麼處理,這就是這章後面再講的問題,但是他沒給範例,底下我自己亂搞的一個小範例,使用alloc_chrdev_region將兩個device分到不同的file operations: ```clike= /* * chardev.c: Creates a read-only char device that says how many times * you have read from the dev file */ #include <linux/atomic.h> #include <linux/cdev.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> /* for sprintf() */ #include <linux/module.h> #include <linux/printk.h> #include <linux/types.h> #include <linux/uaccess.h> /* for get_user and put_user */ #include <linux/version.h> #include <asm/errno.h> /* Prototypes - this would normally go in a .h file */ static int device_open(struct inode *, struct file *); static int device_open_test(struct inode *, struct file *); static int device_release(struct inode *, struct file *); static ssize_t device_read(struct file *, char __user *, size_t, loff_t *); static ssize_t device_write(struct file *, const char __user *, size_t, loff_t *); #define DEVICE_CNT 2 #define DRIVER_NAME "chardev" /* Dev name as it appears in /proc/devices */ static dev_t base_dev, devs[DEVICE_CNT]; static struct cdev my_cdevs[DEVICE_CNT]; #define BUF_LEN 80 /* Max length of the message from the device */ static char *device_name[DEVICE_CNT] = {"chardev0", "chardev1"}; /* Global variables are declared as static, so are global within the file. */ static int major; /* major number assigned to our device driver */ enum { CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN, }; /* Is device open? 
Used to prevent multiple access to device */ static atomic_t already_open[DEVICE_CNT] = {ATOMIC_INIT(CDEV_NOT_USED), ATOMIC_INIT(CDEV_NOT_USED)}; // static char msg[BUF_LEN + 1]; /* The msg the device will give when asked */ static char msg[DEVICE_CNT][BUF_LEN + 1]; /* The msg the device will give when asked */ static struct class *cls; static struct file_operations chardev_fops = { .read = device_read, .write = device_write, .open = device_open, .release = device_release, }; static struct file_operations chardev_fops_test = { .read = device_read, .write = device_write, .open = device_open_test, .release = device_release, }; static int __init chardev_init(void) { int alloc_ret = -1; alloc_ret = alloc_chrdev_region(&base_dev, 0, DEVICE_CNT, DRIVER_NAME); if (alloc_ret) { pr_alert("Registering char device failed with %d\n", alloc_ret); return alloc_ret; } major = MAJOR(base_dev); int i = 0, cdev_ret = -1; #if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 4, 0) cls = class_create(DRIVER_NAME); #else cls = class_create(THIS_MODULE, DRIVER_NAME); #endif for (i = 0; i < DEVICE_CNT; i++) { devs[i] = MKDEV(MAJOR(base_dev), MINOR(base_dev) + i); if (i == 0) cdev_init(&my_cdevs[i], &chardev_fops); else cdev_init(&my_cdevs[i], &chardev_fops_test); cdev_ret = cdev_add(&my_cdevs[i], devs[i], 1); device_create(cls, NULL, devs[i], NULL, device_name[i]); pr_info("Device created on /dev/%s\n", device_name[i]); } pr_info("Module loaded.Driver name: %s, Major: %d\n", DRIVER_NAME, MAJOR(base_dev)); return 0; } static void __exit chardev_exit(void) { int i = 0; for (; i < DEVICE_CNT; i++) { device_destroy(cls, devs[i]); cdev_del(&my_cdevs[i]); } class_destroy(cls); unregister_chrdev_region(base_dev, DEVICE_CNT); pr_alert("%s driver removed.\n", DRIVER_NAME); } /* Methods */ /* Called when a process tries to open the device file, like * "sudo cat /dev/chardev" */ static int device_open(struct inode *inode, struct file *file) { static int counter[DEVICE_CNT] = {0, 0}; int minor = 
iminor(inode);

    if (atomic_cmpxchg(&already_open[minor], CDEV_NOT_USED,
                       CDEV_EXCLUSIVE_OPEN))
        return -EBUSY;

    /* Hold a module reference while the file is open. If it cannot be
     * taken, drop the exclusive-open mark again so the device is not
     * left permanently busy.
     */
    if (!try_module_get(THIS_MODULE)) {
        atomic_set(&already_open[minor], CDEV_NOT_USED);
        return -ENODEV;
    }

    /* Bounded write: msg[minor] holds BUF_LEN + 1 bytes. */
    snprintf(msg[minor], sizeof(msg[minor]),
             "%s already told you %d times Hello world!\n", device_name[minor],
             counter[minor]++);

    return 0;
}

/* Open handler installed through chardev_fops_test. Same as device_open()
 * but with a longer message, to show which handler served the open.
 *
 * Returns 0 on success, -EBUSY when the device is already open, -ENODEV
 * when the module reference cannot be taken.
 */
static int device_open_test(struct inode *inode, struct file *file)
{
    static int counter[DEVICE_CNT] = {0, 0};
    int minor = iminor(inode);

    if (atomic_cmpxchg(&already_open[minor], CDEV_NOT_USED,
                       CDEV_EXCLUSIVE_OPEN))
        return -EBUSY;

    if (!try_module_get(THIS_MODULE)) {
        atomic_set(&already_open[minor], CDEV_NOT_USED);
        return -ENODEV;
    }

    /* This text is 74 characters plus the counter digits, so an unbounded
     * sprintf() can outgrow the BUF_LEN + 1 byte buffer once the counter
     * gets large; snprintf() truncates instead.
     */
    snprintf(msg[minor], sizeof(msg[minor]),
             "%s already told you %d times Hello world! (msg from device_open_test)\n",
             device_name[minor], counter[minor]++);

    return 0;
}

/* Called when a process closes the device file. */
static int device_release(struct inode *inode, struct file *file)
{
    int minor = iminor(inode);

    /* We're now ready for our next caller */
    atomic_set(&already_open[minor], CDEV_NOT_USED);

    /* Decrement the usage count, or else once you opened the file, you will
     * never get rid of the module.
     */
    module_put(THIS_MODULE);

    return 0;
}

/* Called when a process, which already opened the dev file, attempts to
 * read from it.
 *
 * Returns the number of bytes copied, 0 at the end of the message, or
 * -EFAULT when the user buffer cannot be written.
 */
static ssize_t device_read(struct file *filp, /* see include/linux/fs.h */
                           char __user *buffer, /* buffer to fill with data */
                           size_t length, /* length of the buffer */
                           loff_t *offset)
{
    int minor = iminor(filp->f_inode);
    /* Number of bytes actually written to the buffer */
    int bytes_read = 0;
    const char *msg_ptr = msg[minor];

    /* The offset is supplied by the caller, so check that it still points
     * inside msg[minor] (BUF_LEN + 1 bytes) before dereferencing it. An
     * out-of-range offset and an offset sitting on the terminating NUL are
     * both treated as "end of message".
     */
    if (*offset < 0 || *offset > BUF_LEN || !msg_ptr[*offset]) {
        *offset = 0; /* reset the offset */
        return 0;    /* signify end of file */
    }

    msg_ptr += *offset;

    /* Actually put the data into the buffer */
    while (length && *msg_ptr) {
        /* The buffer is in the user data segment, not the kernel
         * segment so "*" assignment won't work. We have to use
         * put_user which copies data from the kernel data segment to
         * the user data segment. Its result is checked so that a bad
         * user pointer ends the read with -EFAULT instead of being
         * silently counted as copied.
         */
        if (put_user(*msg_ptr, buffer))
            return -EFAULT;

        msg_ptr++;
        buffer++;
        length--;
        bytes_read++;
    }

    *offset += bytes_read;

    /* Most read functions return the number of bytes put into the buffer. */
    return bytes_read;
}

/* Called when a process writes to dev file: echo "hi" > /dev/chardev0 */
static ssize_t device_write(struct file *filp, const char __user *buff,
                            size_t len, loff_t *off)
{
    pr_alert("Sorry, this operation is not supported.\n");
    return -EINVAL;
}

module_init(chardev_init);
module_exit(chardev_exit);

MODULE_LICENSE("GPL");
```

chardev0分配chardev_fops,chardev1分配chardev_fops_test,執行的結果:
```
~ # cat dev/chardev0
chardev0 already told you 0 times Hello world!
~ # cat dev/chardev1
chardev1 already told you 0 times Hello world! (msg from device_open_test)
~ # cat dev/chardev0
chardev0 already told you 1 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 2 times Hello world!
~ # cat dev/chardev1
chardev1 already told you 1 times Hello world! (msg from device_open_test)
~ # cat dev/chardev1
chardev1 already told you 2 times Hello world! (msg from device_open_test)
~ # cat dev/chardev1
chardev1 already told you 3 times Hello world! (msg from device_open_test)
```

chardev_fops_test中的.open對應到device_open_test,會比原來的版本增加 (msg from device_open_test),這個範例描述了原書裡面的這句話`However, register_chrdev() would occupy a range of minor numbers associated with the given major.
The recommended way to reduce waste for char device registration is using cdev interface.`,因為register_chrdev沒辦法分配不同的file operations給不同的device file,如此一來那個唯一的file operations連接的function就必須自己依照minor分派到不同的function,會很醜很麻煩。

不正式的說cdev扮演的角色就是負責分配不同的file operations給不同的minor,剩下就是移除module時要用cdev_del把它刪掉,至於這個minor對應的device就要用device_create生成(當/dev是devtmpfs),所以如果只init cdev還不夠,系統不會自己幫你生出device file,畢竟cdev沒有你要生成的device的名稱。

## Murmur

看這段如果沒有試試以上例子,我是搞不清楚那些差別的,加上他給的例子實在太簡單,對書上的內容描述不夠充分,`The recommended way to reduce waste for char device registration is using cdev interface.`,給的例子又不是"recommended way",看得頭很痛。寫這些例子的時候遇到一些bug,kernel crash了幾次,還好使用QEMU,不用整天開機關機的試。前面沒有特別提到put_user,這個還蠻重要的,註解裡也有寫清楚,要看一下。