# Reading [The Linux Kernel Module Programming Guide](https://sysprog21.github.io/lkmpg/) (2)
## Preliminaries
前面複習一些OS kernel/user space, system call,建議寫kernel module時,變數要宣告成static,避免和kernel既有的symbol名稱衝突;如果不想全部宣告成static,也可以主動把symbol table註冊給kernel(kernel目前知道的symbol都可以在`/proc/kallsyms`看到)。由於module是可以在kernel中動態插入或移除的程式碼,所以它共享kernel的程式碼空間,而不是有自己的空間。如果module發生segmentation fault,kernel也會掛掉。
寫driver時裝置分為兩種
* character devices
* block devices
兩者的差異在於block devices有緩衝區。另外可以透過 ls -l 輸出中的第一個字元來判斷一個裝置檔案是用於block devices或character devices。如果是b則是block devices,如果是c則是character devices。
# Character Device drivers
## file_operations structures in "include/linux/fs.h"
定義許多character device drivers需要的操作接口,如果有driver沒用到的接口要設定成NULL。character device會註冊在/dev目錄下面,相同major number使用相同driver,不同minor number則用來區分同一個driver底下的不同device。
<!-- ,tty就是character device,我們前面開qemu的時候有指定console=ttyS0(之後會補充到,大概很之後XD) -->
這邊書上說了有關register_chrdev和alloc_chrdev_region,但是看了半天還是看不懂,索性自己改寫,以下是書上的範例:
```clike=
/*
* chardev.c: Creates a read-only char device that says how many times
* you have read from the dev file
*/
#include <linux/atomic.h>
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h> /* for sprintf() */
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/types.h>
#include <linux/uaccess.h> /* for get_user and put_user */
#include <linux/version.h>
#include <asm/errno.h>
/* Prototypes - this would normally go in a .h file */
static int device_open(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char __user *, size_t,
loff_t *);
#define DEVICE_NAME "chardev" /* Dev name as it appears in /proc/devices */
#define BUF_LEN 80 /* Max length of the message from the device */
/* Global variables are declared as static, so are global within the file. */
static int major; /* major number assigned to our device driver */
enum {
CDEV_NOT_USED,
CDEV_EXCLUSIVE_OPEN,
};
/* Is device open? Used to prevent multiple access to device */
static atomic_t already_open = ATOMIC_INIT(CDEV_NOT_USED);
static char msg[BUF_LEN + 1]; /* The msg the device will give when asked */
static struct class *cls;
static struct file_operations chardev_fops = {
.read = device_read,
.write = device_write,
.open = device_open,
.release = device_release,
};
/*
 * Module entry point: registers a dynamically assigned major number,
 * creates the device class and the /dev/chardev node.
 *
 * Returns 0 on success or a negative errno. Every step that already
 * succeeded is undone before an error is returned, so a failed load leaves
 * nothing registered.
 */
static int __init chardev_init(void)
{
    struct device *dev;
    int ret;

    major = register_chrdev(0, DEVICE_NAME, &chardev_fops);
    if (major < 0) {
        pr_alert("Registering char device failed with %d\n", major);
        return major;
    }
    pr_info("I was assigned major number %d.\n", major);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 4, 0)
    cls = class_create(DEVICE_NAME);
#else
    cls = class_create(THIS_MODULE, DEVICE_NAME);
#endif
    /* class_create() reports failure as an ERR_PTR, never as NULL. */
    if (IS_ERR(cls)) {
        ret = PTR_ERR(cls);
        pr_alert("Creating class %s failed with %d\n", DEVICE_NAME, ret);
        goto err_unregister;
    }

    dev = device_create(cls, NULL, MKDEV(major, 0), NULL, DEVICE_NAME);
    if (IS_ERR(dev)) {
        ret = PTR_ERR(dev);
        pr_alert("Creating /dev/%s failed with %d\n", DEVICE_NAME, ret);
        goto err_class;
    }
    pr_info("Device created on /dev/%s\n", DEVICE_NAME);
    return 0;

err_class:
    class_destroy(cls);
err_unregister:
    unregister_chrdev(major, DEVICE_NAME);
    return ret;
}
/*
 * Module exit point: tears down, in reverse order of creation, the device
 * node, the class and finally the major number registration.
 */
static void __exit chardev_exit(void)
{
    const dev_t devt = MKDEV(major, 0);

    device_destroy(cls, devt);
    class_destroy(cls);

    /* Give the major number back to the kernel. */
    unregister_chrdev(major, DEVICE_NAME);
}
/* Methods */
/* Called when a process tries to open the device file, like
* "sudo cat /dev/chardev"
*/
static int device_open(struct inode *inode, struct file *file)
{
static int counter = 0;
if (atomic_cmpxchg(&already_open, CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN))
return -EBUSY;
sprintf(msg, "I already told you %d times Hello world!\n", counter++);
try_module_get(THIS_MODULE);
return 0;
}
/*
 * release() handler, run when the process closes the device file.
 * Always returns 0.
 */
static int device_release(struct inode *inode, struct file *file)
{
    /* Mark the device free so that the next open() can succeed. */
    atomic_set(&already_open, CDEV_NOT_USED);

    /*
     * Give back the reference taken in device_open(). If it were kept, the
     * module could never be removed after the first open.
     */
    module_put(THIS_MODULE);

    return 0;
}
/*
 * Called when a process, which already opened the dev file, attempts to
 * read from it. Copies the message to user space starting at *offset and
 * advances *offset by the number of bytes copied.
 *
 * filp:   the open file, see include/linux/fs.h (not used here)
 * buffer: user-space buffer to fill with data
 * length: capacity of buffer in bytes
 * offset: position inside the message; reset to 0 at end of message
 *
 * Returns the number of bytes put into the buffer, 0 at end of message,
 * -EINVAL for a negative offset, or -EFAULT if the user buffer cannot be
 * written.
 */
static ssize_t device_read(struct file *filp, char __user *buffer,
                           size_t length, loff_t *offset)
{
    const char *const msg_end = msg + sizeof(msg);
    const char *msg_ptr;
    ssize_t bytes_read = 0;

    if (*offset < 0)
        return -EINVAL;

    /*
     * The offset is chosen by the caller (e.g. via pread()), so it has to
     * be validated before it is used as an index into msg. An offset at or
     * past the terminating NUL means end of file.
     */
    if (*offset >= (loff_t)sizeof(msg) || !msg[*offset]) {
        *offset = 0; /* reset the offset */
        return 0;    /* signify end of file */
    }
    msg_ptr = msg + *offset;

    /* Actually put the data into the buffer */
    while (length && msg_ptr < msg_end && *msg_ptr) {
        /*
         * The buffer is in the user data segment, not the kernel segment,
         * so "*" assignment won't work. put_user copies one value from
         * kernel space to user space and returns non-zero if the user
         * address is not writable.
         */
        if (put_user(*msg_ptr, buffer))
            return -EFAULT;
        msg_ptr++;
        buffer++;
        length--;
        bytes_read++;
    }
    *offset += bytes_read;

    /* Most read functions return the number of bytes put into the buffer. */
    return bytes_read;
}
/*
 * write() handler, reached for example by: echo "hi" > /dev/chardev
 *
 * The device is read-only: the attempt is logged and refused with -EINVAL.
 * None of the arguments are examined.
 */
static ssize_t device_write(struct file *filp, const char __user *buff,
                            size_t len, loff_t *off)
{
    const ssize_t rejected = -EINVAL;

    pr_alert("Sorry, this operation is not supported.\n");

    return rejected;
}
/* Hook the entry/exit functions into the module loader. */
module_init(chardev_init);
module_exit(chardev_exit);

MODULE_LICENSE("GPL");
/* modpost warns about modules that carry no description. */
MODULE_DESCRIPTION("Character device example that reports how often it was opened");
```
基本上就是開一個device,定義open,read,write,release的syscall來時要如何應對,最簡單的測試方式就是跑cat /dev/chardev,整個過程cat會去開這個device file所以會觸發open syscall,接下來會read,最後會close device file。所以會看到`"I already told you %d times Hello world!\n"`輸出在螢幕上。現在我要做一些調整,讓兩個devices共用同一個file operations,這樣我們要做的就是多create一個device file,兩個device file的minor要不一樣。
```clike=
/*
* chardev.c: Creates a read-only char device that says how many times
* you have read from the dev file
*/
#include <linux/atomic.h>
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h> /* for sprintf() */
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/types.h>
#include <linux/uaccess.h> /* for get_user and put_user */
#include <linux/version.h>
#include <asm/errno.h>
/* Prototypes - this would normally go in a .h file */
static int device_open(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char __user *, size_t,
loff_t *);
#define DEVICE_CNT 2
#define DRIVER_NAME "chardev" /* Dev name as it appears in /proc/devices */
#define BUF_LEN 80 /* Max length of the message from the device */
static char *device_name[DEVICE_CNT] = {"chardev0", "chardev1"};
/* Global variables are declared as static, so are global within the file. */
static int major; /* major number assigned to our device driver */
enum
{
CDEV_NOT_USED,
CDEV_EXCLUSIVE_OPEN,
};
/* Is device open? Used to prevent multiple access to device */
static atomic_t already_open[DEVICE_CNT] = {ATOMIC_INIT(CDEV_NOT_USED), ATOMIC_INIT(CDEV_NOT_USED)};
// static char msg[BUF_LEN + 1]; /* The msg the device will give when asked */
static char msg[DEVICE_CNT][BUF_LEN + 1]; /* The msg the device will give when asked */
static struct class *cls;
static struct file_operations chardev_fops = {
.read = device_read,
.write = device_write,
.open = device_open,
.release = device_release,
};
/*
 * Module entry point: registers a dynamically assigned major number,
 * creates the device class and one /dev node per entry of device_name[].
 *
 * Returns 0 on success or a negative errno. Every step that already
 * succeeded is undone before an error is returned.
 */
static int __init chardev_init(void)
{
    struct device *dev;
    int i;
    int ret;

    major = register_chrdev(0, DRIVER_NAME, &chardev_fops);
    if (major < 0) {
        pr_alert("Registering char device failed with %d\n", major);
        return major;
    }
    pr_info("I was assigned major number %d.\n", major);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 4, 0)
    cls = class_create(DRIVER_NAME);
#else
    cls = class_create(THIS_MODULE, DRIVER_NAME);
#endif
    /* class_create() reports failure as an ERR_PTR, never as NULL. */
    if (IS_ERR(cls)) {
        ret = PTR_ERR(cls);
        pr_alert("Creating class %s failed with %d\n", DRIVER_NAME, ret);
        goto err_unregister;
    }

    for (i = 0; i < DEVICE_CNT; i++) {
        /* The name goes through "%s" because the argument is a format. */
        dev = device_create(cls, NULL, MKDEV(major, i), NULL, "%s",
                            device_name[i]);
        if (IS_ERR(dev)) {
            ret = PTR_ERR(dev);
            pr_alert("Creating /dev/%s failed with %d\n", device_name[i],
                     ret);
            goto err_devices;
        }
        pr_info("Device created on /dev/%s\n", device_name[i]);
    }
    return 0;

err_devices:
    /* Remove only the nodes that were created before the failure. */
    while (i-- > 0)
        device_destroy(cls, MKDEV(major, i));
    class_destroy(cls);
err_unregister:
    unregister_chrdev(major, DRIVER_NAME);
    return ret;
}
/*
 * Module exit point: removes every device node, the class, and the major
 * number registration made in chardev_init().
 */
static void __exit chardev_exit(void)
{
    int i;

    for (i = 0; i < DEVICE_CNT; i++)
        device_destroy(cls, MKDEV(major, i));
    class_destroy(cls);

    /*
     * The major was obtained with register_chrdev(), so it has to be
     * released with unregister_chrdev(). unregister_chrdev_region() expects
     * a dev_t rather than a bare major and would leave the registration,
     * with its pointer to this module's file_operations, in place.
     */
    unregister_chrdev(major, DRIVER_NAME);
    pr_alert("%s driver removed.\n", DRIVER_NAME);
}
/* Methods */
/* Called when a process tries to open the device file, like
* "sudo cat /dev/chardev"
*/
static int device_open(struct inode *inode, struct file *file)
{
static int counter[DEVICE_CNT] = {0, 0};
int minor = iminor(inode);
if (atomic_cmpxchg(&already_open[minor], CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN))
return -EBUSY;
sprintf(msg[minor], "%s already told you %d times Hello world!\n", device_name[minor], counter[minor]++);
try_module_get(THIS_MODULE);
return 0;
}
static int device_open_test(struct inode *inode, struct file *file)
{
static int counter[DEVICE_CNT] = {0, 0};
int minor = iminor(inode);
if (atomic_cmpxchg(&already_open[minor], CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN))
return -EBUSY;
sprintf(msg[minor], "%s already told you %d times Hello world! (msg from device_open_test)\n", device_name[minor], counter[minor]++);
try_module_get(THIS_MODULE);
return 0;
}
/* Called when a process closes the device file. */
static int device_release(struct inode *inode, struct file *file)
{
int minor = iminor(inode);
/* We're now ready for our next caller */
atomic_set(&already_open[minor], CDEV_NOT_USED);
/* Decrement the usage count, or else once you opened the file, you will
* never get rid of the module.
*/
module_put(THIS_MODULE);
return 0;
}
/*
 * Called when a process, which already opened the dev file, attempts to
 * read from it. Copies the message of the file's minor to user space
 * starting at *offset and advances *offset by the number of bytes copied.
 *
 * filp:   the open file, see include/linux/fs.h; selects the minor
 * buffer: user-space buffer to fill with data
 * length: capacity of buffer in bytes
 * offset: position inside the message; reset to 0 at end of message
 *
 * Returns the number of bytes put into the buffer, 0 at end of message,
 * -ENODEV for a minor outside 0..DEVICE_CNT-1, -EINVAL for a negative
 * offset, or -EFAULT if the user buffer cannot be written.
 */
static ssize_t device_read(struct file *filp, char __user *buffer,
                           size_t length, loff_t *offset)
{
    unsigned int minor = iminor(filp->f_inode);
    const char *msg_ptr;
    const char *msg_end;
    ssize_t bytes_read = 0;

    /* The minor indexes msg[]; refuse anything outside the array. */
    if (minor >= DEVICE_CNT)
        return -ENODEV;
    if (*offset < 0)
        return -EINVAL;

    /*
     * The offset is chosen by the caller (e.g. via pread()), so it has to
     * be validated before it is used as an index. An offset at or past the
     * terminating NUL means end of file.
     */
    if (*offset >= (loff_t)sizeof(msg[minor]) || !msg[minor][*offset]) {
        *offset = 0; /* reset the offset */
        return 0;    /* signify end of file */
    }
    msg_ptr = msg[minor] + *offset;
    msg_end = msg[minor] + sizeof(msg[minor]);

    /* Actually put the data into the buffer */
    while (length && msg_ptr < msg_end && *msg_ptr) {
        /*
         * The buffer is in the user data segment, not the kernel segment,
         * so "*" assignment won't work. put_user copies one value from
         * kernel space to user space and returns non-zero if the user
         * address is not writable.
         */
        if (put_user(*msg_ptr, buffer))
            return -EFAULT;
        msg_ptr++;
        buffer++;
        length--;
        bytes_read++;
    }
    *offset += bytes_read;

    /* Most read functions return the number of bytes put into the buffer. */
    return bytes_read;
}
/*
 * write() handler, reached for example by: echo "hi" > /dev/chardev0
 *
 * The devices are read-only: the attempt is logged and refused with
 * -EINVAL. None of the arguments are examined.
 */
static ssize_t device_write(struct file *filp, const char __user *buff,
                            size_t len, loff_t *off)
{
    const ssize_t rejected = -EINVAL;

    pr_alert("Sorry, this operation is not supported.\n");

    return rejected;
}
/* Hook the entry/exit functions into the module loader. */
module_init(chardev_init);
module_exit(chardev_exit);

MODULE_LICENSE("GPL");
/* modpost warns about modules that carry no description. */
MODULE_DESCRIPTION("Character device example: two minors sharing one file_operations");
```
上面的程式執行起來會是
```
~ # cat dev/chardev1
chardev1 already told you 0 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 0 times Hello world!
~ # cat dev/chardev1
chardev1 already told you 1 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 1 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 2 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 3 times Hello world!
~ # cat dev/chardev1
chardev1 already told you 2 times Hello world!
```
兩個device的變數用minor隔離開來但執行的函數是相同的(共用fops),在__init裡面做的改動就是將DEVICE_NAME->DRIVER_NAME,畢竟現在有兩個device file同名的話會出問題所以我另外設定變數device_name[]區分他們,下面是產生不同device file的方式,MKDEV是為了產生一對{major,minor},具體的結構是定義為dev_t:
```
for (; i < DEVICE_CNT; i++)
{
device_create(cls, NULL, MKDEV(major, i), NULL, device_name[i]);
}
```
到這邊似乎是很完美,但是,如果我有一些device的file operations想要和之前的不一樣,例如我想要minor 0\~5的device使用chardev_fops_0,但是6\~10的想要用chardev_fops_1,要怎麼處理?這就是這章後面在講的問題,但是他沒給範例。底下是我自己亂搞的一個小範例,使用alloc_chrdev_region搭配cdev,將兩個device分到不同的file operations:
```clike=
/*
* chardev.c: Creates a read-only char device that says how many times
* you have read from the dev file
*/
#include <linux/atomic.h>
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h> /* for sprintf() */
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/types.h>
#include <linux/uaccess.h> /* for get_user and put_user */
#include <linux/version.h>
#include <asm/errno.h>
/* Prototypes - this would normally go in a .h file */
static int device_open(struct inode *, struct file *);
static int device_open_test(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char __user *, size_t,
loff_t *);
#define DEVICE_CNT 2
#define DRIVER_NAME "chardev" /* Dev name as it appears in /proc/devices */
static dev_t base_dev, devs[DEVICE_CNT];
static struct cdev my_cdevs[DEVICE_CNT];
#define BUF_LEN 80 /* Max length of the message from the device */
static char *device_name[DEVICE_CNT] = {"chardev0", "chardev1"};
/* Global variables are declared as static, so are global within the file. */
static int major; /* major number assigned to our device driver */
enum
{
CDEV_NOT_USED,
CDEV_EXCLUSIVE_OPEN,
};
/* Is device open? Used to prevent multiple access to device */
static atomic_t already_open[DEVICE_CNT] = {ATOMIC_INIT(CDEV_NOT_USED), ATOMIC_INIT(CDEV_NOT_USED)};
// static char msg[BUF_LEN + 1]; /* The msg the device will give when asked */
static char msg[DEVICE_CNT][BUF_LEN + 1]; /* The msg the device will give when asked */
static struct class *cls;
static struct file_operations chardev_fops = {
.read = device_read,
.write = device_write,
.open = device_open,
.release = device_release,
};
static struct file_operations chardev_fops_test = {
.read = device_read,
.write = device_write,
.open = device_open_test,
.release = device_release,
};
/*
 * Module entry point: allocates DEVICE_CNT consecutive device numbers,
 * creates the device class, and for each number adds a cdev and creates
 * the matching /dev node. Device 0 is bound to chardev_fops, every other
 * device to chardev_fops_test.
 *
 * Returns 0 on success or a negative errno. Every step that already
 * succeeded is undone before an error is returned.
 */
static int __init chardev_init(void)
{
    struct device *dev;
    int i;
    int ret;

    ret = alloc_chrdev_region(&base_dev, 0, DEVICE_CNT, DRIVER_NAME);
    if (ret) {
        pr_alert("Registering char device failed with %d\n", ret);
        return ret;
    }
    major = MAJOR(base_dev);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 4, 0)
    cls = class_create(DRIVER_NAME);
#else
    cls = class_create(THIS_MODULE, DRIVER_NAME);
#endif
    /* class_create() reports failure as an ERR_PTR, never as NULL. */
    if (IS_ERR(cls)) {
        ret = PTR_ERR(cls);
        pr_alert("Creating class %s failed with %d\n", DRIVER_NAME, ret);
        goto err_region;
    }

    for (i = 0; i < DEVICE_CNT; i++) {
        devs[i] = MKDEV(MAJOR(base_dev), MINOR(base_dev) + i);

        /* The cdev is what ties one dev_t to one file_operations table. */
        cdev_init(&my_cdevs[i], i == 0 ? &chardev_fops : &chardev_fops_test);
        my_cdevs[i].owner = THIS_MODULE;

        ret = cdev_add(&my_cdevs[i], devs[i], 1);
        if (ret) {
            pr_alert("Adding cdev for %s failed with %d\n", device_name[i],
                     ret);
            goto err_devices;
        }

        /* The name goes through "%s" because the argument is a format. */
        dev = device_create(cls, NULL, devs[i], NULL, "%s", device_name[i]);
        if (IS_ERR(dev)) {
            ret = PTR_ERR(dev);
            pr_alert("Creating /dev/%s failed with %d\n", device_name[i],
                     ret);
            cdev_del(&my_cdevs[i]);
            goto err_devices;
        }
        pr_info("Device created on /dev/%s\n", device_name[i]);
    }
    pr_info("Module loaded.Driver name: %s, Major: %d\n", DRIVER_NAME, MAJOR(base_dev));
    return 0;

err_devices:
    /* Undo only the devices that were fully set up before the failure. */
    while (i-- > 0) {
        device_destroy(cls, devs[i]);
        cdev_del(&my_cdevs[i]);
    }
    class_destroy(cls);
err_region:
    unregister_chrdev_region(base_dev, DEVICE_CNT);
    return ret;
}
/*
 * Module exit point: for each device removes the /dev node and deletes its
 * cdev, then destroys the class and releases the device number region.
 */
static void __exit chardev_exit(void)
{
    int idx;

    for (idx = 0; idx < DEVICE_CNT; ++idx) {
        device_destroy(cls, devs[idx]);
        cdev_del(&my_cdevs[idx]);
    }

    class_destroy(cls);
    unregister_chrdev_region(base_dev, DEVICE_CNT);

    pr_alert("%s driver removed.\n", DRIVER_NAME);
}
/* Methods */
/* Called when a process tries to open the device file, like
* "sudo cat /dev/chardev"
*/
static int device_open(struct inode *inode, struct file *file)
{
static int counter[DEVICE_CNT] = {0, 0};
int minor = iminor(inode);
if (atomic_cmpxchg(&already_open[minor], CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN))
return -EBUSY;
sprintf(msg[minor], "%s already told you %d times Hello world!\n", device_name[minor], counter[minor]++);
try_module_get(THIS_MODULE);
return 0;
}
static int device_open_test(struct inode *inode, struct file *file)
{
static int counter[DEVICE_CNT] = {0, 0};
int minor = iminor(inode);
if (atomic_cmpxchg(&already_open[minor], CDEV_NOT_USED, CDEV_EXCLUSIVE_OPEN))
return -EBUSY;
sprintf(msg[minor], "%s already told you %d times Hello world! (msg from device_open_test)\n", device_name[minor], counter[minor]++);
try_module_get(THIS_MODULE);
return 0;
}
/*
 * release() handler, run when the process closes the device file.
 * Always returns 0.
 */
static int device_release(struct inode *inode, struct file *file)
{
    int idx = iminor(inode);

    /* Mark this minor free so that the next open() on it can succeed. */
    atomic_set(&already_open[idx], CDEV_NOT_USED);

    /*
     * Give back the reference taken in the open handler. If it were kept,
     * the module could never be removed after the first open.
     */
    module_put(THIS_MODULE);

    return 0;
}
/*
 * Called when a process, which already opened the dev file, attempts to
 * read from it. Copies the message of the file's minor to user space
 * starting at *offset and advances *offset by the number of bytes copied.
 *
 * filp:   the open file, see include/linux/fs.h; selects the minor
 * buffer: user-space buffer to fill with data
 * length: capacity of buffer in bytes
 * offset: position inside the message; reset to 0 at end of message
 *
 * Returns the number of bytes put into the buffer, 0 at end of message,
 * -ENODEV for a minor outside 0..DEVICE_CNT-1, -EINVAL for a negative
 * offset, or -EFAULT if the user buffer cannot be written.
 */
static ssize_t device_read(struct file *filp, char __user *buffer,
                           size_t length, loff_t *offset)
{
    unsigned int minor = iminor(filp->f_inode);
    const char *msg_ptr;
    const char *msg_end;
    ssize_t bytes_read = 0;

    /* The minor indexes msg[]; refuse anything outside the array. */
    if (minor >= DEVICE_CNT)
        return -ENODEV;
    if (*offset < 0)
        return -EINVAL;

    /*
     * The offset is chosen by the caller (e.g. via pread()), so it has to
     * be validated before it is used as an index. An offset at or past the
     * terminating NUL means end of file.
     */
    if (*offset >= (loff_t)sizeof(msg[minor]) || !msg[minor][*offset]) {
        *offset = 0; /* reset the offset */
        return 0;    /* signify end of file */
    }
    msg_ptr = msg[minor] + *offset;
    msg_end = msg[minor] + sizeof(msg[minor]);

    /* Actually put the data into the buffer */
    while (length && msg_ptr < msg_end && *msg_ptr) {
        /*
         * The buffer is in the user data segment, not the kernel segment,
         * so "*" assignment won't work. put_user copies one value from
         * kernel space to user space and returns non-zero if the user
         * address is not writable.
         */
        if (put_user(*msg_ptr, buffer))
            return -EFAULT;
        msg_ptr++;
        buffer++;
        length--;
        bytes_read++;
    }
    *offset += bytes_read;

    /* Most read functions return the number of bytes put into the buffer. */
    return bytes_read;
}
/*
 * write() handler, reached for example by: echo "hi" > /dev/chardev0
 *
 * The devices are read-only: the attempt is logged and refused with
 * -EINVAL. None of the arguments are examined.
 */
static ssize_t device_write(struct file *filp, const char __user *buff,
                            size_t len, loff_t *off)
{
    const ssize_t rejected = -EINVAL;

    pr_alert("Sorry, this operation is not supported.\n");

    return rejected;
}
/* Hook the entry/exit functions into the module loader. */
module_init(chardev_init);
module_exit(chardev_exit);

MODULE_LICENSE("GPL");
/* modpost warns about modules that carry no description. */
MODULE_DESCRIPTION("Character device example: per-minor file_operations via the cdev interface");
```
chardev0分配chardev_fops,chardev1分配chardev_fops_test,執行的結果:
```
~ # cat dev/chardev0
chardev0 already told you 0 times Hello world!
~ # cat dev/chardev1
chardev1 already told you 0 times Hello world! (msg from device_open_test)
~ # cat dev/chardev0
chardev0 already told you 1 times Hello world!
~ # cat dev/chardev0
chardev0 already told you 2 times Hello world!
~ # cat dev/chardev1
chardev1 already told you 1 times Hello world! (msg from device_open_test)
~ # cat dev/chardev1
chardev1 already told you 2 times Hello world! (msg from device_open_test)
~ # cat dev/chardev1
chardev1 already told you 3 times Hello world! (msg from device_open_test)
```
chardev_fops_test中的.open對應到device_open_test,會比原來的版本增加 (msg from device_open_test),這個範例描述了原書裡面的這句話`However, register_chrdev() would occupy a range of minor numbers associated with the given major. The recommended way to reduce waste for char device registration is using cdev interface.`。這句話字面上的意思是:register_chrdev()會一次把該major底下的minor 0\~255全部佔走,就算只用到其中一兩個也一樣,所以會浪費;而且這256個minor全部綁定同一組file operations,register_chrdev沒辦法分配不同的file operations給不同的device file,如此一來那個唯一的file operations連接的function就必須自己依照minor去分派不同的行為,會很醜很麻煩。改用alloc_chrdev_region加上cdev,就可以只申請需要的minor數量,並且讓每個minor(或每段minor範圍)有自己的file operations。
不正式的說cdev扮演的角色就是負責分配不同的file operations給不同的minor,剩下就是release時要delete它,至於這個minor對應的device就要用device_create生成(當/dev是devtmpfs),所以如果只init cdev還不夠,系統不會自己幫你生出device file,畢竟cdev沒有你要生成的device的名稱。
## Murmur
看這段如果沒有試試以上例子,我是搞不清楚那些差別的,加上他給的例子實在太簡單,對書上的內容描述不夠充分,`The recommended way to reduce waste for char device registration is using cdev interface.`,給的例子又不是"recommended way",看得頭很痛。寫這些例子的時候遇到一些bug,kernel crash了幾次,還好使用QEMU,不用整天開機關機的試。前面沒有特別提到put_user,這個還蠻重要的,註解裡也有寫清楚,要看一下。