Try   HackMD

Review on my experience of Linux kernel

tags: linux kernel

This document records my experience with the Linux kernel.

GPT MBR patch in Linux

This is a lab in which I try to understand more about MBR and GPT in the Linux kernel.

In this work, I managed to use the secondary (alternate) GPT header to repair a corrupted primary GPT.

The note is over there: https://hackmd.io/@ztex/rypYsHWzP

The kernel code snippet below shows the logic I use to detect that the alternate partition table is valid and to use it to repair the primary table.

​​else if (good_agpt) {/* alterative gpt is good, but primary is not, so write agpt to pgpt*/ ​​ *gpt = agpt; ​​ *ptes = aptes;pr_warn("[ZTEX][RECOVER] [%s] Try to recover from alternate GPT\n", state->name);kfree(pgpt);kfree(pptes); ​ pgpt = NULL; ​ pptes = NULL; ​ ret = pgpt_recover(state, &pgpt, &pptes, &agpt, &aptes);if (ret) {pr_warn("[ZTEX][RECOVER]Write recoverd GPT back to lba %d\n", GPT_PRIMARY_PARTITION_TABLE_LBA);if(write_gpt_header(state, pgpt, GPT_PRIMARY_PARTITION_TABLE_LBA) == NULL) {pr_warn("Fail to recover the primary gpt from alterative gpt\n"); ​​ kfree(pgpt); ​​ kfree(pptes); ​​ pr_warn("[ZTEX][RECOVER]After trying to fix primary, fail, use alternate GPT.\n"); ​​ return 1;} ​​ else { ​​ *gpt = pgpt; ​​ *ptes = pptes; ​​ return 1; ​​ }}kfree(pgpt);kfree(pptes); ​​ return 1; ​​ }

The code repository is over there: https://github.com/tony2037/newbie-lab-pci-sata-scsi/tree/master/GPT

PCI and AHCI

In this project I wanted to get more familiar with PCI devices.

The note is over here: https://hackmd.io/@ztex/Bk-Cm6kfv

In this project I first use the Linux kernel PCI API to find the configuration space base address.

Then, by referring to the spec, I work out the offset of the BAR (base address register).

Then I can manipulate the data the way I want.

The code snippet shows how I find the configuration space.

... // Enable pci device ret = pci_enable_device(controller1.dev); if(ret < 0) printk(KERN_WARNING "pci enable fail, vendor(%x):device(%x)\n", controller1.vendor, controller1.device); printk(KERN_WARNING "pci (1) enable success, vendor(%x):device(%x)\n", controller1.vendor, controller1.device); ret = pci_enable_device(controller2.dev); if(ret < 0) printk(KERN_WARNING "pci enable fail, vendor(%x):device(%x)\n", controller2.vendor, controller2.device); printk(KERN_WARNING "pci (2) enable success, vendor(%x):device(%x)\n", controller2.vendor, controller2.device); /* Get the I/O base address from the appropriate base address register (bar) in the configuration space */ controller1.io_base = pci_resource_start(controller1.dev, REQUIRE_BAR); controller2.io_base = pci_resource_start(controller2.dev, REQUIRE_BAR); /* Assign which pci the slots belong to*/ slots[1].controller = &controller2; slots[2].controller = &controller2; slots[3].controller = &controller1; slots[4].controller = &controller1; slots[1].port_number = SLOT1_PORT; slots[2].port_number = SLOT2_PORT; slots[3].port_number = SLOT3_PORT; slots[4].port_number = SLOT4_PORT; ...

The code is over here: https://github.com/tony2037/newbie-lab-pci-sata-scsi/blob/master/pci-monitor.c

To understand more about PCI, I wrote this little tool that works like lspci; it is based on the x86 platform:
https://github.com/tony2037/zspci/blob/master/zspci.c

This code snippet shows how I address the I/O space and traverse every bus, device and function to dump the info of all PCI devices.

/*
 * Read one 32-bit PCI configuration register through the port-I/O
 * address/data register pair.
 *
 * bus, dev, func, regoffset: location of the register; they are combined
 *                            into a single address word by TO_ADDRESS().
 * data:                      out-parameter that receives the value read
 *                            from CONFIG_DATA.
 */
void ReadData(uint32_t bus, uint32_t dev, uint32_t func, uint32_t regoffset, uint32_t *data)
{
    /* Select the target register by writing its address word first... */
    const uint32_t cfg_word = TO_ADDRESS(bus, dev, func, regoffset);

    outl(cfg_word, CONFIG_ADDRESS);

    /* ...then fetch the selected register's contents from the data port. */
    *data = inl(CONFIG_DATA);
}

...

/* Probe every bus/device/function combination and dump each one that responds. */
for(bus = 0; bus < MAX_BUS; bus++) {
        for(device = 0; device < MAX_DEVICE; device++) {
            for(func = 0; func < MAX_FUNCTION; func++) {
                /* Read register 0; a value of all ones or zero is treated as "nothing here". */
                ReadData(bus, device, func, 0, &data);
                if ((data != 0xffffffff) && (data != 0)) {
                    printf("\n%02x:%02x:%02x\n", bus, device, func);
                    /* Dump 16 registers, four per row; each row is labelled with regoffset * 4. */
                    for (regoffset = 0; regoffset < 16; regoffset++) {
                        if(regoffset % 4 == 0) {
                            printf("%02x: ", regoffset * 4);
                        }
                        ReadData(bus, device, func, regoffset, &data);
                        Hexdump32(data);
                        if(regoffset % 4 == 3) {
                            printf("\n");
                        }
                    }
                    /* Read the header into a zeroed struct and print it; a negative result aborts the scan. */
                    memset(&hdr, 0, sizeof(struct Header));
                    ReadHeader(bus, device, func, &hdr);
                    ret = HexdumpHeader(&hdr);
                    if (ret < 0) {
                        goto fail;
                    }
                }
            }
        }
    }

    /* Set the I/O privilege level back to 0 once the scan is done. */
    iopl(0);

GPIO and IOCTL

Here I try to use a GPIO pin to control the disk power and use ioctl to communicate with user space.

The note is over here: https://hackmd.io/@ztex/ByU_3LAWw
the code is over here: https://github.com/tony2037/newbie-lab-pci-sata-scsi/blob/master/gpio-control.c

The code snippet shows how to control the disk power through a given GPIO number:

/*
 * Dispatch on the textual command: "up" and "down" each write to the GPIO pin
 * looked up for `slot`; any other command is only logged.
 * NOTE(review): GPIOF_INIT_HIGH / GPIOF_INIT_LOW look like gpio request flags
 * rather than plain 1/0 levels -- confirm SYNO_GPIO_WRITE accepts them as the
 * value to write.
 * NOTE(review): `slot` indexes irgGPIOPins with no range check in this
 * snippet -- presumably validated by the caller; verify.
 */
if (strcmp(command, "up") == 0) {
		SYNO_GPIO_WRITE(irgGPIOPins[slot], GPIOF_INIT_HIGH);
	} else if (strcmp(command, "down") == 0) {
		SYNO_GPIO_WRITE(irgGPIOPins[slot], GPIOF_INIT_LOW);
	
	} else {
		/* Unknown command: report it and leave the pin untouched. */
		printk(KERN_INFO "No such command: %s\n", command);
	}

In user space, we use a custom character device to communicate with kernel space:

fd = open("/dev/user-control", O_RDWR); if (fd == -1) { perror("open /dev/user-control failed.\n"); goto end; } printf("Let slot %d goes %s\n", slot, operation); sprintf(buf, "%d %s", slot, operation); if(strcmp(operation, "up") != 0 && strcmp(operation, "down") != 0) { printf("No such operation: %s\n", operation); goto end; } if(strcmp(operation, "down") == 0) { dev_fd = open(chpDevFile, O_RDWR); if (dev_fd == -1) { printf("open %s failed.\n", chpDevFile); goto end; } ARGS_STANDBYNOW(args_standbynow); ARGS_CHECKPOWERMODE(args_checkpowermode); ret = ioctl(dev_fd, HDIO_DRIVE_CMD, args_standbynow);

Linux Netfilter

In this project, I try out the netfilter mechanism.

Here I try to capture the net packet with certain pattern.

The note is over here: https://hackmd.io/@ztex/ryLeDk04v

the code below shows how I register a net filter.

/*
 * Module init: allocates one zeroed nf_hook_ops for each direction, fills in
 * the outgoing hook (the incoming hook's setup is elided in this snippet) and
 * registers both hooks in init_net.
 *
 * Always returns 0.
 *
 * NOTE(review): in the lines shown, the kcalloc() results are not NULL-checked
 * and the return values of nf_register_net_hook() are ignored; the elided
 * section may cover the former -- confirm.
 */
static int __init xxxxNETFILTER_init(void)
{
    nfhoIN = (struct nf_hook_ops*)kcalloc(1, sizeof(struct nf_hook_ops), GFP_KERNEL);
    nfhoOUT = (struct nf_hook_ops*)kcalloc(1, sizeof(struct nf_hook_ops), GFP_KERNEL);
    
    ...
    
    nfhoOUT->hook = (nf_hookfn*)hfuncOUT;     /* hook function */
    nfhoOUT->hooknum = NF_INET_LOCAL_OUT;    /* locally generated outgoing packets */
    nfhoOUT->pf = PF_INET;    /* IPv4 */
    nfhoOUT->priority = NF_IP_PRI_FIRST;    /* max hook priority */

    /* Register both hooks in the initial network namespace. */
    nf_register_net_hook(&init_net, nfhoIN);
    nf_register_net_hook(&init_net, nfhoOUT);

    return 0;
}

The full implementation is over here: https://github.com/tony2037/synonetfilter

My first experience with riscv architecture

kindly see my note here: https://hackmd.io/@ztex/HyDMZyJtE

and the repository here:
https://github.com/tony2037/riscv-tinyemu

In this project I managed to port buildroot onto tinyemu (a sort of small version of qemu).

After I saw the magical project of darkriscv: https://github.com/tony2037/darkriscv

I then try to implement a riscv driver

The note is over here: https://github.com/tony2037/riscv-from-scratch

The code below is a simple crt implemented with riscv assembly.

/* Minimal C runtime entry point: set up gp, sp and s0, then jump to main. */
__start:
    .cfi_startproc
    .cfi_undefined ra
    /* Temporarily disable linker relaxation around the gp load, then restore the previous options. */
    .option push
    .option norelax
    la gp, __global_pointer$
    .option pop
    /* Load the address of __stack_top into sp, the stack pointer. */
    la sp, __stack_top
    /* s0 is a "saved register": it is preserved across function calls. It also serves as the frame pointer, which lets each function invocation keep its own area on the stack. Here it is initialised to the same value as sp. */
    add s0, sp, zero
    /* Unconditional jump to main; using zero as the link register discards the return address. */
    jal zero, main
    .cfi_endproc
    .end

The code below shows my attempt to implement a function that follows the RISC-V C ABI and calling convention.

# uart_put_char: sets up a stack frame, spills the low byte of a0 to the stack,
# then copies 13 bytes starting at `message` to the UART address one at a time.
# NOTE(review): despite the name, the byte passed in a0 is only stored on the
# stack; the loop transmits `message`, not the argument.
# NOTE(review): as shown there is no epilogue (sp/fp are not restored and there
# is no ret), so execution runs past the loop into whatever follows.
# NOTE(review): s1-s4 are callee-saved registers in the RISC-V calling
# convention but are overwritten here without being saved first.
uart_put_char:
    .cfi_startproc
    # create 32 bytes of space on stack
    addi sp,sp,-32
    # store the caller's frame pointer inside the newly created stack frame
    sd fp,24(sp)
    # set the frame pointer to the beginning of the stack frame
    # (sp + 32, i.e. the value sp had on entry)
    addi fp,sp,32
    # copy the argument from a0 into a5; this is a plain register copy
    # calling convention reference:
    # https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md#-integer-calling-convention
    # mv is a pseudoinstruction that expands to: addi a5,a0,0
    mv a5,a0
    # store the least-significant byte of a5 on the stack at
    # address (frame pointer - 17 bytes); s0 is the same register as fp
    sb a5,-17(s0)
    # s1 = UART base address, s2 = start of message, s3 = end pointer (message + 13)
    li s1, 0x10000000
    la s2, message
    addi s3, s2, 13

# Copy one byte per iteration from [s2] to the UART address in s1 until s2 reaches s3.
loop:
    lb s4, 0(s2)
    sb s4, 0(s1)
    addi s2, s2, 1
    blt s2, s3, loop

    .cfi_endproc

message:
    # "Hello, bear\n" is 12 characters; .string appends a terminating NUL,
    # so the data occupies 13 bytes
    .string "Hello, bear\n"

SCSI (Small Computer System Interface) and SATA (Serial Advanced Technology Attachment) is a draft about my study of the two protocols: https://hackmd.io/@ztex/SyRjSs5-P

Block device, BIO in linux kernel is a draft about my understanding of the block device and the IO structure:
https://hackmd.io/@ztex/SJaMGhjfD

Linux storage stack and raid is about my study of the raid in linux: https://hackmd.io/@ztex/ryqqaWQED

The work below is a tool that calculates how data is mapped onto the disks in a RAID array.

This repository is over here: https://github.com/tony2037/bit-newbie

The code snippet shows how I parse a disk array:

iskarray::Diskarray(string md) : mdName(md), minDiskSize(UINT64_MAX)
{
    DIR *dir = NULL;
    struct dirent *ent = NULL;
    uint32_t offset = 0;
    uint64_t disksize = 0;
    int slot = -1;
    char slavesPath[128] = {0};
    char buffer[32] = {0};
    FILE *File = NULL;
    char Path[128] = {0};
    long FileSize = 0;
    vector<Disk> _Disks;
    
    sprintf(slavesPath, SYSFS_MD_SLAVE_FORMAT, md.c_str());
    if ((dir = opendir (slavesPath)) != NULL) {
        /* print all the files and directories within directory */
        while ((ent = readdir (dir)) != NULL) {
            memset(Path, 0, 128);
            memset(buffer, 0, 32);
            sprintf(Path, SYSFS_MD_DISK_OFFSET_FORMAT, md.c_str(), ent->d_name);
            if (parseFile(Path, buffer, 32) < 0) {
                continue;
            }
            sscanf(buffer, "%lu", &offset);

            memset(Path, 0, 128);
            memset(buffer, 0, 32);
            sprintf(Path, SYSFS_MD_DISK_SLOT_FORMAT, md.c_str(), ent->d_name);
            if (parseFile(Path, buffer, 32) < 0) {
                continue;
            }
            sscanf(buffer, "%d", &slot);

            _Disks.push_back(Disk(ent->d_name, offset, slot));

            memset(Path, 0, 128);
            memset(buffer, 0, 32);
            sprintf(Path, SYSFS_MD_DISK_SIZE_FORMAT, md.c_str(), ent->d_name);
            if (parseFile(Path, buffer, 32) < 0) {
                continue;
            }
            sscanf(buffer, "%lu", &disksize);
            disksize = disksize * 1024;
            if (this->minDiskSize > disksize) {
                this->minDiskSize = disksize;
            }
        }
        closedir (dir);
    }
    else {
        perror("Open slaves directory, failed\n");
    }

    this->Disks = vector<Disk>(_Disks);
    for (auto disk : _Disks) {
        this->Disks[disk.slot] = disk;
    }

    memset(Path, 0, 128);
    memset(buffer, 0, 32);
    sprintf(Path, SYSFS_MD_CHUNK_SIZE_FORMAT, md.c_str());
    if (parseFile(Path, buffer, 32) < 0) {
        goto fail;
    }
    sscanf(buffer, "%lu", &this->chunkSize);

    memset(Path, 0, 128);
    memset(buffer, 0, 32);
    sprintf(Path, SYSFS_MD_LEVEL_FORMAT, md.c_str());
    if (parseFile(Path, buffer, 32) < 0) {
        goto fail;
    }
    this->level = buffer;
    this->level.erase(this->level.end() - 1);

    this->parseArraysize();
fail:
    return;
}

Kprobe

Here is a blog I wrote about the kprobe: https://ztex.medium.com/kprobe-筆記-59d4bdb1e1fe

I actually used this to track the bio in the raid5 elevator algorithm in the kernel.

Semaphore

This is a note about a bug I fixed.

This job involved knowledge about semaphores in Linux,
whether System V based or POSIX, and whether named or unnamed.

It also covers how I solved this bug with a Linux mechanism called inotify.

the blog is over here: https://ztex.medium.com/how-to-protect-disk-with-linux-semaphore-lock-inotify-and-poll-50c58fbb578e

code is over here: https://github.com/tony2037/spotmerge

base: ? * 12
RSU: 10 * 3500 * 30 share (4 years avg) (5 15 40 40)
site on bonus: 36 (first 2 years)