2021q1 第 15 週測驗題

tags: `linux2021`

目的: 檢驗學員對 Linux 記憶體管理、memfd 和 mmap 系統呼叫的認知

測驗 `1`

在你所不知道的 C 語言：連結器和執行檔資訊提過 ELF 執行檔格式，更多資訊可見 Executable and Linkable Format，以 64 位元 ELF 來說，開頭的幾個位元組的意義:

offset	size	Purpose
0x00	4	0x7F followed by ELF(`45` `4c` `46`) in ASCII; these four bytes constitute the magic number.
0x04	1	This byte is set to either 1 or 2 to signify 32- or 64-bit format, respectively.
0x05	1	This byte is set to either 1 or 2 to signify little or big endianness, respectively. This affects interpretation of multi-byte fields starting with offset `0x10`.
0x06	1	Set to 1 for the original and current version of ELF.
…	…	…待續…

以下程式碼嘗試在既有的 ELF 檔案內嵌另一個 ELF 檔案 (可預先加密)，目的是隱匿特定的程式，避免被掃毒程式或防火牆偵測出來，或將高價值的程式嵌入到文件、圖片，甚至是影音檔案中，透過特定的載入器自檔案提取出執行檔並執行，這手法在 Digital rights management (DRM) 和 Digital watermarking 領域不算少見。

假設即將被嵌入的程式碼名為 payload.c:

#include <stdio.h>                          
/* Payload entry point: write "Hello world!" plus a newline to stdout and
 * return 0.
 */
int main() { puts("Hello world!"); return 0; }

編譯並移去除錯用的符號:

$ gcc -Os payload.c -o payload
$ strip -s payload

接著我們要開發得以載入 ELF 的程式，在這之前，先探討以下函式及系統呼叫:

memfd_create: 詳見解析 Linux 共享記憶體機制一文
memmem: GNU extension，在給定的記憶體範圍找到「非」C-style 字串 (仍為連續記憶體)
fexecve: 類似 execve 系統呼叫，但由給定的 file descriptor 載入程式並執行

假定程式載入器檔名為 loader.c，內容如下:

/* A program that executes a second (embedded) ELF */

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

/* glibc before 2.27 provides no wrapper for memfd_create(2), so provide our own. */
#include <sys/syscall.h>
/* Invoke the memfd_create system call directly through syscall(2).
 *
 * name:  label passed through to the kernel for the new in-memory file
 * flags: flag bits passed through to the kernel unchanged
 *
 * Returns whatever syscall(2) returns, narrowed to int.
 */
static inline int memfd_create(const char *name, unsigned int flags)
{
    long result = syscall(__NR_memfd_create, name, flags);

    return (int) result;
}

/* ELF format
 * https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
 *
 * Check the three identification bytes that follow the 4-byte magic number.
 * The magic itself is NOT checked here. ptr must point at the start of a
 * candidate header; bytes 4, 5 and 6 are read in that order, and reading
 * stops at the first byte that fails its check.
 *
 * Returns true when all three bytes hold an accepted value.
 */
static bool valid_elf(char *ptr)
{
    /* offset 0x4: 32/64-bit format */
    if (ptr[4] != 1 && ptr[4] != 2)
        return false;

    /* offset 0x5: endianness */
    if (ptr[5] != 1 && ptr[5] != 2)
        return false;

    /* offset 0x6: current version */
    return ptr[6] == 1;
}

/* Loader entry point.
 *
 * Reads this executable through /proc/<pid>/exe, searches it for a second
 * ELF image past the loader's own header, copies that image into an
 * in-memory file created with memfd_create(), forks, and runs the image with
 * fexecve() in the process selected by BBB.
 *
 * AAA and BBB are the quiz blanks and are intentionally left unfilled.
 *
 * argc: unused
 * argv: forwarded unchanged to the embedded program
 * envp: forwarded unchanged to the embedded program
 *
 * Returns 0 on success, otherwise a negative code:
 *   -1  cannot open our own executable
 *   -2  out of memory
 *   -3  no embedded ELF image found
 *   -4  memfd_create() failed
 *   -5  cannot stat or fully read our own executable
 *   -6  cannot copy the embedded image into the memfd
 *   -7  fork() failed
 * When fexecve() fails, its return value is returned instead.
 */
int main(int argc, char *argv[], char **envp)
{
    /* The ELF32 header is 52 bytes long; valid_elf() reads up to byte 6. */
    enum { ELF32_EHDR_SIZE = 52, ELF_IDENT_NEEDED = 7 };

    int pid = getpid();
    int ret = 0;
    int memfd = -1; /* stays -1 until memfd_create() succeeds */
    char *entirefile = NULL;

    char proc_path[32];
    snprintf(proc_path, sizeof proc_path, "/proc/%d/exe", pid);
    int filedesc = open(proc_path, O_RDONLY);
    if (filedesc < 0) {
        printf("Invalid file descriptor for /proc: %d\n", filedesc);
        return -1;
    }

    /* Find the size of this executable. Query the descriptor we already hold
     * so that the size belongs to the same file we are about to read.
     */
    struct stat st;
    if (fstat(filedesc, &st) != 0) {
        printf("Fail to stat %s (%s)\n", proc_path, strerror(errno));
        close(filedesc);
        return -5;
    }
    size_t size = st.st_size;

    entirefile = malloc(size);
    if (!entirefile) {
        printf("Insufficient memory.\n");
        close(filedesc);
        return -2;
    }

    /* read(2) may return fewer bytes than requested; keep going until the
     * whole file is in memory, EOF is hit, or a real error occurs.
     */
    size_t got = 0;
    while (got < size) {
        ssize_t n = read(filedesc, entirefile + got, size - got);
        if (n < 0) {
            if (errno == EINTR)
                continue;
            break;
        }
        if (n == 0)
            break;
        got += (size_t) n;
    }
    close(filedesc);
    if (got != size) {
        printf("Fail to read %s.\n", proc_path);
        ret = -5;
        goto cleanup;
    }

    /* Find the second ELF header. Skip the first 52 bytes (the size of a
     * 32-bit ELF header; a 64-bit one is 64 bytes long) so the loader's own
     * magic number is not matched. Any magic sequence that is not followed by
     * plausible identification bytes is skipped and the search continues.
     */
    const char elf_magic[] = {0x7F, 'E', 'L', 'F'};
    char *newelf = NULL;
    if (size > ELF32_EHDR_SIZE) {
        char *cursor = entirefile + ELF32_EHDR_SIZE;
        size_t remaining = size - ELF32_EHDR_SIZE;

        while ((newelf = memmem(cursor, remaining, elf_magic,
                                sizeof elf_magic))) {
            /* Bytes from the match to the end of the buffer. */
            size_t left = size - (size_t) (newelf - entirefile);

            if (left >= ELF_IDENT_NEEDED && valid_elf(newelf))
                break;

            /* False positive: resume right after this magic sequence. */
            cursor = newelf + sizeof elf_magic;
            remaining = left - sizeof elf_magic;
        }
    }
    if (!newelf) {
        printf("No second ELF header found.\n");
        ret = -3;
        goto cleanup;
    }

    int newsize = AAA;
    memfd = memfd_create("hidden", 0);
    if (memfd < 0) {
        printf("Invalid memfd.\n");
        ret = -4;
        goto cleanup;
    }

    /* Write ELF to temporary memory file */
    if (write(memfd, newelf, newsize) != newsize) {
        printf("Fail to write payload to memfd.\n");
        ret = -6;
        goto cleanup;
    }

    /* Deploy the payload as a different process */
    if (fork() < 0) {
        printf("Fail to fork (%s)\n", strerror(errno));
        ret = -7;
        goto cleanup;
    }
    if (BBB) {
        ret = fexecve(memfd, argv, envp); /* Execute the in-memory ELF */
        /* The above will only return if there is an error. */
        printf("Fail to execute payload. ret=%d (%s)\n", ret, strerror(errno));
    }

cleanup:
    if (memfd >= 0)
        close(memfd);
    free(entirefile);
    return ret;
}

編譯、嵌入上述 payload 執行檔，然後再執行: (你沒看錯，真的用 cat 命令)

$ gcc -Wall loader.c -o loader
$ cat payload >> loader
$ ./loader

在 x86_64 GNU/Linux (核心版本: 4.15+) 預期輸出為:

Hello world!

注意：只有一行 "Hello world!" 字串

請補完程式碼，只要考慮 x86_64 硬體架構即可。

作答區 (注意: 複選題，儘量選取有效的答案)

AAA = ?

(a) newelf - entirefile
(b) size - newelf
(c) entirefile - newelf
(d) size - newelf - entirefile
(e) size - newelf + entirefile
(f) newelf - entirefile + size
(g) entirefile - newelf - size

size - (newelf - entirefile)

BBB = ?

(a) getpid()
(b) getpid() != pid
(c) getpid() == pid
(d) 0
(e) 1

延伸問題:

解釋上述程式碼運作原理，指出其中不足處並改進;
參照 Embedding binary data in executables 和 incbin，將 payload 加密並嵌入到給定的 C 程式中，允許在執行時期解密再載入 payload 並執行
學習 Digital rights management (DRM) 手法，實作一個電子書程式，將特定的文字檔案加密再嵌入於執行檔中，只有在特定的機器 (例如偵測 MAC address) 才能開啟閱讀，過程中不會在檔案系統出現明文的文字檔案暫存檔。

測驗 `2`

考慮以下透過 mmap 實作快速檔案複製的程式碼: mmap-filecopy.c

/* copy modified blocks of source file to destination file efficiently
 * using mmap.
 */

#include <assert.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sysexits.h>
#include <unistd.h>

/* Copy <source> onto <destination> through two shared memory mappings,
 * writing only the blocks whose contents differ.
 *
 * The destination is created if missing and resized to the source length.
 * A destination that is already larger than the source is rejected. Blocks
 * are compared in units of the destination's st_blksize (BUFSIZ when that is
 * not positive).
 *
 * PROP is the quiz blank and is intentionally left unfilled.
 *
 * argc: must be 3
 * argv: argv[1] is the source path, argv[2] the destination path
 *
 * Returns EXIT_SUCCESS on success, EX_USAGE on a wrong argument count,
 * EX_DATAERR when a file cannot be opened, measured or resized, and
 * EX_UNAVAILABLE when mapping, syncing, unmapping or closing fails.
 */
int main(int argc, char *argv[])
{
    if (argc != 3) {
        printf("Usage: %s <source> <destination>\n", argv[0]);
        return EX_USAGE;
    }

    const char *src_name = argv[1];
    const char *dst_name = argv[2];
    int src_fd, dst_fd;
    struct stat dst_stat = {0};
    off_t src_len, dst_len;

    src_fd = open(src_name, O_RDONLY);
    if (src_fd == -1) {
        perror(src_name);
        return EX_DATAERR;
    }

    dst_fd = open(dst_name, O_RDWR | O_CREAT,
                  S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
    if (dst_fd == -1 || fstat(dst_fd, &dst_stat) != 0) {
        perror(dst_name);
        return EX_DATAERR;
    }

    /* Seeking to the end yields each file's current length. */
    src_len = lseek(src_fd, 0, SEEK_END);
    if (src_len < 0) {
        perror(src_name);
        return EX_DATAERR;
    }

    dst_len = lseek(dst_fd, 0, SEEK_END);
    if (dst_len < 0) {
        perror(dst_name);
        return EX_DATAERR;
    }

    if (dst_len > src_len) {
        /* off_t has no printf length modifier of its own; widen explicitly. */
        printf("Destination file is larger (%lld) than input file (%lld)\n",
               (long long) dst_len, (long long) src_len);
        return EX_DATAERR;
    }

    const size_t page_size =
        dst_stat.st_blksize > 0 ? dst_stat.st_blksize : BUFSIZ;
    const size_t len = src_len;

    /* Make the destination exactly as long as the source before mapping. */
    if (ftruncate(dst_fd, len) != 0) {
        perror(dst_name);
        return EX_DATAERR;
    }

    size_t read_count = 0;
    size_t write_count = 0;

    /* A zero-length mapping is invalid, so empty sources skip this part. */
    if (len > 0) {
        const uint8_t *src;
        uint8_t *dst;
        int advise_err;

        /* mmap reports failure with MAP_FAILED, not NULL. */
        src = mmap(NULL, len, PROT_READ, MAP_SHARED, src_fd, 0);
        if (src == MAP_FAILED) {
            perror(src_name);
            return EX_UNAVAILABLE;
        }

        /* posix_madvise returns the error number instead of setting errno. */
        advise_err = posix_madvise((void *) src, len, POSIX_MADV_SEQUENTIAL);
        if (advise_err != 0) {
            fprintf(stderr, "%s: %s\n", src_name, strerror(advise_err));
            return EX_UNAVAILABLE;
        }

        dst = mmap(NULL, len, PROP, MAP_SHARED, dst_fd, 0);
        if (dst == MAP_FAILED) {
            perror(dst_name);
            return EX_UNAVAILABLE;
        }

        advise_err = posix_madvise(dst, len, POSIX_MADV_SEQUENTIAL);
        if (advise_err != 0) {
            fprintf(stderr, "%s: %s\n", dst_name, strerror(advise_err));
            return EX_UNAVAILABLE;
        }

        for (size_t i = 0; i < len; i += page_size) {
            /* The final block may be shorter than page_size. */
            size_t block_size = (len - i) >= page_size ? page_size : (len - i);

            /* Store into the destination only when the block differs. */
            if (memcmp(src + i, dst + i, block_size)) {
                memcpy(dst + i, src + i, block_size);
                write_count += block_size;
            }

            read_count += block_size;
        }

        if (munmap((void *) src, len) != 0) {
            perror(src_name);
            return EX_UNAVAILABLE;
        }

        if (msync(dst, len, MS_SYNC) != 0 || munmap(dst, len) != 0) {
            perror(dst_name);
            return EX_UNAVAILABLE;
        }
    }

    if (close(src_fd) != 0) {
        perror(src_name);
        return EX_UNAVAILABLE;
    }

    if (close(dst_fd) != 0) {
        perror(dst_name);
        return EX_UNAVAILABLE;
    }

    printf("%zu bytes read\n", read_count);
    printf("%zu bytes written\n", write_count);
    return EXIT_SUCCESS;
}

編譯方式:

$ gcc -std=c11 -D_POSIX_C_SOURCE=200809L -o mmap-filecopy mmap-filecopy.c

假設原本已有檔名為 in 的檔案，且 out 不存在目前的路徑，可執行以下命令:

$ ./mmap-filecopy in out

這樣即可達成快速的檔案複製。

請補完程式碼，使得符合預期。

作答區

PROP = ?

(a) PROT_READ | PROT_WRITE
(b) PROT_READ

延伸問題:

解釋上述程式碼運作原理，並指出其缺失
探討 sendfile 和 splice 等系統呼叫在上述程式的應用
- 參見以 sendfile 和 splice 系統呼叫達到 Zero-Copy

2021q1 第 15 週測驗題

tags: linux2021

測驗 1

測驗 2

Read more

單一指令處理器 (OISC)

從 CPU cache coherence 談 Linux spinlock 可擴展能力議題

淺談 Microkernel 設計和真實世界中的應用

並行程式設計: 概念

tags: `linux2021`

測驗 `1`

測驗 `2`