owned this note
owned this note
Published
Linked with GitHub
# Project 1
### 組員名單
* 110525015 劉松靄
* 111525013 何雋永
* 109502547 楊晴方
### 系統環境
* 作業系統: ubuntu 18.04
* Kernel 版本: 5.4.0-131-generic
### 新增 syscall 過程
由於篇幅較多,因此另外寫了一篇,在[這裡](https://hackmd.io/7x2suD5FRPuoavelhqUP4Q)。
### 其他兩個 project
[project 2](https://hackmd.io/4Ax7oedqSFyVfvgI04PiZQ)
[project 3](https://hackmd.io/TStjhzZwTtqv82RHsfUJVg)
### kernel space code
```
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
/* Number of user-supplied addresses that are actually inspected. */
int segNum = 6;

/*
 * Addresses copied in from user space by the printSegment syscall.
 * Only the first segNum slots are used; slot i is labelled segmentName[i].
 */
int *data[20];

/* Human-readable label for each slot of data[], in slot order. */
char *segmentName[6] = {
    "BSS",
    "Text",
    "Data",
    "Heap",
    "Stack",
    "Shared library"
};
/*
 * printVMA - log the segment boundaries stored in the task's mm_struct and,
 * for each of the first segNum addresses in data[], the VMA containing it.
 *
 * @task: task whose address space is inspected.
 *
 * The VMA index that is printed is the 1-based position of the VMA in the
 * mm->mmap list. Nothing is printed for an address that lies in no VMA.
 * Written against the 5.4 kernel API (mm->mmap_sem, mm->mmap, vm_next).
 */
void printVMA(struct task_struct *task){
    struct mm_struct *mm = task->mm;
    struct vm_area_struct *vma;
    unsigned long addr;
    int count, index;

    /* A task without a user address space has nothing to report. */
    if (!mm) {
        printk("\ntask has no mm\n");
        return;
    }

    /*
     * Other threads sharing this mm may mmap()/munmap() concurrently,
     * so hold the mmap semaphore for reading while walking the VMA list.
     */
    down_read(&mm->mmap_sem);

    printk("\nText Segment start = 0x%lx, end = 0x%lx\n"
           "\nData Segment start = 0x%lx, end = 0x%lx\n"
           "\nStack Segment start = 0x%lx\n"
           "\nHeap Segment start = 0x%lx, end = 0x%lx\n",
           mm->start_code, mm->end_code,
           mm->start_data, mm->end_data,
           mm->start_stack,
           mm->start_brk, mm->brk);

    for (index = 0; index < segNum; index++) {
        addr = (unsigned long)data[index];
        count = 0;
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
            ++count;
            /*
             * vm_end is the first address past the area, so it must be
             * excluded; otherwise an address on a boundary would match
             * two adjacent VMAs.
             */
            if (addr >= vma->vm_start && addr < vma->vm_end) {
                printk("\n%s Segment is in vma %d\n", segmentName[index], count);
                printk("\nStarts at 0x%lx, Ends at 0x%lx\n", vma->vm_start, vma->vm_end);
                /* VMAs do not overlap, so there can be no further match. */
                break;
            }
        }
    }

    up_read(&mm->mmap_sem);
}
void convertToPhysical(struct task_struct *task){
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long paddr=0, page_addr=0, page_offset=0;
int index=0;
for(; index<segNum; index++){
pgd = pgd_offset(task->mm, data[index]);
if(pgd_none(*pgd)){
printk("not mapped in pgd\n");
return;
}
p4d = p4d_offset(pgd, data[index]);
if(p4d_none(*p4d)){
printk("not mapped in p4d\n");
return;
}
pud = pud_offset(p4d, data[index]);
if(pud_none(*pud)){
printk("not mapped in pud\n");
return;
}
pmd = pmd_offset(pud, data[index]);
if(pmd_none(*pmd)){
printk("not mapped in pmd\n");
return;
}
pte = pte_offset_kernel(pmd, data[index]);
if(pte_none(*pte)){
printk("not mapped in pte\n");
return;
}
page_addr = pte_val(*pte) & PAGE_MASK;
page_offset = (unsigned long)data[index] & ~PAGE_MASK;
paddr = page_addr | page_offset;
printk("\nphysical address %s Segment is 0x%lx\n", segmentName[index], paddr);
}
}
/*
 * printSegment - copy an array of user-space addresses into data[] and log,
 * for the calling task, their physical addresses and containing VMAs.
 *
 * @pid:      not used; the calling task (current) is always inspected.
 * @userData: user-space array of sizeof(data) bytes holding the addresses.
 *
 * Return: 0 on success, -EFAULT if the user array cannot be read.
 *
 * NOTE(review): data[] is a single file-scope buffer, so concurrent callers
 * can overwrite each other's addresses.
 */
SYSCALL_DEFINE2(printSegment, int, pid, const int*, userData){
    /* copy_from_user() returns the number of bytes it could NOT copy. */
    if (copy_from_user(data, userData, sizeof(data)))
        return -EFAULT;

    printk("\ntask id is %d\n", current->pid);
    convertToPhysical(current);
    printVMA(current);
    return 0;
}
```
### user space code
```
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <linux/kernel.h>
#include <pthread.h>
#include <unistd.h>
/* Explicitly initialised global; its address is reported in the "Data" slot. */
int initData = 1;

/* Next free slot in userData[]. */
int cnt = 0;

/* Global without an initialiser; its address is reported in the "BSS" slot. */
int notInitData;

/* Addresses handed to the syscall; only the first six slots are filled. */
int *userData[20];
void* t1(){
int a = 1;
int *p = malloc(sizeof(int));
*p = 1;
cnt = 0;
userData[cnt++] = ¬InitData;
userData[cnt++] = t1;
userData[cnt++] = &initData;
userData[cnt++] = p;
userData[cnt++] = &a;
userData[cnt++] = printf;
syscall(336, (int)syscall(SYS_gettid), userData);
pthread_exit(NULL);
}
void* t2(){
int a = 1;
int *p = malloc(sizeof(int));
*p = 1;
cnt = 0;
userData[cnt++] = ¬InitData;
userData[cnt++] = t2;
userData[cnt++] = &initData;
userData[cnt++] = p;
userData[cnt++] = &a;
userData[cnt++] = printf;
syscall(336, (int)syscall(SYS_gettid), userData);
pthread_exit(NULL);
}
int main(){
printf("%d\n", initData);
pthread_t thread1, thread2;
int a = 1;
int *p = malloc(sizeof(int));
*p = 1;
userData[cnt++] = ¬InitData;
userData[cnt++] = main;
userData[cnt++] = &initData;
userData[cnt++] = p;
userData[cnt++] = &a;
userData[cnt++] = printf;
syscall(336, (int)syscall(SYS_gettid), userData);
pthread_create(&thread1, NULL, t1, NULL);
pthread_join(thread1, NULL);
pthread_create(&thread2, NULL, t2, NULL);
pthread_join(thread2, NULL);
// syscall(337);
return 0;
}
```
### kernel 輸出結果
**main thread**
```
[ 236.564583]
task id is 2430
[ 236.564593]
physical address BSS Segment is 0x800000016a9d8040
[ 236.564598]
physical address Text Segment is 0x178bf5b20
[ 236.564601]
physical address Data Segment is 0x800000016a9d8010
[ 236.564604]
physical address Heap Segment is 0x8000000179153670
[ 236.564607]
physical address Stack Segment is 0x800000018cf6604c
[ 236.564610]
physical address Shared library Segment is 0x1105e6e40
[ 236.564613]
Text Segment start = 0x55abd9a00000, end = 0x55abd9a00f18
Data Segment start = 0x55abd9c01d78, end = 0x55abd9c02014
Stack Segment start = 0x7fff1861a150
Heap Segment start = 0x55abda33d000, end = 0x55abda35e000
[ 236.564621]
BSS Segment is in vma 3
[ 236.564623]
Starts at 0x55abd9c02000, Ends at 0x55abd9c03000
[ 236.564628]
Text Segment is in vma 1
[ 236.564630]
Starts at 0x55abd9a00000, Ends at 0x55abd9a01000
[ 236.564633]
Data Segment is in vma 3
[ 236.564635]
Starts at 0x55abd9c02000, Ends at 0x55abd9c03000
[ 236.564638]
Heap Segment is in vma 4
[ 236.564640]
Starts at 0x55abda33d000, Ends at 0x55abda35e000
[ 236.564643]
Stack Segment is in vma 20
[ 236.564646]
Starts at 0x7fff185fb000, Ends at 0x7fff1861c000
[ 236.564648]
Shared library Segment is in vma 5
[ 236.564651]
Starts at 0x7f3bd3400000, Ends at 0x7f3bd35e7000
```
**thread 1**
```
[ 236.564856]
task id is 2431
[ 236.564864]
physical address BSS Segment is 0x800000016a9d8040
[ 236.564869]
physical address Text Segment is 0x178bf585a
[ 236.564873]
physical address Data Segment is 0x800000016a9d8010
[ 236.564875]
physical address Heap Segment is 0x8000000178ffab20
[ 236.564878]
physical address Stack Segment is 0x8000000179162edc
[ 236.564882]
physical address Shared library Segment is 0x1105e6e40
[ 236.564885]
Text Segment start = 0x55abd9a00000, end = 0x55abd9a00f18
Data Segment start = 0x55abd9c01d78, end = 0x55abd9c02014
Stack Segment start = 0x7fff1861a150
Heap Segment start = 0x55abda33d000, end = 0x55abda35e000
[ 236.564892]
BSS Segment is in vma 3
[ 236.564896]
Starts at 0x55abd9c02000, Ends at 0x55abd9c03000
[ 236.564901]
Text Segment is in vma 1
[ 236.564904]
Starts at 0x55abd9a00000, Ends at 0x55abd9a01000
[ 236.564907]
Data Segment is in vma 3
[ 236.564910]
Starts at 0x55abd9c02000, Ends at 0x55abd9c03000
[ 236.564913]
Heap Segment is in vma 5
[ 236.564915]
Starts at 0x7f3bcc000000, Ends at 0x7f3bcc021000
[ 236.564919]
Stack Segment is in vma 8
[ 236.564922]
Starts at 0x7f3bd2c00000, Ends at 0x7f3bd3400000
[ 236.564925]
Shared library Segment is in vma 9
[ 236.564928]
Starts at 0x7f3bd3400000, Ends at 0x7f3bd35e7000
```
**thread 2**
```
[ 236.565424]
task id is 2432
[ 236.565432]
physical address BSS Segment is 0x800000016a9d8040
[ 236.565437]
physical address Text Segment is 0x178bf59bd
[ 236.565440]
physical address Data Segment is 0x800000016a9d8010
[ 236.565443]
physical address Heap Segment is 0x8000000179ae8220
[ 236.565445]
physical address Stack Segment is 0x8000000179162edc
[ 236.565448]
physical address Shared library Segment is 0x1105e6e40
[ 236.565451]
Text Segment start = 0x55abd9a00000, end = 0x55abd9a00f18
Data Segment start = 0x55abd9c01d78, end = 0x55abd9c02014
Stack Segment start = 0x7fff1861a150
Heap Segment start = 0x55abda33d000, end = 0x55abda35e000
[ 236.565459]
BSS Segment is in vma 3
[ 236.565462]
Starts at 0x55abd9c02000, Ends at 0x55abd9c03000
[ 236.565467]
Text Segment is in vma 1
[ 236.565469]
Starts at 0x55abd9a00000, Ends at 0x55abd9a01000
[ 236.565472]
Data Segment is in vma 3
[ 236.565475]
Starts at 0x55abd9c02000, Ends at 0x55abd9c03000
[ 236.565478]
Heap Segment is in vma 5
[ 236.565480]
Starts at 0x7f3bcc000000, Ends at 0x7f3bcc021000
[ 236.565483]
Stack Segment is in vma 12
[ 236.565485]
Starts at 0x7f3bd2c00000, Ends at 0x7f3bd3400000
[ 236.565488]
Shared library Segment is in vma 13
[ 236.565491]
Starts at 0x7f3bd3400000, Ends at 0x7f3bd35e7000
```
### 記憶體 layout
![](https://i.imgur.com/ug44NIh.png)
### 補充的知識點
#### 1. pgdir_shift, pgd_offset
```
/* Locate the PGD entry for `address`, starting from the table at mm->pgd. */
#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
/* Return a pointer to entry number pgd_index(address) of the table `pgd`. */
static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
{
return (pgd + pgd_index(address));
};
/* Entry index: the address shifted right by PGDIR_SHIFT, masked with PTRS_PER_PGD - 1. */
#define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
```
![](https://i.imgur.com/5ucWfFJ.png)
![](https://i.imgur.com/VBjDlT3.png)
#### 2. syscall_define 做了哪些事
```
/*
 * SYSCALL_DEFINEn(name, ...): each variant forwards to SYSCALL_DEFINEx with
 * its argument count n and with the name prefixed by an underscore.
 */
#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
```
```
#define SYSCALL_DEFINEx(x, sname, ...) \
SYSCALL_METADATA(sname, x, __VA_ARGS__) \
__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
SYSCALL_METADATA 有點看不懂,而且覺得不是重點,所以先略過
```
```
/*
 * __SYSCALL_DEFINEx(x, name, ...) emits three functions for one syscall:
 *   - sys##name:      declared as an alias of __se_sys##name;
 *   - __se_sys##name: takes every argument through __SC_LONG, then calls
 *                     __do_sys##name with each argument passed through
 *                     __SC_CAST, and returns its result;
 *   - __do_sys##name: declared with the arguments as written by the author.
 * The macro ends on the __do_sys##name header without a body, so the block
 * that follows the macro invocation becomes that function's body.
 */
#define __SYSCALL_DEFINEx(x, name, ...) \
__diag_push(); \
__diag_ignore(GCC, 8, "-Wattribute-alias", \
"Type aliasing is used to sanitize syscall arguments");\
asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_sys##name)))); \
ALLOW_ERROR_INJECTION(sys##name, ERRNO); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\
__MAP(x,__SC_TEST,__VA_ARGS__); \
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
return ret; \
} \
__diag_pop(); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
```
```
asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_sys##name))));
例子 : asmlinkage long sys##_hello(__MAP(1, __SC_DECL, int, a))
__MAP(1, __SC_DECL, int, a)
=> __MAP1(__SC_DECL, int, a)
=> __SC_DECL(int, a)
=> int a
asmlinkage long sys##_hello(__MAP(1, __SC_DECL, int, a))
=> asmlinkage long sys_hello(int a)
## 為連接符號
alias 讓它等價於 __se_sys_hello
```
```
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));
跟上面的 asmlinkage long sys##name 轉換類似
唯一的差別是 __SC_LONG
#define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a
在 32 bit 系統上比較有意義,因為 64 bit 中 LL 與 L 都是 8 bytes
asmlinkage long __se_sys##_hello(__MAP(1,__SC_LONG,int, a));
=> asmlinkage long __se_sys_hello(long a);
重點 : 將輸入擴展成 64 bit
```
```
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
__MAP(x,__SC_TEST,__VA_ARGS__); \
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
return ret; \
}
#define __SC_CAST(t, a) (__force t) a
直接把它當作強制型態轉換就好
=> __SC_CAST(int, a) == (int) a
long ret = __do_sys##_hello(__MAP(1,__SC_CAST, int, a));
=> long ret = __do_sys_hello((int) a);
```
總而言之,就是將輸入強制轉換到 long,然後再轉回 int,也就是 syscall 一開始傳入的型別。
至於為甚麼要多做這些事,跟 [CVE-2009-0029](https://nvd.nist.gov/vuln/detail/CVE-2009-0029) 有關:在部分 64 位元平台上,user space 傳入的 32 位元參數若沒有被正確地做符號擴展,kernel 可能會誤用暫存器中未清除的高位元。
#### 3. current
current 定義在 <asm/current.h> 中
```
#ifndef _ASM_X86_CURRENT_H
#define _ASM_X86_CURRENT_H
#include <linux/compiler.h>
#include <asm/percpu.h>
#ifndef __ASSEMBLY__
struct task_struct;
/* Per-CPU pointer named current_task, of type struct task_struct *. */
DECLARE_PER_CPU(struct task_struct *, current_task);
/* Return the current_task pointer read from this CPU's per-CPU data. */
static __always_inline struct task_struct *get_current(void)
{
return this_cpu_read_stable(current_task);
}
/* `current` is a macro: every use expands to a call to get_current(). */
#define current get_current()
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_CURRENT_H */
```
可以看到 current 其實是一個巨集,會展開成 get_current(),其回傳值是一個指向 struct task_struct 的指標
### reference
[https://www.jabperf.com/5-level-vs-4-level-page-tables-does-it-matter/](https://www.jabperf.com/5-level-vs-4-level-page-tables-does-it-matter/)
[https://www.cnblogs.com/muahao/p/10297852.html](https://www.cnblogs.com/muahao/p/10297852.html)
[https://elixir.bootlin.com/linux/v6.0.6/](https://elixir.bootlin.com/linux/v6.0.6/)