## 簡述
> 這份筆記是 [C2Rust for Linux Kernel](/IR2_2mL7QTe_riQJzbPl-g) 的一部份
這份筆記記錄了使用 C2Rust 轉換 SeKVM 中 `BootAux.c` 程式碼的步驟、過程與結果。
轉換 SeKVM 有以下困難點:
1. SeKVM 使用 Linux 5.15,此時核心尚未納入 RFL(Rust for Linux),因此建置系統還不支援 Rust 的編譯參數。
2. SeKVM 的程式碼較底層,不是 C2Rust 擅長的領域,因此我們必須從小段程式碼開始轉換起。
我分三個步驟轉換程式碼:
1. 只轉換函式簽章
2. 加入轉換變數宣告與定義
3. 加入轉換函式呼叫
從實驗的結果發現:
1. C2Rust 產生的錯誤大部分來自於核心的標頭而不是 `hypsec.h` 中直接定義的內容,這表示 C2Rust 無法處理的程式碼大部分都存在於核心標頭中。
2. C2Rust 會先對 C 程式碼進行預處理,其中的 `#ifdef` 不會被保留在 Rust 程式碼中,從第三階段的結果可以看到 `BootAux.c` 中原本有兩種程式碼,但是轉換後只剩一種。
## 準備
我們先著重於成功轉換程式碼,先不管是否能夠執行於核心中。我們先從比較簡單的程式碼開始著手,我選擇的是 `BootAux.c`,程式碼較少而且只依賴於 `hypsec.h` 標頭檔。這兩個檔案我們各複製一份作為修改用,檔案名稱為 `BootAuxToy.c` 與 `hypsecToy.h`,修改 `BootAuxToy.c` 中的標頭名稱並在 `Makefile` 加入 `BootAuxToy.o` 就可以開始了。
> [!Tip] 自動轉譯腳本
> ```sh
> rm arch/arm64/sekvm/BootAuxToy.o
> rm arch/arm64/sekvm/BootAuxToy.rs
> intercept-build make LLVM=1 arch/arm64/sekvm/BootAuxToy.o && \
> c2rust transpile compile_commands.json
> ```
## 第一階段:只轉換函式簽章
我們把函式本體挖空,只留下函式簽章。目前 `BootAuxToy.c` 需要的定義是 `__hyp_text` 與 `u32/u64`,我們將前者從複雜的 `<asm/kvm_hyp.h>` 中抽取出來直接放在 `hypsecToy.h` 中,而後者已經在 `hypsec.h` 中定義了,我們保留它即可。
如果我們直接 `#include <asm/kvm_hyp.h>` 會導致轉換失敗,這意味著核心標頭檔中有 C2Rust 無法處理的程式碼。
> [!Note] 什麼原因導致轉換失敗?
> 延伸主題:[C2Rust 無法處理的核心標頭檔](/SDMcAZuSQSCo8gihRnnLug)
```c=
#include "hypsecToy.h"
/*
* BootAux
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void __hyp_text unmap_and_load_vm_image(u32 vmid, u64 target_addr, u64 remap_addr, u64 num)
{
}
#else
void __hyp_text unmap_and_load_vm_image(u32 vmid, u64 target_addr, u64 remap_addr, u64 num)
{
}
#endif
```
```c=
#ifndef HYPSEC_HYPSEC_H
#define HYPSEC_HYPSEC_H
// #include <asm/kvm_hyp.h>
#define __hyp_text __section(".hyp.text") notrace
typedef unsigned long long u64;
typedef unsigned u32;
typedef u64 phys_addr_t;
#endif //HYPSEC_HYPSEC_H
```
轉換過後得到以下程式碼,值得注意的是:
1. 使用 `typedef` 為 C 的資料型別定義別名可以減少 Rust 程式碼中 `libc::c_type`(例如 `libc::c_uint`)的數量,C2Rust 會使用 `pub type` 代替 `typedef`。
2. 函式簽章中的 `__section(".hyp.text") notrace` 沒有反映在 Rust 程式碼中。
```rust=
#![allow(
dead_code,
mutable_transmutes,
non_camel_case_types,
non_snake_case,
non_upper_case_globals,
unused_assignments,
unused_mut
)]
pub type u64_0 = libc::c_ulonglong;
pub type u32_0 = libc::c_uint;
#[no_mangle]
pub unsafe extern "C" fn unmap_and_load_vm_image(
mut vmid: u32_0,
mut target_addr: u64_0,
mut remap_addr: u64_0,
mut num: u64_0,
) {}
```
## 第二階段:簡單變數宣告與定義
這個階段主要是繼續把定義逐漸從標頭中拉出來,需要用到的定義有 `PMD_SIZE` 與 `PAGE_SIZE`,這次我們直接在 `hypsecToy.h` 中加入 `#include <asm/kernel-pgtable.h>` 即可。
雖然直接轉譯的話 C2Rust 會產生大量警告,但是如此複雜的標頭可以直接轉譯成功讓我感到驚訝。原因可能是因為這個標頭都是 `#define`,沒有函式簽章,所以比較容易轉換。
```c=
#include "hypsecToy.h"
/*
* BootAux
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void __hyp_text unmap_and_load_vm_image(u32 vmid, u64 target_addr, u64 remap_addr, u64 num)
{
u64 gfn, pte, pa, pfn, start, end, mb_num;
start = target_addr / PMD_SIZE * PMD_SIZE;
end = target_addr + num * PAGE_SIZE;
mb_num = (end - start + (PMD_SIZE - 1)) / PMD_SIZE;
}
#else
void __hyp_text unmap_and_load_vm_image(u32 vmid, u64 target_addr, u64 remap_addr, u64 num)
{
u64 gfn, pte, pa, pfn, start, end, mb_num;
start = target_addr;
end = target_addr + num * PAGE_SIZE;
mb_num = (end - start) / PAGE_SIZE;
}
#endif
```
```c=
#ifndef HYPSEC_HYPSEC_H
#define HYPSEC_HYPSEC_H
// #include <asm/kvm_hyp.h>
#define __hyp_text __section(".hyp.text") notrace
#include <asm/kernel-pgtable.h>
typedef unsigned long long u64;
typedef unsigned u32;
typedef u64 phys_addr_t;
#endif //HYPSEC_HYPSEC_H
```
轉換過後得到以下程式碼,值得注意的是:
1. 無號整數的加減法被轉換成 `.wrapping_add()` 與 `.wrapping_sub()`,用來保留 C 語言中無號整數溢位時回繞(wrap around)的語意;型別不同的運算元則是先以 `as` 轉型成相同型別(此處為 `u64_0`)後再運算。
```rust=
#![allow(
dead_code,
mutable_transmutes,
non_camel_case_types,
non_snake_case,
non_upper_case_globals,
unused_assignments,
unused_mut
)]
pub type __u32 = libc::c_uint;
pub type __u64 = libc::c_ulonglong;
pub type u32_0 = __u32;
pub type u64_0 = __u64;
#[no_mangle]
pub unsafe extern "C" fn unmap_and_load_vm_image(
mut vmid: u32_0,
mut target_addr: u64_0,
mut remap_addr: u64_0,
mut num: u64_0,
) {
let mut gfn: u64_0 = 0;
let mut pte: u64_0 = 0;
let mut pa: u64_0 = 0;
let mut pfn: u64_0 = 0;
let mut start: u64_0 = 0;
let mut end: u64_0 = 0;
let mut mb_num: u64_0 = 0;
start = target_addr
/ ((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int) as u64_0
* ((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int) as u64_0;
end = target_addr
.wrapping_add(num * ((1 as libc::c_ulong) << 12 as libc::c_int) as u64_0);
mb_num = end
.wrapping_sub(start)
.wrapping_add(
((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int)
.wrapping_sub(1 as libc::c_int as libc::c_ulong) as u64_0,
)
/ ((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int) as u64_0;
}
```
## 第三階段:完整轉換
最後一階段完整轉換了 `BootAux.c`,這階段做的事和第二階段相同,不過這次引入了更多的巨集與函式,其中也包括了 `hypsec.h` 中定義的函式簽章。這次的發現是 C2Rust 產生的錯誤大部分來自於核心的標頭而不是 `hypsec.h` 中直接定義的內容,這給出了一個可能的方向:C2Rust 無法處理的程式碼大部分都存在於核心標頭中。
```c=
#include "hypsecToy.h"
/*
* BootAux
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void __hyp_text unmap_and_load_vm_image(u32 vmid, u64 target_addr, u64 remap_addr, u64 num)
{
u64 gfn, pte, pa, pfn, start, end, mb_num;
start = target_addr / PMD_SIZE * PMD_SIZE;
end = target_addr + num * PAGE_SIZE;
mb_num = (end - start + (PMD_SIZE - 1)) / PMD_SIZE;
while (mb_num > 0UL)
{
pte = walk_s2pt(COREVISOR, remap_addr);
pa = phys_page(pte);
pfn = phys_page(pte) / PMD_SIZE * PTRS_PER_PMD;
gfn = start / PAGE_SIZE;
if (pfn == 0UL)
{
v_panic();
}
else
{
prot_and_map_vm_s2pt(vmid, gfn * PAGE_SIZE, pfn * PAGE_SIZE, 2U);
}
start += PMD_SIZE;
remap_addr = remap_addr + (start - target_addr);
target_addr = start;
mb_num--;
}
}
#else
void __hyp_text unmap_and_load_vm_image(u32 vmid, u64 target_addr, u64 remap_addr, u64 num)
{
u64 gfn, pte, pa, pfn, start, end, mb_num;
start = target_addr;
end = target_addr + num * PAGE_SIZE;
mb_num = (end - start) / PAGE_SIZE;
while (mb_num > 0UL)
{
pte = walk_s2pt(COREVISOR, remap_addr);
pa = phys_page(pte);
pfn = phys_page(pte) / PAGE_SIZE;
gfn = start / PAGE_SIZE;
if (pfn == 0UL)
{
v_panic();
}
else
{
prot_and_map_vm_s2pt(vmid, gfn * PAGE_SIZE, pfn * PAGE_SIZE, 3U);
}
start += PAGE_SIZE;
remap_addr = remap_addr + (start - target_addr);
target_addr = start;
mb_num--;
}
}
#endif
```
```c=
#ifndef HYPSEC_HYPSEC_H
#define HYPSEC_HYPSEC_H
// #include <asm/kvm_hyp.h>
#define __hyp_text __section(".hyp.text") notrace
#include <asm/kernel-pgtable.h>
// #include <asm/hypsec_host.h>
#define EL2_MAX_VMID 65 // <asm/hypsec_host.h> -> <asm/hypsec_boot.h>
#include <asm/hypsec_constant.h> // <asm/hypsec_host.h> -> <asm/hypsec_constant.h>
typedef unsigned long long u64;
typedef unsigned u32;
typedef u64 phys_addr_t;
// /*
// * AbstractMachine
// */
void v_panic(void);
// /*
// * NPTOps
// */
u64 walk_s2pt(u32 vmid, u64 addr);
// /*
// * MemoryOps
// */
void prot_and_map_vm_s2pt(u32 vmid, u64 addr, u64 pte, u32 level);
```
轉換過後得到以下程式碼,值得注意的是:
1. 轉換過後的程式碼很單純,並沒有複雜的邏輯。
2. 轉換的複雜度來自於核心標頭。
```rust=
#![allow(
dead_code,
mutable_transmutes,
non_camel_case_types,
non_snake_case,
non_upper_case_globals,
unused_assignments,
unused_mut
)]
extern "C" {
fn v_panic();
fn walk_s2pt(vmid: u32_0, addr: u64_0) -> u64_0;
fn prot_and_map_vm_s2pt(vmid: u32_0, addr: u64_0, pte: u64_0, level: u32_0);
}
pub type __u32 = libc::c_uint;
pub type __u64 = libc::c_ulonglong;
pub type u32_0 = __u32;
pub type u64_0 = __u64;
#[no_mangle]
pub unsafe extern "C" fn unmap_and_load_vm_image(
mut vmid: u32_0,
mut target_addr: u64_0,
mut remap_addr: u64_0,
mut num: u64_0,
) {
let mut gfn: u64_0 = 0;
let mut pte: u64_0 = 0;
let mut pa: u64_0 = 0;
let mut pfn: u64_0 = 0;
let mut start: u64_0 = 0;
let mut end: u64_0 = 0;
let mut mb_num: u64_0 = 0;
start = target_addr
/ ((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int) as u64_0
* ((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int) as u64_0;
end = target_addr
.wrapping_add(num * ((1 as libc::c_ulong) << 12 as libc::c_int) as u64_0);
mb_num = end
.wrapping_sub(start)
.wrapping_add(
((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int)
.wrapping_sub(1 as libc::c_int as libc::c_ulong) as u64_0,
)
/ ((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int) as u64_0;
while mb_num > 0 as libc::c_ulong as u64_0 {
pte = walk_s2pt(65 as libc::c_int as u32_0, remap_addr);
pa = pte
& ((1 as libc::c_ulong) << 48 as libc::c_int)
.wrapping_sub(1 as libc::c_int as libc::c_ulong) as u64_0
& !((1 as libc::c_ulong) << 12 as libc::c_int)
.wrapping_sub(1 as libc::c_int as libc::c_ulong) as u64_0;
pfn = (pte
& ((1 as libc::c_ulong) << 48 as libc::c_int)
.wrapping_sub(1 as libc::c_int as libc::c_ulong) as u64_0
& !((1 as libc::c_ulong) << 12 as libc::c_int)
.wrapping_sub(1 as libc::c_int as libc::c_ulong) as u64_0)
/ ((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int) as u64_0
* ((1 as libc::c_int) << 12 as libc::c_int - 3 as libc::c_int) as u64_0;
gfn = start / ((1 as libc::c_ulong) << 12 as libc::c_int) as u64_0;
if pfn == 0 as libc::c_ulong as u64_0 {
v_panic();
} else {
prot_and_map_vm_s2pt(
vmid,
gfn * ((1 as libc::c_ulong) << 12 as libc::c_int) as u64_0,
pfn * ((1 as libc::c_ulong) << 12 as libc::c_int) as u64_0,
2 as libc::c_uint,
);
}
start = start
.wrapping_add(
((1 as libc::c_ulong)
<< (12 as libc::c_int - 3 as libc::c_int)
* (4 as libc::c_int - 2 as libc::c_int) + 3 as libc::c_int)
as u64_0,
);
remap_addr = remap_addr.wrapping_add(start.wrapping_sub(target_addr));
target_addr = start;
mb_num = mb_num.wrapping_sub(1);
mb_num;
}
}
```
## 後記:`hypsec.h` 中有 C2Rust 無法處理的程式碼嗎?
既然我們透過以上實驗知道核心標頭中有 C2Rust 無法處理的程式碼,很自然地一個問題就會浮現在腦海中:
> `hypsec.h` 直接定義的內容中,有 C2Rust 無法處理的程式碼嗎?
為了回答這個問題,我將 `hypsec.h` 中不需要其他核心標頭的部分加入 `hypsecToy.h` 並測試是否可以編譯。`hypsec.h` 直接定義的內容中有以下:
- 函式實作:大部分都會用到核心標頭,因此沒有加入 `hypsecToy.h`。
- `#define`:都沒有用到核心標頭,全數加入 `hypsecToy.h`。
- 函式簽章:定義於 SeKVM 其他檔案的函式,全數加入 `hypsecToy.h`。
結果發現可以直接編譯,因此可以推測 C2Rust 無法處理的程式碼大多來自於核心標頭。