# 2017q3 Homework3 (simulator) contributed by <`ChiuYiTang`> [GitHub](https://github.com/ChiuYiTang/full-stack-hello) ## 開發環境 ``` Ubuntu 16.04.5 Linux 4.4.0-96-generic gcc version 5.4.0 20160609 CPU family: 6 Model: 94 Model name: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 8192K ``` ## 編譯器和最佳化原理 * 編譯器設計流程 * [How A Compiler Works: GNU Toolchain](https://www.slideshare.net/jserv/how-a-compiler-works-gnu-toolchain) ## 原始碼解析 ### driver.c * 操縱其他函式提供需求 * `vm`:虛擬機器的架構與運作。 * `opcode`:full-stack-hello ISA 與相關實作。 * `as`:組譯器。 * `elf`:產生 elf 格式之目的檔。 * 透過`driver.c`操作三種模式。 ```clike= switch (req) { case ASSEMBLE_AND_EVAL: { vm_env *env = vm_new(); assemble_from_fd(env, in_fd); hook_opcodes(env); vm_run(env); vm_free(env); break; } case ASSEMBLE_AND_WRITE_ELF: { int len; vm_env *env = vm_new(); assemble_from_fd(env, in_fd); len = write_to_elf(env, out_fd); vm_free(env); if (len < 0) FATAL(-1, "write ELF file %s failed (%s)\n", out_file, strerror(errno)); break; } case LOAD_ELF_AND_EVAL: { vm_env *env = vm_new(); load_from_elf(env, in_fd); hook_opcodes(env); vm_run(env); vm_free(env); break; } default: FATAL(-1, "unknown request: %d\n", req); break; } ``` * ASSEMBLE_AND_EVAL:組譯和執行 * ASSEMBLE_AND_WRITE_ELF:組譯並輸出 ELF 到給定的路徑 * LOAD_ELF_AND_EVAL:載入之前組譯得到的 ELF 檔案並執行 ### vm.c * 透過`vm_regs`與`__vm_env`觀察整體架構。 ```clike= typedef struct { size_t pc; // program counter. size_t sp; // stack runs from the end of 'temps' region. size_t from; // the immediate PC before last branch/return. size_t to; // the immediate PC after last branch/return. } vm_regs; struct __vm_env { vm_inst insts[INSTS_MAX_SIZE]; /* Program instructions */ vm_value cpool[CPOOL_MAX_SIZE]; /* Constant pool */ vm_value temps[TEMPS_MAX_SIZE]; /* Temporary storage */ vm_opcode_impl impl[OPCODE_IMPL_MAX_SIZE]; /* OPCODE impl */ vm_regs r; int insts_count; int cpool_count; int temps_count; }; ``` * `vm_new()`建立虛擬機器空間。 * `vm_free()`刪除空間。 * `vm_add_inst`用於組譯器 $ 操作,以建立 constant。 ```clike= switch (data[0]) { case '$': op.type = CONST; int temp = atoi(data + 1); op.value.id = vm_add_const(env, INT, &temp); break; ... case '\'': op.type = CONST; op.value.id = vm_add_const(env, STR, quoted_strdup(data)); break; ... } return op; } ``` * `vm_run()`透過一系列 macro 與 computed goto 之機制將 label 與實作相對應。 ```clkie= void vm_run(vm_env *env) { BEGIN_OPCODES; OP(ADD) : VM_CALL_HANDLER(); OP(SUB) : VM_CALL_HANDLER(); OP(MUL) : VM_CALL_HANDLER(); OP(DIV) : VM_CALL_HANDLER(); OP(MOD) : VM_CALL_HANDLER(); OP(AND) : VM_CALL_HANDLER(); OP(OR) : VM_CALL_HANDLER(); OP(NOT) : VM_CALL_HANDLER(); OP(XOR) : VM_CALL_HANDLER(); OP(LSL) : VM_CALL_HANDLER(); OP(LSR) : VM_CALL_HANDLER(); OP(ASR) : VM_CALL_HANDLER(); OP(PRINT) : VM_CALL_HANDLER(); OP(JLT) : VM_JLT(); OP(JLE) : VM_JLE(); OP(JZ) : VM_JZ(); OP(JGE) : VM_JGE(); OP(JGT) : VM_JGT(); OP(JNZ) : VM_JNZ(); OP(JMP) : GOTO(OPCODE.op1.value.id); OP(CALL) : VM_CALL(OPCODE.op1.value.id); OP(RET) : VM_RET(); OP(HALT) : goto terminate; END_OPCODES; terminate: return; } ``` 其中: * `BEGIN_OPCODES` ```clike= #define BEGIN_OPCODES \ const static void *labels[] = {OP_LABELS}; \ goto *labels[OPCODE] ``` * * 藉由 macro`OP(name)`所形成之 label 存入 op label set 中。 * 並透過 computed goto 跳轉至第一個指令之 op label address。 * `VM_CALL_HANDLER` `VM_J#` `VM_RET`接續執行`OPCODE_IMPL`對應之實作。 ```clike= #define VM_CALL_HANDLER() \ do { \ if (OPCODE_IMPL(OPCODE).handler) \ OPCODE_IMPL(OPCODE).handler( \ vm_get_op_value(env, &OPCODE.op1), \ vm_get_op_value(env, &OPCODE.op2), \ vm_get_temp_value(env, OPCODE.result)); \ DISPATCH; \ } while (0) ``` * `DISPATCH`將 pc 下移並執行對應 label 之實作。 ```clike= #define DISPATCH \ do { \ ++env->r.pc; \ goto *labels[OPCODE.opcode]; \ } while (0) ``` * `GOTO`跳轉至特定 address 並紀錄跳轉前後所在地址。 ```clike= #define GOTO(n) \ do { \ env->r.from = env->r.pc; \ env->r.to = n; \ env->r.pc = n; \ goto *labels[OPCODE.opcode]; \ } while (0) ``` * 參考 [Computed goto for efficient dispatch tables](https://eli.thegreenplace.net/2012/07/12/computed-goto-for-efficient-dispatch-tables),發現此部份利用 GCC computed goto 取代 switch case 來增進整體效能。 ### as.c * `assemble_from_fd`開啟特定檔案 (.s) 並組譯。 * `assemble_line` * Lexical Analyzer 處理成 token stream * 分析語法及語意 * 逐行透過`quoted_strsep`處理空字符與符號。 * 並透過`make_operand`建立變數`$` `\`與常數`#`類型標示。 * `write_to_elf` `load_from_elf`可寫入/讀取 ELF 格式之目的檔。 * 可透過輸入`file`了解 ELF 目的檔之內容。 ``` joe@joe-LinuxMint ~/workspace/full-stack-hello $ file tests/halt.o tests/halt.o: ELF 64-bit LSB executable, no machine, version 1 (SYSV), statically linked, interpreter *empty*, corrupted section header size ``` ### opcode.c * 定義 opcode 之多項實作。 * `hook_opcodes`透過`vm_hook_opcode_handler`將 opcode 連結在一起。 ```clike= void vm_hook_opcode_handler(vm_env *env, int opcode, vm_handler handler) { env->impl[opcode].opcode = opcode; env->impl[opcode].handler = handler; } ``` * 一系列實作皆對應至`opcode.h`。 ### opcode.h * 透過`scripts/gen_opcode.py`並輸`opcode.def`資料所產生。 ### gen_opcode.py 與自訂指令集 (opcode.def) * 觀察`opcode.h`發現其作為 CSV file(delimiter=',', quotechar='"'),接著藉`scripts/gen_opcode.py`處理。 * 透過`f.write()`與迭代產生相對應於`opcode.def`之`opcode.h`。 #### gen_opcode.py ```python= ... enum {\n''') # Write opcode list f.write(',\n'.join([' OP_{} = {}'.format(op.name, op.code) for op in opcodes])) f.write('\n};\n\n') # Write opcode labels f.write('#define OP_LABELS ') for index, op in enumerate(opcodes): f.write('&&OP_{}, '.format(op.name)) if index and index % 4 == 0: f.write('\\\n') if op != opcodes[-1]: f.write(' ') f.write('\n\n') ... f.write('static const struct instruction instrs[] = {\n') f.write(',\n'.join([' {}'.format(op) for op in opcodes])) f.write(',\n {NULL, 0}\n};') # Write helper function(s) f.write('\n\nvoid hook_opcodes(vm_env *);') f.write('\n\n#endif /* OPCODE_H */\n') ``` #### 對應之`opcode.h` ```clike= ... enum { OP_HALT = 0, OP_ADD = 1, OP_SUB = 2, OP_PRINT = 3, OP_JLT = 4, OP_JLE = 5, OP_JZ = 6, OP_JGE = 7, OP_JGT = 8, OP_JNZ = 9, OP_JMP = 10, OP_MUL = 11, OP_DIV = 12, OP_MOD = 13, OP_CALL = 14, OP_RET = 15, OP_AND = 16, OP_OR = 17, OP_NOT = 18, OP_XOR = 19, OP_LSL = 20, OP_LSR = 21, OP_ASR = 22 }; #define OP_LABELS &&OP_HALT, &&OP_ADD, &&OP_SUB, &&OP_PRINT, &&OP_JLT, \ &&OP_JLE, &&OP_JZ, &&OP_JGE, &&OP_JGT, \ &&OP_JNZ, &&OP_JMP, &&OP_MUL, &&OP_DIV, \ &&OP_MOD, &&OP_CALL, &&OP_RET, &&OP_AND, \ &&OP_OR, &&OP_NOT, &&OP_XOR, &&OP_LSL, \ &&OP_LSR, &&OP_ASR, ... static const struct instruction instrs[] = { {"halt", OP_HALT, 0, 0, 0}, {"add", OP_ADD, 1, 1, 1}, {"sub", OP_SUB, 1, 1, 1}, {"print", OP_PRINT, 1, 0, 0}, {"jlt", OP_JLT, 1, 1, 0}, {"jle", OP_JLE, 1, 1, 0}, {"jz", OP_JZ, 1, 1, 0}, {"jge", OP_JGE, 1, 1, 0}, {"jgt", OP_JGT, 1, 1, 0}, {"jnz", OP_JNZ, 1, 1, 0}, {"jmp", OP_JMP, 1, 0, 0}, {"mul", OP_MUL, 1, 1, 1}, {"div", OP_DIV, 1, 1, 1}, {"mod", OP_MOD, 1, 1, 1}, {"call", OP_CALL, 1, 0, 0}, {"ret", OP_RET, 0, 0, 0}, {"and", OP_AND, 1, 1, 1}, {"or", OP_OR, 1, 1, 1}, {"not", OP_NOT, 1, 1, 0}, {"xor", OP_XOR, 1, 1, 1}, {"lsl", OP_LSL, 1, 1, 1}, {"lsr", OP_LSR, 1, 1, 1}, {"asr", OP_ASR, 1, 1, 1}, {NULL, 0} }; ``` * 藉此方式如有任何修改自訂指令集之情事,皆可於`opcode.def`新增後在於`opcode.c`實作相對應行為即可。 ## Run time 使用者帶入參數 * 參考強者同學 [st9007a](https://hackmd.io/s/Hkf6pkPAb) * 透過使用者輸入參數,首先建立 temp 來儲存使用者輸入字串檢測部份。 ```clike= int main(int argc, char **argv) { int vm_init_var = 0; int vm_pos = 0; ... } else if (!strcmp(argv[i], "--input")) { if (!argv[i + 1]) FATAL(-1, "Missing an integer as --input argument, see -h\n"); vm_init_var = atoi(argv[++i]); ``` * 事先初始化欲帶入之數值及位置 * 透過字串檢測`--input`,轉為 int 存入 `vm_init_var` * 觀察 variable 儲存於`temps[]`中,在不影響處理`VM_CALL`之`env->temp[]`儲存 function call 前後地址之存放的情況下,將`temps[0]`作為存放`--input argument`的位置。 #### vm.c ```clike= inline void vm_set_var(vm_env *env, int pos, int n) { env->temps[pos].type = INT; env->temps[pos].value.vint = n; } ``` #### vm.h ```clike= void vm_set_var(vm_env *env, int pos, int n); ``` * 於`driver.c`增加`temps[]`中 input varible 初始化。 ```clike= switch (req) { case ASSEMBLE_AND_EVAL: { vm_env *env = vm_new(); assemble_from_fd(env, in_fd); vm_set_var(env, vm_pos, vm_init_var); ... case ASSEMBLE_AND_WRITE_ELF: { int len; vm_env *env = vm_new(); assemble_from_fd(env, in_fd); vm_set_var(env, vm_pos, vm_init_var); ... case LOAD_ELF_AND_EVAL: { vm_env *env = vm_new(); vm_set_var(env, vm_pos, vm_init_var); ``` * 接著修正 Makefile Fibonacci 部份。 ```clike= fib: $(EXEC) @./$(EXEC) --input $(TEMP0) $(IN_FILE) ``` ## Fibonacci ### Iterative #### 原始碼 ```clike= int fib(int n) { int pre = -1; int i = n; n = 1; int sum = 0; while (i > 0) { i--; sum = n + pre; pre = n; n = sum; } return n; } ``` #### full-stack-hello ISA ``` ; initialize result = 0 or $0 $0 #3 ; if input = 0, print 0 jz #0 #11 sub #0 $1 #0 ; initialize f(0) = 0, f(1) = 1 or $0 $0 #1 or $0 $1 #2 ; iteration add #1 #2 #3 or #2 $0 #1 or #3 $0 #2 sub #0 $1 #0 jz #0 #11 ; if #0 = 0, jump out to print jgt #0 #5 ; if #0 != 0, continute next iteration ; print result print #3 halt ``` ### Recursive #### 原始碼 ```clike= int fib(int n) { if (n == 0) return 0; if (n == 1) return 1; return fib(n - 1) + fib (n - 2); } ``` #### full-stack-hello ISA ``` ; initialize temp result, f(0), f(1) or $0 $0 #3 or $0 $0 #1 or $1 $0 #2 ; recursion call address call #6 ; all return, then print result print #3 halt ; recursion xor #0 $1 #5 jz #5 #11 xor #0 $0 #5 jz #5 #12 jnz #5 #13 ; if #0 != 0 or 1, return f(n-1) + f(n-2) ; if #0 = 1, return 1 *(add 1 to temp result) add #3 #2 #3 ret ; return f(#0 - 1) + f(#0 - 2) sub #0 $1 #0 ; f(#0 - 1) call #6 add #0 $1 #0 ; compensate #0 sub #0 $2 #0 ; f(#0 - 2) call #6 add #0 $2 #0 ; compensate #0 ret ``` ### Tail recursion ```clike= int fib(int n, int a, int b) { if (n == 0) return a; return fib(n - 1 , b, a + b); } ``` #### full-stack-hello ISA ``` ; initialize temp result, f(a) = 0, f(b) = 1 or $0 $0 #3 or $0 $0 #1 or $1 $0 #2 ; recursion call address call #6 ; all return, then print result print #3 halt ; recursion if(a) = 0, return 0 xor #0 $0 #5 ; if n == 0, return 0 jz #5 #9 jnz #5 #11 ; if n != 0, return f(n - 1, b, a + b) or #1 $0 #3 ret ; return f(n - 1, b, a + b) add #1 #2 #4 or #2 $0 #1 or #4 $0 #2 sub #0 $1 #0 ; f(#0 - 1) call #6 ret ``` ### Q-Matirx ```clike= int[][] matrix_multiply(int[][] a,int [][] b) { int t[2][2] = { { 0 , 0 } , { 0 , 0 } }; for (int i = 0 ; i < 2 ; i ++ ) for (int j = 0 ; j < 2 ; j ++ ) for (int k = 0 ; k < 2 ; k ++) t[i][j] += a[i][k] * b[k][j]; return t; } // 使用 Divide-and-Conquer 方法加速矩陣次方 int[][] matrix_pow ( int a[][] , int n ) { if ( n == 1 ) return a; if (n % 2 == 0) { int t[2][2]; t = matrix_pow(a , n >> 1); return matrix_multiply(t , t); } else { int t1[2][2], t2[2][2]; t1 = matrix_pow(a, n >> 1 ); t2 = matrix_pow(a, n >> 1 + 1 ); return matrix_multiply(t1 , t2); } } ``` * 透過此基礎來計算 ```clike= int fib(int n) { if (n < = 0) return 0; int A1[2][2] = { { 1 , 1 } , { 1 , 0 } }; int result[2][2]; result = matrix_pow(A1, n); return result[0][1]; } ``` * 組語難寫,故繼續推得 Fast doubling ### Fast doubling ```clike= int fib(int n) { if (n == 0) return 0; int t0 = 1; // F(n) int t1 = 1; // F(n + 1) int t3 = 1; // F(2n) int t4; // F(2n+1) int i = 1; while (i < n) { if ((i << 1) <= n) { t4 = t1 * t1 + t0 * t0; t3 = t0 * (2 * t1 - t0); t0 = t3; t1 = t4; i = i << 1; } else { t0 = t3; t3 = t4; t4 = t0 + t4; i++; } } return t3; } ``` :::danger 上述程式碼列表不正確,請修正 :notes: jserv ::: #### full-stack-hello ISA ``` ; if f(0) = 0, return 0 jz #0 #28 ; initialize or $1 $0 #1 ; F(n) or $1 $0 #2 ; F(n + 1) or $1 $0 #3 ; F(2n) or $0 $0 #4 ; F(2n + 1) or $1 $0 #5 ; i = 1 ; while(i < n) sub #0 #5 #6 ; n - 1 = #6 jle #6 #26 ; jump out ; if ((i << 1) <= n) lsl #5 $1 #7 ; (i << 1) sub #0 #7 #7 jlt #7 #21 ; branch mul #2 #2 #4 ; t1 * t1 mul #1 #1 #8 ; t0 * t0 add #4 #8 #4 ; t4 = t1 * t1 + t0 * t0 mul #2 $2 #3 ; 2 * t1 sub #3 #1 #3 ; 2 * t1 - t0 mul #3 #1 #3 ; t0 * (2 * t1 - t0) or #3 $0 #1 ; t0 = t3 or #4 $0 #2 ; t1 = t4 lsl #5 $1 #5 ; i << 1 jmp #6 ; else or #3 $0 #1 ; t0 = t3 or #4 $0 #3 ; t3 = t4 add #4 #1 #4 ; t4 = t0 + t4 add #5 $1 #5 ; i++ jmp #6 ; if i >= n, break print #3 halt ; if input = 0 print $0 halt ``` ## 參考資料 [Simple Virtual Machine](http://bartoszsypytkowski.com/simple-virtual-machine/) [simple.vm](https://github.com/skx/simple.vm)