# 研究 QEMU 內部機制與監控指令狀態

![image](https://hackmd.io/_uploads/BkENSJWIxe.png)

如果想理解一個指令集架構(ISA)中每一條指令執行前和執行後的狀態,我最想知道的是每一條指令執行前後的 register 狀態,以及整個 CPU 記憶體的狀態,這是我比較感興趣的部分。所以大概研究了一下 QEMU,發現其實可以透過 plugin 的方式去攔截 QEMU 指令執行過程中的資訊。

首先,假設我的指令集架構是 RISC-V,但是它要透過 QEMU 跑在 x86 上,內部大概會經過哪些 stage 呢?目前看下來,QEMU 會把 RISC-V 指令一次翻譯一整個 TB(TB = Translation Block)。一般的 QEMU 只要加上 `-d` 其實就有很多資訊可以看,例如 `-d cpu,fpu,vpu` 等等,但對我而言還不夠細緻:因為我是在做軟體驗證,所以我必須要看到每一個 TB 裡面每一條 inst 的內容。那第一步就必須修改所謂的 TCG,也就是在 RISC-V 轉 x86 的 TCG 流程中,對每一道 inst 再額外掛入兩個自定義的觸發事件,分別攔截記憶體內容和 register 的狀態。

大概分成幾大塊。首先我要攔截 register 的狀態。這部分如果單獨透過 plugin,除非改寫它內部的機制,不然很麻煩,所以這邊用比較暴力的方法:直接修改下面這個檔案。

# /root/andesqemu/qemu/target/riscv/gdbstub.c

它裡面有 `riscv_cpu_gdb_read_register`,它預設是把相對應的 GPR register 返回給插件。另外在 `/root/andesqemu/qemu/target/riscv/cpu.c` 可以看到,一般 QEMU 在下 `-d` 的時候會觸發 `riscv_cpu_dump_state`。那我的第一個想法就是先插在 TB 翻譯的階段,也就是 `/root/andesqemu/qemu/accel/tcg/translator.c`。事實證明可行,但是有一個盲點:這樣看到的 register 狀態是以一整個 TB 為單位的 CPU 狀態,而不是每一條指令各自執行前後的狀態,這就不是我想要的功能。

```c=
#if HOST_BIG_ENDIAN
#define BYTE(x) ((x) ^ 7)
#else
#define BYTE(x) (x)
#endif

const char * const riscv_rvv_regnames2[] = {
    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
};

void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
                     vaddr pc, void *host_pc, const TranslatorOps *ops,
                     DisasContextBase *db)
{
    uint32_t cflags = tb_cflags(tb);
    TCGOp *icount_start_insn;
    TCGOp *first_insn_start = NULL;
    bool plugin_enabled;

    /* Initialize DisasContext */
    db->tb = tb;
    db->pc_first = pc;
    db->pc_next = pc;
    db->is_jmp = DISAS_NEXT;
    db->num_insns = 0;
    db->max_insns = *max_insns;
    db->insn_start = NULL;
    db->fake_insn = false;
    db->host_addr[0] = host_pc;
    db->host_addr[1] = NULL;
    db->record_start = 0;
    db->record_len = 0;

    ops->init_disas_context(db, cpu);
    tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */

    /* Start translating.
*/ icount_start_insn = gen_tb_start(db, cflags); ops->tb_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ plugin_enabled = plugin_gen_tb_start(cpu, db); db->plugin_enabled = plugin_enabled; while (true) { *max_insns = ++db->num_insns; ops->insn_start(db, cpu); /* —— 只打印这个 TB 的第一条指令 —— */ // // if ( qemu_log_in_addr_range(db->pc_first)) { // FILE *logfile2 = qemu_log_trylock(); // if (logfile2) { // RISCVCPU *rcpu = RISCV_CPU(cpu); // CPURISCVState *env = &rcpu->env; // enum { VBYTES = RV_VLEN_MAX / 8 }; /* 每个寄存器的字节数 */ // // uint8_t *vptr = (uint8_t *)env->vreg; // // unsigned vlenb = rcpu->cfg.vlenb; // fprintf(logfile2, "PC = 0x%016" PRIx64 "\n", (uint64_t)db->pc_next); // // tb->size = db->pc_next - db->pc_first; // // tb->icount = db->num_insns; // // fprintf(logfile2, "----------------\n"); // // /* 如果 ops->disas_log 存在且返回真,就跳过,否则由 target_disas 处理 */ // // if (!ops->disas_log || !ops->disas_log(db, cpu, logfile2)) { // // // fprintf(logfile2, "IN: %s\n", lookup_symbol(db->pc_first)); // // /* 我们改过的 target_disas 已经只打印第一条指令 */ // // // target_disas(logfile2, cpu, db); // // } // /* 打印 32 个 GPR */ // fprintf(logfile2, "-- GPRs --\n"); // for (int i = 0; i < 32; i++) { // fprintf(logfile2, " x%-2d=0x%016" PRIx64, // i, (uint64_t)env->gpr[i]); // if ((i & 3) == 3) { // fputc('\n', logfile2); // } // } // fputc('\n', logfile2); // /* FPR */ // fprintf(logfile2, "-- FPRs --\n"); // for (int i = 0; i < 32; i++) { // fprintf(logfile2, " f%-2d=0x%016" PRIx64, // i, (uint64_t)env->fpr[i]); // if ((i & 3) == 3) { // fputc('\n', logfile2); // } // } // fputc('\n', logfile2); // fprintf(logfile2, "-- VPRs --\n"); // static const int dump_rvv_csrs[] = { // CSR_VSTART, // CSR_VXSAT, // CSR_VXRM, // CSR_VCSR, // CSR_VL, // CSR_VTYPE, // CSR_VLENB, // }; // for (int i = 0; i < ARRAY_SIZE(dump_rvv_csrs); ++i) { // int csrno = dump_rvv_csrs[i]; // target_ulong val = 0; // RISCVException res = riscv_csrrw_debug(env, csrno, &val, 0, 0); // /* // * 
Rely on the smode, hmode, etc, predicates within csr.c // * to do the filtering of the registers that are present. // */ // if (res == RISCV_EXCP_NONE) { // fprintf(logfile2, " %-8s " TARGET_FMT_lx "\n", // csr_ops[csrno].name, val); // } // } // uint16_t vlenb = rcpu->cfg.vlenb; // for (int i = 0; i < 32; i++) { // fprintf(logfile2, " %-8s ", riscv_rvv_regnames2[i]); // uint8_t *p= (uint8_t *)env->vreg; // for (int j = vlenb - 1 ; j >= 0; j--) { // fprintf(logfile2, "%02x", *(p + i * vlenb + BYTE(j))); // } // fprintf(logfile2, "\n"); // } // fprintf(logfile2, "\n"); // // RISCVException res = riscv_csrrw_debug(env, CSR_FCSR, &val, 0, 0); // static const int dump_csrs[] = { // CSR_MHARTID, // CSR_MSTATUS, // CSR_MSTATUSH, // /* // * CSR_SSTATUS is intentionally omitted here as its value // * can be figured out by looking at CSR_MSTATUS // */ // CSR_HSTATUS, // CSR_VSSTATUS, // CSR_MIP, // CSR_MIE, // CSR_MIDELEG, // CSR_HIDELEG, // CSR_MEDELEG, // CSR_HEDELEG, // CSR_MTVEC, // CSR_STVEC, // CSR_VSTVEC, // CSR_MEPC, // CSR_SEPC, // CSR_VSEPC, // CSR_MCAUSE, // CSR_SCAUSE, // CSR_VSCAUSE, // CSR_MTVAL, // CSR_STVAL, // CSR_HTVAL, // CSR_MTVAL2, // CSR_MSCRATCH, // CSR_SSCRATCH, // CSR_SATP, // }; // for (int i = 0; i < ARRAY_SIZE(dump_csrs); ++i) { // int csrno = dump_csrs[i]; // target_ulong val = 0; // RISCVException res = riscv_csrrw_debug(env, csrno, &val, 0, 0); // /* // * Rely on the smode, hmode, etc, predicates within csr.c // * to do the filtering of the registers that are present. 
// */ // if (res == RISCV_EXCP_NONE) { // fprintf(logfile2, " %-8s " TARGET_FMT_lx "\n", // csr_ops[csrno].name, val); // } // } // // if (res == RISCV_EXCP_NONE) { // // qemu_fprintf(logfile2, " %-8s " TARGET_FMT_lx "\n", // // csr_ops[CSR_FCSR].name, val); // // } // qemu_log_unlock(logfile2); // } // } // gen_helper_my_helloworld_helper(); db->insn_start = tcg_last_op(); if (first_insn_start == NULL) { first_insn_start = db->insn_start; } tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ if (plugin_enabled) { plugin_gen_insn_start(cpu, db); } /* * Disassemble one instruction. The translate_insn hook should * update db->pc_next and db->is_jmp to indicate what should be * done next -- either exiting this loop or locate the start of * the next instruction. */ ops->translate_insn(db, cpu); /* * We can't instrument after instructions that change control * flow although this only really affects post-load operations. * * Calling plugin_gen_insn_end() before we possibly stop translation * is important. Even if this ends up as dead code, plugin generation * needs to see a matching plugin_gen_insn_{start,end}() pair in order * to accurately track instrumented helpers that might access memory. */ if (plugin_enabled) { plugin_gen_insn_end(); } /* Stop translation if translate_insn so indicated. */ if (db->is_jmp != DISAS_NEXT) { break; } /* Stop translation if the output buffer is full, or we have executed all of the allowed instructions. */ if (tcg_op_buf_full() || db->num_insns >= db->max_insns) { db->is_jmp = DISAS_TOO_MANY; break; } } /* Emit code to exit the TB, as indicated by db->is_jmp. */ ops->tb_stop(db, cpu); gen_tb_end(tb, cflags, icount_start_insn, db->num_insns); /* * Manage can_do_io for the translation block: set to false before * the first insn and set to true before the last insn. 
*/ if (db->num_insns == 1) { tcg_debug_assert(first_insn_start == db->insn_start); } else { tcg_debug_assert(first_insn_start != db->insn_start); tcg_ctx->emit_before_op = first_insn_start; set_can_do_io(db, false); } tcg_ctx->emit_before_op = db->insn_start; set_can_do_io(db, true); tcg_ctx->emit_before_op = NULL; /* May be used by disas_log or plugin callbacks. */ tb->size = db->pc_next - db->pc_first; tb->icount = db->num_insns; if (plugin_enabled) { plugin_gen_tb_end(cpu, db->num_insns); } if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(db->pc_first)) { FILE *logfile = qemu_log_trylock(); if (logfile) { fprintf(logfile, "----------------\n"); if (!ops->disas_log || !ops->disas_log(db, cpu, logfile)) { fprintf(logfile, "IN: %s\n", lookup_symbol(db->pc_first)); target_disas(logfile, cpu, db); } fprintf(logfile, "\n"); qemu_log_unlock(logfile); } } } ``` 後來就想說好那從插件下手,下面是chatgpu 給一個範例由此可以觀察到我如果要拿到cpu的register狀態 那我每次都要呼叫這個function qemu_plugin_read_register,那我就將次數降為1,因為我從 /root/andesqemu/qemu/target/riscv/gdbstub.c 這邊進行下手,也就是我只呼叫一次,但是我要在這邊print 出 vpu cpu fpu的所有rigister ```c= /* slice_print_regs_with_vpu_final.c * * After every instruction, print: * - PC + disassembly * - 32 GPRs (x0–x31) * - 32 FPRs (f0–f31) * - All VPRs named v0–v31 * Also keep taint & slice collection logic */ #include <qemu-plugin.h> #include <glib.h> #include <stdio.h> #include <inttypes.h> #include <string.h> #include <ctype.h> QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; /* -------- User argument: seed PC for slice -------- */ static uint64_t seed_pc = 0; /* -------- taint tag & result collection -------- */ __thread uint8_t reg_tag[32]; typedef struct { uint64_t pc; const char *dis; } rec_t; static GHashTable *rec_ht; static GPtrArray *rec_arr; /* -------- Global register descriptors & temp buffer -------- */ static GArray *reg_descs = NULL; static GByteArray *reg_buf = NULL; /* vCPU init callback: fetch register list & allocate buffer */ static void 
vcpu_init_cb(qemu_plugin_id_t id, unsigned cpu_index) { if (!reg_descs) { reg_descs = qemu_plugin_get_registers(); /* Maximum vector register: 512 bits = 64 bytes */ reg_buf = g_byte_array_sized_new(64); } } /* Add a record to the slice result (de-duplicated) */ static void add_rec(uint64_t pc, const char *dis) { if (!g_hash_table_contains(rec_ht, GSIZE_TO_POINTER(pc))) { rec_t *r = g_new(rec_t, 1); r->pc = pc; r->dis = dis; g_hash_table_insert(rec_ht, GSIZE_TO_POINTER(pc), r); g_ptr_array_add(rec_arr, r); } } /* Callback after each instruction: print PC/Disasm + GPR/FPR/VPR + taint */ static void exec_cb(unsigned cpu, void *ud) { const uint64_t *info = ud; uint64_t pc = info[0]; uint32_t raw = (uint32_t)info[1]; const char *dis = (const char *)info[2]; /* 1) PC + Disassembly */ printf("pc=0x%016" PRIx64 " %s\n", pc, dis); // /* 2) GPR x0–x31 */ // printf("-- GPRs --\n"); // for (int i = 0; i < 32 && i < (int)reg_descs->len; i++) { // qemu_plugin_reg_descriptor *d = // &g_array_index(reg_descs, qemu_plugin_reg_descriptor, i); // g_byte_array_set_size(reg_buf, 0); // int sz = qemu_plugin_read_register(d->handle, reg_buf); // if (sz >= 8) { // uint64_t val; // memcpy(&val, reg_buf->data, 8); // printf(" %-4s = 0x%016" PRIx64, d->name, val); // } // if ((i & 3) == 3) { // printf("\n"); // } // } // /* 3) FPR f0–f31 */ // if ((int)reg_descs->len >= 64) { // printf("-- FPRs --\n"); // for (int i = 32; i < 64 && i < (int)reg_descs->len; i++) { // qemu_plugin_reg_descriptor *d = // &g_array_index(reg_descs, qemu_plugin_reg_descriptor, i); // g_byte_array_set_size(reg_buf, 0); // int sz = qemu_plugin_read_register(d->handle, reg_buf); // if (sz >= 8) { // uint64_t val; // memcpy(&val, reg_buf->data, 8); // printf(" %-4s = 0x%016" PRIx64, d->name, val); // } // if (((i - 32) & 3) == 3) { // printf("\n"); // } // } // } // /* 4) VPR v0–v31 */ // printf("-- VPRs --\n"); // for (int i = 0; i < (int)reg_descs->len; i++) { // qemu_plugin_reg_descriptor *d = // 
&g_array_index(reg_descs, qemu_plugin_reg_descriptor, i); // printf("reg[%d]: %s size=%d\n", i, d->name, d->size); // const char *nm = d->name; // if (nm[0] == 'v' && isdigit((unsigned char)nm[1])) { // g_byte_array_set_size(reg_buf, 0); // int sz = qemu_plugin_read_register(d->handle, reg_buf); // if (sz > 0) { // printf(" %-4s =", nm); // for (int b = sz - 1; b >= 0; b--) { // printf(" %02x", reg_buf->data[b]); // } // printf("\n"); // } // } // } } /* TB translation done callback: register exec_cb, needs register read permission */ static void tb_cb(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) { size_t n = qemu_plugin_tb_n_insns(tb); for (size_t i = 0; i < n; i++) { struct qemu_plugin_insn *in = qemu_plugin_tb_get_insn(tb, i); /* Prepare params: pc, raw, dis */ static uint64_t ring[4096][3]; static size_t idx = 0; uint64_t *slot = ring[idx++ & 4095]; slot[0] = qemu_plugin_insn_vaddr(in); uint8_t buf[4] = {0}; qemu_plugin_insn_data(in, buf, sizeof(buf)); slot[1] = buf[0] | (buf[1]<<8) | (buf[2]<<16) | (buf[3]<<24); slot[2] = (uint64_t)qemu_plugin_insn_disas(in); qemu_plugin_register_vcpu_insn_exec_cb( in, exec_cb, QEMU_PLUGIN_CB_R_REGS, slot ); } } /* Output slice result at exit */ static void dump_slice(void) { printf("\n=== Data slice from 0x%lx ===\n", seed_pc); for (guint i = 0; i < rec_arr->len; i++) { rec_t *r = rec_arr->pdata[i]; printf("0x%016" PRIx64 " %s\n", r->pc, r->dis); } } /* Plugin entry: register vCPU init, TB translate & exit */ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc, char **argv) { /* Parse seed parameter */ for (int i = 0; i < argc; i++) { if (g_str_has_prefix(argv[i], "seed=")) { sscanf(argv[i] + 5, "%lx", &seed_pc); } } if (!seed_pc) { fprintf(stderr, "[err] need seed=0xPC\n"); return -1; } /* Initialize taint & slice */ rec_ht = g_hash_table_new(g_direct_hash, g_direct_equal); rec_arr = g_ptr_array_new(); /* **Ensure vcpu_init_cb is called first** */ 
qemu_plugin_register_vcpu_init_cb(id, vcpu_init_cb); qemu_plugin_register_vcpu_tb_trans_cb(id, tb_cb); qemu_plugin_register_atexit_cb(id, (void *)dump_slice, NULL); return 0; } ``` 這樣的好處就是我不用pluing include 一堆 headfile 每次呼叫我就可以拿到 整個 cpu的狀態 ```c= int riscv_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) { RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cs); RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; target_ulong tmp; /* 先取出要返回给 GDB 的那个寄存器值 */ if (n < 32) { tmp = env->gpr[n]; } else if (n == 32) { tmp = env->pc; } else { return 0; } // --- GPR --- printf("-- GPRs --\n"); #if UINTPTR_MAX == 0xffffffff for (int i = 0; i < 32; i++) { uint32_t v = (uint32_t)env->gpr[i]; if (v) { printf("%-8s=0x%08" PRIx32 "\n", riscv_int_regnames[i], v); } } #elif UINTPTR_MAX == 0xffffffffffffffff for (int i = 0; i < 32; i++) { uint64_t v = (uint64_t)env->gpr[i]; if (v) { printf("%-8s=0x%016" PRIx64 "\n", riscv_int_regnames[i], v); } } #endif // --- FPR --- printf("-- FPRs --\n"); if (env->misa_ext & RVD) { #if UINTPTR_MAX == 0xffffffff for (int i = 0; i < 32; i++) { uint32_t v = (uint32_t)env->fpr[i]; if (v) { printf("%-8s=0x%08" PRIx32 "\n", riscv_fpr_regnames[i], v); } } #elif UINTPTR_MAX == 0xffffffffffffffff for (int i = 0; i < 32; i++) { uint64_t v = (uint64_t)env->fpr[i]; if (v) { printf("%-8s=0x%016" PRIx64 "\n", riscv_fpr_regnames[i], v); } } #endif } else if (env->misa_ext & RVF) { for (int i = 0; i < 32; i++) { uint32_t v = (uint32_t)env->fpr[i]; if (v) { printf("%-8s=0x%08x\n", riscv_fpr_regnames[i], v); } } } // --- VPR --- // printf("-- VPRs --\n"); // uint16_t vlenb = cpu->cfg.vlenb; /* bytes per vector reg */ printf("-- VPRs --\n"); uint16_t vlenb = cpu->cfg.vlenb; uint8_t *base = (uint8_t *)env->vreg; for (int i = 0; i < 32; i++) { uint8_t *p = base + i * vlenb; // 先扫描,看是否全为 0 bool all_zero = true; for (int j = 0; j < vlenb; j++) { if (p[j] != 0) { all_zero = false; break; } } if (all_zero) { continue; // 全 0,跳过不打印 } printf("v%-2d = ", i); for 
(int j = 0; j < vlenb; j++) { printf("%02x", p[j]); } printf("\n"); } // for (int i = 0; i < 32; i++) { // uint8_t *p = (uint8_t *)&env->vreg[i * vlenb]; // // bool nonzero = false; // // for (int j = 0; j < vlenb; j++) { // // if (p[j]) { // // nonzero = true; // // break; // // } // // } // // if (!nonzero) { // // continue; // // } // printf("v%-2d=", i); // for (int j = 0; j < vlenb; j++) { // printf("%02x", p[j]); // } // printf("\n"); // } // --- CSR Registers --- static const int dump_csrs[] = { CSR_MHARTID, CSR_MSTATUS, CSR_MSTATUSH, CSR_HSTATUS, CSR_VSSTATUS, CSR_MIP, CSR_MIE, CSR_MIDELEG, CSR_HIDELEG, CSR_MEDELEG, CSR_HEDELEG, CSR_MTVEC, CSR_STVEC, CSR_VSTVEC, CSR_MEPC, CSR_SEPC, CSR_VSEPC, CSR_MCAUSE, CSR_SCAUSE, CSR_VSCAUSE, CSR_MTVAL, CSR_STVAL, CSR_HTVAL, CSR_MTVAL2, CSR_MSCRATCH, CSR_SSCRATCH, CSR_SATP, }; printf("-- CSR Registers --\n"); for (int i = 0; i < ARRAY_SIZE(dump_csrs); i++) { int csrno = dump_csrs[i]; target_ulong val = 0; RISCVException res = riscv_csrrw_debug(env, csrno, &val, 0, 0); if (res == RISCV_EXCP_NONE && val != 0) { const char *name = csr_ops[csrno].name; #if UINTPTR_MAX == 0xffffffff printf("%-10s=0x%08" PRIx32 "\n", name, (uint32_t)val); #elif UINTPTR_MAX == 0xffffffffffffffff printf("%-10s=0x%016" PRIx64 "\n", name, (uint64_t)val); #endif } } if (n < 32) { tmp = env->gpr[n]; } else if (n == 32) { tmp = env->pc; } else { return 0; } switch (mcc->misa_mxl_max) { case MXL_RV32: return gdb_get_reg32(mem_buf, tmp); case MXL_RV64: case MXL_RV128: return gdb_get_reg64(mem_buf, tmp); default: g_assert_not_reached(); } return 0; } ``` 注意在plugin 這邊 qemu_plugin_register_vcpu_tb_trans_cb(id, tb_cb); 當一個tb翻譯完成tcg 我再針對他每一個inst 進行掛進去qemu_plugin_register_vcpu_insn_exec_cb 事件 進行掛進去qemu_plugin_register_vcpu_insn_exec_cb ```c= static void tb_cb(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) { size_t n = qemu_plugin_tb_n_insns(tb); for (size_t i = 0; i < n; i++) { struct qemu_plugin_insn *in = qemu_plugin_tb_get_insn(tb, i); /* 准备参数:pc, raw, 
dis */ static uint64_t ring[4096][3]; static size_t idx = 0; uint64_t *slot = ring[idx++ & 4095]; slot[0] = qemu_plugin_insn_vaddr(in); uint8_t buf[4] = {0}; qemu_plugin_insn_data(in, buf, sizeof(buf)); slot[1] = buf[0] | (buf[1]<<8) | (buf[2]<<16) | (buf[3]<<24); slot[2] = (uint64_t)qemu_plugin_insn_disas(in); qemu_plugin_register_vcpu_insn_exec_cb( in, exec_cb, QEMU_PLUGIN_CB_R_REGS, slot ); } } ``` exec_cb ```c= static void exec_cb(unsigned cpu, void *ud) { const uint64_t *info = ud; uint64_t pc = info[0]; uint32_t raw = (uint32_t)info[1]; const char *dis = (const char *)info[2]; /* 1) PC + Disassembly */ printf("pc=0x%016" PRIx64 " %s\n", pc, dis); // /* 2) GPR x0–x31 */ // printf("-- GPRs --\n"); // for (int i = 0; i < 32 && i < (int)reg_descs->len; i++) { // qemu_plugin_reg_descriptor *d = // &g_array_index(reg_descs, qemu_plugin_reg_descriptor, i); // g_byte_array_set_size(reg_buf, 0); // int sz = qemu_plugin_read_register(d->handle, reg_buf); // if (sz >= 8) { // uint64_t val; // memcpy(&val, reg_buf->data, 8); // printf(" %-4s = 0x%016" PRIx64, d->name, val); // } // if ((i & 3) == 3) { // printf("\n"); // } // } ``` 不過剛剛在 gdbstub 你可以把 for (int i = 0; i < 1 && i < (int)reg_descs->len; i++) { 改成呼叫一次就ok了,到這邊你已經完成指令執行前和執行後的狀態了,那比較感興趣的 memory 狀態呢 ```c= /* 2) GPR x0–x31 */ printf("-- GPRs --\n"); for (int i = 0; i < 1 && i < (int)reg_descs->len; i++) { qemu_plugin_reg_descriptor *d = &g_array_index(reg_descs, qemu_plugin_reg_descriptor, i); g_byte_array_set_size(reg_buf, 0); int sz = qemu_plugin_read_register(d->handle, reg_buf); if (sz >= 8) { uint64_t val; memcpy(&val, reg_buf->data, 8); printf(" %-4s = 0x%016" PRIx64, d->name, val); } if ((i & 3) == 3) { printf("\n"); } } ``` 這邊想法大概就是,大部分的指令都是在算offset 和 一些branch 的 imm ,那我想就是真的寫到記憶體的時候最無腦的方式是什麼,那我最初嘗試的方式是 qemu_plugin_read_memory_vaddr 事實也證明當我持續watch 某一段address他確實可以返回相對應的值這樣代表我其實是可以watch 一個記憶體位置,那就可以用另一個方式,比如說我watch 一塊256mb的記憶體位置,每個指令執行前和執行後實際的記憶體狀態, ```c= if (!qemu_plugin_read_memory_vaddr(watch_addr, 
watch_buf, 8)) { printf(" [read @ 0x%016" PRIx64 " failed]\n", watch_addr); } else { uint64_t v = 0; memcpy(&v, watch_buf->data, 8); printf(" [mem[0x%016" PRIx64 "]=0x%016" PRIx64 "]\n", watch_addr, v); } ``` 那下面的改動就是 ```c= static const uint64_t watch_addr = 0x0000000002fffdd8ULL; static GByteArray *watch_buf = NULL; static const uint64_t base_addr = 0x0000000002F00000ULL; static const size_t region_size = 10UL * 1024 * 1024; static GByteArray *prev_snap = NULL; static GByteArray *cur_snap = NULL; static bool first_done = false; static void exec_cb(unsigned cpu, void *ud){ if (!prev_snap) { prev_snap = g_byte_array_sized_new(region_size); cur_snap = g_byte_array_sized_new(region_size); g_byte_array_set_size(prev_snap, region_size); if (!qemu_plugin_read_memory_vaddr(base_addr, prev_snap, region_size)) { fprintf(stderr, "init snap fail @0x%016" PRIx64 "\n", base_addr); return; } first_done = true; return; } g_byte_array_set_size(cur_snap, region_size); if (!qemu_plugin_read_memory_vaddr(base_addr, cur_snap, region_size)) { fprintf(stderr, "read sbnp @0x%016" PRIx64 "\n", base_addr); return; } printf("\n"); for (size_t off = 0; off < region_size; off++) { uint8_t old = prev_snap->data[off]; uint8_t nw = cur_snap->data[off]; if (old != nw) { printf("MEM_CHANGE @ 0x%016" PRIx64 ": 0x%02x -> 0x%02x\n", base_addr + off, old, nw); } } memcpy(prev_snap->data, cur_snap->data, region_size); } ``` 透過這樣的方式,我就可以在指令每一個執行前後看實際記憶體到底對哪個地方做寫入,但是這樣效率太慢了, 我們知道差不多只有load和 store是我想看的記憶體區間,那我只要攔截load 和 store 歷史寫過的記憶體位置不就好了嗎 好第三次升級這次qemu_plugin_register_vcpu_mem_cb 多加了這一個事件,當有load和 store的時候 就會觸發這個事件 ```c= static void tb_cb(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) { size_t n = qemu_plugin_tb_n_insns(tb); for (size_t i = 0; i < n; i++) { struct qemu_plugin_insn *in = qemu_plugin_tb_get_insn(tb, i); /* 准备参数:pc, raw, dis */ static uint64_t ring[4096][3]; static size_t idx = 0; uint64_t *slot = ring[idx++ & 4095]; slot[0] = qemu_plugin_insn_vaddr(in); uint8_t buf[4] = {0}; 
qemu_plugin_insn_data(in, buf, sizeof(buf)); slot[1] = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); slot[2] = (uint64_t)qemu_plugin_insn_disas(in); qemu_plugin_register_vcpu_insn_exec_cb( in, exec_cb, QEMU_PLUGIN_CB_R_REGS, slot); qemu_plugin_register_vcpu_mem_cb( in, taint_mem_cb_rw, QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_MEM_RW, NULL); } } ``` 這邊可以透過hashtable去紀錄已經拜訪過的記憶體狀態, ```c= static void taint_mem_cb_rw(unsigned int cpu_index, qemu_plugin_meminfo_t info, uint64_t vaddr, void *udata) { bool is_store = qemu_plugin_mem_is_store(info); unsigned int shift = qemu_plugin_mem_size_shift(info); size_t size = 1U << shift; size_t to_print = size < 16 ? size : 16; printf("\n%s @ 0x%016" PRIx64 " size=%zu value=", is_store ? "STORE" : "LOAD \n", vaddr, size); qemu_plugin_mem_value val = qemu_plugin_mem_get_value(info); switch (val.type) { case QEMU_PLUGIN_MEM_VALUE_U8: printf("%02x", val.data.u8); break; case QEMU_PLUGIN_MEM_VALUE_U16: for (size_t i = to_print; i > 0; i--) { uint8_t b = (val.data.u16 >> ((i - 1) * 8)) & 0xFF; printf("%02x", b); } break; case QEMU_PLUGIN_MEM_VALUE_U32: for (size_t i = to_print; i > 0; i--) { uint8_t b = (val.data.u32 >> ((i - 1) * 8)) & 0xFF; printf("%02x", b); } break; case QEMU_PLUGIN_MEM_VALUE_U64: for (size_t i = to_print; i > 0; i--) { uint8_t b = (val.data.u64 >> ((i - 1) * 8)) & 0xFF; printf("%02x", b); } break; case QEMU_PLUGIN_MEM_VALUE_U128: for (size_t i = to_print; i > 0; i--) { uint8_t b; if (i > 8) { b = (val.data.u128.high >> ((i - 9) * 8)) & 0xFF; } else { b = (val.data.u128.low >> ((i - 1) * 8)) & 0xFF; } printf("%02x", b); } break; } if (size > to_print) { printf("..."); } printf("\n"); } ``` 那麼再增加一些東西吧,歷史訪問過的記憶體狀態我都會加到table下一次指令執行時候,因為我不確定是否有指令會透過偏移的方式去影響data 所以之前經過的記憶體我都要進行監控 ```c= static GHashTable *rec_ht; static GPtrArray *rec_arr; static GHashTable *prev_val_ht = NULL; static GArray *reg_descs = NULL; static GByteArray *reg_buf = NULL; static GHashTable *taint_map; static GHashTable *mem_prev_map; static 
void taint_mem_cb_rw(unsigned int cpu_index, qemu_plugin_meminfo_t info, uint64_t vaddr, void *udata) { bool is_store = qemu_plugin_mem_is_store(info); unsigned int shift = qemu_plugin_mem_size_shift(info); size_t size = 1U << shift; size_t to_print = size < 16 ? size : 16; GByteArray *cur = g_byte_array_sized_new(size); if (!qemu_plugin_read_memory_vaddr(vaddr, cur, size)) { fprintf(stderr, "read failed @0x%016" PRIx64 "\n", vaddr); g_byte_array_free(cur, TRUE); return; } GByteArray *prev = g_hash_table_lookup(mem_prev_map, GSIZE_TO_POINTER(vaddr)); if (prev) { printf("\nPREV @ 0x%016" PRIx64 " value=", vaddr); for (size_t i = prev->len < to_print ? prev->len : to_print; i > 0; i--) { printf("%02x", prev->data[i - 1]); } if (prev->len > to_print) { printf("..."); } printf("\n"); } printf("\n%s @ 0x%016" PRIx64 " size=%zu value=", is_store ? "STORE" : "LOAD", vaddr, size); for (size_t i = to_print; i > 0; i--) { printf("%02x", cur->data[i - 1]); } if (size > to_print) { printf("..."); } printf("\n"); g_hash_table_replace(mem_prev_map, GSIZE_TO_POINTER(vaddr), cur); } ``` 那麼返回到exec_cb 可以發現到,之前走過的記憶體我再次透過qemu_plugin_read_memory_vaddr進行監控,這樣我就可以動態的去掃描 已經走過的記憶體狀態 ```c= static void exec_cb(unsigned cpu, void *ud) { const uint64_t *info = ud; uint64_t pc = info[0]; uint32_t raw = (uint32_t)info[1]; const char *dis = (const char *)info[2]; GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, mem_prev_map); while (g_hash_table_iter_next(&iter, &key, &value)) { uint64_t vaddr = GPOINTER_TO_SIZE(key); GByteArray *prev = (GByteArray*)value; size_t size = prev->len; size_t to_print = size < 16 ? 
size : 16; GByteArray *cur = g_byte_array_sized_new(size); if (!qemu_plugin_read_memory_vaddr(vaddr, cur, size)) { g_byte_array_free(cur, TRUE); continue; } if (memcmp(prev->data, cur->data, size) != 0) { printf("\nMEM_CHANGE @ 0x%016" PRIx64 " PREV=", vaddr); for (size_t i = to_print; i > 0; i--) { printf("%02x", prev->data[i-1]); } if (size > to_print) printf("..."); printf("\n"); printf("MEM_CHANGE @ 0x%016" PRIx64 " NEW =", vaddr); for (size_t i = to_print; i > 0; i--) { printf("%02x", cur->data[i-1]); } if (size > to_print) printf("..."); printf("\n"); g_hash_table_replace(mem_prev_map, GSIZE_TO_POINTER(vaddr), cur); } else { g_byte_array_free(cur, TRUE); } } ``` 下面是我最近寫得感興趣的plugin ,如果稍微改裝就可以變成汙點分析之類的 ```c= /* slice_print_regs_with_vpu_final.c * * After each instruction, print: * - PC + disassembly * - 32 GPRs (x0–x31) * - 32 FPRs (f0–f31) * - All VPRs named v0–v31 * Also keep taint & slice collection logic */ #include <qemu-plugin.h> #include <glib.h> #include <stdio.h> #include <inttypes.h> #include <string.h> #include <ctype.h> QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; /* -------- User parameter: seed PC for slice -------- */ static uint64_t seed_pc = 0; /* -------- taint tag & result collection -------- */ __thread uint8_t reg_tag[32]; typedef struct { uint64_t pc; const char *dis; } rec_t; static GHashTable *rec_ht; static GPtrArray *rec_arr; static GHashTable *prev_val_ht = NULL; /* -------- Global register descriptors & temp buffer -------- */ static GArray *reg_descs = NULL; static GByteArray *reg_buf = NULL; // Global taint map: record which guest physical/virtual addresses have been tagged static GHashTable *taint_map; static GHashTable *mem_prev_map; /* In qemu-plugin.h: * unsigned int qemu_plugin_mem_size_shift(qemu_plugin_meminfo_t info); * Returns log2(access size): 0=1B, 1=2B, 2=4B, … */ /* Load/Store byte tracing: address -> previous value */ // static GHashTable *prev_val_ht = NULL; /* Load/Store callback — compare old 
and new value byte by byte */
/* Modify taint_mem_cb_rw */

/*
 * Load/store callback: snapshot the accessed bytes at vaddr, print the
 * snapshot stored by an earlier access to the same address (if any), print
 * the current bytes, then store the new snapshot in mem_prev_map keyed by
 * vaddr. Bytes are printed highest offset first; at most 16 are shown.
 * cpu_index and udata are unused.
 */
static void taint_mem_cb_rw(unsigned int cpu_index,
                            qemu_plugin_meminfo_t info,
                            uint64_t vaddr,
                            void *udata)
{
    const bool is_store = qemu_plugin_mem_is_store(info);
    const size_t size = 1U << qemu_plugin_mem_size_shift(info);
    const size_t to_print = size < 16 ? size : 16;

    /* Current contents of the accessed location */
    GByteArray *snapshot = g_byte_array_sized_new(size);
    if (!qemu_plugin_read_memory_vaddr(vaddr, snapshot, size)) {
        fprintf(stderr, "read failed @0x%016" PRIx64 "\n", vaddr);
        g_byte_array_free(snapshot, TRUE);
        return;
    }

    /* Snapshot left behind by an earlier access to this address, if any */
    GByteArray *earlier = g_hash_table_lookup(mem_prev_map,
                                              GSIZE_TO_POINTER(vaddr));
    if (earlier != NULL) {
        size_t left = earlier->len < to_print ? earlier->len : to_print;

        printf("\nPREV  @ 0x%016" PRIx64 " value=", vaddr);
        while (left > 0) {
            left--;
            printf("%02x", earlier->data[left]);
        }
        if (earlier->len > to_print) {
            printf("...");
        }
        printf("\n");
    }

    /* The value seen by this LOAD/STORE */
    printf("\n%s @ 0x%016" PRIx64 " size=%zu value=",
           is_store ? "STORE" : "LOAD", vaddr, size);
    size_t left = to_print;
    while (left > 0) {
        left--;
        printf("%02x", snapshot->data[left]);
    }
    if (size > to_print) {
        printf("...");
    }
    printf("\n");

    /*
     * Remember the new snapshot for this address.
     * NOTE(review): the old GByteArray is released by GLib only if
     * mem_prev_map was created with a value-destroy function; the creation
     * site is not visible here -- confirm, otherwise the old one leaks.
     */
    g_hash_table_replace(mem_prev_map, GSIZE_TO_POINTER(vaddr), snapshot);
}

// static void taint_mem_cb_rw(unsigned int cpu_index,
// ...
(keep rest of the commented-out legacy code as-is, unchanged) /* vCPU init callback: fetch register list & allocate buffer */ static void vcpu_init_cb(qemu_plugin_id_t id, unsigned cpu_index) { if (!reg_descs) { reg_descs = qemu_plugin_get_registers(); /* Max vector register 512 bit = 64 bytes */ reg_buf = g_byte_array_sized_new(64); } } /* Add a record to slice results (deduplicate) */ static void add_rec(uint64_t pc, const char *dis) { if (!g_hash_table_contains(rec_ht, GSIZE_TO_POINTER(pc))) { rec_t *r = g_new(rec_t, 1); r->pc = pc; r->dis = dis; g_hash_table_insert(rec_ht, GSIZE_TO_POINTER(pc), r); g_ptr_array_add(rec_arr, r); } } /* The fixed address we want to monitor */ static const uint64_t watch_addr = 0x0000000002fffdd8ULL; /* Read buffer */ static GByteArray *watch_buf = NULL; // Start address of the 256 MB region to monitor static const uint64_t base_addr = 0x0000000002F00000ULL; // Region size: 256 MB static const size_t region_size = 10UL * 1024 * 1024; // Snapshot buffer static GByteArray *prev_snap = NULL; static GByteArray *cur_snap = NULL; // Whether the first snapshot is completed static bool first_done = false; static int64_t sign_extend(int64_t val, int bits) { int64_t m = 1LL << (bits - 1); return (val ^ m) - m; } // ---------- Vector ALU/Control instruction table (V1.1 spec, opcode=0x57) ---------- static const struct { uint8_t funct6; const char *mnemonic; } vector_mnemonics[] = { // vsetvli/vsetivli {0x00, "vsetvli"}, {0x01, "vsetivli"}, // Slide {0x08, "vslide1down.vv"}, {0x09, "vslide1down.vx"}, {0x0A, "vslide1up.vv"}, {0x0B, "vslide1up.vx"}, // Compress/Expand {0x34, "vcompress.vm"}, {0x35, "vecompress.vm"}, {0x36, "vexpand.vm"}, {0x37, "veexpand.vm"}, // Reduction {0x3C, "vredsum.vs"}, {0x3D, "vredand.vs"}, {0x3E, "vredor.vs"}, {0x3F, "vredxor.vs"}, // Integer arithmetic/logical {0x10, "vadd.vv"}, {0x11, "vadd.vx"}, {0x12, "vsub.vv"}, {0x13, "vsub.vx"}, {0x14, "vmin.vv"}, {0x15, "vmin.vx"}, {0x16, "vmax.vv"}, {0x17, "vmax.vx"}, {0x1C, 
"vmxor"}, {0x1E, "vmor"}, {0x18, "vmand.not"}, {0x19, "vmand"}, {0x1A, "vmandnot"}, {0x1B, "vmnand"}, {0x1D, "vmornot"}, {0x1F, "vmnor"}, // Shift {0x21, "vsrl.vv"}, {0x22, "vsrl.vx"}, {0x23, "vsrl.vi"}, {0x24, "vsra.vv"}, {0x25, "vsra.vx"}, {0x26, "vsra.vi"}, {0x27, "vsll.vv"}, {0x28, "vsll.vx"}, {0x29, "vsll.vi"}, // Widening arithmetic {0x2A, "vwaddu.vv"}, {0x2B, "vwaddu.vx"}, {0x2C, "vadd.vv"}, {0x2D, "vadd.vx"}, {0x2E, "vsubu.vv"}, {0x2F, "vsubu.vx"}, {0x30, "vminu.vv"}, {0x31, "vminu.vx"}, {0x32, "vmaxu.vv"}, {0x33, "vmaxu.vx"}, // Floating-point (partial example) {0x20, "vsext.vf2"}, {0x38, "vfadd.vv"}, {0x39, "vfsub.vv"}, {0x3A, "vfmul.vv"}, {0x3B, "vfdiv.vv"}, }; static unsigned global_vlen = 512; static GPtrArray *mem_touch_list; /* Instruction execution callback: print PC/Disasm + GPR/FPR/VPR + taint */ static void exec_cb(unsigned cpu, void *ud) { const uint64_t *info = ud; uint64_t pc = info[0]; uint32_t raw = (uint32_t)info[1]; const char *dis = (const char *)info[2]; // if (!prev_snap) // { // prev_snap = g_byte_array_sized_new(region_size); // cur_snap = g_byte_array_sized_new(region_size); // // Read once into prev_snap, then mark first as done // g_byte_array_set_size(prev_snap, region_size); // if (!qemu_plugin_read_memory_vaddr(base_addr, prev_snap, region_size)) // { // fprintf(stderr, "initial snapshot failed @0x%016" PRIx64 "\n", base_addr); // return; // } // first_done = true; // return; // } // // Read current snapshot into cur_snap // g_byte_array_set_size(cur_snap, region_size); // if (!qemu_plugin_read_memory_vaddr(base_addr, cur_snap, region_size)) // { // fprintf(stderr, "read snapshot failed @0x%016" PRIx64 "\n", base_addr); // return; // } // printf("\n"); // // Compare cur_snap and prev_snap, find differences // for (size_t off = 0; off < region_size; off++) // { // uint8_t old = prev_snap->data[off]; // uint8_t nw = cur_snap->data[off]; // if (old != nw) // { // printf("MEM_CHANGE @ 0x%016" PRIx64 ": 0x%02x -> 0x%02x\n", // 
base_addr + off, old, nw); // } // } // // Swap snapshots: next time use cur_snap as prev_snap // memcpy(prev_snap->data, cur_snap->data, region_size); /* 1) PC + Disassembly */ /* 2) GPR x0–x31 */ // printf("-- GPRs --\n"); // printf("============================================\n="); // printf("\npc=0x%016" PRIx64 " %s\n", pc, dis); /* 2) GPR x0–x31 */ // printf("-- GPRs --\n"); // printf("============================================\n="); GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, mem_prev_map); while (g_hash_table_iter_next(&iter, &key, &value)) { uint64_t vaddr = GPOINTER_TO_SIZE(key); GByteArray *prev = (GByteArray*)value; size_t size = prev->len; size_t to_print = size < 16 ? size : 16; /* Read current snapshot */ GByteArray *cur = g_byte_array_sized_new(size); if (!qemu_plugin_read_memory_vaddr(vaddr, cur, size)) { g_byte_array_free(cur, TRUE); continue; } /* If changed, print and update prev */ if (memcmp(prev->data, cur->data, size) != 0) { printf("\nMEM_CHANGE @ 0x%016" PRIx64 " PREV=", vaddr); for (size_t i = to_print; i > 0; i--) { printf("%02x", prev->data[i-1]); } if (size > to_print) printf("..."); printf("\n"); printf("MEM_CHANGE @ 0x%016" PRIx64 " NEW =", vaddr); for (size_t i = to_print; i > 0; i--) { printf("%02x", cur->data[i-1]); } if (size > to_print) printf("..."); printf("\n"); /* Directly replace old snapshot with new */ g_hash_table_replace(mem_prev_map, GSIZE_TO_POINTER(vaddr), cur); } else { /* Not changed, just free new snapshot */ g_byte_array_free(cur, TRUE); } } printf("\n============================================\n"); printf("pc=0x%016" PRIx64 " %s\n", pc, dis); uint8_t opcode = raw & 0x7F; uint8_t funct3 = (raw >> 12) & 0x07; uint8_t funct6 = (raw >> 26) & 0x3F; uint8_t rd = (raw >> 7) & 0x1F; uint8_t rs1 = (raw >> 15) & 0x1F; uint8_t rs2 = (raw >> 20) & 0x1F; uint8_t vm = (raw >> 25) & 0x01; /* ================================================================================= * A) Vector 
Configuration Instructions (vsetvl/vsetvli/vsetivli) * opcode=0x57, funct3=0b111 * --- This decode logic is already correct and conforms to the 1.0 spec --- * ================================================================================= */ if (opcode == 0x57 && funct3 == 0x7) { // Use funct6 or other method to tell vsetvli or vsetivli bool is_iv = (funct6 == 0x33 || (rd == 0 && rs1 != 0)); uint32_t imm12 = (raw >> 20) & 0xFFF; /* 1) Extract AVL (zimm) - vsetivli's AVL is in rs1, vsetvli's in imm[10:0] */ uint32_t zimm = is_iv ? rs1 : (imm12 & 0x7FF); /* 2) Per V-Spec 1.0, vtype encoded in imm[7:0] */ uint8_t vtype = imm12 & 0xFF; /* 2a) Spec check: imm[11:8] must be 0 */ if ((imm12 & 0xF00) != 0) { printf(" [V-C] WARNING: Illegal vset(i)vli encoding with non-zero imm[11:8]. " "Binary may be from a pre-1.0 toolchain.\n"); } /* 3) Decode fields from vtype */ uint8_t vsew_enc = vtype & 0x7; uint8_t vlmul_enc = (vtype >> 3) & 0x7; bool vta = (vtype >> 6) & 0x1; bool vma = (vtype >> 7) & 0x1; /* 4) Calculate SEW */ int sew = 8 << vsew_enc; /* 5) Decode LMUL by standard encoding */ char mstr[16]; switch (vlmul_enc) { case 0b000: snprintf(mstr, sizeof(mstr), "m1"); break; case 0b001: snprintf(mstr, sizeof(mstr), "m2"); break; case 0b010: snprintf(mstr, sizeof(mstr), "m4"); break; case 0b011: snprintf(mstr, sizeof(mstr), "m8"); break; case 0b101: snprintf(mstr, sizeof(mstr), "mf2"); break; case 0b110: snprintf(mstr, sizeof(mstr), "mf4"); break; case 0b111: snprintf(mstr, sizeof(mstr), "mf8"); break; case 0b100: default: snprintf(mstr, sizeof(mstr), "RESERVED"); break; } /* 6) Output. Destination register rd is a GPR (x-register) */ printf(" [V-C] %-8s rd=x%-2d, zimm=%u, e%d, %s, ta=%d, ma=%d [vlen=%u]\n", is_iv ? "vsetivli" : "vsetvli", rd, zimm, sew, mstr, vta, vma, global_vlen); goto skip_base; } /* ================================================================================= * B) Vector Memory Instructions (vle/vse/vlse/vsse/...) 
* opcode=0x07 (LOAD-FP), 0x27 (STORE-FP) * --- Fixed EEW decoding and the source register for store instructions --- * ================================================================================= */ if (opcode == 0x07 || opcode == 0x27) { bool is_load = (opcode == 0x07); // vs3 (store data) and vd (load data) are at same place in the instruction uint8_t data_reg = rd; // Per V-Spec, funct3(width) field used to distinguish EEW int eew = -1; switch(funct3) { case 0b000: eew = 8; break; case 0b101: eew = 16; break; case 0b110: eew = 32; break; case 0b111: eew = 64; break; } if (eew != -1) { // Confirm it's a vector memory instruction uint8_t mop = (raw >> 26) & 0x03; // Addressing mode char mnem[32]; switch(mop) { case 0b00: // Unit-stride snprintf(mnem, sizeof(mnem), is_load ? "vle%d.v" : "vse%d.v", eew); printf(" [V-MEM] %-12s v%-2d, (x%-2d), vm=%d\n", mnem, data_reg, rs1, vm); break; case 0b10: // Strided snprintf(mnem, sizeof(mnem), is_load ? "vlse%d.v" : "vsse%d.v", eew); printf(" [V-MEM] %-12s v%-2d, (x%-2d), x%-2d, vm=%d\n", mnem, data_reg, rs1, rs2, vm); break; case 0b01: // Indexed-unordered case 0b11: // Indexed-ordered snprintf(mnem, sizeof(mnem), is_load ? 
"vluxei%d.v" : "vsuxei%d.v", eew); // Show simplified, don't distinguish ordered // data_reg is vd/vs3, rs1 is base, rs2 is index vector (vs2) printf(" [V-MEM] %-12s v%-2d, (x%-2d), v%-2d, vm=%d\n", mnem, data_reg, rs1, rs2, vm); break; } goto skip_base; } } /* ================================================================================= * C) Vector ALU Instructions (and others under OP-V) * opcode=0x57 * --- Fixed to distinguish operand format by funct3 --- * ================================================================================= */ if (opcode == 0x57) { // Lookup mnemonic from your list const char *mn = "v_unknown"; for (size_t i = 0; i < sizeof(vector_mnemonics)/sizeof(*vector_mnemonics); i++) { if (vector_mnemonics[i].funct6 == funct6) { mn = vector_mnemonics[i].mnemonic; break; } } // Determine operand format by funct3 and print switch (funct3) { case 0b000: // OPIVV: vector-vector (e.g., vadd.vv vd, vs2, vs1) printf(" [V-ALU] %-12s v%-2d, v%-2d, v%-2d, vm=%d\n", mn, rd, rs2, rs1, vm); break; case 0b100: // OPIVX: vector-scalar (e.g., vadd.vx vd, vs2, rs1) printf(" [V-ALU] %-12s v%-2d, v%-2d, x%-2d, vm=%d\n", mn, rd, rs2, rs1, vm); break; case 0b111: // OPIVI: vector-immediate (e.g., vadd.vi vd, vs2, imm) { // 5-bit immediate is in rs1 field, need sign extension int32_t imm = (int32_t)rs1; if (imm & 0x10) imm |= 0xFFFFFFE0; printf(" [V-ALU] %-12s v%-2d, v%-2d, %d, vm=%d\n", mn, rd, rs2, imm, vm); break; } case 0b010: // OPMVV: mask-mask (e.g., vmand.mm vd, vs2, vs1) // Mask instructions are usually unmasked (vm=1) printf(" [V-MSK] %-12s v%-2d, v%-2d, v%-2d\n", mn, rd, rs2, rs1); break; // Other formats such as OPFVV (0b001), OPFVF (0b101), OPMVX (0b110) can be added here default: printf(" [V-???] 
%-12s (funct3=0x%x) raw operands: rd=%d, rs1=%d, rs2=%d\n", mn, funct3, rd, rs1, rs2); break; } goto skip_base; } // Base ISA decode { uint8_t rd_b = rd; int64_t imm_i = sign_extend((int64_t)(raw>>20), 12); int64_t imm_s = sign_extend((int64_t)(((raw>>7)&0x1F)|((raw>>25)<<5)), 12); int64_t imm_b = sign_extend((int64_t)((((raw>>8)&0x0F)<<1)|(((raw>>25)&0x3F)<<5)|(((raw>>7)&1)<<11)|(((raw>>31)&1)<<12)), 13); int64_t imm_u = (int64_t)(raw & 0xFFFFF000); int64_t imm_j = sign_extend((int64_t)((((raw>>21)&0x3FF)<<1)|(((raw>>20)&1)<<11)|(((raw>>12)&0xFF)<<12)|(((raw>>31)&1)<<20)), 21); switch (opcode) { case 0b0110011: printf(" [R] rd=x%-2d rs1=x%-2d rs2=x%-2d\n", rd_b, rs1, rs2); break; case 0b0010011: printf(" [I] rd=x%-2d rs1=x%-2d imm=0x%016" PRIx64 "\n", rd_b, rs1, (uint64_t)imm_i); break; case 0b0000011: printf(" [I-ld] rd=x%-2d rs1=x%-2d imm=0x%016" PRIx64 "\n", rd_b, rs1, (uint64_t)imm_i); break; case 0b1100111: printf(" [I-jalr] rd=x%-2d rs1=x%-2d imm=0x%016" PRIx64 "\n", rd_b, rs1, (uint64_t)imm_i); break; case 0b0100011: printf(" [S] rs1=x%-2d rs2=x%-2d imm=0x%016" PRIx64 "\n", rs1, rs2, (uint64_t)imm_s); break; case 0b1100011: printf(" [B] rs1=x%-2d rs2=x%-2d imm=0x%016" PRIx64 "\n", rs1, rs2, (uint64_t)imm_b); break; case 0b0110111: case 0b0010111: printf(" [U] rd=x%-2d imm=0x%016" PRIx64 "\n", rd_b, (uint64_t)imm_u); break; case 0b1101111: printf(" [J] rd=x%-2d imm=0x%016" PRIx64 "\n", rd_b, (uint64_t)imm_j); break; default: printf(" [op=0x%02x] rd=x%-2d rs1=x%-2d rs2=x%-2d\n", opcode, rd_b, rs1, rs2); break; } } skip_base: ; for (int i = 0; i < 1 && i < (int)reg_descs->len; i++) { qemu_plugin_reg_descriptor *d = &g_array_index(reg_descs, qemu_plugin_reg_descriptor, i); // g_byte_array_set_size(reg_buf, 0); int sz = qemu_plugin_read_register(d->handle, reg_buf); // if (sz >= 8) { // uint64_t val; // memcpy(&val, reg_buf->data, 8); // printf(" %-4s = 0x%016" PRIx64, d->name, val); // } // if ((i & 3) == 3) { // printf("\n"); // } } // if 
(!qemu_plugin_read_memory_vaddr(watch_addr, watch_buf, 8)) { // printf(" [read @ 0x%016" PRIx64 " failed]\n", watch_addr); // } else { // /* Assume little-endian */ // uint64_t v = 0; // memcpy(&v, watch_buf->data, 8); // printf(" [mem[0x%016" PRIx64 "]=0x%016" PRIx64 "]\n", // watch_addr, v); // } // /* 3) FPR f0–f31 */ // if ((int)reg_descs->len >= 64) { // printf("-- FPRs --\n"); // for (int i = 32; i < 64 && i < (int)reg_descs->len; i++) { // qemu_plugin_reg_descriptor *d = // &g_array_index(reg_descs, qemu_plugin_reg_descriptor, i); // g_byte_array_set_size(reg_buf, 0); // int sz = qemu_plugin_read_register(d->handle, reg_buf); // if (sz >= 8) { // uint64_t val; // memcpy(&val, reg_buf->data, 8); // printf(" %-4s = 0x%016" PRIx64, d->name, val); // } // if (((i - 32) & 3) == 3) { // printf("\n"); // } // } // } // /* 4) VPR v0–v31 */ // printf("-- VPRs --\n"); // for (int i = 0; i < (int)reg_descs->len; i++) { // qemu_plugin_reg_descriptor *d = // &g_array_index(reg_descs, qemu_plugin_reg_descriptor, i); // printf("reg[%d]: %s size=%d\n", i, d->name, d->size); // const char *nm = d->name; // if (nm[0] == 'v' && isdigit((unsigned char)nm[1])) { // g_byte_array_set_size(reg_buf, 0); // int sz = qemu_plugin_read_register(d->handle, reg_buf); // if (sz > 0) { // printf(" %-4s =", nm); // for (int b = sz - 1; b >= 0; b--) { // printf(" %02x", reg_buf->data[b]); // } // printf("\n"); // } // } // } // printf("--------\n"); // /* 5) taint propagation & slice collection */ // uint32_t opc = raw & 0x7F; // uint32_t rd = (raw >> 7) & 0x1F; // uint32_t rs1 = (raw >> 15) & 0x1F; // uint32_t rs2 = (raw >> 20) & 0x1F; // if (pc == seed_pc && rd) { // reg_tag[rd] = 1; // add_rec(pc, dis); // } // if ((opc == 0x33 || opc == 0x13) && // ((rs1 && reg_tag[rs1]) || (opc == 0x33 && reg_tag[rs2]))) { // if (!reg_tag[rd]) { // add_rec(pc, dis); // } // reg_tag[rd] = 1; // } // On first call, allocate and initialize prev_snap } /* TB translation complete callback: register exec_cb, 
needs read register permission */ static void tb_cb(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) { size_t n = qemu_plugin_tb_n_insns(tb); for (size_t i = 0; i < n; i++) { struct qemu_plugin_insn *in = qemu_plugin_tb_get_insn(tb, i); /* Prepare params: pc, raw, dis */ static uint64_t ring[4096][3]; static size_t idx = 0; uint64_t *slot = ring[idx++ & 4095]; slot[0] = qemu_plugin_insn_vaddr(in); uint8_t buf[4] = {0}; qemu_plugin_insn_data(in, buf, sizeof(buf)); slot[1] = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); slot[2] = (uint64_t)qemu_plugin_insn_disas(in); qemu_plugin_register_vcpu_insn_exec_cb( in, exec_cb, QEMU_PLUGIN_CB_R_REGS, slot); qemu_plugin_register_vcpu_mem_cb( in, /* instruction */ taint_mem_cb_rw, /* unified read/write callback */ QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_MEM_RW, /* monitor both read and write */ NULL); } } /* Output slice results at exit */ static void dump_slice(void) { printf("\n=== Data slice from 0x%lx ===\n", seed_pc); for (guint i = 0; i < rec_arr->len; i++) { rec_t *r = rec_arr->pdata[i]; printf("0x%016" PRIx64 " %s\n", r->pc, r->dis); } } /* Plugin entry: register vCPU init, TB translate & exit */ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc, char **argv) { /* Parse seed parameter */ for (int i = 0; i < argc; i++) { if (g_str_has_prefix(argv[i], "seed=")) { sscanf(argv[i] + 5, "%lx", &seed_pc); } } if (!seed_pc) { fprintf(stderr, "[err] need seed=0xPC\n"); return -1; } if (!prev_val_ht) { prev_val_ht = g_hash_table_new(g_int64_hash, g_int64_equal); } if (!watch_buf) { /* Here we assume we care about 8 bytes; if another size, change accordingly */ watch_buf = g_byte_array_sized_new(8); } mem_prev_map = g_hash_table_new(g_direct_hash, g_direct_equal); taint_map = g_hash_table_new(g_direct_hash, g_direct_equal); /* Initialize taint & slice */ rec_ht = g_hash_table_new(g_direct_hash, g_direct_equal); rec_arr = g_ptr_array_new(); mem_touch_list = g_ptr_array_new(); 
/* **Ensure vcpu_init_cb is called first** */ qemu_plugin_register_vcpu_init_cb(id, vcpu_init_cb); qemu_plugin_register_vcpu_tb_trans_cb(id, tb_cb); qemu_plugin_register_atexit_cb(id, (void *)dump_slice, NULL); return 0;mem_touch_list = g_ptr_array_new(); } ``` 編譯過程大概就這樣 ```c= gcc -std=c11 -shared -fPIC -O3 -funroll-loops test.c -o watch_mem.so -I./include -Iinclude/hw/core -Iinclude/qapi -I$QEMU_SRC/src/include -I$QEMU_SRC/src/qapi -I$QEMU_SRC/src/target/riscv $(pkg-config --cflags --libs glib-2.0) ./qemu-system-riscv64 -nographic -M andes_ae350 -cpu andes-ax45mpv,v=on,vlen=512,elen=64 -m 1G -semihosting-config enable=on,chardev=char0 -chardev stdio,id=char0,mux=on,signal=off -mon char0 -serial none -bios ./a2.out -plugin ./watch_mem.so,seed=0x400080 | tee > log.txt ``` 這是最終效果圖 ```bash= ============================================ pc=0x0000000000011b60 ret [op=0x02] rd=x1 rs1=x1 rs2=x0 -- GPRs -- x1/ra =0x0000000000010744 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000018 x11/a1 =0x0000000002ffff28 x12/a2 =0x0000000000000018 x13/a3 =0x0000000000011bf8 x14/a4 =0x000000000000013c x15/a5 =0x0000000000013d50 x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR Registers -- mstatus =0x8000000a00001e00 ============================================ pc=0x0000000000010744 bltz a0,30 # 0x10762 [B] rs1=x10 rs2=x0 imm=0x000000000000001e -- GPRs -- x1/ra =0x0000000000010744 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000018 x11/a1 =0x0000000002ffff28 x12/a2 =0x0000000000000018 x13/a3 =0x0000000000011bf8 
x14/a4 =0x000000000000013c x15/a5 =0x0000000000013d50 x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR Registers -- mstatus =0x8000000a00001e00 ============================================ pc=0x0000000000010748 addi a2,zero,1 [op=0x05] rd=x12 rs1=x0 rs2=x0 -- GPRs -- x1/ra =0x0000000000010744 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000018 x11/a1 =0x0000000002ffff28 x12/a2 =0x0000000000000018 x13/a3 =0x0000000000011bf8 x14/a4 =0x000000000000013c x15/a5 =0x0000000000013d50 x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR Registers -- mstatus =0x8000000a00001e00 ============================================ pc=0x000000000001074a addi a5,zero,10 [op=0x29] rd=x15 rs1=x0 rs2=x0 -- GPRs -- x1/ra =0x0000000000010744 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000018 x11/a1 =0x0000000002ffff28 x12/a2 =0x0000000000000001 x13/a3 =0x0000000000011bf8 x14/a4 =0x000000000000013c x15/a5 =0x0000000000013d50 x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR 
Registers -- mstatus =0x8000000a00001e00 ============================================ pc=0x000000000001074c addi a1,sp,28 [op=0x6c] rd=x16 rs1=x0 rs2=x0 -- GPRs -- x1/ra =0x0000000000010744 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000018 x11/a1 =0x0000000002ffff28 x12/a2 =0x0000000000000001 x13/a3 =0x0000000000011bf8 x14/a4 =0x000000000000013c x15/a5 =0x000000000000000a x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR Registers -- mstatus =0x8000000a00001e00 ============================================ pc=0x000000000001074e mv a0,a2 [op=0x32] rd=x10 rs1=x1 rs2=x0 -- GPRs -- x1/ra =0x0000000000010744 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000018 x11/a1 =0x0000000002ffff6c x12/a2 =0x0000000000000001 x13/a3 =0x0000000000011bf8 x14/a4 =0x000000000000013c x15/a5 =0x000000000000000a x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR Registers -- mstatus =0x8000000a00001e00 ============================================ pc=0x0000000000010750 sb a5,28(sp) [S] rs1=x2 rs2=x15 imm=0x000000000000001c -- GPRs -- x1/ra =0x0000000000010744 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000001 x11/a1 =0x0000000002ffff6c x12/a2 
=0x0000000000000001 x13/a3 =0x0000000000011bf8 x14/a4 =0x000000000000013c x15/a5 =0x000000000000000a x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR Registers -- mstatus =0x8000000a00001e00 PREV @ 0x0000000002ffff6c value=00 STORE @ 0x0000000002ffff6c size=1 value=0a MEM_CHANGE @ 0x0000000002ffff68 PREV=0000000000000007 MEM_CHANGE @ 0x0000000002ffff68 NEW =0000000a00000007 ============================================ pc=0x0000000000010754 jal ra,4940 # 0x11aa0 [J] rd=x1 imm=0x000000000000134c -- GPRs -- x1/ra =0x0000000000010744 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000001 x11/a1 =0x0000000002ffff6c x12/a2 =0x0000000000000001 x13/a3 =0x0000000000011bf8 x14/a4 =0x000000000000013c x15/a5 =0x000000000000000a x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR Registers -- mstatus =0x8000000a00001e00 ============================================ pc=0x0000000000011aa0 jal t0,144 # 0x11b30 [J] rd=x5 imm=0x0000000000000090 -- GPRs -- x1/ra =0x0000000000010758 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000001 x11/a1 =0x0000000002ffff6c x12/a2 =0x0000000000000001 x13/a3 =0x0000000000011bf8 x14/a4 =0x000000000000013c x15/a5 =0x000000000000000a x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 
28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR Registers -- mstatus =0x8000000a00001e00 ============================================ pc=0x0000000000011b30 addi sp,sp,-16 [op=0x41] rd=x2 rs1=x0 rs2=x0 -- GPRs -- x1/ra =0x0000000000010758 x2/sp =0x0000000002ffff50 x3/gp =0x00000000000142a0 x5/t0 =0x0000000000011aa4 x7/t2 =0x0000000000013e40 x8/s0 =0x0000000003000000 x10/a0 =0x0000000000000001 x11/a1 =0x0000000002ffff6c x12/a2 =0x0000000000000001 x13/a3 =0x0000000000011bf8 x14/a4 =0x000000000000013c x15/a5 =0x000000000000000a x16/a6 =0x0000000000013b38 -- FPRs -- -- VPRs -- v1 = 28000000320000003c00000046000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 v2 = 2c00000037000000420000004d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -- CSR Registers -- mstatus =0x8000000a00001e00 ```