从 PANDA 跟踪获取操作码列表

逆向工程 反汇编 x86 QEMU
2021-06-25 04:30:30

在对“使用 QEMU 监视器接口从二进制文件中提取执行跟踪?”一文的回答中,一位 PANDA 作者概述了如何使用 QEMU 记录执行跟踪。

我想用 PANDA 来做同样的事情。

我发现 QEMU 函数cpu_memory_rw_debug(env, pc, buf, size, is_write);允许访问来宾的内存。然而,我不知道我必须读取多少内存,因为操作码的大小不同。

PANDA 提供了函数 panda_disas:

void panda_disas(FILE *out, void *code, unsigned long size);

它输出位于虚拟地址 code 处、长度为 size 字节的来宾代码的反汇编文本表示。

但同样的问题,我如何确定指令的大小?

由于 PANDA_CB_INSN_EXEC 和 PANDA_CB_INSN_TRANSLATE 都不提供指令的大小,我认为这一信息应该在 CPUState 中。

我查看了 cpu.h 和 cpu-all.h,但没有找到任何相关内容。

还有另一种方法还是我错过了什么?

1个回答

目前 PANDA 不提供单条指令级别的指令大小信息(在翻译之前无法得知)。不过,有一种做法是:在 QEMU 完成翻译之后,通过 PANDA_CB_AFTER_BLOCK_TRANSLATE 回调查看 tb->size 字段,从而得到整个基本块的大小。然后,您可以缓存该块的反汇编结果,并在 PANDA_CB_BEFORE_BLOCK_EXEC 回调中将其打印出来。

下面是一个插件,它利用这一技巧,使用 Capstone 反汇编器计算滚动的指令操作码直方图。您需要对其稍作调整才能获得完整的指令跟踪,但它应该足以演示其原理。

// This needs to be defined before anything is included in order to get
// the PRIx64 macro
#define __STDC_FORMAT_MACROS

extern "C" {

#include "config.h"
#include "qemu-common.h"

#include "panda_plugin.h"
#include "panda/panda_common.h"
#include "rr_log.h"
#include <capstone/capstone.h>

}

#include <map>
#include <string>
#include <vector>

// Histogram of instruction mnemonics: mnemonic text => occurrence count
typedef std::map<std::string,int> instr_hist;


// These need to be extern "C" so that the ABI is compatible with
// QEMU/PANDA, which is written in C
extern "C" {

// Plugin entry point: parses the plugin arguments, opens the log file and
// registers the block callbacks
bool init_plugin(void *);
// Plugin exit point: logs the current window histogram and closes the log
void uninit_plugin(void *);

}

// Number of slots in the rolling window of executed blocks
#define WINDOW_SIZE 100

// Capstone handle, opened by init_capstone()
csh handle;
// Receives the instruction array produced by cs_disasm_ex() in
// after_block_translate()
cs_insn *insn;
// True once init_capstone() has run (it is set even if cs_open() failed)
bool init_capstone_done = false;
// "asid" plugin argument; when non-zero, both callbacks ignore blocks that
// belong to any other address space
target_ulong asid;
// The window histogram is logged every sample_rate executed blocks.
// init_plugin() overwrites this initial value with the "sample_rate"
// argument, whose default there is 1000.
int sample_rate = 100;
// Output log, opened by init_plugin() as "<name>_insthist.txt"
FILE *histlog;

// Block start PC => mnemonic histogram of that translation block
std::map<target_ulong,instr_hist> code_hists;

// Block start PC => number of instructions in the TB
std::map<target_ulong,int> tb_insns;

// Circular buffer of the start PCs of the blocks in the window, indexed by
// bbcount % WINDOW_SIZE; a value of 0 marks an empty slot
target_ulong window[WINDOW_SIZE] = {};

// Mnemonic counts summed over the blocks currently in the window
instr_hist window_hist;
// Total instruction count of the blocks currently in the window
uint64_t window_insns = 0;
// Number of blocks counted by before_block_exec() (after the asid filter)
uint64_t bbcount = 0;

/**
 * Opens the global capstone `handle` for the guest architecture.
 *
 * The disassembly mode is chosen from the CPU state at the time of the call
 * (HF_LMA_MASK set => 64-bit, otherwise 32-bit on x86; Thumb vs. ARM on ARM)
 * and is not revisited afterwards.
 *
 * `init_capstone_done` is set whether or not initialization succeeds, so a
 * failure is reported once instead of on every translated block. On failure,
 * or on a guest architecture with no mapping below, `handle` is left at 0.
 * NOTE(review): this relies on capstone decoding nothing when given a 0
 * handle - confirm against the capstone version in use.
 *
 * @param env CPU state used to pick the disassembly mode.
 */
void init_capstone(CPUState *env) {
    init_capstone_done = true;

#if defined(TARGET_I386)
    const cs_arch arch = CS_ARCH_X86;
    const cs_mode mode = (env->hflags & HF_LMA_MASK) ? CS_MODE_64 : CS_MODE_32;
#elif defined(TARGET_ARM)
    const cs_arch arch = CS_ARCH_ARM;
    const cs_mode mode = env->thumb ? CS_MODE_THUMB : CS_MODE_ARM;
#else
    // Previously `arch` and `mode` were passed to cs_open() uninitialized on
    // any other target; refuse explicitly instead.
    (void)env;
    handle = 0;
    fprintf(stderr, "Error initializing capstone: unsupported guest architecture\n");
    return;
#endif

#if defined(TARGET_I386) || defined(TARGET_ARM)
    if (cs_open(arch, mode, &handle) != CS_ERR_OK) {
        handle = 0;
        fprintf(stderr, "Error initializing capstone\n");
    }
#endif
}

// Adds every count in `b` onto the matching mnemonic in `a`; mnemonics that
// `a` does not hold yet start from zero.
void add_hist(instr_hist &a, instr_hist &b) {
    for (instr_hist::iterator entry = b.begin(); entry != b.end(); ++entry) {
        a[entry->first] += entry->second;
    }
}

// Subtracts every count in `b` from the matching mnemonic in `a`. Entries
// are kept in `a` even when their count drops to zero.
void sub_hist(instr_hist &a, instr_hist &b) {
    for (instr_hist::iterator entry = b.begin(); entry != b.end(); ++entry) {
        a[entry->first] -= entry->second;
    }
}

/**
 * Appends one line to `histlog`: the current guest instruction count followed
 * by a `{"mnemonic": fraction, ...}` map, where each fraction is the
 * mnemonic's count divided by `window_insns`.
 *
 * Nothing is written when the log file is not open. When `window_insns` is
 * zero every fraction is reported as 0 rather than nan/inf.
 *
 * @param ih            Mnemonic histogram to print.
 * @param window_insns  Total number of instructions the histogram covers.
 */
void print_hist(instr_hist &ih, uint64_t window_insns) {
    if (histlog == NULL) return;

    fprintf(histlog, "%" PRIu64 " ", rr_get_guest_instr_count());
    fprintf(histlog, "{");
    for (auto &kvp : ih) {
        // Guard the division: the window can be empty while stale zero-count
        // entries are still present in the histogram.
        const float share =
            window_insns != 0 ? kvp.second / (float)window_insns : 0.0f;
        fprintf(histlog, "\"%s\": %f, ", kvp.first.c_str(), share);
    }
    fprintf(histlog, "}\n");
}

// Removes every occurrence of the block starting at `pc` from the rolling
// window: each matching slot is emptied (set to 0) and that block's
// instruction count and mnemonic counts are taken out of the window totals.
// Used when a block is translated again, since the new translation may
// contain different instructions and TBs carry no generation number that
// would let the two versions be told apart.
void clear_hist(target_ulong pc) {
    for (target_ulong &slot : window) {
        if (slot != pc) continue;

        sub_hist(window_hist, code_hists[pc]);
        window_insns -= tb_insns[pc];
        slot = 0;
    }
}

/**
 * PANDA_CB_AFTER_BLOCK_TRANSLATE callback: disassembles the freshly
 * translated block and records its mnemonic histogram and instruction count
 * under the block's start PC.
 *
 * Blocks outside the selected address space (when `asid` is non-zero) are
 * ignored. If the block was translated before, its old contribution is first
 * removed from the rolling window and the histogram is rebuilt, because the
 * guest code at that PC may have changed.
 *
 * @param env CPU state of the guest.
 * @param tb  Translation block that was just produced.
 * @return 1 when the block was recorded, 0 when it was skipped or its guest
 *         memory could not be read.
 */
static int after_block_translate(CPUState *env, TranslationBlock *tb) {
    if (asid && panda_current_asid(env) != asid) return 0;

    if (!init_capstone_done) init_capstone(env);

    // Drop whatever the window holds for this PC before the per-block data
    // changes; otherwise a later eviction would subtract counts that were
    // never added.
    clear_hist(tb->pc);
    instr_hist &hist = code_hists[tb->pc];
    hist.clear();
    tb_insns[tb->pc] = 0;

    if (tb->size == 0) return 1;

    // Sized from the block itself: a fixed 1024-byte stack buffer overflows
    // for larger translation blocks.
    std::vector<uint8_t> mem(tb->size);
    // NOTE(review): assumes panda_virtual_memory_rw reports failure with a
    // negative return value - confirm against the PANDA version in use.
    if (panda_virtual_memory_rw(env, tb->pc, mem.data(), tb->size, false) < 0) {
        return 0;
    }

    cs_insn *decoded = NULL;
    const size_t count =
        cs_disasm_ex(handle, mem.data(), mem.size(), tb->pc, 0, &decoded);
    for (size_t i = 0; i < count; i++) {
        hist[decoded[i].mnemonic]++;
    }
    tb_insns[tb->pc] = count;

    // cs_disasm_ex allocates the instruction array; release it so the plugin
    // does not leak once per translated block.
    if (count > 0) cs_free(decoded, count);
    return 1;
}

/**
 * PANDA_CB_BEFORE_BLOCK_EXEC callback: pushes the block about to run into the
 * rolling window, evicting the oldest entry, and logs the window histogram
 * every `sample_rate` blocks.
 *
 * Blocks outside the selected address space (when `asid` is non-zero) are
 * ignored.
 *
 * @param env CPU state of the guest.
 * @param tb  Translation block about to execute.
 * @return 1 when the block was counted, 0 when it was filtered out.
 */
static int before_block_exec(CPUState *env, TranslationBlock *tb) {
    if (asid && panda_current_asid(env) != asid) return 0;

    target_ulong &slot = window[bbcount % WINDOW_SIZE];

    // A non-zero slot still holds the block executed WINDOW_SIZE blocks ago;
    // take its contribution out before reusing the slot.
    if (slot != 0) {
        const target_ulong old_pc = slot;
        window_insns -= tb_insns[old_pc];
        sub_hist(window_hist, code_hists[old_pc]);
    }

    // The operator[] lookups are deliberate: a block that was never
    // disassembled gets empty entries, so the amount added here always
    // matches what is subtracted on eviction.
    slot = tb->pc;
    window_insns += tb_insns[tb->pc];
    add_hist(window_hist, code_hists[tb->pc]);

    bbcount++;

    // A sample rate of zero would divide by zero; treat non-positive rates
    // as "never sample".
    if (sample_rate > 0 && bbcount % sample_rate == 0) {
        // write out to the histlog
        print_hist(window_hist, window_insns);
    }
    return 1;
}

/**
 * Plugin entry point.
 *
 * Reads the "insthist" plugin arguments:
 *   - name        prefix of the log file, default "insthist"
 *   - asid        address space to restrict to, default 0 (no filtering)
 *   - sample_rate number of blocks between log lines, default 1000
 * then opens "<name>_insthist.txt" for writing and registers the
 * after-block-translate and before-block-exec callbacks.
 *
 * @param self Opaque plugin handle passed through to callback registration.
 * @return true on success; false if the log file name does not fit the
 *         buffer or the file cannot be opened.
 */
bool init_plugin(void *self) {
    panda_arg_list *args = panda_get_args("insthist");
    const char *name = panda_parse_string(args, "name", "insthist");
    asid = panda_parse_ulong(args, "asid", 0);
    sample_rate = panda_parse_uint32(args, "sample_rate", 1000);
    if (sample_rate <= 0) {
        // Zero would divide by zero in before_block_exec; values above
        // INT_MAX wrap negative when stored in the int.
        fprintf(stderr, "insthist: invalid sample_rate, using 1000\n");
        sample_rate = 1000;
    }

    // `name` is user supplied, so bound the write instead of trusting it to
    // fit.
    char fname[260];
    const int written = snprintf(fname, sizeof(fname), "%s_insthist.txt", name);
    if (written < 0 || static_cast<size_t>(written) >= sizeof(fname)) {
        fprintf(stderr, "insthist: log file name is too long\n");
        return false;
    }

    histlog = fopen(fname, "w");
    if (histlog == NULL) {
        fprintf(stderr, "insthist: could not open %s for writing\n", fname);
        return false;
    }

    panda_cb pcb;
    pcb.after_block_translate = after_block_translate;
    panda_register_callback(self, PANDA_CB_AFTER_BLOCK_TRANSLATE, pcb);
    pcb.before_block_exec = before_block_exec;
    panda_register_callback(self, PANDA_CB_BEFORE_BLOCK_EXEC, pcb);

    return true;
}

/**
 * Plugin exit point: writes the current window histogram as a final log line
 * and closes the log file. Does nothing when the log was never opened.
 *
 * @param self Opaque plugin handle (unused).
 */
void uninit_plugin(void *self) {
    (void)self;
    if (histlog == NULL) return;

    print_hist(window_hist, window_insns);
    fclose(histlog);
    // Cleared so a late callback or a second unload cannot touch the closed
    // stream.
    histlog = NULL;
}