我会汇编代码,然后使用仿真进行分析。
取自链接的示例汇编代码:
mov rax, QWORD PTR [rbp-16] ; Move i (=9) to RAX
movabs rdx, -3689348814741910323 ; Move some magic number to RDX (?)
mul rdx ; Multiply 9 by magic number
mov rax, rdx ; Take only the upper 64 bits of the result
shr rax, 2 ; Shift these bits 2 places to the right (?)
mov QWORD PTR [rbp-8], rax ; Magically, RAX contains 9/5=1 now,
; so we can assign it to j
要模拟的代码:
mov rax, 9 ;
movabs rdx, -3689348814741910323 ;
mul rdx ;
mov rax, rdx ;
shr rax, 2 ;
仿真输出:
Initial state:
RAX = 0x0
RDX = 0x0
=================================================
>>> 0x1000000: mov rax, 9
RAX = 0x9
RDX = 0x0
=================================================
>>> 0x1000007: movabs rdx, 0xcccccccccccccccd
RAX = 0x9
RDX = 0xcccccccccccccccd
=================================================
>>> 0x1000011: mul rdx
RAX = 0x3333333333333335
RDX = 0x7
=================================================
>>> 0x1000014: mov rax, rdx
RAX = 0x7
RDX = 0x7
=================================================
>>> 0x1000017: shr rax, 2
RAX = 0x1
RDX = 0x7
>>>Emulation complete.
正如我们所看到的,RAX 持有 1,这是为 9 / 5 计算的值。仿真使我们能够轻松查看计算的每一步的结果,以便了解正在发生的事情。
执行仿真的程序附在下面。带语法高亮的版本可以在此处的 Gist 中找到。
它由 3 个主要组件组成:
- 待汇编并仿真的汇编代码
- 通过 Keystone 引擎进行汇编,正如 blabb 在上面的评论中所提到的
- 通过 Unicorn 进行仿真
注册到仿真引擎的回调函数允许我们将执行的指令流中的每条指令的寄存器值等信息打印到 STDOUT。
#!/usr/bin/python3
from keystone import *
from capstone import *
from unicorn import *
from unicorn.x86_const import *
# 9 divided by 5, computed without a DIV instruction: multiply by the magic
# constant 0xCCCCCCCCCCCCCCCD (= (2**66 + 1) / 5, written below as its signed
# 64-bit value), keep the upper 64 bits of the product (RDX) and shift right
# by 2.  Statements are ';'-separated so the snippet is assembled in one call.
ASM = (
    "mov rax, 9; "
    "movabs rdx, -3689348814741910323; "
    "mul rdx; "
    "mov rax, rdx; "
    "shr rax, 2;"
)
# Use Keystone Engine to assemble
def assemble_snippet():
    """Assemble the module-level ``ASM`` snippet into x86-64 machine code.

    Returns:
        bytes: the encoding produced by ``Ks.asm`` for ``ASM``.

    Raises:
        KsError: if Keystone fails to assemble the snippet.  The error is
            printed and then re-raised; without the re-raise, execution would
            reach the ``return`` with ``encoding`` never assigned and fail
            with an unrelated ``UnboundLocalError`` that hides the real cause.
    """
    try:
        ks = Ks(KS_ARCH_X86, KS_MODE_64)  # initialize assembler object
        # asm() yields (encoding, statement count); only the encoding is used
        encoding, _count = ks.asm(ASM)
    except KsError as e:
        print("ERROR: %s" % e)
        raise
    return bytes(encoding)  # return assembled object code
# Instruction-trace callback; disassembly is done with Capstone
def hook_code(uc, address, size, user_data):
    """Dump RAX/RDX, then print the disassembly of the hooked instruction.

    Args:
        uc: emulator handle; used to read registers and memory.
        address: address of the instruction the hook fired for.
        size: length in bytes of that instruction.
        user_data: the disassembler object supplied at hook registration;
            its ``disasm(code, address)`` method is used.
    """
    # print contents of registers of interest
    watched = (
        ("RAX = 0x%x", UC_X86_REG_RAX),
        ("RDX = 0x%x", UC_X86_REG_RDX),
    )
    for template, reg_id in watched:
        print(template % uc.reg_read(reg_id))
    print("=================================================")

    # print disassembly of the instruction stream
    disassembler = user_data
    raw_bytes = uc.mem_read(address, size)
    for insn in disassembler.disasm(raw_bytes, address):
        print(">>> 0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
# from https://github.com/unicorn-engine/unicorn/blob/8621bca53758532ad6a4ec5d17684fcdb9923cc6/bindings/python/sample_x86.py#L475
def emulate():
    """Assemble the snippet, run it under Unicorn and trace RAX/RDX.

    The machine code from ``assemble_snippet()`` is written to the start of a
    freshly mapped 2MB region, ``hook_code`` is registered as a per-instruction
    hook with a Capstone disassembler as its user data, and the code is run
    from its first byte to the address just past its last byte.  Emulator
    errors are printed rather than propagated, and the final RAX/RDX values
    are printed in either case.
    """
    base = 0x1000000  # memory address where emulation starts
    region_size = 2 * 1024 * 1024  # 2MB of memory for this emulation
    machine_code = assemble_snippet()  # object code to emulate

    # Initialize emulator in X86-64bit mode and load the code
    emulator = Uc(UC_ARCH_X86, UC_MODE_64)
    emulator.mem_map(base, region_size)
    emulator.mem_write(base, machine_code)
    # set up stack: RSP points at the end of the mapped region
    emulator.reg_write(UC_X86_REG_RSP, base + region_size)

    # the disassembler is handed to the hook through its user_data argument
    disassembler = Cs(CS_ARCH_X86, CS_MODE_64)
    emulator.hook_add(UC_HOOK_CODE, hook_code, disassembler)

    print("Initial state:")
    try:
        # emulate machine code with no time limit
        emulator.emu_start(base, base + len(machine_code))
    except UcError as err:
        print("ERROR: %s" % err)

    # final state
    for template, reg_id in (
        ("RAX = 0x%x", UC_X86_REG_RAX),
        ("RDX = 0x%x", UC_X86_REG_RDX),
    ):
        print(template % emulator.reg_read(reg_id))
    print("\n>>>Emulation complete.")
# Script entry point: run the emulation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    emulate()