我在解一个 crackme 的过程中写过一个类似的脚本。
待分析的二进制文件经过了混淆,其中原本的
call imm32
jmp imm32
指令被替换为
push imm32
xor dword ptr [esp], imm32
mov edx, imm32
xor edx, imm32
jmp edx
而原本的
jmp imm32
指令被替换为
mov edx, imm32
xor edx, imm32
jmp edx
为了去混淆,我使用DynamoRIO编写了一个指令跟踪器,它将所有执行的基本块记录在跟踪文件中。
现在通过解析跟踪文件和使用正则表达式的模式匹配,可以对混淆的指令进行反混淆处理。
由于跳转和调用的目标地址需要动态计算,我使用 FASM 在运行时即时汇编指令。
下面是这个脚本。它可能无法直接拿来使用,但可以给你提供一些思路。
from capstone import *
from capstone.x86 import *
import re
import binascii
import subprocess
import os
import mmap
'''
pattern1
-------------------------------------------------------
68 ?? ?? ?? ?? push imm32
81 34 24 ?? ?? ?? ?? xor dword ptr [esp], imm32
ba ?? ?? ?? ?? mov edx, imm32
81 f2 ?? ?? ?? ?? xor edx, imm32
ff e2 jmp edx
Will be rewritten to
call imm32
jmp imm32
pattern2 (only if pattern1 does not match)
-------------------------------------------------------
ba ?? ?? ?? ?? mov edx, imm32
81 f2 ?? ?? ?? ?? xor edx, imm32
ff e2 jmp edx
Will be rewritten to
jmp imm32
'''
# Both patterns are searched in the hex text of a basic block (see analyze_bb),
# so each ".{8}" stands for the eight hex digits of one imm32 operand.
# pat1: push imm32 / xor dword ptr [esp], imm32 / mov edx, imm32 /
#       xor edx, imm32 / jmp edx
pat1 = re.compile(r'68.{8}813424.{8}ba.{8}81f2.{8}ffe2')
# pat2: mov edx, imm32 / xor edx, imm32 / jmp edx
pat2 = re.compile(r'ba.{8}81f2.{8}ffe2')
# Capstone disassembler for 32-bit x86; detail mode is switched on because the
# rewrite functions read each instruction's operands.
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
# Output file that receives the patched bytes; it must already exist because
# it is opened in 'r+b' mode. Length 0 asks mmap to map the whole file.
handle = open('code_section_p.bin', 'r+b')
mm = mmap.mmap(handle.fileno(), 0, access = mmap.ACCESS_WRITE)
def assemble(asm):
    """Assemble FASM source text and return the resulting machine code.

    The source is written to ``temp.asm`` in the current directory,
    ``fasm.exe`` is run on that file with its output discarded, and the
    contents of ``temp.bin`` are read back.

    Args:
        asm: FASM source text.

    Returns:
        The raw contents of ``temp.bin`` as a byte string.

    Raises:
        OSError: if ``fasm.exe`` cannot be started or a file cannot be opened.
        subprocess.CalledProcessError: if ``fasm.exe`` exits with a non-zero
            status.
    """
    with open('temp.asm', 'w') as src:
        src.write(asm)
    with open(os.devnull, 'w') as devnull:
        # Fail loudly: after a failed run temp.bin may still hold the output
        # of an earlier call, which would otherwise be returned as if it were
        # the result for this source.
        subprocess.check_call(['fasm.exe', 'temp.asm'],
                              stdout=devnull, stderr=devnull)
    with open('temp.bin', 'rb') as out:
        return out.read()
def pat1_rewrite(buf, off_start, va_start, numBytes):
    """Build FASM source replacing a pattern1 sequence with ``call``/``jmp``.

    The first four instructions at ``off_start`` are disassembled and expected
    to be ``push imm32``, ``xor dword ptr [esp], imm32``, ``mov edx, imm32``
    and ``xor edx, imm32``. The two XOR pairs are folded into constants.

    Args:
        buf: byte string holding the code to disassemble.
        off_start: offset into ``buf`` where the sequence begins.
        va_start: address passed to the disassembler for ``off_start`` and
            emitted as the ``org`` of the generated source.
        numBytes: number of bytes, starting at ``off_start``, handed to the
            disassembler.

    Returns:
        FASM source text of the form ``use32 / org <va_start> /
        call <target> / jmp <target>``, with all numbers in decimal.

    Raises:
        StopIteration: if fewer than four instructions can be decoded.
    """
    gen = md.disasm(buf[off_start: off_start + numBytes], va_start)
    # next(gen) works on Python 2 and 3; gen.next() exists only on Python 2.
    push_ops = next(gen).operands       # push imm32
    xor_stack_ops = next(gen).operands  # xor dword ptr [esp], imm32
    mov_ops = next(gen).operands        # mov edx, imm32
    xor_edx_ops = next(gen).operands    # xor edx, imm32
    # Mask to 32 bits so the target stays an unsigned address even if the
    # disassembler reports one of the immediates sign-extended.
    jmp_target = (push_ops[0].value.imm ^ xor_stack_ops[1].value.imm) & 0xFFFFFFFF
    call_target = (mov_ops[1].value.imm ^ xor_edx_ops[1].value.imm) & 0xFFFFFFFF
    return 'use32\n' \
           'org {}\n' \
           'call {}\n' \
           'jmp {}'.format(va_start, call_target, jmp_target)
def pat2_rewrite(buf, off_start, va_start, numBytes):
    """Build FASM source replacing a pattern2 sequence with a direct ``jmp``.

    The first two instructions at ``off_start`` are disassembled and expected
    to be ``mov edx, imm32`` and ``xor edx, imm32``. XOR-ing their immediates
    gives the jump target.

    Args:
        buf: byte string holding the code to disassemble.
        off_start: offset into ``buf`` where the sequence begins.
        va_start: address passed to the disassembler for ``off_start`` and
            emitted as the ``org`` of the generated source.
        numBytes: number of bytes, starting at ``off_start``, handed to the
            disassembler.

    Returns:
        FASM source text of the form ``use32 / org <va_start> /
        jmp <target>``, with all numbers in decimal.

    Raises:
        StopIteration: if fewer than two instructions can be decoded.
    """
    gen = md.disasm(buf[off_start: off_start + numBytes], va_start)
    # next(gen) works on Python 2 and 3; gen.next() exists only on Python 2.
    mov_ops = next(gen).operands  # mov edx, imm32
    xor_ops = next(gen).operands  # xor edx, imm32
    # Mask to 32 bits so the target stays an unsigned address even if the
    # disassembler reports one of the immediates sign-extended.
    jmp_target = (mov_ops[1].value.imm ^ xor_ops[1].value.imm) & 0xFFFFFFFF
    return 'use32\n' \
           'org {}\n' \
           'jmp {}'.format(va_start, jmp_target)
def _find_aligned(pattern, hexstr):
    """Return the first match of pattern in hexstr that starts on a byte boundary, or None."""
    pos = 0
    while True:
        mtch = pattern.search(hexstr, pos)
        # Two hex digits encode one byte, so a match at an odd index straddles
        # two bytes and does not correspond to a real instruction sequence.
        if mtch is None or mtch.start() % 2 == 0:
            return mtch
        pos = mtch.start() + 1


def _patch_match(buf, mtch, rewrite, off_start, va_start, bbsize):
    """Assemble the replacement for one match and write it, int3-padded, into the mapped file."""
    skip = mtch.start() // 2  # hex-digit index -> byte offset inside the block
    start = off_start + skip
    numBytes = bbsize - skip
    assembled = assemble(rewrite(buf, start, va_start + skip, numBytes))
    mm.seek(start)
    mm.write(assembled)
    # Fill the rest of the basic block with int3 (0xCC); a non-positive count
    # yields an empty byte string, so nothing extra is written in that case.
    mm.write(b'\xCC' * (numBytes - len(assembled)))


def analyze_bb(buf, off_start, va_start, bbsize):
    """Deobfuscate one basic block and patch the result into the mapped file.

    The block's bytes are converted to hex text and searched for pattern1;
    pattern2 is tried only when pattern1 yields no byte-aligned match. On a
    match, everything from the match start to the end of the block is replaced
    by the assembled rewrite followed by int3 padding. Blocks matching neither
    pattern are left untouched.

    Args:
        buf: byte string holding the original code.
        off_start: offset of the basic block inside ``buf`` (and inside the
            mapped output file).
        va_start: address of the basic block, forwarded to the rewrite
            function.
        bbsize: size of the basic block in bytes.
    """
    # Decode to text so the str-based patterns can search it on Python 3 too.
    hexstr = binascii.hexlify(buf[off_start: off_start + bbsize]).decode('ascii')
    for pattern, rewrite in ((pat1, pat1_rewrite), (pat2, pat2_rewrite)):
        mtch = _find_aligned(pattern, hexstr)
        if mtch is not None:
            _patch_match(buf, mtch, rewrite, off_start, va_start, bbsize)
            return
def main():
    """Patch every basic block listed in ``trace.txt``.

    Each non-blank line of ``trace.txt`` must hold two whitespace-separated
    hexadecimal numbers: the address of a basic block and a second value that
    is used as the block size in bytes. The original code is read from
    ``code_section.bin``; patches are written through the module-level ``mm``
    mapping. When processing ends, the mapping is flushed and the module-level
    ``handle`` is closed.

    Raises:
        ValueError: if a non-blank trace line does not consist of exactly two
            hexadecimal numbers.
    """
    # Address that corresponds to offset 0 of code_section.bin; block
    # addresses are turned into file offsets by subtracting it.
    code_section_va = 0x30001000
    with open('code_section.bin', 'rb') as code_file:
        buf = code_file.read()
    try:
        with open('trace.txt', 'r') as trace:
            for addrStartEnd in trace:
                fields = addrStartEnd.split()
                if not fields:
                    continue  # tolerate blank lines, e.g. a trailing newline
                va_start, bbsize = (int(field, 16) for field in fields)
                off_start = va_start - code_section_va
                analyze_bb(buf, off_start, va_start, bbsize)
    finally:
        # Push pending writes to the file before its handle goes away.
        mm.flush()
        handle.close()


if __name__ == '__main__':
    main()