小型转储中的堆栈结构是什么?

逆向工程 风袋
2021-06-24 18:44:46

我目前在 Linux 中使用 Python3.9 从小型转储文件中获取必要的信息。我在我的 windows 系统上使用 WinDBG 来检查我得到的信息是否正确。

虽然 [1]、[2] 和 [3] 有所帮助,但仍有一些空白之处没有被覆盖。我的目标是编写一个可以解析小型转储的脚本。我已经设法获得了 ThreadList、MemoryList、MemoryInfoList 和 ModuleList,但还缺少堆栈信息;它似乎位于 MINIDUMP_THREAD 结构的 Stack 字段中,如下所示:

/* Per-thread record of the minidump thread list (the answer's program reads
 * these via tlist->Threads[i]). */
typedef struct _MINIDUMP_THREAD {
  ULONG32                      ThreadId;
  ULONG32                      SuspendCount;
  ULONG32                      PriorityClass;
  ULONG32                      Priority;
  ULONG64                      Teb;
  MINIDUMP_MEMORY_DESCRIPTOR   Stack;         /* describes the thread's stack memory; Stack.Memory.Rva locates it inside the dump file */
  MINIDUMP_LOCATION_DESCRIPTOR ThreadContext; /* DataSize/Rva pair locating the thread's context record */
} MINIDUMP_THREAD, *PMINIDUMP_THREAD;

它是一个 MINIDUMP_MEMORY_DESCRIPTOR,其结构如下:

/* Describes one range of captured memory: where it started and where its
 * bytes are located (size + RVA). */
typedef struct _MINIDUMP_MEMORY_DESCRIPTOR {
  ULONG64                      StartOfMemoryRange; /* start of the memory range (presumably an address in the dumped process -- confirm against the docs) */
  MINIDUMP_LOCATION_DESCRIPTOR Memory;             /* size and RVA of the captured bytes */
} MINIDUMP_MEMORY_DESCRIPTOR, *PMINIDUMP_MEMORY_DESCRIPTOR;

Memory 字段具有以下结构:

/* Size/location pair used to point at a blob of data inside the dump file. */
typedef struct _MINIDUMP_LOCATION_DESCRIPTOR {
  ULONG32 DataSize; /* size of the referenced data */
  RVA     Rva;      /* relative virtual address of the data within the minidump file */
} MINIDUMP_LOCATION_DESCRIPTOR;

总而言之,Stack.Rva 包含小型转储文件中的相对虚拟地址。

转到那个地址,我看到了一些数据,但文档在这一点上并没有说明那里存储的是什么结构。我猜测那会是一个 STACKFRAME 结构(纯属抓救命稻草式的猜测),其定义如下:

/* Stack-frame structure the asker guessed might be stored at Stack.Rva.
 * NOTE(review): the answer's size check suggests the dump does not hold this
 * structure at that location. */
typedef struct _tagSTACKFRAME {
  ADDRESS AddrPC;
  ADDRESS AddrReturn;
  ADDRESS AddrFrame;
  ADDRESS AddrStack;
  PVOID   FuncTableEntry;
  DWORD   Params[4];
  BOOL    Far;
  BOOL    Virtual;
  DWORD   Reserved[3];
  KDHELP  KdHelp;
  ADDRESS AddrBStore;
} STACKFRAME, *LPSTACKFRAME;

但是查看十六进制值,这是没有意义的:

00 00 00 00 D1 F8 AF 77 29 16 6B 77 C8 01 00 00
00 00 00 00 00 00 00 00 D0 87 C0 BD E0 60 85 00
c8 01 00 00 28 c1 39 00 24 00 00 00 01 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 2B 00 2B 00
87 02 21 00 00 00 00 00 AC BF 39 00 AB F0 6A 77
...

这意味着 AddrPC = {Offset: 00 00 00 00, Segment: D1 F8 AF 77, Mode: 29}

所以我想我会通过在这个故障转储文件上运行 Windbg 来找到相应的信息来作弊;但我不知道如何将上述十六进制转储转换为以下内容:

00 0039bf98 776b1629 000001c8 00000000 00000000 ntdll!NtWaitForSingleObject+0x15
01 0039c004 75491194 000001c8 ffffffff 00000000 KERNELBASE!WaitForSingleObjectEx+0x98
02 0039c01c 75491148 000001c8 ffffffff 00000000 kernel32!WaitForSingleObjectExImplementation+0x75
03 0039c030 5a581e3a 000001c8 ffffffff 00000000 kernel32!WaitForSingleObject+0x12
...

虽然我可以从 windbg 的调用堆栈中看到一些信息,但这些信息并不是一组连续的信息。

不幸的是,我对 C++/C 的理解充其量是有限的,所以我无法掌握 [5] 中给出的信息

可能有人对如何对这个地址的结构进行逆向工程有建议吗?

我知道这是一种包含堆栈帧列表的结构;但是 [1] 中的文档没有说明具体是哪种结构。我猜它由一个头部和一个结构数组组成。不幸的是,我暂时还没有找到展示小型转储整体布局图的文档(类似于 [4] 那样的图)。有这样一张图会让我更容易理解。

*Addendum:*

尽管有@blabb 的帮助,但在断断续续地解决这个问题后,我仍然没有弄清楚。我去看了 [6],它指出了 X86 的堆栈结构。丢失了原始转储文件后,我使用了一个新的转储文件。我使用了一个 minidump_stackwalker 二进制文件,它提出了以下内容:(崩溃线程的 rva 是 0x0141ff - 二进制转储如下)


Crash reason:  EXCEPTION_ACCESS_VIOLATION_READ
Crash address: 0x8

Thread 0 (crashed)
 0  k.dll + 0x310bf3f
    eip = 0x5d42bf3f   esp = 0x0053bea0   ebp = 0x0053bf74   ebx = 0x0053bed0
    esi = 0x00a1b000   edi = 0x0053bf98   eax = 0x00000008   ecx = 0x00000004
    edx = 0x0053bfb0   efl = 0x00210287
    Found by: given as instruction pointer in context
 1  k.dll + 0x310bc3b
    eip = 0x5d42bc3c   esp = 0x0053bf7c   ebp = 0x0053bfd8
    Found by: previous frame's frame pointer
 2  k.dll + 0x311e7b3
    eip = 0x5d43e7b4   esp = 0x0053bfe0   ebp = 0x0053c004
    Found by: previous frame's frame pointer
 3  k.dll + 0x3280958
    eip = 0x5d5a0959   esp = 0x0053c00c   ebp = 0x0053c064
    Found by: previous frame's frame pointer
...

0x0141ff 处的二进制转储:

000141f0h: FC EC 23 00 00 00 00 AC 03 00 00 34 7B 03 00 8B
00014200h: 55 18 8B 45 0C FF 24 8D CC 4A D7 5D C7 44 24 14
00014210h: 00 00 00 00 8D 4E 10 89 4C 24 0C 8B 56 10 89 54
00014220h: 24 10 8D 54 24 0C 89 56 10 8B 00 89 44 24 04 8B
00014230h: 07 89 44 24 38 89 4c 24 30 89 54 24 34 c7 44 24
...

根据我从 [6] 收集到的信息,由于这是一个 x86 二进制文件,我假设 [可能错误地] 它会使用 [6] 而不是 [7] 给出的堆栈结构。

这意味着 context_flags 从 0x000141ff 开始,这给了我 8B 55 18 8B。从 [6] 中的注释来看,这个 context_flag 意味着这个堆栈是一个 MD_CONTEXT_X86_ALL。所以在使用以下脚本后:

#!/bin/env python

import os
import sys



# Field layout of the 32-bit x86 thread context (breakpad's MDRawContextX86,
# reference [6]), listed in file order.  A plain int is the byte width of a
# scalar field; a tuple ``(element_size, count)`` describes a raw byte array.
# The widths add up to 716 bytes, the size of the x86 context record.
hdrs_x86 = {
    "context_flags": 4,
    # Debug registers.
    "dr0": 4,
    "dr1": 4,
    "dr2": 4,
    "dr3": 4,
    "dr6": 4,
    "dr7": 4,
    # Floating-point save area; the "fs_" prefix is what is_fp() keys on.
    "fs_control_word": 4,
    "fs_status_word": 4,
    "fs_tag_word": 4,
    "fs_error_offset": 4,
    "fs_error_selector": 4,
    "fs_data_offset": 4,
    "fs_data_selector": 4,
    "fs_register_area": (1, 80),
    "fs_cr0_npx_state": 4,
    # Segment registers.  "ds" was missing from the table, which shifted
    # every following field by four bytes.
    "gs": 4,
    "fs": 4,
    "es": 4,
    "ds": 4,
    # Integer registers.
    "edi": 4,
    "esi": 4,
    "ebx": 4,
    "edx": 4,
    "ecx": 4,
    "eax": 4,
    # Control registers.
    "ebp": 4,
    "eip": 4,
    "cs": 4,
    "eflags": 4,
    "esp": 4,
    "ss": 4,
    # The extended register area is 512 bytes long, not 80.
    "extended_registers": (1, 512),
}

# Field layout of the leading part of the 64-bit (AMD64) thread context, in
# file order; each value is the byte width of the field.  The table stops at
# "rip" (256 bytes in total).
# NOTE(review): the AMD64 context in reference [7] presumably continues with
# floating-point/vector state after "rip" -- extend the table if those fields
# are needed.
hdrs_x64 = {
    "p1_home": 8,
    "p2_home": 8,
    "p3_home": 8,
    "p4_home": 8,
    "p5_home": 8,
    "p6_home": 8,
    "context_flags": 4,
    "mx_csr": 4,
    "cs": 2,
    "ds": 2,
    "es": 2,
    "fs": 2,
    "gs": 2,
    "ss": 2,
    "eflags": 4,
    "dr0": 8,
    "dr1": 8,
    "dr2": 8,
    "dr3": 8,
    "dr6": 8,
    "dr7": 8,
    "rax": 8,
    "rcx": 8,
    "rdx": 8,
    "rbx": 8,
    # "rsp" used to be listed twice; a dict literal silently keeps one entry,
    # so the duplicate only obscured the layout.
    "rsp": 8,
    "rbp": 8,
    "rsi": 8,
    "rdi": 8,
    "r8": 8,
    "r9": 8,
    "r10": 8,
    "r11": 8,
    "r12": 8,
    "r13": 8,
    "r14": 8,
    "r15": 8,
    "rip": 8,
}

# Context-flag values for x86 thread contexts.  Every specific flag is the
# shared architecture marker (bit 16) combined with one low feature bit.
MDCTXX86 = 1 << 16
MDCTXX86_CONTROL = MDCTXX86 | (1 << 0)
MDCTXX86_INTEGER = MDCTXX86 | (1 << 1)
MDCTXX86_SEGMENTS = MDCTXX86 | (1 << 2)
MDCTXX86_FLOATING_POINT = MDCTXX86 | (1 << 3)
MDCTXX86_DEBUG_REGISTERS = MDCTXX86 | (1 << 4)
MDCTXX86_EXTENDED_REGISTERS = MDCTXX86 | (1 << 5)
MDCTXX86_XSTATE = MDCTXX86 | (1 << 6)

# Control + integer + segment registers.
MDCTXX86_FULL = MDCTXX86_CONTROL | MDCTXX86_INTEGER | MDCTXX86_SEGMENTS

# "ALL" is assembled in two halves: FULL plus floating point, then the debug
# and extended registers (XSTATE is not part of it).
ALL_P1 = MDCTXX86_FULL | MDCTXX86_FLOATING_POINT
ALL_P2 = MDCTXX86_DEBUG_REGISTERS | MDCTXX86_EXTENDED_REGISTERS
MDCTXX86_ALL = ALL_P1 | ALL_P2


def rev_item(in_bytes, no_rev=False):
    """Render a byte sequence as a list of two-digit lowercase hex strings.

    Args:
        in_bytes: Iterable of integers (typically a ``bytes`` object).
        no_rev: When true the elements keep their input order; by default
            the order is reversed before formatting.

    Returns:
        A list with one hex string per input element, each left-padded with
        ``"0"`` to at least two characters.
    """
    ordered = list(in_bytes)
    if not no_rev:
        ordered = ordered[::-1]
    return [format(value, "x").zfill(2) for value in ordered]


def is_dr(in_ctx, in_item):
    """Tell whether ``in_item`` is a debug register that ``in_ctx`` declares present.

    Args:
        in_ctx: Decoded context-flags value, or ``None`` when not read yet.
        in_item: Field name to classify.

    Returns:
        ``True`` only if ``in_item`` is one of dr0-dr3/dr6/dr7 and every bit
        of ``MDCTXX86_DEBUG_REGISTERS`` is set in ``in_ctx``.
    """
    if in_ctx is None:
        return False
    if in_item not in ("dr0", "dr1", "dr2", "dr3", "dr6", "dr7"):
        return False
    # The flag constant also contains the shared MDCTXX86 architecture bit,
    # so a plain "non-zero" test would succeed for any x86 context; require
    # all bits of the flag instead.
    return (in_ctx & MDCTXX86_DEBUG_REGISTERS) == MDCTXX86_DEBUG_REGISTERS


def is_seg(in_ctx, in_item):
    """Tell whether ``in_item`` is a segment register that ``in_ctx`` declares present.

    Args:
        in_ctx: Decoded context-flags value, or ``None`` when not read yet.
        in_item: Field name to classify.

    Returns:
        ``True`` only if ``in_item`` is gs/fs/es/ds and every bit of
        ``MDCTXX86_SEGMENTS`` is set in ``in_ctx``.
    """
    if in_ctx is None:
        return False
    if in_item not in ("gs", "fs", "es", "ds"):
        return False
    # The flag constant also contains the shared MDCTXX86 architecture bit,
    # so a plain truthiness test would succeed for any x86 context; require
    # all bits of the flag instead.
    return (in_ctx & MDCTXX86_SEGMENTS) == MDCTXX86_SEGMENTS


def is_int(in_ctx, in_item):
    """Tell whether ``in_item`` is an integer register that ``in_ctx`` declares present.

    Args:
        in_ctx: Decoded context-flags value, or ``None`` when not read yet.
        in_item: Field name to classify.

    Returns:
        ``True`` only if ``in_item`` is edi/esi/ebx/edx/ecx/eax and every bit
        of ``MDCTXX86_INTEGER`` is set in ``in_ctx``.
    """
    if in_ctx is None:
        return False
    if in_item not in ("edi", "esi", "ebx", "edx", "ecx", "eax"):
        return False
    # The flag constant also contains the shared MDCTXX86 architecture bit,
    # so a plain truthiness test would succeed for any x86 context; require
    # all bits of the flag instead.
    return (in_ctx & MDCTXX86_INTEGER) == MDCTXX86_INTEGER


def is_fp(in_ctx, in_item):
    """Tell whether ``in_item`` is a floating-point field that ``in_ctx`` declares present.

    Floating-point save-area fields are recognised by their ``"fs_"`` name
    prefix (the bare ``"fs"`` segment register does not match).

    Args:
        in_ctx: Decoded context-flags value, or ``None`` when not read yet.
        in_item: Field name to classify.

    Returns:
        ``True`` only if ``in_item`` starts with ``"fs_"`` and every bit of
        ``MDCTXX86_FLOATING_POINT`` is set in ``in_ctx``.
    """
    if in_ctx is None:
        return False
    if not in_item.startswith("fs_"):
        return False
    # The flag constant also contains the shared MDCTXX86 architecture bit,
    # so a plain truthiness test would succeed for any x86 context; require
    # all bits of the flag instead.
    return (in_ctx & MDCTXX86_FLOATING_POINT) == MDCTXX86_FLOATING_POINT


def is_control(in_ctx, in_item):
    """Tell whether ``in_item`` is a control register that ``in_ctx`` declares present.

    Args:
        in_ctx: Decoded context-flags value, or ``None`` when not read yet.
        in_item: Field name to classify.

    Returns:
        ``True`` only if ``in_item`` is ebp/eip/cs/eflags/esp/ss and every
        bit of ``MDCTXX86_CONTROL`` is set in ``in_ctx``.
    """
    if in_ctx is None:
        return False
    if in_item not in ("ebp", "eip", "cs", "eflags", "esp", "ss"):
        return False
    # The flag constant also contains the shared MDCTXX86 architecture bit,
    # so a plain truthiness test would succeed for any x86 context; require
    # all bits of the flag instead.
    return (in_ctx & MDCTXX86_CONTROL) == MDCTXX86_CONTROL


def is_ext_reg(in_ctx, in_item):
    """Tell whether ``in_item`` is the extended-register area and ``in_ctx`` declares it present.

    Args:
        in_ctx: Decoded context-flags value, or ``None`` when not read yet.
        in_item: Field name to classify.

    Returns:
        ``True`` only if ``in_item`` is ``"extended_registers"`` and every
        bit of ``MDCTXX86_EXTENDED_REGISTERS`` is set in ``in_ctx``.
    """
    if in_ctx is None:
        return False
    if in_item != 'extended_registers':
        return False
    # The flag constant also contains the shared MDCTXX86 architecture bit,
    # so a plain truthiness test would succeed for any x86 context; require
    # all bits of the flag instead.
    return (in_ctx & MDCTXX86_EXTENDED_REGISTERS) == MDCTXX86_EXTENDED_REGISTERS


def check_for_ctx(in_ctx, in_item):
    """Ask each register-group predicate whether ``in_item`` is present in ``in_ctx``.

    The predicates are tried in a fixed order (debug, segment, integer,
    floating point, control, extended registers) and evaluation stops at the
    first truthy answer.

    Args:
        in_ctx: Decoded context-flags value, or ``None`` when not read yet.
        in_item: Field name to classify.

    Returns:
        The first truthy predicate result, otherwise the (falsy) result of
        the last predicate that was consulted.
    """
    predicates = (is_dr, is_seg, is_int, is_fp, is_control, is_ext_reg)
    answers = (predicate(in_ctx, in_item) for predicate in predicates)

    outcome = False
    for outcome in answers:
        if outcome:
            return outcome
    return outcome


# NOTE(review): ``res`` and ``res2`` are not referenced anywhere in the
# visible script; they are kept only so the module-level names still exist.
res = []
res2 = []

# Decoded fields: field name -> list of hex strings (joined when printed).
hdrv = {}

# Layout table that drives the parse.
hdrs = hdrs_x86

# The dump path and context offset default to the values used in the
# write-up; both may be overridden on the command line:
#     script.py [dump_path [offset]]
dump_path = sys.argv[1] if len(sys.argv) > 1 else "e:\\test.dmp"
addr = int(sys.argv[2], 0) if len(sys.argv) > 2 else 0x141ff

with open(dump_path, 'rb') as fp:
    fp.seek(addr)
    ctx = None
    for item, item_rl in hdrs.items():
        if isinstance(item_rl, tuple):
            # (element_size, count): a raw byte array, read in one call and
            # shown in file order.
            read_len = item_rl[0] * item_rl[1]
            raw = fp.read(read_len)
            vr = [raw.hex()]
        else:
            # Scalar fields are stored little-endian, so reverse the bytes
            # to display (and decode) the actual numeric value.
            read_len = item_rl
            raw = fp.read(read_len)
            vr = rev_item(raw)

        if len(raw) != read_len:
            print("short read at offset 0x%x while reading %s; stopping"
                  % (addr, item), file=sys.stderr)
            break

        if item == "context_flags":
            ctx = int.from_bytes(raw, "little")
            if (ctx & MDCTXX86) != MDCTXX86:
                # Without the x86 marker bit no register group will match,
                # so nothing would be printed; say why.
                print("context_flags 0x%08x lacks the x86 marker bit; "
                      "offset 0x%x probably does not point at an x86 context"
                      % (ctx, addr), file=sys.stderr)

        # Keep only the fields whose register group the context declares.
        if check_for_ctx(ctx, item):
            hdrv[item] = vr
        addr += read_len

for item, iteminfo in hdrv.items():
    print(item, "".join(iteminfo))

它显示

dr0 450cff24
dr1 8dcc4ad7
dr2 5dc74424
dr3 14000000
dr6 008d4e10
dr7 894c240c
fs_control_word 8b561089
fs_status_word 5424108d
fs_tag_word 54240c89
fs_error_offset 56108b00
fs_error_selector 89442404
fs_data_offset 8b078944
fs_data_selector 2438894c
fs_register_area 243089542434c744242c00000000894c24248d442438895c24288d4c2424894e108d4c242c31ff515056e89a6adfff83c40c84c074178d4424186a09ff74243050e813aedfff83c40c8b7c24188b4424
fs_cr0_npx_state 248b4c24
gs 2889088b
fs 4424308b
es 4c243489
edi 08897c24
esi 1485ffb3
ebx 010f841a
edx 0300008d
ecx 4424148b
eax 54240489
ebp d1c1f91f
eip 6a005152
cs e9de0200
eflags 000fbe00
esp e94d0100
ss 00c74424
extended_registers 14000000008d4e10894c240c8b5610895424108d54240c8956108b00894424048b0789442438894c243089542434c744242c00000000894c24248d442438895c24288d4c2424894e108d4c242c31ff51

但这没有任何意义,因为它与上面结果中给出的内容毫无相似之处。例如,我得到的 EBP 是 d1c1f91f,而实际值是 0x0053BF74。因此,我一定是误解了整件事。

*Additional Addendum*: 附录有两点是错误的。

  1. 我找错了方向:我把 minidump 的 Memory info 列表误认为是堆栈所在的位置。
  2. 我正在处理同一个小型转储。只是对我正在研究的部分感到困惑。

我选择保留附录部分而不是删除它。(权当是“一千种行不通的方法”中的一条记录。)

非常感谢任何帮助,

:wong

[1] - https://docs.microsoft.com/en-us/windows/win32/api/minidumpapiset/

[2] - https://github.com/utds3lab/sigpath/blob/master/scripts/minidump.py

[3] - https://github.com/libyal/libmdmp/blob/main/documentation/Minidump%20%28MDMP%29%20format.asciidoc#thread_information_stream

[4] - https://upload.wikimedia.org/wikipedia/commons/thumb/1/1b/Portable_Executable_32_bit_Structure_in_SVG_fixed.svg/1920px-Portable_Executable_32_bit_Structure_in_SVG_fixed.svg.png

[5] - https://chromium.googlesource.com/breakpad/breakpad/+/refs/heads/main/src/client/minidump_file_writer.cc

[6] - https://github.com/google/breakpad/blob/main/src/google_breakpad/common/minidump_cpu_x86.h

[7] - https://github.com/google/breakpad/blob/main/src/google_breakpad/common/minidump_cpu_amd64.h

1个回答

rva 似乎并不指向 _tagSTACKFRAME64:两者的大小不同(0x108 与 0x4d0)。

是否有使用 dbghelp 的特定原因?

来自 dbgeng 的 OutputStackTrace 的输出是不可接受的吗?

您是否查看了 DIA_SDK 的 com 接口以寻找替代方案

使用以下代码检查任意转储的 sizeof(_tagStackFrame) 与转储中的大小

#include <windows.h>
#include <stdio.h>
#include <dbghelp.h>
#pragma comment(lib, "dbghelp.lib")
int main(void)
{
    HANDLE hFile = NULL;
    hFile = CreateFileA(
        "tdump.dmp", GENERIC_READ, 0, NULL, OPEN_EXISTING,
        FILE_ATTRIBUTE_NORMAL, NULL);
    if (hFile != INVALID_HANDLE_VALUE)
    {
        printf("file handle is %p\n", hFile);
        HANDLE hMapFile = NULL;
        hMapFile = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
        if (hMapFile != NULL)
        {
            printf("file Map handle is %p\n", hMapFile);
            LPVOID lpMapAddress = NULL;
            lpMapAddress = MapViewOfFile(hMapFile, FILE_MAP_READ, 0, 0, 0);
            if (lpMapAddress != NULL)
            {
                printf("view of map file is %p\n", lpMapAddress);
                PMINIDUMP_DIRECTORY dudir = NULL;
                PVOID strptr = NULL;
                ULONG ssiz = 0;
                BOOL res = FALSE;
                res = MiniDumpReadDumpStream(lpMapAddress, 3, &dudir, &strptr, &ssiz);
                if (res && strptr != NULL)
                {
                    PMINIDUMP_THREAD_LIST tlist = (PMINIDUMP_THREAD_LIST)strptr;
                    for (ULONG32 i = 0; i < tlist->NumberOfThreads; i++)
                    {
                        ULONG64 dsiz = tlist->Threads[i].ThreadContext.DataSize;
                        ULONG64 rva = tlist->Threads[i].ThreadContext.Rva;
                        ULONG64 memsta = tlist->Threads[i].Stack.StartOfMemoryRange;
                        ULONG64 memsiz = tlist->Threads[i].Stack.Memory.DataSize;
                        ULONG64 memrva = tlist->Threads[i].Stack.Memory.Rva;
                        printf("look in debugger %I64x\t%I64x\t%I64x\t%I64x\t%I64x\n",
                               dsiz, rva, memsta, memsiz, memrva);
                    }
                    _tagSTACKFRAME64 tsf = {0};
                    printf("%zx\n", sizeof(tsf));
                }
                UnmapViewOfFile(lpMapAddress);
                CloseHandle(hMapFile);
                CloseHandle(hFile);
            }
        }
    }
    return 0;
} 

编译并执行

cl /Zi /W4 /analyze:autolog- /Od /EHsc /nologo dumpdis.cpp /link /release
dumpdis.cpp

dumpdis.exe
file handle is 000000000000009C
file Map handle is 00000000000000A0
view of map file is 0000029D96410000
look in debugger 4d0    2076    dce012edb0      1250    0
look in debugger 4d0    2546    dce01af858      7a8     0
look in debugger 4d0    2a16    dce047fa68      598     0
look in debugger 4d0    2ee6    dce04ffb48      4b8     0
108

下面是使用 dbgeng 的 IDebugSymbols::GetScope 获取的堆栈帧。
下面的代码是一个 windbg 扩展,一个 dll,但您可以使用 dbgeng 制作独立的 exe(请参阅 windbg sdk 中的示例)

代码

#include <engextcpp.cpp>
#define bufsiz 0x2000
// Extension class of this debugger extension DLL. It declares a single
// command, gscope, whose body is supplied by the EXT_COMMAND(gscope, ...)
// definition in this file.
class EXT_CLASS : public ExtExtension
{
public:
    EXT_COMMAND_METHOD(gscope); // handler invoked as !gscope in the debugger
};
EXT_DECLARE_GLOBALS();
// !gscope: queries the current symbol scope through m_Symbols->GetScope and
// prints the instruction offset together with the fields of the returned
// DEBUG_STACK_FRAME. Takes no arguments.
EXT_COMMAND(gscope, "", "")
{
    // Pass the address of a real ULONG64 so GetScope can report the
    // instruction offset; a null pointer here meant "insptr" could only ever
    // print 0.
    ULONG64 ip = 0;
    DEBUG_STACK_FRAME sfr = {0};
    BYTE scont[bufsiz] = {0}; // receives the raw scope context
    HRESULT hr = m_Symbols->GetScope(&ip, &sfr, scont, bufsiz);
    if (hr == S_OK)
    {
        Out("insptr\t=\t%I64x\n", ip);
        Out("instof\t=\t%I64x\n", sfr.InstructionOffset);
        Out("retoff\t=\t%I64x\n", sfr.ReturnOffset);
        Out("fraoff\t=\t%I64x\n", sfr.FrameOffset);
        Out("staoff\t=\t%I64x\n", sfr.StackOffset);
        Out("ftentr\t=\t%I64x\n", sfr.FuncTableEntry);
        Out("parone\t=\t%I64x\n", sfr.Params[0]);
        Out("partwo\t=\t%I64x\n", sfr.Params[1]);
        Out("partre\t=\t%I64x\n", sfr.Params[2]);
        Out("parfor\t=\t%I64x\n", sfr.Params[3]);
        Out("resone\t=\t%I64x\n", sfr.Reserved[0]);
        // Virtual and FrameNumber are 32-bit fields; widen them explicitly
        // so they match the 64-bit %I64x conversion.
        Out("virtua\t=\t%I64x\n", (ULONG64)sfr.Virtual);
        Out("franum\t=\t%I64x\n", (ULONG64)sfr.FrameNumber);
    }
    else
    {
        Err("GetScope failed, hr = 0x%08x\n", hr);
    }
}

编译和链接

cat complink.bat
cl /LD /nologo /W4 /Ox  /Zi /EHsc /I"C:\Program Files (x86)\Windows Kits\10\Debuggers\inc" %1.cpp /link /EXPORT:DebugExtensionInitialize /Export:%1 /Export:help /RELEASE

执行 !gscope 和 kb1 进行比较

cdb -c ".load gscope;!gscope;kb1;q" -z ..\dumsta\tdump.dmp |awk "/Reading/,/quit/"     
0:000> cdb: Reading initial command '.load gscope;!gscope;kb1;q'
insptr  =       0
instof  =       7ffe652f108c
retoff  =       7ffe652f444f
fraoff  =       dce012ede0
staoff  =       dce012edb0
ftentr  =       0
parone  =       dce0245000
partwo  =       7ffe6534d4b0
partre  =       7ffe6534d4b0
parfor  =       7ffe6534d4b0
resone  =       0
virtua  =       1
franum  =       0
RetAddr           : Args to Child                                                           : Call Site      
00007ffe`652f444f : 000000dc`e0245000 00007ffe`6534d4b0 00007ffe`6534d4b0 00007ffe`6534d4b0 : ntdll!LdrpDoDebuggerBreak+0x30
quit: