如何从一个可执行文件中提取函数以在另一个可执行文件中包含/链接

逆向工程 反汇编 反编译 可执行文件 二进制
2021-07-06 06:49:07

我想知道:要从一个为特定平台和体系结构编译的二进制文件中提取一系列函数,并把它们包含到另一个独立的程序中,以便在新编译的程序里调用这些函数,除了尝试反汇编/反编译再重新汇编/重新编译之外,还有哪些选择。

我尝试了反编译提取路线,但由常用工具(如 IDA Pro)生成的代码几乎不是可编译的代码,而且我正在查看的逻辑很模糊且复杂,无法手动理解和重写/复制。

我已经见过十来个应用程序能够做到这一点——它们都使用了我想提取的那些函数,而且我可以确认,反编译这些可执行文件后,这些函数看起来完全相同——但我还是想不出该如何照着做。

如能给予任何指点,将不胜感激。

2个回答

假设您有一些未知的目标文件。它的源代码是

#include <stdio.h>
#include <string.h>

// Number of entries in the substitution table; encrypt() indexes it with
// letter offsets 0..25.
#define NCHAR   26

// Forward declarations; the definitions follow main().
void setkey(int i);
char *encrypt(char *s);
// Substitution table: written by setkey(), read by codepoint().
char keybuf[NCHAR];
char codepoint(char c);

// Demo driver: builds the table with key 5, then encrypts a heap copy of
// "Hello World" in place and prints the result followed by a newline.
int main(void) {
    char *secret;
    setkey(5);
    // encrypt() writes through its argument, so it is handed a writable
    // copy rather than the string literal itself.
    // NOTE(review): the strdup() result is neither checked for NULL nor freed.
    secret=strdup("Hello World");
    printf("%s\n", encrypt(secret));
}

// Fills the table: keybuf[i] receives (key * (i + 1)) % NCHAR for
// i = 0 .. NCHAR-1, computed by repeatedly adding key to a running value.
void setkey(int key) {
    int i, val;
    val=key;
    for (i=0; i<NCHAR; i++) {
        keybuf[i]=val%NCHAR;
        // advance to the next multiple of key
        val+=key;
    }
}

// Rewrites the string s in place: each letter 'a'..'z' or 'A'..'Z' is replaced
// by the letter of the same case whose offset is codepoint(original offset).
// All other characters are left untouched. Returns the start of the string.
char *encrypt(char *s) {
    // remember the start; s itself is advanced while scanning
    char *t=s;
    while (*s) {
        if (*s>='a' && *s<='z') {
            *s='a'+codepoint(*s-'a');
        } else if (*s>='A' && *s<='Z') {
            *s='A'+codepoint(*s-'A');
        }
        s++;
    }
    return t;
}

// Returns the table entry keybuf[c]. encrypt() only passes offsets 0..25.
// NOTE(review): c is used as an index without any range check.
char codepoint(char c) {
    return keybuf[c];
}

您手头只有目标代码。作为参考,objdump -d original 的输出如下:

Disassembly of section .text:

0000000000400490 <_start>:
  400490:   31 ed                   xor    %ebp,%ebp
...
000000000040057d <main>:
  40057d:   55                      push   %rbp
  40057e:   48 89 e5                mov    %rsp,%rbp
...
  4005b2:   c3                      retq   

00000000004005b3 <setkey>:
  4005b3:   55                      push   %rbp
  4005b4:   48 89 e5                mov    %rsp,%rbp
...
  4005ed:   48 98                   cltq   
  4005ef:   88 90 60 10 60 00       mov    %dl,0x601060(%rax)
  4005f5:   8b 45 ec                mov    -0x14(%rbp),%eax
...
  400606:   c3                      retq   

0000000000400607 <encrypt>:
  400607:   55                      push   %rbp
  400608:   48 89 e5                mov    %rsp,%rbp
...
  400677:   89 c7                   mov    %eax,%edi
  400679:   e8 21 00 00 00          callq  40069f <codepoint>
  40067e:   83 c0 41                add    $0x41,%eax
...
  40069e:   c3                      retq   

000000000040069f <codepoint>:
  40069f:   55                      push   %rbp
  4006a0:   48 89 e5                mov    %rsp,%rbp
...
  4006ac:   48 98                   cltq   
  4006ae:   0f b6 80 60 10 60 00    movzbl 0x601060(%rax),%eax
  4006b5:   5d                      pop    %rbp
  4006b6:   c3                      retq   

您想在程序中使用该代码。

要做到这一点,您实际上不必理解或反汇编它,也不必把它变成可编译的代码。

使用 IDA、调试器或您喜欢的工具,您唯一需要了解的内容是:

  • 代码中有趣的部分是从 0x4005B3 到 0x4006B6 的部分。
  • 此代码在两个不同位置(0x4005ef 和 0x4006ae 处的指令)引用了位于 0x601060 的某个变量。(这两条指令在地址字段之前分别有 2 个和 3 个字节,因此需要修补的地址字段位于 0x4005f1 和 0x4006b1。)
  • 要使用它,您必须先使用整数调用 0x4005b3 处的函数,然后使用字符串调用 0x400607 处的函数以获取加密字符串。

让我们将整个 .text 节转换为一个字符数组,准备嵌入到 C 代码中:

$ objdump -s -j .text original | grep '^ 4' | cut -d' ' -f3-6 | tr -d ' ' | sed 's/../0x&,/g'
0x31,0xed,0x49,0x89,0xd1,0x5e,0x48,0x89,0xe2,0x48,0x83,0xe4,0xf0,0x50,0x54,0x49,
...
0x41,0x5e,0x41,0x5f,0xc3,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0xf3,0xc3,

并编写一个新程序来使用它。

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Byte-for-byte copy of the original executable's .text section, which was
// loaded at 0x400490: the byte at original address A is code[A - 0x400490].
// The element type is unsigned char because these are raw byte values, many
// of which (0xed, 0xff, ...) do not fit in a signed char.
// The array must stay writable: two address fields inside it are patched at
// run time before the code is called.
unsigned char code[]={
0x31,0xed,0x49,0x89,0xd1,0x5e,0x48,0x89,0xe2,0x48,0x83,0xe4,0xf0,0x50,0x54,0x49,
0xc7,0xc0,0x30,0x07,0x40,0x00,0x48,0xc7,0xc1,0xc0,0x06,0x40,0x00,0x48,0xc7,0xc7,
0x7d,0x05,0x40,0x00,0xe8,0xa7,0xff,0xff,0xff,0xf4,0x66,0x0f,0x1f,0x44,0x00,0x00,
0xb8,0x4f,0x10,0x60,0x00,0x55,0x48,0x2d,0x48,0x10,0x60,0x00,0x48,0x83,0xf8,0x0e,
0x48,0x89,0xe5,0x77,0x02,0x5d,0xc3,0xb8,0x00,0x00,0x00,0x00,0x48,0x85,0xc0,0x74,
0xf4,0x5d,0xbf,0x48,0x10,0x60,0x00,0xff,0xe0,0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
0xb8,0x48,0x10,0x60,0x00,0x55,0x48,0x2d,0x48,0x10,0x60,0x00,0x48,0xc1,0xf8,0x03,
0x48,0x89,0xe5,0x48,0x89,0xc2,0x48,0xc1,0xea,0x3f,0x48,0x01,0xd0,0x48,0xd1,0xf8,
0x75,0x02,0x5d,0xc3,0xba,0x00,0x00,0x00,0x00,0x48,0x85,0xd2,0x74,0xf4,0x5d,0x48,
0x89,0xc6,0xbf,0x48,0x10,0x60,0x00,0xff,0xe2,0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
0x80,0x3d,0x19,0x0b,0x20,0x00,0x00,0x75,0x11,0x55,0x48,0x89,0xe5,0xe8,0x7e,0xff,
0xff,0xff,0x5d,0xc6,0x05,0x06,0x0b,0x20,0x00,0x01,0xf3,0xc3,0x0f,0x1f,0x40,0x00,
0x48,0x83,0x3d,0xc8,0x08,0x20,0x00,0x00,0x74,0x1e,0xb8,0x00,0x00,0x00,0x00,0x48,
0x85,0xc0,0x74,0x14,0x55,0xbf,0x20,0x0e,0x60,0x00,0x48,0x89,0xe5,0xff,0xd0,0x5d,
0xe9,0x7b,0xff,0xff,0xff,0x0f,0x1f,0x00,0xe9,0x73,0xff,0xff,0xff,0x55,0x48,0x89,
0xe5,0x48,0x83,0xec,0x10,0xbf,0x05,0x00,0x00,0x00,0xe8,0x24,0x00,0x00,0x00,0xbf,
0x44,0x07,0x40,0x00,0xe8,0xe7,0xfe,0xff,0xff,0x48,0x89,0x45,0xf8,0x48,0x8b,0x45,
0xf8,0x48,0x89,0xc7,0xe8,0x5e,0x00,0x00,0x00,0x48,0x89,0xc7,0xe8,0x9f,0xfe,0xff,
0xff,0xc9,0xc3,0x55,0x48,0x89,0xe5,0x89,0x7d,0xec,0x8b,0x45,0xec,0x89,0x45,0xfc,
0xc7,0x45,0xf8,0x00,0x00,0x00,0x00,0xeb,0x36,0x8b,0x4d,0xfc,0xba,0x4f,0xec,0xc4,
0x4e,0x89,0xc8,0xf7,0xea,0xc1,0xfa,0x03,0x89,0xc8,0xc1,0xf8,0x1f,0x29,0xc2,0x89,
0xd0,0x6b,0xc0,0x1a,0x29,0xc1,0x89,0xc8,0x89,0xc2,0x8b,0x45,0xf8,0x48,0x98,0x88,
0x90,0x60,0x10,0x60,0x00,0x8b,0x45,0xec,0x01,0x45,0xfc,0x83,0x45,0xf8,0x01,0x83,
0x7d,0xf8,0x19,0x7e,0xc4,0x5d,0xc3,0x55,0x48,0x89,0xe5,0x48,0x83,0xec,0x20,0x48,
0x89,0x7d,0xe8,0x48,0x8b,0x45,0xe8,0x48,0x89,0x45,0xf8,0xeb,0x71,0x48,0x8b,0x45,
0xe8,0x0f,0xb6,0x00,0x3c,0x60,0x7e,0x2c,0x48,0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x3c,
0x7a,0x7f,0x21,0x48,0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x83,0xe8,0x61,0x0f,0xbe,0xc0,
0x89,0xc7,0xe8,0x58,0x00,0x00,0x00,0x83,0xc0,0x61,0x89,0xc2,0x48,0x8b,0x45,0xe8,
0x88,0x10,0xeb,0x35,0x48,0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x3c,0x40,0x7e,0x2a,0x48,
0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x3c,0x5a,0x7f,0x1f,0x48,0x8b,0x45,0xe8,0x0f,0xb6,
0x00,0x83,0xe8,0x41,0x0f,0xbe,0xc0,0x89,0xc7,0xe8,0x21,0x00,0x00,0x00,0x83,0xc0,
0x41,0x89,0xc2,0x48,0x8b,0x45,0xe8,0x88,0x10,0x48,0x83,0x45,0xe8,0x01,0x48,0x8b,
0x45,0xe8,0x0f,0xb6,0x00,0x84,0xc0,0x75,0x84,0x48,0x8b,0x45,0xf8,0xc9,0xc3,0x55,
0x48,0x89,0xe5,0x89,0xf8,0x88,0x45,0xfc,0x0f,0xbe,0x45,0xfc,0x48,0x98,0x0f,0xb6,
0x80,0x60,0x10,0x60,0x00,0x5d,0xc3,0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0x41,0x57,0x41,0x89,0xff,0x41,0x56,0x49,0x89,0xf6,0x41,0x55,0x49,0x89,0xd5,0x41,
0x54,0x4c,0x8d,0x25,0x38,0x07,0x20,0x00,0x55,0x48,0x8d,0x2d,0x38,0x07,0x20,0x00,
0x53,0x4c,0x29,0xe5,0x31,0xdb,0x48,0xc1,0xfd,0x03,0x48,0x83,0xec,0x08,0xe8,0x25,
0xfd,0xff,0xff,0x48,0x85,0xed,0x74,0x1e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0x4c,0x89,0xea,0x4c,0x89,0xf6,0x44,0x89,0xff,0x41,0xff,0x14,0xdc,0x48,0x83,0xc3,
0x01,0x48,0x39,0xeb,0x75,0xea,0x48,0x83,0xc4,0x08,0x5b,0x5d,0x41,0x5c,0x41,0x5d,
0x41,0x5e,0x41,0x5f,0xc3,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0xf3,0xc3,
};

// Replacement for the original program's variable at 0x601060, which the
// copied code reads and writes. Its true size is unknown; the disassembly
// suggests 26 bytes are enough, so it is deliberately over-allocated to be
// on the safe side.
char buffer[1000];

// Addresses taken from the disassembly of the original executable.
enum {
    TEXT_BASE    = 0x400490,  // load address of the copied .text section
    FN_SETUP     = 0x4005b3,  // takes an int; has to be called first
    FN_TRANSFORM = 0x400607,  // takes a string and returns the encrypted string
    DATA_REF_A   = 0x4005f1,  // address field of the instruction at 0x4005ef
    DATA_REF_B   = 0x4006b1   // address field of the instruction at 0x4006ae
};

// Overwrites the 4-byte absolute address stored at original address `where`
// inside the copied code so that the instruction refers to `target`.
// Returns 0 on success, or -1 when `target` cannot be encoded: the field is
// a sign-extended 32-bit displacement, so only addresses up to INT32_MAX fit.
static int patch_abs32(long where, const void *target)
{
    uintptr_t addr = (uintptr_t)target;

    if (addr > (uintptr_t)INT32_MAX) {
        return -1;
    }

    uint32_t field = (uint32_t)addr;
    // memcpy instead of a cast to int *: the field may be misaligned.
    memcpy(&code[where - TEXT_BASE], &field, sizeof field);
    return 0;
}

// Runs `fn` on a private heap copy of `text` and prints the result on its
// own line. A copy is required because the called code modifies the string
// in place. Returns 0 on success, -1 if the copy cannot be allocated.
static int run_and_print(char *(*fn)(char *), const char *text)
{
    char *copy = strdup(text);

    if (copy == NULL) {
        perror("strdup");
        return -1;
    }
    printf("%s\n", fn(copy));
    free(copy);
    return 0;
}

// Calls the two functions embedded in `code`. The memory holding `code`
// must be executable for this to work.
int main(void) {
    // Index into the array rather than computing code-0x400490 first: that
    // intermediate pointer lies far outside the array, which is undefined.
    void (*f1)(int)=(void (*)(int))(void *)&code[FN_SETUP - TEXT_BASE];
    char* (*f2)(char *)=(char *(*)(char *))(void *)&code[FN_TRANSFORM - TEXT_BASE];

    // Redirect both references to the original variable at 0x601060.
    if (patch_abs32(DATA_REF_A, buffer) != 0 ||
        patch_abs32(DATA_REF_B, buffer) != 0) {
        fprintf(stderr, "buffer is not reachable through a 32-bit address; "
                        "link as a non-PIE executable (e.g. -no-pie)\n");
        return 1;
    }

    f1(5);
    if (run_and_print(f2, "Hello World") != 0 ||
        run_and_print(f2, "Some other String") != 0) {
        return 1;
    }
    return 0;
}

由于原始 .text 段从 0x400490 开始,而我们的函数分别位于 0x4005b3 和 0x400607,我们据此计算它们相对于 code 数组开头的新偏移量。由于我们发现了两处对代码之外的缓冲区的引用,因此我们把它们修补为指向我们自己的缓冲区。请注意,对于从 encrypt 到 codepoint 的调用,我们什么都不用做,因为这个调用本来就是位置无关的(调用位于其后 0x21 字节处的函数)。

现在,不要忘记在编译程序时使数据/堆栈段可执行:

$ cc -zexecstack -g -o copy copy.c
$ ./copy
Oziix Lxmiu
Rxnz xwozm Rwmtsj

当然,还有一些问题需要解决:

  • 您必须找到可能从您的函数中调用的所有内容,并将其全部包含在您的代码中。
  • 您必须在代码之外找到对数据的所有引用,并相应地修补指针;如果数据包含指针,也修补它们。
  • 如果您的代码调用任何库函数,则必须进行重定位。幸运的是,(ELF/PE) 二进制文件的重定位表向您展示了执行此操作的位置。

这种方法的好处是:只要使用相同的处理器架构,它甚至可以跨操作系统工作。但是,如果 ABI 不同,您可能需要围绕这些调用编写包装函数。

如果您想做得更讲究一些,就不要直接调用这些函数,而是在您的程序中嵌入一个模拟器引擎,让模拟器来执行这段代码。这在很大程度上可以帮助您检测是否缺少某些代码,以及对外部数据的引用在哪里。显然,一旦能用模拟器运行它,您就可以把模拟器保留在最终程序中,这甚至可以让您在新的 PC 程序中使用一些 ARM 例程,反之亦然。

您可以在https://mega.co.nz/#!8dR0TZhA!Z4DdQ07JCUzV5nJiJ79PZhHbiKDu9QZEw10IXr7ssuI下载文件

McSema 是唯一一个声称可以实现类似功能的项目(需要大量半自动和手动工作)。可以看看它的示例,也许会有用。