在用一些可执行文件中的标准库函数练习 Ghidra 时,只要代码使用了 SSE 寄存器进行优化,我通常就无法得到良好的反编译输出。我试过在这里和互联网的其他地方搜索各种关键字组合,但找不到任何相关的内容。
下面的例子来自没有调试符号的 x64 代码,所以函数/结构/变量名称都是我自己起的。类型的选择并没有改变/改善所展示的问题 - 但也许只是我做错了。我还尽量保留了足够的上下文以提高可读性,但我找不到该站点在这方面的任何指导方针。
当
std::string
的 32 字节通过两条 128 位 SSE 移动指令被复制(作为移动构造的一部分)时,整个操作被拆成了 4 字节的块。为完整起见,下面是我的
std::string
数据类型定义(导出):struct std_string { char * data; char * field_1; ulonglong size; ulonglong capacity; };
反汇编:
************************************************************** * FUNCTION * ************************************************************** std_string * __fastcall std_string_operator+(std_string std_string * RAX:8 <RETURN> std_string * RCX:8 thisOut undefined8 RDX:8 thisIn char * R8:8 stringIn XREF[1]: 140106dcc(W) longlong R8:8 size XREF[1]: 140106dcc(W) undefined8 RAX:8 thisIn_ XREF[1]: 140106de6(W) std_string_operator+ 140106dc0 40 53 PUSH RBX 140106dc2 48 83 ec 20 SUB RSP,0x20 140106dc6 49 8b c0 MOV RAX,stringIn 140106dc9 4c 8b ca MOV R9,thisIn 140106dcc 49 c7 c0 MOV size,-0x1 ff ff ff ff 140106dd3 48 8b d9 MOV RBX,thisOut LAB_140106dd6 XREF[1]: 140106dde(j) 140106dd6 49 ff c0 INC size 140106dd9 42 80 3c CMP byte ptr [RAX + size*0x1],0x0 00 00 140106dde 75 f6 JNZ LAB_140106dd6 140106de0 48 8b d0 MOV thisIn,RAX 140106de3 49 8b c9 MOV thisOut,R9 140106de6 e8 75 fe CALL std_string_append std_string * std_string_append(s ff ff 140106deb 33 c9 XOR thisOut,thisOut 140106ded 48 89 4b 10 MOV qword ptr [RBX + 0x10],thisOut 140106df1 48 89 4b 18 MOV qword ptr [RBX + 0x18],thisOut 140106df5 0f 10 00 MOVUPS XMM0,xmmword ptr [thisIn_] 140106df8 0f 11 03 MOVUPS xmmword ptr [RBX],XMM0 140106dfb 0f 10 48 10 MOVUPS XMM1,xmmword ptr [thisIn_ + 0x10] 140106dff 0f 11 4b 10 MOVUPS xmmword ptr [RBX + 0x10],XMM1 140106e03 48 89 48 10 MOV qword ptr [thisIn_ + 0x10],thisOut 140106e07 48 c7 40 MOV qword ptr [thisIn_ + 0x18],0xf 18 0f 00 00 00 140106e0f 88 08 MOV byte ptr [thisIn_],thisOut 140106e11 48 8b c3 MOV thisIn_,RBX 140106e14 48 83 c4 20 ADD RSP,0x20 140106e18 5b POP RBX 140106e19 c3 RET
反编译:
std_string * std_string_operator+(std_string *thisOut,std_string *thisIn,char *stringIn) { undefined4 uVar1; undefined4 uVar2; undefined4 uVar3; std_string *thisIn_; longlong size; size = -1; do { size = size + 1; } while (stringIn[size] != '\0'); thisIn_ = std_string_append(thisIn,stringIn,size); thisOut->size = 0; thisOut->capacity = 0; uVar1 = *(undefined4 *)((longlong)&thisIn_->data + 4); uVar2 = *(undefined4 *)&thisIn_->field_1; uVar3 = *(undefined4 *)((longlong)&thisIn_->field_1 + 4); *(undefined4 *)&thisOut->data = *(undefined4 *)&thisIn_->data; *(undefined4 *)((longlong)&thisOut->data + 4) = uVar1; *(undefined4 *)&thisOut->field_1 = uVar2; *(undefined4 *)((longlong)&thisOut->field_1 + 4) = uVar3; uVar1 = *(undefined4 *)((longlong)&thisIn_->size + 4); uVar2 = *(undefined4 *)&thisIn_->capacity; uVar3 = *(undefined4 *)((longlong)&thisIn_->capacity + 4); *(undefined4 *)&thisOut->size = *(undefined4 *)&thisIn_->size; *(undefined4 *)((longlong)&thisOut->size + 4) = uVar1; *(undefined4 *)&thisOut->capacity = uVar2; *(undefined4 *)((longlong)&thisOut->capacity + 4) = uVar3; thisIn_->size = 0; thisIn_->capacity = 0xf; *(undefined *)&thisIn_->data = 0; return thisOut; }
如果它表现为复制四个各 8 字节的字段,或者(以某种形式)表示为两次 128 位复制或一次 256 位复制,我还能理解。我猜上面的 4 字节块是 Ghidra 对 MOVUPS 的建模方式,但这对我来说似乎一点帮助都没有。这种(通过 SSE 的)复制到处都相当频繁地出现,所以每次都多出 16 行噪音很烦人。
(另外,不要问我那个奇怪的函数签名是怎么回事,我不知道编译器在做什么。)
memset 中使用的一条
PUNPCKLBW
(这里只是通过重复低 8 个字节中的每一个来填充 XMM0,实际上是把要设置的单个字节广播到 XMM0 的全部 16 个字节)
膨胀成了几十行乱码(我确信它忠实地模拟了指令的效果,但这并没有帮助):反汇编:
************************************************************** * FUNCTION * ************************************************************** longlong * __fastcall memset(void * location, byte byteT longlong * RAX:8 <RETURN> void * RCX:8 location byte DL:1 byteToSet XREF[1]: 1411960a8(W) ulonglong R8:8 count undefined8 R9:8 inputByteRepeated8 XREF[1]: 1411960a0(W) undefined2 DX:2 inputByteRepeated2 XREF[1]: 1411960a8(W) memset XREF[518]: [...] 141196090 4c 8b d9 MOV R11,location 141196093 0f b6 d2 MOVZX EDX,DL 141196096 49 b9 01 MOV R9,0x101010101010101 01 01 01 01 01 01 01 1411960a0 4c 0f af ca IMUL R9,RDX 1411960a4 49 83 f8 10 CMP R8,0x10 1411960a8 0f 86 f2 JBE LAB_1411961a0 00 00 00 1411960ae 66 49 0f MOVQ XMM0,R9 6e c1 1411960b3 66 0f 60 c0 PUNPCKLBW XMM0,XMM0 [...]
反编译:
longlong * memset(void *location,byte byteToSet,ulonglong count) { // [...] ushort inputByteRepeated2; ulonglong inputByteRepeated8; undefined4 uVar5; undefined4 uVar7; undefined4 uVar8; undefined auVar6 [13]; inputByteRepeated8 = (ulonglong)byteToSet * 0x101010101010101; inputByteRepeated2 = (ushort)inputByteRepeated8; _inputByteRepeated2 = (uint)inputByteRepeated8; if (count < 0x11) { // [...] } auVar6[6] = SUB141(ZEXT814(inputByteRepeated8) >> 0x30,0); auVar6 = ZEXT813(inputByteRepeated8); register0x0000120c = SUB164(CONCAT313(SUB163(CONCAT214(SUB162(CONCAT115(SUB161(ZEXT816(inputByteRepeated8) >> 0x38 ,0), CONCAT114(SUB151(ZEXT815( inputByteRepeated8) >> 0x38,0), ZEXT814(inputByteRepeated8))) >> 0x70,0), CONCAT113(auVar6[6],auVar6)) >> 0x68,0), CONCAT112(auVar6[6],ZEXT812(inputByteRepeated8))) >> 0x60,0); _auVar6 = CONCAT79(SUB167(CONCAT610(SUB166(CONCAT511(SUB165(CONCAT412(register0x0000120c, CONCAT111(SUB131(auVar6 >> 0x28,0),ZEXT811(inputByteRepeated8))) >> 0x58,0), CONCAT110(SUB121(ZEXT812(inputByteRepeated8) >> 0x28,0), (unkuint10)inputByteRepeated8)) >> 0x50, 0), CONCAT19(SUB131(auVar6 >> 0x20,0),(unkuint9)inputByteRepeated8 )) >> 0x48,0), (unkuint9)inputByteRepeated8 & 0xffffffffffffffff | 0 << 0x40); register0x00001208 = SUB168(_auVar6 >> 0x40,0); _auVar6 = CONCAT115(SUB1611(CONCAT106(SUB1610(CONCAT97(SUB169(CONCAT88(register0x00001208, (inputByteRepeated8 >> 0x18 ) << 0x38) >> 0x38,0), ((uint7)inputByteRepeated8 >> 0x18) << 0x30 ) >> 0x30,0), ((uint6)inputByteRepeated8 >> 0x10) << 0x28) >> 0x28,0), ((uint5)inputByteRepeated8 >> 0x10) << 0x20); _auVar6 = CONCAT142(SUB1614(CONCAT133(SUB1613(CONCAT124(SUB1612(_auVar6 >> 0x20,0), (_inputByteRepeated2 >> 8) << 0x18) >> 0x18,0),((uint3)inputByteRepeated8 >> 8) << 0x10) >> 0x10,0),inputByteRepeated2 & 0xff | inputByteRepeated2 << 8); uVar7 = SUB164(_auVar6 >> 0x20,0); uVar5 = SUB164(_auVar6,0); uVar8 = SUB164(_auVar6 >> 0x40,0); // [...]
只要代码只是执行一条
MOVAPS xmmword ptr [location],XMM0
或类似操作,其余的反编译结果也会使用这些笨拙的独立 4 字节块。结果我花了一段时间才认出这整个函数是 memset
!
在这些情况下,如何使反编译的代码更具可读性(或首先避免混乱)?
如果 SSE 已经作为标签存在,我会用它
来标记这个问题,但我不愿意创建它,因为我对这个社区还不够了解。当然:欢迎提出改进建议!