编辑 2
无需借助内联汇编或单独的汇编模块,我就能够使用编译器内部函数(intrinsic)_addcarry_uXX(16、32、64 位版本)生成连续的 adc 指令(使用 msvc cl.exe 的 /O1 开关进行优化编译)
相关代码及其反汇编如下
#include <stdio.h>
#include <intrin.h>
/* Number of unsigned int words in dat[]. */
#define BUSIZ 16
/* Number of words main() consumes per loop iteration; BUSIZ (16) is a multiple of it. */
#define UNROLL 8
/* Sample input words that main() sums into the checksum. */
unsigned int dat[BUSIZ] =
{
0x10001337, 0xffffffff, 0x10001337 , 0xffffffff,
0x00001337, 0xdeadbeef, 0xbeadbed5 , 0xdad15dad,
0xba5eba11, 0xf001b055, 0xc001b055 , 0x501eb055,
0xba11ba75, 0xbadba115, 0xbed15bad , 0xdaff0d11
};
/*
 * Sums every word of dat[] into chksum, feeding the carry-out of each
 * addition into the next one, then prints the sum plus the final carry.
 * Returns 0.
 */
int main (void)
{
unsigned int chksum = 0;
unsigned char carry = 0;
/* Each pass handles UNROLL (8) consecutive words; indexing up to i+7 stays
   in bounds only because BUSIZ is a multiple of UNROLL. */
for(int i=0; i< BUSIZ; i += UNROLL )
{
/* Per the intrinsic's documentation, _addcarry_u32(c_in, a, b, &out) stores
   a + b + c_in in out and returns the carry-out. Chaining the calls on the
   same carry variable is what lets the compiler emit back-to-back adc. */
carry = _addcarry_u32(carry,chksum,dat[i+0],&chksum);
carry = _addcarry_u32(carry,chksum,dat[i+1],&chksum);
carry = _addcarry_u32(carry,chksum,dat[i+2],&chksum);
carry = _addcarry_u32(carry,chksum,dat[i+3],&chksum);
carry = _addcarry_u32(carry,chksum,dat[i+4],&chksum);
carry = _addcarry_u32(carry,chksum,dat[i+5],&chksum);
carry = _addcarry_u32(carry,chksum,dat[i+6],&chksum);
carry = _addcarry_u32(carry,chksum,dat[i+7],&chksum);
}
/* Add the carry left over from the last addition back into the sum. */
printf("FINAL CHECKSUM = %8x\n" , chksum + carry );
return 0;
}
执行该程序,输出如下
:\>addcarry.exe
FINAL CHECKSUM = 616ba476
用 Python 脚本验证该结果的正确性
:\>cat chksum.py
# Cross-check for the C program: add up every word of dat without any width
# limit, then fold whatever overflowed past bit 31 back into the low 32 bits.
dat = [
    0x10001337, 0xffffffff, 0x10001337, 0xffffffff,
    0x00001337, 0xdeadbeef, 0xbeadbed5, 0xdad15dad,
    0xba5eba11, 0xf001b055, 0xc001b055, 0x501eb055,
    0xba11ba75, 0xbadba115, 0xbed15bad, 0xdaff0d11,
]

# Despite its name, this accumulator is an arbitrary-precision Python int and
# grows beyond 33 bits for this input.
chksum33bit = 0
# Iterate over the actual list length rather than a hard-coded 16.
for i in range(len(dat)):
    chksum33bit = chksum33bit + dat[i]

chklow = chksum33bit & 0xffffffff   # low 32 bits of the wide sum
chkhig = chksum33bit >> 32          # everything that carried out of bit 31
chkfin = chklow + chkhig            # add the carries back into the low word
print(hex(chkfin))
:\>python chksum.py
0x616ba476L
反汇编我们看到编译器生成了连续的 adc 指令
:\>cdb -c "uf addcarry!main;q" addcarry.exe | grep -B 31 quit
0:000> cdb: Reading initial command 'uf addcarry!main;q'
addcarry!main:
01151029 33c9 xor ecx,ecx
0115102b b804901901 mov eax,offset addcarry!dat+0x4 (01199004)
01151030 8ad1 mov dl,cl
addcarry!main+0x9:
01151032 80c2ff add dl,0FFh
01151035 1348fc adc ecx,dword ptr [eax-4]
01151038 1308 adc ecx,dword ptr [eax]
0115103a 134804 adc ecx,dword ptr [eax+4]
0115103d 134808 adc ecx,dword ptr [eax+8]
01151040 13480c adc ecx,dword ptr [eax+0Ch]
01151043 134810 adc ecx,dword ptr [eax+10h]
01151046 134814 adc ecx,dword ptr [eax+14h]
01151049 134818 adc ecx,dword ptr [eax+18h]
0115104c 0f92c2 setb dl
0115104f 83c020 add eax,20h
01151052 3d44901901 cmp eax,offset addcarry!__scrt_default_matherr (01199044)
01151057 7cd9 jl addcarry!main+0x9 (01151032)
addcarry!main+0x30:
01151059 0fb6c2 movzx eax,dl
0115105c 03c1 add eax,ecx
0115105e 50 push eax
0115105f 6890011901 push offset addcarry!`string' (01190190)
01151064 e805000000 call addcarry!printf (0115106e)
01151069 59 pop ecx
0115106a 59 pop ecx
0115106b 33c0 xor eax,eax
0115106d c3 ret
quit:
编辑 2 结束
编辑 1:
尽管我能够生成若干条 adc 指令,
却始终无法用 C 编译器成功生成连续的 adc 序列,
于是我在 windows\system32 目录中查找是否有二进制文件包含连续的 adc 序列,
结果在 imagehlp.dll 中找到了一处;
随后在谷歌上搜索,发现它似乎是 RFC 1071 的 IP 首部校验和算法,
很可能是用手写汇编实现的(在相关链接中可以找到摩托罗拉、Cray 等平台的汇编代码;这里还有一个讨论 ChkSum 实现的谷歌群组链接,以及微软在 1993 年左右随 NT imagehelp 工具包 SDK 发布的、由 Dave Cutler 编写的旧版 ChkSum())
下面是 win7 sp1 32 位 imagehlp.dll 中的 adc 序列
grep -obUaPs "\x13\x46\x04\x13\x46\x08" --include=*.dll *
imagehlp.dll:17883:‼F♦‼F
imagehlp.dll:17917:‼F♦‼F
imagehlp.dll:17963:‼F♦‼F
imagehlp.dll:18021:‼F♦‼F
xxd -c 12 -g 3 -s 18021 -l 99 imagehlp.dll
0004665: 134604 134608 13460c 134610 .F..F..F..F.
0004671: 134614 134618 13461c 134620 .F..F..F..F
000467d: 134624 134628 13462c 134630 .F$.F(.F,.F0
0004689: 134634 134638 13463c 134640 .F4.F8.F<.F@
0004695: 134644 134648 13464c 134650 .FD.FH.FL.FP
00046a1: 134654 134658 13465c 134660 .FT.FX.F\.F`
00046ad: 134664 134668 13466c 134670 .Fd.Fh.Fl.Fp
00046b9: 134674 134678 13467c 83d000 .Ft.Fx.F|...
00046c5: 81c680 ...
dumpbin /headers imagehlp.dll | grep -i "section header #1" -A 5
SECTION HEADER #1
.text name
23249 virtual size
1000 virtual address (41871000 to 41894248)
23400 size of raw data
600 file pointer to raw data (00000600 to 000239FF)
cdb -c "ln (imagehlp + 1000 + (0n18021-0x600));q" -z imagehlp.dll | grep -i -A 1 reading
0:000> cdb: Reading initial command 'ln (imagehlp + 1000 + (0n18021-0x600));q'
(41874f7b) imagehlp!ChkSum+0xea | (41875105) imagehlp!_SEH_prolog4_GS
编辑 1 结束
之所以会出现若干条 adc 指令,原因之一可能是编译时没有开启优化
假设你有如下这样的代码
#include <stdio.h>
#include <windows.h>
/*
 * Performs a series of 64-bit additions and prints each operand and
 * intermediate result in hex. The unoptimized 32-bit build of this function
 * is what the adc listing in the accompanying text was taken from.
 */
int main (void) {
DWORD64 a = 0x100002000;
DWORD64 b = 0x00000000ffffffff;
/* The low halves of a and b sum past 32 bits, so this addition carries
   into the high half: 0x100002000 + 0xffffffff = 0x200001fff. */
DWORD64 c = a + b;
/* %I64x is the MSVC-specific conversion for a 64-bit hex value. */
printf("%I64x\n" , a);
printf("%I64x\n" , b);
printf("%I64x\n" , c);
c = c + a;
printf("%I64x\n" , c);
c = c + b;
printf("%I64x\n" , c);
/* Doubling: both operands of the addition are the same variable. */
c = c + c;
printf("%I64x\n" , c);
return 0;
}
如果你在不开启优化的情况下编译这段代码,
然后反汇编函数 main 并查找 adc 指令,
就会看到若干条 adc 指令,其中有一些使用的是同一个寄存器
cl /Zi /EHsc /W4 /Od /nologo /analyze adc.cpp /link /release /nologo adc.cpp
cdb -c "g adc!main;uf .;q" adc.exe | grep -i "adc "
000a102b 134dec adc ecx,dword ptr [ebp-14h]
000a107c 1345f4 adc eax,dword ptr [ebp-0Ch]
000a10a3 134dec adc ecx,dword ptr [ebp-14h]
000a10ca 1355fc adc edx,dword ptr [ebp-4]
但如果开启优化,编译器足够聪明,可以把所有的 adc 指令都消除掉
cl /Zi /EHsc /W4 /Ox /nologo /analyze adc.cpp /link /release /nologo adc.cpp
cdb -c "g adc!main;uf .;q" adc.exe | grep -i "adc "
adc.exe
100002000
ffffffff
200001fff
300003fff
400003ffe
800007ffc
您问题中的代码,可能出自一个只做加法并返回结果的子程序,如下所示
#include <stdio.h>
#include <windows.h>
/* NOTE(review): presumably meant to keep the optimizer from folding madd()
   away so its add/adc pairs survive in the listing; MSVC documents "t" as
   the favor-fast-code option — confirm this has the intended effect. */
#pragma optimize ( "t" , off)
/*
 * Adds two 64-bit values several times over and returns the result:
 * ((a + b) + a + b) doubled.
 */
DWORD64 madd (DWORD64 a, DWORD64 b)
{
/* The initial zero is overwritten by the next statement; in the listing it
   still shows up as a separate store. */
DWORD64 c = 0;
c = a + b;
c = c + a;
c = c + b;
/* Doubling: both operands of the addition are the same variable. */
c = c + c;
return c;
}
/* Restores the "t" optimization setting for the code that follows. */
#pragma optimize ( "t" , on)
/* Calls madd() once with two fixed 64-bit operands and prints the result in hex. */
int main (void) {
DWORD64 a = 0x100002000;
DWORD64 b = 0x00000000ffffffff;
/* %I64x is the MSVC-specific conversion for a 64-bit hex value. */
printf("%I64x\n" , madd(a,b));
return 0;
}
这是添加多个64位值并返回结果的函数的反汇编
函数madd的反汇编
0:000> uf adc!madd
adc!madd [adc.cpp @ 6]:
6 00291000 55 push ebp
6 00291001 8bec mov ebp,esp
6 00291003 51 push ecx
6 00291004 51 push ecx
7 00291005 0f57c0 xorps xmm0,xmm0
7 00291008 660f1345f8 movlpd qword ptr [ebp-8],xmm0
8 0029100d 8b4508 mov eax,dword ptr [ebp+8]
8 00291010 034510 add eax,dword ptr [ebp+10h]
8 00291013 8b4d0c mov ecx,dword ptr [ebp+0Ch]
8 00291016 134d14 adc ecx,dword ptr [ebp+14h]
8 00291019 8945f8 mov dword ptr [ebp-8],eax
8 0029101c 894dfc mov dword ptr [ebp-4],ecx
9 0029101f 8b45f8 mov eax,dword ptr [ebp-8]
9 00291022 034508 add eax,dword ptr [ebp+8]
9 00291025 8b4dfc mov ecx,dword ptr [ebp-4]
9 00291028 134d0c adc ecx,dword ptr [ebp+0Ch]
9 0029102b 8945f8 mov dword ptr [ebp-8],eax
9 0029102e 894dfc mov dword ptr [ebp-4],ecx
10 00291031 8b45f8 mov eax,dword ptr [ebp-8]
10 00291034 034510 add eax,dword ptr [ebp+10h]
10 00291037 8b4dfc mov ecx,dword ptr [ebp-4]
10 0029103a 134d14 adc ecx,dword ptr [ebp+14h]
10 0029103d 8945f8 mov dword ptr [ebp-8],eax
10 00291040 894dfc mov dword ptr [ebp-4],ecx
11 00291043 8b45f8 mov eax,dword ptr [ebp-8]
11 00291046 0345f8 add eax,dword ptr [ebp-8]
11 00291049 8b4dfc mov ecx,dword ptr [ebp-4]
11 0029104c 134dfc adc ecx,dword ptr [ebp-4]
11 0029104f 8945f8 mov dword ptr [ebp-8],eax
11 00291052 894dfc mov dword ptr [ebp-4],ecx
12 00291055 8b45f8 mov eax,dword ptr [ebp-8]
12 00291058 8b55fc mov edx,dword ptr [ebp-4]
14 0029105b c9 leave
14 0029105c c3 ret
该函数使用同一个寄存器 ecx 来处理多次加法的进位
0:000> # adc*, adc!madd l 60
adc!madd+0x16 [adc.cpp @ 8]:
00291016 134d14 adc ecx,dword ptr [ebp+14h]
adc!madd+0x28 [adc.cpp @ 9]:
00291028 134d0c adc ecx,dword ptr [ebp+0Ch]
adc!madd+0x3a [adc.cpp @ 10]:
0029103a 134d14 adc ecx,dword ptr [ebp+14h]
adc!madd+0x4c [adc.cpp @ 11]:
0029104c 134dfc adc ecx,dword ptr [ebp-4]
0:000>