我想更多地了解 GCC 是如何优化 C 程序的。我随便挑了一个函数,分别对它的优化版本和未优化版本做了反汇编(disas),想看看两者之间的差异。在我看来,优化后的汇编代码跳转较少,并且似乎主要使用寄存器,而未优化的汇编代码则更频繁地访问内存。除此之外,这两者之间还有哪些值得注意的区别?
C代码
/*
 * Counts the positions i in [0, index) for which intArray[i] equals `first`
 * and intArray[i + 2] equals `second`.
 *
 * index    - number of starting positions to examine
 * intArray - array to scan; elements up to intArray[index + 1] may be read
 * first    - value compared against intArray[i]
 * second   - value compared against intArray[i + 2]
 *
 * Returns the number of matching positions.
 *
 * NOTE(review): `i` is a uint while `index` is an int. Assuming `uint` is a
 * typedef for unsigned int (its definition is not shown here), the loop
 * condition is evaluated as an unsigned comparison, so a negative `index`
 * would be treated as a very large bound - confirm callers never pass one.
 */
uint countPairsUpTo(int index, int* intArray, int first, int second)
{
uint i;
uint sum = 0;
for (i = 0; i < index; i++)
/* The second element is only read when the first comparison matches (&& short-circuits). */
/* NOTE(review): the partner element is two slots ahead (i+2), not adjacent - confirm this is intended. */
if ((first == intArray[i]) && (second == intArray[i+2]))
sum++;
return sum ;
}
未优化
0x080485b1 <countPairsUpTo+0>: push %ebp
0x080485b2 <countPairsUpTo+1>: mov %esp,%ebp
0x080485b4 <countPairsUpTo+3>: sub $0x10,%esp
0x080485b7 <countPairsUpTo+6>: call 0x8048418 <mcount@plt>
0x080485bc <countPairsUpTo+11>: movl $0x0,-0x4(%ebp)
0x080485c3 <countPairsUpTo+18>: movl $0x0,-0x8(%ebp)
0x080485ca <countPairsUpTo+25>: jmp 0x80485fa <countPairsUpTo+73>
0x080485cc <countPairsUpTo+27>: mov -0x8(%ebp),%eax
0x080485cf <countPairsUpTo+30>: shl $0x2,%eax
0x080485d2 <countPairsUpTo+33>: add 0xc(%ebp),%eax
0x080485d5 <countPairsUpTo+36>: mov (%eax),%eax
0x080485d7 <countPairsUpTo+38>: cmp 0x10(%ebp),%eax
0x080485da <countPairsUpTo+41>: jne 0x80485f6 <countPairsUpTo+69>
0x080485dc <countPairsUpTo+43>: mov 0xc(%ebp),%edx
0x080485df <countPairsUpTo+46>: add $0x8,%edx
0x080485e2 <countPairsUpTo+49>: mov -0x8(%ebp),%eax
0x080485e5 <countPairsUpTo+52>: shl $0x2,%eax
0x080485e8 <countPairsUpTo+55>: lea (%edx,%eax,1),%eax
0x080485eb <countPairsUpTo+58>: mov (%eax),%eax
0x080485ed <countPairsUpTo+60>: cmp 0x14(%ebp),%eax
0x080485f0 <countPairsUpTo+63>: jne 0x80485f6 <countPairsUpTo+69>
0x080485f2 <countPairsUpTo+65>: addl $0x1,-0x4(%ebp)
0x080485f6 <countPairsUpTo+69>: addl $0x1,-0x8(%ebp)
0x080485fa <countPairsUpTo+73>: mov 0x8(%ebp),%eax
0x080485fd <countPairsUpTo+76>: cmp -0x8(%ebp),%eax
0x08048600 <countPairsUpTo+79>: ja 0x80485cc <countPairsUpTo+27>
0x08048602 <countPairsUpTo+81>: mov -0x4(%ebp),%eax
0x08048605 <countPairsUpTo+84>: leave
0x08048606 <countPairsUpTo+85>: ret
优化
0x08048570 <countPairsUpTo+0>: push %ebp
0x08048571 <countPairsUpTo+1>: mov %esp,%ebp
0x08048573 <countPairsUpTo+3>: push %edi
0x08048574 <countPairsUpTo+4>: push %esi
0x08048575 <countPairsUpTo+5>: push %ebx
0x08048576 <countPairsUpTo+6>: call 0x8048418 <mcount@plt>
0x0804857b <countPairsUpTo+11>: mov 0xc(%ebp),%ebx
0x0804857e <countPairsUpTo+14>: mov 0x10(%ebp),%esi
0x08048581 <countPairsUpTo+17>: mov 0x8(%ebp),%ecx
0x08048584 <countPairsUpTo+20>: mov $0x0,%edi
0x08048589 <countPairsUpTo+25>: test %ecx,%ecx
0x0804858b <countPairsUpTo+27>: je 0x80485b2 <countPairsUpTo+66>
0x0804858d <countPairsUpTo+29>: mov $0x0,%edi
0x08048592 <countPairsUpTo+34>: mov $0x0,%edx
0x08048597 <countPairsUpTo+39>: cmp %esi,(%ebx,%edx,4)
0x0804859a <countPairsUpTo+42>: jne 0x80485ab <countPairsUpTo+59>
0x0804859c <countPairsUpTo+44>: mov 0x14(%ebp),%eax
0x0804859f <countPairsUpTo+47>: cmp %eax,0x8(%ebx,%edx,4)
0x080485a3 <countPairsUpTo+51>: sete %al
0x080485a6 <countPairsUpTo+54>: movzbl %al,%eax
0x080485a9 <countPairsUpTo+57>: add %eax,%edi
0x080485ab <countPairsUpTo+59>: add $0x1,%edx
0x080485ae <countPairsUpTo+62>: cmp %ecx,%edx
0x080485b0 <countPairsUpTo+64>: jne 0x8048597 <countPairsUpTo+39>
0x080485b2 <countPairsUpTo+66>: mov %edi,%eax
0x080485b4 <countPairsUpTo+68>: pop %ebx
0x080485b5 <countPairsUpTo+69>: pop %esi
0x080485b6 <countPairsUpTo+70>: pop %edi
0x080485b7 <countPairsUpTo+71>: pop %ebp
0x080485b8 <countPairsUpTo+72>: ret