Есть такой вопрос. Предыстория
здесь. Если вкратце, то компилятор Intel известен тем, что вставляет в программу функцию проверки типа процессора,
и в зависимости от его типа программа подключает математические и другие функции с разным уровнем оптимизации (SSE2, SSE3, SSE4.x и т. д.).
При этом проверка работает ущербно, если ей попадается процессор AMD (
пруфлинк ).
Проблема в том, что новый компилятор Intel генерирует слегка другой код функции проверки, и
патч для программ больше не работает.
Я в ассемблере не силён, а тем более в ассемблере с 64-битными инструкциями. Ниже для сравнения приведены функция проверки от старого компилятора (оригинальная и исправленная патчем),
а также код функции проверки, вставляемый в программу новым компилятором.
Прошу знающих людей посмотреть на код и объяснить, что происходит в новой функции по сравнению со старыми (оригинальной и модифицированной патчем).
[more=код здесь]
Код: #old intel compiler, original code
# __intel_cpu_indicator_init -- objdump listing, x86-64, AT&T operand order.
#
# Reads no argument registers. Every register pushed on entry is popped
# again before retq (including %rax), so the only visible output is one
# 32-bit value stored through a pointer that is loaded RIP-relative at 507a24.
#
# Flow: probe for CPUID support, capture CPUID leaves 0 and 1 in stack slots,
# compare the leaf-0 vendor words against "GenuineIntel", then pick the value
# to store from the family field and the leaf-1 ECX feature bits. The stored
# value stays at its default of 1 when the vendor compare fails, when leaf-0
# EAX is zero, or when the family field is neither 0xf nor 6.
#
# Stack slots (offsets from %rbp):
#   -0x08 leaf0 EAX   -0x04 leaf0 EBX   -0x14 leaf0 ECX   -0x0c leaf0 EDX
#   -0x10 leaf1 EAX   -0x20 leaf1 EBX   -0x18 leaf1 ECX   -0x1c leaf1 EDX
00000000005078f0 <__intel_cpu_indicator_init>:
# Save registers; they are restored in reverse order at 507a31..507a3f.
5078f0: 50 push %rax
5078f1: 52 push %rdx
5078f2: 51 push %rcx
5078f3: 53 push %rbx
5078f4: 57 push %rdi
5078f5: 56 push %rsi
5078f6: 55 push %rbp
5078f7: 41 54 push %r12
5078f9: 41 55 push %r13
5078fb: 41 56 push %r14
5078fd: 41 57 push %r15
# Frame pointer = current stack top; reserve 0x50 bytes below it for the slots.
5078ff: 48 89 e5 mov %rsp,%rbp
507902: 48 83 ec 50 sub $0x50,%rsp
# CPUID-support probe: copy RFLAGS to %rax/%rcx, flip bit 21 (0x200000, the
# ID flag), write it back and read RFLAGS again. If the value read back
# equals the original (the bit could not be flipped) take the no-CPUID path.
507906: 9c pushfq
507907: 58 pop %rax
507908: 48 89 c1 mov %rax,%rcx
50790b: 48 35 00 00 20 00 xor $0x200000,%rax
507911: 50 push %rax
507912: 9d popfq
507913: 9c pushfq
507914: 58 pop %rax
507915: 48 3b c1 cmp %rcx,%rax
507918: 74 2a je 507944 <__intel_cpu_indicator_init+0x54>
# Restore the original RFLAGS value saved in %rcx.
50791a: 51 push %rcx
50791b: 9d popfq
# CPUID leaf 0: EAX = highest supported leaf, EBX/EDX/ECX = vendor string.
50791c: 48 33 c0 xor %rax,%rax
50791f: 0f a2 cpuid
507921: 89 45 f8 mov %eax,-0x8(%rbp)
507924: 89 5d fc mov %ebx,-0x4(%rbp)
507927: 89 4d ec mov %ecx,-0x14(%rbp)
50792a: 89 55 f4 mov %edx,-0xc(%rbp)
# CPUID leaf 1: EAX = family/model/stepping, ECX/EDX = feature bits.
50792d: 48 c7 c0 01 00 00 00 mov $0x1,%rax
507934: 0f a2 cpuid
507936: 89 45 f0 mov %eax,-0x10(%rbp)
507939: 89 5d e0 mov %ebx,-0x20(%rbp)
50793c: 89 4d e8 mov %ecx,-0x18(%rbp)
50793f: 89 55 e4 mov %edx,-0x1c(%rbp)
507942: eb 1b jmp 50795f <__intel_cpu_indicator_init+0x6f>
# No-CPUID path: zero all eight slots so the checks below fall to the default.
507944: 48 33 c0 xor %rax,%rax
507947: 89 45 f8 mov %eax,-0x8(%rbp)
50794a: 89 45 fc mov %eax,-0x4(%rbp)
50794d: 89 45 ec mov %eax,-0x14(%rbp)
507950: 89 45 f4 mov %eax,-0xc(%rbp)
507953: 89 45 f0 mov %eax,-0x10(%rbp)
507956: 89 45 e0 mov %eax,-0x20(%rbp)
507959: 89 45 e8 mov %eax,-0x18(%rbp)
50795c: 89 45 e4 mov %eax,-0x1c(%rbp)
# Vendor check. %ebx holds the result and starts at the default 1.
# 0x756e6547 is the little-endian ASCII for "Genu" (leaf0 EBX).
50795f: 8b 45 fc mov -0x4(%rbp),%eax
507962: bb 01 00 00 00 mov $0x1,%ebx
507967: 3d 47 65 6e 75 cmp $0x756e6547,%eax
50796c: 75 1b jne 507989 <__intel_cpu_indicator_init+0x99>
# 0x49656e69 = "ineI" (leaf0 EDX).
50796e: 8b 45 f4 mov -0xc(%rbp),%eax
507971: 3d 69 6e 65 49 cmp $0x49656e69,%eax
507976: 75 11 jne 507989 <__intel_cpu_indicator_init+0x99>
# 0x6c65746e = "ntel" (leaf0 ECX).
507978: 8b 45 ec mov -0x14(%rbp),%eax
50797b: 3d 6e 74 65 6c cmp $0x6c65746e,%eax
507980: 75 07 jne 507989 <__intel_cpu_indicator_init+0x99>
# %edx = 1 when all three words matched, 0 otherwise.
507982: ba 01 00 00 00 mov $0x1,%edx
507987: eb 02 jmp 50798b <__intel_cpu_indicator_init+0x9b>
507989: 33 d2 xor %edx,%edx
# Store the default if leaf0 EAX is zero or the vendor flag is clear.
50798b: 8b 45 f8 mov -0x8(%rbp),%eax
50798e: 85 c0 test %eax,%eax
507990: 0f 84 8e 00 00 00 je 507a24 <__intel_cpu_indicator_init+0x134>
507996: 85 d2 test %edx,%edx
507998: 0f 84 86 00 00 00 je 507a24 <__intel_cpu_indicator_init+0x134>
# Byte at -0xf is bits 8..15 of leaf1 EAX; the low nibble is the family field.
50799e: 0f b6 45 f1 movzbl -0xf(%rbp),%eax
5079a2: 83 e0 0f and $0xf,%eax
5079a5: 83 f8 0f cmp $0xf,%eax
5079a8: 75 15 jne 5079bf <__intel_cpu_indicator_init+0xcf>
# Family 0xf: result = 0x800 if leaf1 ECX bit 0 (SSE3) is set, else 1.
5079aa: 8b 45 e8 mov -0x18(%rbp),%eax
5079ad: ba 00 08 00 00 mov $0x800,%edx
5079b2: bb 01 00 00 00 mov $0x1,%ebx
5079b7: 83 e0 01 and $0x1,%eax
5079ba: 0f 45 da cmovne %edx,%ebx
5079bd: eb 65 jmp 507a24 <__intel_cpu_indicator_init+0x134>
# Any family other than 0xf or 6 keeps the default result of 1.
5079bf: 83 f8 06 cmp $0x6,%eax
5079c2: 75 60 jne 507a24 <__intel_cpu_indicator_init+0x134>
# Family 6: walk the leaf1 ECX feature bits; each later hit overwrites %ebx.
# Bit 0 (SSE3): 0x800 if set, otherwise 1. %ecx preloads 0x10000 for the end.
5079c4: 8b 55 e8 mov -0x18(%rbp),%edx
5079c7: b9 00 00 01 00 mov $0x10000,%ecx
5079cc: bb 00 08 00 00 mov $0x800,%ebx
5079d1: f6 c2 01 test $0x1,%dl
5079d4: 75 05 jne 5079db <__intel_cpu_indicator_init+0xeb>
5079d6: bb 01 00 00 00 mov $0x1,%ebx
# Bit 9 (SSSE3) -> 0x1000. The mov to %eax does not alter the flags from test.
5079db: f7 c2 00 02 00 00 test $0x200,%edx
5079e1: 89 d0 mov %edx,%eax
5079e3: 74 05 je 5079ea <__intel_cpu_indicator_init+0xfa>
5079e5: bb 00 10 00 00 mov $0x1000,%ebx
# Bit 22 (MOVBE) -> 0x4000.
5079ea: f7 c2 00 00 40 00 test $0x400000,%edx
5079f0: 74 05 je 5079f7 <__intel_cpu_indicator_init+0x107>
5079f2: bb 00 40 00 00 mov $0x4000,%ebx
# Bit 19 (SSE4.1) -> 0x2000; tested after bit 22, so it replaces 0x4000.
5079f7: f7 c2 00 00 08 00 test $0x80000,%edx
5079fd: 74 05 je 507a04 <__intel_cpu_indicator_init+0x114>
5079ff: bb 00 20 00 00 mov $0x2000,%ebx
# Bits 20 and 23 (SSE4.2 and POPCNT) both set -> 0x8000.
507a04: 25 00 00 90 00 and $0x900000,%eax
507a09: 3d 00 00 90 00 cmp $0x900000,%eax
507a0e: 75 05 jne 507a15 <__intel_cpu_indicator_init+0x125>
507a10: bb 00 80 00 00 mov $0x8000,%ebx
# Bits 1 and 25 (PCLMULQDQ and AES) both set -> 0x10000 (from %ecx).
507a15: 81 e2 02 00 00 02 and $0x2000002,%edx
507a1b: 81 fa 02 00 00 02 cmp $0x2000002,%edx
507a21: 0f 44 d9 cmove %ecx,%ebx
# Common exit: load a pointer from 0x778f50 and store the result through it.
# NOTE(review): presumably a GOT slot for the CPU-indicator variable -- the
# target symbol is not visible in this listing.
507a24: 48 8b 05 25 15 27 00 mov 0x271525(%rip),%rax # 778f50 <_DYNAMIC+0x308>
507a2b: 89 18 mov %ebx,(%rax)
# Release the locals and restore the saved registers.
507a2d: 48 83 c4 50 add $0x50,%rsp
507a31: 41 5f pop %r15
507a33: 41 5e pop %r14
507a35: 41 5d pop %r13
507a37: 41 5c pop %r12
507a39: 5d pop %rbp
507a3a: 5e pop %rsi
507a3b: 5f pop %rdi
507a3c: 5b pop %rbx
507a3d: 59 pop %rcx
507a3e: 5a pop %rdx
507a3f: 58 pop %rax
507a40: c3 retq
# Bytes after retq are lea instructions that leave their register unchanged;
# nothing falls through to them (presumably alignment padding).
507a41: 48 8d b4 26 00 00 00 lea 0x0(%rsi,%riz,1),%rsi
507a48: 00
507a49: 48 8d bf 00 00 00 00 lea 0x0(%rdi),%rdi
#old intel compiler, patched code with intel_check_executable_patch.pl
# __intel_cpu_indicator_init, patched build -- objdump listing, x86-64,
# AT&T operand order.
#
# This listing differs from the unpatched listing earlier in the file in
# three instructions only, at 507967, 507971 and 50797b. There the 5-byte
# "cmp $imm32,%eax" (opcode 3d) is replaced by the 5-byte "test $0x0,%eax"
# (opcode a9). ANDing with 0 always sets ZF, so the following jne is never
# taken and the vendor flag in %edx is always set to 1. The family and
# feature-bit selection therefore runs for any vendor string.
#
# Reads no argument registers. Every register pushed on entry is popped
# again before retq, so the only visible output is one 32-bit value stored
# through a pointer that is loaded RIP-relative at 507a24.
#
# Stack slots (offsets from %rbp):
#   -0x08 leaf0 EAX   -0x04 leaf0 EBX   -0x14 leaf0 ECX   -0x0c leaf0 EDX
#   -0x10 leaf1 EAX   -0x20 leaf1 EBX   -0x18 leaf1 ECX   -0x1c leaf1 EDX
00000000005078f0 <__intel_cpu_indicator_init>:
# Save registers; they are restored in reverse order at 507a31..507a3f.
5078f0: 50 push %rax
5078f1: 52 push %rdx
5078f2: 51 push %rcx
5078f3: 53 push %rbx
5078f4: 57 push %rdi
5078f5: 56 push %rsi
5078f6: 55 push %rbp
5078f7: 41 54 push %r12
5078f9: 41 55 push %r13
5078fb: 41 56 push %r14
5078fd: 41 57 push %r15
# Frame pointer = current stack top; reserve 0x50 bytes below it for the slots.
5078ff: 48 89 e5 mov %rsp,%rbp
507902: 48 83 ec 50 sub $0x50,%rsp
# CPUID-support probe: flip RFLAGS bit 21 (0x200000, the ID flag), write it
# back and re-read. If the value read back equals the original, take the
# no-CPUID path at 507944.
507906: 9c pushfq
507907: 58 pop %rax
507908: 48 89 c1 mov %rax,%rcx
50790b: 48 35 00 00 20 00 xor $0x200000,%rax
507911: 50 push %rax
507912: 9d popfq
507913: 9c pushfq
507914: 58 pop %rax
507915: 48 3b c1 cmp %rcx,%rax
507918: 74 2a je 507944 <__intel_cpu_indicator_init+0x54>
# Restore the original RFLAGS value saved in %rcx.
50791a: 51 push %rcx
50791b: 9d popfq
# CPUID leaf 0: EAX = highest supported leaf, EBX/EDX/ECX = vendor string.
50791c: 48 33 c0 xor %rax,%rax
50791f: 0f a2 cpuid
507921: 89 45 f8 mov %eax,-0x8(%rbp)
507924: 89 5d fc mov %ebx,-0x4(%rbp)
507927: 89 4d ec mov %ecx,-0x14(%rbp)
50792a: 89 55 f4 mov %edx,-0xc(%rbp)
# CPUID leaf 1: EAX = family/model/stepping, ECX/EDX = feature bits.
50792d: 48 c7 c0 01 00 00 00 mov $0x1,%rax
507934: 0f a2 cpuid
507936: 89 45 f0 mov %eax,-0x10(%rbp)
507939: 89 5d e0 mov %ebx,-0x20(%rbp)
50793c: 89 4d e8 mov %ecx,-0x18(%rbp)
50793f: 89 55 e4 mov %edx,-0x1c(%rbp)
507942: eb 1b jmp 50795f <__intel_cpu_indicator_init+0x6f>
# No-CPUID path: zero all eight slots.
507944: 48 33 c0 xor %rax,%rax
507947: 89 45 f8 mov %eax,-0x8(%rbp)
50794a: 89 45 fc mov %eax,-0x4(%rbp)
50794d: 89 45 ec mov %eax,-0x14(%rbp)
507950: 89 45 f4 mov %eax,-0xc(%rbp)
507953: 89 45 f0 mov %eax,-0x10(%rbp)
507956: 89 45 e0 mov %eax,-0x20(%rbp)
507959: 89 45 e8 mov %eax,-0x18(%rbp)
50795c: 89 45 e4 mov %eax,-0x1c(%rbp)
# Neutralised vendor check. %ebx holds the result and starts at the default 1.
# PATCHED: test with mask 0 always yields ZF=1, so this jne never branches.
50795f: 8b 45 fc mov -0x4(%rbp),%eax
507962: bb 01 00 00 00 mov $0x1,%ebx
507967: a9 00 00 00 00 test $0x0,%eax
50796c: 75 1b jne 507989 <__intel_cpu_indicator_init+0x99>
# PATCHED: same substitution for the second vendor word (leaf0 EDX).
50796e: 8b 45 f4 mov -0xc(%rbp),%eax
507971: a9 00 00 00 00 test $0x0,%eax
507976: 75 11 jne 507989 <__intel_cpu_indicator_init+0x99>
# PATCHED: same substitution for the third vendor word (leaf0 ECX).
507978: 8b 45 ec mov -0x14(%rbp),%eax
50797b: a9 00 00 00 00 test $0x0,%eax
507980: 75 07 jne 507989 <__intel_cpu_indicator_init+0x99>
# Always reached: vendor flag %edx = 1. The xor at 507989 is now dead code.
507982: ba 01 00 00 00 mov $0x1,%edx
507987: eb 02 jmp 50798b <__intel_cpu_indicator_init+0x9b>
507989: 33 d2 xor %edx,%edx
# Store the default if leaf0 EAX is zero (e.g. on the no-CPUID path). The
# vendor-flag test that follows can no longer fail.
50798b: 8b 45 f8 mov -0x8(%rbp),%eax
50798e: 85 c0 test %eax,%eax
507990: 0f 84 8e 00 00 00 je 507a24 <__intel_cpu_indicator_init+0x134>
507996: 85 d2 test %edx,%edx
507998: 0f 84 86 00 00 00 je 507a24 <__intel_cpu_indicator_init+0x134>
# Byte at -0xf is bits 8..15 of leaf1 EAX; the low nibble is the family field.
# NOTE(review): this family test is unchanged by the patch, so a CPU whose
# family field is neither 0xf nor 6 still ends up with the default 1.
50799e: 0f b6 45 f1 movzbl -0xf(%rbp),%eax
5079a2: 83 e0 0f and $0xf,%eax
5079a5: 83 f8 0f cmp $0xf,%eax
5079a8: 75 15 jne 5079bf <__intel_cpu_indicator_init+0xcf>
# Family 0xf: result = 0x800 if leaf1 ECX bit 0 (SSE3) is set, else 1.
5079aa: 8b 45 e8 mov -0x18(%rbp),%eax
5079ad: ba 00 08 00 00 mov $0x800,%edx
5079b2: bb 01 00 00 00 mov $0x1,%ebx
5079b7: 83 e0 01 and $0x1,%eax
5079ba: 0f 45 da cmovne %edx,%ebx
5079bd: eb 65 jmp 507a24 <__intel_cpu_indicator_init+0x134>
5079bf: 83 f8 06 cmp $0x6,%eax
5079c2: 75 60 jne 507a24 <__intel_cpu_indicator_init+0x134>
# Family 6: walk the leaf1 ECX feature bits; each later hit overwrites %ebx.
# Bit 0 (SSE3): 0x800 if set, otherwise 1. %ecx preloads 0x10000 for the end.
5079c4: 8b 55 e8 mov -0x18(%rbp),%edx
5079c7: b9 00 00 01 00 mov $0x10000,%ecx
5079cc: bb 00 08 00 00 mov $0x800,%ebx
5079d1: f6 c2 01 test $0x1,%dl
5079d4: 75 05 jne 5079db <__intel_cpu_indicator_init+0xeb>
5079d6: bb 01 00 00 00 mov $0x1,%ebx
# Bit 9 (SSSE3) -> 0x1000. The mov to %eax does not alter the flags from test.
5079db: f7 c2 00 02 00 00 test $0x200,%edx
5079e1: 89 d0 mov %edx,%eax
5079e3: 74 05 je 5079ea <__intel_cpu_indicator_init+0xfa>
5079e5: bb 00 10 00 00 mov $0x1000,%ebx
# Bit 22 (MOVBE) -> 0x4000.
5079ea: f7 c2 00 00 40 00 test $0x400000,%edx
5079f0: 74 05 je 5079f7 <__intel_cpu_indicator_init+0x107>
5079f2: bb 00 40 00 00 mov $0x4000,%ebx
# Bit 19 (SSE4.1) -> 0x2000; tested after bit 22, so it replaces 0x4000.
5079f7: f7 c2 00 00 08 00 test $0x80000,%edx
5079fd: 74 05 je 507a04 <__intel_cpu_indicator_init+0x114>
5079ff: bb 00 20 00 00 mov $0x2000,%ebx
# Bits 20 and 23 (SSE4.2 and POPCNT) both set -> 0x8000.
507a04: 25 00 00 90 00 and $0x900000,%eax
507a09: 3d 00 00 90 00 cmp $0x900000,%eax
507a0e: 75 05 jne 507a15 <__intel_cpu_indicator_init+0x125>
507a10: bb 00 80 00 00 mov $0x8000,%ebx
# Bits 1 and 25 (PCLMULQDQ and AES) both set -> 0x10000 (from %ecx).
507a15: 81 e2 02 00 00 02 and $0x2000002,%edx
507a1b: 81 fa 02 00 00 02 cmp $0x2000002,%edx
507a21: 0f 44 d9 cmove %ecx,%ebx
# Common exit: load a pointer from 0x778f50 and store the result through it.
# NOTE(review): presumably a GOT slot for the CPU-indicator variable -- the
# target symbol is not visible in this listing.
507a24: 48 8b 05 25 15 27 00 mov 0x271525(%rip),%rax # 778f50 <_DYNAMIC+0x308>
507a2b: 89 18 mov %ebx,(%rax)
# Release the locals and restore the saved registers.
507a2d: 48 83 c4 50 add $0x50,%rsp
507a31: 41 5f pop %r15
507a33: 41 5e pop %r14
507a35: 41 5d pop %r13
507a37: 41 5c pop %r12
507a39: 5d pop %rbp
507a3a: 5e pop %rsi
507a3b: 5f pop %rdi
507a3c: 5b pop %rbx
507a3d: 59 pop %rcx
507a3e: 5a pop %rdx
507a3f: 58 pop %rax
507a40: c3 retq
# Bytes after retq are lea instructions that leave their register unchanged;
# nothing falls through to them (presumably alignment padding).
507a41: 48 8d b4 26 00 00 00 lea 0x0(%rsi,%riz,1),%rsi
507a48: 00
507a49: 48 8d bf 00 00 00 00 lea 0x0(%rdi),%rdi
#new intel compiler original code
# __intel_cpu_indicator_init, newer compiler -- objdump listing, x86-64,
# AT&T operand order.
#
# Same overall flow as the older listings in this file: probe for CPUID,
# capture leaves 0 and 1, compare the vendor words against "GenuineIntel",
# then derive one 32-bit value and store it through a pointer loaded
# RIP-relative at 52d0a0. Differences visible in this listing:
#   * %r8 and %r9 are saved and restored as well;
#   * the result is built in %esi (not %ebx) and the vendor flag in %eax;
#   * the stack slots sit at different offsets (table below);
#   * the vendor words are compared with "cmpl $imm32,disp(%rbp)" directly
#     against memory (bytes 81 7d <disp> <imm32>) instead of a load followed
#     by "cmp $imm32,%eax" (bytes 3d <imm32>);
#   * an extra stage at 52d073..52d09d uses xgetbv and can select 0x20000.
# NOTE(review): a patch that searches for the old "3d <imm32>" byte pattern
# would presumably find nothing to replace here -- confirm against the script.
#
# Stack slots (offsets from %rbp):
#   -0x14 leaf0 EAX   -0x10 leaf0 EBX   -0x20 leaf0 ECX   -0x18 leaf0 EDX
#   -0x1c leaf1 EAX   -0x30 leaf1 EBX   -0x24 leaf1 ECX   -0x2c leaf1 EDX
#   -0x28 low 32 bits returned by xgetbv
000000000052cf40 <__intel_cpu_indicator_init>:
# Save registers; they are restored in reverse order at 52d0ad..52d0bf.
52cf40: 50 push %rax
52cf41: 52 push %rdx
52cf42: 51 push %rcx
52cf43: 53 push %rbx
52cf44: 57 push %rdi
52cf45: 56 push %rsi
52cf46: 55 push %rbp
52cf47: 41 50 push %r8
52cf49: 41 51 push %r9
52cf4b: 41 54 push %r12
52cf4d: 41 55 push %r13
52cf4f: 41 56 push %r14
52cf51: 41 57 push %r15
# Frame pointer = current stack top; reserve 0x50 bytes below it for the slots.
52cf53: 48 89 e5 mov %rsp,%rbp
52cf56: 48 83 ec 50 sub $0x50,%rsp
# CPUID-support probe: flip RFLAGS bit 21 (0x200000, the ID flag), write it
# back and re-read. If the value read back equals the original, take the
# no-CPUID path at 52cf98.
52cf5a: 9c pushfq
52cf5b: 58 pop %rax
52cf5c: 48 89 c1 mov %rax,%rcx
52cf5f: 48 35 00 00 20 00 xor $0x200000,%rax
52cf65: 50 push %rax
52cf66: 9d popfq
52cf67: 9c pushfq
52cf68: 58 pop %rax
52cf69: 48 3b c1 cmp %rcx,%rax
52cf6c: 74 2a je 52cf98 <__intel_cpu_indicator_init+0x58>
# Restore the original RFLAGS value saved in %rcx.
52cf6e: 51 push %rcx
52cf6f: 9d popfq
# CPUID leaf 0: EAX = highest supported leaf, EBX/EDX/ECX = vendor string.
52cf70: 48 33 c0 xor %rax,%rax
52cf73: 0f a2 cpuid
52cf75: 89 45 ec mov %eax,-0x14(%rbp)
52cf78: 89 5d f0 mov %ebx,-0x10(%rbp)
52cf7b: 89 4d e0 mov %ecx,-0x20(%rbp)
52cf7e: 89 55 e8 mov %edx,-0x18(%rbp)
# CPUID leaf 1: EAX = family/model/stepping, ECX/EDX = feature bits.
52cf81: 48 c7 c0 01 00 00 00 mov $0x1,%rax
52cf88: 0f a2 cpuid
52cf8a: 89 45 e4 mov %eax,-0x1c(%rbp)
52cf8d: 89 5d d0 mov %ebx,-0x30(%rbp)
52cf90: 89 4d dc mov %ecx,-0x24(%rbp)
52cf93: 89 55 d4 mov %edx,-0x2c(%rbp)
52cf96: eb 1b jmp 52cfb3 <__intel_cpu_indicator_init+0x73>
# No-CPUID path: zero all eight CPUID slots.
52cf98: 48 33 c0 xor %rax,%rax
52cf9b: 89 45 ec mov %eax,-0x14(%rbp)
52cf9e: 89 45 f0 mov %eax,-0x10(%rbp)
52cfa1: 89 45 e0 mov %eax,-0x20(%rbp)
52cfa4: 89 45 e8 mov %eax,-0x18(%rbp)
52cfa7: 89 45 e4 mov %eax,-0x1c(%rbp)
52cfaa: 89 45 d0 mov %eax,-0x30(%rbp)
52cfad: 89 45 dc mov %eax,-0x24(%rbp)
52cfb0: 89 45 d4 mov %eax,-0x2c(%rbp)
# Vendor check. %esi holds the result and starts at the default 1.
# 0x756e6547 = "Genu" (leaf0 EBX), compared directly against the stack slot.
52cfb3: be 01 00 00 00 mov $0x1,%esi
52cfb8: 81 7d f0 47 65 6e 75 cmpl $0x756e6547,-0x10(%rbp)
52cfbf: 75 16 jne 52cfd7 <__intel_cpu_indicator_init+0x97>
# 0x49656e69 = "ineI" (leaf0 EDX).
52cfc1: 81 7d e8 69 6e 65 49 cmpl $0x49656e69,-0x18(%rbp)
52cfc8: 75 0d jne 52cfd7 <__intel_cpu_indicator_init+0x97>
# 0x6c65746e = "ntel" (leaf0 ECX).
52cfca: 81 7d e0 6e 74 65 6c cmpl $0x6c65746e,-0x20(%rbp)
52cfd1: 75 04 jne 52cfd7 <__intel_cpu_indicator_init+0x97>
# %eax = 1 (copied from %esi) when all three words matched, 0 otherwise.
52cfd3: 89 f0 mov %esi,%eax
52cfd5: eb 02 jmp 52cfd9 <__intel_cpu_indicator_init+0x99>
52cfd7: 33 c0 xor %eax,%eax
# Store the default if leaf0 EAX is zero or the vendor flag is clear.
52cfd9: 83 7d ec 00 cmpl $0x0,-0x14(%rbp)
52cfdd: 0f 84 bd 00 00 00 je 52d0a0 <__intel_cpu_indicator_init+0x160>
52cfe3: 85 c0 test %eax,%eax
52cfe5: 0f 84 b5 00 00 00 je 52d0a0 <__intel_cpu_indicator_init+0x160>
# Byte at -0x1b is bits 8..15 of leaf1 EAX; the low nibble is the family field.
52cfeb: 0f b6 45 e5 movzbl -0x1b(%rbp),%eax
52cfef: 83 e0 0f and $0xf,%eax
52cff2: 83 f8 0f cmp $0xf,%eax
52cff5: 75 11 jne 52d008 <__intel_cpu_indicator_init+0xc8>
# Family 0xf: result = 0x800 if leaf1 ECX bit 0 (SSE3) is set, else stays 1.
52cff7: b8 00 08 00 00 mov $0x800,%eax
52cffc: f6 45 dc 01 testb $0x1,-0x24(%rbp)
52d000: 0f 45 f0 cmovne %eax,%esi
52d003: e9 98 00 00 00 jmpq 52d0a0 <__intel_cpu_indicator_init+0x160>
# Any family other than 0xf or 6 keeps the default result of 1.
52d008: 83 f8 06 cmp $0x6,%eax
52d00b: 0f 85 8f 00 00 00 jne 52d0a0 <__intel_cpu_indicator_init+0x160>
# Family 6: walk the leaf1 ECX feature bits (kept in %ebx); each later hit
# overwrites %esi. Bit 0 (SSE3): 0x800 if set, otherwise 1.
52d011: 8b 5d dc mov -0x24(%rbp),%ebx
52d014: b9 00 00 01 00 mov $0x10000,%ecx
52d019: be 00 08 00 00 mov $0x800,%esi
52d01e: f6 c3 01 test $0x1,%bl
52d021: 75 05 jne 52d028 <__intel_cpu_indicator_init+0xe8>
52d023: be 01 00 00 00 mov $0x1,%esi
# Bit 9 (SSSE3) -> 0x1000. The two movs copy the feature word for the masked
# compares below and do not alter the flags from test.
52d028: f7 c3 00 02 00 00 test $0x200,%ebx
52d02e: 89 d8 mov %ebx,%eax
52d030: 89 da mov %ebx,%edx
52d032: 74 05 je 52d039 <__intel_cpu_indicator_init+0xf9>
52d034: be 00 10 00 00 mov $0x1000,%esi
# Bit 22 (MOVBE) -> 0x4000.
52d039: f7 c3 00 00 40 00 test $0x400000,%ebx
52d03f: 74 05 je 52d046 <__intel_cpu_indicator_init+0x106>
52d041: be 00 40 00 00 mov $0x4000,%esi
# Bit 19 (SSE4.1) -> 0x2000; tested after bit 22, so it replaces 0x4000.
52d046: f7 c3 00 00 08 00 test $0x80000,%ebx
52d04c: 74 05 je 52d053 <__intel_cpu_indicator_init+0x113>
52d04e: be 00 20 00 00 mov $0x2000,%esi
# Bits 20 and 23 (SSE4.2 and POPCNT) both set -> 0x8000.
52d053: 25 00 00 90 00 and $0x900000,%eax
52d058: 3d 00 00 90 00 cmp $0x900000,%eax
52d05d: 75 05 jne 52d064 <__intel_cpu_indicator_init+0x124>
52d05f: be 00 80 00 00 mov $0x8000,%esi
# Bits 1 and 25 (PCLMULQDQ and AES) both set -> 0x10000 (from %ecx).
52d064: 81 e2 02 00 00 02 and $0x2000002,%edx
52d06a: 81 fa 02 00 00 02 cmp $0x2000002,%edx
52d070: 0f 44 f1 cmove %ecx,%esi
# Stage absent from the older listings. Only if leaf1 ECX bit 27 (OSXSAVE)
# is set is xgetbv executed with %ecx = 0 (XCR0); its low word is kept.
52d073: f7 c3 00 00 00 08 test $0x8000000,%ebx
52d079: 74 25 je 52d0a0 <__intel_cpu_indicator_init+0x160>
52d07b: b9 00 00 00 00 mov $0x0,%ecx
52d080: 0f 01 d0 xgetbv
52d083: 89 45 d8 mov %eax,-0x28(%rbp)
# If leaf1 ECX bit 28 (AVX) is set and XCR0 bits 1 and 2 are both set
# (mask 0x6: XMM and YMM state enabled), the result becomes 0x20000.
52d086: f7 45 dc 00 00 00 10 testl $0x10000000,-0x24(%rbp)
52d08d: 74 11 je 52d0a0 <__intel_cpu_indicator_init+0x160>
52d08f: 8b 45 d8 mov -0x28(%rbp),%eax
52d092: ba 00 00 02 00 mov $0x20000,%edx
52d097: 83 e0 06 and $0x6,%eax
52d09a: 83 f8 06 cmp $0x6,%eax
52d09d: 0f 44 f2 cmove %edx,%esi
# Common exit: load a pointer from 0x797f50 and store the result through it.
# NOTE(review): presumably a GOT slot for the CPU-indicator variable -- the
# target symbol is not visible in this listing.
52d0a0: 48 8b 05 a9 ae 26 00 mov 0x26aea9(%rip),%rax # 797f50 <_DYNAMIC+0x2f8>
52d0a7: 89 30 mov %esi,(%rax)
# Release the locals and restore the saved registers.
52d0a9: 48 83 c4 50 add $0x50,%rsp
52d0ad: 41 5f pop %r15
52d0af: 41 5e pop %r14
52d0b1: 41 5d pop %r13
52d0b3: 41 5c pop %r12
52d0b5: 41 59 pop %r9
52d0b7: 41 58 pop %r8
52d0b9: 5d pop %rbp
52d0ba: 5e pop %rsi
52d0bb: 5f pop %rdi
52d0bc: 5b pop %rbx
52d0bd: 59 pop %rcx
52d0be: 5a pop %rdx
52d0bf: 58 pop %rax
52d0c0: c3 retq
# Bytes after retq are lea instructions that leave their register unchanged;
# nothing falls through to them (presumably alignment padding).
52d0c1: 48 8d b4 26 00 00 00 lea 0x0(%rsi,%riz,1),%rsi
52d0c8: 00
52d0c9: 48 8d bf 00 00 00 00 lea 0x0(%rdi),%rdi