-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Description
Code to reproduce:
#include <Windows.h>
#include <stdio.h>
typedef struct { const void *x; } S;
int main(void)
{
const void *x = CreateProcess;
S s = { .x = CreateProcess };
printf(" _=%p\n", CreateProcess);
printf(" x=%p\n", x);
printf("s.x=%p\n", s.x);
printf("_.x=%p\n", ((S){.x = CreateProcess}).x);
return 0;
}

This should print out the same address four times, and indeed it does so when compiled with MSVC:
_=00007FFE1EF74F70
x=00007FFE1EF74F70
s.x=00007FFE1EF74F70
_.x=00007FFE1EF74F70
However when using clang-cl, the third example prints the wrong address:
_=00007FFE1EF74F70
x=00007FFE1EF74F70
s.x=00007FF7E5CF1E80
_.x=00007FFE1EF74F70
The wrong address in this case is actually the address of the CreateProcess trampoline (import thunk) function that's inside the executable, rather than the address of the real function in kernel32.dll that is reached through the `__imp_CreateProcessA` import pointer. This happens only when the function is used inside a struct initialiser (the third case) and, interestingly, not when a compound literal is used as a temporary (the fourth case)!
You can see this from the generated assembly for the two local variable initialisations:
With MSVC:
; Line 8: const void *x = CreateProcess;
mov rax, QWORD PTR __imp_CreateProcessA
mov QWORD PTR x$[rsp], rax
; Line 9: S s = { .x = CreateProcess };
mov rax, QWORD PTR __imp_CreateProcessA
mov QWORD PTR s$[rsp], rax

With clang-cl:
; Line 8: const void *x = CreateProcess;
mov rax, qword ptr [rip + __imp_CreateProcessA]
mov qword ptr [rsp + 56], rax
; Line 9: S s = { .x = CreateProcess };
mov rax, qword ptr [rip + .L__const.main.s]
mov qword ptr [rsp + 72], rax
.section .rdata,"dr"
.p2align 3, 0x0 # @__const.main.s
.L__const.main.s:
.quad CreateProcessA

Note: if you build with optimisations on, this doesn't happen and the code works, so this only applies to unoptimised/debug builds.
I can replicate the codegen with regular clang on Linux using, say, printf instead of CreateProcess, but when you run the compiled code it doesn't exhibit the same faulty behaviour.
Generated ASM with GCC:
movq printf@GOTPCREL(%rip), %rax
movq %rax, -8(%rbp)
movq printf@GOTPCREL(%rip), %rax
movq %rax, -16(%rbp)

Generated ASM with Clang:
movq printf@GOTPCREL(%rip), %rax
movq %rax, -16(%rbp)
movq .L__const.main.s(%rip), %rax
movq %rax, -24(%rbp)
.type .L__const.main.s,@object # @__const.main.s
.section .data.rel.ro,"aw",@progbits
.p2align 3, 0x0
.L__const.main.s:
.quad printf
.size .L__const.main.s, 8

So it looks like this is fundamentally a clang issue rather than something specific to clang-cl. On Linux the difference in codegen has no visible impact, but on Windows, presumably because of the way dynamic imports work there, the code ends up taking the address of the trampoline function rather than the "real" function.