-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Closed
Labels
clang-cl (`clang-cl` driver. Don't use for other compiler parts), missed-optimization
Description
Consider this example https://godbolt.org/z/fEjrzrW8n
/* Minimal reproducer: a struct with a single int member. */
struct A {
int a;
};
/* Thread-local pointer to struct A. It is only declared here (extern);
   no initializer is visible in this snippet. */
extern __declspec(thread) struct A *a;
/* Returns the current value of the thread-local pointer `a`. */
struct A*
getA(void)
{
return a;
}
clang-cl 19.1.0-rc2 generates the following when targeting x64
# getA as emitted by clang-cl for x64 (Intel operand order).
# Shape: compute the TLS base, test the __tls_guard byte, call
# __dyn_tls_on_demand_init when that byte is zero, then compute the TLS
# base again and load `a` into rax as the return value.
getA: # @getA
.seh_proc getA
# %bb.0:
# Reserve 40 bytes of stack ahead of the call below (Win64 callers reserve
# 32 bytes of shadow space and keep rsp 16-byte aligned at each call).
sub rsp, 40
.seh_stackalloc 40
.seh_endprologue
# rax = TLS base: entry number _tls_index (8-byte entries) of the pointer
# table read from gs:[88].
mov eax, dword ptr [rip + _tls_index]
mov rcx, qword ptr gs:[88]
mov rax, qword ptr [rcx + 8*rax]
# Guard byte already non-zero -> skip the init call.
cmp byte ptr [rax + __tls_guard@SECREL32], 0
jne .LBB0_2
# %bb.1:
call __dyn_tls_on_demand_init
.LBB0_2:
# Same three loads as above: the TLS base is recomputed after the
# (possible) call rather than reused.
mov eax, dword ptr [rip + _tls_index]
mov rcx, qword ptr gs:[88]
mov rax, qword ptr [rcx + 8*rax]
# Return value: the pointer stored at `a`'s section-relative offset.
mov rax, qword ptr [rax + a@SECREL32]
add rsp, 40
ret
and the following when targeting arm64:
// getA as emitted by clang-cl for arm64.
// Shape: compute the TLS base, test the __tls_guard byte, call
// __dyn_tls_on_demand_init when that byte is zero, then compute the TLS
// base again and load `a` into x0 as the return value.
getA: // @getA
.seh_proc getA
// %bb.0:
// Spill x19 (callee-saved; used below to keep the _tls_index page address
// live across the call) and x30 (link register, overwritten by bl).
str x19, [sp, #-16]! // 8-byte Folded Spill
.seh_save_reg_x x19, 16
str x30, [sp, #8] // 8-byte Folded Spill
.seh_save_reg x30, 8
.seh_endprologue
// x8 = TLS base: entry number _tls_index (8-byte entries, lsl #3) of the
// pointer table read from [x18, #88].
adrp x19, _tls_index
ldr x9, [x18, #88]
ldr w8, [x19, :lo12:_tls_index]
ldr x8, [x9, x8, lsl #3]
// Guard byte address is formed in two parts (secrel_hi12 + secrel_lo12).
add x8, x8, :secrel_hi12:__tls_guard
ldrb w8, [x8, :secrel_lo12:__tls_guard]
// Guard byte already non-zero -> skip the init call.
cbnz w8, .LBB0_2
// %bb.1:
bl __dyn_tls_on_demand_init
.LBB0_2:
// _tls_index is reloaded through the preserved x19 and the TLS base is
// recomputed after the (possible) call rather than reused.
ldr w8, [x19, :lo12:_tls_index]
ldr x9, [x18, #88]
ldr x8, [x9, x8, lsl #3]
// Return value: the pointer stored at `a`'s section-relative offset.
add x8, x8, :secrel_hi12:a
ldr x0, [x8, :secrel_lo12:a]
.seh_startepilogue
// Restore the two registers spilled in the prologue.
ldr x30, [sp, #8] // 8-byte Folded Reload
.seh_save_reg x30, 8
ldr x19, [sp], #16 // 8-byte Folded Reload
.seh_save_reg_x x19, 16
.seh_endepilogue
ret
Compared to the MSVC-generated code below, clang-cl generates suboptimal code: the MSVC output contains no call to __dyn_tls_on_demand_init. In general, MSVC doesn't call __dyn_tls_on_demand_init if a thread-local variable doesn't have a constructor.
; getA as emitted by MSVC for x64: no guard test, no call, and no stack
; adjustment -- only the TLS lookup and the load of `a`.
; NOTE(review): the leading "x64" on the PROC line appears to be the target
; label from the surrounding issue text fused onto the listing, not part of
; the MASM output.
x64 getA PROC ; COMDAT
; rax = TLS base: entry number _tls_index (8-byte entries) of the pointer
; table read from gs:88.
mov ecx, DWORD PTR _tls_index
mov rax, QWORD PTR gs:88
; edx = offset of `a`; it is added to the TLS base in the final load.
mov edx, OFFSET FLAT:a
mov rax, QWORD PTR [rax+rcx*8]
; Return value: the pointer stored in `a`.
mov rax, QWORD PTR [rdx+rax]
ret 0
getA ENDP
; getA as emitted by MSVC for arm64: no guard test, no call, and no
; register spills -- only the TLS lookup and the load of `a`.
; NOTE(review): the leading "arm64" on the PROC line appears to be the
; target label from the surrounding issue text fused onto the listing.
arm64 |getA| PROC
; w9 = _tls_index.
adrp x8,_tls_index
ldr w9,[x8,_tls_index]
; x8 = pointer table at offset 0x58 (= 88) from xpr.
; NOTE(review): xpr is presumably MSVC's name for x18, the register the
; clang listing uses with the same offset -- confirm.
ldr x8,[xpr,#0x58]
; x8 = TLS base: entry w9 of that table (8-byte entries, uxtw #3).
ldr x8,[x8,w9 uxtw #3]
; Offset of `a` is applied in two parts: a part shifted left by 12 in the
; add, then the remainder as the load displacement.
add x8,x8,a,lsl #0xC
; Return value: the pointer stored in `a`.
ldr x0,[x8,a]
ret
Metadata
Metadata
Assignees
Labels
clang-cl (`clang-cl` driver. Don't use for other compiler parts), missed-optimization