Skip to content

Commit 28328c3

Browse files
committed
Use musttail for variadic method thunks when possible
This avoids cloning variadic virtual methods when the target supports musttail and the return type is not covariant. I think we never implemented this previously because it doesn't handle the covariant case. But, in the MS ABI, there are some cases where vtable thunks must be emitted even when the variadic method definition is not available, so it looks like we need to implement this. Do it for both ABIs, since it's a nice size improvement and simplification for Itanium. Emit an error when emitting thunks for variadic methods with a covariant return type. This case is essentially not implementable unless the ABI provides a way to perfectly forward variadic arguments without a tail call. Fixes PR43173. Differential Revision: https://reviews.llvm.org/D67028 llvm-svn: 371269
1 parent c177919 commit 28328c3

File tree

4 files changed

+184
-19
lines changed

4 files changed

+184
-19
lines changed

clang/lib/CodeGen/CGVTables.cpp

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,15 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
166166
llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true);
167167
llvm::Function *BaseFn = cast<llvm::Function>(Callee);
168168

169+
// Cloning can't work if we don't have a definition. The Microsoft ABI may
170+
// require thunks when a definition is not available. Emit an error in these
171+
// cases.
172+
if (!MD->isDefined()) {
173+
CGM.ErrorUnsupported(MD, "return-adjusting thunk with variadic arguments");
174+
return Fn;
175+
}
176+
assert(!BaseFn->isDeclaration() && "cannot clone undefined variadic method");
177+
169178
// Clone to thunk.
170179
llvm::ValueToValueMapTy VMap;
171180

@@ -201,6 +210,8 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
201210
Builder.SetInsertPoint(&*ThisStore);
202211
llvm::Value *AdjustedThisPtr =
203212
CGM.getCXXABI().performThisAdjustment(*this, ThisPtr, Thunk.This);
213+
AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr,
214+
ThisStore->getOperand(0)->getType());
204215
ThisStore->setOperand(0, AdjustedThisPtr);
205216

206217
if (!Thunk.Return.isEmpty()) {
@@ -291,14 +302,17 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee,
291302
*this, LoadCXXThisAddress(), Thunk->This)
292303
: LoadCXXThis();
293304

294-
if (CurFnInfo->usesInAlloca() || IsUnprototyped) {
295-
// We don't handle return adjusting thunks, because they require us to call
296-
// the copy constructor. For now, fall through and pretend the return
297-
// adjustment was empty so we don't crash.
305+
// If perfect forwarding is required for a variadic method, a method using
306+
// inalloca, or an unprototyped thunk, use musttail. Emit an error if this
307+
// thunk requires a return adjustment, since that is impossible with musttail.
308+
if (CurFnInfo->usesInAlloca() || CurFnInfo->isVariadic() || IsUnprototyped) {
298309
if (Thunk && !Thunk->Return.isEmpty()) {
299310
if (IsUnprototyped)
300311
CGM.ErrorUnsupported(
301312
MD, "return-adjusting thunk with incomplete parameter type");
313+
else if (CurFnInfo->isVariadic())
314+
llvm_unreachable("shouldn't try to emit musttail return-adjusting "
315+
"thunks for variadic functions");
302316
else
303317
CGM.ErrorUnsupported(
304318
MD, "non-trivial argument copy for return-adjusting thunk");
@@ -549,16 +563,32 @@ llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD,
549563

550564
CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn);
551565

566+
// Thunks for variadic methods are special because in general variadic
567+
// arguments cannot be perfectly forwarded. In the general case, clang
568+
// implements such thunks by cloning the original function body. However, for
569+
// thunks with no return adjustment on targets that support musttail, we can
570+
// use musttail to perfectly forward the variadic arguments.
571+
bool ShouldCloneVarArgs = false;
552572
if (!IsUnprototyped && ThunkFn->isVarArg()) {
553-
// Varargs thunks are special; we can't just generate a call because
554-
// we can't copy the varargs. Our implementation is rather
555-
// expensive/sucky at the moment, so don't generate the thunk unless
556-
// we have to.
557-
// FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly.
573+
ShouldCloneVarArgs = true;
574+
if (TI.Return.isEmpty()) {
575+
switch (CGM.getTriple().getArch()) {
576+
case llvm::Triple::x86_64:
577+
case llvm::Triple::x86:
578+
case llvm::Triple::aarch64:
579+
ShouldCloneVarArgs = false;
580+
break;
581+
default:
582+
break;
583+
}
584+
}
585+
}
586+
587+
if (ShouldCloneVarArgs) {
558588
if (UseAvailableExternallyLinkage)
559589
return ThunkFn;
560-
ThunkFn = CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD,
561-
TI);
590+
ThunkFn =
591+
CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, TI);
562592
} else {
563593
// Normal thunk body generation.
564594
CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, TI, IsUnprototyped);

clang/test/CodeGenCXX/linetable-virtual-variadic.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
2-
// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -debug-info-kind=line-directives-only %s -o - | FileCheck %s
1+
// Sparc64 is used because AArch64 and X86_64 would both use musttail.
2+
// RUN: %clang_cc1 -triple sparc64-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
3+
// RUN: %clang_cc1 -triple sparc64-linux-gnu -emit-llvm -debug-info-kind=line-directives-only %s -o - | FileCheck %s
34
// Crasher for PR22929.
45
class Base {
56
virtual void VariadicFunction(...);
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// RUN: %clang_cc1 -fno-rtti-data -triple x86_64-windows-msvc -emit-llvm-only %s -verify
2+
3+
// Verify that we error out on this return adjusting thunk that we can't emit.
4+
5+
struct A {
6+
virtual A *clone(const char *f, ...) = 0;
7+
};
8+
struct B : virtual A {
9+
// expected-error@+1 2 {{cannot compile this return-adjusting thunk with variadic arguments yet}}
10+
B *clone(const char *f, ...) override;
11+
};
12+
struct C : B { int c; };
13+
C c;

clang/test/CodeGenCXX/thunks.cpp

Lines changed: 127 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
1-
// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONOPT %s
2-
// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -debug-info-kind=standalone -dwarf-version=5 -munwind-tables -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONOPT --check-prefix=CHECK-DBG %s
3-
// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes | FileCheck --check-prefix=CHECK --check-prefix=CHECK-OPT %s
1+
// Sparc64 doesn't support musttail (yet), so it uses method cloning for
2+
// variadic thunks. Use it for testing.
3+
// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -munwind-tables -emit-llvm -o - \
4+
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-NONOPT %s
5+
// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -debug-info-kind=standalone -dwarf-version=5 -munwind-tables -emit-llvm -o - \
6+
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-NONOPT,CHECK-DBG %s
7+
// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
8+
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-OPT %s
9+
10+
// Test x86_64, which uses musttail for variadic thunks.
11+
// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
12+
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-TAIL,CHECK-OPT %s
13+
14+
// Finally, reuse these tests for the MS ABI.
15+
// RUN: %clang_cc1 %s -triple=x86_64-windows-msvc -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
16+
// RUN: | FileCheck --check-prefixes=WIN64 %s
17+
418

519
namespace Test1 {
620

@@ -23,6 +37,11 @@ struct C : A, B {
2337
// CHECK-LABEL: define void @_ZThn8_N5Test11C1fEv(
2438
// CHECK-DBG-NOT: dbg.declare
2539
// CHECK: ret void
40+
//
41+
// WIN64-LABEL: define dso_local void @"?f@C@Test1@@UEAAXXZ"(
42+
// WIN64-LABEL: define linkonce_odr dso_local void @"?f@C@Test1@@W7EAAXXZ"(
43+
// WIN64: getelementptr i8, i8* {{.*}}, i32 -8
44+
// WIN64: ret void
2645
void C::f() { }
2746

2847
}
@@ -45,6 +64,10 @@ struct B : virtual A {
4564
// CHECK: ret void
4665
void B::f() { }
4766

67+
// No thunk is used for this case in the MS ABI.
68+
// WIN64-LABEL: define dso_local void @"?f@B@Test2@@UEAAXXZ"(
69+
// WIN64-NOT: define {{.*}} void @"?f@B@Test2
70+
4871
}
4972

5073
namespace Test3 {
@@ -65,6 +88,7 @@ struct B : A {
6588
};
6689

6790
// CHECK: define %{{.*}}* @_ZTch0_v0_n24_N5Test31B1fEv(
91+
// WIN64: define weak_odr dso_local %{{.*}} @"?f@B@Test3@@QEAAPEAUV1@2@XZ"(
6892
V2 *B::f() { return 0; }
6993

7094
}
@@ -92,6 +116,10 @@ struct __attribute__((visibility("protected"))) C : A, B {
92116
// CHECK: ret void
93117
void C::f() { }
94118

119+
// Visibility doesn't matter on COFF, but whatever. We could add an ELF test
120+
// mode later.
121+
// WIN64-LABEL: define protected void @"?f@C@Test4@@UEAAXXZ"(
122+
// WIN64-LABEL: define linkonce_odr protected void @"?f@C@Test4@@W7EAAXXZ"(
95123
}
96124

97125
// Check that the thunk gets internal linkage.
@@ -119,6 +147,8 @@ namespace Test4B {
119147
c.f();
120148
}
121149
}
150+
// Not sure why this isn't delayed like in Itanium.
151+
// WIN64-LABEL: define internal void @"?f@C@?A0xAEF74CE7@Test4B@@UEAAXXZ"(
122152

123153
namespace Test5 {
124154

@@ -134,6 +164,7 @@ struct B : virtual A {
134164
void f(B b) {
135165
b.f();
136166
}
167+
// No thunk in MS ABI in this case.
137168
}
138169

139170
namespace Test6 {
@@ -178,6 +209,10 @@ namespace Test6 {
178209
// CHECK: {{call void @_ZN5Test66Thunks1fEv.*sret}}
179210
// CHECK: ret void
180211
X Thunks::f() { return X(); }
212+
213+
// WIN64-LABEL: define linkonce_odr dso_local void @"?f@Thunks@Test6@@WBA@EAA?AUX@2@XZ"({{.*}} sret %{{.*}})
214+
// WIN64-NOT: memcpy
215+
// WIN64: tail call void @"?f@Thunks@Test6@@UEAA?AUX@2@XZ"({{.*}} sret %{{.*}})
181216
}
182217

183218
namespace Test7 {
@@ -224,6 +259,8 @@ namespace Test7 {
224259
// CHECK-NOT: memcpy
225260
// CHECK: ret void
226261
void testD() { D d; }
262+
263+
// MS C++ ABI doesn't use a thunk, so this case isn't interesting.
227264
}
228265

229266
namespace Test8 {
@@ -241,6 +278,8 @@ namespace Test8 {
241278
// CHECK-NOT: memcpy
242279
// CHECK: ret void
243280
void C::bar(NonPOD var) {}
281+
282+
// MS C++ ABI doesn't use a thunk, so this case isn't interesting.
244283
}
245284

246285
// PR7241: Emitting thunks for a method shouldn't require the vtable for
@@ -287,6 +326,16 @@ namespace Test11 {
287326
// CHECK: define {{.*}} @_ZTch0_v0_n32_N6Test111C1fEv(
288327
// CHECK-DBG-NOT: dbg.declare
289328
// CHECK: ret
329+
330+
// WIN64-LABEL: define dso_local %{{.*}}* @"?f@C@Test11@@UEAAPEAU12@XZ"(i8*
331+
332+
// WIN64-LABEL: define weak_odr dso_local %{{.*}}* @"?f@C@Test11@@QEAAPEAUA@2@XZ"(i8*
333+
// WIN64: call %{{.*}}* @"?f@C@Test11@@UEAAPEAU12@XZ"(i8* %{{.*}})
334+
//
335+
// Match the vbtable return adjustment.
336+
// WIN64: load i32*, i32** %{{[^,]*}}, align 8
337+
// WIN64: getelementptr inbounds i32, i32* %{{[^,]*}}, i32 1
338+
// WIN64: load i32, i32* %{{[^,]*}}, align 4
290339
}
291340

292341
// Varargs thunk test.
@@ -301,7 +350,8 @@ namespace Test12 {
301350
virtual void c();
302351
virtual C* f(int x, ...);
303352
};
304-
C* C::f(int x, ...) { return this; }
353+
C* makeC();
354+
C* C::f(int x, ...) { return makeC(); }
305355

306356
// C::f
307357
// CHECK: define {{.*}} @_ZN6Test121C1fEiz
@@ -312,6 +362,28 @@ namespace Test12 {
312362
// CHECK-DBG-NOT: dbg.declare
313363
// CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 -8
314364
// CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 8
365+
366+
// The vtable layout goes:
367+
// C vtable in A:
368+
// - f impl, no adjustment
369+
// C vtable in B:
370+
// - f thunk 2, covariant, clone
371+
// - f thunk 2, musttail this adjust to impl
372+
// FIXME: The weak_odr linkage is probably not necessary and just an artifact
373+
// of Itanium ABI details.
374+
// WIN64-LABEL: define dso_local {{.*}} @"?f@C@Test12@@UEAAPEAU12@HZZ"(
375+
// WIN64: call %{{.*}}* @"?makeC@Test12@@YAPEAUC@1@XZ"()
376+
//
377+
// This thunk needs return adjustment, clone.
378+
// WIN64-LABEL: define weak_odr dso_local {{.*}} @"?f@C@Test12@@W7EAAPEAUB@2@HZZ"(
379+
// WIN64: call %{{.*}}* @"?makeC@Test12@@YAPEAUC@1@XZ"()
380+
// WIN64: getelementptr inbounds i8, i8* %{{.*}}, i32 8
381+
//
382+
// Musttail call back to the A implementation after this adjustment from B to A.
383+
// WIN64-LABEL: define linkonce_odr dso_local %{{.*}}* @"?f@C@Test12@@W7EAAPEAU12@HZZ"(
384+
// WIN64: getelementptr i8, i8* %{{[^,]*}}, i32 -8
385+
// WIN64: musttail call {{.*}} @"?f@C@Test12@@UEAAPEAU12@HZZ"(
386+
C c;
315387
}
316388

317389
// PR13832
@@ -339,6 +411,17 @@ namespace Test13 {
339411
// CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 -24
340412
// CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 8
341413
// CHECK: ret %"struct.Test13::D"*
414+
415+
// WIN64-LABEL: define weak_odr dso_local dereferenceable(32) %"struct.Test13::D"* @"?foo1@D@Test13@@$4PPPPPPPE@A@EAAAEAUB1@2@XZ"(
416+
// This adjustment.
417+
// WIN64: getelementptr inbounds i8, i8* {{.*}}, i64 -12
418+
// Call implementation.
419+
// WIN64: call {{.*}} @"?foo1@D@Test13@@UEAAAEAU12@XZ"(i8* {{.*}})
420+
// Virtual + nonvirtual return adjustment.
421+
// WIN64: load i32*, i32** %{{[^,]*}}, align 8
422+
// WIN64: getelementptr inbounds i32, i32* %{{[^,]*}}, i32 1
423+
// WIN64: load i32, i32* %{{[^,]*}}, align 4
424+
// WIN64: getelementptr inbounds i8, i8* %{{[^,]*}}, i32 %{{[^,]*}}
342425
}
343426

344427
namespace Test14 {
@@ -374,9 +457,16 @@ namespace Test15 {
374457
void C::c() {}
375458

376459
// C::c
377-
// CHECK: declare void @_ZN6Test151C1fEiz
460+
// CHECK-CLONE: declare void @_ZN6Test151C1fEiz
378461
// non-virtual thunk to C::f
379-
// CHECK: declare void @_ZThn8_N6Test151C1fEiz
462+
// CHECK-CLONE: declare void @_ZThn8_N6Test151C1fEiz
463+
464+
// If we have musttail, then we emit the thunk as available_externally.
465+
// CHECK-TAIL: declare void @_ZN6Test151C1fEiz
466+
// CHECK-TAIL: define available_externally void @_ZThn8_N6Test151C1fEiz({{.*}})
467+
// CHECK-TAIL: musttail call void (%"struct.Test15::C"*, i32, ...) @_ZN6Test151C1fEiz({{.*}}, ...)
468+
469+
// MS C++ ABI doesn't use a thunk, so this case isn't interesting.
380470
}
381471

382472
namespace Test16 {
@@ -398,6 +488,33 @@ D::~D() {}
398488
// CHECK: ret void
399489
}
400490

491+
namespace Test17 {
492+
class A {
493+
virtual void f(const char *, ...);
494+
};
495+
class B {
496+
virtual void f(const char *, ...);
497+
};
498+
class C : A, B {
499+
virtual void anchor();
500+
void f(const char *, ...) override;
501+
};
502+
// Key method and object anchor vtable for Itanium and MSVC.
503+
void C::anchor() {}
504+
C c;
505+
506+
// CHECK-CLONE-LABEL: declare void @_ZThn8_N6Test171C1fEPKcz(
507+
508+
// CHECK-TAIL-LABEL: define available_externally void @_ZThn8_N6Test171C1fEPKcz(
509+
// CHECK-TAIL: getelementptr inbounds i8, i8* %{{.*}}, i64 -8
510+
// CHECK-TAIL: musttail call {{.*}} @_ZN6Test171C1fEPKcz({{.*}}, ...)
511+
512+
// MSVC-LABEL: define linkonce_odr dso_local void @"?f@C@Test17@@G7EAAXPEBDZZ"
513+
// MSVC-SAME: (%"class.Test17::C"* %this, i8* %[[ARG:[^,]+]], ...)
514+
// MSVC: getelementptr i8, i8* %{{.*}}, i32 -8
515+
// MSVC: musttail call void (%"class.Test17::C"*, i8*, ...) @"?f@C@Test17@@EEAAXPEBDZZ"(%"class.Test17::C"* %{{.*}}, i8* %[[ARG]], ...)
516+
}
517+
401518
/**** The following has to go at the end of the file ****/
402519

403520
// checking without opt
@@ -421,5 +538,9 @@ D::~D() {}
421538
// CHECK-OPT-LABEL: define linkonce_odr void @_ZN6Test101C3fooEv
422539
// CHECK-OPT-LABEL: define linkonce_odr void @_ZThn8_N6Test101C3fooEv
423540

541+
// This is from Test10:
542+
// WIN64-LABEL: define linkonce_odr dso_local void @"?foo@C@Test10@@UEAAXXZ"(
543+
// WIN64-LABEL: define linkonce_odr dso_local void @"?foo@C@Test10@@W7EAAXXZ"(
544+
424545
// CHECK-NONOPT: attributes [[NUW]] = { noinline nounwind optnone uwtable{{.*}} }
425546
// CHECK-OPT: attributes [[NUW]] = { nounwind uwtable{{.*}} }

0 commit comments

Comments
 (0)