Skip to content

Commit 54296ce

Browse files
committed
[WPD]: Apply speculative WPD in non-lto mode.
- This patch applies speculative devirtualization in non-LTO mode, where visibility is not needed. - It's still safe to devirtualize because we do speculation. - In non-LTO mode, only speculative devirtualization is allowed, without other features like virtual constant propagation, to minimize the drawback of wrong speculation.
1 parent 945b12f commit 54296ce

File tree

10 files changed

+164
-30
lines changed

10 files changed

+164
-30
lines changed

clang/docs/UsersManual.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2275,9 +2275,13 @@ are listed below.
22752275

22762276
.. option:: -fwhole-program-vtables
22772277

2278+
In LTO mode:
22782279
Enable whole-program vtable optimizations, such as single-implementation
22792280
devirtualization and virtual constant propagation, for classes with
2280-
:doc:`hidden LTO visibility <LTOVisibility>`. Requires ``-flto``.
2281+
:doc:`hidden LTO visibility <LTOVisibility>`.
2282+
In non-LTO mode:
2283+
Enables only speculative devirtualization, without the other features.
2284+
Doesn't require ``-flto`` or visibility.
22812285

22822286
.. option:: -f[no]split-lto-unit
22832287

@@ -5170,7 +5174,7 @@ Execute ``clang-cl /?`` to see a list of supported options:
51705174
-fstandalone-debug Emit full debug info for all types used by the program
51715175
-fstrict-aliasing Enable optimizations based on strict aliasing rules
51725176
-fsyntax-only Run the preprocessor, parser and semantic analysis stages
5173-
-fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto
5177+
-fwhole-program-vtables Enables whole-program vtable optimization.
51745178
-gcodeview-ghash Emit type record hashes in a .debug$H section
51755179
-gcodeview Generate CodeView debug information
51765180
-gline-directives-only Emit debug line info directives only

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
902902
// non-integrated assemblers don't recognize .cgprofile section.
903903
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
904904
PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO;
905+
PTO.WholeProgramDevirt = CodeGenOpts.WholeProgramVTables;
905906

906907
LoopAnalysisManager LAM;
907908
FunctionAnalysisManager FAM;

clang/lib/CodeGen/CGVTables.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1359,7 +1359,8 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
13591359
// Emit type metadata on vtables with LTO or IR instrumentation.
13601360
// In IR instrumentation, the type metadata is used to find out vtable
13611361
// definitions (for type profiling) among all global variables.
1362-
if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr())
1362+
if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() &&
1363+
!getCodeGenOpts().WholeProgramVTables)
13631364
return;
13641365

13651366
CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType());

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7847,8 +7847,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
78477847
IsDeviceOffloadAction ? D.getLTOMode() : D.getOffloadLTOMode();
78487848
auto OtherIsUsingLTO = OtherLTOMode != LTOK_None;
78497849

7850-
if ((!IsUsingLTO && !OtherIsUsingLTO) ||
7851-
(IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full)))
7850+
if (!IsUsingLTO && !OtherIsUsingLTO && !UnifiedLTO) {
7851+
if (const Arg *A = Args.getLastArg(options::OPT_O_Group))
7852+
if (!A->getOption().matches(options::OPT_O0))
7853+
CmdArgs.push_back("-fwhole-program-vtables");
7854+
} else if ((!IsUsingLTO && !OtherIsUsingLTO) ||
7855+
(IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full)))
78527856
D.Diag(diag::err_drv_argument_only_allowed_with)
78537857
<< "-fwhole-program-vtables"
78547858
<< ((IsPS4 && !UnifiedLTO) ? "-flto=full" : "-flto");
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Check that speculative devirtualization works without the need for LTO or visibility.
2+
// RUN: %clang_cc1 -fwhole-program-vtables -O1 %s -emit-llvm -o - | FileCheck %s
3+
4+
struct A {
5+
A(){}
6+
__attribute__((noinline))
7+
virtual int virtual1(){return 20;}
8+
__attribute__((noinline))
9+
virtual void empty_virtual(){}
10+
};
11+
12+
struct B : A {
13+
B(){}
14+
__attribute__((noinline))
15+
virtual int virtual1() override {return 50;}
16+
__attribute__((noinline))
17+
virtual void empty_virtual() override {}
18+
};
19+
20+
// Test that we can apply speculative devirtualization
21+
// without the need for LTO or visibility.
22+
__attribute__((noinline))
23+
int test_devirtual(A *a) {
24+
// CHECK: %0 = load ptr, ptr %vtable, align 8
25+
// CHECK-NEXT: %1 = icmp eq ptr %0, @_ZN1B8virtual1Ev
26+
// CHECK-NEXT: br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !12
27+
28+
// CHECK: if.true.direct_targ: ; preds = %entry
29+
// CHECK-NEXT: %2 = tail call noundef i32 @_ZN1B8virtual1Ev(ptr noundef nonnull align 8 dereferenceable(8) %a)
30+
// CHECK-NEXT: br label %if.end.icp
31+
32+
// CHECK: if.false.orig_indirect: ; preds = %entry
33+
// CHECK-NEXT: %call = tail call noundef i32 %0(ptr noundef nonnull align 8 dereferenceable(8) %a)
34+
// CHECK-NEXT: br label %if.end.icp
35+
36+
// CHECK: if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ
37+
// CHECK-NEXT: %3 = phi i32 [ %call, %if.false.orig_indirect ], [ %2, %if.true.direct_targ ]
38+
// CHECK-NEXT: ret i32 %3
39+
40+
return a->virtual1();
41+
}
42+
43+
// Test that we skip devirtualization for empty virtual functions as most probably
44+
// they are used for interfaces.
45+
__attribute__((noinline))
46+
void test_devirtual_empty_fn(A *a) {
47+
// CHECK: load ptr, ptr %vfn, align 8
48+
// CHECK-NEXT: tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %a)
49+
a->empty_virtual();
50+
}
51+
52+
void test() {
53+
A *a = new B();
54+
test_devirtual(a);
55+
test_devirtual_empty_fn(a);
56+
}
Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
1-
// RUN: not %clang -target x86_64-unknown-linux -fwhole-program-vtables -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
2-
// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -### -- %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
3-
// NO-LTO: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
1+
// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -O1 -### %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s
2+
// RUN: %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -O1 -### -- %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s
3+
// WPD-NO-LTO: "-fwhole-program-vtables"
44

55
// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO %s
66
// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO %s
77
// LTO: "-fwhole-program-vtables"
88

9-
/// -funified-lto does not imply -flto, so we still get an error that fwhole-program-vtables has no effect without -flto
10-
// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -funified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
11-
// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -fno-unified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
12-
139
// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -fno-whole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s
1410
// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -fno-whole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s
1511
// LTO-DISABLE-NOT: "-fwhole-program-vtables"

llvm/include/llvm/Passes/PassBuilder.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,12 @@ class PipelineTuningOptions {
9898
// analyses after various module->function or cgscc->function adaptors in the
9999
// default pipelines.
100100
bool EagerlyInvalidateAnalyses;
101+
102+
/// Tuning option to enable/disable whole program devirtualization.
103+
/// Its default value is false.
104+
/// This is controlled by the `-fwhole-program-vtables` flag.
105+
/// Used only in non-LTO mode.
106+
bool WholeProgramDevirt;
101107
};
102108

103109
/// This class provides access to building LLVM's passes.

llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,15 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
226226
ModuleSummaryIndex *ExportSummary;
227227
const ModuleSummaryIndex *ImportSummary;
228228
bool UseCommandLine = false;
229+
const bool InLTOMode;
229230
WholeProgramDevirtPass()
230-
: ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
231+
: ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true),
232+
InLTOMode(true) {}
231233
WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
232-
const ModuleSummaryIndex *ImportSummary)
233-
: ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
234+
const ModuleSummaryIndex *ImportSummary,
235+
bool InLTOMode = true)
236+
: ExportSummary(ExportSummary), ImportSummary(ImportSummary),
237+
InLTOMode(InLTOMode) {
234238
assert(!(ExportSummary && ImportSummary));
235239
}
236240
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &);

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
321321
MergeFunctions = EnableMergeFunctions;
322322
InlinerThreshold = -1;
323323
EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
324+
WholeProgramDevirt = false;
324325
}
325326

326327
namespace llvm {
@@ -1629,6 +1630,23 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
16291630
if (!LTOPreLink)
16301631
MPM.addPass(RelLookupTableConverterPass());
16311632

1633+
if (PTO.WholeProgramDevirt && LTOPhase == ThinOrFullLTOPhase::None) {
1634+
MPM.addPass(WholeProgramDevirtPass(/*ExportSummary*/ nullptr,
1635+
/*ImportSummary*/ nullptr,
1636+
/*InLTOMode=*/false));
1637+
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1638+
lowertypetests::DropTestKind::Assume));
1639+
if (EnableModuleInliner) {
1640+
MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
1641+
UseInlineAdvisor,
1642+
ThinOrFullLTOPhase::None));
1643+
} else {
1644+
MPM.addPass(ModuleInlinerWrapperPass(
1645+
getInlineParamsFromOptLevel(Level),
1646+
/* MandatoryFirst */ true,
1647+
InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner}));
1648+
}
1649+
}
16321650
return MPM;
16331651
}
16341652

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
// returns 0, or a single vtable's function returns 1, replace each virtual
2525
// call with a comparison of the vptr against that vtable's address.
2626
//
27-
// This pass is intended to be used during the regular and thin LTO pipelines:
27+
// This pass is intended to be used during the regular/thinLTO and non-LTO
28+
// pipelines:
2829
//
2930
// During regular LTO, the pass determines the best optimization for each
3031
// virtual call and applies the resolutions directly to virtual calls that are
@@ -48,6 +49,13 @@
4849
// is supported.
4950
// - Import phase: (same as with hybrid case above).
5051
//
52+
// In non-LTO mode:
53+
// - The pass applies speculative devirtualization without requiring any type of
54+
// visibility.
55+
// - Skips other features like virtual constant propagation, uniform return
56+
// value
57+
// optimization, unique return value optimization, branch funnels to minimize
58+
// the drawbacks of wrong speculation.
5159
//===----------------------------------------------------------------------===//
5260

5361
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -60,7 +68,9 @@
6068
#include "llvm/ADT/Statistic.h"
6169
#include "llvm/Analysis/AssumptionCache.h"
6270
#include "llvm/Analysis/BasicAliasAnalysis.h"
71+
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
6372
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
73+
#include "llvm/Analysis/ProfileSummaryInfo.h"
6474
#include "llvm/Analysis/TypeMetadataUtils.h"
6575
#include "llvm/Bitcode/BitcodeReader.h"
6676
#include "llvm/Bitcode/BitcodeWriter.h"
@@ -798,6 +808,21 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
798808
return PreservedAnalyses::all();
799809
return PreservedAnalyses::none();
800810
}
811+
std::optional<ModuleSummaryIndex> Index;
812+
// Force Fallback mode as it's safe in case it's non-LTO mode where
813+
// we don't have hidden visibility.
814+
if (!InLTOMode) {
815+
DevirtCheckMode = WPDCheckMode::Fallback;
816+
// In non-LTO mode, we don't have an ExportSummary, so we
817+
// build the ExportSummary from the module.
818+
assert(!ExportSummary &&
819+
"ExportSummary is expected to be empty in non-LTO mode");
820+
if (DevirtCheckMode == WPDCheckMode::Fallback && !ExportSummary) {
821+
ProfileSummaryInfo PSI(M);
822+
Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI));
823+
ExportSummary = Index.has_value() ? &Index.value() : nullptr;
824+
}
825+
}
801826
if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
802827
ImportSummary)
803828
.run())
@@ -1091,10 +1116,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
10911116
if (!TM.Bits->GV->isConstant())
10921117
return false;
10931118

1094-
// We cannot perform whole program devirtualization analysis on a vtable
1095-
// with public LTO visibility.
1096-
if (TM.Bits->GV->getVCallVisibility() ==
1097-
GlobalObject::VCallVisibilityPublic)
1119+
// If speculative devirtualization is NOT enabled, it's not safe to perform
1120+
// whole program devirtualization
1121+
// analysis on a vtable with public LTO visibility.
1122+
if (DevirtCheckMode != WPDCheckMode::Fallback &&
1123+
TM.Bits->GV->getVCallVisibility() ==
1124+
GlobalObject::VCallVisibilityPublic)
10981125
return false;
10991126

11001127
Function *Fn = nullptr;
@@ -1112,6 +1139,11 @@ bool DevirtModule::tryFindVirtualCallTargets(
11121139
// calls to pure virtuals are UB.
11131140
if (Fn->getName() == "__cxa_pure_virtual")
11141141
continue;
1142+
// In most cases, empty functions will be overridden by the
1143+
// implementation of the derived class, so we can skip them.
1144+
if (DevirtCheckMode == WPDCheckMode::Fallback &&
1145+
Fn->getReturnType()->isVoidTy() && Fn->getInstructionCount() <= 1)
1146+
continue;
11151147

11161148
// We can disregard unreachable functions as possible call targets, as
11171149
// unreachable functions shouldn't be called.
@@ -1333,10 +1365,11 @@ bool DevirtModule::trySingleImplDevirt(
13331365
if (!IsExported)
13341366
return false;
13351367

1336-
// If the only implementation has local linkage, we must promote to external
1337-
// to make it visible to thin LTO objects. We can only get here during the
1338-
// ThinLTO export phase.
1339-
if (TheFn->hasLocalLinkage()) {
1368+
// In case of non-speculative devirtualization, if the only implementation has
1369+
// local linkage, we must promote to external
1370+
// to make it visible to thin LTO objects. We can only get here during the
1371+
// ThinLTO export phase.
1372+
if (DevirtCheckMode != WPDCheckMode::Fallback && TheFn->hasLocalLinkage()) {
13401373
std::string NewName = (TheFn->getName() + ".llvm.merged").str();
13411374

13421375
// Since we are renaming the function, any comdats with the same name must
@@ -2315,6 +2348,11 @@ bool DevirtModule::run() {
23152348

23162349
Function *TypeTestFunc =
23172350
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
2351+
// If we are applying speculative devirtualization, we can work on the public
2352+
// type test intrinsics.
2353+
if (!TypeTestFunc && DevirtCheckMode == WPDCheckMode::Fallback)
2354+
TypeTestFunc =
2355+
Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
23182356
Function *TypeCheckedLoadFunc =
23192357
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
23202358
Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
@@ -2437,12 +2475,18 @@ bool DevirtModule::run() {
24372475
.WPDRes[S.first.ByteOffset];
24382476
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
24392477
S.first.ByteOffset, ExportSummary)) {
2440-
2441-
if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
2442-
DidVirtualConstProp |=
2443-
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2444-
2445-
tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2478+
trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
2479+
// In speculative devirt mode, we skip virtual constant propagation
2480+
// and branch funneling to minimize the drawback if we got wrong
2481+
// speculation during devirtualization.
2482+
if (DevirtCheckMode != WPDCheckMode::Fallback) {
2483+
if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second,
2484+
Res)) {
2485+
DidVirtualConstProp |=
2486+
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2487+
2488+
tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2489+
}
24462490
}
24472491

24482492
// Collect functions devirtualized at least for one call site for stats.

0 commit comments

Comments
 (0)