Skip to content

Commit 8b2aba2

Browse files
authored
[WPD]: Enable speculative devirtualization. (llvm#159048)
This patch implements the speculative devirtualization feature in the LLVM backend. It handles the case of single implementation devirtualization where there is a single possible callee of a virtual function. - Add cl::opt 'devirtualize-speculatively' to enable it. - Flag is disabled by default. - It works regardless of the visibility of the object. - Not enabled for LTO for now.
1 parent ec546ce commit 8b2aba2

File tree

3 files changed

+199
-18
lines changed

3 files changed

+199
-18
lines changed

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
// returns 0, or a single vtable's function returns 1, replace each virtual
2525
// call with a comparison of the vptr against that vtable's address.
2626
//
27-
// This pass is intended to be used during the regular and thin LTO pipelines:
27+
// This pass is intended to be used during the regular/thin LTO and non-LTO
28+
// pipelines:
2829
//
2930
// During regular LTO, the pass determines the best optimization for each
3031
// virtual call and applies the resolutions directly to virtual calls that are
@@ -48,6 +49,14 @@
4849
// is supported.
4950
// - Import phase: (same as with hybrid case above).
5051
//
52+
// During speculative devirtualization mode (not restricted to LTO):
53+
// - The pass applies speculative devirtualization without requiring any type of
54+
// visibility.
55+
// - Skips other features like virtual constant propagation, uniform return
56+
// value optimization, unique return value optimization and branch funnels as
57+
// they need LTO.
58+
// - This mode is enabled via 'devirtualize-speculatively' flag.
59+
//
5160
//===----------------------------------------------------------------------===//
5261

5362
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -61,7 +70,9 @@
6170
#include "llvm/Analysis/AssumptionCache.h"
6271
#include "llvm/Analysis/BasicAliasAnalysis.h"
6372
#include "llvm/Analysis/BlockFrequencyInfo.h"
73+
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
6474
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
75+
#include "llvm/Analysis/ProfileSummaryInfo.h"
6576
#include "llvm/Analysis/TypeMetadataUtils.h"
6677
#include "llvm/Bitcode/BitcodeReader.h"
6778
#include "llvm/Bitcode/BitcodeWriter.h"
@@ -145,6 +156,13 @@ static cl::opt<std::string> ClWriteSummary(
145156
"bitcode, otherwise YAML"),
146157
cl::Hidden);
147158

159+
// TODO: This option eventually should support any public visibility vtables
160+
// with or without LTO.
161+
static cl::opt<bool> ClDevirtualizeSpeculatively(
162+
"devirtualize-speculatively",
163+
cl::desc("Enable speculative devirtualization optimization"),
164+
cl::init(false));
165+
148166
static cl::opt<unsigned>
149167
ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
150168
cl::init(10),
@@ -892,6 +910,8 @@ void llvm::updatePublicTypeTestCalls(Module &M,
892910
CI->eraseFromParent();
893911
}
894912
} else {
913+
// TODO: Don't replace public type tests when speculative devirtualization
914+
// gets enabled in LTO mode.
895915
auto *True = ConstantInt::getTrue(M.getContext());
896916
for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
897917
auto *CI = cast<CallInst>(U.getUser());
@@ -1083,10 +1103,10 @@ bool DevirtModule::tryFindVirtualCallTargets(
10831103
if (!TM.Bits->GV->isConstant())
10841104
return false;
10851105

1086-
// We cannot perform whole program devirtualization analysis on a vtable
1087-
// with public LTO visibility.
1088-
if (TM.Bits->GV->getVCallVisibility() ==
1089-
GlobalObject::VCallVisibilityPublic)
1106+
// Without ClDevirtualizeSpeculatively, we cannot perform whole program
1107+
// devirtualization analysis on a vtable with public LTO visibility.
1108+
if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() ==
1109+
GlobalObject::VCallVisibilityPublic)
10901110
return false;
10911111

10921112
Function *Fn = nullptr;
@@ -1105,6 +1125,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
11051125
if (Fn->getName() == "__cxa_pure_virtual")
11061126
continue;
11071127

1128+
// In most cases empty functions will be overridden by the
1129+
// implementation of the derived class, so we can skip them.
1130+
if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() &&
1131+
Fn->getInstructionCount() <= 1)
1132+
continue;
1133+
11081134
// We can disregard unreachable functions as possible call targets, as
11091135
// unreachable functions shouldn't be called.
11101136
if (mustBeUnreachableFunction(Fn, ExportSummary))
@@ -1223,10 +1249,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
12231249
CallTrap->setDebugLoc(CB.getDebugLoc());
12241250
}
12251251

1226-
// If fallback checking is enabled, add support to compare the virtual
1227-
// function pointer to the devirtualized target. In case of a mismatch,
1228-
// fall back to indirect call.
1229-
if (DevirtCheckMode == WPDCheckMode::Fallback) {
1252+
// If fallback checking or speculative devirtualization is enabled,
1253+
// add support to compare the virtual function pointer to the
1254+
// devirtualized target. In case of a mismatch, fall back to indirect
1255+
// call.
1256+
if (DevirtCheckMode == WPDCheckMode::Fallback ||
1257+
ClDevirtualizeSpeculatively) {
12301258
MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
12311259
// Version the indirect call site. If the called value is equal to the
12321260
// given callee, 'NewInst' will be executed, otherwise the original call
@@ -2057,15 +2085,15 @@ void DevirtModule::scanTypeTestUsers(
20572085
Function *TypeTestFunc,
20582086
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
20592087
// Find all virtual calls via a virtual table pointer %p under an assumption
2060-
// of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
2061-
// points to a member of the type identifier %md. Group calls by (type ID,
2062-
// offset) pair (effectively the identity of the virtual function) and store
2063-
// to CallSlots.
2088+
// of the form llvm.assume(llvm.type.test(%p, %md)) or
2089+
// llvm.assume(llvm.public.type.test(%p, %md)).
2090+
// This indicates that %p points to a member of the type identifier %md.
2091+
// Group calls by (type ID, offset) pair (effectively the identity of the
2092+
// virtual function) and store to CallSlots.
20642093
for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
20652094
auto *CI = dyn_cast<CallInst>(U.getUser());
20662095
if (!CI)
20672096
continue;
2068-
20692097
// Search for virtual calls based on %p and add them to DevirtCalls.
20702098
SmallVector<DevirtCallSite, 1> DevirtCalls;
20712099
SmallVector<CallInst *, 1> Assumes;
@@ -2348,6 +2376,12 @@ bool DevirtModule::run() {
23482376
(ImportSummary && ImportSummary->partiallySplitLTOUnits()))
23492377
return false;
23502378

2379+
Function *PublicTypeTestFunc = nullptr;
2380+
// If we are in speculative devirtualization mode, we can work on the public
2381+
// type test intrinsics.
2382+
if (ClDevirtualizeSpeculatively)
2383+
PublicTypeTestFunc =
2384+
Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
23512385
Function *TypeTestFunc =
23522386
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
23532387
Function *TypeCheckedLoadFunc =
@@ -2361,8 +2395,9 @@ bool DevirtModule::run() {
23612395
// module, this pass has nothing to do. But if we are exporting, we also need
23622396
// to handle any users that appear only in the function summaries.
23632397
if (!ExportSummary &&
2364-
(!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
2365-
AssumeFunc->use_empty()) &&
2398+
(((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) &&
2399+
(!TypeTestFunc || TypeTestFunc->use_empty())) ||
2400+
!AssumeFunc || AssumeFunc->use_empty()) &&
23662401
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
23672402
(!TypeCheckedLoadRelativeFunc ||
23682403
TypeCheckedLoadRelativeFunc->use_empty()))
@@ -2373,6 +2408,9 @@ bool DevirtModule::run() {
23732408
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
23742409
buildTypeIdentifierMap(Bits, TypeIdMap);
23752410

2411+
if (PublicTypeTestFunc && AssumeFunc)
2412+
scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap);
2413+
23762414
if (TypeTestFunc && AssumeFunc)
23772415
scanTypeTestUsers(TypeTestFunc, TypeIdMap);
23782416

@@ -2472,8 +2510,12 @@ bool DevirtModule::run() {
24722510
.WPDRes[S.first.ByteOffset];
24732511
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
24742512
S.first.ByteOffset, ExportSummary)) {
2475-
2476-
if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
2513+
bool SingleImplDevirt =
2514+
trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
2515+
// Outside speculative devirtualization mode, try to apply virtual
2516+
// constant propagation or branch funneling.
2517+
// TODO: This should eventually be enabled for non-public type tests.
2518+
if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
24772519
DidVirtualConstProp |=
24782520
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
24792521

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
; -stats requires asserts
2+
; REQUIRES: asserts
3+
4+
; Check that we can still devirtualize outside LTO mode when speculative devirtualization is enabled.
5+
; Check that we skip devirtualization for empty functions in speculative devirtualization mode
6+
7+
; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively \
8+
; RUN: -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s
9+
10+
target datalayout = "e-p:64:64"
11+
target triple = "x86_64-unknown-linux-gnu"
12+
13+
; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf
14+
; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf
15+
; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf
16+
; CHECK: remark: devirt-single.cc:13:0: devirtualized vf
17+
; CHECK-NOT: devirtualized
18+
19+
@vt1 = constant [1 x ptr] [ptr @vf], !type !8
20+
@vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12
21+
22+
define i1 @vf(ptr %this) #0 !dbg !7 {
23+
ret i1 true
24+
}
25+
26+
; This should NOT be devirtualized because empty void functions are skipped
27+
; as call targets in speculative devirtualization mode.
28+
define void @vf_empty(ptr %this) !dbg !11 {
29+
ret void
30+
}
31+
32+
; CHECK: define void @call
33+
define void @call(ptr %obj) #1 !dbg !5 {
34+
%vtable = load ptr, ptr %obj
35+
%p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid")
36+
call void @llvm.assume(i1 %p)
37+
%fptr = load ptr, ptr %vtable
38+
; CHECK: if.true.direct_targ:
39+
; CHECK: call i1 @vf(
40+
; CHECK: if.false.orig_indirect:
41+
; CHECK: call i1 %fptr(
42+
call i1 %fptr(ptr %obj), !dbg !6
43+
ret void
44+
}
45+
46+
47+
; CHECK: define void @call1
48+
define void @call1(ptr %obj) #1 !dbg !9 {
49+
%vtable = load ptr, ptr %obj
50+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1")
51+
call void @llvm.assume(i1 %p)
52+
%fptr = load ptr, ptr %vtable, align 8
53+
; CHECK: call i1 %fptr
54+
%1 = call i1 %fptr(ptr %obj), !dbg !10
55+
ret void
56+
}
57+
declare ptr @llvm.load.relative.i32(ptr, i32)
58+
59+
@vt3 = private unnamed_addr constant [1 x i32] [
60+
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32)
61+
], align 4, !type !15
62+
63+
; CHECK: define void @call2
64+
define void @call2(ptr %obj) #1 !dbg !13 {
65+
%vtable = load ptr, ptr %obj
66+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2")
67+
call void @llvm.assume(i1 %p)
68+
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
69+
; CHECK: if.true.direct_targ:
70+
; CHECK: call i1 @vf(
71+
; CHECK: if.false.orig_indirect:
72+
; CHECK: call i1 %fptr(
73+
call i1 %fptr(ptr %obj), !dbg !14
74+
ret void
75+
}
76+
77+
@_ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [
78+
i32 0, ; offset to top
79+
i32 0, ; rtti
80+
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; relative offset to @vf
81+
] }, align 4, !type !18
82+
83+
; CHECK: define void @call3
84+
define void @call3(ptr %obj) #1 !dbg !16 {
85+
%vtable = load ptr, ptr %obj
86+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3")
87+
call void @llvm.assume(i1 %p)
88+
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8)
89+
; CHECK: if.true.direct_targ:
90+
; CHECK: call i1 @vf(
91+
; CHECK: if.false.orig_indirect:
92+
; CHECK: call i1 %fptr(
93+
call i1 %fptr(ptr %obj), !dbg !17
94+
ret void
95+
}
96+
97+
98+
declare i1 @llvm.type.test(ptr, metadata)
99+
declare i1 @llvm.public.type.test(ptr, metadata)
100+
declare void @llvm.assume(i1)
101+
102+
!llvm.dbg.cu = !{!0}
103+
!llvm.module.flags = !{!2, !3}
104+
!llvm.ident = !{!4}
105+
106+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
107+
!1 = !DIFile(filename: "devirt-single.cc", directory: ".")
108+
!2 = !{i32 2, !"Dwarf Version", i32 4}
109+
!3 = !{i32 2, !"Debug Info Version", i32 3}
110+
!4 = !{!"clang version 4.0.0 (trunk 278098)"}
111+
!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
112+
!6 = !DILocation(line: 30, column: 32, scope: !5)
113+
!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
114+
!8 = !{i32 0, !"typeid"}
115+
116+
!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
117+
!10 = !DILocation(line: 35, column: 32, scope: !9)
118+
!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
119+
!12 = !{i32 0, !"typeid1"}
120+
121+
!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
122+
!14 = !DILocation(line: 41, column: 32, scope: !13)
123+
!15 = !{i32 0, !"typeid2"}
124+
125+
!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
126+
!17 = !DILocation(line: 51, column: 32, scope: !16)
127+
!18 = !{i32 0, !"typeid3"}
128+
129+
130+
131+
; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets
132+
; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations

llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
; Check wildcard
1212
; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt -wholeprogramdevirt-skip=vf?i1 %s 2>&1 | FileCheck %s --check-prefix=SKIP
1313

14+
; Check that no stats are reported in speculative devirtualization mode as the virtual const prop is disabled.
15+
; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively -stats %s 2>&1 | FileCheck %s --check-prefix=CHECK-SPECULATIVE-WPD
16+
1417
target datalayout = "e-p:64:64"
1518
target triple = "x86_64-unknown-linux-gnu"
1619

@@ -225,3 +228,7 @@ declare ptr @llvm.load.relative.i32(ptr, i32)
225228
; CHECK: 2 wholeprogramdevirt - Number of unique return value optimizations
226229
; CHECK: 2 wholeprogramdevirt - Number of virtual constant propagations
227230
; CHECK: 2 wholeprogramdevirt - Number of 1 bit virtual constant propagations
231+
232+
; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of unique return value optimizations
233+
; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of virtual constant propagations
234+
; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of 1 bit virtual constant propagations

0 commit comments

Comments
 (0)