Skip to content

Commit 12d4eeb

Browse files
committed
[WPD] set the function entry count
1 parent a8680be commit 12d4eeb

File tree

5 files changed

+263
-18
lines changed

5 files changed

+263
-18
lines changed

llvm/include/llvm/IR/ProfDataUtils.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,11 @@ inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) {
180180
/// info.
181181
LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I);
182182

183-
LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD);
183+
/// Analogous to setExplicitlyUnknownBranchWeights, but for functions and their
184+
/// entry counts.
185+
LLVM_ABI void setExplicitlyUnknownFunctionEntryCount(Function &F);
186+
187+
LLVM_ABI bool isExplicitlyUnknownProfileMetadata(const MDNode &MD);
184188
LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I);
185189

186190
/// Scaling the profile data attached to 'I' using the ratio of S/T.

llvm/lib/IR/ProfDataUtils.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,15 @@ void setExplicitlyUnknownBranchWeights(Instruction &I) {
250250
MDB.createString(MDProfLabels::UnknownBranchWeightsMarker)));
251251
}
252252

253-
bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD) {
253+
void setExplicitlyUnknownFunctionEntryCount(Function &F) {
254+
MDBuilder MDB(F.getContext());
255+
F.setMetadata(
256+
LLVMContext::MD_prof,
257+
MDNode::get(F.getContext(),
258+
MDB.createString(MDProfLabels::UnknownBranchWeightsMarker)));
259+
}
260+
261+
bool isExplicitlyUnknownProfileMetadata(const MDNode &MD) {
254262
if (MD.getNumOperands() != 1)
255263
return false;
256264
return MD.getOperand(0).equalsStr(MDProfLabels::UnknownBranchWeightsMarker);
@@ -260,7 +268,7 @@ bool hasExplicitlyUnknownBranchWeights(const Instruction &I) {
260268
auto *MD = I.getMetadata(LLVMContext::MD_prof);
261269
if (!MD)
262270
return false;
263-
return isExplicitlyUnknownBranchWeightsMetadata(*MD);
271+
return isExplicitlyUnknownProfileMetadata(*MD);
264272
}
265273

266274
void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,

llvm/lib/IR/Verifier.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2526,12 +2526,11 @@ void Verifier::verifyFunctionMetadata(
25262526
for (const auto &Pair : MDs) {
25272527
if (Pair.first == LLVMContext::MD_prof) {
25282528
MDNode *MD = Pair.second;
2529-
if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) {
2530-
CheckFailed("'unknown' !prof metadata should appear only on "
2531-
"instructions supporting the 'branch_weights' metadata",
2532-
MD);
2529+
// We may have functions that are synthesized by the compiler, e.g. in
2530+
// WPD, for which we can't currently determine the entry count.
2531+
if (isExplicitlyUnknownProfileMetadata(*MD))
25332532
continue;
2534-
}
2533+
25352534
Check(MD->getNumOperands() >= 2,
25362535
"!prof annotations should have no less than 2 operands", MD);
25372536

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
#include "llvm/ADT/Statistic.h"
6161
#include "llvm/Analysis/AssumptionCache.h"
6262
#include "llvm/Analysis/BasicAliasAnalysis.h"
63+
#include "llvm/Analysis/BlockFrequencyInfo.h"
6364
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
6465
#include "llvm/Analysis/TypeMetadataUtils.h"
6566
#include "llvm/Bitcode/BitcodeReader.h"
@@ -84,6 +85,7 @@
8485
#include "llvm/IR/Module.h"
8586
#include "llvm/IR/ModuleSummaryIndexYAML.h"
8687
#include "llvm/IR/PassManager.h"
88+
#include "llvm/IR/ProfDataUtils.h"
8789
#include "llvm/Support/Casting.h"
8890
#include "llvm/Support/CommandLine.h"
8991
#include "llvm/Support/Errc.h"
@@ -97,6 +99,7 @@
9799
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
98100
#include "llvm/Transforms/Utils/Evaluator.h"
99101
#include <algorithm>
102+
#include <cmath>
100103
#include <cstddef>
101104
#include <map>
102105
#include <set>
@@ -169,6 +172,8 @@ static cl::list<std::string>
169172
cl::desc("Prevent function(s) from being devirtualized"),
170173
cl::Hidden, cl::CommaSeparated);
171174

175+
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
176+
172177
/// With Clang, a pure virtual class's deleting destructor is emitted as a
173178
/// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
174179
/// context of whole program devirtualization, the deleting destructor of a pure
@@ -656,7 +661,7 @@ struct DevirtModule {
656661
VTableSlotInfo &SlotInfo,
657662
WholeProgramDevirtResolution *Res);
658663

659-
void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
664+
void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Function &JT,
660665
bool &IsExported);
661666
void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
662667
VTableSlotInfo &SlotInfo,
@@ -1453,7 +1458,7 @@ void DevirtModule::tryICallBranchFunnel(
14531458

14541459
FunctionType *FT =
14551460
FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1456-
Function *JT;
1461+
Function *JT = nullptr;
14571462
if (isa<MDString>(Slot.TypeID)) {
14581463
JT = Function::Create(FT, Function::ExternalLinkage,
14591464
M.getDataLayout().getProgramAddressSpace(),
@@ -1482,13 +1487,19 @@ void DevirtModule::tryICallBranchFunnel(
14821487
ReturnInst::Create(M.getContext(), nullptr, BB);
14831488

14841489
bool IsExported = false;
1485-
applyICallBranchFunnel(SlotInfo, JT, IsExported);
1490+
applyICallBranchFunnel(SlotInfo, *JT, IsExported);
14861491
if (IsExported)
14871492
Res->TheKind = WholeProgramDevirtResolution::BranchFunnel;
1493+
1494+
if (!JT->getEntryCount().has_value()) {
1495+
// FIXME: we could pass the necessary information through ThinLTO.
1496+
setExplicitlyUnknownFunctionEntryCount(*JT);
1497+
}
14881498
}
14891499

14901500
void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1491-
Constant *JT, bool &IsExported) {
1501+
Function &JT, bool &IsExported) {
1502+
DenseMap<Function *, double> FunctionEntryCounts;
14921503
auto Apply = [&](CallSiteInfo &CSInfo) {
14931504
if (CSInfo.isExported())
14941505
IsExported = true;
@@ -1516,8 +1527,8 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
15161527

15171528
NumBranchFunnel++;
15181529
if (RemarksEnabled)
1519-
VCallSite.emitRemark("branch-funnel",
1520-
JT->stripPointerCasts()->getName(), OREGetter);
1530+
VCallSite.emitRemark("branch-funnel", JT.stripPointerCasts()->getName(),
1531+
OREGetter);
15211532

15221533
// Pass the address of the vtable in the nest register, which is r10 on
15231534
// x86_64.
@@ -1533,11 +1544,26 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
15331544
llvm::append_range(Args, CB.args());
15341545

15351546
CallBase *NewCS = nullptr;
1547+
if (!JT.isDeclaration() && !ProfcheckDisableMetadataFixes) {
1548+
auto &F = *CB.getCaller();
1549+
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
1550+
auto EC = BFI.getBlockFreq(&F.getEntryBlock());
1551+
auto CC = F.getEntryCount(/*AllowSynthetic=*/true);
1552+
double CallCount = 0.0;
1553+
if (EC.getFrequency() != 0 && CC && CC->getCount() != 0) {
1554+
double CallFreq =
1555+
static_cast<double>(
1556+
BFI.getBlockFreq(CB.getParent()).getFrequency()) /
1557+
EC.getFrequency();
1558+
CallCount = CallFreq * CC->getCount();
1559+
}
1560+
FunctionEntryCounts[&JT] += CallCount;
1561+
}
15361562
if (isa<CallInst>(CB))
1537-
NewCS = IRB.CreateCall(NewFT, JT, Args);
1563+
NewCS = IRB.CreateCall(NewFT, &JT, Args);
15381564
else
15391565
NewCS =
1540-
IRB.CreateInvoke(NewFT, JT, cast<InvokeInst>(CB).getNormalDest(),
1566+
IRB.CreateInvoke(NewFT, &JT, cast<InvokeInst>(CB).getNormalDest(),
15411567
cast<InvokeInst>(CB).getUnwindDest(), Args);
15421568
NewCS->setCallingConv(CB.getCallingConv());
15431569

@@ -1571,6 +1597,11 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
15711597
Apply(SlotInfo.CSInfo);
15721598
for (auto &P : SlotInfo.ConstCSInfo)
15731599
Apply(P.second);
1600+
for (auto &[F, C] : FunctionEntryCounts) {
1601+
assert(!F->getEntryCount(/*AllowSynthetic=*/true) &&
1602+
"Unexpected entry count for funnel that was freshly synthesized");
1603+
F->setEntryCount(static_cast<uint64_t>(std::round(C)));
1604+
}
15741605
}
15751606

15761607
bool DevirtModule::tryEvaluateFunctionsWithArgs(
@@ -2244,12 +2275,12 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
22442275
if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
22452276
// The type of the function is irrelevant, because it's bitcast at calls
22462277
// anyhow.
2247-
Constant *JT = cast<Constant>(
2278+
auto *JT = cast<Function>(
22482279
M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
22492280
Type::getVoidTy(M.getContext()))
22502281
.getCallee());
22512282
bool IsExported = false;
2252-
applyICallBranchFunnel(SlotInfo, JT, IsExported);
2283+
applyICallBranchFunnel(SlotInfo, *JT, IsExported);
22532284
assert(!IsExported);
22542285
}
22552286
}
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
; A variant of branch-funnel.ll where we just check that the funnels' entry counts
2+
; are correctly set.
3+
;
4+
; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility %s | FileCheck --check-prefixes=RETP %s
5+
; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -passes=wholeprogramdevirt -whole-program-visibility | FileCheck --check-prefixes=NORETP %s
6+
; RUN: opt -passes=wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=RETP %s
7+
; RUN: opt -passes='wholeprogramdevirt,default<O3>' -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=O3 %s
8+
9+
; RETP: define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) !prof !11
10+
; RETP: define hidden void @__typeid_typeid1_rv_0_branch_funnel(ptr nest %0, ...) !prof !11
11+
; RETP: define internal void @branch_funnel(ptr nest %0, ...) !prof !10
12+
; RETP: define internal void @branch_funnel.1(ptr nest %0, ...) !prof !10
13+
; RETP: !10 = !{!"function_entry_count", i64 1000}
14+
; RETP: !11 = !{!"function_entry_count", i64 3000}
15+
16+
; NORETP: define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) !prof !11
17+
; NORETP: define hidden void @__typeid_typeid1_rv_0_branch_funnel(ptr nest %0, ...) !prof !11
18+
; NORETP: define internal void @branch_funnel(ptr nest %0, ...) !prof !11
19+
; NORETP: define internal void @branch_funnel.1(ptr nest %0, ...) !prof !11
20+
; NORETP: !11 = !{!"unknown"}
21+
22+
; O3: define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) local_unnamed_addr #5 !prof !11
23+
; O3: define hidden void @__typeid_typeid1_rv_0_branch_funnel(ptr nest %0, ...) local_unnamed_addr #5 !prof !11
24+
; O3: define internal void @branch_funnel(ptr nest %0, ...) unnamed_addr #5 !prof !10
25+
; O3: define internal void @branch_funnel.1(ptr nest %0, ...) unnamed_addr #5 !prof !10
26+
; O3: define hidden void @__typeid_typeid3_0_branch_funnel(ptr nest %0, ...) local_unnamed_addr #5 !prof !12
27+
; O3: define hidden void @__typeid_typeid3_rv_0_branch_funnel(ptr nest %0, ...) local_unnamed_addr #5 !prof !12
28+
; O3: !10 = !{!"function_entry_count", i64 1000}
29+
; O3: !11 = !{!"function_entry_count", i64 3000}
30+
; O3: !12 = !{!"unknown"}
31+
32+
target datalayout = "e-p:64:64"
33+
target triple = "x86_64-unknown-linux-gnu"
34+
35+
@vt1_1 = constant [1 x ptr] [ptr @vf1_1], !type !0
36+
@vt1_2 = constant [1 x ptr] [ptr @vf1_2], !type !0
37+
38+
declare i32 @vf1_1(ptr %this, i32 %arg)
39+
declare i32 @vf1_2(ptr %this, i32 %arg)
40+
41+
@vt2_1 = constant [1 x ptr] [ptr @vf2_1], !type !1
42+
@vt2_2 = constant [1 x ptr] [ptr @vf2_2], !type !1
43+
@vt2_3 = constant [1 x ptr] [ptr @vf2_3], !type !1
44+
@vt2_4 = constant [1 x ptr] [ptr @vf2_4], !type !1
45+
@vt2_5 = constant [1 x ptr] [ptr @vf2_5], !type !1
46+
@vt2_6 = constant [1 x ptr] [ptr @vf2_6], !type !1
47+
@vt2_7 = constant [1 x ptr] [ptr @vf2_7], !type !1
48+
@vt2_8 = constant [1 x ptr] [ptr @vf2_8], !type !1
49+
@vt2_9 = constant [1 x ptr] [ptr @vf2_9], !type !1
50+
@vt2_10 = constant [1 x ptr] [ptr @vf2_10], !type !1
51+
@vt2_11 = constant [1 x ptr] [ptr @vf2_11], !type !1
52+
53+
declare i32 @vf2_1(ptr %this, i32 %arg)
54+
declare i32 @vf2_2(ptr %this, i32 %arg)
55+
declare i32 @vf2_3(ptr %this, i32 %arg)
56+
declare i32 @vf2_4(ptr %this, i32 %arg)
57+
declare i32 @vf2_5(ptr %this, i32 %arg)
58+
declare i32 @vf2_6(ptr %this, i32 %arg)
59+
declare i32 @vf2_7(ptr %this, i32 %arg)
60+
declare i32 @vf2_8(ptr %this, i32 %arg)
61+
declare i32 @vf2_9(ptr %this, i32 %arg)
62+
declare i32 @vf2_10(ptr %this, i32 %arg)
63+
declare i32 @vf2_11(ptr %this, i32 %arg)
64+
65+
@vt3_1 = constant [1 x ptr] [ptr @vf3_1], !type !2
66+
@vt3_2 = constant [1 x ptr] [ptr @vf3_2], !type !2
67+
68+
declare i32 @vf3_1(ptr %this, i32 %arg)
69+
declare i32 @vf3_2(ptr %this, i32 %arg)
70+
71+
@vt4_1 = constant [1 x ptr] [ptr @vf4_1], !type !3
72+
@vt4_2 = constant [1 x ptr] [ptr @vf4_2], !type !3
73+
74+
declare i32 @vf4_1(ptr %this, i32 %arg)
75+
declare i32 @vf4_2(ptr %this, i32 %arg)
76+
77+
declare ptr @llvm.load.relative.i32(ptr, i32)
78+
79+
;; These are relative vtables equivalent to the ones above.
80+
@vt1_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1_1 to i64), i64 ptrtoint (ptr @vt1_1_rv to i64)) to i32)], !type !5
81+
@vt1_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1_2 to i64), i64 ptrtoint (ptr @vt1_2_rv to i64)) to i32)], !type !5
82+
83+
@vt2_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_1 to i64), i64 ptrtoint (ptr @vt2_1_rv to i64)) to i32)], !type !6
84+
@vt2_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_2 to i64), i64 ptrtoint (ptr @vt2_2_rv to i64)) to i32)], !type !6
85+
@vt2_3_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_3 to i64), i64 ptrtoint (ptr @vt2_3_rv to i64)) to i32)], !type !6
86+
@vt2_4_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_4 to i64), i64 ptrtoint (ptr @vt2_4_rv to i64)) to i32)], !type !6
87+
@vt2_5_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_5 to i64), i64 ptrtoint (ptr @vt2_5_rv to i64)) to i32)], !type !6
88+
@vt2_6_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_6 to i64), i64 ptrtoint (ptr @vt2_6_rv to i64)) to i32)], !type !6
89+
@vt2_7_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_7 to i64), i64 ptrtoint (ptr @vt2_7_rv to i64)) to i32)], !type !6
90+
@vt2_8_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_8 to i64), i64 ptrtoint (ptr @vt2_8_rv to i64)) to i32)], !type !6
91+
@vt2_9_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_9 to i64), i64 ptrtoint (ptr @vt2_9_rv to i64)) to i32)], !type !6
92+
@vt2_10_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_10 to i64), i64 ptrtoint (ptr @vt2_10_rv to i64)) to i32)], !type !6
93+
@vt2_11_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_11 to i64), i64 ptrtoint (ptr @vt2_11_rv to i64)) to i32)], !type !6
94+
95+
@vt3_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf3_1 to i64), i64 ptrtoint (ptr @vt3_1_rv to i64)) to i32)], !type !7
96+
@vt3_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf3_2 to i64), i64 ptrtoint (ptr @vt3_2_rv to i64)) to i32)], !type !7
97+
98+
@vt4_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4_1 to i64), i64 ptrtoint (ptr @vt4_1_rv to i64)) to i32)], !type !8
99+
@vt4_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4_2 to i64), i64 ptrtoint (ptr @vt4_2_rv to i64)) to i32)], !type !8
100+
101+
102+
define i32 @fn1(ptr %obj) #0 !prof !10 {
103+
%vtable = load ptr, ptr %obj
104+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1")
105+
call void @llvm.assume(i1 %p)
106+
%fptr = load ptr, ptr %vtable
107+
%result = call i32 %fptr(ptr %obj, i32 1)
108+
ret i32 %result
109+
}
110+
111+
define i32 @fn1_rv(ptr %obj) #0 !prof !10 {
112+
%vtable = load ptr, ptr %obj
113+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1_rv")
114+
call void @llvm.assume(i1 %p)
115+
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
116+
%result = call i32 %fptr(ptr %obj, i32 1)
117+
ret i32 %result
118+
}
119+
120+
define i32 @fn2(ptr %obj) #0 !prof !10 {
121+
%vtable = load ptr, ptr %obj
122+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2")
123+
call void @llvm.assume(i1 %p)
124+
%fptr = load ptr, ptr %vtable
125+
%result = call i32 %fptr(ptr %obj, i32 1)
126+
ret i32 %result
127+
}
128+
129+
define i32 @fn2_rv(ptr %obj) #0 !prof !10 {
130+
%vtable = load ptr, ptr %obj
131+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2_rv")
132+
call void @llvm.assume(i1 %p)
133+
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
134+
%result = call i32 %fptr(ptr %obj, i32 1)
135+
ret i32 %result
136+
}
137+
138+
define i32 @fn3(ptr %obj) #0 !prof !10 {
139+
%vtable = load ptr, ptr %obj
140+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !4)
141+
call void @llvm.assume(i1 %p)
142+
%fptr = load ptr, ptr %vtable
143+
%result = call i32 %fptr(ptr %obj, i32 1)
144+
ret i32 %result
145+
}
146+
147+
define i32 @fn3_rv(ptr %obj) #0 !prof !10 {
148+
%vtable = load ptr, ptr %obj
149+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !9)
150+
call void @llvm.assume(i1 %p)
151+
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
152+
%result = call i32 %fptr(ptr %obj, i32 1)
153+
ret i32 %result
154+
}
155+
156+
define i32 @fn4(ptr %obj) #0 !prof !10 {
157+
%p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1")
158+
call void @llvm.assume(i1 %p)
159+
%fptr = load ptr, ptr @vt1_1
160+
%result = call i32 %fptr(ptr %obj, i32 1)
161+
ret i32 %result
162+
}
163+
164+
define i32 @fn4_cpy(ptr %obj) #0 !prof !10 {
165+
%p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1")
166+
call void @llvm.assume(i1 %p)
167+
%fptr = load ptr, ptr @vt1_1
168+
%result = call i32 %fptr(ptr %obj, i32 1)
169+
ret i32 %result
170+
}
171+
172+
define i32 @fn4_rv(ptr %obj) #0 !prof !10 {
173+
%p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv")
174+
call void @llvm.assume(i1 %p)
175+
%fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0)
176+
%result = call i32 %fptr(ptr %obj, i32 1)
177+
ret i32 %result
178+
}
179+
180+
define i32 @fn4_rv_cpy(ptr %obj) #0 !prof !10 {
181+
%p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv")
182+
call void @llvm.assume(i1 %p)
183+
%fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0)
184+
%result = call i32 %fptr(ptr %obj, i32 1)
185+
ret i32 %result
186+
}
187+
188+
declare i1 @llvm.type.test(ptr, metadata)
189+
declare void @llvm.assume(i1)
190+
191+
!0 = !{i32 0, !"typeid1"}
192+
!1 = !{i32 0, !"typeid2"}
193+
!2 = !{i32 0, !"typeid3"}
194+
!3 = !{i32 0, !4}
195+
!4 = distinct !{}
196+
!5 = !{i32 0, !"typeid1_rv"}
197+
!6 = !{i32 0, !"typeid2_rv"}
198+
!7 = !{i32 0, !"typeid3_rv"}
199+
!8 = !{i32 0, !9}
200+
!9 = distinct !{}
201+
!10 = !{!"function_entry_count", i64 1000}
202+
203+
attributes #0 = { "target-features"="+retpoline" }

0 commit comments

Comments
 (0)