-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[WPD]: Enable speculative devirtualization. #159048
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,8 @@ | |
// returns 0, or a single vtable's function returns 1, replace each virtual | ||
// call with a comparison of the vptr against that vtable's address. | ||
// | ||
// This pass is intended to be used during the regular and thin LTO pipelines: | ||
// This pass is intended to be used during the regular LTO, ThinLTO, and | ||
// non-LTO pipelines: | ||
// | ||
// During regular LTO, the pass determines the best optimization for each | ||
// virtual call and applies the resolutions directly to virtual calls that are | ||
|
@@ -48,6 +49,14 @@ | |
// is supported. | ||
// - Import phase: (same as with hybrid case above). | ||
// | ||
// During speculative devirtualization mode (not restricted to LTO): | ||
// - The pass applies speculative devirtualization without requiring any type of | ||
// visibility. | ||
// - Skips other features like virtual constant propagation, uniform return | ||
// value optimization, unique return value optimization and branch funnels as | ||
// they need LTO. | ||
// - This mode is enabled via 'devirtualize-speculatively' flag. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "llvm/Transforms/IPO/WholeProgramDevirt.h" | ||
|
@@ -61,7 +70,9 @@ | |
#include "llvm/Analysis/AssumptionCache.h" | ||
#include "llvm/Analysis/BasicAliasAnalysis.h" | ||
#include "llvm/Analysis/BlockFrequencyInfo.h" | ||
#include "llvm/Analysis/ModuleSummaryAnalysis.h" | ||
#include "llvm/Analysis/OptimizationRemarkEmitter.h" | ||
#include "llvm/Analysis/ProfileSummaryInfo.h" | ||
#include "llvm/Analysis/TypeMetadataUtils.h" | ||
#include "llvm/Bitcode/BitcodeReader.h" | ||
#include "llvm/Bitcode/BitcodeWriter.h" | ||
|
@@ -145,6 +156,13 @@ static cl::opt<std::string> ClWriteSummary( | |
"bitcode, otherwise YAML"), | ||
cl::Hidden); | ||
|
||
// Opt-in flag (off by default) for speculative devirtualization. When set, | ||
// vtables with public LTO visibility are no longer rejected as devirtualization | ||
// candidates, and single-implementation devirtualization is guarded by a | ||
// function-pointer comparison that falls back to the original indirect call. | ||
// TODO: This option should eventually support vtables with any public | ||
// visibility, with or without LTO. | ||
static cl::opt<bool> ClDevirtualizeSpeculatively( | ||
"devirtualize-speculatively", | ||
cl::desc("Enable speculative devirtualization optimization"), | ||
cl::init(false)); | ||
|
||
static cl::opt<unsigned> | ||
ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, | ||
cl::init(10), | ||
|
@@ -587,6 +605,9 @@ struct DevirtModule { | |
|
||
ModuleSummaryIndex *const ExportSummary; | ||
const ModuleSummaryIndex *const ImportSummary; | ||
// True if ExportSummary was built locally from the module. | ||
// Default is false unless explicitly set. | ||
const bool HasLocalSummary; | ||
|
||
IntegerType *const Int8Ty; | ||
PointerType *const Int8PtrTy; | ||
|
@@ -624,10 +645,12 @@ struct DevirtModule { | |
|
||
DevirtModule(Module &M, ModuleAnalysisManager &MAM, | ||
ModuleSummaryIndex *ExportSummary, | ||
const ModuleSummaryIndex *ImportSummary) | ||
const ModuleSummaryIndex *ImportSummary, | ||
bool HasLocalSummary = false) | ||
: M(M), MAM(MAM), | ||
FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), | ||
ExportSummary(ExportSummary), ImportSummary(ImportSummary), | ||
HasLocalSummary(HasLocalSummary), | ||
Int8Ty(Type::getInt8Ty(M.getContext())), | ||
Int8PtrTy(PointerType::getUnqual(M.getContext())), | ||
Int32Ty(Type::getInt32Ty(M.getContext())), | ||
|
@@ -798,7 +821,8 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, | |
return PreservedAnalyses::all(); | ||
return PreservedAnalyses::none(); | ||
} | ||
if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run()) | ||
if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, HasLocalSummary) | ||
.run()) | ||
return PreservedAnalyses::all(); | ||
return PreservedAnalyses::none(); | ||
} | ||
|
@@ -896,6 +920,8 @@ void llvm::updatePublicTypeTestCalls(Module &M, | |
CI->eraseFromParent(); | ||
} | ||
} else { | ||
// TODO: Don't replace public type tests when speculative devirtualization | ||
// gets enabled in LTO mode. | ||
auto *True = ConstantInt::getTrue(M.getContext()); | ||
for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) { | ||
auto *CI = cast<CallInst>(U.getUser()); | ||
|
@@ -1087,10 +1113,10 @@ bool DevirtModule::tryFindVirtualCallTargets( | |
if (!TM.Bits->GV->isConstant()) | ||
return false; | ||
|
||
// We cannot perform whole program devirtualization analysis on a vtable | ||
// with public LTO visibility. | ||
if (TM.Bits->GV->getVCallVisibility() == | ||
GlobalObject::VCallVisibilityPublic) | ||
// Without ClDevirtualizeSpeculatively, we cannot perform whole program | ||
// devirtualization analysis on a vtable with public LTO visibility. | ||
if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() == | ||
GlobalObject::VCallVisibilityPublic) | ||
return false; | ||
|
||
Function *Fn = nullptr; | ||
|
@@ -1109,6 +1135,12 @@ bool DevirtModule::tryFindVirtualCallTargets( | |
if (Fn->getName() == "__cxa_pure_virtual") | ||
continue; | ||
|
||
// In most cases empty functions will be overridden by the | ||
// implementation of the derived class, so we can skip them. | ||
if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() && | ||
Fn->getInstructionCount() <= 1) | ||
continue; | ||
|
||
// We can disregard unreachable functions as possible call targets, as | ||
// unreachable functions shouldn't be called. | ||
if (mustBeUnreachableFunction(Fn, ExportSummary)) | ||
|
@@ -1227,10 +1259,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, | |
CallTrap->setDebugLoc(CB.getDebugLoc()); | ||
} | ||
|
||
// If fallback checking is enabled, add support to compare the virtual | ||
// function pointer to the devirtualized target. In case of a mismatch, | ||
// fall back to indirect call. | ||
if (DevirtCheckMode == WPDCheckMode::Fallback) { | ||
// If fallback checking or speculative devirtualization is enabled, | ||
// add support to compare the virtual function pointer to the | ||
// devirtualized target. In case of a mismatch, fall back to indirect | ||
// call. | ||
if (DevirtCheckMode == WPDCheckMode::Fallback || | ||
ClDevirtualizeSpeculatively) { | ||
MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights(); | ||
// Version the indirect call site. If the called value is equal to the | ||
// given callee, 'NewInst' will be executed, otherwise the original call | ||
|
@@ -1329,10 +1363,10 @@ bool DevirtModule::trySingleImplDevirt( | |
if (!IsExported) | ||
return false; | ||
|
||
// If the only implementation has local linkage, we must promote to external | ||
// to make it visible to thin LTO objects. We can only get here during the | ||
// ThinLTO export phase. | ||
if (TheFn->hasLocalLinkage()) { | ||
// If the only implementation has local linkage, we must promote | ||
// to external to make it visible to thin LTO objects. | ||
// This change should be safe only in LTO mode. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe change this to something like: "We only want to do this in the ThinLTO export phase, in which case we will not have built a local summary for the module." |
||
if (!HasLocalSummary && TheFn->hasLocalLinkage()) { | ||
std::string NewName = (TheFn->getName() + ".llvm.merged").str(); | ||
|
||
// Since we are renaming the function, any comdats with the same name must | ||
|
@@ -2061,15 +2095,15 @@ void DevirtModule::scanTypeTestUsers( | |
Function *TypeTestFunc, | ||
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) { | ||
// Find all virtual calls via a virtual table pointer %p under an assumption | ||
// of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p | ||
// points to a member of the type identifier %md. Group calls by (type ID, | ||
// offset) pair (effectively the identity of the virtual function) and store | ||
// to CallSlots. | ||
// of the form llvm.assume(llvm.type.test(%p, %md)) or | ||
// llvm.assume(llvm.public.type.test(%p, %md)). | ||
// This indicates that %p points to a member of the type identifier %md. | ||
// Group calls by (type ID, offset) pair (effectively the identity of the | ||
// virtual function) and store to CallSlots. | ||
for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) { | ||
auto *CI = dyn_cast<CallInst>(U.getUser()); | ||
if (!CI) | ||
continue; | ||
|
||
// Search for virtual calls based on %p and add them to DevirtCalls. | ||
SmallVector<DevirtCallSite, 1> DevirtCalls; | ||
SmallVector<CallInst *, 1> Assumes; | ||
|
@@ -2352,6 +2386,12 @@ bool DevirtModule::run() { | |
(ImportSummary && ImportSummary->partiallySplitLTOUnits())) | ||
return false; | ||
|
||
Function *PublicTypeTestFunc = nullptr; | ||
// If we are in speculative devirtualization mode, we can work on the public | ||
// type test intrinsics. | ||
if (ClDevirtualizeSpeculatively) | ||
PublicTypeTestFunc = | ||
Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a test with public type tests? |
||
Function *TypeTestFunc = | ||
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test); | ||
Function *TypeCheckedLoadFunc = | ||
|
@@ -2365,8 +2405,9 @@ bool DevirtModule::run() { | |
// module, this pass has nothing to do. But if we are exporting, we also need | ||
// to handle any users that appear only in the function summaries. | ||
if (!ExportSummary && | ||
(!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc || | ||
AssumeFunc->use_empty()) && | ||
(((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) && | ||
(!TypeTestFunc || TypeTestFunc->use_empty())) || | ||
!AssumeFunc || AssumeFunc->use_empty()) && | ||
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) && | ||
(!TypeCheckedLoadRelativeFunc || | ||
TypeCheckedLoadRelativeFunc->use_empty())) | ||
|
@@ -2377,6 +2418,9 @@ bool DevirtModule::run() { | |
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap; | ||
buildTypeIdentifierMap(Bits, TypeIdMap); | ||
|
||
if (PublicTypeTestFunc && AssumeFunc) | ||
scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap); | ||
|
||
if (TypeTestFunc && AssumeFunc) | ||
scanTypeTestUsers(TypeTestFunc, TypeIdMap); | ||
|
||
|
@@ -2476,8 +2520,12 @@ bool DevirtModule::run() { | |
.WPDRes[S.first.ByteOffset]; | ||
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos, | ||
S.first.ByteOffset, ExportSummary)) { | ||
|
||
if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { | ||
bool SingleImplDevirt = | ||
trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res); | ||
// Outside of speculative devirtualization mode, try to apply virtual | ||
// constant propagation or branch funneling. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a TODO that these optimizations should eventually be allowed on calls from non-public type tests? |
||
// TODO: This should eventually be enabled for non-public type tests. | ||
if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) { | ||
DidVirtualConstProp |= | ||
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
; -stats requires asserts | ||
; REQUIRES: asserts | ||
|
||
; Check that we can still devirtualize outside LTO mode when speculative devirtualization is enabled. | ||
; Check that we skip devirtualization for empty functions in speculative devirtualization mode | ||
|
||
; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively \ | ||
; RUN: -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s | ||
|
||
target datalayout = "e-p:64:64" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf | ||
; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf | ||
; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf | ||
; CHECK: remark: devirt-single.cc:13:0: devirtualized vf | ||
; CHECK-NOT: devirtualized | ||
|
||
@vt1 = constant [1 x ptr] [ptr @vf], !type !8 | ||
@vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12 | ||
|
||
define i1 @vf(ptr %this) #0 !dbg !7 { | ||
ret i1 true | ||
} | ||
|
||
; This should NOT be devirtualized because empty void functions are skipped | ||
; in speculative devirtualization mode. | ||
define void @vf_empty(ptr %this) !dbg !11 { | ||
ret void | ||
} | ||
|
||
; CHECK: define void @call | ||
define void @call(ptr %obj) #1 !dbg !5 { | ||
%vtable = load ptr, ptr %obj | ||
%p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid") | ||
call void @llvm.assume(i1 %p) | ||
%fptr = load ptr, ptr %vtable | ||
; CHECK: if.true.direct_targ: | ||
; CHECK: call i1 @vf( | ||
; CHECK: if.false.orig_indirect: | ||
; CHECK: call i1 %fptr( | ||
call i1 %fptr(ptr %obj), !dbg !6 | ||
ret void | ||
} | ||
|
||
|
||
; CHECK: define void @call1 | ||
define void @call1(ptr %obj) #1 !dbg !9 { | ||
%vtable = load ptr, ptr %obj | ||
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") | ||
call void @llvm.assume(i1 %p) | ||
%fptr = load ptr, ptr %vtable, align 8 | ||
; CHECK: call i1 %fptr | ||
%1 = call i1 %fptr(ptr %obj), !dbg !10 | ||
ret void | ||
} | ||
declare ptr @llvm.load.relative.i32(ptr, i32) | ||
|
||
@vt3 = private unnamed_addr constant [1 x i32] [ | ||
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) | ||
], align 4, !type !15 | ||
|
||
; CHECK: define void @call2 | ||
define void @call2(ptr %obj) #1 !dbg !13 { | ||
%vtable = load ptr, ptr %obj | ||
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") | ||
call void @llvm.assume(i1 %p) | ||
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) | ||
; CHECK: if.true.direct_targ: | ||
; CHECK: call i1 @vf( | ||
; CHECK: if.false.orig_indirect: | ||
; CHECK: call i1 %fptr( | ||
call i1 %fptr(ptr %obj), !dbg !14 | ||
ret void | ||
} | ||
|
||
@_ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [ | ||
i32 0, ; offset to top | ||
i32 0, ; rtti | ||
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; relative offset to @vf | ||
] }, align 4, !type !18 | ||
|
||
; CHECK: define void @call3 | ||
define void @call3(ptr %obj) #1 !dbg !16 { | ||
%vtable = load ptr, ptr %obj | ||
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3") | ||
call void @llvm.assume(i1 %p) | ||
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8) | ||
; CHECK: if.true.direct_targ: | ||
; CHECK: call i1 @vf( | ||
; CHECK: if.false.orig_indirect: | ||
; CHECK: call i1 %fptr( | ||
call i1 %fptr(ptr %obj), !dbg !17 | ||
ret void | ||
} | ||
|
||
|
||
declare i1 @llvm.type.test(ptr, metadata) | ||
declare i1 @llvm.public.type.test(ptr, metadata) | ||
declare void @llvm.assume(i1) | ||
|
||
!llvm.dbg.cu = !{!0} | ||
!llvm.module.flags = !{!2, !3} | ||
!llvm.ident = !{!4} | ||
|
||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) | ||
!1 = !DIFile(filename: "devirt-single.cc", directory: ".") | ||
!2 = !{i32 2, !"Dwarf Version", i32 4} | ||
!3 = !{i32 2, !"Debug Info Version", i32 3} | ||
!4 = !{!"clang version 4.0.0 (trunk 278098)"} | ||
!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) | ||
!6 = !DILocation(line: 30, column: 32, scope: !5) | ||
!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) | ||
!8 = !{i32 0, !"typeid"} | ||
|
||
!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) | ||
!10 = !DILocation(line: 35, column: 32, scope: !9) | ||
!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0) | ||
!12 = !{i32 0, !"typeid1"} | ||
|
||
!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) | ||
!14 = !DILocation(line: 41, column: 32, scope: !13) | ||
!15 = !{i32 0, !"typeid2"} | ||
|
||
!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) | ||
!17 = !DILocation(line: 51, column: 32, scope: !16) | ||
!18 = !{i32 0, !"typeid3"} | ||
|
||
|
||
|
||
; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets | ||
; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't find where this is set. In fact, I can't find anymore where ExportSummary is being built locally?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it's not built locally yet, but in future patches it will be. So here I want to add the default behaviour (no summary is built).