Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
bool UseCommandLine = false;
// True if ExportSummary was built locally from the module rather than
// provided externally to the pass (e.g., during LTO). Defaults to false; it
// is only set to true when the summary is built locally.
bool HasLocalSummary = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't find where this is set. In fact, I can't find anymore where ExportSummary is being built locally?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's not built locally yet, but in future patches it will be built locally. So here I want to add only the default behaviour (no summary is built).

WholeProgramDevirtPass()
: ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
Expand Down
96 changes: 72 additions & 24 deletions llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
// This pass is intended to be used during the regular and thin LTO pipelines:
// This pass is intended to be used during the regular LTO, thin LTO, and
// non-LTO pipelines:
//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
Expand All @@ -48,6 +49,14 @@
// is supported.
// - Import phase: (same as with hybrid case above).
//
// During speculative devirtualization mode (not restricted to LTO):
// - The pass applies speculative devirtualization without requiring any type of
// visibility.
// - Skips other features like virtual constant propagation, uniform return
// value optimization, unique return value optimization and branch funnels as
// they need LTO.
// - This mode is enabled via 'devirtualize-speculatively' flag.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
Expand All @@ -61,7 +70,9 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
Expand Down Expand Up @@ -145,6 +156,13 @@ static cl::opt<std::string> ClWriteSummary(
"bitcode, otherwise YAML"),
cl::Hidden);

// TODO: This option should eventually support any public visibility vtables,
// with or without LTO.
static cl::opt<bool> ClDevirtualizeSpeculatively(
"devirtualize-speculatively",
cl::desc("Enable speculative devirtualization optimization"),
cl::init(false));

static cl::opt<unsigned>
ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
cl::init(10),
Expand Down Expand Up @@ -587,6 +605,9 @@ struct DevirtModule {

ModuleSummaryIndex *const ExportSummary;
const ModuleSummaryIndex *const ImportSummary;
// True if ExportSummary was built locally from the module.
// Default is false unless explicitly set.
const bool HasLocalSummary;

IntegerType *const Int8Ty;
PointerType *const Int8PtrTy;
Expand Down Expand Up @@ -624,10 +645,12 @@ struct DevirtModule {

DevirtModule(Module &M, ModuleAnalysisManager &MAM,
ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
const ModuleSummaryIndex *ImportSummary,
bool HasLocalSummary = false)
: M(M), MAM(MAM),
FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
ExportSummary(ExportSummary), ImportSummary(ImportSummary),
HasLocalSummary(HasLocalSummary),
Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(PointerType::getUnqual(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
Expand Down Expand Up @@ -798,7 +821,8 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run())
if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, HasLocalSummary)
.run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
Expand Down Expand Up @@ -896,6 +920,8 @@ void llvm::updatePublicTypeTestCalls(Module &M,
CI->eraseFromParent();
}
} else {
// TODO: Don't replace public type tests when speculative devirtualization
// gets enabled in LTO mode.
auto *True = ConstantInt::getTrue(M.getContext());
for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
auto *CI = cast<CallInst>(U.getUser());
Expand Down Expand Up @@ -1087,10 +1113,10 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (!TM.Bits->GV->isConstant())
return false;

// We cannot perform whole program devirtualization analysis on a vtable
// with public LTO visibility.
if (TM.Bits->GV->getVCallVisibility() ==
GlobalObject::VCallVisibilityPublic)
// Without ClDevirtualizeSpeculatively, we cannot perform whole program
// devirtualization analysis on a vtable with public LTO visibility.
if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() ==
GlobalObject::VCallVisibilityPublic)
return false;

Function *Fn = nullptr;
Expand All @@ -1109,6 +1135,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (Fn->getName() == "__cxa_pure_virtual")
continue;

// In most cases empty functions will be overridden by the
// implementation of the derived class, so we can skip them.
if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() &&
Fn->getInstructionCount() <= 1)
continue;

// We can disregard unreachable functions as possible call targets, as
// unreachable functions shouldn't be called.
if (mustBeUnreachableFunction(Fn, ExportSummary))
Expand Down Expand Up @@ -1227,10 +1259,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
CallTrap->setDebugLoc(CB.getDebugLoc());
}

// If fallback checking is enabled, add support to compare the virtual
// function pointer to the devirtualized target. In case of a mismatch,
// fall back to indirect call.
if (DevirtCheckMode == WPDCheckMode::Fallback) {
// If fallback checking or speculative devirtualization is enabled,
// add support to compare the virtual function pointer to the
// devirtualized target. In case of a mismatch, fall back to indirect
// call.
if (DevirtCheckMode == WPDCheckMode::Fallback ||
ClDevirtualizeSpeculatively) {
MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
// Version the indirect call site. If the called value is equal to the
// given callee, 'NewInst' will be executed, otherwise the original call
Expand Down Expand Up @@ -1329,10 +1363,10 @@ bool DevirtModule::trySingleImplDevirt(
if (!IsExported)
return false;

// If the only implementation has local linkage, we must promote to external
// to make it visible to thin LTO objects. We can only get here during the
// ThinLTO export phase.
if (TheFn->hasLocalLinkage()) {
// If the only implementation has local linkage, we must promote
// to external to make it visible to thin LTO objects.
// We only want to do this in the ThinLTO export phase, in which case we
// will not have built a local summary for the module.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe change this to something like: "We only want to do this in the ThinLTO export phase, in which case we will not have built a local summary for the module."

if (!HasLocalSummary && TheFn->hasLocalLinkage()) {
std::string NewName = (TheFn->getName() + ".llvm.merged").str();

// Since we are renaming the function, any comdats with the same name must
Expand Down Expand Up @@ -2061,15 +2095,15 @@ void DevirtModule::scanTypeTestUsers(
Function *TypeTestFunc,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
// Find all virtual calls via a virtual table pointer %p under an assumption
// of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
// points to a member of the type identifier %md. Group calls by (type ID,
// offset) pair (effectively the identity of the virtual function) and store
// to CallSlots.
// of the form llvm.assume(llvm.type.test(%p, %md)) or
// llvm.assume(llvm.public.type.test(%p, %md)).
// This indicates that %p points to a member of the type identifier %md.
// Group calls by (type ID, offset) pair (effectively the identity of the
// virtual function) and store to CallSlots.
for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;

// Search for virtual calls based on %p and add them to DevirtCalls.
SmallVector<DevirtCallSite, 1> DevirtCalls;
SmallVector<CallInst *, 1> Assumes;
Expand Down Expand Up @@ -2352,6 +2386,12 @@ bool DevirtModule::run() {
(ImportSummary && ImportSummary->partiallySplitLTOUnits()))
return false;

Function *PublicTypeTestFunc = nullptr;
// If we are in speculative devirtualization mode, we can work on the public
// type test intrinsics.
if (ClDevirtualizeSpeculatively)
PublicTypeTestFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a test with public type tests?

Function *TypeTestFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
Function *TypeCheckedLoadFunc =
Expand All @@ -2365,8 +2405,9 @@ bool DevirtModule::run() {
// module, this pass has nothing to do. But if we are exporting, we also need
// to handle any users that appear only in the function summaries.
if (!ExportSummary &&
(!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
AssumeFunc->use_empty()) &&
(((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) &&
(!TypeTestFunc || TypeTestFunc->use_empty())) ||
!AssumeFunc || AssumeFunc->use_empty()) &&
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
(!TypeCheckedLoadRelativeFunc ||
TypeCheckedLoadRelativeFunc->use_empty()))
Expand All @@ -2377,6 +2418,9 @@ bool DevirtModule::run() {
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
buildTypeIdentifierMap(Bits, TypeIdMap);

if (PublicTypeTestFunc && AssumeFunc)
scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap);

if (TypeTestFunc && AssumeFunc)
scanTypeTestUsers(TypeTestFunc, TypeIdMap);

Expand Down Expand Up @@ -2476,8 +2520,12 @@ bool DevirtModule::run() {
.WPDRes[S.first.ByteOffset];
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
S.first.ByteOffset, ExportSummary)) {

if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
bool SingleImplDevirt =
trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
// Outside of speculative devirtualization mode, try to apply virtual
// constant propagation or branch funneling.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a TODO that these optimizations should eventually be allowed on calls from non-public type tests?

// TODO: This should eventually be enabled for non-public type tests.
if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
DidVirtualConstProp |=
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
; -stats requires asserts
; REQUIRES: asserts

; Check that we can still devirtualize outside LTO mode when speculative devirtualization is enabled.
; Check that we skip devirtualization for empty functions in speculative devirtualization mode.

; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively \
; RUN: -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s

target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf
; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf
; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf
; CHECK: remark: devirt-single.cc:13:0: devirtualized vf
; CHECK-NOT: devirtualized

; Vtables under test: @vt1 (type id "typeid") holds @vf, and @vt2 (type id
; "typeid1") holds the empty function @vf_empty.
@vt1 = constant [1 x ptr] [ptr @vf], !type !8
@vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12

; Non-empty virtual function referenced by @vt1, @vt3 and @_ZTV1A.local. It is
; the direct-call target the test expects in @call, @call2 and @call3.
define i1 @vf(ptr %this) #0 !dbg !7 {
ret i1 true
}

; Calls that can only reach this function should NOT be devirtualized: in
; speculative devirtualization mode, empty functions (void return, nothing
; but a return instruction) are skipped as candidate targets.
define void @vf_empty(ptr %this) !dbg !11 {
ret void
}

; Virtual call through a vtable guarded by llvm.public.type.test on "typeid".
; The only vtable in this file tagged with that type id (@vt1) holds @vf, so
; the test expects the call to be versioned: a direct call to @vf on the
; matching path, with the original indirect call kept as the fallback.
; CHECK: define void @call
define void @call(ptr %obj) #1 !dbg !5 {
%vtable = load ptr, ptr %obj
%p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid")
call void @llvm.assume(i1 %p)
%fptr = load ptr, ptr %vtable
; CHECK: if.true.direct_targ:
; CHECK: call i1 @vf(
; CHECK: if.false.orig_indirect:
; CHECK: call i1 %fptr(
call i1 %fptr(ptr %obj), !dbg !6
ret void
}


; Virtual call guarded by llvm.type.test on "typeid1". The only vtable tagged
; with that type id (@vt2) holds the empty @vf_empty, so the test expects the
; call to remain an indirect call through %fptr.
; CHECK: define void @call1
define void @call1(ptr %obj) #1 !dbg !9 {
%vtable = load ptr, ptr %obj
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1")
call void @llvm.assume(i1 %p)
%fptr = load ptr, ptr %vtable, align 8
; CHECK: call i1 %fptr
%1 = call i1 %fptr(ptr %obj), !dbg !10
ret void
}
declare ptr @llvm.load.relative.i32(ptr, i32)

; Relative vtable tagged with type id "typeid2": its single i32 entry is the
; offset of @vf from the address of @vt3 itself.
@vt3 = private unnamed_addr constant [1 x i32] [
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32)
], align 4, !type !15

; Virtual call through a relative vtable: the target is fetched with
; llvm.load.relative.i32 at offset 0 and guarded by llvm.type.test on
; "typeid2" (the type id of @vt3, whose entry refers to @vf). The test expects
; a guarded direct call to @vf with the original indirect call as fallback.
; CHECK: define void @call2
define void @call2(ptr %obj) #1 !dbg !13 {
%vtable = load ptr, ptr %obj
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2")
call void @llvm.assume(i1 %p)
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
; CHECK: if.true.direct_targ:
; CHECK: call i1 @vf(
; CHECK: if.false.orig_indirect:
; CHECK: call i1 %fptr(
call i1 %fptr(ptr %obj), !dbg !14
ret void
}

; Relative vtable tagged with type id "typeid3", laid out as three i32 slots:
; offset-to-top, rtti, and one function entry. The function entry stores the
; offset of @vf from the address of that third slot.
@_ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [
i32 0, ; offset to top
i32 0, ; rtti
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; relative offset to @vf
] }, align 4, !type !18

; Virtual call through a relative vtable at a non-zero slot: the target is
; fetched with llvm.load.relative.i32 at byte offset 8 and guarded by
; llvm.type.test on "typeid3" (the type id of @_ZTV1A.local). The test expects
; a guarded direct call to @vf with the original indirect call as fallback.
; CHECK: define void @call3
define void @call3(ptr %obj) #1 !dbg !16 {
%vtable = load ptr, ptr %obj
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3")
call void @llvm.assume(i1 %p)
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8)
; CHECK: if.true.direct_targ:
; CHECK: call i1 @vf(
; CHECK: if.false.orig_indirect:
; CHECK: call i1 %fptr(
call i1 %fptr(ptr %obj), !dbg !17
ret void
}


declare i1 @llvm.type.test(ptr, metadata)
declare i1 @llvm.public.type.test(ptr, metadata)
declare void @llvm.assume(i1)

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
!llvm.ident = !{!4}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
!1 = !DIFile(filename: "devirt-single.cc", directory: ".")
!2 = !{i32 2, !"Dwarf Version", i32 4}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{!"clang version 4.0.0 (trunk 278098)"}
!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
!6 = !DILocation(line: 30, column: 32, scope: !5)
!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
!8 = !{i32 0, !"typeid"}

!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
!10 = !DILocation(line: 35, column: 32, scope: !9)
!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
!12 = !{i32 0, !"typeid1"}

!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
!14 = !DILocation(line: 41, column: 32, scope: !13)
!15 = !{i32 0, !"typeid2"}

!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
!17 = !DILocation(line: 51, column: 32, scope: !16)
!18 = !{i32 0, !"typeid3"}



; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets
; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
; Check wildcard
; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt -wholeprogramdevirt-skip=vf?i1 %s 2>&1 | FileCheck %s --check-prefix=SKIP

; Check that no stats are reported in speculative devirtualization mode as the virtual const prop is disabled.
; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively -stats %s 2>&1 | FileCheck %s --check-prefix=CHECK-SPECULATIVE-WPD

target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

Expand Down Expand Up @@ -225,3 +228,7 @@ declare ptr @llvm.load.relative.i32(ptr, i32)
; CHECK: 2 wholeprogramdevirt - Number of unique return value optimizations
; CHECK: 2 wholeprogramdevirt - Number of virtual constant propagations
; CHECK: 2 wholeprogramdevirt - Number of 1 bit virtual constant propagations

; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of unique return value optimizations
; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of virtual constant propagations
; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of 1 bit virtual constant propagations