Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,14 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
"icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
cl::desc("The maximum number of vtable for the last candidate."));

// Hidden option -icp-ignored-base-types: a list of names for which
// vtable-based indirect-call promotion is suppressed. When vtable profile use
// is enabled, the entries are copied into a set; while deciding whether vtable
// comparison is profitable, a candidate vtable whose !type metadata carries a
// matching type identifier makes the check bail out (return false).
// NOTE(review): the description below says "mangled vtable names", but the
// lookup compares against the MDString identifiers found in !type metadata
// (the regression test passes 'Base1') -- confirm the wording is accurate.
static cl::list<std::string> ICPIgnoredBaseTypes(
"icp-ignored-base-types", cl::Hidden,
cl::desc(
"A list of mangled vtable names. Classes specified by the vtables "
"and their derived ones will not be vtable-ICP'ed. Useful when the "
"profiled types and actual types in the optimized binary could be "
"different due to profiling limitations."));

namespace {

// The key is a vtable global variable, and the value is a map.
Expand Down Expand Up @@ -316,6 +324,8 @@ class IndirectCallPromoter {

OptimizationRemarkEmitter &ORE;

const DenseSet<StringRef> &IgnoredBaseTypes;

// A struct that records the direct target and its call count.
struct PromotionCandidate {
Function *const TargetFunction;
Expand Down Expand Up @@ -391,10 +401,12 @@ class IndirectCallPromoter {
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
const DenseSet<StringRef> &IgnoredBaseTypes,
OptimizationRemarkEmitter &ORE)
: F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
VirtualCSInfo(VirtualCSInfo),
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
IgnoredBaseTypes(IgnoredBaseTypes) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;

Expand Down Expand Up @@ -851,8 +863,25 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
LLVM_DEBUG(dbgs() << "\n");

uint64_t CandidateVTableCount = 0;
for (auto &[GUID, Count] : VTableGUIDAndCounts)

for (auto &[GUID, Count] : VTableGUIDAndCounts) {
CandidateVTableCount += Count;
auto *VTableVar = Symtab->getGlobalVariable(GUID);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we skip all the new handling when IgnoredBaseTypes is empty?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sense.

I took the liberty of using a helper function for the new handling, to simplify the control flow (e.g., continue, inner-for-loop and return) inside the outer loop.


assert(VTableVar &&
"VTableVar must exist for GUID in VTableGUIDAndCounts");

SmallVector<MDNode *, 2> Types;
VTableVar->getMetadata(LLVMContext::MD_type, Types);

for (auto *Type : Types)
if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
if (IgnoredBaseTypes.contains(TypeId->getString())) {
LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail "
"out vtable comparison.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/out/out of/

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

return false;
}
}

if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
LLVM_DEBUG(
Expand Down Expand Up @@ -956,9 +985,15 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
bool Changed = false;
VirtualCallSiteTypeInfoMap VirtualCSInfo;

if (EnableVTableProfileUse)
DenseSet<StringRef> IgnoredBaseTypes;

if (EnableVTableProfileUse) {
computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);

for (StringRef Str : ICPIgnoredBaseTypes)
IgnoredBaseTypes.insert(Str);
}

// VTableAddressPointOffsetVal stores the vtable address points. The vtable
// address point of a given <vtable, address point offset> is static (doesn't
// change after being computed once).
Expand All @@ -977,7 +1012,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
VTableAddressPointOffsetVal, ORE);
VTableAddressPointOffsetVal,
IgnoredBaseTypes, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
if (ICPDUMPAFTER && FuncChanged) {
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5

; Tests that ICP compares vtables by checking IR.
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
; Require exactly one vtable candidate for each function candidate. Tests that ICP compares functions by checking IR.
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
; On top of line 4, ignore 'Base1' and its derived types for vtable-based comparison. Tests that ICP compares functions.
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -icp-ignored-base-types='Base1' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
Expand Down
Loading