Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 42 additions & 4 deletions llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
"icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
cl::desc("The maximum number of vtable for the last candidate."));

static cl::opt<std::string> ICPKnownUnrepresentativeVTables(
"icp-known-unrepresentative-vtables", cl::init(""), cl::Hidden,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to make this option and the code (e.g. variable names, debug messages) specific to "profile unrepresentativeness"? Can we instead generalize it like naming the option "icp-ignore-vtables", the variable can be DenseSet<StringRef> IgnoredBaseTypes and so on?

I think the motivation is justified and can be mentioned in the comments and cl::desc but the implementation don't need to be. IMO this makes the code a little easier to read and follow. What do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sense.

I updated the PR to use cl::opt<DenseSet<StringRef>> ICPIgnoredBaseTypes, and will spin off the changes in CommandLine.h/cpp (with unit tests).

cl::desc("A comma-separated list of mangled vtable names for which instrumented
profiles are not representative. For instance, the instantiated class is arch or micro-arch specific, while instrumented profiles are collected on one arch."));

namespace {

// The key is a vtable global variable, and the value is a map.
Expand Down Expand Up @@ -316,6 +321,8 @@ class IndirectCallPromoter {

OptimizationRemarkEmitter &ORE;

const DenseSet<StringRef> &KnownUnrepresentativeBaseTypes;

// A struct that records the direct target and it's call count.
struct PromotionCandidate {
Function *const TargetFunction;
Expand Down Expand Up @@ -391,10 +398,12 @@ class IndirectCallPromoter {
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
DenseSet<StringRef> &KnownUnrepresentativeTypes,
OptimizationRemarkEmitter &ORE)
: F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
VirtualCSInfo(VirtualCSInfo),
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
KnownUnrepresentativeBaseTypes(KnownUnrepresentativeTypes) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;

Expand Down Expand Up @@ -851,8 +860,26 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
LLVM_DEBUG(dbgs() << "\n");

uint64_t CandidateVTableCount = 0;
for (auto &[GUID, Count] : VTableGUIDAndCounts)
SmallVector<MDNode *, 2> Types;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can move this to line L870 and drop the clear(). Since it's a small vector with only 2 pointers (on the stack) I don't expect changing this will affect performance.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree that it's better to move this vector inside the loop. Done.

for (auto &[GUID, Count] : VTableGUIDAndCounts) {
CandidateVTableCount += Count;
auto *VTableVar = Symtab->getGlobalVariable(GUID);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we skip all the new handling when IgnoredBaseTypes is empty?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sense.

I took the liberty to use a helper function for the new handling, to simplify the control flow (e.g., continue, inner-for-loop and return) inside the outer loop.


assert(VTableVar &&
"VTableVar must exist for GUID in VTableGUIDAndCounts");

Types.clear();
VTableVar->getMetadata(LLVMContext::MD_type, Types);

for (auto *Type : Types)
if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
if (KnownUnrepresentativeBaseTypes.contains(TypeId->getString())) {
LLVM_DEBUG(dbgs()
<< " vtable profiles are known to be "

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update the comment? s/known to be unrepresentative/ignored by option ..../

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

"unrepresentative. Bail out vtable comparison.")
return false;
}
}

if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
LLVM_DEBUG(
Expand Down Expand Up @@ -956,9 +983,19 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
bool Changed = false;
VirtualCallSiteTypeInfoMap VirtualCSInfo;

if (EnableVTableProfileUse)
DenseSet<StringRef> KnownUnrepresentativeTypeSet;

if (EnableVTableProfileUse) {
computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);

SmallVector<StringRef> KnownUnrepresentativeTypes;
llvm::SplitString(ICPKnownUnrepresentativeVTables,
KnownUnrepresentativeTypes);

for (const StringRef Str : KnownUnrepresentativeTypes)
KnownUnrepresentativeTypeSet.insert(Str);
}

// VTableAddressPointOffsetVal stores the vtable address points. The vtable
// address point of a given <vtable, address point offset> is static (doesn't
// change after being computed once).
Expand All @@ -977,7 +1014,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
VTableAddressPointOffsetVal, ORE);
VTableAddressPointOffsetVal,
KnownUnrepresentativeTypeSet, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
if (ICPDUMPAFTER && FuncChanged) {
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -icp-known-unrepresentative-vtables='Base1,Derived3' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
Expand Down