# Patch summary (free text before the first "diff --git" header).
#
# Adds a hidden option, -icp-ignored-base-types, to the indirect-call-promotion
# pass. The option is a list of type info names (the strings used in LLVM type
# metadata). When vtable profile use is enabled, the names are copied into a
# set that is handed to every IndirectCallPromoter. While deciding whether
# vtable comparison is profitable, the promoter looks up each candidate vtable
# by GUID and bails out of vtable comparison if any of the vtable's !type
# metadata names is in the set. The lit test gains a third RUN line that passes
# -icp-ignored-base-types='Base1' and checks the FUNC-CMP prefixes.
#
# NOTE(review): this patch was recovered from a copy whose line breaks were
# collapsed and whose angle-bracket contents were stripped. The following were
# restored by inference from how the values are used and should be confirmed
# against the upstream change before applying:
#   - cl::opt<int>, cl::list<std::string>, DenseSet<StringRef>,
#     ArrayRef<PromotionCandidate>, SmallVector<MDNode *, 2>,
#     dyn_cast<MDString>, getResult<OptimizationRemarkEmitterAnalysis>
#   - the "<vtable, address point offset>" placeholder in a context comment
#   - indentation, blank context lines, and the leading spaces inside the
#     debug strings ("    function count ", "    vtable profiles ...")
# Hunk line counts were re-checked against each "@@" header.

diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index fbed593ab3aa7..b605dfc9c2675 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -132,6 +132,15 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
     "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
     cl::desc("The maximum number of vtable for the last candidate."));
 
+static cl::list<std::string> ICPIgnoredBaseTypes(
+    "icp-ignored-base-types", cl::Hidden,
+    cl::desc(
+        "A list of mangled vtable type info names. Classes specified by the "
+        "type info names and their derived ones will not be vtable-ICP'ed. "
+        "Useful when the profiled types and actual types in the optimized "
+        "binary could be different due to profiling limitations. Type info "
+        "names are those string literals used in LLVM type metadata"));
+
 namespace {
 
 // The key is a vtable global variable, and the value is a map.
@@ -316,6 +325,8 @@ class IndirectCallPromoter {
 
   OptimizationRemarkEmitter &ORE;
 
+  const DenseSet<StringRef> &IgnoredBaseTypes;
+
   // A struct that records the direct target and it's call count.
   struct PromotionCandidate {
     Function *const TargetFunction;
@@ -366,6 +377,10 @@ class IndirectCallPromoter {
   bool isProfitableToCompareVTables(const CallBase &CB,
                                     ArrayRef<PromotionCandidate> Candidates);
 
+  // Return true if the vtable corresponding to VTableGUID should be skipped
+  // for vtable-based comparison.
+  bool shouldSkipVTable(uint64_t VTableGUID);
+
   // Given an indirect callsite and the list of function candidates, compute
   // the following vtable information in output parameters and return vtable
   // pointer if type profiles exist.
@@ -391,10 +406,12 @@ class IndirectCallPromoter {
       Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
       const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
       VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
+      const DenseSet<StringRef> &IgnoredBaseTypes,
       OptimizationRemarkEmitter &ORE)
       : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
         VirtualCSInfo(VirtualCSInfo),
-        VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
+        VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
+        IgnoredBaseTypes(IgnoredBaseTypes) {}
   IndirectCallPromoter(const IndirectCallPromoter &) = delete;
   IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
 
@@ -851,9 +868,14 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
     LLVM_DEBUG(dbgs() << "\n");
 
     uint64_t CandidateVTableCount = 0;
-    for (auto &[GUID, Count] : VTableGUIDAndCounts)
+
+    for (auto &[GUID, Count] : VTableGUIDAndCounts) {
       CandidateVTableCount += Count;
 
+      if (shouldSkipVTable(GUID))
+        return false;
+    }
+
     if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
       LLVM_DEBUG(
           dbgs() << "    function count " << Candidate.Count
@@ -883,6 +905,27 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
   return true;
 }
 
+bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) {
+  if (IgnoredBaseTypes.empty())
+    return false;
+
+  auto *VTableVar = Symtab->getGlobalVariable(VTableGUID);
+
+  assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts");
+
+  SmallVector<MDNode *, 2> Types;
+  VTableVar->getMetadata(LLVMContext::MD_type, Types);
+
+  for (auto *Type : Types)
+    if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
+      if (IgnoredBaseTypes.contains(TypeId->getString())) {
+        LLVM_DEBUG(dbgs() << "    vtable profiles should be ignored. Bail "
+                             "out of vtable comparison.");
+        return true;
+      }
+  return false;
+}
+
 // For virtual calls in the module, collect per-callsite information which will
 // be used to associate an ICP candidate with a vtable and a specific function
 // in the vtable. With type intrinsics (llvm.type.test), we can find virtual
@@ -956,9 +999,15 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
   bool Changed = false;
   VirtualCallSiteTypeInfoMap VirtualCSInfo;
 
-  if (EnableVTableProfileUse)
+  DenseSet<StringRef> IgnoredBaseTypes;
+
+  if (EnableVTableProfileUse) {
     computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
 
+    for (StringRef Str : ICPIgnoredBaseTypes)
+      IgnoredBaseTypes.insert(Str);
+  }
+
   // VTableAddressPointOffsetVal stores the vtable address points. The vtable
   // address point of a given <vtable, address point offset> is static (doesn't
   // change after being computed once).
@@ -977,7 +1026,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
 
     IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
-                                      VTableAddressPointOffsetVal, ORE);
+                                      VTableAddressPointOffsetVal,
+                                      IgnoredBaseTypes, ORE);
     bool FuncChanged = CallPromoter.processFunction(PSI);
     if (ICPDUMPAFTER && FuncChanged) {
       LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
index b6afce3d7c6d5..84bb7a5830af2 100644
--- a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -1,7 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 
+; Tests that ICP compares vtables by checking IR.
 ; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
+; Require exactly one vtable candidate for each function candidate. Tests that ICP compares function by checking IR.
 ; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+; On top of line 4, ignore 'Base1' and its derived types for vtable-based comparison. Tests that ICP compares functions.
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -icp-ignored-base-types='Base1' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"