From 0a0a1d59bbcd81551b35940fce12abcd1f5639bc Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 23 Sep 2024 15:20:18 -0700 Subject: [PATCH] [ctx_prof] Simple ICP criteria during module inliner --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 13 +++++ llvm/lib/Analysis/CtxProfAnalysis.cpp | 23 ++++++++ llvm/lib/Transforms/IPO/ModuleInliner.cpp | 22 +++++++- .../Transforms/Utils/CallPromotionUtils.cpp | 35 ++++++------ .../Analysis/CtxProfAnalysis/flatten-icp.ll | 55 +++++++++++++++++++ 5 files changed, 130 insertions(+), 18 deletions(-) create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index 0a5beb92fcbcc..0a9543f037eb5 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -9,6 +9,7 @@ #ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H #define LLVM_ANALYSIS_CTXPROFANALYSIS_H +#include "llvm/ADT/SetVector.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/IntrinsicInst.h" @@ -63,6 +64,13 @@ class PGOContextualProfile { return getDefinedFunctionGUID(F) != 0; } + StringRef getFunctionName(GlobalValue::GUID GUID) const { + auto It = FuncInfo.find(GUID); + if (It == FuncInfo.end()) + return ""; + return It->second.Name; + } + uint32_t getNumCounters(const Function &F) const { assert(isFunctionKnown(F)); return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex; @@ -120,6 +128,11 @@ class CtxProfAnalysis : public AnalysisInfoMixin { /// Get the step instrumentation associated with a `select` static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI); + + // FIXME: refactor to an advisor model, and separate + static void collectIndirectCallPromotionList( + CallBase &IC, Result &Profile, + SetVector> &Candidates); }; class CtxProfAnalysisPrinterPass diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 7517011395a7d..873277cf51d6b 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #define DEBUG_TYPE "ctx_prof" @@ -309,3 +310,25 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const { }); return Flat; } + +void CtxProfAnalysis::collectIndirectCallPromotionList( + CallBase &IC, Result &Profile, + SetVector> &Candidates) { + const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC); + if (!Instr) + return; + Module &M = *IC.getParent()->getModule(); + const uint32_t CallID = Instr->getIndex()->getZExtValue(); + Profile.visit( + [&](const PGOCtxProfContext &Ctx) { + const auto &Targets = Ctx.callsites().find(CallID); + if (Targets == Ctx.callsites().end()) + return; + for (const auto &[Guid, _] : Targets->second) + if (auto Name = Profile.getFunctionName(Guid); !Name.empty()) + if (auto *Target = M.getFunction(Name)) + if (Target->hasFnAttribute(Attribute::AlwaysInline)) + Candidates.insert({&IC, Target}); + }, + IC.getCaller()); +} diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index 542c319b88074..dbc733826944b 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -49,6 +49,13 @@ using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); +cl::opt CtxProfPromoteAlwaysInline( + "ctx-prof-promote-alwaysinline", cl::init(false), cl::Hidden, + cl::desc("If using a contextual profile in this module, and an indirect " + "call target is marked as alwaysinline, perform indirect call " + "promotion for that target. 
If multiple targets for an indirect " + "call site fit this description, they are all promoted.")); + /// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. static bool inlineHistoryIncludes( @@ -145,10 +152,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, assert(Calls != nullptr && "Expected an initialized InlineOrder"); // Populate the initial list of calls in this module. + SetVector> ICPCandidates; for (Function &F : M) { auto &ORE = FAM.getResult(F); - for (Instruction &I : instructions(F)) - if (auto *CB = dyn_cast(&I)) + for (Instruction &I : instructions(F)) { + if (auto *CB = dyn_cast(&I)) { if (Function *Callee = CB->getCalledFunction()) { if (!Callee->isDeclaration()) Calls->push({CB, -1}); @@ -163,7 +171,17 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, << setIsVerbose(); }); } + } else if (CtxProfPromoteAlwaysInline && CtxProf && + CB->isIndirectCall()) { + CtxProfAnalysis::collectIndirectCallPromotionList(*CB, CtxProf, + ICPCandidates); } + } + } + } + for (auto &[CB, Target] : ICPCandidates) { + if (auto *DirectCB = promoteCallWithIfThenElse(*CB, *Target, CtxProf)) + Calls->push({DirectCB, -1}); } if (Calls->empty()) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index 5f872c352429c..3d2fa226ff15b 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -623,34 +623,37 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee, // All the ctx-es belonging to a function must have the same size counters. Ctx.resizeCounters(NewCountersSize); - // Maybe in this context, the indirect callsite wasn't observed at all + // Maybe in this context, the indirect callsite wasn't observed at all. 
That + // would make both direct and indirect BBs cold - which is what we already + // have from resizing the counters. if (!Ctx.hasCallsite(CSIndex)) return; auto &CSData = Ctx.callsite(CSIndex); - auto It = CSData.find(CalleeGUID); - // Maybe we did notice the indirect callsite, but to other targets. - if (It == CSData.end()) - return; - - assert(CalleeGUID == It->second.guid()); - - uint32_t DirectCount = It->second.getEntrycount(); - uint32_t TotalCount = 0; + uint64_t TotalCount = 0; for (const auto &[_, V] : CSData) TotalCount += V.getEntrycount(); + uint64_t DirectCount = 0; + // If we called the direct target, update the DirectCount. If we didn't, we + // still want to update the indirect BB (to which the TotalCount goes, in + // that case). + if (auto It = CSData.find(CalleeGUID); It != CSData.end()) { + assert(CalleeGUID == It->second.guid()); + DirectCount = It->second.getEntrycount(); + // This direct target needs to be moved to this caller under the + // newly-allocated callsite index. + assert(Ctx.callsites().count(NewCSID) == 0); + Ctx.ingestContext(NewCSID, std::move(It->second)); + CSData.erase(CalleeGUID); + } + assert(TotalCount >= DirectCount); - uint32_t IndirectCount = TotalCount - DirectCount; + uint64_t IndirectCount = TotalCount - DirectCount; // The ICP's effect is as-if the direct BB would have been taken DirectCount // times, and the indirect BB, IndirectCount times Ctx.counters()[DirectID] = DirectCount; Ctx.counters()[IndirectID] = IndirectCount; - // This particular indirect target needs to be moved to this caller under - // the newly-allocated callsite index.
- assert(Ctx.callsites().count(NewCSID) == 0); - Ctx.ingestContext(NewCSID, std::move(It->second)); - CSData.erase(CalleeGUID); }; CtxProf.update(ProfileUpdater, &Caller); return &DirectCall; diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll new file mode 100644 index 0000000000000..fbffe780f0afa --- /dev/null +++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll @@ -0,0 +1,55 @@ +; RUN: split-file %s %t +; RUN: llvm-ctxprof-util fromJSON --input %t/profile.json --output %t/profile.ctxprofdata +; +; In the given profile, in one of the contexts the indirect call is taken, the +; target we're trying to ICP - GUID:1000 - doesn't appear at all. That should +; contribute to the count of the "indirect call BB". +; RUN: opt %t/test.ll -S -passes='require,module-inline,ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata -ctx-prof-promote-alwaysinline + +; CHECK-LABEL: define i32 @caller(ptr %c) +; CHECK-NEXT: [[CND:[0-9]+]] = icmp eq ptr %c, @one +; CHECK-NEXT: br i1 [[CND]], label %{{.*}}, label %{{.*}}, !prof ![[BW:[0-9]+]] + +; CHECK: ![[BW]] = !{!"branch_weights", i32 10, i32 10} + +;--- test.ll +declare i32 @external(i32 %x) +define i32 @one() #0 !guid !0 { + call void @llvm.instrprof.increment(ptr @one, i64 123, i32 1, i32 0) + call void @llvm.instrprof.callsite(ptr @one, i64 123, i32 1, i32 0, ptr @external) + %ret = call i32 @external(i32 1) + ret i32 %ret +} + +define i32 @caller(ptr %c) #1 !guid !1 { + call void @llvm.instrprof.increment(ptr @caller, i64 567, i32 1, i32 0) + call void @llvm.instrprof.callsite(ptr @caller, i64 567, i32 1, i32 0, ptr %c) + %ret = call i32 %c() + ret i32 %ret +} + +define i32 @root(ptr %c) !guid !2 { + call void @llvm.instrprof.increment(ptr @root, i64 432, i32 1, i32 0) + call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 0, ptr @caller) + %a = call i32 @caller(ptr %c) + call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 1, ptr
@caller) + %b = call i32 @caller(ptr %c) + %ret = add i32 %a, %b + ret i32 %ret + +} + +attributes #0 = { alwaysinline } +attributes #1 = { noinline } +!0 = !{i64 1000} +!1 = !{i64 3000} +!2 = !{i64 4000} + +;--- profile.json +[ { + "Guid": 4000, "Counters":[10], "Callsites": [ + [{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":1000, "Counters":[10]}]]}], + [{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":9000, "Counters":[10]}]]}] + ] +} +]