@@ -177,6 +177,16 @@ static cl::opt<bool> ProfileTopDownLoad(
177
177
" order of call graph during sample profile loading. It only "
178
178
" works for new pass manager. " ));
179
179
180
// Hidden boolean command-line option, on by default (cl::init(true)). When it
// is set and the profile is context-sensitive (ProfileIsCS), the function
// ordering code adds indirect-call targets found in the profile as extra
// edges to the call graph before computing the top-down processing order.
+ static cl::opt<bool > UseProfileIndirectCallEdges (
181
+ " use-profile-indirect-call-edges" , cl::init(true ), cl::Hidden,
182
+ cl::desc(" Considering indirect call samples from profile when top-down "
183
+ " processing functions. Only CSSPGO is supported." ));
184
+
185
// Hidden boolean command-line option, off by default (cl::init(false)). It
// requests that functions inside a single SCC be ordered top-down according
// to the call edges recorded in the input profile. The ordering code also
// applies this behavior when the profile is context-sensitive (ProfileIsCS)
// and this option was not given on the command line at all.
+ static cl::opt<bool > UseProfileTopDownOrder (
186
+ " use-profile-top-down-order" , cl::init(false ), cl::Hidden,
187
+ cl::desc(" Process functions in one SCC in a top-down order "
188
+ " based on the input profile." ));
189
+
180
190
static cl::opt<bool > ProfileSizeInline (
181
191
" sample-profile-inline-size" , cl::Hidden, cl::init(false ),
182
192
cl::desc(" Inline cold call sites in profile loader if it's beneficial "
@@ -458,6 +468,8 @@ class SampleProfileLoader {
458
468
uint64_t visitEdge (Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
459
469
void buildEdges (Function &F);
460
470
std::vector<Function *> buildFunctionOrder (Module &M, CallGraph *CG);
471
+ void addCallGraphEdges (CallGraph &CG, const FunctionSamples &Samples);
472
+ void replaceCallGraphEdges (CallGraph &CG, StringMap<Function *> &SymbolMap);
461
473
bool propagateThroughEdges (Function &F, bool UpdateBlockCount);
462
474
void computeDominanceAndLoopInfo (Function &F);
463
475
void clearFunctionData ();
@@ -2278,6 +2290,45 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
2278
2290
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, " sample-profile" ,
2279
2291
" Sample Profile loader" , false , false )
2280
2292
2293
+ // Add inlined profile call edges to the call graph.
2294
+ void SampleProfileLoader::addCallGraphEdges(CallGraph &CG,
2295
+ const FunctionSamples &Samples) {
2296
+ Function *Caller = SymbolMap.lookup (Samples.getFuncName ());
2297
+ if (!Caller || Caller->isDeclaration ())
2298
+ return ;
2299
+
2300
+ // Skip non-inlined call edges which are not important since top down inlining
2301
+ // for non-CS profile is to get more precise profile matching, not to enable
2302
+ // more inlining.
2303
+
2304
+ for (const auto &CallsiteSamples : Samples.getCallsiteSamples ()) {
2305
+ for (const auto &InlinedSamples : CallsiteSamples.second ) {
2306
+ Function *Callee = SymbolMap.lookup (InlinedSamples.first );
2307
+ if (Callee && !Callee->isDeclaration ())
2308
+ CG[Caller]->addCalledFunction (nullptr , CG[Callee]);
2309
+ addCallGraphEdges (CG, InlinedSamples.second );
2310
+ }
2311
+ }
2312
+ }
2313
+
2314
+ // Replace call graph edges with dynamic call edges from the profile.
2315
+ void SampleProfileLoader::replaceCallGraphEdges (
2316
+ CallGraph &CG, StringMap<Function *> &SymbolMap) {
2317
+ // Remove static call edges from the call graph except for the ones from the
2318
+ // root which make the call graph connected.
2319
+ for (const auto &Node : CG)
2320
+ if (Node.second .get () != CG.getExternalCallingNode ())
2321
+ Node.second ->removeAllCalledFunctions ();
2322
+
2323
+ // Add profile call edges to the call graph.
2324
+ if (ProfileIsCS) {
2325
+ ContextTracker->addCallGraphEdges (CG, SymbolMap);
2326
+ } else {
2327
+ for (const auto &Samples : Reader->getProfiles ())
2328
+ addCallGraphEdges (CG, Samples.second );
2329
+ }
2330
+ }
2331
+
2281
2332
std::vector<Function *>
2282
2333
SampleProfileLoader::buildFunctionOrder (Module &M, CallGraph *CG) {
2283
2334
std::vector<Function *> FunctionOrderList;
@@ -2300,16 +2351,97 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
2300
2351
}
2301
2352
2302
2353
assert (&CG->getModule () == &M);
2354
+
2355
+ // Add indirect call edges from profile to augment the static call graph.
2356
+ // Functions will be processed in a top-down order defined by the static call
2357
+ // graph. Adjusting the order by considering indirect call edges from the
2358
+ // profile (which don't exist in the static call graph) can enable the
2359
+ // inlining of indirect call targets by processing the caller before them.
2360
+ // TODO: enable this for non-CS profile and fix the counts returning logic to
2361
+ // have a full support for indirect calls.
2362
+ if (UseProfileIndirectCallEdges && ProfileIsCS) {
2363
+ for (auto &Entry : *CG) {
2364
+ const auto *F = Entry.first ;
2365
+ if (!F || F->isDeclaration () || !F->hasFnAttribute (" use-sample-profile" ))
2366
+ continue ;
2367
+ auto &AllContexts = ContextTracker->getAllContextSamplesFor (F->getName ());
2368
+ if (AllContexts.empty ())
2369
+ continue ;
2370
+
2371
+ for (const auto &BB : *F) {
2372
+ for (const auto &I : BB.getInstList ()) {
2373
+ const auto *CB = dyn_cast<CallBase>(&I);
2374
+ if (!CB || !CB->isIndirectCall ())
2375
+ continue ;
2376
+ const DebugLoc &DLoc = I.getDebugLoc ();
2377
+ if (!DLoc)
2378
+ continue ;
2379
+ auto CallSite = FunctionSamples::getCallSiteIdentifier (DLoc);
2380
+ for (FunctionSamples *Samples : AllContexts) {
2381
+ if (auto CallTargets = Samples->findCallTargetMapAt (CallSite)) {
2382
+ for (const auto &Target : CallTargets.get ()) {
2383
+ Function *Callee = SymbolMap.lookup (Target.first ());
2384
+ if (Callee && !Callee->isDeclaration ())
2385
+ Entry.second ->addCalledFunction (nullptr , (*CG)[Callee]);
2386
+ }
2387
+ }
2388
+ }
2389
+ }
2390
+ }
2391
+ }
2392
+ }
2393
+
2394
+ // Compute a top-down order from the profile which is used to sort functions in
2395
+ // one SCC later. The static processing order computed for an SCC may not
2396
+ // reflect the call contexts in the context-sensitive profile, thus may cause
2397
+ // potential inlining to be overlooked. The function order in one SCC is being
2398
+ // adjusted to a top-down order based on the profile to favor more inlining.
2399
+ DenseMap<Function *, uint64_t > ProfileOrderMap;
2400
+ if (UseProfileTopDownOrder ||
2401
+ (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences ())) {
2402
+ // Create a static call graph. The call edges are not important since they
2403
+ // will be replaced by dynamic edges from the profile.
2404
+ CallGraph ProfileCG (M);
2405
+ replaceCallGraphEdges (ProfileCG, SymbolMap);
2406
+ scc_iterator<CallGraph *> CGI = scc_begin (&ProfileCG);
2407
+ uint64_t I = 0 ;
2408
+ while (!CGI.isAtEnd ()) {
2409
+ for (CallGraphNode *Node : *CGI) {
2410
+ if (auto *F = Node->getFunction ())
2411
+ ProfileOrderMap[F] = ++I;
2412
+ }
2413
+ ++CGI;
2414
+ }
2415
+ }
2416
+
2303
2417
scc_iterator<CallGraph *> CGI = scc_begin (CG);
2304
2418
while (!CGI.isAtEnd ()) {
2305
- for (CallGraphNode *node : *CGI) {
2306
- auto F = node->getFunction ();
2419
+ uint64_t Start = FunctionOrderList.size ();
2420
+ for (CallGraphNode *Node : *CGI) {
2421
+ auto *F = Node->getFunction ();
2307
2422
if (F && !F->isDeclaration () && F->hasFnAttribute (" use-sample-profile" ))
2308
2423
FunctionOrderList.push_back (F);
2309
2424
}
2425
+
2426
+ // Sort nodes in SCC based on the profile top-down order.
2427
+ if (!ProfileOrderMap.empty ()) {
2428
+ std::stable_sort (FunctionOrderList.begin () + Start,
2429
+ FunctionOrderList.end (),
2430
+ [&ProfileOrderMap](Function *Left, Function *Right) {
2431
+ return ProfileOrderMap[Left] < ProfileOrderMap[Right];
2432
+ });
2433
+ }
2434
+
2310
2435
++CGI;
2311
2436
}
2312
2437
2438
+ LLVM_DEBUG ({
2439
+ dbgs () << " Function processing order:\n " ;
2440
+ for (auto F : reverse (FunctionOrderList)) {
2441
+ dbgs () << F->getName () << " \n " ;
2442
+ }
2443
+ });
2444
+
2313
2445
std::reverse (FunctionOrderList.begin (), FunctionOrderList.end ());
2314
2446
return FunctionOrderList;
2315
2447
}
@@ -2461,6 +2593,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
2461
2593
}
2462
2594
2463
2595
bool SampleProfileLoader::runOnFunction (Function &F, ModuleAnalysisManager *AM) {
2596
+ LLVM_DEBUG (dbgs () << " \n\n Processing Function " << F.getName () << " \n " );
2464
2597
DILocation2SampleMap.clear ();
2465
2598
// By default the entry count is initialized to -1, which will be treated
2466
2599
// conservatively by getEntryCount as the same as unknown (None). This is
0 commit comments