-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[memprof] Teach extractCallsFromIR to look into inline stacks #115441
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[memprof] Teach extractCallsFromIR to look into inline stacks #115441
Conversation
To undrift the profile, we need to extract as many caller-callee pairs from the IR as we can to maximize the number of call sites in the profile we can undrift. Now, since MemProfUsePass runs after early inlining, some functions have been inlined, and we may no longer have bodies for those functions in the IR. To cope with this, this patch teaches extractCallsFromIR to extract caller-callee pairs from inline stacks. The output format of extractCallsFromIR remains the same. We still return a map from caller GUIDs to lists of corresponding call sites.
|
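@llvm/pr-subscribers-llvm-transforms Author: Kazu Hirata (kazutakahirata) Changes: To undrift the profile, we need to extract as many caller-callee pairs from the IR as we can to maximize the number of call sites in the profile we can undrift. Now, since MemProfUsePass runs after early inlining, some functions have been inlined, and we may no longer have bodies for those functions in the IR. To cope with this, this patch teaches extractCallsFromIR to extract caller-callee pairs from inline stacks. The output format of extractCallsFromIR remains the same. We still return a map from caller GUIDs to lists of corresponding call sites. Full diff: https://github.com/llvm/llvm-project/pull/115441.diff 2 Files Affected: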
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 0b4d3ff201e622..afcde4a479d134 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -824,11 +824,22 @@ memprof::extractCallsFromIR(Module &M) {
continue;
StringRef CalleeName = CalledFunction->getName();
- uint64_t CallerGUID =
- IndexedMemProfRecord::getGUID(DIL->getSubprogramLinkageName());
- uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
- LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
- Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
+ if (DIL->getInlinedAt()) {
+ for (; DIL; DIL = DIL->getInlinedAt()) {
+ uint64_t CallerGUID =
+ IndexedMemProfRecord::getGUID(DIL->getSubprogramLinkageName());
+ uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
+ LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
+ Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
+ CalleeName = DIL->getSubprogramLinkageName();
+ }
+ } else {
+ uint64_t CallerGUID =
+ IndexedMemProfRecord::getGUID(DIL->getSubprogramLinkageName());
+ uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
+ LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
+ Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
+ }
}
}
}
diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
index a510a57099aba4..50ef151923e992 100644
--- a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
+++ b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
@@ -101,4 +101,116 @@ declare !dbg !19 void @_Z2f3v()
EXPECT_THAT(CallSites[2],
Pair(FieldsAre(2U, 9U), IndexedMemProfRecord::getGUID("_Z2f3v")));
}
+
+TEST(MemProf, ExtractDirectCallsFromIRInline) {
+ // The following IR is generated from:
+ //
+ // void f1();
+ // static inline void f2() { f1(); }
+ // static inline void f3() { f2(); }
+ //
+ // void g1();
+ // void g2();
+ // static inline void g3() { g1(); g2(); }
+ //
+ // void foo() {
+ // f3();
+ // g3();
+ // }
+ StringRef IR = R"IR(
+define dso_local void @_Z3foov() local_unnamed_addr !dbg !10 {
+entry:
+ tail call void @_Z2f1v(), !dbg !13
+ tail call void @_Z2g1v(), !dbg !18
+ tail call void @_Z2g2v(), !dbg !21
+ ret void, !dbg !23
+}
+
+declare !dbg !24 void @_Z2f1v() local_unnamed_addr
+
+declare !dbg !25 void @_Z2g1v() local_unnamed_addr
+
+declare !dbg !26 void @_Z2g2v() local_unnamed_addr
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "foobar.cc", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 1, !"MemProfProfileFilename", !"memprof.profraw"}
+!6 = !{i32 8, !"PIC Level", i32 2}
+!7 = !{i32 7, !"PIE Level", i32 2}
+!8 = !{i32 7, !"uwtable", i32 2}
+!9 = !{!"clang"}
+!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 9, type: !11, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!11 = !DISubroutineType(types: !12)
+!12 = !{}
+!13 = !DILocation(line: 2, column: 27, scope: !14, inlinedAt: !15)
+!14 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!15 = distinct !DILocation(line: 3, column: 27, scope: !16, inlinedAt: !17)
+!16 = distinct !DISubprogram(name: "f3", linkageName: "_ZL2f3v", scope: !1, file: !1, line: 3, type: !11, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!17 = distinct !DILocation(line: 10, column: 3, scope: !10)
+!18 = !DILocation(line: 7, column: 27, scope: !19, inlinedAt: !20)
+!19 = distinct !DISubprogram(name: "g3", linkageName: "_ZL2g3v", scope: !1, file: !1, line: 7, type: !11, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!20 = distinct !DILocation(line: 11, column: 3, scope: !10)
+!21 = !DILocation(line: 7, column: 33, scope: !22, inlinedAt: !20)
+!22 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 2)
+!23 = !DILocation(line: 12, column: 1, scope: !10)
+!24 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+!25 = !DISubprogram(name: "g1", linkageName: "_Z2g1v", scope: !1, file: !1, line: 5, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+!26 = !DISubprogram(name: "g2", linkageName: "_Z2g2v", scope: !1, file: !1, line: 6, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+)IR";
+
+ LLVMContext Ctx;
+ SMDiagnostic Err;
+ std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
+ ASSERT_TRUE(M);
+
+ auto Calls = extractCallsFromIR(*M);
+
+ // Expect exactly 4 callers.
+ ASSERT_THAT(Calls, SizeIs(4));
+
+ // Verify each key-value pair.
+
+ auto FooIt = Calls.find(IndexedMemProfRecord::getGUID("_Z3foov"));
+ ASSERT_NE(FooIt, Calls.end());
+ const auto &[FooCallerGUID, FooCallSites] = *FooIt;
+ EXPECT_EQ(FooCallerGUID, IndexedMemProfRecord::getGUID("_Z3foov"));
+ ASSERT_THAT(FooCallSites, SizeIs(2));
+ EXPECT_THAT(FooCallSites[0], Pair(FieldsAre(1U, 3U),
+ IndexedMemProfRecord::getGUID("_ZL2f3v")));
+ EXPECT_THAT(FooCallSites[1], Pair(FieldsAre(2U, 3U),
+ IndexedMemProfRecord::getGUID("_ZL2g3v")));
+
+ auto F2It = Calls.find(IndexedMemProfRecord::getGUID("_ZL2f2v"));
+ ASSERT_NE(F2It, Calls.end());
+ const auto &[F2CallerGUID, F2CallSites] = *F2It;
+ EXPECT_EQ(F2CallerGUID, IndexedMemProfRecord::getGUID("_ZL2f2v"));
+ ASSERT_THAT(F2CallSites, SizeIs(1));
+ EXPECT_THAT(F2CallSites[0], Pair(FieldsAre(0U, 27U),
+ IndexedMemProfRecord::getGUID("_Z2f1v")));
+
+ auto F3It = Calls.find(IndexedMemProfRecord::getGUID("_ZL2f3v"));
+ ASSERT_NE(F3It, Calls.end());
+ const auto &[F3CallerGUID, F3CallSites] = *F3It;
+ EXPECT_EQ(F3CallerGUID, IndexedMemProfRecord::getGUID("_ZL2f3v"));
+ ASSERT_THAT(F3CallSites, SizeIs(1));
+ EXPECT_THAT(F3CallSites[0], Pair(FieldsAre(0U, 27U),
+ IndexedMemProfRecord::getGUID("_ZL2f2v")));
+
+ auto G3It = Calls.find(IndexedMemProfRecord::getGUID("_ZL2g3v"));
+ ASSERT_NE(G3It, Calls.end());
+ const auto &[G3CallerGUID, G3CallSites] = *G3It;
+ EXPECT_EQ(G3CallerGUID, IndexedMemProfRecord::getGUID("_ZL2g3v"));
+ ASSERT_THAT(G3CallSites, SizeIs(2));
+ EXPECT_THAT(G3CallSites[0], Pair(FieldsAre(0U, 27U),
+ IndexedMemProfRecord::getGUID("_Z2g1v")));
+ EXPECT_THAT(G3CallSites[1], Pair(FieldsAre(0U, 33U),
+ IndexedMemProfRecord::getGUID("_Z2g2v")));
+}
} // namespace
|
Make the line/column numbers in the test more interesting.
kazutakahirata
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've revised the patch. Please take a look. Thanks!
|
Revised the patch. Please take a look. Thanks! |
snehasish
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
…15441) To undrift the profile, we need to extract as many caller-callee pairs from the IR as we can to maximize the number of call sites in the profile we can undrift. Now, since MemProfUsePass runs after early inlining, some functions have been inlined, and we may no longer have bodies for those functions in the IR. To cope with this, this patch teaches extractCallsFromIR to extract caller-callee pairs from inline stacks. The output format of extractCallsFromIR remains the same. We still return a map from caller GUIDs to lists of corresponding call sites.
To undrift the profile, we need to extract as many caller-callee pairs
from the IR as we can to maximize the number of call sites in the
profile we can undrift.
Now, since MemProfUsePass runs after early inlining, some functions
have been inlined, and we may no longer have bodies for those
functions in the IR. To cope with this, this patch teaches
extractCallsFromIR to extract caller-callee pairs from inline stacks.
The output format of extractCallsFromIR remains the same. We still
return a map from caller GUIDs to lists of corresponding call sites.