-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[MemProf] Attach value profile metadata to the IR using CalleeGuids. #141164
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
2858bad
da9b985
c31f858
cd8536a
e7dbf78
918d650
85ed751
ca1cc24
a6283e7
2310c53
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -177,6 +177,12 @@ static cl::opt<bool> | |
| cl::desc("Salvage stale MemProf profile"), | ||
| cl::init(false), cl::Hidden); | ||
|
|
||
| static cl::opt<bool> ClMemProfAttachCalleeGuids( | ||
| "memprof-attach-calleeguids", | ||
| cl::desc( | ||
| "Attach calleeguids as value profile metadata for indirect calls."), | ||
| cl::init(true), cl::Hidden); | ||
|
|
||
| extern cl::opt<bool> MemProfReportHintedSizes; | ||
| extern cl::opt<unsigned> MinClonedColdBytePercent; | ||
| extern cl::opt<unsigned> MinCallsiteColdBytePercent; | ||
|
|
@@ -964,6 +970,41 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps, | |
| UndriftCallStack(CS.Frames); | ||
| } | ||
|
|
||
| // Helper function to process CalleeGuids and create value profile metadata | ||
| static void addVPMetadata(Module &M, Instruction &I, | ||
| ArrayRef<GlobalValue::GUID> CalleeGuids) { | ||
| if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty()) | ||
| return; | ||
|
|
||
| if (I.getMetadata(LLVMContext::MD_prof)) { | ||
| uint64_t Unused; | ||
| auto ExistingVD = | ||
| getValueProfDataFromInst(I, IPVK_IndirectCallTarget, ~0U, Unused); | ||
| // We don't know how to merge value profile data yet. | ||
| if (!ExistingVD.empty()) { | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| SmallVector<InstrProfValueData, 4> VDs; | ||
| uint64_t TotalCount = 0; | ||
|
|
||
| for (const GlobalValue::GUID CalleeGUID : CalleeGuids) { | ||
| InstrProfValueData VD; | ||
| VD.Value = CalleeGUID; | ||
| // For MemProf, we don't have actual call counts, so we assign | ||
| // a weight of 1 to each potential target. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally this could be used to drive ICP too, but I'm not sure a weight of 1 will be hot enough. Should it either be a configurable value, or add a FIXME or TODO to adjust this to allow for ICP?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added TODO. |
||
| VD.Count = 1; | ||
| VDs.push_back(VD); | ||
| TotalCount += VD.Count; | ||
| } | ||
|
|
||
| if (!VDs.empty()) { | ||
| annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget, | ||
| VDs.size()); | ||
| } | ||
| } | ||
|
|
||
| static void | ||
| readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, | ||
| const TargetLibraryInfo &TLI, | ||
|
|
@@ -1031,15 +1072,32 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, | |
| // Build maps of the location hash to all profile data with that leaf location | ||
| // (allocation info and the callsites). | ||
| std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; | ||
| // A hash function for std::unordered_set<ArrayRef<Frame>> to work. | ||
| struct CallStackHash { | ||
| size_t operator()(ArrayRef<Frame> CS) const { | ||
| return computeFullStackId(CS); | ||
|
|
||
| // Helper struct for maintaining refs to callsite data. As an alternative we | ||
| // could store a pointer to the CallSiteInfo struct but we also need the frame | ||
| // index. Using ArrayRefs instead makes it a little easier to read. | ||
| struct CallSiteEntry { | ||
| // Subset of frames for the corresponding CallSiteInfo. | ||
| ArrayRef<Frame> Frames; | ||
| // Potential targets for indirect calls. | ||
| ArrayRef<GlobalValue::GUID> CalleeGuids; | ||
|
|
||
| // Only compare Frame contents. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe comment why you aren't using ArrayRef operator==. I guess that does elementwise equality and here you are looking for the underlying pointer being the same?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, added comment. It's also faster this way. |
||
| bool operator==(const CallSiteEntry &Other) const { | ||
| return Frames.data() == Other.Frames.data() && | ||
| Frames.size() == Other.Frames.size(); | ||
| } | ||
| }; | ||
|
|
||
| struct CallSiteEntryHash { | ||
| size_t operator()(const CallSiteEntry &Entry) const { | ||
| return computeFullStackId(Entry.Frames); | ||
| } | ||
| }; | ||
|
|
||
| // For the callsites we need to record slices of the frame array (see comments | ||
| // below where the map entries are added). | ||
| std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>> | ||
| // below where the map entries are added) along with their CalleeGuids. | ||
| std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>> | ||
| LocHashToCallSites; | ||
| for (auto &AI : MemProfRec->AllocSites) { | ||
| NumOfMemProfAllocContextProfiles++; | ||
|
|
@@ -1057,8 +1115,10 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, | |
| unsigned Idx = 0; | ||
| for (auto &StackFrame : CS.Frames) { | ||
| uint64_t StackId = computeStackId(StackFrame); | ||
| LocHashToCallSites[StackId].insert( | ||
| ArrayRef<Frame>(CS.Frames).drop_front(Idx++)); | ||
| ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++); | ||
| ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids); | ||
| LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids}); | ||
|
|
||
| ProfileHasColumns |= StackFrame.Column; | ||
| // Once we find this function, we can stop recording. | ||
| if (StackFrame.Function == FuncGUID) | ||
|
|
@@ -1202,13 +1262,19 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, | |
| // Otherwise, add callsite metadata. If we reach here then we found the | ||
| // instruction's leaf location in the callsites map and not the allocation | ||
| // map. | ||
| for (auto CallStackIdx : CallSitesIter->second) { | ||
| for (const auto &CallSiteEntry : CallSitesIter->second) { | ||
| // If we found and thus matched all frames on the call, create and | ||
| // attach call stack metadata. | ||
| if (stackFrameIncludesInlinedCallStack(CallStackIdx, | ||
| if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames, | ||
| InlinedCallStack)) { | ||
| NumOfMemProfMatchedCallSites++; | ||
| addCallsiteMetadata(I, InlinedCallStack, Ctx); | ||
|
|
||
| // Try to attach indirect call metadata if possible. | ||
| if (!CalledFunction) { | ||
|
||
| addVPMetadata(M, I, CallSiteEntry.CalleeGuids); | ||
| } | ||
|
|
||
| // Only need to find one with a matching call stack and add a single | ||
| // callsite metadata. | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| ; RUN: split-file %s %t | ||
|
|
||
| ; COM: Basic functionality with flag toggle | ||
|
||
| ; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata | ||
| ; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE | ||
| ; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE | ||
|
|
||
| ; COM: FDO conflict handling | ||
| ; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata | ||
| ; RUN: opt < %t/fdo_conflict.ll -passes='memprof-use<profile-filename=%t/fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-FDO | ||
|
|
||
| ;--- basic.yaml | ||
| --- | ||
| HeapProfileRecords: | ||
| - GUID: _Z3barv | ||
| AllocSites: [] | ||
| CallSites: | ||
| - Frames: | ||
| - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false } | ||
| CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01] | ||
| ... | ||
|
|
||
| ;--- basic.ll | ||
| define dso_local void @_Z3barv() !dbg !4 { | ||
| entry: | ||
| %fp = alloca ptr, align 8 | ||
| %0 = load ptr, ptr %fp, align 8 | ||
| call void %0(), !dbg !5 | ||
| ; CHECK-ENABLE: call void %0(), {{.*}} !prof !6 | ||
| ; CHECK-DISABLE-NOT: !prof | ||
| ret void | ||
| } | ||
|
|
||
| ; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1} | ||
|
|
||
| !llvm.module.flags = !{!2, !3} | ||
|
|
||
| !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1) | ||
| !1 = !DIFile(filename: "t", directory: "/") | ||
| !2 = !{i32 7, !"Dwarf Version", i32 5} | ||
| !3 = !{i32 2, !"Debug Info Version", i32 3} | ||
| !4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0) | ||
| !5 = !DILocation(line: 4, column: 5, scope: !4) | ||
|
|
||
| ;--- fdo_conflict.yaml | ||
| --- | ||
| HeapProfileRecords: | ||
| - GUID: _Z3foov | ||
| AllocSites: [] | ||
| CallSites: | ||
| - Frames: | ||
| - { Function: _Z3foov, LineOffset: 3, Column: 5, IsInlineFrame: false } | ||
| CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01] | ||
| - Frames: | ||
| - { Function: _Z3foov, LineOffset: 5, Column: 5, IsInlineFrame: false } | ||
| CalleeGuids: [0x555556789abcdef0, 0x666656789abcdef1] | ||
| ... | ||
|
|
||
| ;--- fdo_conflict.ll | ||
| define dso_local void @_Z3foov() !dbg !14 { | ||
| entry: | ||
| %fp = alloca ptr, align 8 | ||
| %0 = load ptr, ptr %fp, align 8 | ||
| ; This call already has FDO value profile metadata - should NOT be modified | ||
| ; CHECK-FDO: call void %0(), {{.*}} !prof !6 | ||
| ; CHECK-FDO-NOT: call void %0(), {{.*}} !prof !9 | ||
|
||
| call void %0(), !dbg !15, !prof !16 | ||
|
|
||
| %1 = load ptr, ptr %fp, align 8 | ||
| ; This call does NOT have existing metadata - should get MemProf annotation | ||
| ; CHECK-FDO: call void %1(), {{.*}} !prof !9 | ||
| call void %1(), !dbg !17 | ||
| ret void | ||
| } | ||
|
|
||
| !16 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20} | ||
|
|
||
| ; CHECK-FDO: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20} | ||
| ; CHECK-FDO: !9 = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1} | ||
|
|
||
| !llvm.module.flags = !{!12, !13} | ||
|
|
||
| !10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !11) | ||
| !11 = !DIFile(filename: "t", directory: "/") | ||
| !12 = !{i32 7, !"Dwarf Version", i32 5} | ||
| !13 = !{i32 2, !"Debug Info Version", i32 3} | ||
| !14 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !11, file: !11, line: 1, unit: !10) | ||
| !15 = !DILocation(line: 4, column: 5, scope: !14) | ||
| !17 = !DILocation(line: 6, column: 5, scope: !14) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Document constant parameter
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.