diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index 31e69784262da..615801f653d96 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -504,59 +504,59 @@ struct CallSiteEntry {
ArrayRef Frames;
// Potential targets for indirect calls.
ArrayRef CalleeGuids;
-
- // Only compare Frame contents.
- // Use pointer-based equality instead of ArrayRef's operator== which does
- // element-wise comparison. We want to check if it's the same slice of the
- // underlying array, not just equivalent content.
- bool operator==(const CallSiteEntry &Other) const {
- return Frames.data() == Other.Frames.data() &&
- Frames.size() == Other.Frames.size();
- }
};
-struct CallSiteEntryHash {
- size_t operator()(const CallSiteEntry &Entry) const {
- return computeFullStackId(Entry.Frames);
- }
-};
-
-static void handleCallSite(
- Instruction &I, const Function *CalledFunction,
- ArrayRef InlinedCallStack,
- const std::unordered_set &CallSiteEntries,
- Module &M, std::set> &MatchedCallSites,
- OptimizationRemarkEmitter &ORE) {
+static void handleCallSite(Instruction &I, const Function *CalledFunction,
+ ArrayRef InlinedCallStack,
+ const std::vector &CallSiteEntries,
+ Module &M,
+ std::set> &MatchedCallSites,
+ OptimizationRemarkEmitter &ORE) {
auto &Ctx = M.getContext();
+ // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
+ // to support cases where the instuction's inlined frames match multiple call
+ // site entries, which can happen if the profile was collected from a binary
+ // where this instruction was eventually inlined into multiple callers.
+ SetVector CalleeGuids;
+ bool CallsiteMDAdded = false;
for (const auto &CallSiteEntry : CallSiteEntries) {
// If we found and thus matched all frames on the call, create and
// attach call stack metadata.
if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,
InlinedCallStack)) {
NumOfMemProfMatchedCallSites++;
- addCallsiteMetadata(I, InlinedCallStack, Ctx);
-
- // Try to attach indirect call metadata if possible.
- if (!CalledFunction)
- addVPMetadata(M, I, CallSiteEntry.CalleeGuids);
-
// Only need to find one with a matching call stack and add a single
// callsite metadata.
-
- // Accumulate call site matching information upon request.
- if (ClPrintMemProfMatchInfo) {
- std::vector CallStack;
- append_range(CallStack, InlinedCallStack);
- MatchedCallSites.insert(std::move(CallStack));
+ if (!CallsiteMDAdded) {
+ addCallsiteMetadata(I, InlinedCallStack, Ctx);
+
+ // Accumulate call site matching information upon request.
+ if (ClPrintMemProfMatchInfo) {
+ std::vector CallStack;
+ append_range(CallStack, InlinedCallStack);
+ MatchedCallSites.insert(std::move(CallStack));
+ }
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
+ << ore::NV("CallSite", &I) << " in function "
+ << ore::NV("Caller", I.getFunction())
+ << " matched callsite with frame count "
+ << ore::NV("Frames", InlinedCallStack.size()));
+
+ // If this is a direct call, we're done.
+ if (CalledFunction)
+ break;
+ CallsiteMDAdded = true;
}
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
- << ore::NV("CallSite", &I) << " in function "
- << ore::NV("Caller", I.getFunction())
- << " matched callsite with frame count "
- << ore::NV("Frames", InlinedCallStack.size()));
- break;
+
+ assert(!CalledFunction && "Didn't expect direct call");
+
+ // Collect Callee GUIDs from all matching CallSiteEntries.
+ CalleeGuids.insert(CallSiteEntry.CalleeGuids.begin(),
+ CallSiteEntry.CalleeGuids.end());
}
}
+ // Try to attach indirect call metadata if possible.
+ addVPMetadata(M, I, CalleeGuids.getArrayRef());
}
static void readMemprof(Module &M, Function &F,
@@ -631,8 +631,7 @@ static void readMemprof(Module &M, Function &F,
// For the callsites we need to record slices of the frame array (see comments
// below where the map entries are added) along with their CalleeGuids.
- std::map>
- LocHashToCallSites;
+ std::map> LocHashToCallSites;
for (auto &AI : MemProfRec->AllocSites) {
NumOfMemProfAllocContextProfiles++;
// Associate the allocation info with the leaf frame. The later matching
@@ -651,7 +650,7 @@ static void readMemprof(Module &M, Function &F,
uint64_t StackId = computeStackId(StackFrame);
ArrayRef FrameSlice = ArrayRef(CS.Frames).drop_front(Idx++);
ArrayRef CalleeGuids(CS.CalleeGuids);
- LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
+ LocHashToCallSites[StackId].push_back({FrameSlice, CalleeGuids});
ProfileHasColumns |= StackFrame.Column;
// Once we find this function, we can stop recording.
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
index ad83da285694a..ac7dc77d85b3f 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
@@ -3,7 +3,7 @@
;; Basic functionality with flag toggle
; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
; RUN: opt < %t/basic.ll -passes='memprof-use' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
-; RUN: opt < %t/basic.ll -passes='memprof-use' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
+; RUN: opt < %t/basic.ll -passes='memprof-use' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE --dump-input-filter=all
;; FDO conflict handling
; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
@@ -18,6 +18,18 @@ HeapProfileRecords:
- Frames:
- { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
+ # The next 2 sets of frames simulates the case where this function was
+ # eventually inlined into multiple callers. We would have propagated the
+ # resulting frames and callee guids here for matching with they not yet
+ # inlined bar. We should aggregate all callee guids into the metadata.
+ - Frames:
+ - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true }
+ - { Function: _Z3foov, LineOffset: 1, Column: 6, IsInlineFrame: false }
+ CalleeGuids: [0x1234, 0x2345]
+ - Frames:
+ - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true }
+ - { Function: _Z3foov, LineOffset: 10, Column: 7, IsInlineFrame: false }
+ CalleeGuids: [0x3456, 0x4567]
...
;--- basic.ll
@@ -31,7 +43,7 @@ entry:
ret void
}
-; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
+; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 6, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1, i64 4660, i64 1, i64 9029, i64 1, i64 13398, i64 1, i64 17767, i64 1}
!llvm.module.flags = !{!2, !3}