diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index 31e69784262da..615801f653d96 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -504,59 +504,59 @@ struct CallSiteEntry { ArrayRef Frames; // Potential targets for indirect calls. ArrayRef CalleeGuids; - - // Only compare Frame contents. - // Use pointer-based equality instead of ArrayRef's operator== which does - // element-wise comparison. We want to check if it's the same slice of the - // underlying array, not just equivalent content. - bool operator==(const CallSiteEntry &Other) const { - return Frames.data() == Other.Frames.data() && - Frames.size() == Other.Frames.size(); - } }; -struct CallSiteEntryHash { - size_t operator()(const CallSiteEntry &Entry) const { - return computeFullStackId(Entry.Frames); - } -}; - -static void handleCallSite( - Instruction &I, const Function *CalledFunction, - ArrayRef InlinedCallStack, - const std::unordered_set &CallSiteEntries, - Module &M, std::set> &MatchedCallSites, - OptimizationRemarkEmitter &ORE) { +static void handleCallSite(Instruction &I, const Function *CalledFunction, + ArrayRef InlinedCallStack, + const std::vector &CallSiteEntries, + Module &M, + std::set> &MatchedCallSites, + OptimizationRemarkEmitter &ORE) { auto &Ctx = M.getContext(); + // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them + // to support cases where the instuction's inlined frames match multiple call + // site entries, which can happen if the profile was collected from a binary + // where this instruction was eventually inlined into multiple callers. + SetVector CalleeGuids; + bool CallsiteMDAdded = false; for (const auto &CallSiteEntry : CallSiteEntries) { // If we found and thus matched all frames on the call, create and // attach call stack metadata. if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames, InlinedCallStack)) { NumOfMemProfMatchedCallSites++; - addCallsiteMetadata(I, InlinedCallStack, Ctx); - - // Try to attach indirect call metadata if possible. - if (!CalledFunction) - addVPMetadata(M, I, CallSiteEntry.CalleeGuids); - // Only need to find one with a matching call stack and add a single // callsite metadata. - - // Accumulate call site matching information upon request. - if (ClPrintMemProfMatchInfo) { - std::vector CallStack; - append_range(CallStack, InlinedCallStack); - MatchedCallSites.insert(std::move(CallStack)); + if (!CallsiteMDAdded) { + addCallsiteMetadata(I, InlinedCallStack, Ctx); + + // Accumulate call site matching information upon request. + if (ClPrintMemProfMatchInfo) { + std::vector CallStack; + append_range(CallStack, InlinedCallStack); + MatchedCallSites.insert(std::move(CallStack)); + } + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I) + << ore::NV("CallSite", &I) << " in function " + << ore::NV("Caller", I.getFunction()) + << " matched callsite with frame count " + << ore::NV("Frames", InlinedCallStack.size())); + + // If this is a direct call, we're done. + if (CalledFunction) + break; + CallsiteMDAdded = true; } - ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I) - << ore::NV("CallSite", &I) << " in function " - << ore::NV("Caller", I.getFunction()) - << " matched callsite with frame count " - << ore::NV("Frames", InlinedCallStack.size())); - break; + + assert(!CalledFunction && "Didn't expect direct call"); + + // Collect Callee GUIDs from all matching CallSiteEntries. + CalleeGuids.insert(CallSiteEntry.CalleeGuids.begin(), + CallSiteEntry.CalleeGuids.end()); } } + // Try to attach indirect call metadata if possible. + addVPMetadata(M, I, CalleeGuids.getArrayRef()); } static void readMemprof(Module &M, Function &F, @@ -631,8 +631,7 @@ static void readMemprof(Module &M, Function &F, // For the callsites we need to record slices of the frame array (see comments // below where the map entries are added) along with their CalleeGuids. - std::map> - LocHashToCallSites; + std::map> LocHashToCallSites; for (auto &AI : MemProfRec->AllocSites) { NumOfMemProfAllocContextProfiles++; // Associate the allocation info with the leaf frame. The later matching @@ -651,7 +650,7 @@ static void readMemprof(Module &M, Function &F, uint64_t StackId = computeStackId(StackFrame); ArrayRef FrameSlice = ArrayRef(CS.Frames).drop_front(Idx++); ArrayRef CalleeGuids(CS.CalleeGuids); - LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids}); + LocHashToCallSites[StackId].push_back({FrameSlice, CalleeGuids}); ProfileHasColumns |= StackFrame.Column; // Once we find this function, we can stop recording. diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test index ad83da285694a..ac7dc77d85b3f 100644 --- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test +++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test @@ -3,7 +3,7 @@ ;; Basic functionality with flag toggle ; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata ; RUN: opt < %t/basic.ll -passes='memprof-use' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE -; RUN: opt < %t/basic.ll -passes='memprof-use' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE +; RUN: opt < %t/basic.ll -passes='memprof-use' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE --dump-input-filter=all ;; FDO conflict handling ; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata @@ -18,6 +18,18 @@ HeapProfileRecords: - Frames: - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false } CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01] + # The next 2 sets of frames simulates the case where this function was + # eventually inlined into multiple callers. We would have propagated the + # resulting frames and callee guids here for matching with they not yet + # inlined bar. We should aggregate all callee guids into the metadata. + - Frames: + - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true } + - { Function: _Z3foov, LineOffset: 1, Column: 6, IsInlineFrame: false } + CalleeGuids: [0x1234, 0x2345] + - Frames: + - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true } + - { Function: _Z3foov, LineOffset: 10, Column: 7, IsInlineFrame: false } + CalleeGuids: [0x3456, 0x4567] ... ;--- basic.ll @@ -31,7 +43,7 @@ entry: ret void } -; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1} +; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 6, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1, i64 4660, i64 1, i64 9029, i64 1, i64 13398, i64 1, i64 17767, i64 1} !llvm.module.flags = !{!2, !3}