@@ -509,59 +509,59 @@ struct CallSiteEntry {
509509 ArrayRef<Frame> Frames;
510510 // Potential targets for indirect calls.
511511 ArrayRef<GlobalValue::GUID> CalleeGuids;
512-
513- // Only compare Frame contents.
514- // Use pointer-based equality instead of ArrayRef's operator== which does
515- // element-wise comparison. We want to check if it's the same slice of the
516- // underlying array, not just equivalent content.
517- bool operator ==(const CallSiteEntry &Other) const {
518- return Frames.data () == Other.Frames .data () &&
519- Frames.size () == Other.Frames .size ();
520- }
521512};
522513
523- struct CallSiteEntryHash {
524- size_t operator ()(const CallSiteEntry &Entry) const {
525- return computeFullStackId (Entry.Frames );
526- }
527- };
528-
529- static void handleCallSite (
530- Instruction &I, const Function *CalledFunction,
531- ArrayRef<uint64_t > InlinedCallStack,
532- const std::unordered_set<CallSiteEntry, CallSiteEntryHash> &CallSiteEntries,
533- Module &M, std::set<std::vector<uint64_t >> &MatchedCallSites,
534- OptimizationRemarkEmitter &ORE) {
514+ static void handleCallSite (Instruction &I, const Function *CalledFunction,
515+ ArrayRef<uint64_t > InlinedCallStack,
516+ const std::vector<CallSiteEntry> &CallSiteEntries,
517+ Module &M,
518+ std::set<std::vector<uint64_t >> &MatchedCallSites,
519+ OptimizationRemarkEmitter &ORE) {
535520 auto &Ctx = M.getContext ();
521+ // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
522+ // to support cases where the instuction's inlined frames match multiple call
523+ // site entries, which can happen if the profile was collected from a binary
524+ // where this instruction was eventually inlined into multiple callers.
525+ SetVector<GlobalValue::GUID> CalleeGuids;
526+ bool CallsiteMDAdded = false ;
536527 for (const auto &CallSiteEntry : CallSiteEntries) {
537528 // If we found and thus matched all frames on the call, create and
538529 // attach call stack metadata.
539530 if (stackFrameIncludesInlinedCallStack (CallSiteEntry.Frames ,
540531 InlinedCallStack)) {
541532 NumOfMemProfMatchedCallSites++;
542- addCallsiteMetadata (I, InlinedCallStack, Ctx);
543-
544- // Try to attach indirect call metadata if possible.
545- if (!CalledFunction)
546- addVPMetadata (M, I, CallSiteEntry.CalleeGuids );
547-
548533 // Only need to find one with a matching call stack and add a single
549534 // callsite metadata.
550-
551- // Accumulate call site matching information upon request.
552- if (ClPrintMemProfMatchInfo) {
553- std::vector<uint64_t > CallStack;
554- append_range (CallStack, InlinedCallStack);
555- MatchedCallSites.insert (std::move (CallStack));
535+ if (!CallsiteMDAdded) {
536+ addCallsiteMetadata (I, InlinedCallStack, Ctx);
537+
538+ // Accumulate call site matching information upon request.
539+ if (ClPrintMemProfMatchInfo) {
540+ std::vector<uint64_t > CallStack;
541+ append_range (CallStack, InlinedCallStack);
542+ MatchedCallSites.insert (std::move (CallStack));
543+ }
544+ ORE.emit (OptimizationRemark (DEBUG_TYPE, " MemProfUse" , &I)
545+ << ore::NV (" CallSite" , &I) << " in function "
546+ << ore::NV (" Caller" , I.getFunction ())
547+ << " matched callsite with frame count "
548+ << ore::NV (" Frames" , InlinedCallStack.size ()));
549+
550+ // If this is a direct call, we're done.
551+ if (CalledFunction)
552+ break ;
553+ CallsiteMDAdded = true ;
556554 }
557- ORE. emit ( OptimizationRemark (DEBUG_TYPE, " MemProfUse " , &I)
558- << ore::NV ( " CallSite " , &I) << " in function "
559- << ore::NV ( " Caller " , I. getFunction ())
560- << " matched callsite with frame count "
561- << ore::NV ( " Frames " , InlinedCallStack. size ()));
562- break ;
555+
556+ assert (!CalledFunction && " Didn't expect direct call " );
557+
558+ // Collect Callee GUIDs from all matching CallSiteEntries.
559+ CalleeGuids. insert (CallSiteEntry. CalleeGuids . begin (),
560+ CallSiteEntry. CalleeGuids . end ()) ;
563561 }
564562 }
563+ // Try to attach indirect call metadata if possible.
564+ addVPMetadata (M, I, CalleeGuids.getArrayRef ());
565565}
566566
567567static void readMemprof (Module &M, Function &F,
@@ -639,8 +639,7 @@ static void readMemprof(Module &M, Function &F,
639639
640640 // For the callsites we need to record slices of the frame array (see comments
641641 // below where the map entries are added) along with their CalleeGuids.
642- std::map<uint64_t , std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
643- LocHashToCallSites;
642+ std::map<uint64_t , std::vector<CallSiteEntry>> LocHashToCallSites;
644643 for (auto &AI : MemProfRec->AllocSites ) {
645644 NumOfMemProfAllocContextProfiles++;
646645 // Associate the allocation info with the leaf frame. The later matching
@@ -659,7 +658,7 @@ static void readMemprof(Module &M, Function &F,
659658 uint64_t StackId = computeStackId (StackFrame);
660659 ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames ).drop_front (Idx++);
661660 ArrayRef<GlobalValue::GUID> CalleeGuids (CS.CalleeGuids );
662- LocHashToCallSites[StackId].insert ({FrameSlice, CalleeGuids});
661+ LocHashToCallSites[StackId].push_back ({FrameSlice, CalleeGuids});
663662
664663 ProfileHasColumns |= StackFrame.Column ;
665664 // Once we find this function, we can stop recording.
0 commit comments