@@ -178,6 +178,12 @@ static cl::opt<bool>
178178 cl::desc (" Salvage stale MemProf profile" ),
179179 cl::init(false ), cl::Hidden);
180180
181+ static cl::opt<bool > ClMemProfAttachCalleeGuids (
182+ " memprof-attach-calleeguids" ,
183+ cl::desc (
184+ " Attach calleeguids as value profile metadata for indirect calls." ),
185+ cl::init(true ), cl::Hidden);
186+
181187extern cl::opt<bool > MemProfReportHintedSizes;
182188extern cl::opt<unsigned > MinClonedColdBytePercent;
183189extern cl::opt<unsigned > MinCallsiteColdBytePercent;
@@ -952,6 +958,46 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
952958 UndriftCallStack (CS.Frames );
953959}
954960
961+ // Helper function to process CalleeGuids and create value profile metadata
962+ static void addVPMetadata (Module &M, Instruction &I,
963+ ArrayRef<GlobalValue::GUID> CalleeGuids) {
964+ if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty ())
965+ return ;
966+
967+ if (I.getMetadata (LLVMContext::MD_prof)) {
968+ uint64_t Unused;
969+ // TODO: When merging is implemented, increase this to a typical ICP value
970+ // (e.g., 3-6) For now, we only need to check if existing data exists, so 1
971+ // is sufficient
972+ auto ExistingVD = getValueProfDataFromInst (I, IPVK_IndirectCallTarget,
973+ /* MaxNumValueData=*/ 1 , Unused);
974+ // We don't know how to merge value profile data yet.
975+ if (!ExistingVD.empty ()) {
976+ return ;
977+ }
978+ }
979+
980+ SmallVector<InstrProfValueData, 4 > VDs;
981+ uint64_t TotalCount = 0 ;
982+
983+ for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
984+ InstrProfValueData VD;
985+ VD.Value = CalleeGUID;
986+ // For MemProf, we don't have actual call counts, so we assign
987+ // a weight of 1 to each potential target.
988+ // TODO: Consider making this weight configurable or increasing it to
989+ // improve effectiveness for ICP.
990+ VD.Count = 1 ;
991+ VDs.push_back (VD);
992+ TotalCount += VD.Count ;
993+ }
994+
995+ if (!VDs.empty ()) {
996+ annotateValueSite (M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
997+ VDs.size ());
998+ }
999+ }
1000+
9551001static void
9561002readMemprof (Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
9571003 const TargetLibraryInfo &TLI,
@@ -1020,15 +1066,35 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
10201066 // Build maps of the location hash to all profile data with that leaf location
10211067 // (allocation info and the callsites).
10221068 std::map<uint64_t , std::set<const AllocationInfo *>> LocHashToAllocInfo;
1023- // A hash function for std::unordered_set<ArrayRef<Frame>> to work.
1024- struct CallStackHash {
1025- size_t operator ()(ArrayRef<Frame> CS) const {
1026- return computeFullStackId (CS);
1069+
1070+ // Helper struct for maintaining refs to callsite data. As an alternative we
1071+ // could store a pointer to the CallSiteInfo struct but we also need the frame
1072+ // index. Using ArrayRefs instead makes it a little easier to read.
1073+ struct CallSiteEntry {
1074+ // Subset of frames for the corresponding CallSiteInfo.
1075+ ArrayRef<Frame> Frames;
1076+ // Potential targets for indirect calls.
1077+ ArrayRef<GlobalValue::GUID> CalleeGuids;
1078+
1079+ // Only compare Frame contents.
1080+ // Use pointer-based equality instead of ArrayRef's operator== which does
1081+ // element-wise comparison. We want to check if it's the same slice of the
1082+ // underlying array, not just equivalent content.
1083+ bool operator ==(const CallSiteEntry &Other) const {
1084+ return Frames.data () == Other.Frames .data () &&
1085+ Frames.size () == Other.Frames .size ();
1086+ }
1087+ };
1088+
1089+ struct CallSiteEntryHash {
1090+ size_t operator ()(const CallSiteEntry &Entry) const {
1091+ return computeFullStackId (Entry.Frames );
10271092 }
10281093 };
1094+
10291095 // For the callsites we need to record slices of the frame array (see comments
1030- // below where the map entries are added).
1031- std::map<uint64_t , std::unordered_set<ArrayRef<Frame>, CallStackHash >>
1096+ // below where the map entries are added) along with their CalleeGuids .
1097+ std::map<uint64_t , std::unordered_set<CallSiteEntry, CallSiteEntryHash >>
10321098 LocHashToCallSites;
10331099 for (auto &AI : MemProfRec->AllocSites ) {
10341100 NumOfMemProfAllocContextProfiles++;
@@ -1046,8 +1112,10 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
10461112 unsigned Idx = 0 ;
10471113 for (auto &StackFrame : CS.Frames ) {
10481114 uint64_t StackId = computeStackId (StackFrame);
1049- LocHashToCallSites[StackId].insert (
1050- ArrayRef<Frame>(CS.Frames ).drop_front (Idx++));
1115+ ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames ).drop_front (Idx++);
1116+ ArrayRef<GlobalValue::GUID> CalleeGuids (CS.CalleeGuids );
1117+ LocHashToCallSites[StackId].insert ({FrameSlice, CalleeGuids});
1118+
10511119 ProfileHasColumns |= StackFrame.Column ;
10521120 // Once we find this function, we can stop recording.
10531121 if (StackFrame.Function == FuncGUID)
@@ -1191,13 +1259,18 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
11911259 // Otherwise, add callsite metadata. If we reach here then we found the
11921260 // instruction's leaf location in the callsites map and not the allocation
11931261 // map.
1194- for (auto CallStackIdx : CallSitesIter->second ) {
1262+ for (const auto &CallSiteEntry : CallSitesIter->second ) {
11951263 // If we found and thus matched all frames on the call, create and
11961264 // attach call stack metadata.
1197- if (stackFrameIncludesInlinedCallStack (CallStackIdx ,
1265+ if (stackFrameIncludesInlinedCallStack (CallSiteEntry. Frames ,
11981266 InlinedCallStack)) {
11991267 NumOfMemProfMatchedCallSites++;
12001268 addCallsiteMetadata (I, InlinedCallStack, Ctx);
1269+
1270+ // Try to attach indirect call metadata if possible.
1271+ if (!CalledFunction)
1272+ addVPMetadata (M, I, CallSiteEntry.CalleeGuids );
1273+
12011274 // Only need to find one with a matching call stack and add a single
12021275 // callsite metadata.
12031276
0 commit comments