@@ -966,11 +966,12 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
966966 UndriftCallStack (CS);
967967}
968968
969- static void
970- readMemprof (Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
971- const TargetLibraryInfo &TLI,
972- std::map<uint64_t , AllocMatchInfo> &FullStackIdToAllocMatchInfo,
973- DenseMap<uint64_t , LocToLocMap> &UndriftMaps) {
969+ static void readMemprof (
970+ Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
971+ const TargetLibraryInfo &TLI,
972+ std::map<uint64_t , AllocMatchInfo> &FullStackIdToAllocMatchInfo,
973+ std::set<std::vector<uint64_t >> &MatchedCallSites,
974+ DenseMap<uint64_t , LocToLocMap> &UndriftMaps) {
974975 auto &Ctx = M.getContext ();
975976 // Previously we used getIRPGOFuncName() here. If F is local linkage,
976977 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -1034,15 +1035,13 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
10341035 std::map<uint64_t , std::set<const AllocationInfo *>> LocHashToAllocInfo;
10351036 // A hash function for std::unordered_set<ArrayRef<Frame>> to work.
10361037 struct CallStackHash {
1037- size_t operator ()(const std::pair<ArrayRef<Frame>, unsigned > &CS) const {
1038- auto &[CallStack, Idx] = CS;
1039- return computeFullStackId (ArrayRef<Frame>(CallStack).drop_front (Idx));
1038+ size_t operator ()(ArrayRef<Frame> CS) const {
1039+ return computeFullStackId (CS);
10401040 }
10411041 };
10421042 // For the callsites we need to record slices of the frame array (see comments
10431043 // below where the map entries are added).
1044- std::map<uint64_t , std::unordered_set<std::pair<ArrayRef<Frame>, unsigned >,
1045- CallStackHash>>
1044+ std::map<uint64_t , std::unordered_set<ArrayRef<Frame>, CallStackHash>>
10461045 LocHashToCallSites;
10471046 for (auto &AI : MemProfRec->AllocSites ) {
10481047 NumOfMemProfAllocContextProfiles++;
@@ -1060,7 +1059,7 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
10601059 unsigned Idx = 0 ;
10611060 for (auto &StackFrame : CS) {
10621061 uint64_t StackId = computeStackId (StackFrame);
1063- LocHashToCallSites[StackId].emplace (CS, Idx++);
1062+ LocHashToCallSites[StackId].insert (ArrayRef<Frame>(CS). drop_front ( Idx++) );
10641063 ProfileHasColumns |= StackFrame.Column ;
10651064 // Once we find this function, we can stop recording.
10661065 if (StackFrame.Function == FuncGUID)
@@ -1203,21 +1202,21 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
12031202 // instruction's leaf location in the callsites map and not the allocation
12041203 // map.
12051204 assert (CallSitesIter != LocHashToCallSites.end ());
1206- for (auto &[ProfileCallStack, Idx] : CallSitesIter->second ) {
1205+ for (auto CallStackIdx : CallSitesIter->second ) {
12071206 // If we found and thus matched all frames on the call, create and
12081207 // attach call stack metadata.
1209- if (stackFrameIncludesInlinedCallStack (ProfileCallStack. drop_front (Idx) ,
1208+ if (stackFrameIncludesInlinedCallStack (CallStackIdx ,
12101209 InlinedCallStack)) {
12111210 NumOfMemProfMatchedCallSites++;
12121211 addCallsiteMetadata (I, InlinedCallStack, Ctx);
12131212 // Only need to find one with a matching call stack and add a single
12141213 // callsite metadata.
12151214
1216- // Dump call site matching information upon request.
1215+ // Accumulate call site matching information upon request.
12171216 if (ClPrintMemProfMatchInfo) {
1218- uint64_t FullStackId = computeFullStackId (ProfileCallStack) ;
1219- errs () << " MemProf callsite " << FullStackId << " " << Idx << " "
1220- << InlinedCallStack. size () << " \n " ;
1217+ std::vector< uint64_t > CallStack ;
1218+ append_range (CallStack, InlinedCallStack);
1219+ MatchedCallSites. insert ( std::move (CallStack)) ;
12211220 }
12221221 break ;
12231222 }
@@ -1275,13 +1274,17 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12751274 // it to an allocation in the IR.
12761275 std::map<uint64_t , AllocMatchInfo> FullStackIdToAllocMatchInfo;
12771276
1277+ // Set of the matched call sites, each expressed as a sequence of an inline
1278+ // call stack.
1279+ std::set<std::vector<uint64_t >> MatchedCallSites;
1280+
12781281 for (auto &F : M) {
12791282 if (F.isDeclaration ())
12801283 continue ;
12811284
12821285 const TargetLibraryInfo &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
12831286 readMemprof (M, F, MemProfReader.get (), TLI, FullStackIdToAllocMatchInfo,
1284- UndriftMaps);
1287+ MatchedCallSites, UndriftMaps);
12851288 }
12861289
12871290 if (ClPrintMemProfMatchInfo) {
@@ -1290,6 +1293,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12901293 << " context with id " << Id << " has total profiled size "
12911294 << Info.TotalSize << (Info.Matched ? " is" : " not" )
12921295 << " matched\n " ;
1296+
1297+ for (const auto &CallStack : MatchedCallSites) {
1298+ errs () << " MemProf callsite match for inline call stack" ;
1299+ for (uint64_t StackId : CallStack)
1300+ errs () << " " << StackId;
1301+ errs () << " \n " ;
1302+ }
12931303 }
12941304
12951305 return PreservedAnalyses::none ();
0 commit comments