From ed99d519dcd80f5bddefaee20b006bcdf2514513 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 8 Nov 2024 18:24:03 -0800 Subject: [PATCH 1/3] [memprof] Add IndexedMemProfReader::getMemProfCallerCalleePairs Undrifting the MemProf profile requires two sets of information: - caller-callee pairs from the profile - caller-callee pairs from the IR This patch adds a function to do the former. The latter has been addressed by extractCallsFromIR. Unfortunately, the current MemProf format does not directly give us the caller-callee pairs from the profile. "struct Frame" just tells us where the call site is -- Caller GUID and line/column numbers; it doesn't tell us what function a given Frame is calling. To extract caller-callee pairs, we need to scan each call stack, look at two adjacent Frames, and extract a caller-callee pair. Conceptually, we would extract caller-callee pairs with: for each MemProfRecord in the profile: for each call stack in AllocSites: extract caller-callee pairs from adjacent pairs of Frames However, this is highly inefficient. Obtaining MemProfRecord involves looking up the OnDiskHashTable, allocating several vectors on the heap, and populating fields that are irrelevant to us, such as MIB and CallSites. This patch adds an efficient way of doing the above. Specifically, we - go through all IndexedMemProfRecords, - look at each linear call stack ID, - extract caller-callee pairs from each call stack The extraction is done by a new class CallerCalleePairExtractor, modified from LinearCallStackIdConverter, which reconstructs a call stack from the radix tree array. For our purposes, we skip the reconstruction and immediately populate the data structure for caller-callee pairs. The resulting caller-callee pairs are of the type: DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> CallerCalleePairs; which can be passed directly to longestCommonSequence just like the result of extractCallsFromIR. Further performance optimizations are possible for the new functions in this patch. 
I'll address those in follow-up patches. --- .../llvm/ProfileData/InstrProfReader.h | 8 ++ llvm/include/llvm/ProfileData/MemProf.h | 74 +++++++++++++++++++ .../Transforms/Instrumentation/MemProfiler.h | 26 +------ llvm/lib/ProfileData/InstrProfReader.cpp | 29 ++++++++ llvm/unittests/ProfileData/InstrProfTest.cpp | 62 ++++++++++++++++ 5 files changed, 174 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 6be3fad41824a..42414bc193bc8 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -695,6 +695,9 @@ class IndexedMemProfReader { Expected getMemProfRecord(const uint64_t FuncNameHash) const; + + DenseMap> + getMemProfCallerCalleePairs() const; }; /// Reader for the indexed binary instrprof format. @@ -793,6 +796,11 @@ class IndexedInstrProfReader : public InstrProfReader { return MemProfReader.getMemProfRecord(FuncNameHash); } + DenseMap> + getMemProfCallerCalleePairs() { + return MemProfReader.getMemProfCallerCalleePairs(); + } + /// Fill Counts with the profile data for the given function name. 
Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index da2cc80737009..0d47e12cb2b5d 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -931,6 +931,80 @@ struct LinearCallStackIdConverter { } }; +struct LineLocation { + LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Column(D) {} + + bool operator<(const LineLocation &O) const { + return LineOffset < O.LineOffset || + (LineOffset == O.LineOffset && Column < O.Column); + } + + bool operator==(const LineLocation &O) const { + return LineOffset == O.LineOffset && Column == O.Column; + } + + bool operator!=(const LineLocation &O) const { + return LineOffset != O.LineOffset || Column != O.Column; + } + + uint64_t getHashCode() const { return ((uint64_t)Column << 32) | LineOffset; } + + uint32_t LineOffset; + uint32_t Column; +}; + +// A pair of a call site location and its corresponding callee GUID. +using CallEdgeTy = std::pair; + +// Used to extract caller-callee pairs from the call stack array. The leaf +// frame is assumed to call a heap allocation function with GUID 0. The +// resulting pairs are accumulated in CallerCalleePairs. Users can take it +// with: +// +// auto Pairs = std::move(Extractor.CallerCalleePairs); +struct CallerCalleePairExtractor { + const unsigned char *CallStackBase; + std::function FrameIdToFrame; + DenseMap> CallerCalleePairs; + + CallerCalleePairExtractor() = delete; + CallerCalleePairExtractor(const unsigned char *CallStackBase, + std::function FrameIdToFrame) + : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {} + + void operator()(LinearCallStackId LinearCSId) { + const unsigned char *Ptr = + CallStackBase + + static_cast(LinearCSId) * sizeof(LinearFrameId); + uint32_t NumFrames = + support::endian::readNext(Ptr); + // The leaf frame calls a function with GUID 0. 
+ uint64_t CalleeGUID = 0; + for (; NumFrames; --NumFrames) { + LinearFrameId Elem = + support::endian::read(Ptr); + // Follow a pointer to the parent, if any. See comments below on + // CallStackRadixTreeBuilder for the description of the radix tree format. + if (static_cast>(Elem) < 0) { + Ptr += (-Elem) * sizeof(LinearFrameId); + Elem = + support::endian::read(Ptr); + } + // We shouldn't encounter another pointer. + assert(static_cast>(Elem) >= 0); + + // Add a new caller-callee pair. + Frame F = FrameIdToFrame(Elem); + uint64_t CallerGUID = F.Function; + LineLocation Loc(F.LineOffset, F.Column); + CallerCalleePairs[CallerGUID].emplace_back(Loc, CalleeGUID); + + Ptr += sizeof(LinearFrameId); + CalleeGUID = CallerGUID; + } + } +}; + struct IndexedMemProfData { // A map to hold memprof data per function. The lower 64 bits obtained from // the md5 hash of the function name is used to index into the map. diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index f168ffc4fdb1e..2b8debd872c12 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -14,6 +14,7 @@ #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" +#include "llvm/ProfileData/MemProf.h" namespace llvm { class Function; @@ -59,31 +60,6 @@ class MemProfUsePass : public PassInfoMixin { namespace memprof { -struct LineLocation { - LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Column(D) {} - - bool operator<(const LineLocation &O) const { - return LineOffset < O.LineOffset || - (LineOffset == O.LineOffset && Column < O.Column); - } - - bool operator==(const LineLocation &O) const { - return LineOffset == O.LineOffset && Column == O.Column; - } - - bool operator!=(const LineLocation &O) const { - return LineOffset != O.LineOffset || Column != O.Column; - } - - uint64_t getHashCode() const { return ((uint64_t)Column << 
32) | LineOffset; } - - uint32_t LineOffset; - uint32_t Column; -}; - -// A pair of a call site location and its corresponding callee GUID. -using CallEdgeTy = std::pair; - // Extract all calls from the IR. Arrange them in a map from caller GUIDs to a // list of call sites, each of the form {LineLocation, CalleeGUID}. DenseMap> extractCallsFromIR(Module &M); diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index b90617c74f6d1..034ae14b39bdd 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1666,6 +1666,35 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const { memprof::MaximumSupportedVersion)); } +DenseMap> +IndexedMemProfReader::getMemProfCallerCalleePairs() const { + assert(MemProfRecordTable); + assert(Version == memprof::Version3); + + memprof::LinearFrameIdConverter FrameIdConv(FrameBase); + memprof::CallerCalleePairExtractor Extractor(CallStackBase, FrameIdConv); + + // Collect the set of linear call stack IDs. Since we expect a lot of + // duplicates, we first collect them in the form a bit vector before + // processing them. + for (const memprof::IndexedMemProfRecord &IndexedRecord : + MemProfRecordTable->data()) + for (const memprof::IndexedAllocationInfo &IndexedAI : + IndexedRecord.AllocSites) + Extractor(IndexedAI.CSId); + + DenseMap> Pairs = + std::move(Extractor.CallerCalleePairs); + + // Sort each call list by the source location. 
+ for (auto &[CallerGUID, CallList] : Pairs) { + llvm::sort(CallList); + CallList.erase(llvm::unique(CallList), CallList.end()); + } + + return Pairs; +} + Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts) { diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 7fdfd15e7bc99..cf3cf7fb95273 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -580,6 +580,68 @@ TEST_F(InstrProfTest, test_memprof_v2_partial_schema) { EXPECT_THAT(WantRecord, EqualsRecord(Record)); } +TEST_F(InstrProfTest, test_caller_callee_pairs) { + const MemInfoBlock MIB = makePartialMIB(); + + Writer.setMemProfVersionRequested(memprof::Version3); + Writer.setMemProfFullSchema(false); + + ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), + Succeeded()); + + // Call Hierarchy + // + // Function GUID:0x123 + // Line: 1, Column: 2 + // Function GUID: 0x234 + // Line: 3, Column: 4 + // new(...) + // Line: 5, Column: 6 + // Function GUID: 0x345 + // Line: 7, Column: 8 + // new(...) 
+ + const std::pair Frames[] = { + {0, {0x123, 1, 2, false}}, + {1, {0x234, 3, 4, true}}, + {2, {0x123, 5, 6, false}}, + {3, {0x345, 7, 8, true}}}; + for (const auto &[FrameId, Frame] : Frames) + Writer.addMemProfFrame(FrameId, Frame, Err); + + const std::pair> + CallStacks[] = {{0x111, {1, 0}}, {0x222, {3, 2}}}; + for (const auto &[CSId, CallStack] : CallStacks) + Writer.addMemProfCallStack(CSId, CallStack, Err); + + const IndexedMemProfRecord IndexedMR = makeRecordV2( + /*AllocFrames=*/{0x111, 0x222}, + /*CallSiteFrames=*/{}, MIB, memprof::getHotColdSchema()); + Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + auto Pairs = Reader->getMemProfCallerCalleePairs(); + ASSERT_THAT(Pairs, SizeIs(3)); + + auto It = Pairs.find(0x123); + ASSERT_NE(It, Pairs.end()); + ASSERT_THAT(It->second, SizeIs(2)); + EXPECT_THAT(It->second[0], testing::Pair(testing::FieldsAre(1U, 2U), 0x234U)); + EXPECT_THAT(It->second[1], testing::Pair(testing::FieldsAre(5U, 6U), 0x345U)); + + It = Pairs.find(0x234); + ASSERT_NE(It, Pairs.end()); + ASSERT_THAT(It->second, SizeIs(1)); + EXPECT_THAT(It->second[0], testing::Pair(testing::FieldsAre(3U, 4U), 0U)); + + It = Pairs.find(0x345); + ASSERT_NE(It, Pairs.end()); + ASSERT_THAT(It->second, SizeIs(1)); + EXPECT_THAT(It->second[0], testing::Pair(testing::FieldsAre(7U, 8U), 0U)); +} + TEST_F(InstrProfTest, test_memprof_getrecord_error) { ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), Succeeded()); From 3554a47fdd04ebb6666998accd0a3456e80fde49 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 13 Nov 2024 19:19:24 -0800 Subject: [PATCH 2/3] Add comments. 
--- llvm/include/llvm/ProfileData/MemProf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 0d47e12cb2b5d..ff05bb7da2f79 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -963,8 +963,11 @@ using CallEdgeTy = std::pair; // // auto Pairs = std::move(Extractor.CallerCalleePairs); struct CallerCalleePairExtractor { + // The base address of the radix tree array. const unsigned char *CallStackBase; + // A functor to convert a linear FrameId to a Frame. std::function FrameIdToFrame; + // A map from caller GUIDs to lists of call sites in respective callers. DenseMap> CallerCalleePairs; CallerCalleePairExtractor() = delete; From 7a514d69564dc2bf041908d3361678599a75de94 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 13 Nov 2024 22:41:24 -0800 Subject: [PATCH 3/3] Remove a comment. --- llvm/lib/ProfileData/InstrProfReader.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 034ae14b39bdd..cae6ce5b824e6 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1674,9 +1674,6 @@ IndexedMemProfReader::getMemProfCallerCalleePairs() const { memprof::LinearFrameIdConverter FrameIdConv(FrameBase); memprof::CallerCalleePairExtractor Extractor(CallStackBase, FrameIdConv); - // Collect the set of linear call stack IDs. Since we expect a lot of - // duplicates, we first collect them in the form a bit vector before - // processing them. for (const memprof::IndexedMemProfRecord &IndexedRecord : MemProfRecordTable->data()) for (const memprof::IndexedAllocationInfo &IndexedAI :