Skip to content

Commit 1ee8dff

Browse files
[memprof] Add YAML-based deserialization for MemProf profile
This patch adds YAML-based deserialization for the MemProf profile. It's been painful to write tests for MemProf passes because we do not have a text format for the MemProf profile. We would write a test case in C++, run it for a binary MemProf profile, and then finally run a test written in LLVM IR with the binary profile. This patch paves the way toward a YAML-based MemProf profile. Specifically, it adds a new class, YAMLMemProfReader, derived from MemProfReader. For now, it only adds a function to parse a StringRef pointing to YAML data. Subsequent patches will wire it to llvm-profdata and read from a file. The field names are based on various printYAML functions in MemProf.h. I'm not aiming for compatibility with the format used in printYAML, but I don't see a point in changing the field names.
1 parent 3433e41 commit 1ee8dff

File tree

4 files changed

+211
-0
lines changed

4 files changed

+211
-0
lines changed

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
#include <optional>
2020

2121
namespace llvm {
22+
namespace yaml {
23+
template <typename T> struct CustomMappingTraits;
24+
} // namespace yaml
25+
2226
namespace memprof {
2327

2428
struct MemProfRecord;
@@ -193,6 +197,9 @@ struct PortableMemInfoBlock {
193197
return Result;
194198
}
195199

200+
// Give YAML access to the individual MIB fields.
201+
friend struct yaml::CustomMappingTraits<memprof::PortableMemInfoBlock>;
202+
196203
private:
197204
// The set of available fields, indexed by Meta::Name.
198205
std::bitset<llvm::to_underlying(Meta::Size)> Schema;
@@ -362,6 +369,8 @@ struct IndexedAllocationInfo {
362369
IndexedAllocationInfo(CallStackId CSId, const MemInfoBlock &MB,
363370
const MemProfSchema &Schema = getFullSchema())
364371
: CSId(CSId), Info(MB, Schema) {}
372+
IndexedAllocationInfo(CallStackId CSId, const PortableMemInfoBlock &MB)
373+
: CSId(CSId), Info(MB) {}
365374

366375
// Returns the size in bytes when this allocation info struct is serialized.
367376
size_t serializedSize(const MemProfSchema &Schema,
@@ -498,6 +507,19 @@ struct MemProfRecord {
498507
}
499508
};
500509

510+
// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields
511+
// within MemProfRecord at the same level as if the GUID were part of
512+
// MemProfRecord.
513+
struct GUIDMemProfRecordPair {
514+
GlobalValue::GUID GUID;
515+
MemProfRecord Record;
516+
};
517+
518+
// The top-level data structure, only used with YAML for now.
519+
struct AllMemProfData {
520+
std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
521+
};
522+
501523
// Reads a memprof schema from a buffer. All entries in the buffer are
502524
// interpreted as uint64_t. The first entry in the buffer denotes the number of
503525
// ids in the schema. Subsequent entries are integers which map to memprof::Meta

llvm/include/llvm/ProfileData/MemProfReader.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,12 @@ class RawMemProfReader final : public MemProfReader {
209209
// A mapping of the hash to symbol name, only used if KeepSymbolName is true.
210210
llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
211211
};
212+
213+
class YAMLMemProfReader final : public MemProfReader {
214+
public:
215+
YAMLMemProfReader() = default;
216+
void parse(StringRef YAMLData);
217+
};
212218
} // namespace memprof
213219
} // namespace llvm
214220

llvm/lib/ProfileData/MemProfReader.cpp

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,71 @@
4040
#include "llvm/Support/Path.h"
4141

4242
#define DEBUG_TYPE "memprof"
43+
44+
namespace llvm {
45+
namespace yaml {
46+
template <> struct MappingTraits<memprof::Frame> {
47+
static void mapping(IO &Io, memprof::Frame &F) {
48+
Io.mapRequired("Function", F.Function);
49+
Io.mapRequired("LineOffset", F.LineOffset);
50+
Io.mapRequired("Column", F.Column);
51+
Io.mapRequired("Inline", F.IsInlineFrame);
52+
}
53+
};
54+
55+
template <> struct CustomMappingTraits<memprof::PortableMemInfoBlock> {
56+
static void inputOne(IO &Io, StringRef KeyStr,
57+
memprof::PortableMemInfoBlock &MIB) {
58+
// PortableMemInfoBlock keeps track of the set of fields that actually have
59+
// values. We update the set here as we receive a key-value pair from the
60+
// YAML document.
61+
#define MIBEntryDef(NameTag, Name, Type) \
62+
if (KeyStr == #Name) { \
63+
Io.mapRequired(KeyStr.str().c_str(), MIB.Name); \
64+
MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name)); \
65+
return; \
66+
}
67+
#include "llvm/ProfileData/MIBEntryDef.inc"
68+
#undef MIBEntryDef
69+
Io.setError("Key is not a valid validation event");
70+
}
71+
72+
static void output(IO &Io, memprof::PortableMemInfoBlock &VI) {
73+
llvm_unreachable("To be implemented");
74+
}
75+
};
76+
77+
template <> struct MappingTraits<memprof::AllocationInfo> {
78+
static void mapping(IO &Io, memprof::AllocationInfo &AI) {
79+
Io.mapRequired("Callstack", AI.CallStack);
80+
Io.mapRequired("MemInfoBlock", AI.Info);
81+
}
82+
};
83+
84+
// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can
85+
// treat the GUID and the fields within MemProfRecord at the same level as if
86+
// the GUID were part of MemProfRecord.
87+
template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
88+
static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) {
89+
Io.mapRequired("GUID", Pair.GUID);
90+
Io.mapRequired("AllocSites", Pair.Record.AllocSites);
91+
Io.mapRequired("CallSites", Pair.Record.CallSites);
92+
}
93+
};
94+
95+
template <> struct MappingTraits<memprof::AllMemProfData> {
96+
static void mapping(IO &Io, memprof::AllMemProfData &Data) {
97+
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
98+
}
99+
};
100+
} // namespace yaml
101+
} // namespace llvm
102+
103+
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame)
104+
LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
105+
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
106+
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
107+
43108
namespace llvm {
44109
namespace memprof {
45110
namespace {
@@ -756,5 +821,46 @@ Error RawMemProfReader::readNextRecord(
756821
};
757822
return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
758823
}
824+
825+
void YAMLMemProfReader::parse(StringRef YAMLData) {
826+
memprof::AllMemProfData Doc;
827+
yaml::Input Yin(YAMLData);
828+
829+
Yin >> Doc;
830+
if (Yin.error())
831+
return;
832+
833+
// Add a call stack to MemProfData.CallStacks and return its CallStackId.
834+
auto AddCallStack = [&](ArrayRef<Frame> CallStack) -> CallStackId {
835+
SmallVector<FrameId> IndexedCallStack;
836+
IndexedCallStack.reserve(CallStack.size());
837+
for (const Frame &F : CallStack) {
838+
FrameId Id = F.hash();
839+
MemProfData.Frames.try_emplace(Id, F);
840+
IndexedCallStack.push_back(Id);
841+
}
842+
CallStackId CSId = hashCallStack(IndexedCallStack);
843+
MemProfData.CallStacks.try_emplace(CSId, std::move(IndexedCallStack));
844+
return CSId;
845+
};
846+
847+
for (const auto &[GUID, Record] : Doc.HeapProfileRecords) {
848+
IndexedMemProfRecord IndexedRecord;
849+
850+
// Convert AllocationInfo to IndexedAllocationInfo.
851+
for (const AllocationInfo &AI : Record.AllocSites) {
852+
CallStackId CSId = AddCallStack(AI.CallStack);
853+
IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
854+
}
855+
856+
// Populate CallSiteIds.
857+
for (const auto &CallSite : Record.CallSites) {
858+
CallStackId CSId = AddCallStack(CallSite);
859+
IndexedRecord.CallSiteIds.push_back(CSId);
860+
}
861+
862+
MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
863+
}
864+
}
759865
} // namespace memprof
760866
} // namespace llvm

llvm/unittests/ProfileData/MemProfTest.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -742,4 +742,81 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
742742
EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
743743
llvm::memprof::hashCallStack(CS4), 10U)));
744744
}
745+
746+
// Verify that we can parse YAML and retrieve IndexedMemProfData as expected.
747+
TEST(MemProf, YAMLParser) {
748+
StringRef YAMLData = R"YAML(
749+
---
750+
HeapProfileRecords:
751+
- GUID: 0xdeadbeef12345678
752+
AllocSites:
753+
- Callstack:
754+
- {Function: 0x100, LineOffset: 11, Column: 10, Inline: true}
755+
- {Function: 0x200, LineOffset: 22, Column: 20, Inline: false}
756+
MemInfoBlock:
757+
AllocCount: 777
758+
TotalSize: 888
759+
- Callstack:
760+
- {Function: 0x300, LineOffset: 33, Column: 30, Inline: false}
761+
- {Function: 0x400, LineOffset: 44, Column: 40, Inline: true}
762+
MemInfoBlock:
763+
AllocCount: 666
764+
TotalSize: 555
765+
CallSites:
766+
- - {Function: 0x500, LineOffset: 55, Column: 50, Inline: true}
767+
- {Function: 0x600, LineOffset: 66, Column: 60, Inline: false}
768+
- - {Function: 0x700, LineOffset: 77, Column: 70, Inline: true}
769+
- {Function: 0x800, LineOffset: 88, Column: 80, Inline: false}
770+
)YAML";
771+
772+
llvm::memprof::YAMLMemProfReader YAMLReader;
773+
YAMLReader.parse(YAMLData);
774+
llvm::memprof::IndexedMemProfData MemProfData = YAMLReader.takeMemProfData();
775+
776+
Frame F1(0x100, 11, 10, true);
777+
Frame F2(0x200, 22, 20, false);
778+
Frame F3(0x300, 33, 30, false);
779+
Frame F4(0x400, 44, 40, true);
780+
Frame F5(0x500, 55, 50, true);
781+
Frame F6(0x600, 66, 60, false);
782+
Frame F7(0x700, 77, 70, true);
783+
Frame F8(0x800, 88, 80, false);
784+
785+
llvm::SmallVector<FrameId> CS1 = {F1.hash(), F2.hash()};
786+
llvm::SmallVector<FrameId> CS2 = {F3.hash(), F4.hash()};
787+
llvm::SmallVector<FrameId> CS3 = {F5.hash(), F6.hash()};
788+
llvm::SmallVector<FrameId> CS4 = {F7.hash(), F8.hash()};
789+
790+
// Verify the entire contents of MemProfData.Frames.
791+
EXPECT_THAT(
792+
MemProfData.Frames,
793+
::testing::UnorderedElementsAre(
794+
::testing::Pair(F1.hash(), F1), ::testing::Pair(F2.hash(), F2),
795+
::testing::Pair(F3.hash(), F3), ::testing::Pair(F4.hash(), F4),
796+
::testing::Pair(F5.hash(), F5), ::testing::Pair(F6.hash(), F6),
797+
::testing::Pair(F7.hash(), F7), ::testing::Pair(F8.hash(), F8)));
798+
799+
// Verify the entire contents of MemProfData.Frames.
800+
EXPECT_THAT(MemProfData.CallStacks,
801+
::testing::UnorderedElementsAre(
802+
::testing::Pair(llvm::memprof::hashCallStack(CS1), CS1),
803+
::testing::Pair(llvm::memprof::hashCallStack(CS2), CS2),
804+
::testing::Pair(llvm::memprof::hashCallStack(CS3), CS3),
805+
::testing::Pair(llvm::memprof::hashCallStack(CS4), CS4)));
806+
807+
// Verify the entire contents of MemProfData.Records.
808+
ASSERT_THAT(MemProfData.Records, SizeIs(1));
809+
const auto &[GUID, Record] = *MemProfData.Records.begin();
810+
EXPECT_EQ(GUID, 0xdeadbeef12345678ULL);
811+
ASSERT_THAT(Record.AllocSites, SizeIs(2));
812+
EXPECT_EQ(Record.AllocSites[0].CSId, llvm::memprof::hashCallStack(CS1));
813+
EXPECT_EQ(Record.AllocSites[0].Info.getAllocCount(), 777U);
814+
EXPECT_EQ(Record.AllocSites[0].Info.getTotalSize(), 888U);
815+
EXPECT_EQ(Record.AllocSites[1].CSId, llvm::memprof::hashCallStack(CS2));
816+
EXPECT_EQ(Record.AllocSites[1].Info.getAllocCount(), 666U);
817+
EXPECT_EQ(Record.AllocSites[1].Info.getTotalSize(), 555U);
818+
EXPECT_THAT(Record.CallSiteIds,
819+
::testing::ElementsAre(llvm::memprof::hashCallStack(CS3),
820+
llvm::memprof::hashCallStack(CS4)));
821+
}
745822
} // namespace

0 commit comments

Comments
 (0)