Skip to content

Commit 86a5cfb

Browse files
kyulee-com authored and PhilippRados committed
[CGData][llvm-cgdata] Support for stable function map (llvm#112664)
This introduces a new cgdata format for stable function maps. The raw data is embedded in the __llvm_merge section during compile time. This data can be read and merged using the llvm-cgdata tool, into an indexed cgdata file. Consequently, the tool is now capable of handling either outlined hash trees, stable function maps, or both, as they are orthogonal. Depends on llvm#112662. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608.
1 parent 7a2c753 commit 86a5cfb

21 files changed

+578
-88
lines changed

lld/test/MachO/cgdata-generate.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33

44
# RUN: rm -rf %t; split-file %s %t
55

6-
# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
6+
# Synthesize raw cgdata without the header (32 bytes) from the indexed cgdata.
77
# RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
8-
# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
8+
# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
99
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s
1010
# RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
11-
# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
11+
# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
1212
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s
1313

1414
# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o

llvm/docs/CommandGuide/llvm-cgdata.rst

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,13 @@ SYNOPSIS
1111
DESCRIPTION
1212
-----------
1313

14-
The :program:llvm-cgdata utility parses raw codegen data embedded
15-
in compiled binary files and merges them into a single .cgdata file.
16-
It can also inspect and manipulate .cgdata files.
17-
Currently, the tool supports saving and restoring outlined hash trees,
18-
enabling global function outlining across modules, allowing for more
19-
efficient function outlining in subsequent compilations.
20-
The design is extensible, allowing for the incorporation of additional
21-
codegen summaries and optimization techniques, such as global function
22-
merging, in the future.
14+
The :program:llvm-cgdata utility parses raw codegen data embedded in compiled
15+
binary files and merges them into a single .cgdata file. It can also inspect
16+
and manipulate .cgdata files. Currently, the tool supports saving and restoring
17+
outlined hash trees and stable function maps, allowing for more efficient
18+
function outlining and function merging across modules in subsequent
19+
compilations. The design is extensible, allowing for the incorporation of
20+
additional codegen summaries and optimization techniques.
2321

2422
COMMANDS
2523
--------

llvm/include/llvm/CGData/CodeGenData.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/Bitcode/BitcodeReader.h"
2020
#include "llvm/CGData/OutlinedHashTree.h"
2121
#include "llvm/CGData/OutlinedHashTreeRecord.h"
22+
#include "llvm/CGData/StableFunctionMapRecord.h"
2223
#include "llvm/IR/Module.h"
2324
#include "llvm/Object/ObjectFile.h"
2425
#include "llvm/Support/Caching.h"
@@ -41,7 +42,9 @@ enum class CGDataKind {
4142
Unknown = 0x0,
4243
// A function outlining info.
4344
FunctionOutlinedHashTree = 0x1,
44-
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
45+
// A function merging info.
46+
StableFunctionMergingMap = 0x2,
47+
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
4548
};
4649

4750
const std::error_category &cgdata_category();
@@ -108,6 +111,8 @@ enum CGDataMode {
108111
class CodeGenData {
109112
/// Global outlined hash tree that has outlined hash sequences across modules.
110113
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
114+
/// Global stable function map that has stable function info across modules.
115+
std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;
111116

112117
/// This flag is set when -fcodegen-data-generate is passed.
113118
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
@@ -131,6 +136,9 @@ class CodeGenData {
131136
bool hasOutlinedHashTree() {
132137
return PublishedHashTree && !PublishedHashTree->empty();
133138
}
139+
bool hasStableFunctionMap() {
140+
return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty();
141+
}
134142

135143
/// Returns the outlined hash tree. This can be globally used in a read-only
136144
/// manner.
@@ -147,6 +155,12 @@ class CodeGenData {
147155
// Ensure we disable emitCGData as we do not want to read and write both.
148156
EmitCGData = false;
149157
}
158+
void
159+
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
160+
PublishedStableFunctionMap = std::move(FunctionMap);
161+
// Ensure we disable emitCGData as we do not want to read and write both.
162+
EmitCGData = false;
163+
}
150164
};
151165

152166
namespace cgdata {
@@ -166,6 +180,11 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
166180
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
167181
}
168182

183+
inline void
184+
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
185+
CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
186+
}
187+
169188
struct StreamCacheData {
170189
/// Backing buffer for serialized data stream.
171190
SmallVector<SmallString<0>> Outputs;
@@ -249,6 +268,8 @@ enum CGDataVersion {
249268
// Version 1 is the first version. This version supports the outlined
250269
// hash tree.
251270
Version1 = 1,
271+
// Version 2 supports the stable function merging map.
272+
Version2 = 2,
252273
CurrentVersion = CG_DATA_INDEX_VERSION
253274
};
254275
const uint64_t Version = CGDataVersion::CurrentVersion;
@@ -258,6 +279,7 @@ struct Header {
258279
uint32_t Version;
259280
uint32_t DataKind;
260281
uint64_t OutlinedHashTreeOffset;
282+
uint64_t StableFunctionMapOffset;
261283

262284
// New fields should only be added at the end to ensure that the size
263285
// computation is correct. The methods below need to be updated to ensure that

llvm/include/llvm/CGData/CodeGenData.inc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,33 @@
2020
#define CG_DATA_DEFINED
2121
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
2222
CG_DATA_OUTLINE_COFF, "__DATA,")
23+
CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
24+
CG_DATA_MERGE_COFF, "__DATA,")
2325

2426
#undef CG_DATA_SECT_ENTRY
2527
#endif
2628

2729
/* section name strings common to all targets other
2830
than WIN32 */
2931
#define CG_DATA_OUTLINE_COMMON __llvm_outline
32+
#define CG_DATA_MERGE_COMMON __llvm_merge
3033
/* Since cg data sections are not allocated, we don't need to
3134
* access them at runtime.
3235
*/
3336
#define CG_DATA_OUTLINE_COFF ".loutline"
37+
#define CG_DATA_MERGE_COFF ".lmerge"
3438

3539
#ifdef _WIN32
3640
/* Runtime section names and name strings. */
37-
#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
41+
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_OUTLINE_COFF
42+
#define CG_DATA_MERGE_SECT_NAME CG_DATA_MERGE_COFF
3843

3944
#else
4045
/* Runtime section names and name strings. */
41-
#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
46+
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
47+
#define CG_DATA_MERGE_SECT_NAME CG_DATA_QUOTE(CG_DATA_MERGE_COMMON)
4248

4349
#endif
4450

4551
/* Indexed codegen data format version (start from 1). */
46-
#define CG_DATA_INDEX_VERSION 1
52+
#define CG_DATA_INDEX_VERSION 2

llvm/include/llvm/CGData/CodeGenDataReader.h

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "llvm/CGData/CodeGenData.h"
1717
#include "llvm/CGData/OutlinedHashTreeRecord.h"
18+
#include "llvm/CGData/StableFunctionMapRecord.h"
1819
#include "llvm/Support/LineIterator.h"
1920
#include "llvm/Support/VirtualFileSystem.h"
2021

@@ -36,10 +37,15 @@ class CodeGenDataReader {
3637
virtual CGDataKind getDataKind() const = 0;
3738
/// Return true if the data has an outlined hash tree.
3839
virtual bool hasOutlinedHashTree() const = 0;
40+
/// Return true if the data has a stable function map.
41+
virtual bool hasStableFunctionMap() const = 0;
3942
/// Return the outlined hash tree that is released from the reader.
4043
std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() {
4144
return std::move(HashTreeRecord.HashTree);
4245
}
46+
std::unique_ptr<StableFunctionMap> releaseStableFunctionMap() {
47+
return std::move(FunctionMapRecord.FunctionMap);
48+
}
4349

4450
/// Factory method to create an appropriately typed reader for the given
4551
/// codegen data file path and file system.
@@ -56,15 +62,21 @@ class CodeGenDataReader {
5662
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
5763
/// Optionally, \p CombinedHash can be used to compute the combined hash of
5864
/// the merged data.
59-
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
60-
OutlinedHashTreeRecord &GlobalOutlineRecord,
61-
stable_hash *CombinedHash = nullptr);
65+
static Error
66+
mergeFromObjectFile(const object::ObjectFile *Obj,
67+
OutlinedHashTreeRecord &GlobalOutlineRecord,
68+
StableFunctionMapRecord &GlobalFunctionMapRecord,
69+
stable_hash *CombinedHash = nullptr);
6270

6371
protected:
6472
/// The outlined hash tree that has been read. When it's released by
6573
/// releaseOutlinedHashTree(), it's no longer valid.
6674
OutlinedHashTreeRecord HashTreeRecord;
6775

76+
/// The stable function map that has been read. When it's released by
77+
/// releaseStableFunctionMap(), it's no longer valid.
78+
StableFunctionMapRecord FunctionMapRecord;
79+
6880
/// Set the current error and return same.
6981
Error error(cgdata_error Err, const std::string &ErrMsg = "") {
7082
LastError = Err;
@@ -115,6 +127,11 @@ class IndexedCodeGenDataReader : public CodeGenDataReader {
115127
return Header.DataKind &
116128
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
117129
}
130+
/// Return true if the header indicates the data has a stable function map.
131+
bool hasStableFunctionMap() const override {
132+
return Header.DataKind &
133+
static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
134+
}
118135
};
119136

120137
/// This format is a simple text format that's suitable for test data.
@@ -150,6 +167,12 @@ class TextCodeGenDataReader : public CodeGenDataReader {
150167
return static_cast<uint32_t>(DataKind) &
151168
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
152169
}
170+
/// Return true if the header indicates the data has a stable function map.
171+
/// This does not mean that the data is still available.
172+
bool hasStableFunctionMap() const override {
173+
return static_cast<uint32_t>(DataKind) &
174+
static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
175+
}
153176
};
154177

155178
} // end namespace llvm

llvm/include/llvm/CGData/CodeGenDataWriter.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "llvm/CGData/CodeGenData.h"
1717
#include "llvm/CGData/OutlinedHashTreeRecord.h"
18+
#include "llvm/CGData/StableFunctionMapRecord.h"
1819
#include "llvm/Support/EndianStream.h"
1920
#include "llvm/Support/Error.h"
2021

@@ -57,16 +58,22 @@ class CodeGenDataWriter {
5758
/// The outlined hash tree to be written.
5859
OutlinedHashTreeRecord HashTreeRecord;
5960

61+
/// The stable function map to be written.
62+
StableFunctionMapRecord FunctionMapRecord;
63+
6064
/// A bit mask describing the kind of the codegen data.
6165
CGDataKind DataKind = CGDataKind::Unknown;
6266

6367
public:
6468
CodeGenDataWriter() = default;
6569
~CodeGenDataWriter() = default;
6670

67-
/// Add the outlined hash tree record. The input Record is released.
71+
/// Add the outlined hash tree record. The input hash tree is released.
6872
void addRecord(OutlinedHashTreeRecord &Record);
6973

74+
/// Add the stable function map record. The input function map is released.
75+
void addRecord(StableFunctionMapRecord &Record);
76+
7077
/// Write the codegen data to \c OS
7178
Error write(raw_fd_ostream &OS);
7279

@@ -81,11 +88,19 @@ class CodeGenDataWriter {
8188
return static_cast<uint32_t>(DataKind) &
8289
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
8390
}
91+
/// Return true if the header indicates the data has a stable function map.
92+
bool hasStableFunctionMap() const {
93+
return static_cast<uint32_t>(DataKind) &
94+
static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
95+
}
8496

8597
private:
8698
/// The offset of the outlined hash tree in the file.
8799
uint64_t OutlinedHashTreeOffset;
88100

101+
/// The offset of the stable function map in the file.
102+
uint64_t StableFunctionMapOffset;
103+
89104
/// Write the codegen data header to \c COS
90105
Error writeHeader(CGDataOStream &COS);
91106

llvm/lib/CGData/CodeGenData.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/Bitcode/BitcodeWriter.h"
1515
#include "llvm/CGData/CodeGenDataReader.h"
1616
#include "llvm/CGData/OutlinedHashTreeRecord.h"
17+
#include "llvm/CGData/StableFunctionMapRecord.h"
1718
#include "llvm/Object/ObjectFile.h"
1819
#include "llvm/Support/Caching.h"
1920
#include "llvm/Support/CommandLine.h"
@@ -163,6 +164,8 @@ CodeGenData &CodeGenData::getInstance() {
163164
auto Reader = ReaderOrErr->get();
164165
if (Reader->hasOutlinedHashTree())
165166
Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
167+
if (Reader->hasStableFunctionMap())
168+
Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap());
166169
}
167170
});
168171
return *Instance;
@@ -185,18 +188,14 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
185188
return make_error<CGDataError>(cgdata_error::unsupported_version);
186189
H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
187190

188-
switch (H.Version) {
189-
// When a new field is added to the header add a case statement here to
190-
// compute the size as offset of the new field + size of the new field. This
191-
// relies on the field being added to the end of the list.
192-
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
193-
"Please update the size computation below if a new field has "
194-
"been added to the header, if not add a case statement to "
195-
"fall through to the latest version.");
196-
case 1ull:
197-
H.OutlinedHashTreeOffset =
191+
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2,
192+
"Please update the offset computation below if a new field has "
193+
"been added to the header.");
194+
H.OutlinedHashTreeOffset =
195+
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
196+
if (H.Version >= 2)
197+
H.StableFunctionMapOffset =
198198
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
199-
}
200199

201200
return H;
202201
}
@@ -257,6 +256,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
257256

258257
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
259258
OutlinedHashTreeRecord GlobalOutlineRecord;
259+
StableFunctionMapRecord GlobalStableFunctionMapRecord;
260260
stable_hash CombinedHash = 0;
261261
for (auto File : ObjFiles) {
262262
if (File.empty())
@@ -270,12 +270,18 @@ Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
270270

271271
std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
272272
if (auto E = CodeGenDataReader::mergeFromObjectFile(
273-
Obj.get(), GlobalOutlineRecord, &CombinedHash))
273+
Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord,
274+
&CombinedHash))
274275
return E;
275276
}
276277

278+
GlobalStableFunctionMapRecord.finalize();
279+
277280
if (!GlobalOutlineRecord.empty())
278281
cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
282+
if (!GlobalStableFunctionMapRecord.empty())
283+
cgdata::publishStableFunctionMap(
284+
std::move(GlobalStableFunctionMapRecord.FunctionMap));
279285

280286
return CombinedHash;
281287
}

0 commit comments

Comments
 (0)