Skip to content

Commit b7457cc

Browse files
committed
[𝘀𝗽𝗿] changes to main this commit is based on
Created using spr 1.3.5 [skip ci]
1 parent 145176d commit b7457cc

File tree

10 files changed

+251
-94
lines changed

10 files changed

+251
-94
lines changed

bolt/docs/BAT.md

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,9 @@ Hot indices are delta encoded, implicitly starting at zero.
8181
| `FuncHash` | 8b | Function hash for input function | Hot |
8282
| `NumBlocks` | ULEB128 | Number of basic blocks in the original function | Hot |
8383
| `NumSecEntryPoints` | ULEB128 | Number of secondary entry points in the original function | Hot |
84-
| `ColdInputSkew` | ULEB128 | Skew to apply to all input offsets | Cold |
8584
| `NumEntries` | ULEB128 | Number of address translation entries for a function | Both |
86-
| `EqualElems` | ULEB128 | Number of equal offsets in the beginning of a function | Both |
87-
| `BranchEntries` | Bitmask, `alignTo(EqualElems, 8)` bits | If `EqualElems` is non-zero, bitmask denoting entries with `BRANCHENTRY` bit | Both |
85+
| `EqualElems` | ULEB128 | Number of equal offsets in the beginning of a function | Hot |
86+
| `BranchEntries` | Bitmask, `alignTo(EqualElems, 8)` bits | If `EqualElems` is non-zero, bitmask denoting entries with `BRANCHENTRY` bit | Hot |
8887

8988
Function header is followed by *Address Translation Table* with `NumEntries`
9089
total entries, and *Secondary Entry Points* table with `NumSecEntryPoints`
@@ -100,8 +99,8 @@ entry is encoded. Input offsets implicitly start at zero.
10099
| `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
101100
| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB |
102101

103-
The table omits the first `EqualElems` input offsets where the input offset
104-
equals output offset.
102+
For hot fragments, the table omits the first `EqualElems` input offsets
103+
where the input offset equals output offset.
105104

106105
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
107106
(branch or call instruction). If not set, it signifies a control flow target

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,9 @@ class BoltAddressTranslation {
149149
/// entries in function address translation map.
150150
APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems);
151151

152-
/// Calculate the number of equal offsets (output = input - skew) in the
153-
/// beginning of the function.
154-
size_t getNumEqualOffsets(const MapTy &Map, uint32_t Skew) const;
152+
/// Calculate the number of equal offsets (output = input) in the beginning
153+
/// of the function.
154+
size_t getNumEqualOffsets(const MapTy &Map) const;
155155

156156
std::map<uint64_t, MapTy> Maps;
157157

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,9 @@ class RewriteInstance {
494494
/// Store all non-zero symbols in this map for a quick address lookup.
495495
std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
496496

497+
/// FILE symbols used for disambiguating split function parents.
498+
std::vector<ELFSymbolRef> FileSymbols;
499+
497500
std::unique_ptr<DWARFRewriter> DebugInfoRewriter;
498501

499502
std::unique_ptr<BoltAddressTranslation> BAT;

bolt/include/bolt/Utils/NameResolver.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,23 @@ class NameResolver {
2828
static constexpr char Sep = '/';
2929

3030
public:
31-
/// Return unique version of the \p Name in the form "Name<Sep><Number>".
31+
/// Return the number of uniquified versions of a given \p Name.
32+
uint64_t getUniquifiedNameCount(StringRef Name) const {
33+
if (Counters.contains(Name))
34+
return Counters.at(Name);
35+
return 0;
36+
}
37+
38+
/// Return unique version of the \p Name in the form "Name<Sep><ID>".
39+
std::string getUniqueName(StringRef Name, const uint64_t ID) const {
40+
return (Name + Twine(Sep) + Twine(ID)).str();
41+
}
42+
43+
/// Register new version of \p Name and return unique version in the form
44+
/// "Name<Sep><Number>".
3245
std::string uniquify(StringRef Name) {
3346
const uint64_t ID = ++Counters[Name];
34-
return (Name + Twine(Sep) + Twine(ID)).str();
47+
return getUniqueName(Name, ID);
3548
}
3649

3750
/// For uniquified \p Name, return the original form (that may no longer be

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 42 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -153,13 +153,12 @@ APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
153153
return BitMask;
154154
}
155155

156-
size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map,
157-
uint32_t Skew) const {
156+
size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map) const {
158157
size_t EqualOffsets = 0;
159158
for (const std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
160159
const uint32_t OutputOffset = KeyVal.first;
161160
const uint32_t InputOffset = KeyVal.second >> 1;
162-
if (OutputOffset == InputOffset - Skew)
161+
if (OutputOffset == InputOffset)
163162
++EqualOffsets;
164163
else
165164
break;
@@ -197,17 +196,12 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
197196
SecondaryEntryPointsMap.count(Address)
198197
? SecondaryEntryPointsMap[Address].size()
199198
: 0;
200-
uint32_t Skew = 0;
201199
if (Cold) {
202200
auto HotEntryIt = Maps.find(ColdPartSource[Address]);
203201
assert(HotEntryIt != Maps.end());
204202
size_t HotIndex = std::distance(Maps.begin(), HotEntryIt);
205203
encodeULEB128(HotIndex - PrevIndex, OS);
206204
PrevIndex = HotIndex;
207-
// Skew of all input offsets for cold fragments is simply the first input
208-
// offset.
209-
Skew = Map.begin()->second >> 1;
210-
encodeULEB128(Skew, OS);
211205
} else {
212206
// Function hash
213207
size_t BFHash = getBFHash(HotInputAddress);
@@ -223,21 +217,24 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
223217
<< '\n');
224218
}
225219
encodeULEB128(NumEntries, OS);
226-
// Encode the number of equal offsets (output = input - skew) in the
227-
// beginning of the function. Only encode one offset in these cases.
228-
const size_t EqualElems = getNumEqualOffsets(Map, Skew);
229-
encodeULEB128(EqualElems, OS);
230-
if (EqualElems) {
231-
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
232-
APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
233-
OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
234-
BranchEntriesBytes);
235-
LLVM_DEBUG({
236-
dbgs() << "BranchEntries: ";
237-
SmallString<8> BitMaskStr;
238-
BranchEntries.toString(BitMaskStr, 2, false);
239-
dbgs() << BitMaskStr << '\n';
240-
});
220+
// For hot fragments only: encode the number of equal offsets
221+
// (output = input) in the beginning of the function. Only encode one offset
222+
// in these cases.
223+
const size_t EqualElems = Cold ? 0 : getNumEqualOffsets(Map);
224+
if (!Cold) {
225+
encodeULEB128(EqualElems, OS);
226+
if (EqualElems) {
227+
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
228+
APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
229+
OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
230+
BranchEntriesBytes);
231+
LLVM_DEBUG({
232+
dbgs() << "BranchEntries: ";
233+
SmallString<8> BitMaskStr;
234+
BranchEntries.toString(BitMaskStr, 2, false);
235+
dbgs() << BitMaskStr << '\n';
236+
});
237+
}
241238
}
242239
const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
243240
size_t Index = 0;
@@ -318,12 +315,10 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
318315
uint64_t HotAddress = Cold ? 0 : Address;
319316
PrevAddress = Address;
320317
uint32_t SecondaryEntryPoints = 0;
321-
uint64_t ColdInputSkew = 0;
322318
if (Cold) {
323319
HotIndex += DE.getULEB128(&Offset, &Err);
324320
HotAddress = HotFuncs[HotIndex];
325321
ColdPartSource.emplace(Address, HotAddress);
326-
ColdInputSkew = DE.getULEB128(&Offset, &Err);
327322
} else {
328323
HotFuncs.push_back(Address);
329324
// Function hash
@@ -344,25 +339,28 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
344339
getULEB128Size(SecondaryEntryPoints)));
345340
}
346341
const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
347-
// Equal offsets.
348-
const size_t EqualElems = DE.getULEB128(&Offset, &Err);
342+
// Equal offsets, hot fragments only.
343+
size_t EqualElems = 0;
349344
APInt BEBitMask;
350-
LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n", EqualElems,
351-
getULEB128Size(EqualElems)));
352-
if (EqualElems) {
353-
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
354-
BEBitMask = APInt(alignTo(EqualElems, 8), 0);
355-
LoadIntFromMemory(
356-
BEBitMask,
357-
reinterpret_cast<const uint8_t *>(
358-
DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
359-
BranchEntriesBytes);
360-
LLVM_DEBUG({
361-
dbgs() << "BEBitMask: ";
362-
SmallString<8> BitMaskStr;
363-
BEBitMask.toString(BitMaskStr, 2, false);
364-
dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
365-
});
345+
if (!Cold) {
346+
EqualElems = DE.getULEB128(&Offset, &Err);
347+
LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n",
348+
EqualElems, getULEB128Size(EqualElems)));
349+
if (EqualElems) {
350+
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
351+
BEBitMask = APInt(alignTo(EqualElems, 8), 0);
352+
LoadIntFromMemory(
353+
BEBitMask,
354+
reinterpret_cast<const uint8_t *>(
355+
DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
356+
BranchEntriesBytes);
357+
LLVM_DEBUG({
358+
dbgs() << "BEBitMask: ";
359+
SmallString<8> BitMaskStr;
360+
BEBitMask.toString(BitMaskStr, 2, false);
361+
dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
362+
});
363+
}
366364
}
367365
MapTy Map;
368366

@@ -377,7 +375,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
377375
PrevAddress = OutputAddress;
378376
int64_t InputDelta = 0;
379377
if (J < EqualElems) {
380-
InputOffset = ((OutputOffset + ColdInputSkew) << 1) | BEBitMask[J];
378+
InputOffset = (OutputOffset << 1) | BEBitMask[J];
381379
} else {
382380
InputDelta = DE.getSLEB128(&Offset, &Err);
383381
InputOffset += InputDelta;

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 118 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,7 @@ void RewriteInstance::discoverFileObjects() {
840840
continue;
841841

842842
if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
843+
FileSymbols.emplace_back(Symbol);
843844
StringRef Name =
844845
cantFail(std::move(NameOrError), "cannot get symbol name for file");
845846
// Ignore Clang LTO artificial FILE symbol as it is not always generated,
@@ -1340,6 +1341,7 @@ void RewriteInstance::discoverFileObjects() {
13401341
}
13411342

13421343
registerFragments();
1344+
FileSymbols.clear();
13431345
}
13441346

13451347
Error RewriteInstance::discoverRtFiniAddress() {
@@ -1417,50 +1419,139 @@ void RewriteInstance::registerFragments() {
14171419
if (!BC->HasSplitFunctions)
14181420
return;
14191421

1422+
// Process fragments with ambiguous parents separately as they are typically a
1423+
// vanishing minority of cases and require expensive symbol table lookups.
1424+
std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
14201425
for (auto &BFI : BC->getBinaryFunctions()) {
14211426
BinaryFunction &Function = BFI.second;
14221427
if (!Function.isFragment())
14231428
continue;
1424-
unsigned ParentsFound = 0;
14251429
for (StringRef Name : Function.getNames()) {
1426-
StringRef BaseName, Suffix;
1427-
std::tie(BaseName, Suffix) = Name.split('/');
1430+
StringRef BaseName = NR.restore(Name);
1431+
const bool IsGlobal = BaseName == Name;
14281432
const size_t ColdSuffixPos = BaseName.find(".cold");
14291433
if (ColdSuffixPos == StringRef::npos)
14301434
continue;
1431-
// For cold function with local (foo.cold/1) symbol, prefer a parent with
1432-
// local symbol as well (foo/1) over global symbol (foo).
1433-
std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
1435+
StringRef ParentName = BaseName.substr(0, ColdSuffixPos);
14341436
const BinaryData *BD = BC->getBinaryDataByName(ParentName);
1435-
if (Suffix != "") {
1436-
ParentName.append(Twine("/", Suffix).str());
1437-
const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
1438-
if (BDLocal || !BD)
1439-
BD = BDLocal;
1440-
}
1441-
if (!BD) {
1442-
if (opts::Verbosity >= 1)
1443-
BC->outs() << "BOLT-INFO: parent function not found for " << Name
1444-
<< "\n";
1437+
const uint64_t NumPossibleLocalParents =
1438+
NR.getUniquifiedNameCount(ParentName);
1439+
// The most common case: single local parent fragment.
1440+
if (!BD && NumPossibleLocalParents == 1) {
1441+
BD = BC->getBinaryDataByName(NR.getUniqueName(ParentName, 1));
1442+
} else if (BD && (!NumPossibleLocalParents || IsGlobal)) {
1443+
// Global parent and either no local candidates (second most common), or
1444+
// the fragment is global as well (uncommon).
1445+
} else {
1446+
// Any other case: need to disambiguate using FILE symbols.
1447+
AmbiguousFragments.emplace_back(ParentName, &Function);
14451448
continue;
14461449
}
1447-
const uint64_t Address = BD->getAddress();
1448-
BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
1449-
if (!BF) {
1450-
if (opts::Verbosity >= 1)
1451-
BC->outs() << formatv(
1452-
"BOLT-INFO: parent function not found at {0:x}\n", Address);
1453-
continue;
1450+
if (BD) {
1451+
BinaryFunction *BF = BC->getFunctionForSymbol(BD->getSymbol());
1452+
if (BF) {
1453+
BC->registerFragment(Function, *BF);
1454+
continue;
1455+
}
14541456
}
1455-
BC->registerFragment(Function, *BF);
1456-
++ParentsFound;
1457-
}
1458-
if (!ParentsFound) {
14591457
BC->errs() << "BOLT-ERROR: parent function not found for " << Function
14601458
<< '\n';
14611459
exit(1);
14621460
}
14631461
}
1462+
1463+
if (AmbiguousFragments.empty())
1464+
return;
1465+
1466+
// Disambiguating the remaining fragments relies on FILE symbols to scope
// local symbols, so give up early when the input has none.
if (!BC->hasSymbolsWithFileName()) {
  // Terminate the message with a newline like every other BOLT-ERROR so it
  // does not run into whatever is printed next on the error stream.
  BC->errs() << "BOLT-ERROR: input file has split functions but does not "
                "have FILE symbols. If the binary was stripped, preserve "
                "FILE symbols with --keep-file-symbols strip option\n";
  exit(1);
}
1472+
1473+
// The first global symbol is identified by the symbol table sh_info value.
1474+
// Used as local symbol search stopping point.
1475+
auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
1476+
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
1477+
auto *SymTab = llvm::find_if(cantFail(Obj.sections()), [](const auto &Sec) {
1478+
return Sec.sh_type == ELF::SHT_SYMTAB;
1479+
});
1480+
assert(SymTab);
1481+
if (!SymTab->sh_info) {
1482+
BC->errs() << "BOLT-ERROR: malformed SYMTAB sh_info\n";
1483+
exit(1);
1484+
}
1485+
ELFSymbolRef FirstGlobal = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info);
1486+
1487+
// Resolve the parent of every fragment that could not be matched by name
// alone. For each one: locate the fragment's own symbol, find the FILE symbol
// that owns it, then search that file's local symbols for the parent name,
// falling back to a global symbol of that name.
for (auto &[ParentName, BF] : AmbiguousFragments) {
  const uint64_t Address = BF->getAddress();

  // Get fragment's own symbol
  const auto SymIt = FileSymRefs.find(Address);
  if (SymIt == FileSymRefs.end()) {
    BC->errs()
        << "BOLT-ERROR: symbol lookup failed for function at address 0x"
        << Twine::utohexstr(Address) << '\n';
    exit(1);
  }

  // Find containing FILE symbol: FSI is the first FILE symbol ordered after
  // the fragment's symbol, so FSI[-1] is the one that owns it.
  ELFSymbolRef Symbol = SymIt->second;
  auto FSI = llvm::upper_bound(FileSymbols, Symbol);
  if (FSI == FileSymbols.begin()) {
    BC->errs() << "BOLT-ERROR: owning FILE symbol not found for symbol "
               << cantFail(Symbol.getName()) << '\n';
    exit(1);
  }

  // Local symbols of the owning file end at the next FILE symbol, or at the
  // first global symbol when this is the last file.
  ELFSymbolRef StopSymbol = FirstGlobal;
  if (FSI != FileSymbols.end())
    StopSymbol = *FSI;

  uint64_t ParentAddress{0};

  // BOLT split fragment symbols are emitted just before the main function
  // symbol.
  for (ELFSymbolRef NextSymbol = Symbol; NextSymbol < StopSymbol;
       NextSymbol.moveNext()) {
    // Inspect the symbol being visited. Reading the fragment's own name on
    // every iteration would never match the parent and would turn this loop
    // into a pointless walk to StopSymbol.
    Expected<StringRef> NameOrError = NextSymbol.getName();
    if (!NameOrError) {
      // A failed Expected must be consumed before it is destroyed.
      consumeError(NameOrError.takeError());
      break;
    }
    StringRef Name = *NameOrError;
    if (Name == ParentName) {
      ParentAddress = cantFail(NextSymbol.getValue());
      goto registerParent;
    }
    if (Name.starts_with(ParentName))
      // With multi-way splitting, there are multiple fragments with different
      // suffixes. Parent follows the last fragment.
      continue;
    break;
  }

  // Iterate over local file symbols and check symbol names to match parent.
  for (ELFSymbolRef LocalSym(FSI[-1]); LocalSym < StopSymbol;
       LocalSym.moveNext()) {
    if (cantFail(LocalSym.getName()) == ParentName) {
      ParentAddress = cantFail(LocalSym.getAddress());
      break;
    }
  }

registerParent:
  // No local parent is found, use global parent function.
  if (!ParentAddress)
    if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName))
      ParentAddress = ParentBD->getAddress();

  if (BinaryFunction *ParentBF =
          BC->getBinaryFunctionAtAddress(ParentAddress)) {
    BC->registerFragment(*BF, *ParentBF);
    continue;
  }
  BC->errs() << "BOLT-ERROR: parent function not found for " << *BF << '\n';
  exit(1);
}
14641555
}
14651556

14661557
void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,

0 commit comments

Comments
 (0)