Skip to content

Commit 9000a38

Browse files
committed
[BOLT] Validate extra entry point candidate by querying data marker symbols
Look up marker symbols and decide whether candidate is really extra entry point in `adjustFunctionBoundaries()`.
1 parent 4e6c88b commit 9000a38

File tree

5 files changed

+63
-38
lines changed

5 files changed

+63
-38
lines changed

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,11 +1196,6 @@ class BinaryFunction {
11961196
return getSecondaryEntryPointSymbol(BB.getLabel());
11971197
}
11981198

1199-
/// Remove a label from the secondary entry point map.
1200-
void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) {
1201-
SecondaryEntryPoints.erase(Label);
1202-
}
1203-
12041199
/// Return true if the basic block is an entry point into the function
12051200
/// (either primary or secondary).
12061201
bool isEntryPoint(const BinaryBasicBlock &BB) const {

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ class RewriteInstance {
241241

242242
/// Adjust function sizes and set proper maximum size values after the whole
243243
/// symbol table has been processed.
244-
void adjustFunctionBoundaries();
244+
void adjustFunctionBoundaries(DenseMap<uint64_t, MarkerSymType> &MarkerSyms);
245245

246246
/// Make .eh_frame section relocatable.
247247
void relocateEHFrameSection();

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1915,13 +1915,9 @@ void BinaryFunction::postProcessEntryPoints() {
19151915
continue;
19161916

19171917
// If we have grabbed a wrong code label which actually points to some
1918-
// constant island inside the function, ignore this label and remove it
1919-
// from the secondary entry point map.
1920-
if (isStartOfConstantIsland(Offset)) {
1921-
BC.SymbolToFunctionMap.erase(Label);
1922-
removeSymbolFromSecondaryEntryPointMap(Label);
1918+
// constant island inside the function, ignore this label.
1919+
if (isStartOfConstantIsland(Offset))
19231920
continue;
1924-
}
19251921

19261922
BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
19271923
"detected in function "

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -881,14 +881,9 @@ void RewriteInstance::discoverFileObjects() {
881881
// code section (see IHI0056B). $d identifies data contents.
882882
// Compilers usually merge multiple data objects in a single $d-$x interval,
883883
// but we need every data object to be marked with $d. Because of that we
884-
// create a vector of MarkerSyms with all locations of data objects.
884+
// keep track of marker symbols with all locations of data objects.
885885

886-
struct MarkerSym {
887-
uint64_t Address;
888-
MarkerSymType Type;
889-
};
890-
891-
std::vector<MarkerSym> SortedMarkerSymbols;
886+
DenseMap<uint64_t, MarkerSymType> MarkerSymbols;
892887
auto addExtraDataMarkerPerSymbol = [&]() {
893888
bool IsData = false;
894889
uint64_t LastAddr = 0;
@@ -912,14 +907,14 @@ void RewriteInstance::discoverFileObjects() {
912907
}
913908

914909
if (MarkerType != MarkerSymType::NONE) {
915-
SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
910+
MarkerSymbols[SymInfo.Address] = MarkerType;
916911
LastAddr = SymInfo.Address;
917912
IsData = MarkerType == MarkerSymType::DATA;
918913
continue;
919914
}
920915

921916
if (IsData) {
922-
SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA});
917+
MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA;
923918
LastAddr = SymInfo.Address;
924919
}
925920
}
@@ -1284,27 +1279,24 @@ void RewriteInstance::discoverFileObjects() {
12841279
BC->setHasSymbolsWithFileName(FileSymbols.size());
12851280

12861281
// Now that all the functions were created - adjust their boundaries.
1287-
adjustFunctionBoundaries();
1282+
adjustFunctionBoundaries(MarkerSymbols);
12881283

12891284
// Annotate functions with code/data markers in AArch64
1290-
for (auto ISym = SortedMarkerSymbols.begin();
1291-
ISym != SortedMarkerSymbols.end(); ++ISym) {
1292-
1293-
auto *BF =
1294-
BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
1285+
for (auto &[Address, Type] : MarkerSymbols) {
1286+
auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true);
12951287

12961288
if (!BF) {
12971289
// Stray marker
12981290
continue;
12991291
}
1300-
const auto EntryOffset = ISym->Address - BF->getAddress();
1301-
if (ISym->Type == MarkerSymType::CODE) {
1292+
const auto EntryOffset = Address - BF->getAddress();
1293+
if (Type == MarkerSymType::CODE) {
13021294
BF->markCodeAtOffset(EntryOffset);
13031295
continue;
13041296
}
1305-
if (ISym->Type == MarkerSymType::DATA) {
1297+
if (Type == MarkerSymType::DATA) {
13061298
BF->markDataAtOffset(EntryOffset);
1307-
BC->AddressToConstantIslandMap[ISym->Address] = BF;
1299+
BC->AddressToConstantIslandMap[Address] = BF;
13081300
continue;
13091301
}
13101302
llvm_unreachable("Unknown marker");
@@ -1833,7 +1825,8 @@ void RewriteInstance::disassemblePLT() {
18331825
}
18341826
}
18351827

1836-
void RewriteInstance::adjustFunctionBoundaries() {
1828+
void RewriteInstance::adjustFunctionBoundaries(
1829+
DenseMap<uint64_t, MarkerSymType> &MarkerSyms) {
18371830
for (auto BFI = BC->getBinaryFunctions().begin(),
18381831
BFE = BC->getBinaryFunctions().end();
18391832
BFI != BFE; ++BFI) {
@@ -1871,12 +1864,15 @@ void RewriteInstance::adjustFunctionBoundaries() {
18711864
continue;
18721865
}
18731866

1874-
// This is potentially another entry point into the function.
1875-
uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
1876-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
1877-
<< Function << " at offset 0x"
1878-
<< Twine::utohexstr(EntryOffset) << '\n');
1879-
Function.addEntryPointAtOffset(EntryOffset);
1867+
auto It = MarkerSyms.find(NextSymRefI->first);
1868+
if (It == MarkerSyms.end() || It->second != MarkerSymType::DATA) {
1869+
// This is potentially another entry point into the function.
1870+
uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
1871+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
1872+
<< Function << " at offset 0x"
1873+
<< Twine::utohexstr(EntryOffset) << '\n');
1874+
Function.addEntryPointAtOffset(EntryOffset);
1875+
}
18801876

18811877
++NextSymRefI;
18821878
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# This test is to ensure that we query data marker symbols to avoid
2+
# misidentifying constant data island symbol as extra entry point.
3+
4+
# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar
5+
# RUN: llvm-bolt %t.so -o %t.instr.so
6+
7+
.text
8+
.global _start
9+
.type _start, %function
10+
_start:
11+
ret
12+
13+
.text
14+
.global _foo
15+
.type _foo, %function
16+
_foo:
17+
cbz x1, _foo_2
18+
_foo_1:
19+
add x1, x2, x0
20+
b _foo
21+
_foo_2:
22+
ret
23+
24+
# None of these constant island symbols should be identified as extra entry
25+
# point for function `_foo'.
26+
.align 4
27+
_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80
28+
_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0
29+
_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0
30+
31+
.text
32+
.global _bar
33+
.type _bar, %function
34+
_bar:
35+
ret
36+
37+
# Dummy relocation to force relocation mode
38+
.reloc 0, R_AARCH64_NONE

0 commit comments

Comments
 (0)