-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[BOLT] Validate extra entry point by querying data marker symbols #154611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-bolt Author: YongKang Zhu (yozhu) ChangesLook up marker symbols and decide whether candidate is really extra entry Full diff: https://github.com/llvm/llvm-project/pull/154611.diff 5 Files Affected:
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index ae580520b9110..b59926cc75571 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -1196,11 +1196,6 @@ class BinaryFunction {
return getSecondaryEntryPointSymbol(BB.getLabel());
}
- /// Remove a label from the secondary entry point map.
- void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) {
- SecondaryEntryPoints.erase(Label);
- }
-
/// Return true if the basic block is an entry point into the function
/// (either primary or secondary).
bool isEntryPoint(const BinaryBasicBlock &BB) const {
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 91d62a78de390..19dcce8205ebc 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -241,7 +241,7 @@ class RewriteInstance {
/// Adjust function sizes and set proper maximum size values after the whole
/// symbol table has been processed.
- void adjustFunctionBoundaries();
+ void adjustFunctionBoundaries(DenseMap<uint64_t, MarkerSymType> &MarkerSyms);
/// Make .eh_frame section relocatable.
void relocateEHFrameSection();
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..8f494f105fbba 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1915,13 +1915,9 @@ void BinaryFunction::postProcessEntryPoints() {
continue;
// If we have grabbed a wrong code label which actually points to some
- // constant island inside the function, ignore this label and remove it
- // from the secondary entry point map.
- if (isStartOfConstantIsland(Offset)) {
- BC.SymbolToFunctionMap.erase(Label);
- removeSymbolFromSecondaryEntryPointMap(Label);
+ // constant island inside the function, ignore this label.
+ if (isStartOfConstantIsland(Offset))
continue;
- }
BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
"detected in function "
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index b2056ba2380fb..79daa38142ae8 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -881,14 +881,9 @@ void RewriteInstance::discoverFileObjects() {
// code section (see IHI0056B). $d identifies data contents.
// Compilers usually merge multiple data objects in a single $d-$x interval,
// but we need every data object to be marked with $d. Because of that we
- // create a vector of MarkerSyms with all locations of data objects.
+ // keep track of marker symbols with all locations of data objects.
- struct MarkerSym {
- uint64_t Address;
- MarkerSymType Type;
- };
-
- std::vector<MarkerSym> SortedMarkerSymbols;
+ DenseMap<uint64_t, MarkerSymType> MarkerSymbols;
auto addExtraDataMarkerPerSymbol = [&]() {
bool IsData = false;
uint64_t LastAddr = 0;
@@ -912,14 +907,14 @@ void RewriteInstance::discoverFileObjects() {
}
if (MarkerType != MarkerSymType::NONE) {
- SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
+ MarkerSymbols[SymInfo.Address] = MarkerType;
LastAddr = SymInfo.Address;
IsData = MarkerType == MarkerSymType::DATA;
continue;
}
if (IsData) {
- SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA});
+ MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA;
LastAddr = SymInfo.Address;
}
}
@@ -1284,27 +1279,24 @@ void RewriteInstance::discoverFileObjects() {
BC->setHasSymbolsWithFileName(FileSymbols.size());
// Now that all the functions were created - adjust their boundaries.
- adjustFunctionBoundaries();
+ adjustFunctionBoundaries(MarkerSymbols);
// Annotate functions with code/data markers in AArch64
- for (auto ISym = SortedMarkerSymbols.begin();
- ISym != SortedMarkerSymbols.end(); ++ISym) {
-
- auto *BF =
- BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
+ for (auto &[Address, Type] : MarkerSymbols) {
+ auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true);
if (!BF) {
// Stray marker
continue;
}
- const auto EntryOffset = ISym->Address - BF->getAddress();
- if (ISym->Type == MarkerSymType::CODE) {
+ const auto EntryOffset = Address - BF->getAddress();
+ if (Type == MarkerSymType::CODE) {
BF->markCodeAtOffset(EntryOffset);
continue;
}
- if (ISym->Type == MarkerSymType::DATA) {
+ if (Type == MarkerSymType::DATA) {
BF->markDataAtOffset(EntryOffset);
- BC->AddressToConstantIslandMap[ISym->Address] = BF;
+ BC->AddressToConstantIslandMap[Address] = BF;
continue;
}
llvm_unreachable("Unknown marker");
@@ -1833,7 +1825,8 @@ void RewriteInstance::disassemblePLT() {
}
}
-void RewriteInstance::adjustFunctionBoundaries() {
+void RewriteInstance::adjustFunctionBoundaries(
+ DenseMap<uint64_t, MarkerSymType> &MarkerSyms) {
for (auto BFI = BC->getBinaryFunctions().begin(),
BFE = BC->getBinaryFunctions().end();
BFI != BFE; ++BFI) {
@@ -1871,12 +1864,15 @@ void RewriteInstance::adjustFunctionBoundaries() {
continue;
}
- // This is potentially another entry point into the function.
- uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
- << Function << " at offset 0x"
- << Twine::utohexstr(EntryOffset) << '\n');
- Function.addEntryPointAtOffset(EntryOffset);
+ auto It = MarkerSyms.find(NextSymRefI->first);
+ if (It == MarkerSyms.end() || It->second != MarkerSymType::DATA) {
+ // This is potentially another entry point into the function.
+ uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
+ << Function << " at offset 0x"
+ << Twine::utohexstr(EntryOffset) << '\n');
+ Function.addEntryPointAtOffset(EntryOffset);
+ }
++NextSymRefI;
}
diff --git a/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
new file mode 100644
index 0000000000000..9c77ecefa4b96
--- /dev/null
+++ b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
@@ -0,0 +1,38 @@
+# This test is to ensure that discovered data marker symbol would invalidate
+# extra entry point that was added earlier.
+
+# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar
+# RUN: llvm-bolt %t.so -o %t.instr.so
+
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ ret
+
+ .text
+ .global _foo
+ .type _foo, %function
+_foo:
+ cbz x1, _foo_2
+_foo_1:
+ add x1, x2, x0
+ b _foo
+_foo_2:
+ ret
+
+# None of these constant island symbols should be identified as extra entry
+# point for function `_foo'.
+ .align 4
+_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80
+_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0
+_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0
+
+ .text
+ .global _bar
+ .type _bar, %function
+_bar:
+ ret
+
+ # Dummy relocation to force relocation mode
+ .reloc 0, R_AARCH64_NONE
|
…ymbols Look up marker symbols and decide whether candidate is really extra entry point in `adjustFunctionBoundaries()`.
maksfb
approved these changes
Aug 20, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Look up marker symbols and decide whether candidate is really extra entry
point in
adjustFunctionBoundaries().