Skip to content

Commit 1a6199d

Browse files
committed
[BOLT][AArch64] Patch functions targeted by optional relocs
On AArch64, we create optional/weak relocations that may not be processed due to the relocated value overflow. When the overflow happens, we used to enforce patching for all functions in the binary via --force-patch option. This PR relaxes the requirement, and enforces patching only for functions that are target of optional relocations. Moreover, if the compact code model is used, the relocation overflow is guaranteed not to happen and the patching will be skipped.
1 parent dbe070e commit 1a6199d

File tree

9 files changed

+74
-57
lines changed

9 files changed

+74
-57
lines changed

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,11 @@ class BinaryFunction {
360360
/// True if the function is used for patching code at a fixed address.
361361
bool IsPatch{false};
362362

363+
/// True if the original entry point of the function may get called, but the
364+
/// original body cannot be executed and needs to be patched with code that
365+
/// redirects execution to the new function body.
366+
bool NeedsPatch{false};
367+
363368
/// True if the function should not have an associated symbol table entry.
364369
bool IsAnonymous{false};
365370

@@ -1372,9 +1377,17 @@ class BinaryFunction {
13721377
/// Return true if this function is used for patching existing code.
13731378
bool isPatch() const { return IsPatch; }
13741379

1380+
/// Return true if the function requires a patch.
1381+
bool needsPatch() const { return NeedsPatch; }
1382+
13751383
/// Return true if the function should not have associated symbol table entry.
13761384
bool isAnonymous() const { return IsAnonymous; }
13771385

1386+
/// Return true if we can allow the execution of the original body of the
1387+
/// function and its rewritten copy. This means, e.g., that metadata
1388+
/// associated with the function can be duplicated/cloned.
1389+
bool canClone() const;
1390+
13781391
/// If this function was folded, return the function it was folded into.
13791392
BinaryFunction *getFoldedIntoFunction() const { return FoldedIntoFunction; }
13801393

@@ -1757,6 +1770,9 @@ class BinaryFunction {
17571770
IsPatch = V;
17581771
}
17591772

1773+
/// Mark the function for patching.
1774+
void setNeedsPatch(bool V) { NeedsPatch = V; }
1775+
17601776
/// Indicate if the function should have a name in the symbol table.
17611777
void setAnonymous(bool V) {
17621778
assert(isInjected() && "Only injected functions could be anonymous");

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ extern llvm::cl::opt<unsigned> AlignText;
3434
extern llvm::cl::opt<unsigned> AlignFunctions;
3535
extern llvm::cl::opt<bool> AggregateOnly;
3636
extern llvm::cl::opt<unsigned> BucketsPerLine;
37+
extern llvm::cl::opt<bool> CompactCodeModel;
3738
extern llvm::cl::opt<bool> DiffOnly;
3839
extern llvm::cl::opt<bool> EnableBAT;
3940
extern llvm::cl::opt<bool> EqualizeBBCounts;

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,8 +1797,6 @@ bool BinaryFunction::scanExternalRefs() {
17971797
// Create relocation for every fixup.
17981798
for (const MCFixup &Fixup : Fixups) {
17991799
std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
1800-
// Can be skipped in case of overlow during relocation value encoding.
1801-
Rel->setOptional();
18021800
if (!Rel) {
18031801
Success = false;
18041802
continue;
@@ -1814,6 +1812,17 @@ bool BinaryFunction::scanExternalRefs() {
18141812
Success = false;
18151813
continue;
18161814
}
1815+
1816+
if (BC.isAArch64()) {
1817+
// Allow the relocation to be skipped in case of the overflow during the
1818+
// relocation value encoding.
1819+
Rel->setOptional();
1820+
1821+
if (!opts::CompactCodeModel)
1822+
if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Rel->Symbol))
1823+
TargetBF->setNeedsPatch(true);
1824+
}
1825+
18171826
Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
18181827
FunctionRelocations.push_back(*Rel);
18191828
}
@@ -3744,6 +3753,14 @@ void BinaryFunction::postProcessBranches() {
37443753
assert(validateCFG() && "invalid CFG");
37453754
}
37463755

3756+
bool BinaryFunction::canClone() const {
3757+
if (opts::Instrument)
3758+
return false;
3759+
3760+
// Check for the presence of metadata that cannot be duplicated.
3761+
return !hasEHRanges() && !hasSDTMarker() && !hasPseudoProbe() && !hasORC();
3762+
}
3763+
37473764
MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
37483765
assert(Offset && "cannot add primary entry point");
37493766
assert(CurrentState == State::Empty || CurrentState == State::Disassembled);

bolt/lib/Core/BinarySection.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -186,16 +186,6 @@ void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
186186
!Relocation::canEncodeValue(Reloc.Type, Value,
187187
SectionAddress + Reloc.Offset)) {
188188

189-
// A successful run of 'scanExternalRefs' means that all pending
190-
// relocations are flushed. Otherwise, PatchEntries should run.
191-
if (!opts::ForcePatch) {
192-
BC.errs()
193-
<< "BOLT-ERROR: cannot encode relocation for symbol "
194-
<< Reloc.Symbol->getName()
195-
<< " as it is out-of-range. To proceed must use -force-patch\n";
196-
exit(1);
197-
}
198-
199189
++SkippedPendingRelocations;
200190
continue;
201191
}

bolt/lib/Passes/LongJmp.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "bolt/Passes/LongJmp.h"
1414
#include "bolt/Core/ParallelUtilities.h"
15+
#include "bolt/Utils/CommandLineOpts.h"
1516
#include "llvm/Support/MathExtras.h"
1617

1718
#define DEBUG_TYPE "longjmp"
@@ -26,11 +27,6 @@ extern cl::opt<unsigned> AlignFunctions;
2627
extern cl::opt<bool> UseOldText;
2728
extern cl::opt<bool> HotFunctionsAtEnd;
2829

29-
static cl::opt<bool>
30-
CompactCodeModel("compact-code-model",
31-
cl::desc("generate code for binaries <128MB on AArch64"),
32-
cl::init(false), cl::cat(BoltCategory));
33-
3430
static cl::opt<bool> GroupStubs("group-stubs",
3531
cl::desc("share stubs across functions"),
3632
cl::init(true), cl::cat(BoltOptCategory));

bolt/lib/Passes/PatchEntries.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
3939
bool NeedsPatching = llvm::any_of(
4040
llvm::make_second_range(BC.getBinaryFunctions()),
4141
[&](BinaryFunction &BF) {
42-
return !BC.shouldEmit(BF) && !BF.hasExternalRefRelocations();
42+
return (!BC.shouldEmit(BF) && !BF.hasExternalRefRelocations()) ||
43+
BF.needsPatch();
4344
});
4445

4546
if (!NeedsPatching)
@@ -65,7 +66,7 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
6566
continue;
6667

6768
// Check if we can skip patching the function.
68-
if (!opts::ForcePatch && !Function.hasEHRanges() &&
69+
if (!opts::ForcePatch && !Function.needsPatch() && Function.canClone() &&
6970
Function.getSize() < PatchThreshold)
7071
continue;
7172

bolt/lib/Utils/CommandLineOpts.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ cl::opt<unsigned>
6161
cl::desc("number of entries per line (default 256)"),
6262
cl::init(256), cl::Optional, cl::cat(HeatmapCategory));
6363

64+
cl::opt<bool>
65+
CompactCodeModel("compact-code-model",
66+
cl::desc("generate code for binaries <128MB on AArch64"),
67+
cl::init(false), cl::cat(BoltCategory));
68+
6469
cl::opt<bool>
6570
DiffOnly("diff-only",
6671
cl::desc("stop processing once we have enough to compare two binaries"),

bolt/test/AArch64/lite-mode.s

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,17 @@
1010
# RUN: | FileCheck %s --check-prefix=CHECK-INPUT
1111
# RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.bolt \
1212
# RUN: | FileCheck %s
13+
# RUN: llvm-objdump -d --disassemble-symbols=_start.org.0 %t.bolt \
14+
# RUN: | FileCheck %s --check-prefix=CHECK-PATCH
1315

1416

1517
## Verify that the number of FDEs matches the number of functions in the output
1618
## binary. There are three original functions and two optimized.
19+
## NOTE: at the moment we are emitting extra FDEs for patched functions, thus
20+
## there is one more FDE for _start.
1721
# RUN: llvm-readelf -u %t.bolt | grep -wc FDE \
1822
# RUN: | FileCheck --check-prefix=CHECK-FDE %s
19-
# CHECK-FDE: 5
23+
# CHECK-FDE: 6
2024

2125
## In lite mode, optimized code will be separated from the original .text by
2226
## over 128MB, making it impossible for call/bl instructions in cold functions
@@ -28,15 +32,21 @@
2832
_start:
2933
# FDATA: 0 [unknown] 0 1 _start 0 0 100
3034
.cfi_startproc
35+
36+
## Check that the code at the orignal location is converted into a veneer/thunk.
37+
# CHECK-PATCH-LABEL: <_start.org.0>
38+
# CHECK-PATCH-NEXT: adrp x16
39+
# CHECK-PATCH-NEXT: add x16, x16,
40+
# CHECK-PATCH-NEXT: br x16
3141
cmp x0, 1
3242
b.eq .L0
3343
bl cold_function
3444
.L0:
3545
ret x30
3646
.cfi_endproc
37-
.size _start, .-_start
47+
.size _start, .-_start
3848

39-
## Cold non-optimized function with a reference to a hot function (_start).
49+
## Cold non-optimized function with references to hot functions.
4050
# CHECK: Disassembly of section .bolt.org.text:
4151
# CHECK-LABEL: <cold_function>
4252
.globl cold_function
@@ -97,12 +107,24 @@ cold_function:
97107
# CHECK-NEXT: nop
98108
# CHECK-NEXT: ldr x5
99109

110+
## Since _start is relocated further than 128MB from the call site, we check
111+
## that the call is converted into a call to its original version. That original
112+
## version should contain a veneer/thunk code that we check separately.
113+
bl _start
114+
# CHECK-INPUT-NEXT: bl {{.*}} <_start>
115+
# CHECK-NEXT: bl {{.*}} <_start.org.0>
116+
117+
## Same as above, but the instruction is a tail call.
118+
b _start
119+
# CHECK-INPUT-NEXT: b {{.*}} <_start>
120+
# CHECK-NEXT: b {{.*}} <_start.org.0>
121+
100122
.cfi_endproc
101-
.size cold_function, .-cold_function
123+
.size cold_function, .-cold_function
102124

103-
## Reserve 1MB of space to make functions that follow unreachable by ADRs in
125+
## Reserve 128MB of space to make functions that follow unreachable by ADRs in
104126
## code that precedes this gap.
105-
.space 0x100000
127+
.space 0x8000000
106128

107129
.globl far_func
108130
.type far_func, %function
@@ -111,5 +133,4 @@ far_func:
111133
.cfi_startproc
112134
ret x30
113135
.cfi_endproc
114-
.size far_func, .-far_func
115-
136+
.size far_func, .-far_func

bolt/unittests/Core/BinaryContext.cpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -162,36 +162,6 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
162162
<< "Wrong forward branch value\n";
163163
}
164164

165-
TEST_P(BinaryContextTester,
166-
FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOff) {
167-
if (GetParam() != Triple::aarch64)
168-
GTEST_SKIP();
169-
170-
// Tests that flushPendingRelocations exits if any pending relocation is out
171-
// of range and PatchEntries hasn't run. Pending relocations are added by
172-
// scanExternalRefs, so this ensures that either all scanExternalRefs
173-
// relocations were flushed or PatchEntries ran.
174-
175-
BinarySection &BS = BC->registerOrUpdateSection(
176-
".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
177-
// Create symbol 'Func0x4'
178-
MCSymbol *RelSymbol = BC->getOrCreateGlobalSymbol(4, "Func");
179-
ASSERT_TRUE(RelSymbol);
180-
Relocation Reloc{8, RelSymbol, ELF::R_AARCH64_CALL26, 0, 0};
181-
Reloc.setOptional();
182-
BS.addPendingRelocation(Reloc);
183-
184-
SmallVector<char> Vect;
185-
raw_svector_ostream OS(Vect);
186-
187-
// Resolve relocation symbol to a high value so encoding will be out of range.
188-
EXPECT_EXIT(BS.flushPendingRelocations(
189-
OS, [&](const MCSymbol *S) { return 0x800000F; }),
190-
::testing::ExitedWithCode(1),
191-
"BOLT-ERROR: cannot encode relocation for symbol Func0x4 as it is"
192-
" out-of-range. To proceed must use -force-patch");
193-
}
194-
195165
TEST_P(BinaryContextTester,
196166
FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOn) {
197167
if (GetParam() != Triple::aarch64)

0 commit comments

Comments
 (0)