Skip to content

Commit 80a629c

Browse files
committed
Merge remote-tracking branch 'origin/main' into vplan-early-exit-handle-early
2 parents 76c470a + 20d6def commit 80a629c

File tree

608 files changed

+25131
-5787
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

608 files changed

+25131
-5787
lines changed

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,11 @@ class BinaryFunction {
360360
/// True if the function is used for patching code at a fixed address.
361361
bool IsPatch{false};
362362

363+
/// True if the original entry point of the function may get called, but the
364+
/// original body cannot be executed and needs to be patched with code that
365+
/// redirects execution to the new function body.
366+
bool NeedsPatch{false};
367+
363368
/// True if the function should not have an associated symbol table entry.
364369
bool IsAnonymous{false};
365370

@@ -1372,6 +1377,9 @@ class BinaryFunction {
13721377
/// Return true if this function is used for patching existing code.
13731378
bool isPatch() const { return IsPatch; }
13741379

1380+
/// Return true if the function requires a patch.
1381+
bool needsPatch() const { return NeedsPatch; }
1382+
13751383
/// Return true if the function should not have associated symbol table entry.
13761384
bool isAnonymous() const { return IsAnonymous; }
13771385

@@ -1757,6 +1765,9 @@ class BinaryFunction {
17571765
IsPatch = V;
17581766
}
17591767

1768+
/// Mark the function for patching.
1769+
void setNeedsPatch(bool V) { NeedsPatch = V; }
1770+
17601771
/// Indicate if the function should have a name in the symbol table.
17611772
void setAnonymous(bool V) {
17621773
assert(isInjected() && "Only injected functions could be anonymous");

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ extern llvm::cl::opt<unsigned> AlignText;
3434
extern llvm::cl::opt<unsigned> AlignFunctions;
3535
extern llvm::cl::opt<bool> AggregateOnly;
3636
extern llvm::cl::opt<unsigned> BucketsPerLine;
37+
extern llvm::cl::opt<bool> CompactCodeModel;
3738
extern llvm::cl::opt<bool> DiffOnly;
3839
extern llvm::cl::opt<bool> EnableBAT;
3940
extern llvm::cl::opt<bool> EqualizeBBCounts;

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,8 +1797,6 @@ bool BinaryFunction::scanExternalRefs() {
17971797
// Create relocation for every fixup.
17981798
for (const MCFixup &Fixup : Fixups) {
17991799
std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
1800-
// Can be skipped in case of overlow during relocation value encoding.
1801-
Rel->setOptional();
18021800
if (!Rel) {
18031801
Success = false;
18041802
continue;
@@ -1814,6 +1812,17 @@ bool BinaryFunction::scanExternalRefs() {
18141812
Success = false;
18151813
continue;
18161814
}
1815+
1816+
if (BC.isAArch64()) {
1817+
// Allow the relocation to be skipped in case of the overflow during the
1818+
// relocation value encoding.
1819+
Rel->setOptional();
1820+
1821+
if (!opts::CompactCodeModel)
1822+
if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Rel->Symbol))
1823+
TargetBF->setNeedsPatch(true);
1824+
}
1825+
18171826
Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
18181827
FunctionRelocations.push_back(*Rel);
18191828
}

bolt/lib/Core/BinarySection.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -186,16 +186,6 @@ void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
186186
!Relocation::canEncodeValue(Reloc.Type, Value,
187187
SectionAddress + Reloc.Offset)) {
188188

189-
// A successful run of 'scanExternalRefs' means that all pending
190-
// relocations are flushed. Otherwise, PatchEntries should run.
191-
if (!opts::ForcePatch) {
192-
BC.errs()
193-
<< "BOLT-ERROR: cannot encode relocation for symbol "
194-
<< Reloc.Symbol->getName()
195-
<< " as it is out-of-range. To proceed must use -force-patch\n";
196-
exit(1);
197-
}
198-
199189
++SkippedPendingRelocations;
200190
continue;
201191
}

bolt/lib/Passes/LongJmp.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "bolt/Passes/LongJmp.h"
1414
#include "bolt/Core/ParallelUtilities.h"
15+
#include "bolt/Utils/CommandLineOpts.h"
1516
#include "llvm/Support/MathExtras.h"
1617

1718
#define DEBUG_TYPE "longjmp"
@@ -26,11 +27,6 @@ extern cl::opt<unsigned> AlignFunctions;
2627
extern cl::opt<bool> UseOldText;
2728
extern cl::opt<bool> HotFunctionsAtEnd;
2829

29-
static cl::opt<bool>
30-
CompactCodeModel("compact-code-model",
31-
cl::desc("generate code for binaries <128MB on AArch64"),
32-
cl::init(false), cl::cat(BoltCategory));
33-
3430
static cl::opt<bool> GroupStubs("group-stubs",
3531
cl::desc("share stubs across functions"),
3632
cl::init(true), cl::cat(BoltOptCategory));

bolt/lib/Passes/PatchEntries.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
3939
bool NeedsPatching = llvm::any_of(
4040
llvm::make_second_range(BC.getBinaryFunctions()),
4141
[&](BinaryFunction &BF) {
42-
return !BC.shouldEmit(BF) && !BF.hasExternalRefRelocations();
42+
return (!BC.shouldEmit(BF) && !BF.hasExternalRefRelocations()) ||
43+
BF.needsPatch();
4344
});
4445

4546
if (!NeedsPatching)
@@ -66,7 +67,7 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
6667

6768
// Check if we can skip patching the function.
6869
if (!opts::ForcePatch && !Function.hasEHRanges() &&
69-
Function.getSize() < PatchThreshold)
70+
!Function.needsPatch() && Function.getSize() < PatchThreshold)
7071
continue;
7172

7273
// List of patches for function entries. We either successfully patch

bolt/lib/Utils/CommandLineOpts.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ cl::opt<unsigned>
6161
cl::desc("number of entries per line (default 256)"),
6262
cl::init(256), cl::Optional, cl::cat(HeatmapCategory));
6363

64+
cl::opt<bool>
65+
CompactCodeModel("compact-code-model",
66+
cl::desc("generate code for binaries <128MB on AArch64"),
67+
cl::init(false), cl::cat(BoltCategory));
68+
6469
cl::opt<bool>
6570
DiffOnly("diff-only",
6671
cl::desc("stop processing once we have enough to compare two binaries"),

bolt/test/AArch64/lite-mode.s

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,34 @@
22
## non-optimized code.
33

44
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
5+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
6+
# RUN: --defsym COMPACT=1 %s -o %t.compact.o
57
# RUN: link_fdata %s %t.o %t.fdata
6-
# RUN: llvm-strip --strip-unneeded %t.o
8+
# RUN: llvm-strip --strip-unneeded %t*.o
79
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static
10+
# RUN: %clang %cflags %t.compact.o -o %t.compact.exe -Wl,-q -static
811
# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --lite
12+
# RUN: llvm-bolt %t.compact.exe -o %t.compact.bolt --data %t.fdata --lite \
13+
# RUN: --compact-code-model
914
# RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.exe \
1015
# RUN: | FileCheck %s --check-prefix=CHECK-INPUT
1116
# RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.bolt \
1217
# RUN: | FileCheck %s
18+
# RUN: llvm-objdump -d --disassemble-symbols=_start.org.0 %t.bolt \
19+
# RUN: | FileCheck %s --check-prefix=CHECK-PATCH
20+
# RUN: llvm-objdump -d %t.compact.bolt \
21+
# RUN: | FileCheck %s --check-prefix=CHECK-COMPACT
1322

23+
## In compact mode, make sure we do not create an unnecessary patch thunk.
24+
# CHECK-COMPACT-NOT: <_start.org.0>
1425

1526
## Verify that the number of FDEs matches the number of functions in the output
1627
## binary. There are three original functions and two optimized.
28+
## NOTE: at the moment we are emitting extra FDEs for patched functions, thus
29+
## there is one more FDE for _start.
1730
# RUN: llvm-readelf -u %t.bolt | grep -wc FDE \
1831
# RUN: | FileCheck --check-prefix=CHECK-FDE %s
19-
# CHECK-FDE: 5
32+
# CHECK-FDE: 6
2033

2134
## In lite mode, optimized code will be separated from the original .text by
2235
## over 128MB, making it impossible for call/bl instructions in cold functions
@@ -28,15 +41,22 @@
2841
_start:
2942
# FDATA: 0 [unknown] 0 1 _start 0 0 100
3043
.cfi_startproc
44+
45+
## Check that the code at the original location is converted into a
46+
## veneer/thunk.
47+
# CHECK-PATCH-LABEL: <_start.org.0>
48+
# CHECK-PATCH-NEXT: adrp x16
49+
# CHECK-PATCH-NEXT: add x16, x16,
50+
# CHECK-PATCH-NEXT: br x16
3151
cmp x0, 1
3252
b.eq .L0
3353
bl cold_function
3454
.L0:
3555
ret x30
3656
.cfi_endproc
37-
.size _start, .-_start
57+
.size _start, .-_start
3858

39-
## Cold non-optimized function with a reference to a hot function (_start).
59+
## Cold non-optimized function with references to hot functions.
4060
# CHECK: Disassembly of section .bolt.org.text:
4161
# CHECK-LABEL: <cold_function>
4262
.globl cold_function
@@ -97,12 +117,26 @@ cold_function:
97117
# CHECK-NEXT: nop
98118
# CHECK-NEXT: ldr x5
99119

120+
## Since _start is relocated further than 128MB from the call site, we check
121+
## that the call is converted into a call to its original version. That original
122+
## version should contain a veneer/thunk code that we check separately.
123+
bl _start
124+
# CHECK-INPUT-NEXT: bl {{.*}} <_start>
125+
# CHECK-NEXT: bl {{.*}} <_start.org.0>
126+
127+
## Same as above, but the instruction is a tail call.
128+
b _start
129+
# CHECK-INPUT-NEXT: b {{.*}} <_start>
130+
# CHECK-NEXT: b {{.*}} <_start.org.0>
131+
100132
.cfi_endproc
101-
.size cold_function, .-cold_function
133+
.size cold_function, .-cold_function
102134

103-
## Reserve 1MB of space to make functions that follow unreachable by ADRs in
135+
.ifndef COMPACT
136+
## Reserve 128MB of space to make functions that follow unreachable by ADRs in
104137
## code that precedes this gap.
105-
.space 0x100000
138+
.space 0x8000000
139+
.endif
106140

107141
.globl far_func
108142
.type far_func, %function
@@ -111,5 +145,4 @@ far_func:
111145
.cfi_startproc
112146
ret x30
113147
.cfi_endproc
114-
.size far_func, .-far_func
115-
148+
.size far_func, .-far_func

bolt/unittests/Core/BinaryContext.cpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -162,36 +162,6 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
162162
<< "Wrong forward branch value\n";
163163
}
164164

165-
TEST_P(BinaryContextTester,
166-
FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOff) {
167-
if (GetParam() != Triple::aarch64)
168-
GTEST_SKIP();
169-
170-
// Tests that flushPendingRelocations exits if any pending relocation is out
171-
// of range and PatchEntries hasn't run. Pending relocations are added by
172-
// scanExternalRefs, so this ensures that either all scanExternalRefs
173-
// relocations were flushed or PatchEntries ran.
174-
175-
BinarySection &BS = BC->registerOrUpdateSection(
176-
".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
177-
// Create symbol 'Func0x4'
178-
MCSymbol *RelSymbol = BC->getOrCreateGlobalSymbol(4, "Func");
179-
ASSERT_TRUE(RelSymbol);
180-
Relocation Reloc{8, RelSymbol, ELF::R_AARCH64_CALL26, 0, 0};
181-
Reloc.setOptional();
182-
BS.addPendingRelocation(Reloc);
183-
184-
SmallVector<char> Vect;
185-
raw_svector_ostream OS(Vect);
186-
187-
// Resolve relocation symbol to a high value so encoding will be out of range.
188-
EXPECT_EXIT(BS.flushPendingRelocations(
189-
OS, [&](const MCSymbol *S) { return 0x800000F; }),
190-
::testing::ExitedWithCode(1),
191-
"BOLT-ERROR: cannot encode relocation for symbol Func0x4 as it is"
192-
" out-of-range. To proceed must use -force-patch");
193-
}
194-
195165
TEST_P(BinaryContextTester,
196166
FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOn) {
197167
if (GetParam() != Triple::aarch64)

clang/docs/ReleaseNotes.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,8 @@ Improvements to Clang's diagnostics
506506
behavior of the C99 feature as it was introduced into C++20. Fixes #GH47037
507507
- ``-Wreserved-identifier`` now fires on reserved parameter names in a function
508508
declaration which is not a definition.
509+
- Clang now prints the namespace for an attribute, if any,
510+
when emitting an unknown attribute diagnostic.
509511

510512
- Several compatibility diagnostics that were incorrectly being grouped under
511513
``-Wpre-c++20-compat`` are now part of ``-Wc++20-compat``. (#GH138775)
@@ -564,7 +566,7 @@ Bug Fixes in This Version
564566
- Fixed a bug where an attribute before a ``pragma clang attribute`` or
565567
``pragma clang __debug`` would cause an assertion. Instead, this now diagnoses
566568
the invalid attribute location appropriately. (#GH137861)
567-
- Fixed a crash when a malformed ``_Pragma`` directive appears as part of an
569+
- Fixed a crash when a malformed ``_Pragma`` directive appears as part of an
568570
``#include`` directive. (#GH138094)
569571
- Fixed a crash during constant evaluation involving invalid lambda captures
570572
(#GH138832)
@@ -673,7 +675,6 @@ Bug Fixes to C++ Support
673675
- Fixed an assertion when trying to constant-fold various builtins when the argument
674676
referred to a reference to an incomplete type. (#GH129397)
675677
- Fixed a crash when a cast involved a parenthesized aggregate initialization in dependent context. (#GH72880)
676-
- Fixed a crash when forming an invalid function type in a dependent context. (#GH138657) (#GH115725) (#GH68852)
677678
- No longer crashes when instantiating invalid variable template specialization
678679
whose type depends on itself. (#GH51347), (#GH55872)
679680
- Improved parser recovery of invalid requirement expressions. In turn, this
@@ -900,6 +901,8 @@ OpenMP Support
900901
- Added support 'no_openmp_constructs' assumption clause.
901902
- Added support for 'self_maps' in map and requirement clause.
902903
- Added support for 'omp stripe' directive.
904+
- Fixed a crashing bug with ``omp tile sizes`` if the argument to ``sizes`` was
905+
an invalid expression. (#GH139073)
903906

904907
Improvements
905908
^^^^^^^^^^^^

0 commit comments

Comments
 (0)