Skip to content

Commit f80c279

Browse files
committed
Merge branch 'main' into flangrt_gtest-independence
2 parents 7cd1a75 + 249883d commit f80c279

File tree

915 files changed

+29249
-10233
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

915 files changed

+29249
-10233
lines changed

bolt/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ $ perf2bolt -p perf.data -o perf.fdata <executable>
164164
This command will aggregate branch data from `perf.data` and store it in a
165165
format that is both more compact and more resilient to binary modifications.
166166

167-
If the profile was collected without brstacks, you will need to add `-nl` flag to
167+
If the profile was collected without brstacks, you will need to add `-ba` flag to
168168
the command line above.
169169

170170
### Step 3: Optimize with BOLT

bolt/docs/Heatmaps.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ $ perf record -e cycles:u -j any,u [-p PID|-a] -- sleep <interval>
2121
```
2222

2323
Running with brstack (`-j any,u` or `-b`) is recommended. Heatmaps can be generated
24-
from basic events by using the llvm-bolt-heatmap option `-nl` (no brstack) but
24+
from basic events by using the llvm-bolt-heatmap option `-ba` (basic events) but
2525
such heatmaps do not have the coverage provided by brstack and may only be useful
2626
for finding event hotspots at larger code block granularities.
2727

bolt/docs/index.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ This command will aggregate branch data from ``perf.data`` and store it
205205
in a format that is both more compact and more resilient to binary
206206
modifications.
207207

208-
If the profile was collected without LBRs, you will need to add ``-nl``
209-
flag to the command line above.
208+
If the profile was collected without brstacks, you will need to add `-ba` flag to
209+
the command line above.
210210

211211
Step 3: Optimize with BOLT
212212
~~~~~~~~~~~~~~~~~~~~~~~~~~

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2142,8 +2142,9 @@ class BinaryFunction {
21422142
}
21432143

21442144
/// Detects whether \p Address is inside a data region in this function
2145-
/// (constant islands).
2146-
bool isInConstantIsland(uint64_t Address) const {
2145+
/// (constant islands), and optionally return the island size starting
2146+
/// from the given \p Address.
2147+
bool isInConstantIsland(uint64_t Address, uint64_t *Size = nullptr) const {
21472148
if (!Islands)
21482149
return false;
21492150

@@ -2161,10 +2162,15 @@ class BinaryFunction {
21612162
DataIter = std::prev(DataIter);
21622163

21632164
auto CodeIter = Islands->CodeOffsets.upper_bound(Offset);
2164-
if (CodeIter == Islands->CodeOffsets.begin())
2165+
if (CodeIter == Islands->CodeOffsets.begin() ||
2166+
*std::prev(CodeIter) <= *DataIter) {
2167+
if (Size)
2168+
*Size = (CodeIter == Islands->CodeOffsets.end() ? getMaxSize()
2169+
: *CodeIter) -
2170+
Offset;
21652171
return true;
2166-
2167-
return *std::prev(CodeIter) <= *DataIter;
2172+
}
2173+
return false;
21682174
}
21692175

21702176
uint16_t getConstantIslandAlignment() const;

bolt/lib/Core/BinaryContext.cpp

Lines changed: 79 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ cl::opt<std::string> CompDirOverride(
7777
"location, which is used with DW_AT_dwo_name to construct a path "
7878
"to *.dwo files."),
7979
cl::Hidden, cl::init(""), cl::cat(BoltCategory));
80+
81+
static cl::opt<bool>
82+
FailOnInvalidPadding("fail-on-invalid-padding", cl::Hidden, cl::init(false),
83+
cl::desc("treat invalid code padding as error"),
84+
cl::ZeroOrMore, cl::cat(BoltCategory));
8085
} // namespace opts
8186

8287
namespace llvm {
@@ -942,8 +947,7 @@ std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
942947
}
943948

944949
bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
945-
// FIXME: aarch64 support is missing.
946-
if (!isX86())
950+
if (!isX86() && !isAArch64())
947951
return true;
948952

949953
if (BF.getSize() == BF.getMaxSize())
@@ -973,14 +977,26 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
973977
return Offset - StartOffset;
974978
};
975979

976-
// Skip a sequence of zero bytes.
980+
// Skip a sequence of zero bytes. For AArch64 we only skip 4 bytes of zeros
981+
// in case the following zeros belong to constant island or veneer.
977982
auto skipZeros = [&]() {
978983
const uint64_t StartOffset = Offset;
979-
for (; Offset < BF.getMaxSize(); ++Offset)
980-
if ((*FunctionData)[Offset] != 0)
984+
uint64_t CurrentOffset = Offset;
985+
for (; CurrentOffset < BF.getMaxSize() &&
986+
(!isAArch64() || CurrentOffset < StartOffset + 4);
987+
++CurrentOffset)
988+
if ((*FunctionData)[CurrentOffset] != 0)
981989
break;
982990

983-
return Offset - StartOffset;
991+
uint64_t NumZeros = CurrentOffset - StartOffset;
992+
if (isAArch64())
993+
NumZeros &= ~((uint64_t)0x3);
994+
995+
if (NumZeros == 0)
996+
return false;
997+
Offset += NumZeros;
998+
InstrAddress += NumZeros;
999+
return true;
9841000
};
9851001

9861002
// Accept the whole padding area filled with breakpoints.
@@ -993,6 +1009,8 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
9931009
// Some functions have a jump to the next function or to the padding area
9941010
// inserted after the body.
9951011
auto isSkipJump = [&](const MCInst &Instr) {
1012+
if (!isX86())
1013+
return false;
9961014
uint64_t TargetAddress = 0;
9971015
if (MIB->isUnconditionalBranch(Instr) &&
9981016
MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
@@ -1004,34 +1022,73 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
10041022
return false;
10051023
};
10061024

1025+
// For veneers that are not already covered by binary functions, only those
1026+
// that handleAArch64Veneer() can recognize are checked here.
1027+
auto skipAArch64Veneer = [&]() {
1028+
if (!isAArch64() || Offset >= BF.getMaxSize())
1029+
return false;
1030+
BinaryFunction *BFVeneer = getBinaryFunctionContainingAddress(InstrAddress);
1031+
if (BFVeneer) {
1032+
// A binary function may have been created to point to this veneer.
1033+
Offset += BFVeneer->getSize();
1034+
assert(Offset <= BF.getMaxSize() &&
1035+
"AArch64 veneeer goes past the max size of function");
1036+
InstrAddress += BFVeneer->getSize();
1037+
return true;
1038+
}
1039+
const uint64_t AArch64VeneerSize = 12;
1040+
if (Offset + AArch64VeneerSize <= BF.getMaxSize() &&
1041+
handleAArch64Veneer(InstrAddress, /*MatchOnly*/ true)) {
1042+
Offset += AArch64VeneerSize;
1043+
InstrAddress += AArch64VeneerSize;
1044+
this->errs() << "BOLT-WARNING: found unmarked AArch64 veneer at 0x"
1045+
<< Twine::utohexstr(BF.getAddress() + Offset) << '\n';
1046+
return true;
1047+
}
1048+
return false;
1049+
};
1050+
1051+
auto skipAArch64ConstantIsland = [&]() {
1052+
if (!isAArch64() || Offset >= BF.getMaxSize())
1053+
return false;
1054+
uint64_t Size;
1055+
if (BF.isInConstantIsland(InstrAddress, &Size)) {
1056+
Offset += Size;
1057+
InstrAddress += Size;
1058+
return true;
1059+
}
1060+
return false;
1061+
};
1062+
10071063
// Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1008-
while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1064+
// For AArch64 also check veneers and skip constant islands.
1065+
while (skipAArch64Veneer() || skipAArch64ConstantIsland() ||
1066+
skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
10091067
skipZeros())
10101068
;
10111069

10121070
if (Offset == BF.getMaxSize())
10131071
return true;
10141072

1015-
if (opts::Verbosity >= 1) {
1016-
this->errs() << "BOLT-WARNING: bad padding at address 0x"
1017-
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
1018-
<< " starting at offset " << (Offset - BF.getSize())
1019-
<< " in function " << BF << '\n'
1020-
<< FunctionData->slice(BF.getSize(),
1021-
BF.getMaxSize() - BF.getSize())
1022-
<< '\n';
1023-
}
1024-
1073+
this->errs() << "BOLT-WARNING: bad padding at address 0x"
1074+
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
1075+
<< " starting at offset " << (Offset - BF.getSize())
1076+
<< " in function " << BF << '\n'
1077+
<< FunctionData->slice(BF.getSize(),
1078+
BF.getMaxSize() - BF.getSize())
1079+
<< '\n';
10251080
return false;
10261081
}
10271082

10281083
void BinaryContext::adjustCodePadding() {
1084+
uint64_t NumInvalid = 0;
10291085
for (auto &BFI : BinaryFunctions) {
10301086
BinaryFunction &BF = BFI.second;
10311087
if (!shouldEmit(BF))
10321088
continue;
10331089

10341090
if (!hasValidCodePadding(BF)) {
1091+
NumInvalid++;
10351092
if (HasRelocations) {
10361093
this->errs() << "BOLT-WARNING: function " << BF
10371094
<< " has invalid padding. Ignoring the function\n";
@@ -1041,6 +1098,11 @@ void BinaryContext::adjustCodePadding() {
10411098
}
10421099
}
10431100
}
1101+
if (NumInvalid && opts::FailOnInvalidPadding) {
1102+
this->errs() << "BOLT-ERROR: found " << NumInvalid
1103+
<< " instance(s) of invalid code padding\n";
1104+
exit(1);
1105+
}
10441106
}
10451107

10461108
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,10 +1427,9 @@ Error BinaryFunction::disassemble() {
14271427
!(BC.isAArch64() &&
14281428
BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) {
14291429
// Result of __builtin_unreachable().
1430-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
1431-
<< Twine::utohexstr(AbsoluteInstrAddr)
1432-
<< " in function " << *this
1433-
<< " : replacing with nop.\n");
1430+
errs() << "BOLT-WARNING: jump past end detected at 0x"
1431+
<< Twine::utohexstr(AbsoluteInstrAddr) << " in function "
1432+
<< *this << " : replacing with nop.\n";
14341433
BC.MIB->createNoop(Instruction);
14351434
if (IsCondBranch) {
14361435
// Register branch offset for profile validation.

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,23 @@ using namespace bolt;
4545
namespace opts {
4646

4747
static cl::opt<bool>
48-
BasicAggregation("nl",
49-
cl::desc("aggregate basic samples (without brstack info)"),
48+
BasicAggregation("basic-events",
49+
cl::desc("aggregate basic events (without brstack info)"),
5050
cl::cat(AggregatorCategory));
5151

52+
static cl::alias BasicAggregationAlias("ba",
53+
cl::desc("Alias for --basic-events"),
54+
cl::aliasopt(BasicAggregation));
55+
56+
static cl::opt<bool> DeprecatedBasicAggregationNl(
57+
"nl", cl::desc("Alias for --basic-events (deprecated. Use --ba)"),
58+
cl::cat(AggregatorCategory), cl::ReallyHidden,
59+
cl::callback([](const bool &Enabled) {
60+
errs()
61+
<< "BOLT-WARNING: '-nl' is deprecated, please use '--ba' instead.\n";
62+
BasicAggregation = Enabled;
63+
}));
64+
5265
cl::opt<bool> ArmSPE("spe", cl::desc("Enable Arm SPE mode."),
5366
cl::cat(AggregatorCategory));
5467

@@ -1433,7 +1446,7 @@ std::error_code DataAggregator::printLBRHeatMap() {
14331446
"Cannot build heatmap.";
14341447
} else {
14351448
errs() << "HEATMAP-ERROR: no brstack traces detected in profile. "
1436-
"Cannot build heatmap. Use -nl for building heatmap from "
1449+
"Cannot build heatmap. Use -ba for building heatmap from "
14371450
"basic events.\n";
14381451
}
14391452
exit(1);
@@ -1629,8 +1642,8 @@ std::error_code DataAggregator::parseBranchEvents() {
16291642
<< "PERF2BOLT-WARNING: all recorded samples for this binary lack "
16301643
"brstack. Record profile with perf record -j any or run "
16311644
"perf2bolt "
1632-
"in non-brstack mode with -nl (the performance improvement in "
1633-
"-nl "
1645+
"in non-brstack mode with -ba (the performance improvement in "
1646+
"-ba "
16341647
"mode may be limited)\n";
16351648
else
16361649
errs()

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
157157
MCPhysReg getStackPointer() const override { return AArch64::SP; }
158158
MCPhysReg getFramePointer() const override { return AArch64::FP; }
159159

160+
bool isBreakpoint(const MCInst &Inst) const override {
161+
return Inst.getOpcode() == AArch64::BRK;
162+
}
163+
160164
bool isPush(const MCInst &Inst) const override {
161165
return isStoreToStack(Inst);
162166
};
@@ -2153,7 +2157,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
21532157

21542158
--I;
21552159
Address -= 4;
2156-
if (I->getOpcode() != AArch64::ADRP ||
2160+
if (I != Begin || I->getOpcode() != AArch64::ADRP ||
21572161
MCPlus::getNumPrimeOperands(*I) < 2 || !I->getOperand(0).isReg() ||
21582162
!I->getOperand(1).isImm() || I->getOperand(0).getReg() != AArch64::X16)
21592163
return 0;

bolt/test/AArch64/Inputs/plt-gnu-ld.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ Sections:
9393
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
9494
Address: 0x400510
9595
AddressAlign: 0x8
96-
Content: 1D0080D21E0080D2E50300AAE10340F9E2230091E603009100000090002015910300009063201B910400009084201D91E0FFFF97EBFFFF972F0000148000009000F047F9400000B4E2FFFF17C0035FD6800000B000000191810000B0210001913F0000EBC00000540100009021B443F9610000B4F00301AA00021FD6C0035FD6800000B000000191810000B021000191210000CB22FC7FD3410C818BFF0781EB21FC4193C00000540200009042B843F9620000B4F00302AA00021FD6C0035FD6FD7BBEA9FD030091F30B00F9930000B06002413980000035DEFFFF972000805260020139F30B40F9FD7BC2A8C0035FD6E4FFFF17FF8300D1FD7B01A9FD430091BFC31FB8E1230091E8DD9752A8D5BB72E80300B9E80B00B9E0130091FF0700B9880000B00900009029C11291092500F9082540F9820080D200013FD6E90340B9E80740B90801096BA00000540100001428008052A8C31FB814000014880000B00900009029411391092900F9082940F9E0230091E1031F2A820080D200013FD6E80B40B9A80000340100001428008052A8C31FB8050000140000009000E01D9194FFFF9701000014A0C35FB8FD7B41A9FF830091C0035FD6FD7BBCA9FD030091F35301A99400009094023891F55B02A995000090B5E23791940215CBF603002AF76303A9F70301AAF80302AA5DFFFF97FF0F94EB6001005494FE4393130080D2A37A73F8E20318AA73060091E10317AAE003162A60003FD69F0213EB21FFFF54F35341A9F55B42A9F76343A9FD7BC4A8C0035FD61F2003D5C0035FD6
96+
Content: 1D0080D21E0080D2E50300AAE10340F9E2230091E603009100000090002015910300009063201B910400009084201D91E0FFFF97EBFFFF972F0000148000009000F047F9400000B4E2FFFF17C0035FD6800000B000000191810000B0210001913F0000EBC00000540100009021B443F9610000B4F00301AA00021FD6C0035FD6800000B000000191810000B021000191210000CB22FC7FD3410C818BFF0781EB21FC4193C00000540200009042B843F9620000B4F00302AA00021FD6C0035FD6FD7BBEA9FD030091F30B00F9930000B06002413980000035DEFFFF972000805260020139F30B40F9FD7BC2A8C0035FD6E4FFFF17FF8300D1FD7B01A9FD430091BFC31FB8E1230091E8DD9752A8D5BB72E80300B9E80B00B9E0130091FF0700B9880000B00900009029C11291092500F9082540F9820080D200013FD6E90340B9E80740B90801096BA00000540100001428008052A8C31FB814000014880000B00900009029411391092900F9082940F9E0230091E1031F2A820080D200013FD6E80B40B9A80000340100001428008052A8C31FB8050000140000009000E01D9194FFFF9701000014A0C35FB8FD7B41A9FF830091C0035FD6FD7BBCA9FD030091F35301A99400009094023891F55B02A995000090B5E23791940215CBF603002AF76303A9F70301AAF80302AA60003FD6FF0F94EB6001005494FE4393130080D2A37A73F8E20318AA73060091E10317AAE003162A60003FD69F0213EB21FFFF54F35341A9F55B42A9F76343A9FD7BC4A8C0035FD61F2003D5C0035FD6
9797
- Name: .rodata
9898
Type: SHT_PROGBITS
9999
Flags: [ SHF_ALLOC ]
@@ -435,6 +435,12 @@ Symbols:
435435
Binding: STB_GLOBAL
436436
Value: 0x400604
437437
Size: 0xC4
438+
- Name: post_main
439+
Type: STT_FUNC
440+
Section: .text
441+
Binding: STB_GLOBAL
442+
Value: 0x4006C8
443+
Size: 0x7C
438444
- Name: 'printf@@GLIBC_2.17'
439445
Type: STT_FUNC
440446
Binding: STB_GLOBAL
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# RUN: %clang %cflags %s -o %t.so -Wl,-q
2+
# RUN: not llvm-bolt %t.so -o %t.bolt --fail-on-invalid-padding \
3+
# RUN: 2>&1 | FileCheck %s
4+
# CHECK: BOLT-ERROR: found 1 instance(s) of invalid code padding
5+
6+
.text
7+
.align 2
8+
.global foo
9+
.type foo, %function
10+
foo:
11+
cmp x0, x1
12+
b.eq .Ltmp1
13+
adrp x1, jmptbl
14+
add x1, x1, :lo12:jmptbl
15+
ldrsw x2, [x1, x2, lsl #2]
16+
br x2
17+
b .Ltmp1
18+
.Ltmp2:
19+
add x0, x0, x1
20+
ret
21+
.size foo, .-foo
22+
23+
.Ltmp1:
24+
add x0, x0, x1
25+
b .Ltmp2
26+
27+
# Dummy relocation to force relocation mode
28+
.reloc 0, R_AARCH64_NONE
29+
30+
.section .rodata, "a"
31+
.align 2
32+
.global jmptbl
33+
jmptbl:
34+
.word .text+0x28 - .

0 commit comments

Comments
 (0)