Skip to content

Commit f075654

Browse files
Merge branch 'main' into tosa_disable_pad_fold
2 parents 4451864 + 6b19a54 commit f075654

File tree

1,514 files changed

+77668
-25699
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,514 files changed

+77668
-25699
lines changed

.github/workflows/libcxx-run-benchmarks.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,17 +64,21 @@ jobs:
6464
path: repo # Avoid nuking the workspace, where we have the Python virtualenv
6565

6666
- name: Run baseline
67+
env:
68+
BENCHMARKS: ${{ steps.vars.outputs.benchmarks }}
6769
run: |
6870
source .venv/bin/activate && cd repo
6971
python -m pip install -r libcxx/utils/requirements.txt
7072
baseline_commit=$(git merge-base ${{ steps.vars.outputs.pr_base }} ${{ steps.vars.outputs.pr_head }})
71-
./libcxx/utils/test-at-commit --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }}
73+
./libcxx/utils/test-at-commit --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed "$BENCHMARKS"
7274
./libcxx/utils/consolidate-benchmarks build/baseline | tee baseline.lnt
7375
7476
- name: Run candidate
77+
env:
78+
BENCHMARKS: ${{ steps.vars.outputs.benchmarks }}
7579
run: |
7680
source .venv/bin/activate && cd repo
77-
./libcxx/utils/test-at-commit --commit ${{ steps.vars.outputs.pr_head }} -B build/candidate -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }}
81+
./libcxx/utils/test-at-commit --commit ${{ steps.vars.outputs.pr_head }} -B build/candidate -- -sv -j1 --param optimization=speed "$BENCHMARKS"
7882
./libcxx/utils/consolidate-benchmarks build/candidate | tee candidate.lnt
7983
8084
- name: Compare baseline and candidate runs

.github/workflows/premerge.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ jobs:
193193
uses: llvm/actions/install-ninja@main
194194
- name: Build and Test
195195
run: |
196-
source <(git diff --name-only HEAD~2..HEAD | python3 .ci/compute_projects.py)
196+
source <(git diff --name-only HEAD~1...HEAD | python3 .ci/compute_projects.py)
197197
198198
if [[ "${projects_to_build}" == "" ]]; then
199199
echo "No projects to build"

.github/workflows/release-binaries.yml

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ jobs:
5858
ref: ${{ steps.vars.outputs.ref }}
5959
upload: ${{ steps.vars.outputs.upload }}
6060
target-cmake-flags: ${{ steps.vars.outputs.target-cmake-flags }}
61-
ccache: ${{ steps.vars.outputs.ccache }}
6261
build-flang: ${{ steps.vars.outputs.build-flang }}
6362
release-binary-basename: ${{ steps.vars.outputs.release-binary-basename }}
6463
release-binary-filename: ${{ steps.vars.outputs.release-binary-filename }}
@@ -123,13 +122,6 @@ jobs:
123122
echo "release-binary-filename=$release_binary_basename.tar.xz" >> $GITHUB_OUTPUT
124123
125124
target="$RUNNER_OS-$RUNNER_ARCH"
126-
# The hendrikmuhs/ccache-action action does not support installing sccache
127-
# on arm64 Linux.
128-
if [ "$target" = "Linux-ARM64" ]; then
129-
echo ccache=ccache >> $GITHUB_OUTPUT
130-
else
131-
echo ccache=sccache >> $GITHUB_OUTPUT
132-
fi
133125
134126
# The macOS builds try to cross compile some libraries so we need to
135127
# add extra CMake args to disable them.
@@ -222,15 +214,12 @@ jobs:
222214
- name: Configure
223215
id: build
224216
shell: bash
225-
env:
226-
CCACHE_BIN: ${{ needs.prepare.outputs.ccache }}
227217
run: |
228218
# There were some issues on the ARM64 MacOS runners with trying to build x86 object,
229219
# so we need to set some extra cmake flags to disable this.
230220
cmake -G Ninja -S llvm -B ${{ steps.setup-stage.outputs.build-prefix }}/build \
231221
${{ needs.prepare.outputs.target-cmake-flags }} \
232222
-C clang/cmake/caches/Release.cmake \
233-
-DBOOTSTRAP_LLVM_PARALLEL_LINK_JOBS=1 \
234223
-DBOOTSTRAP_BOOTSTRAP_CPACK_PACKAGE_FILE_NAME="${{ needs.prepare.outputs.release-binary-basename }}"
235224
236225
- name: Build

.github/workflows/release-documentation.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ on:
2525
description: 'Upload documentation'
2626
required: false
2727
type: boolean
28+
secrets:
29+
WWW_RELEASES_TOKEN:
30+
description: "Secret used to create a PR with the documentation changes."
31+
required: false
2832

2933
jobs:
3034
release-documentation:

.github/workflows/release-tasks.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ jobs:
5454
with:
5555
release-version: ${{ needs.validate-tag.outputs.release-version }}
5656
upload: true
57+
# Called workflows don't have access to secrets by default, so we need to explicitly pass secrets that we use.
58+
secrets:
59+
WWW_RELEASES_TOKEN: ${{ secrets.WWW_RELEASES_TOKEN }}
5760

5861
release-doxygen:
5962
name: Build and Upload Release Doxygen

bolt/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ $ perf2bolt -p perf.data -o perf.fdata <executable>
164164
This command will aggregate branch data from `perf.data` and store it in a
165165
format that is both more compact and more resilient to binary modifications.
166166

167-
If the profile was collected without brstacks, you will need to add `-nl` flag to
167+
If the profile was collected without brstacks, you will need to add the `-ba` flag to
168168
the command line above.
169169

170170
### Step 3: Optimize with BOLT

bolt/docs/Heatmaps.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ $ perf record -e cycles:u -j any,u [-p PID|-a] -- sleep <interval>
2121
```
2222

2323
Running with brstack (`-j any,u` or `-b`) is recommended. Heatmaps can be generated
24-
from basic events by using the llvm-bolt-heatmap option `-nl` (no brstack) but
24+
from basic events by using the llvm-bolt-heatmap option `-ba` (basic events) but
2525
such heatmaps do not have the coverage provided by brstack and may only be useful
2626
for finding event hotspots at larger code block granularities.
2727

bolt/docs/index.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ This command will aggregate branch data from ``perf.data`` and store it
205205
in a format that is both more compact and more resilient to binary
206206
modifications.
207207

208-
If the profile was collected without LBRs, you will need to add ``-nl``
209-
flag to the command line above.
208+
If the profile was collected without brstacks, you will need to add the ``-ba`` flag to
209+
the command line above.
210210

211211
Step 3: Optimize with BOLT
212212
~~~~~~~~~~~~~~~~~~~~~~~~~~

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2142,8 +2142,9 @@ class BinaryFunction {
21422142
}
21432143

21442144
/// Detects whether \p Address is inside a data region in this function
2145-
/// (constant islands).
2146-
bool isInConstantIsland(uint64_t Address) const {
2145+
/// (constant islands), and optionally returns the island size starting
2146+
/// from the given \p Address.
2147+
bool isInConstantIsland(uint64_t Address, uint64_t *Size = nullptr) const {
21472148
if (!Islands)
21482149
return false;
21492150

@@ -2161,10 +2162,15 @@ class BinaryFunction {
21612162
DataIter = std::prev(DataIter);
21622163

21632164
auto CodeIter = Islands->CodeOffsets.upper_bound(Offset);
2164-
if (CodeIter == Islands->CodeOffsets.begin())
2165+
if (CodeIter == Islands->CodeOffsets.begin() ||
2166+
*std::prev(CodeIter) <= *DataIter) {
2167+
if (Size)
2168+
*Size = (CodeIter == Islands->CodeOffsets.end() ? getMaxSize()
2169+
: *CodeIter) -
2170+
Offset;
21652171
return true;
2166-
2167-
return *std::prev(CodeIter) <= *DataIter;
2172+
}
2173+
return false;
21682174
}
21692175

21702176
uint16_t getConstantIslandAlignment() const;

bolt/lib/Core/BinaryContext.cpp

Lines changed: 90 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ cl::opt<std::string> CompDirOverride(
7777
"location, which is used with DW_AT_dwo_name to construct a path "
7878
"to *.dwo files."),
7979
cl::Hidden, cl::init(""), cl::cat(BoltCategory));
80+
81+
static cl::opt<bool>
82+
FailOnInvalidPadding("fail-on-invalid-padding", cl::Hidden, cl::init(false),
83+
cl::desc("treat invalid code padding as error"),
84+
cl::ZeroOrMore, cl::cat(BoltCategory));
8085
} // namespace opts
8186

8287
namespace llvm {
@@ -942,8 +947,7 @@ std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
942947
}
943948

944949
bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
945-
// FIXME: aarch64 support is missing.
946-
if (!isX86())
950+
if (!isX86() && !isAArch64())
947951
return true;
948952

949953
if (BF.getSize() == BF.getMaxSize())
@@ -973,14 +977,26 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
973977
return Offset - StartOffset;
974978
};
975979

976-
// Skip a sequence of zero bytes.
980+
// Skip a sequence of zero bytes. For AArch64 we only skip 4 bytes of zeros
981+
// in case the following zeros belong to a constant island or veneer.
977982
auto skipZeros = [&]() {
978983
const uint64_t StartOffset = Offset;
979-
for (; Offset < BF.getMaxSize(); ++Offset)
980-
if ((*FunctionData)[Offset] != 0)
984+
uint64_t CurrentOffset = Offset;
985+
for (; CurrentOffset < BF.getMaxSize() &&
986+
(!isAArch64() || CurrentOffset < StartOffset + 4);
987+
++CurrentOffset)
988+
if ((*FunctionData)[CurrentOffset] != 0)
981989
break;
982990

983-
return Offset - StartOffset;
991+
uint64_t NumZeros = CurrentOffset - StartOffset;
992+
if (isAArch64())
993+
NumZeros &= ~((uint64_t)0x3);
994+
995+
if (NumZeros == 0)
996+
return false;
997+
Offset += NumZeros;
998+
InstrAddress += NumZeros;
999+
return true;
9841000
};
9851001

9861002
// Accept the whole padding area filled with breakpoints.
@@ -993,6 +1009,8 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
9931009
// Some functions have a jump to the next function or to the padding area
9941010
// inserted after the body.
9951011
auto isSkipJump = [&](const MCInst &Instr) {
1012+
if (!isX86())
1013+
return false;
9961014
uint64_t TargetAddress = 0;
9971015
if (MIB->isUnconditionalBranch(Instr) &&
9981016
MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
@@ -1004,34 +1022,73 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
10041022
return false;
10051023
};
10061024

1025+
// For veneers that are not already covered by binary functions, only those
1026+
// that handleAArch64Veneer() can recognize are checked here.
1027+
auto skipAArch64Veneer = [&]() {
1028+
if (!isAArch64() || Offset >= BF.getMaxSize())
1029+
return false;
1030+
BinaryFunction *BFVeneer = getBinaryFunctionContainingAddress(InstrAddress);
1031+
if (BFVeneer) {
1032+
// A binary function may have been created to point to this veneer.
1033+
Offset += BFVeneer->getSize();
1034+
assert(Offset <= BF.getMaxSize() &&
1035+
"AArch64 veneeer goes past the max size of function");
1036+
InstrAddress += BFVeneer->getSize();
1037+
return true;
1038+
}
1039+
const uint64_t AArch64VeneerSize = 12;
1040+
if (Offset + AArch64VeneerSize <= BF.getMaxSize() &&
1041+
handleAArch64Veneer(InstrAddress, /*MatchOnly*/ true)) {
1042+
Offset += AArch64VeneerSize;
1043+
InstrAddress += AArch64VeneerSize;
1044+
this->errs() << "BOLT-WARNING: found unmarked AArch64 veneer at 0x"
1045+
<< Twine::utohexstr(BF.getAddress() + Offset) << '\n';
1046+
return true;
1047+
}
1048+
return false;
1049+
};
1050+
1051+
auto skipAArch64ConstantIsland = [&]() {
1052+
if (!isAArch64() || Offset >= BF.getMaxSize())
1053+
return false;
1054+
uint64_t Size;
1055+
if (BF.isInConstantIsland(InstrAddress, &Size)) {
1056+
Offset += Size;
1057+
InstrAddress += Size;
1058+
return true;
1059+
}
1060+
return false;
1061+
};
1062+
10071063
// Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1008-
while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1064+
// For AArch64 also check veneers and skip constant islands.
1065+
while (skipAArch64Veneer() || skipAArch64ConstantIsland() ||
1066+
skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
10091067
skipZeros())
10101068
;
10111069

10121070
if (Offset == BF.getMaxSize())
10131071
return true;
10141072

1015-
if (opts::Verbosity >= 1) {
1016-
this->errs() << "BOLT-WARNING: bad padding at address 0x"
1017-
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
1018-
<< " starting at offset " << (Offset - BF.getSize())
1019-
<< " in function " << BF << '\n'
1020-
<< FunctionData->slice(BF.getSize(),
1021-
BF.getMaxSize() - BF.getSize())
1022-
<< '\n';
1023-
}
1024-
1073+
this->errs() << "BOLT-WARNING: bad padding at address 0x"
1074+
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
1075+
<< " starting at offset " << (Offset - BF.getSize())
1076+
<< " in function " << BF << '\n'
1077+
<< FunctionData->slice(BF.getSize(),
1078+
BF.getMaxSize() - BF.getSize())
1079+
<< '\n';
10251080
return false;
10261081
}
10271082

10281083
void BinaryContext::adjustCodePadding() {
1084+
uint64_t NumInvalid = 0;
10291085
for (auto &BFI : BinaryFunctions) {
10301086
BinaryFunction &BF = BFI.second;
10311087
if (!shouldEmit(BF))
10321088
continue;
10331089

10341090
if (!hasValidCodePadding(BF)) {
1091+
NumInvalid++;
10351092
if (HasRelocations) {
10361093
this->errs() << "BOLT-WARNING: function " << BF
10371094
<< " has invalid padding. Ignoring the function\n";
@@ -1041,6 +1098,11 @@ void BinaryContext::adjustCodePadding() {
10411098
}
10421099
}
10431100
}
1101+
if (NumInvalid && opts::FailOnInvalidPadding) {
1102+
this->errs() << "BOLT-ERROR: found " << NumInvalid
1103+
<< " instance(s) of invalid code padding\n";
1104+
exit(1);
1105+
}
10441106
}
10451107

10461108
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
@@ -1337,8 +1399,17 @@ void BinaryContext::processInterproceduralReferences() {
13371399
<< Function.getPrintName() << " and "
13381400
<< TargetFunction->getPrintName() << '\n';
13391401
}
1340-
if (uint64_t Offset = Address - TargetFunction->getAddress())
1341-
TargetFunction->addEntryPointAtOffset(Offset);
1402+
if (uint64_t Offset = Address - TargetFunction->getAddress()) {
1403+
if (!TargetFunction->isInConstantIsland(Address)) {
1404+
TargetFunction->addEntryPointAtOffset(Offset);
1405+
} else {
1406+
TargetFunction->setIgnored();
1407+
this->outs() << "BOLT-WARNING: Ignoring entry point at address 0x"
1408+
<< Twine::utohexstr(Address)
1409+
<< " in constant island of function " << *TargetFunction
1410+
<< '\n';
1411+
}
1412+
}
13421413

13431414
continue;
13441415
}

0 commit comments

Comments
 (0)