Skip to content

Commit bf2f287

Browse files
committed
Merge remote-tracking branch 'origin/main' into libcxx-overriden-function-detection
2 parents d8ef6d0 + df2356b commit bf2f287

File tree

389 files changed

+14007
-11337
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

389 files changed

+14007
-11337
lines changed

.github/workflows/build-ci-container.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ jobs:
6060
- name: Test Container
6161
run: |
6262
for image in ${{ steps.vars.outputs.container-name-tag }} ${{ steps.vars.outputs.container-name }}; do
63-
podman run --rm -it $image /usr/bin/bash -x -c 'printf '\''#include <iostream>\nint main(int argc, char **argv) { std::cout << "Hello\\n"; }'\'' | clang++ -x c++ - && ./a.out | grep Hello'
63+
podman run --rm -it $image /usr/bin/bash -x -c 'cd $HOME && printf '\''#include <iostream>\nint main(int argc, char **argv) { std::cout << "Hello\\n"; }'\'' | clang++ -x c++ - && ./a.out | grep Hello'
6464
done
6565
6666
push-ci-container:

.github/workflows/containers/github-action-ci/Dockerfile

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,13 @@ RUN ninja -C ./build stage2-clang-bolt stage2-install-distribution && ninja -C .
4141
FROM base
4242

4343
COPY --from=stage1-toolchain $LLVM_SYSROOT $LLVM_SYSROOT
44-
44+
4545
# Need to install curl for hendrikmuhs/ccache-action
4646
# Need nodejs for some of the GitHub actions.
4747
# Need perl-modules for clang analyzer tests.
4848
# Need git for SPIRV-Tools tests.
4949
RUN apt-get update && \
50-
apt-get install -y \
50+
DEBIAN_FRONTEND=noninteractive apt-get install -y \
5151
binutils \
5252
cmake \
5353
curl \
@@ -56,7 +56,22 @@ RUN apt-get update && \
5656
ninja-build \
5757
nodejs \
5858
perl-modules \
59-
python3-psutil
59+
python3-psutil \
60+
61+
# These are needed by the premerge pipeline. Pip is used to install
62+
# dependent python packages and ccache is used for build caching. File and
63+
# tzdata are used for tests.
64+
python3-pip \
65+
ccache \
66+
file \
67+
tzdata
6068

6169
ENV LLVM_SYSROOT=$LLVM_SYSROOT
6270
ENV PATH=${LLVM_SYSROOT}/bin:${PATH}
71+
72+
# Create a new non-root user to avoid failures in tests that expect
73+
# permission errors to occur. Set the user id to 1001 as that is the
74+
# user id that GitHub Actions uses to perform the checkout action.
75+
RUN useradd gha -u 1001 -m -s /bin/bash
76+
USER gha
77+

.github/workflows/premerge.yaml

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
name: LLVM Premerge Checks
2+
3+
permissions:
4+
contents: read
5+
6+
on:
7+
pull_request:
8+
paths:
9+
- .github/workflows/premerge.yaml
10+
push:
11+
branches:
12+
- 'main'
13+
14+
jobs:
15+
premerge-checks-linux:
16+
if: github.repository_owner == 'llvm'
17+
runs-on: llvm-premerge-linux-runners
18+
concurrency:
19+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
20+
cancel-in-progress: true
21+
container:
22+
image: ghcr.io/llvm/ci-ubuntu-22.04:latest
23+
defaults:
24+
run:
25+
shell: bash
26+
steps:
27+
- name: Checkout LLVM
28+
uses: actions/checkout@v4
29+
with:
30+
fetch-depth: 2
31+
- name: Setup ccache
32+
uses: hendrikmuhs/ccache-action@v1.2.14
33+
- name: Build and Test
34+
run: |
35+
git config --global --add safe.directory '*'
36+
37+
modified_files=$(git diff --name-only HEAD~1...HEAD)
38+
modified_dirs=$(echo "$modified_files" | cut -d'/' -f1 | sort -u)
39+
40+
echo $modified_files
41+
echo $modified_dirs
42+
43+
. ./.ci/compute-projects.sh
44+
45+
all_projects="bolt clang clang-tools-extra compiler-rt cross-project-tests flang libc libclc lld lldb llvm mlir openmp polly pstl"
46+
modified_projects="$(keep-modified-projects ${all_projects})"
47+
48+
linux_projects_to_test=$(exclude-linux $(compute-projects-to-test 0 ${modified_projects}))
49+
linux_check_targets=$(check-targets ${linux_projects_to_test} | sort | uniq)
50+
linux_projects=$(add-dependencies ${linux_projects_to_test} | sort | uniq)
51+
52+
linux_runtimes_to_test=$(compute-runtimes-to-test ${linux_projects_to_test})
53+
linux_runtime_check_targets=$(check-targets ${linux_runtimes_to_test} | sort | uniq)
54+
linux_runtimes=$(echo ${linux_runtimes_to_test} | sort | uniq)
55+
56+
if [[ "${linux_projects}" == "" ]]; then
57+
echo "No projects to build"
58+
exit 0
59+
fi
60+
61+
echo "Building projects: ${linux_projects}"
62+
echo "Running project checks targets: ${linux_check_targets}"
63+
echo "Building runtimes: ${linux_runtimes}"
64+
echo "Running runtimes checks targets: ${linux_runtime_check_targets}"
65+
66+
export CC=/opt/llvm/bin/clang
67+
export CXX=/opt/llvm/bin/clang++
68+
69+
./.ci/monolithic-linux.sh "$(echo ${linux_projects} | tr ' ' ';')" "$(echo ${linux_check_targets})" "$(echo ${linux_runtimes} | tr ' ' ';')" "$(echo ${linux_runtime_check_targets})"

bolt/docs/CommandLineArgumentReference.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,9 +498,12 @@
498498
Automatically put hot code on 2MB page(s) (hugify) at runtime. No manual call
499499
to hugify is needed in the binary (which is what --hot-text relies on).
500500

501-
- `--icf`
501+
- `--icf=<value>`
502502

503503
Fold functions with identical code
504+
- `all`: Enable identical code folding
505+
- `none`: Disable identical code folding (default)
506+
- `safe`: Enable safe identical code folding
504507

505508
- `--icp`
506509

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,9 @@ class BinaryFunction {
428428
/// Function order for streaming into the destination binary.
429429
uint32_t Index{-1U};
430430

431+
/// Function is referenced by a non-control flow instruction.
432+
bool HasAddressTaken{false};
433+
431434
/// Get basic block index assuming it belongs to this function.
432435
unsigned getIndex(const BinaryBasicBlock *BB) const {
433436
assert(BB->getIndex() < BasicBlocks.size());
@@ -822,6 +825,14 @@ class BinaryFunction {
822825
return nullptr;
823826
}
824827

828+
/// Return true if function is referenced in a non-control flow instruction.
829+
/// This flag is set when the code and relocation analyses are being
830+
/// performed, which occurs when safe ICF (Identical Code Folding) is enabled.
831+
bool hasAddressTaken() const { return HasAddressTaken; }
832+
833+
/// Set whether function is referenced in a non-control flow instruction.
834+
void setHasAddressTaken(bool AddressTaken) { HasAddressTaken = AddressTaken; }
835+
825836
/// Returns the raw binary encoding of this function.
826837
ErrorOr<ArrayRef<uint8_t>> getData() const;
827838

@@ -2135,6 +2146,9 @@ class BinaryFunction {
21352146
// adjustments.
21362147
void handleAArch64IndirectCall(MCInst &Instruction, const uint64_t Offset);
21372148

2149+
/// Analyze instruction to identify a function reference.
2150+
void analyzeInstructionForFuncReference(const MCInst &Inst);
2151+
21382152
/// Scan function for references to other functions. In relocation mode,
21392153
/// add relocations for external references. In non-relocation mode, detect
21402154
/// and mark new entry points.

bolt/include/bolt/Passes/IdenticalCodeFolding.h

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "bolt/Core/BinaryFunction.h"
1313
#include "bolt/Passes/BinaryPasses.h"
14+
#include "llvm/ADT/SparseBitVector.h"
1415

1516
namespace llvm {
1617
namespace bolt {
@@ -20,22 +21,72 @@ namespace bolt {
2021
///
2122
class IdenticalCodeFolding : public BinaryFunctionPass {
2223
protected:
23-
bool shouldOptimize(const BinaryFunction &BF) const override {
24-
if (BF.hasUnknownControlFlow())
25-
return false;
26-
if (BF.isFolded())
27-
return false;
28-
if (BF.hasSDTMarker())
29-
return false;
30-
return BinaryFunctionPass::shouldOptimize(BF);
31-
}
24+
/// Return true if the function is safe to fold.
25+
bool shouldOptimize(const BinaryFunction &BF) const override;
3226

3327
public:
28+
/// Identical code folding (ICF) levels.
enum class ICFLevel {
29+
None, ///< No ICF. (Default)
30+
Safe, ///< Safe ICF.
31+
All, ///< Aggressive ICF.
32+
};
3433
explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass)
3534
: BinaryFunctionPass(PrintPass) {}
3635

3736
const char *getName() const override { return "identical-code-folding"; }
3837
Error runOnFunctions(BinaryContext &BC) override;
38+
39+
private:
40+
/// Bit vector of memory addresses of vtables.
41+
llvm::SparseBitVector<> VTableBitVector;
42+
43+
/// Return true if the memory address is in a vtable, i.e. if the bit for
/// \p Address was previously set in VTableBitVector.
44+
bool isAddressInVTable(uint64_t Address) const {
45+
// The bit vector is indexed by Address / 8, so one bit covers 8 bytes.
// NOTE(review): presumably 8 is the size of a vtable slot (pointer) on the
// supported targets, and the bit vector index type is narrower than
// uint64_t - confirm that Address / 8 cannot be truncated.
return VTableBitVector.test(Address / 8);
46+
}
47+
48+
/// Mark memory address of a vtable as used by setting the bit for
/// \p Address in VTableBitVector.
48+
void setAddressUsedInVTable(uint64_t Address) {
49+
// Uses the same Address / 8 indexing as isAddressInVTable(), so one bit
// covers 8 bytes.
// NOTE(review): presumably 8 is the size of a vtable slot (pointer) -
// confirm for all supported targets.
VTableBitVector.set(Address / 8);
50+
}
52+
53+
/// Scan symbol table and mark memory addresses of
54+
/// vtables.
55+
void initVTableReferences(const BinaryContext &BC);
56+
57+
/// Analyze code section and relocations and mark functions that are not
58+
/// safe to fold.
59+
void markFunctionsUnsafeToFold(BinaryContext &BC);
60+
61+
/// Process static and dynamic relocations in the data sections to identify
62+
/// function references, and mark them as unsafe to fold. It filters out
63+
/// symbol references that are in vtables.
64+
void analyzeDataRelocations(BinaryContext &BC);
65+
66+
/// Process functions that have been disassembled and mark functions that are
67+
/// used in non-control flow instructions as unsafe to fold.
68+
void analyzeFunctions(BinaryContext &BC);
69+
};
70+
71+
/// Command-line parser for IdenticalCodeFolding::ICFLevel values that, in
/// addition to what the base cl::parser accepts, still accepts the numeric
/// values "0" and "1" and prints a deprecation warning for them.
class DeprecatedICFNumericOptionParser
72+
: public cl::parser<IdenticalCodeFolding::ICFLevel> {
73+
public:
74+
explicit DeprecatedICFNumericOptionParser(cl::Option &O)
75+
: cl::parser<IdenticalCodeFolding::ICFLevel>(O) {}
76+
77+
/// Parse \p Arg into \p Value.
///
/// "0" is mapped to ICFLevel::None and "1" to ICFLevel::All, with a
/// BOLT-WARNING written to errs(). Any other argument is forwarded to the
/// base class parser. Follows the cl::parser convention for the return
/// value: false means the argument was parsed successfully.
bool parse(cl::Option &O, StringRef ArgName, StringRef Arg,
78+
IdenticalCodeFolding::ICFLevel &Value) {
79+
// Legacy numeric spelling of the option.
if (Arg == "0" || Arg == "1") {
80+
Value = (Arg == "0") ? IdenticalCodeFolding::ICFLevel::None
81+
: IdenticalCodeFolding::ICFLevel::All;
82+
errs() << formatv("BOLT-WARNING: specifying numeric value \"{0}\" "
83+
"for option -{1} is deprecated\n",
84+
Arg, ArgName);
85+
// false == no error: the deprecated value is accepted, only warned about.
return false;
86+
}
87+
// Everything else is handled by the base class parser.
return cl::parser<IdenticalCodeFolding::ICFLevel>::parse(O, ArgName, Arg,
88+
Value);
89+
}
3990
};
4091

4192
} // namespace bolt

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,20 @@ MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) {
15041504
return Target;
15051505
}
15061506

1507+
// Inspect the operands of \p Inst returned by MCPlus::primeOperands() and,
// for every operand that is a plain symbol reference resolving to a
// BinaryFunction, mark that function as having its address taken.
void BinaryFunction::analyzeInstructionForFuncReference(const MCInst &Inst) {
1508+
for (const MCOperand &Op : MCPlus::primeOperands(Inst)) {
1509+
// Only expression operands can carry a symbol reference.
if (!Op.isExpr())
1510+
continue;
1511+
const MCExpr &Expr = *Op.getExpr();
1512+
// Anything other than a bare SymbolRef expression is skipped.
if (Expr.getKind() != MCExpr::SymbolRef)
1513+
continue;
1514+
const MCSymbol &Symbol = cast<MCSymbolRefExpr>(Expr).getSymbol();
1515+
// Set HasAddressTaken for a function regardless of the ICF level.
// Symbols that do not map to a function are ignored.
1516+
if (BinaryFunction *BF = BC.getFunctionForSymbol(&Symbol))
1517+
BF->setHasAddressTaken(true);
1518+
}
1519+
}
1520+
15071521
bool BinaryFunction::scanExternalRefs() {
15081522
bool Success = true;
15091523
bool DisassemblyFailed = false;
@@ -1624,6 +1638,8 @@ bool BinaryFunction::scanExternalRefs() {
16241638
[](const MCOperand &Op) { return Op.isExpr(); })) {
16251639
// Skip assembly if the instruction may not have any symbolic operands.
16261640
continue;
1641+
} else {
1642+
analyzeInstructionForFuncReference(Instruction);
16271643
}
16281644

16291645
// Emit the instruction using temp emitter and generate relocations.

0 commit comments

Comments
 (0)