Skip to content

Commit 6e3552f

Browse files
Merge branch 'llvm:main' into gh-101657
2 parents 9dcf6b9 + 127d77d commit 6e3552f

File tree

4,187 files changed

+286943
-126478
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

4,187 files changed

+286943
-126478
lines changed

.ci/all_requirements.txt

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,9 @@ ml-dtypes==0.5.1 ; python_version < "3.13" \
194194
--hash=sha256:d13755f8e8445b3870114e5b6240facaa7cb0c3361e54beba3e07fa912a6e12b \
195195
--hash=sha256:fd918d4e6a4e0c110e2e05be7a7814d10dc1b95872accbf6512b80a109b71ae1
196196
# via -r mlir/python/requirements.txt
197-
nanobind==2.7.0 \
198-
--hash=sha256:73b12d0e751d140d6c1bf4b215e18818a8debfdb374f08dc3776ad208d808e74 \
199-
--hash=sha256:f9f1b160580c50dcf37b6495a0fd5ec61dc0d95dae5f8004f87dd9ad7eb46b34
197+
nanobind==2.9.2 \
198+
--hash=sha256:c37957ffd5eac7eda349cff3622ecd32e5ee1244ecc912c99b5bc8188bafd16e \
199+
--hash=sha256:e7608472de99d375759814cab3e2c94aba3f9ec80e62cfef8ced495ca5c27d6e
200200
# via -r mlir/python/requirements.txt
201201
numpy==2.0.2 \
202202
--hash=sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a \
@@ -383,6 +383,10 @@ swig==4.3.1 \
383383
--hash=sha256:efec16327029f682f649a26da726bb0305be8800bd0f1fa3e81bf0769cf5b476 \
384384
--hash=sha256:fc496c0d600cf1bb2d91e28d3d6eae9c4301e5ea7a0dec5a4281b5efed4245a8
385385
# via -r lldb/test/requirements.txt
386+
typing-extensions==4.15.0 \
387+
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
388+
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
389+
# via -r mlir/python/requirements.txt
386390
urllib3==2.5.0 \
387391
--hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \
388392
--hash=sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc

.ci/monolithic-windows.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ start-group "ninja"
5656
# Targets are not escaped as they are passed as separate arguments.
5757
ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log
5858

59-
start-group "ninja runtimes"
60-
61-
ninja -C "${BUILD_DIR}" -k 0 ${runtimes_targets} |& tee ninja_runtimes.log
59+
# Build the runtimes only when runtimes targets were actually requested.
# The guard has to test the very same variable that is handed to ninja
# below; testing a differently spelled (never assigned) name would make the
# condition permanently false and silently skip the runtimes build.
if [[ "${runtimes_targets}" != "" ]]; then
  start-group "ninja runtimes"

  ninja -C "${BUILD_DIR}" -k 0 ${runtimes_targets} |& tee ninja_runtimes.log
fi

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
/runtimes/ @llvm/reviewers-libcxx
1818

1919
/llvm/lib/Analysis/BasicAliasAnalysis.cpp @nikic
20+
/llvm/lib/Analysis/HashRecognize.cpp @artagnon @pfusik
2021
/llvm/lib/Analysis/InstructionSimplify.cpp @nikic
2122
/llvm/lib/Analysis/LazyValueInfo.cpp @nikic
2223
/llvm/lib/Analysis/ScalarEvolution.cpp @nikic

bolt/docs/CommandLineArgumentReference.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@
637637

638638
- `--inline-memcpy`
639639

640-
Inline memcpy using 'rep movsb' instruction (X86-only)
640+
Inline memcpy using optimized instruction sequences (X86: 'rep movsb', AArch64: width-optimized register operations)
641641

642642
- `--inline-small-functions`
643643

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#ifndef BOLT_CORE_MCPLUSBUILDER_H
1515
#define BOLT_CORE_MCPLUSBUILDER_H
1616

17+
#include "bolt/Core/BinaryBasicBlock.h"
1718
#include "bolt/Core/MCPlus.h"
1819
#include "bolt/Core/Relocation.h"
1920
#include "llvm/ADT/ArrayRef.h"
@@ -1902,13 +1903,38 @@ class MCPlusBuilder {
19021903
return {};
19031904
}
19041905

1906+
/// Find memcpy size in bytes by using preceding instructions.
1907+
/// Returns std::nullopt if size cannot be determined (no-op for most
1908+
/// targets).
1909+
virtual std::optional<uint64_t>
1910+
findMemcpySizeInBytes(const BinaryBasicBlock &BB,
1911+
BinaryBasicBlock::iterator CallInst) const {
1912+
return std::nullopt;
1913+
}
1914+
19051915
/// Creates inline memcpy instruction. If \p ReturnEnd is true, then return
19061916
/// (dest + n) instead of dest.
19071917
virtual InstructionListType createInlineMemcpy(bool ReturnEnd) const {
19081918
llvm_unreachable("not implemented");
19091919
return {};
19101920
}
19111921

1922+
/// Creates size-aware inline memcpy instruction. If \p KnownSize is provided,
1923+
/// generates optimized code for that specific size. Falls back to regular
1924+
/// createInlineMemcpy if size is unknown or not needed (e.g. with X86).
1925+
virtual InstructionListType
1926+
createInlineMemcpy(bool ReturnEnd, std::optional<uint64_t> KnownSize) const {
1927+
return createInlineMemcpy(ReturnEnd);
1928+
}
1929+
1930+
/// Extract immediate value from move instruction that sets the given
1931+
/// register. Returns the immediate value if the instruction is a
1932+
/// move-immediate to TargetReg.
1933+
virtual std::optional<uint64_t>
1934+
extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const {
1935+
return std::nullopt;
1936+
}
1937+
19121938
/// Create a target-specific relocation out of the \p Fixup.
19131939
/// Note that not every fixup could be converted into a relocation.
19141940
virtual std::optional<Relocation>

bolt/lib/Core/BinaryContext.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,15 +207,15 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
207207
Twine("BOLT-ERROR: ", Error));
208208

209209
std::unique_ptr<const MCRegisterInfo> MRI(
210-
TheTarget->createMCRegInfo(TripleName));
210+
TheTarget->createMCRegInfo(TheTriple));
211211
if (!MRI)
212212
return createStringError(
213213
make_error_code(std::errc::not_supported),
214214
Twine("BOLT-ERROR: no register info for target ", TripleName));
215215

216216
// Set up disassembler.
217217
std::unique_ptr<MCAsmInfo> AsmInfo(
218-
TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
218+
TheTarget->createMCAsmInfo(*MRI, TheTriple, MCTargetOptions()));
219219
if (!AsmInfo)
220220
return createStringError(
221221
make_error_code(std::errc::not_supported),
@@ -227,7 +227,7 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
227227
AsmInfo->setAllowAtInName(true);
228228

229229
std::unique_ptr<const MCSubtargetInfo> STI(
230-
TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
230+
TheTarget->createMCSubtargetInfo(TheTriple, "", FeaturesStr));
231231
if (!STI)
232232
return createStringError(
233233
make_error_code(std::errc::not_supported),

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1843,7 +1843,7 @@ Error StripRepRet::runOnFunctions(BinaryContext &BC) {
18431843
}
18441844

18451845
Error InlineMemcpy::runOnFunctions(BinaryContext &BC) {
1846-
if (!BC.isX86())
1846+
if (!BC.isX86() && !BC.isAArch64())
18471847
return Error::success();
18481848

18491849
uint64_t NumInlined = 0;
@@ -1866,8 +1866,16 @@ Error InlineMemcpy::runOnFunctions(BinaryContext &BC) {
18661866
const bool IsMemcpy8 = (CalleeSymbol->getName() == "_memcpy8");
18671867
const bool IsTailCall = BC.MIB->isTailCall(Inst);
18681868

1869+
// Extract size from preceding instructions (AArch64 only).
1870+
// Pattern: MOV X2, #nb-bytes; BL memcpy src, dest, X2.
1871+
std::optional<uint64_t> KnownSize =
1872+
BC.MIB->findMemcpySizeInBytes(BB, II);
1873+
1874+
if (BC.isAArch64() && (!KnownSize.has_value() || *KnownSize > 64))
1875+
continue;
1876+
18691877
const InstructionListType NewCode =
1870-
BC.MIB->createInlineMemcpy(IsMemcpy8);
1878+
BC.MIB->createInlineMemcpy(IsMemcpy8, KnownSize);
18711879
II = BB.replaceInstruction(II, NewCode);
18721880
std::advance(II, NewCode.size() - 1);
18731881
if (IsTailCall) {

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,9 @@ static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
248248

249249
static cl::opt<bool> StringOps(
250250
"inline-memcpy",
251-
cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
251+
cl::desc(
252+
"inline memcpy using size-specific optimized instructions "
253+
"(X86: 'rep movsb', AArch64: width-optimized register operations)"),
252254
cl::cat(BoltOptCategory));
253255

254256
static cl::opt<bool> StripRepRet(

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 117 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2517,10 +2517,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
25172517
createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf,
25182518
unsigned CodePointerSize) const override {
25192519
unsigned int I = 0;
2520-
InstructionListType Instrs(IsLeaf ? 12 : 10);
2520+
InstructionListType Instrs(10);
25212521

2522-
if (IsLeaf)
2523-
createStackPointerIncrement(Instrs[I++], 128);
25242522
createPushRegisters(Instrs[I++], AArch64::X0, AArch64::X1);
25252523
getSystemFlag(Instrs[I++], AArch64::X1);
25262524
InstructionListType Addr = materializeAddress(Target, Ctx, AArch64::X0);
@@ -2535,8 +2533,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
25352533
loadReg(Instrs[I++], AArch64::X2, AArch64::SP);
25362534
setSystemFlag(Instrs[I++], AArch64::X1);
25372535
createPopRegisters(Instrs[I++], AArch64::X0, AArch64::X1);
2538-
if (IsLeaf)
2539-
createStackPointerDecrement(Instrs[I++], 128);
25402536
return Instrs;
25412537
}
25422538

@@ -2624,6 +2620,122 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
26242620
getInstructionSize(const MCInst &Inst) const override {
26252621
return 4;
26262622
}
2623+
2624+
std::optional<uint64_t>
2625+
extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const override {
2626+
// Match MOVZ instructions (both X and W register variants) with no shift.
2627+
if ((Inst.getOpcode() == AArch64::MOVZXi ||
2628+
Inst.getOpcode() == AArch64::MOVZWi) &&
2629+
Inst.getOperand(2).getImm() == 0 &&
2630+
getAliases(TargetReg)[Inst.getOperand(0).getReg()])
2631+
return Inst.getOperand(1).getImm();
2632+
return std::nullopt;
2633+
}
2634+
2635+
std::optional<uint64_t>
2636+
findMemcpySizeInBytes(const BinaryBasicBlock &BB,
2637+
BinaryBasicBlock::iterator CallInst) const override {
2638+
MCPhysReg SizeReg = getIntArgRegister(2);
2639+
if (SizeReg == getNoRegister())
2640+
return std::nullopt;
2641+
2642+
BitVector WrittenRegs(RegInfo->getNumRegs());
2643+
const BitVector &SizeRegAliases = getAliases(SizeReg);
2644+
2645+
for (auto InstIt = BB.begin(); InstIt != CallInst; ++InstIt) {
2646+
const MCInst &Inst = *InstIt;
2647+
WrittenRegs.reset();
2648+
getWrittenRegs(Inst, WrittenRegs);
2649+
2650+
if (WrittenRegs.anyCommon(SizeRegAliases))
2651+
return extractMoveImmediate(Inst, SizeReg);
2652+
}
2653+
return std::nullopt;
2654+
}
2655+
2656+
/// Build the inline replacement for a memcpy call of exactly \p KnownSize
/// bytes. \p KnownSize must hold a value (asserted). When \p ReturnEnd is
/// true an extra ADD leaves dest + size in X0 instead of dest.
InstructionListType
createInlineMemcpy(bool ReturnEnd,
                   std::optional<uint64_t> KnownSize) const override {
  assert(KnownSize.has_value() &&
         "AArch64 memcpy inlining requires known size");
  const uint64_t NumBytes = *KnownSize;

  // The copy itself.
  InstructionListType Seq;
  generateSizeSpecificMemcpy(Seq, NumBytes);
  if (!ReturnEnd)
    return Seq;

  // _memcpy8 flavour: X0 = X0 + NumBytes so the caller receives dest+size.
  Seq.emplace_back(MCInstBuilder(AArch64::ADDXri)
                       .addReg(AArch64::X0)
                       .addReg(AArch64::X0)
                       .addImm(NumBytes)
                       .addImm(0));
  return Seq;
}
2675+
2676+
/// Append to \p Code the load/store pairs that copy \p Size bytes from the
/// address in X1 to the address in X0, and return a copy of \p Code.
///
/// A 32-byte copy uses two Q-register pairs through Q16 and Q17. Every other
/// size is decomposed greedily into 16/8/4/2/1-byte accesses, widest first;
/// for sizes 1, 2, 4, 8 and 16 that decomposition is a single pair at
/// offset 0. Sizes above 64 are rejected by an assert; the caller is expected
/// to have filtered them out already.
InstructionListType generateSizeSpecificMemcpy(InstructionListType &Code,
                                               uint64_t Size) const {
  // Emit "load Reg from [X1 + Imm]" followed by "store Reg to [X0 + Imm]".
  auto EmitCopy = [&Code](unsigned LoadOpc, unsigned StoreOpc, unsigned Reg,
                          unsigned Imm) {
    Code.emplace_back(
        MCInstBuilder(LoadOpc).addReg(Reg).addReg(AArch64::X1).addImm(Imm));
    Code.emplace_back(
        MCInstBuilder(StoreOpc).addReg(Reg).addReg(AArch64::X0).addImm(Imm));
  };

  // 32 bytes is the one size that uses two distinct vector registers.
  if (Size == 32) {
    EmitCopy(AArch64::LDRQui, AArch64::STRQui, AArch64::Q16, 0);
    EmitCopy(AArch64::LDRQui, AArch64::STRQui, AArch64::Q17, 1);
    return Code;
  }

  assert(Size <= 64 &&
         "Size should be <= 64 bytes for AArch64 memcpy inlining");

  // Access widths from widest to narrowest, with the opcodes and scratch
  // register used for each.
  struct CopyWidth {
    uint64_t Bytes;
    unsigned LoadOpc;
    unsigned StoreOpc;
    unsigned Reg;
  };
  const CopyWidth Widths[] = {
      {16, AArch64::LDRQui, AArch64::STRQui, AArch64::Q16},
      {8, AArch64::LDRXui, AArch64::STRXui, AArch64::X9},
      {4, AArch64::LDRWui, AArch64::STRWui, AArch64::W9},
      {2, AArch64::LDRHHui, AArch64::STRHHui, AArch64::W9},
      {1, AArch64::LDRBBui, AArch64::STRBBui, AArch64::W9}};

  // Copied counts the bytes already covered. The immediate handed to each
  // pair is the byte offset divided by the access width.
  uint64_t Copied = 0;
  for (const CopyWidth &W : Widths)
    for (; Size - Copied >= W.Bytes; Copied += W.Bytes)
      EmitCopy(W.LoadOpc, W.StoreOpc, W.Reg, Copied / W.Bytes);

  return Code;
}
26272739
};
26282740

26292741
} // end anonymous namespace

bolt/runtime/instr.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,9 +714,11 @@ static char *getBinaryPath() {
714714
uint32_t Ret = __readlink(FindBuf, TargetPath, sizeof(TargetPath));
715715
assert(Ret != -1 && Ret != BufSize, "readlink error");
716716
TargetPath[Ret] = '\0';
717+
__close(FDdir);
717718
return TargetPath;
718719
}
719720
}
721+
__close(FDdir);
720722
return nullptr;
721723
}
722724

0 commit comments

Comments
 (0)