Skip to content

Commit dfa4144

Browse files
committed
Merge branch 'main' of https://github.com/llvm/llvm-project into res-create-2-use-methods-single-res
2 parents f64525e + 0d40450 commit dfa4144

File tree

2,653 files changed

+280369
-85053
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,653 files changed

+280369
-85053
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
/runtimes/ @llvm/reviewers-libcxx
1818

1919
/llvm/lib/Analysis/BasicAliasAnalysis.cpp @nikic
20+
/llvm/lib/Analysis/HashRecognize.cpp @artagnon @pfusik
2021
/llvm/lib/Analysis/InstructionSimplify.cpp @nikic
2122
/llvm/lib/Analysis/LazyValueInfo.cpp @nikic
2223
/llvm/lib/Analysis/ScalarEvolution.cpp @nikic

.github/new-prs-labeler.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,14 @@ clang:openmp:
10901090
- llvm/unittests/Frontend/OpenMP*
10911091
- llvm/test/Transforms/OpenMP/**
10921092

1093+
clang:temporal-safety:
1094+
- clang/include/clang/Analysis/Analyses/LifetimeSafety*
1095+
- clang/lib/Analysis/LifetimeSafety*
1096+
- clang/unittests/Analysis/LifetimeSafety*
1097+
- clang/test/Sema/*lifetime-safety*
1098+
- clang/test/Sema/*lifetime-analysis*
1099+
- clang/test/Analysis/LifetimeSafety/**
1100+
10931101
clang:as-a-library:
10941102
- clang/tools/libclang/**
10951103
- clang/bindings/**

bolt/docs/CommandLineArgumentReference.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@
637637

638638
- `--inline-memcpy`
639639

640-
Inline memcpy using 'rep movsb' instruction (X86-only)
640+
Inline memcpy using optimized instruction sequences (X86: 'rep movsb', AArch64: width-optimized register operations)
641641

642642
- `--inline-small-functions`
643643

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#ifndef BOLT_CORE_MCPLUSBUILDER_H
1515
#define BOLT_CORE_MCPLUSBUILDER_H
1616

17+
#include "bolt/Core/BinaryBasicBlock.h"
1718
#include "bolt/Core/MCPlus.h"
1819
#include "bolt/Core/Relocation.h"
1920
#include "llvm/ADT/ArrayRef.h"
@@ -1902,13 +1903,38 @@ class MCPlusBuilder {
19021903
return {};
19031904
}
19041905

1906+
/// Find memcpy size in bytes by using preceding instructions.
1907+
/// Returns std::nullopt if size cannot be determined (no-op for most
1908+
/// targets).
1909+
virtual std::optional<uint64_t>
1910+
findMemcpySizeInBytes(const BinaryBasicBlock &BB,
1911+
BinaryBasicBlock::iterator CallInst) const {
1912+
return std::nullopt;
1913+
}
1914+
19051915
/// Creates inline memcpy instruction. If \p ReturnEnd is true, then return
19061916
/// (dest + n) instead of dest.
19071917
virtual InstructionListType createInlineMemcpy(bool ReturnEnd) const {
19081918
llvm_unreachable("not implemented");
19091919
return {};
19101920
}
19111921

1922+
/// Creates size-aware inline memcpy instruction. If \p KnownSize is provided,
1923+
/// generates optimized code for that specific size. Falls back to regular
1924+
/// createInlineMemcpy if size is unknown or not needed (e.g. with X86).
1925+
virtual InstructionListType
1926+
createInlineMemcpy(bool ReturnEnd, std::optional<uint64_t> KnownSize) const {
1927+
return createInlineMemcpy(ReturnEnd);
1928+
}
1929+
1930+
/// Extract immediate value from move instruction that sets the given
1931+
/// register. Returns the immediate value if the instruction is a
1932+
/// move-immediate to TargetReg.
1933+
virtual std::optional<uint64_t>
1934+
extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const {
1935+
return std::nullopt;
1936+
}
1937+
19121938
/// Create a target-specific relocation out of the \p Fixup.
19131939
/// Note that not every fixup could be converted into a relocation.
19141940
virtual std::optional<Relocation>

bolt/lib/Core/BinaryContext.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,15 +207,15 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
207207
Twine("BOLT-ERROR: ", Error));
208208

209209
std::unique_ptr<const MCRegisterInfo> MRI(
210-
TheTarget->createMCRegInfo(TripleName));
210+
TheTarget->createMCRegInfo(TheTriple));
211211
if (!MRI)
212212
return createStringError(
213213
make_error_code(std::errc::not_supported),
214214
Twine("BOLT-ERROR: no register info for target ", TripleName));
215215

216216
// Set up disassembler.
217217
std::unique_ptr<MCAsmInfo> AsmInfo(
218-
TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
218+
TheTarget->createMCAsmInfo(*MRI, TheTriple, MCTargetOptions()));
219219
if (!AsmInfo)
220220
return createStringError(
221221
make_error_code(std::errc::not_supported),
@@ -227,7 +227,7 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
227227
AsmInfo->setAllowAtInName(true);
228228

229229
std::unique_ptr<const MCSubtargetInfo> STI(
230-
TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
230+
TheTarget->createMCSubtargetInfo(TheTriple, "", FeaturesStr));
231231
if (!STI)
232232
return createStringError(
233233
make_error_code(std::errc::not_supported),

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1843,7 +1843,7 @@ Error StripRepRet::runOnFunctions(BinaryContext &BC) {
18431843
}
18441844

18451845
Error InlineMemcpy::runOnFunctions(BinaryContext &BC) {
1846-
if (!BC.isX86())
1846+
if (!BC.isX86() && !BC.isAArch64())
18471847
return Error::success();
18481848

18491849
uint64_t NumInlined = 0;
@@ -1866,8 +1866,16 @@ Error InlineMemcpy::runOnFunctions(BinaryContext &BC) {
18661866
const bool IsMemcpy8 = (CalleeSymbol->getName() == "_memcpy8");
18671867
const bool IsTailCall = BC.MIB->isTailCall(Inst);
18681868

1869+
// Extract size from preceding instructions (AArch64 only).
1870+
// Pattern: MOV X2, #nb-bytes; BL memcpy src, dest, X2.
1871+
std::optional<uint64_t> KnownSize =
1872+
BC.MIB->findMemcpySizeInBytes(BB, II);
1873+
1874+
if (BC.isAArch64() && (!KnownSize.has_value() || *KnownSize > 64))
1875+
continue;
1876+
18691877
const InstructionListType NewCode =
1870-
BC.MIB->createInlineMemcpy(IsMemcpy8);
1878+
BC.MIB->createInlineMemcpy(IsMemcpy8, KnownSize);
18711879
II = BB.replaceInstruction(II, NewCode);
18721880
std::advance(II, NewCode.size() - 1);
18731881
if (IsTailCall) {

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,9 @@ static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
248248

249249
static cl::opt<bool> StringOps(
250250
"inline-memcpy",
251-
cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
251+
cl::desc(
252+
"inline memcpy using size-specific optimized instructions "
253+
"(X86: 'rep movsb', AArch64: width-optimized register operations)"),
252254
cl::cat(BoltOptCategory));
253255

254256
static cl::opt<bool> StripRepRet(

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 118 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2517,21 +2517,17 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
25172517
createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf,
25182518
unsigned CodePointerSize) const override {
25192519
unsigned int I = 0;
2520-
InstructionListType Instrs(10);
2520+
InstructionListType Instrs(6);
25212521

25222522
createPushRegisters(Instrs[I++], AArch64::X0, AArch64::X1);
2523-
getSystemFlag(Instrs[I++], AArch64::X1);
25242523
InstructionListType Addr = materializeAddress(Target, Ctx, AArch64::X0);
25252524
assert(Addr.size() == 2 && "Invalid Addr size");
25262525
std::copy(Addr.begin(), Addr.end(), Instrs.begin() + I);
25272526
I += Addr.size();
2528-
storeReg(Instrs[I++], AArch64::X2, AArch64::SP);
2529-
InstructionListType Insts = createIncMemory(AArch64::X0, AArch64::X2);
2527+
InstructionListType Insts = createIncMemory(AArch64::X0, AArch64::X1);
25302528
assert(Insts.size() == 2 && "Invalid Insts size");
25312529
std::copy(Insts.begin(), Insts.end(), Instrs.begin() + I);
25322530
I += Insts.size();
2533-
loadReg(Instrs[I++], AArch64::X2, AArch64::SP);
2534-
setSystemFlag(Instrs[I++], AArch64::X1);
25352531
createPopRegisters(Instrs[I++], AArch64::X0, AArch64::X1);
25362532
return Instrs;
25372533
}
@@ -2620,6 +2616,122 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
26202616
getInstructionSize(const MCInst &Inst) const override {
26212617
return 4;
26222618
}
2619+
2620+
std::optional<uint64_t>
2621+
extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const override {
2622+
// Match MOVZ instructions (both X and W register variants) with no shift.
2623+
if ((Inst.getOpcode() == AArch64::MOVZXi ||
2624+
Inst.getOpcode() == AArch64::MOVZWi) &&
2625+
Inst.getOperand(2).getImm() == 0 &&
2626+
getAliases(TargetReg)[Inst.getOperand(0).getReg()])
2627+
return Inst.getOperand(1).getImm();
2628+
return std::nullopt;
2629+
}
2630+
2631+
std::optional<uint64_t>
2632+
findMemcpySizeInBytes(const BinaryBasicBlock &BB,
2633+
BinaryBasicBlock::iterator CallInst) const override {
2634+
MCPhysReg SizeReg = getIntArgRegister(2);
2635+
if (SizeReg == getNoRegister())
2636+
return std::nullopt;
2637+
2638+
BitVector WrittenRegs(RegInfo->getNumRegs());
2639+
const BitVector &SizeRegAliases = getAliases(SizeReg);
2640+
2641+
for (auto InstIt = BB.begin(); InstIt != CallInst; ++InstIt) {
2642+
const MCInst &Inst = *InstIt;
2643+
WrittenRegs.reset();
2644+
getWrittenRegs(Inst, WrittenRegs);
2645+
2646+
if (WrittenRegs.anyCommon(SizeRegAliases))
2647+
return extractMoveImmediate(Inst, SizeReg);
2648+
}
2649+
return std::nullopt;
2650+
}
2651+
2652+
/// Build the inline replacement for a memcpy call of a known size.
///
/// \p KnownSize must hold a value (asserted). The copy itself is produced by
/// generateSizeSpecificMemcpy; when \p ReturnEnd is set, an extra
/// "ADD X0, X0, #size" is appended so X0 holds dest + size instead of dest.
InstructionListType
createInlineMemcpy(bool ReturnEnd,
                   std::optional<uint64_t> KnownSize) const override {
  assert(KnownSize.has_value() &&
         "AArch64 memcpy inlining requires known size");
  const uint64_t NumBytes = *KnownSize;

  InstructionListType Insts;
  generateSizeSpecificMemcpy(Insts, NumBytes);

  // Plain memcpy returns dest, which is already in X0: nothing more to emit.
  if (!ReturnEnd)
    return Insts;

  // _memcpy8 returns dest + size, so advance X0 past the copied bytes.
  // NOTE(review): the trailing 0 is presumably the shift operand — confirm.
  Insts.emplace_back(MCInstBuilder(AArch64::ADDXri)
                         .addReg(AArch64::X0)
                         .addReg(AArch64::X0)
                         .addImm(NumBytes)
                         .addImm(0));
  return Insts;
}
2671+
2672+
/// Append to \p Code a straight-line sequence of load/store pairs that copies
/// exactly \p Size bytes from the address in X1 to the address in X0, and
/// return a copy of the resulting list.
///
/// A 32-byte copy uses two different Q registers (Q16 and Q17). Every other
/// size is split greedily into 16-, 8-, 4-, 2- and 1-byte accesses, largest
/// first; for 1, 2, 4, 8 and 16 bytes that split is a single load/store pair.
/// Sizes above 64 bytes are not supported (asserted).
InstructionListType generateSizeSpecificMemcpy(InstructionListType &Code,
                                               uint64_t Size) const {
  // Emit "load Tmp, [X1, Slot]" followed by "store Tmp, [X0, Slot]".
  // Slot is expressed in units of the access width, not in bytes.
  const auto EmitCopy = [&Code](unsigned LdOpc, unsigned StOpc, unsigned Tmp,
                                unsigned Slot = 0) {
    Code.emplace_back(
        MCInstBuilder(LdOpc).addReg(Tmp).addReg(AArch64::X1).addImm(Slot));
    Code.emplace_back(
        MCInstBuilder(StOpc).addReg(Tmp).addReg(AArch64::X0).addImm(Slot));
  };

  // 32 bytes is the one size that gets a hand-picked sequence: both halves
  // go through distinct temporaries.
  if (Size == 32) {
    EmitCopy(AArch64::LDRQui, AArch64::STRQui, AArch64::Q16, 0);
    EmitCopy(AArch64::LDRQui, AArch64::STRQui, AArch64::Q17, 1);
    return Code;
  }

  // Caller should have already filtered out sizes > 64 bytes.
  assert(Size <= 64 &&
         "Size should be <= 64 bytes for AArch64 memcpy inlining");

  // Access widths from widest to narrowest, with the opcodes and scratch
  // register used for each.
  struct CopyStep {
    uint64_t Bytes;
    unsigned LoadOpc;
    unsigned StoreOpc;
    unsigned TmpReg;
  };
  const CopyStep Steps[] = {
      {16, AArch64::LDRQui, AArch64::STRQui, AArch64::Q16},
      {8, AArch64::LDRXui, AArch64::STRXui, AArch64::X9},
      {4, AArch64::LDRWui, AArch64::STRWui, AArch64::W9},
      {2, AArch64::LDRHHui, AArch64::STRHHui, AArch64::W9},
      {1, AArch64::LDRBBui, AArch64::STRBBui, AArch64::W9}};

  // Greedy split: take as many accesses of each width as still fit. Because
  // widths only shrink, the running byte offset is always a multiple of the
  // current width, so the division below is exact.
  uint64_t Copied = 0;
  for (const CopyStep &Step : Steps)
    for (; Size - Copied >= Step.Bytes; Copied += Step.Bytes)
      EmitCopy(Step.LoadOpc, Step.StoreOpc, Step.TmpReg,
               Copied / Step.Bytes);

  return Code;
}
26232735
};
26242736

26252737
} // end anonymous namespace

0 commit comments

Comments
 (0)