Skip to content

Commit a73d763

Browse files
authored
Merge branch 'llvm:main' into fshl_to_REV_instr
2 parents ac5d7a3 + c93af22 commit a73d763

File tree

95 files changed

+3572
-2416
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

95 files changed

+3572
-2416
lines changed

bolt/lib/Core/MCPlusBuilder.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -442,10 +442,10 @@ void MCPlusBuilder::getUsedRegs(const MCInst &Inst, BitVector &Regs) const {
442442
for (MCPhysReg ImplicitUse : InstInfo.implicit_uses())
443443
Regs |= getAliases(ImplicitUse, /*OnlySmaller=*/true);
444444

445-
for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
446-
if (!Inst.getOperand(I).isReg())
445+
for (const MCOperand &Operand : useOperands(Inst)) {
446+
if (!Operand.isReg())
447447
continue;
448-
Regs |= getAliases(Inst.getOperand(I).getReg(), /*OnlySmaller=*/true);
448+
Regs |= getAliases(Operand.getReg(), /*OnlySmaller=*/true);
449449
}
450450
}
451451

bolt/unittests/Core/MCPlusBuilder.cpp

Lines changed: 122 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#ifdef AARCH64_AVAILABLE
1010
#include "AArch64Subtarget.h"
11+
#include "MCTargetDesc/AArch64MCTargetDesc.h"
1112
#endif // AARCH64_AVAILABLE
1213

1314
#ifdef X86_AVAILABLE
@@ -19,6 +20,7 @@
1920
#include "bolt/Rewrite/RewriteInstance.h"
2021
#include "llvm/BinaryFormat/ELF.h"
2122
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
23+
#include "llvm/MC/MCInstBuilder.h"
2224
#include "llvm/Support/TargetSelect.h"
2325
#include "gtest/gtest.h"
2426

@@ -70,16 +72,28 @@ struct MCPlusBuilderTester : public testing::TestWithParam<Triple::ArchType> {
7072
BC->MRI.get(), BC->STI.get())));
7173
}
7274

75+
void assertRegMask(const BitVector &RegMask,
76+
std::initializer_list<MCPhysReg> ExpectedRegs) {
77+
ASSERT_EQ(RegMask.count(), ExpectedRegs.size());
78+
for (MCPhysReg Reg : ExpectedRegs)
79+
ASSERT_TRUE(RegMask[Reg]) << "Expected " << BC->MRI->getName(Reg) << ".";
80+
}
81+
82+
void assertRegMask(std::function<void(BitVector &)> FillRegMask,
83+
std::initializer_list<MCPhysReg> ExpectedRegs) {
84+
BitVector RegMask(BC->MRI->getNumRegs());
85+
FillRegMask(RegMask);
86+
assertRegMask(RegMask, ExpectedRegs);
87+
}
88+
7389
void testRegAliases(Triple::ArchType Arch, uint64_t Register,
74-
uint64_t *Aliases, size_t Count,
90+
std::initializer_list<MCPhysReg> ExpectedAliases,
7591
bool OnlySmaller = false) {
7692
if (GetParam() != Arch)
7793
GTEST_SKIP();
7894

7995
const BitVector &BV = BC->MIB->getAliases(Register, OnlySmaller);
80-
ASSERT_EQ(BV.count(), Count);
81-
for (size_t I = 0; I < Count; ++I)
82-
ASSERT_TRUE(BV[Aliases[I]]);
96+
assertRegMask(BV, ExpectedAliases);
8397
}
8498

8599
char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
@@ -94,17 +108,15 @@ INSTANTIATE_TEST_SUITE_P(AArch64, MCPlusBuilderTester,
94108
::testing::Values(Triple::aarch64));
95109

96110
TEST_P(MCPlusBuilderTester, AliasX0) {
97-
uint64_t AliasesX0[] = {AArch64::W0, AArch64::W0_HI,
98-
AArch64::X0, AArch64::W0_W1,
99-
AArch64::X0_X1, AArch64::X0_X1_X2_X3_X4_X5_X6_X7};
100-
size_t AliasesX0Count = sizeof(AliasesX0) / sizeof(*AliasesX0);
101-
testRegAliases(Triple::aarch64, AArch64::X0, AliasesX0, AliasesX0Count);
111+
testRegAliases(Triple::aarch64, AArch64::X0,
112+
{AArch64::W0, AArch64::W0_HI, AArch64::X0, AArch64::W0_W1,
113+
AArch64::X0_X1, AArch64::X0_X1_X2_X3_X4_X5_X6_X7});
102114
}
103115

104116
TEST_P(MCPlusBuilderTester, AliasSmallerX0) {
105-
uint64_t AliasesX0[] = {AArch64::W0, AArch64::W0_HI, AArch64::X0};
106-
size_t AliasesX0Count = sizeof(AliasesX0) / sizeof(*AliasesX0);
107-
testRegAliases(Triple::aarch64, AArch64::X0, AliasesX0, AliasesX0Count, true);
117+
testRegAliases(Triple::aarch64, AArch64::X0,
118+
{AArch64::W0, AArch64::W0_HI, AArch64::X0},
119+
/*OnlySmaller=*/true);
108120
}
109121

110122
TEST_P(MCPlusBuilderTester, AArch64_CmpJE) {
@@ -155,6 +167,100 @@ TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) {
155167
ASSERT_EQ(Label, BB->getLabel());
156168
}
157169

170+
TEST_P(MCPlusBuilderTester, testAccessedRegsImplicitDef) {
171+
if (GetParam() != Triple::aarch64)
172+
GTEST_SKIP();
173+
174+
// adds x0, x5, #42
175+
MCInst Inst = MCInstBuilder(AArch64::ADDSXri)
176+
.addReg(AArch64::X0)
177+
.addReg(AArch64::X5)
178+
.addImm(42)
179+
.addImm(0);
180+
181+
assertRegMask([&](BitVector &BV) { BC->MIB->getClobberedRegs(Inst, BV); },
182+
{AArch64::NZCV, AArch64::W0, AArch64::X0, AArch64::W0_HI,
183+
AArch64::X0_X1_X2_X3_X4_X5_X6_X7, AArch64::W0_W1,
184+
AArch64::X0_X1});
185+
186+
assertRegMask(
187+
[&](BitVector &BV) { BC->MIB->getTouchedRegs(Inst, BV); },
188+
{AArch64::NZCV, AArch64::W0, AArch64::W5, AArch64::X0, AArch64::X5,
189+
AArch64::W0_HI, AArch64::W5_HI, AArch64::X0_X1_X2_X3_X4_X5_X6_X7,
190+
AArch64::X2_X3_X4_X5_X6_X7_X8_X9, AArch64::X4_X5_X6_X7_X8_X9_X10_X11,
191+
AArch64::W0_W1, AArch64::W4_W5, AArch64::X0_X1, AArch64::X4_X5});
192+
193+
assertRegMask([&](BitVector &BV) { BC->MIB->getWrittenRegs(Inst, BV); },
194+
{AArch64::NZCV, AArch64::W0, AArch64::X0, AArch64::W0_HI});
195+
196+
assertRegMask([&](BitVector &BV) { BC->MIB->getUsedRegs(Inst, BV); },
197+
{AArch64::W5, AArch64::X5, AArch64::W5_HI});
198+
199+
assertRegMask([&](BitVector &BV) { BC->MIB->getSrcRegs(Inst, BV); },
200+
{AArch64::W5, AArch64::X5, AArch64::W5_HI});
201+
}
202+
203+
TEST_P(MCPlusBuilderTester, testAccessedRegsImplicitUse) {
204+
if (GetParam() != Triple::aarch64)
205+
GTEST_SKIP();
206+
207+
// b.eq <label>
208+
MCInst Inst =
209+
MCInstBuilder(AArch64::Bcc)
210+
.addImm(AArch64CC::EQ)
211+
.addImm(0); // <label> - should be Expr, but immediate 0 works too.
212+
213+
assertRegMask([&](BitVector &BV) { BC->MIB->getClobberedRegs(Inst, BV); },
214+
{});
215+
216+
assertRegMask([&](BitVector &BV) { BC->MIB->getTouchedRegs(Inst, BV); },
217+
{AArch64::NZCV});
218+
219+
assertRegMask([&](BitVector &BV) { BC->MIB->getWrittenRegs(Inst, BV); }, {});
220+
221+
assertRegMask([&](BitVector &BV) { BC->MIB->getUsedRegs(Inst, BV); },
222+
{AArch64::NZCV});
223+
224+
assertRegMask([&](BitVector &BV) { BC->MIB->getSrcRegs(Inst, BV); },
225+
{AArch64::NZCV});
226+
}
227+
228+
TEST_P(MCPlusBuilderTester, testAccessedRegsMultipleDefs) {
229+
if (GetParam() != Triple::aarch64)
230+
GTEST_SKIP();
231+
232+
// ldr x0, [x5], #16
233+
MCInst Inst = MCInstBuilder(AArch64::LDRXpost)
234+
.addReg(AArch64::X5)
235+
.addReg(AArch64::X0)
236+
.addReg(AArch64::X5)
237+
.addImm(16);
238+
239+
assertRegMask(
240+
[&](BitVector &BV) { BC->MIB->getClobberedRegs(Inst, BV); },
241+
{AArch64::W0, AArch64::W5, AArch64::X0, AArch64::X5, AArch64::W0_HI,
242+
AArch64::W5_HI, AArch64::X0_X1_X2_X3_X4_X5_X6_X7,
243+
AArch64::X2_X3_X4_X5_X6_X7_X8_X9, AArch64::X4_X5_X6_X7_X8_X9_X10_X11,
244+
AArch64::W0_W1, AArch64::W4_W5, AArch64::X0_X1, AArch64::X4_X5});
245+
246+
assertRegMask(
247+
[&](BitVector &BV) { BC->MIB->getTouchedRegs(Inst, BV); },
248+
{AArch64::W0, AArch64::W5, AArch64::X0, AArch64::X5, AArch64::W0_HI,
249+
AArch64::W5_HI, AArch64::X0_X1_X2_X3_X4_X5_X6_X7,
250+
AArch64::X2_X3_X4_X5_X6_X7_X8_X9, AArch64::X4_X5_X6_X7_X8_X9_X10_X11,
251+
AArch64::W0_W1, AArch64::W4_W5, AArch64::X0_X1, AArch64::X4_X5});
252+
253+
assertRegMask([&](BitVector &BV) { BC->MIB->getWrittenRegs(Inst, BV); },
254+
{AArch64::W0, AArch64::X0, AArch64::W0_HI, AArch64::W5,
255+
AArch64::X5, AArch64::W5_HI});
256+
257+
assertRegMask([&](BitVector &BV) { BC->MIB->getUsedRegs(Inst, BV); },
258+
{AArch64::W5, AArch64::X5, AArch64::W5_HI});
259+
260+
assertRegMask([&](BitVector &BV) { BC->MIB->getSrcRegs(Inst, BV); },
261+
{AArch64::W5, AArch64::X5, AArch64::W5_HI});
262+
}
263+
158264
#endif // AARCH64_AVAILABLE
159265

160266
#ifdef X86_AVAILABLE
@@ -163,15 +269,13 @@ INSTANTIATE_TEST_SUITE_P(X86, MCPlusBuilderTester,
163269
::testing::Values(Triple::x86_64));
164270

165271
TEST_P(MCPlusBuilderTester, AliasAX) {
166-
uint64_t AliasesAX[] = {X86::RAX, X86::EAX, X86::AX, X86::AL, X86::AH};
167-
size_t AliasesAXCount = sizeof(AliasesAX) / sizeof(*AliasesAX);
168-
testRegAliases(Triple::x86_64, X86::AX, AliasesAX, AliasesAXCount);
272+
testRegAliases(Triple::x86_64, X86::AX,
273+
{X86::RAX, X86::EAX, X86::AX, X86::AL, X86::AH});
169274
}
170275

171276
TEST_P(MCPlusBuilderTester, AliasSmallerAX) {
172-
uint64_t AliasesAX[] = {X86::AX, X86::AL, X86::AH};
173-
size_t AliasesAXCount = sizeof(AliasesAX) / sizeof(*AliasesAX);
174-
testRegAliases(Triple::x86_64, X86::AX, AliasesAX, AliasesAXCount, true);
277+
testRegAliases(Triple::x86_64, X86::AX, {X86::AX, X86::AL, X86::AH},
278+
/*OnlySmaller=*/true);
175279
}
176280

177281
TEST_P(MCPlusBuilderTester, ReplaceRegWithImm) {

clang/test/Driver/Xclangas.s

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
// RUN: %clang -### -Werror -Xclangas -target-feature -Xclangas=+v5t %s 2>&1 | FileCheck %s
33
// CHECK: -cc1as
44
// CHECK-SAME: "-target-feature" "+v5t"
5+
// XFAIL: target={{.*}}-aix{{.*}}

llvm/docs/SPIRVUsage.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na
211211
- Adds the ability to specify the maximum error for floating-point operations.
212212
* - ``SPV_INTEL_ternary_bitwise_function``
213213
- Adds a bitwise instruction on three operands and a look-up table index for specifying the bitwise operation to perform.
214+
* - ``SPV_INTEL_subgroup_matrix_multiply_accumulate``
215+
- Adds an instruction to compute the matrix product of an M x K matrix with a K x N matrix and then add an M x N matrix.
214216

215217
To enable multiple extensions, list them separated by comma. For example, to enable support for atomic operations on floating-point numbers and arbitrary precision integers, use:
216218

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2503,7 +2503,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
25032503
return (LT.first * 2);
25042504
else
25052505
return (LT.first * 1);
2506-
} else if (!TLI->isOperationExpand(ISD, LT.second)) {
2506+
} else if (TLI->isOperationCustom(ISD, LT.second)) {
25072507
// If the operation is custom lowered then assume
25082508
// that the code is twice as expensive.
25092509
return (LT.first * 2);

llvm/include/llvm/CodeGen/MachineFrameInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class CalleeSavedInfo {
6161
MCRegister getReg() const { return Reg; }
6262
int getFrameIdx() const { return FrameIdx; }
6363
MCRegister getDstReg() const { return DstReg; }
64+
void setReg(MCRegister R) { Reg = R; }
6465
void setFrameIdx(int FI) {
6566
FrameIdx = FI;
6667
SpilledToReg = false;

llvm/include/llvm/CodeGen/MachinePipeliner.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,13 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
278278
/// Ordered list of DAG postprocessing steps.
279279
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
280280

281+
/// Used to compute single-iteration dependencies (i.e., buildSchedGraph).
282+
AliasAnalysis *AA;
283+
284+
/// Used to compute loop-carried dependencies (i.e.,
285+
/// addLoopCarriedDependences).
286+
BatchAAResults BAA;
287+
281288
/// Helper class to implement Johnson's circuit finding algorithm.
282289
class Circuits {
283290
std::vector<SUnit> &SUnits;
@@ -323,13 +330,14 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
323330
public:
324331
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
325332
const RegisterClassInfo &rci, unsigned II,
326-
TargetInstrInfo::PipelinerLoopInfo *PLI)
333+
TargetInstrInfo::PipelinerLoopInfo *PLI, AliasAnalysis *AA)
327334
: ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
328335
RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
329-
Topo(SUnits, &ExitSU) {
336+
Topo(SUnits, &ExitSU), AA(AA), BAA(*AA) {
330337
P.MF->getSubtarget().getSMSMutations(Mutations);
331338
if (SwpEnableCopyToPhi)
332339
Mutations.push_back(std::make_unique<CopyToPhiMutation>());
340+
BAA.enableCrossIterationMode();
333341
}
334342

335343
void schedule() override;
@@ -394,7 +402,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
394402
const MachineInstr *OtherMI) const;
395403

396404
private:
397-
void addLoopCarriedDependences(AAResults *AA);
405+
void addLoopCarriedDependences();
398406
void updatePhiDependences();
399407
void changeDependences();
400408
unsigned calculateResMII();

llvm/include/llvm/CodeGen/TargetFrameLowering.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,14 @@ class TargetFrameLowering {
270270
return false;
271271
}
272272

273+
/// spillCalleeSavedRegister - Default implementation for spilling a single
274+
/// callee saved register.
275+
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock,
276+
MachineBasicBlock::iterator MI,
277+
const CalleeSavedInfo &CS,
278+
const TargetInstrInfo *TII,
279+
const TargetRegisterInfo *TRI) const;
280+
273281
/// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
274282
/// saved registers and returns true if it isn't possible / profitable to do
275283
/// so by issuing a series of load instructions via loadRegToStackSlot().
@@ -284,6 +292,15 @@ class TargetFrameLowering {
284292
return false;
285293
}
286294

295+
/// restoreCalleeSavedRegister - Default implementation for restoring a single
296+
/// callee saved register. Should be called in reverse order. Can insert
297+
/// multiple instructions.
298+
void restoreCalleeSavedRegister(MachineBasicBlock &MBB,
299+
MachineBasicBlock::iterator MI,
300+
const CalleeSavedInfo &CS,
301+
const TargetInstrInfo *TII,
302+
const TargetRegisterInfo *TRI) const;
303+
287304
/// hasFP - Return true if the specified function should have a dedicated
288305
/// frame pointer register. For most targets this is true only if the function
289306
/// has variable sized allocas or if frame pointer elimination is disabled.

llvm/include/llvm/IR/RuntimeLibcalls.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ HANDLE_LIBCALL(NEG_I64, "__negdi2")
8585
HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
8686
HANDLE_LIBCALL(CTLZ_I64, "__clzdi2")
8787
HANDLE_LIBCALL(CTLZ_I128, "__clzti2")
88+
HANDLE_LIBCALL(CTPOP_I32, "__popcountsi2")
89+
HANDLE_LIBCALL(CTPOP_I64, "__popcountdi2")
90+
HANDLE_LIBCALL(CTPOP_I128, "__popcountti2")
8891

8992
// Floating-point
9093
HANDLE_LIBCALL(ADD_F32, "__addsf3")

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1385,7 +1385,8 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
13851385
const MachineRegisterInfo &MRI,
13861386
int64_t SplatValue, bool AllowUndef) {
13871387
if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
1388-
return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
1388+
return SplatValAndReg->Value.getSExtValue() == SplatValue;
1389+
13891390
return false;
13901391
}
13911392

0 commit comments

Comments
 (0)