Skip to content

Commit 645e30b

Browse files
- shouldUseFormStridedPseudo sets RegClass using a switch statement
- Removed INTRINSIC_W_CHAIN from getIntrinsicID
- Renamed FORM_STRIDED_TUPLE_X#_PSEUDO -> FORM_TRANSPOSED_REG_TUPLE_X#_PSEUDO
- Add switch statement back into getRegAllocationHints
- Use getSubReg in getRegAllocationHints and remove index into RegUnits
1 parent 426253c commit 645e30b

File tree

4 files changed

+68
-84
lines changed

4 files changed

+68
-84
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1754,9 +1754,9 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
17541754
return expandMultiVecPseudo(
17551755
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
17561756
AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
1757-
case AArch64::FORM_STRIDED_TUPLE_X2_PSEUDO:
1757+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
17581758
return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 2);
1759-
case AArch64::FORM_STRIDED_TUPLE_X4_PSEUDO:
1759+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
17601760
return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 4);
17611761
}
17621762
return false;

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -7639,11 +7639,6 @@ static unsigned getIntrinsicID(const SDNode *N) {
76397639
return IID;
76407640
return Intrinsic::not_intrinsic;
76417641
}
7642-
case ISD::INTRINSIC_W_CHAIN: {
7643-
unsigned IID = N->getConstantOperandVal(1);
7644-
if (IID < Intrinsic::num_intrinsics)
7645-
return IID;
7646-
}
76477642
}
76487643
}
76497644

@@ -8646,53 +8641,55 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
86468641
return ZExtBool;
86478642
}
86488643

8644+
// The FORM_TRANSPOSED_REG_TUPLE pseudo should only be used if the
8645+
// input operands are copy nodes where the source register is in a
8646+
// StridedOrContiguous class. For example:
8647+
//
8648+
// %3:zpr2stridedorcontiguous = LD1B_2Z_IMM_PSEUDO ..
8649+
// %4:zpr = COPY %3.zsub1:zpr2stridedorcontiguous
8650+
// %5:zpr = COPY %3.zsub0:zpr2stridedorcontiguous
8651+
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO ..
8652+
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous
8653+
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous
8654+
// %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO %5:zpr, %8:zpr
8655+
//
86498656
bool shouldUseFormStridedPseudo(MachineInstr &MI) {
86508657
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
8651-
bool UseFormStrided = false;
8652-
unsigned NumOperands =
8653-
MI.getOpcode() == AArch64::FORM_STRIDED_TUPLE_X2_PSEUDO ? 2 : 4;
8654-
8655-
// The FORM_STRIDED_TUPLE pseudo should only be used if the input operands
8656-
// are copy nodes where the source register is in a StridedOrContiguous
8657-
// class. For example:
8658-
// %3:zpr2stridedorcontiguous = LD1B_2Z_IMM_PSEUDO ..
8659-
// %4:zpr = COPY %3.zsub1:zpr2stridedorcontiguous
8660-
// %5:zpr = COPY %3.zsub0:zpr2stridedorcontiguous
8661-
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO ..
8662-
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous
8663-
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous
8664-
// %9:zpr2mul2 = FORM_STRIDED_TUPLE_X2_PSEUDO %5:zpr, %8:zpr
86658658

86668659
MCRegister SubReg = MCRegister::NoRegister;
86678660
for (unsigned I = 1; I < MI.getNumOperands(); ++I) {
86688661
MachineOperand &MO = MI.getOperand(I);
8669-
assert(MO.isReg() && "Unexpected operand to FORM_STRIDED_TUPLE");
8662+
assert(MO.isReg() && "Unexpected operand to FORM_TRANSPOSED_REG_TUPLE");
86708663

86718664
MachineOperand *Def = MRI.getOneDef(MO.getReg());
8672-
if (!Def || !Def->isReg() || !Def->getParent()->isCopy()) {
8673-
UseFormStrided = false;
8674-
break;
8675-
}
8665+
if (!Def || !Def->getParent()->isCopy())
8666+
return false;
86768667

8677-
MachineOperand CpyOp = Def->getParent()->getOperand(1);
8678-
MachineOperand *Ld = MRI.getOneDef(CpyOp.getReg());
8679-
unsigned OpSubReg = CpyOp.getSubReg();
8668+
const MachineOperand &CpySrc = Def->getParent()->getOperand(1);
8669+
MachineOperand *CopySrcOp = MRI.getOneDef(CpySrc.getReg());
8670+
unsigned OpSubReg = CpySrc.getSubReg();
86808671
if (SubReg == MCRegister::NoRegister)
86818672
SubReg = OpSubReg;
8682-
if (!Ld || !Ld->isReg() || OpSubReg != SubReg) {
8683-
UseFormStrided = false;
8673+
if (!CopySrcOp || !CopySrcOp->isReg() || OpSubReg != SubReg)
8674+
return false;
8675+
8676+
const TargetRegisterClass *RegClass = nullptr;
8677+
switch (MI.getNumOperands() - 1) {
8678+
case 2:
8679+
RegClass = &AArch64::ZPR2StridedOrContiguousRegClass;
8680+
break;
8681+
case 4:
8682+
RegClass = &AArch64::ZPR4StridedOrContiguousRegClass;
86848683
break;
8684+
default:
8685+
llvm_unreachable("Unexpected number of operands to pseudo.");
86858686
}
86868687

8687-
const TargetRegisterClass *RegClass =
8688-
NumOperands == 2 ? &AArch64::ZPR2StridedOrContiguousRegClass
8689-
: &AArch64::ZPR4StridedOrContiguousRegClass;
8690-
8691-
if (MRI.getRegClass(Ld->getReg()) == RegClass)
8692-
UseFormStrided = true;
8688+
if (MRI.getRegClass(CopySrcOp->getReg()) != RegClass)
8689+
return false;
86938690
}
86948691

8695-
return UseFormStrided;
8692+
return true;
86968693
}
86978694

86988695
void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
@@ -8720,10 +8717,10 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
87208717
}
87218718
}
87228719

8723-
if (MI.getOpcode() == AArch64::FORM_STRIDED_TUPLE_X2_PSEUDO ||
8724-
MI.getOpcode() == AArch64::FORM_STRIDED_TUPLE_X4_PSEUDO) {
8725-
// If input values to the FORM_STRIDED_TUPLE pseudo aren't copies from a
8726-
// StridedOrContiguous class, fall back on REG_SEQUENCE node.
8720+
if (MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
8721+
MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) {
8722+
// If input values to the FORM_TRANSPOSED_REG_TUPLE pseudo aren't copies
8723+
// from a StridedOrContiguous class, fall back on REG_SEQUENCE node.
87278724
if (!shouldUseFormStridedPseudo(MI)) {
87288725
static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
87298726
AArch64::zsub2, AArch64::zsub3};

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,10 +1107,10 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
11071107
}
11081108
}
11091109

1110-
// FORM_STRIDED_TUPLE nodes are created to improve register allocation where
1111-
// a consecutive multi-vector tuple is constructed from the same indices of
1112-
// multiple strided loads. This may still result in unnecessary copies between
1113-
// the loads and the tuple. Here we try to return a hint to assign the
1110+
// FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation
1111+
// where a consecutive multi-vector tuple is constructed from the same indices
1112+
// of multiple strided loads. This may still result in unnecessary copies
1113+
// between the loads and the tuple. Here we try to return a hint to assign the
11141114
// contiguous ZPRMulReg starting at the same register as the first operand of
11151115
// the pseudo, which should be a subregister of the first strided load.
11161116
//
@@ -1123,51 +1123,38 @@ bool AArch64RegisterInfo::getRegAllocationHints(
11231123
Register VirtReg, ArrayRef<MCPhysReg> Order,
11241124
SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
11251125
const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
1126-
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
1127-
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
11281126
const MachineRegisterInfo &MRI = MF.getRegInfo();
1129-
bool DefaultHints =
1130-
TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
1131-
1132-
unsigned RegID = MRI.getRegClass(VirtReg)->getID();
1133-
if (RegID != AArch64::ZPR2Mul2RegClassID &&
1134-
RegID != AArch64::ZPR4Mul4RegClassID)
1135-
return DefaultHints;
11361127

11371128
for (MachineInstr &MI : MRI.def_instructions(VirtReg)) {
1138-
if (MI.getOpcode() != AArch64::FORM_STRIDED_TUPLE_X2_PSEUDO &&
1139-
MI.getOpcode() != AArch64::FORM_STRIDED_TUPLE_X4_PSEUDO)
1129+
if (MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
1130+
MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)
11401131
continue;
11411132

1142-
// Look up the physical register mapped to the first load of the pseudo.
1143-
Register FirstLoadVirtReg = MI.getOperand(1).getReg();
1144-
if (!VRM->hasPhys(FirstLoadVirtReg))
1133+
switch (MI.getOperand(1).getSubReg()) {
1134+
case AArch64::zsub0:
1135+
case AArch64::zsub1:
1136+
case AArch64::zsub2:
1137+
case AArch64::zsub3:
1138+
break;
1139+
default:
11451140
continue;
1141+
}
11461142

1147-
int64_t SubRegIdx = -1;
1148-
MCRegister FirstLoadPhysReg = VRM->getPhys(FirstLoadVirtReg);
1149-
1150-
// The subreg number is used to access the correct unit of the
1151-
// strided register found in the map above.
1152-
SubRegIdx = MI.getOperand(1).getSubReg() - AArch64::zsub0;
1153-
if (SubRegIdx < 0 || SubRegIdx > 3)
1143+
// Look up the physical register mapped to the first operand of the pseudo.
1144+
Register FirstOpVirtReg = MI.getOperand(1).getReg();
1145+
if (!VRM->hasPhys(FirstOpVirtReg))
11541146
continue;
11551147

1156-
SmallVector<Register, 4> RegUnits;
1157-
for (MCRegUnit Unit : TRI->regunits(FirstLoadPhysReg))
1158-
RegUnits.push_back(Unit);
1159-
1160-
// Find the contiguous ZPRMul register which starts with the
1161-
// same register unit as the strided register and add to Hints.
1162-
Register StartReg = RegUnits[SubRegIdx];
1163-
for (unsigned I = 0; I < Order.size(); ++I) {
1164-
Register Reg = *TRI->regunits(Order[I]).begin();
1165-
if (Reg == StartReg)
1166-
Hints.push_back(Order[I]);
1167-
}
1148+
MCRegister TupleStartReg =
1149+
getSubReg(VRM->getPhys(FirstOpVirtReg), MI.getOperand(1).getSubReg());
1150+
for (unsigned I = 0; I < Order.size(); ++I)
1151+
if (MCRegister R = getSubReg(Order[I], AArch64::zsub0))
1152+
if (R == TupleStartReg)
1153+
Hints.push_back(Order[I]);
11681154
}
11691155

1170-
return DefaultHints;
1156+
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
1157+
VRM);
11711158
}
11721159

11731160
unsigned AArch64RegisterInfo::getLocalAddressRegister(

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,14 @@ def tileslicerange0s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 4>", []>;
3434

3535
def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>;
3636

37-
def FORM_STRIDED_TUPLE_X2_PSEUDO :
37+
def FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO :
3838
Pseudo<(outs ZPR2Mul2:$tup),
3939
(ins ZPR:$zn0, ZPR:$zn1), []>, Sched<[]>{
4040
let hasSideEffects = 0;
4141
let hasPostISelHook = 1;
4242
}
4343

44-
def FORM_STRIDED_TUPLE_X4_PSEUDO :
44+
def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO :
4545
Pseudo<(outs ZPR4Mul4:$tup),
4646
(ins ZPR:$zn0, ZPR:$zn1, ZPR:$zn2, ZPR:$zn3), []>, Sched<[]>{
4747
let hasSideEffects = 0;
@@ -186,14 +186,14 @@ class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic
186186
Operand imm_ty, ComplexPattern tileslice>
187187
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)),
188188
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
189-
(FORM_STRIDED_TUPLE_X2_PSEUDO vt:$Zn1,vt:$Zn2), zpr_ty:$Zm, imm_ty:$i)>;
189+
(FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1,vt:$Zn2), zpr_ty:$Zm, imm_ty:$i)>;
190190

191191
class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
192192
Operand imm_ty, ComplexPattern tileslice>
193193
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
194194
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)),
195195
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
196-
(FORM_STRIDED_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
196+
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
197197
zpr_ty:$Zm, imm_ty:$i)>;
198198

199199
class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>

0 commit comments

Comments
 (0)