Skip to content

Commit 463c0fa

Browse files
[AArch64][SME] Create separate FORM_TRANSPOSE pseudos for ZPR & ZPRMul classes
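The FORM_TRANSPOSED_REG_TUPLE pseudo nodes use either the ZPR2Mul2 or ZPR4Mul4 register class for their output. To extend them to other multi-vector intrinsics, which instead create a ZPR2/ZPR4 REG_SEQUENCE, new pseudos with ZPR2/ZPR4 outputs have been added under the existing names, and the existing ZPR2Mul2/ZPR4Mul4 pseudos have been renamed to FORM_TRANSPOSED_REG_TUPLE_MULX2_PSEUDO and FORM_TRANSPOSED_REG_TUPLE_MULX4_PSEUDO.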
1 parent d1c9a3a commit 463c0fa

File tree

6 files changed

+223
-253
lines changed

6 files changed

+223
-253
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,8 +1755,10 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
17551755
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
17561756
AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
17571757
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
1758+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_MULX2_PSEUDO:
17581759
return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 2);
17591760
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
1761+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_MULX4_PSEUDO:
17601762
return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 4);
17611763
}
17621764
return false;

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8759,17 +8759,19 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
87598759
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO ..
87608760
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous
87618761
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous
8762-
// %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO %5:zpr, %8:zpr
8762+
// %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_MULX2_PSEUDO %5:zpr, %8:zpr
87638763
//
87648764
bool shouldUseFormStridedPseudo(MachineInstr &MI) {
87658765
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
87668766

87678767
const TargetRegisterClass *RegClass = nullptr;
87688768
switch (MI.getOpcode()) {
87698769
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
8770+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_MULX2_PSEUDO:
87708771
RegClass = &AArch64::ZPR2StridedOrContiguousRegClass;
87718772
break;
87728773
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
8774+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_MULX4_PSEUDO:
87738775
RegClass = &AArch64::ZPR4StridedOrContiguousRegClass;
87748776
break;
87758777
default:
@@ -8824,14 +8826,14 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
88248826
}
88258827
}
88268828

8827-
if (MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
8828-
MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) {
8829+
const AArch64InstrInfo *TII =
8830+
MI.getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
8831+
if (TII->isFormTransposedOpcode(MI.getOpcode())) {
88298832
// If input values to the FORM_TRANSPOSED_REG_TUPLE pseudo aren't copies
88308833
// from a StridedOrContiguous class, fall back on REG_SEQUENCE node.
88318834
if (shouldUseFormStridedPseudo(MI))
88328835
return;
88338836

8834-
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
88358837
MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
88368838
TII->get(TargetOpcode::REG_SEQUENCE),
88378839
MI.getOperand(0).getReg());

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,18 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
548548
Register TargetReg,
549549
bool FrameSetup) const;
550550

551+
/// Returns true if \p Opc is one of the four FORM_TRANSPOSED_REG_TUPLE
/// pseudo opcodes (X2, X4, MULX2 or MULX4), and false for any other opcode.
bool isFormTransposedOpcode(unsigned Opc) const {
552+
switch (Opc) {
553+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
554+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
555+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_MULX2_PSEUDO:
556+
case AArch64::FORM_TRANSPOSED_REG_TUPLE_MULX4_PSEUDO:
557+
return true;
558+
default:
559+
return false;
560+
}
561+
}
562+
551563
#define GET_INSTRINFO_HELPER_DECLS
552564
#include "AArch64GenInstrInfo.inc"
553565

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,13 +1109,14 @@ bool AArch64RegisterInfo::getRegAllocationHints(
11091109
// so we add the strided registers as a hint.
11101110
unsigned RegID = MRI.getRegClass(VirtReg)->getID();
11111111
// Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
1112+
const AArch64InstrInfo *TII =
1113+
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
11121114
if ((RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
11131115
RegID == AArch64::ZPR4StridedOrContiguousRegClassID) &&
1114-
any_of(MRI.use_nodbg_instructions(VirtReg), [](const MachineInstr &Use) {
1115-
return Use.getOpcode() ==
1116-
AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
1117-
Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;
1118-
})) {
1116+
any_of(MRI.use_nodbg_instructions(VirtReg),
1117+
[&TII](const MachineInstr &Use) {
1118+
return TII->isFormTransposedOpcode(Use.getOpcode());
1119+
})) {
11191120
const TargetRegisterClass *StridedRC =
11201121
RegID == AArch64::ZPR2StridedOrContiguousRegClassID
11211122
? &AArch64::ZPR2StridedRegClass
@@ -1130,8 +1131,7 @@ bool AArch64RegisterInfo::getRegAllocationHints(
11301131
}
11311132

11321133
for (MachineInstr &MI : MRI.def_instructions(VirtReg)) {
1133-
if (MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
1134-
MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)
1134+
if (!TII->isFormTransposedOpcode(MI.getOpcode()))
11351135
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
11361136
MF, VRM);
11371137

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,20 +45,26 @@ def am_sme_indexed_b4 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0, 15>
4545
// If the operands do not match this pattern, the pseudos are expanded
4646
// to a REG_SEQUENCE using the post-isel hook.
4747

48-
def FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO :
49-
Pseudo<(outs ZPR2Mul2:$tup),
50-
(ins ZPR:$zn0, ZPR:$zn1), []>, Sched<[]>{
48+
// Two-vector form-transposed-tuple pseudo: takes two ZPR inputs ($zn0, $zn1)
// and defines a tuple $tup in the register class passed as multi_vector_class.
// hasPostISelHook is set so that each instance is visited by the post-isel hook.
class sme_form_transpose_x2_pseudo<RegisterClass multi_vector_class>
49+
: Pseudo<(outs multi_vector_class:$tup), (ins ZPR:$zn0, ZPR:$zn1), []>,
50+
Sched<[]> {
5151
let hasSideEffects = 0;
5252
let hasPostISelHook = 1;
5353
}
5454

55-
def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO :
56-
Pseudo<(outs ZPR4Mul4:$tup),
57-
(ins ZPR:$zn0, ZPR:$zn1, ZPR:$zn2, ZPR:$zn3), []>, Sched<[]>{
55+
def FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO : sme_form_transpose_x2_pseudo<ZPR2>;
56+
def FORM_TRANSPOSED_REG_TUPLE_MULX2_PSEUDO : sme_form_transpose_x2_pseudo<ZPR2Mul2>;
57+
58+
// Four-vector form-transposed-tuple pseudo: takes four ZPR inputs
// ($zn0..$zn3) and defines a tuple $tup in the register class passed as
// multi_vector_class. hasPostISelHook is set so that each instance is visited
// by the post-isel hook.
class sme_form_transpose_x4_pseudo<RegisterClass multi_vector_class>
59+
: Pseudo<(outs multi_vector_class:$tup), (ins ZPR:$zn0, ZPR:$zn1, ZPR:$zn2, ZPR:$zn3), []>,
60+
Sched<[]> {
5861
let hasSideEffects = 0;
5962
let hasPostISelHook = 1;
6063
}
6164

65+
def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO : sme_form_transpose_x4_pseudo<ZPR4>;
66+
def FORM_TRANSPOSED_REG_TUPLE_MULX4_PSEUDO : sme_form_transpose_x4_pseudo<ZPR4Mul4>;
67+
6268
def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
6369
def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
6470
[SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
@@ -164,14 +170,14 @@ class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic, O
164170
class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
165171
ValueType vt, ComplexPattern tileslice>
166172
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm),
167-
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
173+
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2),
168174
zpr_ty:$Zm)>;
169175
class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
170176
ValueType vt, ComplexPattern tileslice>
171177
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
172178
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm),
173179
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
174-
(REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
180+
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
175181
zpr_ty:$Zm)>;
176182

177183
class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
@@ -197,14 +203,14 @@ class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic
197203
Operand imm_ty, ComplexPattern tileslice>
198204
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)),
199205
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
200-
(FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1,vt:$Zn2), zpr_ty:$Zm, imm_ty:$i)>;
206+
(FORM_TRANSPOSED_REG_TUPLE_MULX2_PSEUDO vt:$Zn1,vt:$Zn2), zpr_ty:$Zm, imm_ty:$i)>;
201207

202208
class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
203209
Operand imm_ty, ComplexPattern tileslice>
204210
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
205211
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)),
206212
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
207-
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
213+
(FORM_TRANSPOSED_REG_TUPLE_MULX4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
208214
zpr_ty:$Zm, imm_ty:$i)>;
209215

210216
class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>

0 commit comments

Comments
 (0)