@@ -1100,6 +1100,11 @@ bool AArch64RegisterInfo::getRegAllocationHints(
     const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
 
+  auto &ST = MF.getSubtarget<AArch64Subtarget>();
+  if (!ST.hasSME() || !ST.isStreaming())
+    return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
+                                                     VRM);
+
   // The SVE calling convention preserves registers Z8-Z23. As a result, there
   // are no ZPR2Strided or ZPR4Strided registers that do not overlap with the
   // callee-saved registers and so by default these will be pushed to the back
@@ -1109,94 +1114,82 @@ bool AArch64RegisterInfo::getRegAllocationHints(
   // instructions over reducing the number of clobbered callee-save registers,
   // so we add the strided registers as a hint.
   unsigned RegID = MRI.getRegClass(VirtReg)->getID();
-  // Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
-  for (const MachineInstr &Use : MRI.use_nodbg_instructions(VirtReg)) {
-    if ((RegID != AArch64::ZPR2StridedOrContiguousRegClassID &&
-         RegID != AArch64::ZPR4StridedOrContiguousRegClassID) ||
-        (Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
-         Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO))
-      continue;
-
-    unsigned LdOps = Use.getNumOperands() - 1;
-    const TargetRegisterClass *StridedRC = LdOps == 2
-                                               ? &AArch64::ZPR2StridedRegClass
-                                               : &AArch64::ZPR4StridedRegClass;
-
-    SmallVector<MCPhysReg, 4> StridedOrder;
-    for (MCPhysReg Reg : Order)
-      if (StridedRC->contains(Reg))
-        StridedOrder.push_back(Reg);
-
-    auto GetRegStartingAt = [&](MCPhysReg FirstReg) -> MCPhysReg {
-      for (MCPhysReg Strided : StridedOrder)
-        if (getSubReg(Strided, AArch64::zsub0) == FirstReg)
-          return Strided;
-      return (MCPhysReg)AArch64::NoRegister;
-    };
-
-    int OpIdx = Use.findRegisterUseOperandIdx(VirtReg, this);
-    assert(OpIdx != -1 && "Expected operand index from register use.");
-
-    unsigned TupleID = MRI.getRegClass(Use.getOperand(0).getReg())->getID();
-    bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
-                    TupleID == AArch64::ZPR4Mul4RegClassID;
-
-    unsigned AssignedOp = 0;
-    if (!any_of(make_range(Use.operands_begin() + 1, Use.operands_end()),
-                [&](const MachineOperand &Op) {
-                  if (!VRM->hasPhys(Op.getReg()))
-                    return false;
-                  AssignedOp = Op.getOperandNo();
-                  return true;
-                })) {
-      // There are no registers already assigned to any of the pseudo operands.
-      // Look for a valid starting register for the group.
-      for (unsigned I = 0; I < StridedOrder.size(); ++I) {
-        MCPhysReg Reg = StridedOrder[I];
-        unsigned FirstReg = getSubReg(Reg, AArch64::zsub0);
-
-        // If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
-        // register of the first load should be a multiple of 2 or 4.
-        if (IsMulZPR && (FirstReg - AArch64::Z0) % LdOps != 0)
-          continue;
-        // Skip this register if it has any live intervals assigned.
-        if (Matrix->isPhysRegUsed(Reg))
-          continue;
-
-        // Look for registers in StridedOrder which start with sub-registers
-        // following sequentially from FirstReg. If all are found and none are
-        // already live, add Reg to Hints.
-        MCPhysReg RegToAssign = Reg;
-        for (unsigned Next = 1; Next < LdOps; ++Next) {
-          MCPhysReg Strided = GetRegStartingAt(FirstReg + Next);
-          if (Strided == AArch64::NoRegister ||
-              Matrix->isPhysRegUsed(Strided)) {
-            RegToAssign = AArch64::NoRegister;
-            break;
+  if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
+      RegID == AArch64::ZPR4StridedOrContiguousRegClassID) {
+
+    // Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
+    for (const MachineInstr &Use : MRI.use_nodbg_instructions(VirtReg)) {
+      if (Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
+          Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)
+        continue;
+
+      unsigned LdOps = Use.getNumOperands() - 1;
+      const TargetRegisterClass *StridedRC =
+          LdOps == 2 ? &AArch64::ZPR2StridedRegClass
+                     : &AArch64::ZPR4StridedRegClass;
+
+      SmallVector<MCPhysReg, 4> StridedOrder;
+      for (MCPhysReg Reg : Order)
+        if (StridedRC->contains(Reg))
+          StridedOrder.push_back(Reg);
+
+      int OpIdx = Use.findRegisterUseOperandIdx(VirtReg, this);
+      assert(OpIdx != -1 && "Expected operand index from register use.");
+
+      unsigned TupleID = MRI.getRegClass(Use.getOperand(0).getReg())->getID();
+      bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
+                      TupleID == AArch64::ZPR4Mul4RegClassID;
+
+      const MachineOperand *AssignedRegOp = llvm::find_if(
+          make_range(Use.operands_begin() + 1, Use.operands_end()),
+          [&VRM](const MachineOperand &Op) {
+            return VRM->hasPhys(Op.getReg());
+          });
+
+      if (AssignedRegOp == Use.operands_end()) {
+        // There are no registers already assigned to any of the pseudo
+        // operands. Look for a valid starting register for the group.
+        for (unsigned I = 0; I < StridedOrder.size(); ++I) {
+          MCPhysReg Reg = StridedOrder[I];
+          SmallVector<MCPhysReg> Regs;
+          unsigned FirstStridedReg = Reg - OpIdx + 1;
+
+          // If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
+          // register of the first load should be a multiple of 2 or 4.
+          unsigned FirstSubReg = getSubReg(FirstStridedReg, AArch64::zsub0);
+          if (IsMulZPR && (FirstSubReg - AArch64::Z0) % LdOps != 0)
+            continue;
+
+          for (unsigned Op = 0; Op < LdOps; ++Op) {
+            if (!is_contained(StridedOrder, FirstStridedReg + Op) ||
+                getSubReg(FirstStridedReg + Op, AArch64::zsub0) !=
+                    FirstSubReg + Op)
+              break;
+            Regs.push_back(FirstStridedReg + Op);
           }
-          if (Next == (unsigned)OpIdx - 1)
-            RegToAssign = Strided;
+
+          if (Regs.size() == LdOps && all_of(Regs, [&](MCPhysReg R) {
+                return !Matrix->isPhysRegUsed(R);
+              }))
+            Hints.push_back(FirstStridedReg + OpIdx - 1);
         }
-        if (RegToAssign != AArch64::NoRegister)
-          Hints.push_back(RegToAssign);
+      } else {
+        // At least one operand already has a physical register assigned.
+        // Find the starting sub-register of this and use it to work out the
+        // correct strided register to suggest based on the current op index.
+        MCPhysReg TargetStartReg =
+            getSubReg(VRM->getPhys(AssignedRegOp->getReg()), AArch64::zsub0) +
+            (OpIdx - AssignedRegOp->getOperandNo());
+
+        for (unsigned I = 0; I < StridedOrder.size(); ++I)
+          if (getSubReg(StridedOrder[I], AArch64::zsub0) == TargetStartReg)
+            Hints.push_back(StridedOrder[I]);
       }
-    } else {
-      // At least one operand already has a physical register assigned.
-      // Find the starting sub-register of this and use it to work out the
-      // correct strided register to suggest based on the current op index.
-      MCPhysReg TargetStartReg =
-          getSubReg(VRM->getPhys(Use.getOperand(AssignedOp).getReg()),
-                    AArch64::zsub0) +
-          (OpIdx - AssignedOp);
-
-      for (unsigned I = 0; I < StridedOrder.size(); ++I)
-        if (getSubReg(StridedOrder[I], AArch64::zsub0) == TargetStartReg)
-          Hints.push_back(StridedOrder[I]);
-    }
 
-    if (!Hints.empty())
-      return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
-                                                       MF, VRM);
+      if (!Hints.empty())
+        return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
+                                                         MF, VRM);
+    }
   }
 
   for (MachineInstr &MI : MRI.def_instructions(VirtReg)) {
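
A minimal standalone sketch of the group-search arithmetic in the added code, for reference: plain ints stand in for MCPhysReg values, the four-register strided order is hypothetical, and the zsub0 sub-register check and LiveRegMatrix liveness test are omitted. It only assumes, as the patch does, that members of a strided group are numbered consecutively, so a candidate Reg at operand index OpIdx implies the group starts at Reg - OpIdx + 1 (operand 0 of the pseudo is the tuple def, so source operands start at OpIdx 1). It also shows why the hint pushed for VirtReg, FirstStridedReg + OpIdx - 1, is simply the candidate Reg itself whenever the complete group is available.

// Hypothetical stand-ins for the patch's MCPhysReg arithmetic; not LLVM API.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> StridedOrder = {8, 9, 10, 11}; // allocatable strided regs
  unsigned LdOps = 4; // FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO: 4 source operands
  unsigned OpIdx = 3; // VirtReg feeds the third source operand

  for (int Reg : StridedOrder) {
    // Starting register of the group that would place VirtReg at OpIdx.
    int FirstStridedReg = Reg - (int)OpIdx + 1;

    // Collect the whole group; stop if any member is not allocatable.
    std::vector<int> Regs;
    for (unsigned Op = 0; Op < LdOps; ++Op) {
      int Member = FirstStridedReg + (int)Op;
      if (std::find(StridedOrder.begin(), StridedOrder.end(), Member) ==
          StridedOrder.end())
        break;
      Regs.push_back(Member);
    }

    // Only a complete group yields a hint; the hint for VirtReg is the
    // OpIdx-th member, i.e. FirstStridedReg + OpIdx - 1 == Reg.
    if (Regs.size() == LdOps)
      std::printf("hint %d (group %d..%d)\n", FirstStridedReg + (int)OpIdx - 1,
                  FirstStridedReg, FirstStridedReg + (int)LdOps - 1);
  }
  return 0;
}

With these inputs the sketch prints "hint 10 (group 8..11)": only Reg = 10 places the third operand so that the full group 8..11 falls inside the allocation order.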