@@ -1108,20 +1108,16 @@ bool AArch64RegisterInfo::getRegAllocationHints(
   // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy
   // instructions over reducing the number of clobbered callee-save registers,
   // so we add the strided registers as a hint.
-  const MachineInstr *TupleInst = nullptr;
   unsigned RegID = MRI.getRegClass(VirtReg)->getID();
   // Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
-  if ((RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
-       RegID == AArch64::ZPR4StridedOrContiguousRegClassID) &&
-      any_of(MRI.use_nodbg_instructions(VirtReg), [&TupleInst](
-                                                      const MachineInstr &Use) {
-        bool IsTuple =
-            Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
-            Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;
-        TupleInst = &Use;
-        return IsTuple;
-      })) {
-    unsigned LdOps = TupleInst->getNumOperands() - 1;
+  for (const MachineInstr &Use : MRI.use_nodbg_instructions(VirtReg)) {
+    if ((RegID != AArch64::ZPR2StridedOrContiguousRegClassID &&
+         RegID != AArch64::ZPR4StridedOrContiguousRegClassID) ||
+        (Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
+         Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO))
+      continue;
+
+    unsigned LdOps = Use.getNumOperands() - 1;
     const TargetRegisterClass *StridedRC = LdOps == 2
                                                ? &AArch64::ZPR2StridedRegClass
                                                : &AArch64::ZPR4StridedRegClass;
@@ -1131,63 +1127,76 @@ bool AArch64RegisterInfo::getRegAllocationHints(
       if (StridedRC->contains(Reg))
         StridedOrder.push_back(Reg);
 
-    int OpIdx = TupleInst->findRegisterUseOperandIdx(VirtReg, this);
-    if (OpIdx == -1)
-      return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
-                                                        MF, VRM);
+    auto GetRegStartingAt = [&](MCPhysReg FirstReg) -> MCPhysReg {
+      for (MCPhysReg Strided : StridedOrder)
+        if (getSubReg(Strided, AArch64::zsub0) == FirstReg)
+          return Strided;
+      return (MCPhysReg)AArch64::NoRegister;
+    };
+
+    int OpIdx = Use.findRegisterUseOperandIdx(VirtReg, this);
+    assert(OpIdx != -1 && "Expected operand index from register use.");
 
-    unsigned TupleID =
-        MRI.getRegClass(TupleInst->getOperand(0).getReg())->getID();
+    unsigned TupleID = MRI.getRegClass(Use.getOperand(0).getReg())->getID();
     bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
                     TupleID == AArch64::ZPR4Mul4RegClassID;
 
-    if (OpIdx == 1) {
+    unsigned AssignedOp = 0;
+    if (!any_of(make_range(Use.operands_begin() + 1, Use.operands_end()),
+                [&](const MachineOperand &Op) {
+                  if (!VRM->hasPhys(Op.getReg()))
+                    return false;
+                  AssignedOp = Op.getOperandNo();
+                  return true;
+                })) {
+      // There are no registers already assigned to any of the pseudo operands.
+      // Look for a valid starting register for the group.
       for (unsigned I = 0; I < StridedOrder.size(); ++I) {
         MCPhysReg Reg = StridedOrder[I];
         unsigned FirstReg = getSubReg(Reg, AArch64::zsub0);
 
         // If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
         // register of the first load should be a multiple of 2 or 4.
-        if (IsMulZPR &&
-            (getSubReg(Reg, AArch64::zsub0) - AArch64::Z0) % LdOps != 0)
+        if (IsMulZPR && (FirstReg - AArch64::Z0) % LdOps != 0)
           continue;
         // Skip this register if it has any live intervals assigned.
         if (Matrix->isPhysRegUsed(Reg))
           continue;
 
-        bool CanAssign = true;
+        // Look for registers in StridedOrder which start with sub-registers
+        // following sequentially from FirstReg. If all are found and none are
+        // already live, add Reg to Hints.
+        MCPhysReg RegToAssign = Reg;
         for (unsigned Next = 1; Next < LdOps; ++Next) {
-          // Ensure we can assign enough registers from the list for all loads.
-          if (I + Next >= StridedOrder.size()) {
-            CanAssign = false;
-            break;
-          }
-          // Ensure the subsequent registers are not live and that the starting
-          // sub-registers are sequential.
-          MCPhysReg NextReg = StridedOrder[I + Next];
-          if (Matrix->isPhysRegUsed(NextReg) ||
-              (getSubReg(NextReg, AArch64::zsub0) != FirstReg + Next)) {
-            CanAssign = false;
+          MCPhysReg Strided = GetRegStartingAt(FirstReg + Next);
+          if (Strided == AArch64::NoRegister ||
+              Matrix->isPhysRegUsed(Strided)) {
+            RegToAssign = AArch64::NoRegister;
             break;
           }
+          if (Next == (unsigned)OpIdx - 1)
+            RegToAssign = Strided;
         }
-        if (CanAssign)
-          Hints.push_back(Reg);
-      }
-    } else if (VRM->hasPhys(TupleInst->getOperand(1).getReg())) {
-      // This is not the first load in the sequence. Find the register
-      // assigned to the first and match to a strided reg in the list.
-      MCPhysReg FirstLoadPhysReg =
-          VRM->getPhys(TupleInst->getOperand(1).getReg());
-      for (unsigned I = 0; I < StridedOrder.size(); ++I) {
-        if (StridedOrder[I] == FirstLoadPhysReg &&
-            (I + (OpIdx - 1) < StridedOrder.size()))
-          Hints.push_back(StridedOrder[I + (OpIdx - 1)]);
+        if (RegToAssign != AArch64::NoRegister)
+          Hints.push_back(RegToAssign);
       }
+    } else {
+      // At least one operand already has a physical register assigned.
+      // Find the starting sub-register of this and use it to work out the
+      // correct strided register to suggest based on the current op index.
+      MCPhysReg TargetStartReg =
+          getSubReg(VRM->getPhys(Use.getOperand(AssignedOp).getReg()),
+                    AArch64::zsub0) +
+          (OpIdx - AssignedOp);
+
+      for (unsigned I = 0; I < StridedOrder.size(); ++I)
+        if (getSubReg(StridedOrder[I], AArch64::zsub0) == TargetStartReg)
+          Hints.push_back(StridedOrder[I]);
     }
 
-    return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
-                                                      VRM);
+    if (!Hints.empty())
+      return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
+                                                       MF, VRM);
   }
 
   for (MachineInstr &MI : MRI.def_instructions(VirtReg)) {
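
The offset computation in the new else-branch above can be checked in isolation. The following is a minimal standalone sketch, not part of the patch: it models Z registers as plain integer indices (Z0 == 0) rather than MCPhysReg values, and the helper name hintedStart is illustrative only.

// hint_offset_sketch.cpp - illustrative only; plain-integer model of the
// "already assigned operand" case of the strided register hinting.
#include <cassert>

// Mirrors TargetStartReg = zsub0(assigned operand's phys reg) + (OpIdx - AssignedOp):
// the hinted tuple member starts at the assigned operand's first sub-register,
// shifted by the distance between the two operand indices.
int hintedStart(int AssignedOp, int AssignedStart, int OpIdx) {
  return AssignedStart + (OpIdx - AssignedOp);
}

int main() {
  // Suppose operand 2 of a FORM_TRANSPOSED_REG_TUPLE_X4 pseudo was assigned a
  // strided register whose zsub0 is Z5. Then operand 3 is hinted the strided
  // register starting at Z6, and operand 1 the one starting at Z4.
  assert(hintedStart(/*AssignedOp=*/2, /*AssignedStart=*/5, /*OpIdx=*/3) == 6);
  assert(hintedStart(/*AssignedOp=*/2, /*AssignedStart=*/5, /*OpIdx=*/1) == 4);
  return 0;
}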