@@ -5553,9 +5553,15 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55535553 unsigned MemSizeBits = ExtLd.getMMO ().getMemoryType ().getSizeInBits ();
55545554 bool IsPre = ExtLd.isPre ();
55555555 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5556- bool InsertIntoXReg = false ;
5556+ unsigned InsertIntoSubReg = 0 ;
55575557 bool IsDst64 = Ty.getSizeInBits () == 64 ;
55585558
5559+ // ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
5560+ // long as they are scalar.
5561+ bool IsFPR = RBI.getRegBank (Dst, MRI, TRI)->getID () == AArch64::FPRRegBankID;
5562+ if ((IsSExt && IsFPR) || Ty.isVector ())
5563+ return false ;
5564+
55595565 unsigned Opc = 0 ;
55605566 LLT NewLdDstTy;
55615567 LLT s32 = LLT::scalar (32 );
@@ -5568,9 +5574,13 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55685574 else
55695575 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
55705576 NewLdDstTy = IsDst64 ? s64 : s32;
5577+ } else if (IsFPR) {
5578+ Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5579+ InsertIntoSubReg = AArch64::bsub;
5580+ NewLdDstTy = LLT::scalar (MemSizeBits);
55715581 } else {
55725582 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5573- InsertIntoXReg = IsDst64;
5583+ InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0 ;
55745584 NewLdDstTy = s32;
55755585 }
55765586 } else if (MemSizeBits == 16 ) {
@@ -5580,27 +5590,32 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55805590 else
55815591 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
55825592 NewLdDstTy = IsDst64 ? s64 : s32;
5593+ } else if (IsFPR) {
5594+ Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5595+ InsertIntoSubReg = AArch64::hsub;
5596+ NewLdDstTy = LLT::scalar (MemSizeBits);
55835597 } else {
55845598 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5585- InsertIntoXReg = IsDst64;
5599+ InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0 ;
55865600 NewLdDstTy = s32;
55875601 }
55885602 } else if (MemSizeBits == 32 ) {
55895603 if (IsSExt) {
55905604 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
55915605 NewLdDstTy = s64;
5606+ } else if (IsFPR) {
5607+ Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5608+ InsertIntoSubReg = AArch64::ssub;
5609+ NewLdDstTy = LLT::scalar (MemSizeBits);
55925610 } else {
55935611 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5594- InsertIntoXReg = IsDst64;
5612+ InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0 ;
55955613 NewLdDstTy = s32;
55965614 }
55975615 } else {
55985616 llvm_unreachable (" Unexpected size for indexed load" );
55995617 }
56005618
5601- if (RBI.getRegBank (Dst, MRI, TRI)->getID () == AArch64::FPRRegBankID)
5602- return false ; // We should be on gpr.
5603-
56045619 auto Cst = getIConstantVRegVal (Offset, MRI);
56055620 if (!Cst)
56065621 return false ; // Shouldn't happen, but just in case.
@@ -5610,15 +5625,18 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
56105625 LdMI.cloneMemRefs (ExtLd);
56115626 constrainSelectedInstRegOperands (*LdMI, TII, TRI, RBI);
56125627 // Make sure to select the load with the MemTy as the dest type, and then
5613- // insert into X reg if needed.
5614- if (InsertIntoXReg ) {
5628+ // insert into a larger reg if needed.
5629+ if (InsertIntoSubReg ) {
56155630 // Generate a SUBREG_TO_REG.
56165631 auto SubToReg = MIB.buildInstr (TargetOpcode::SUBREG_TO_REG, {Dst}, {})
56175632 .addImm (0 )
56185633 .addUse (LdMI.getReg (1 ))
5619- .addImm (AArch64::sub_32);
5620- RBI.constrainGenericRegister (SubToReg.getReg (0 ), AArch64::GPR64RegClass,
5621- MRI);
5634+ .addImm (InsertIntoSubReg);
5635+ RBI.constrainGenericRegister (
5636+ SubToReg.getReg (0 ),
5637+ *getRegClassForTypeOnBank (MRI.getType (Dst),
5638+ *RBI.getRegBank (Dst, MRI, TRI)),
5639+ MRI);
56225640 } else {
56235641 auto Copy = MIB.buildCopy (Dst, LdMI.getReg (1 ));
56245642 selectCopy (*Copy, TII, MRI, TRI, RBI);