@@ -5547,9 +5547,15 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55475547 unsigned MemSizeBits = ExtLd.getMMO ().getMemoryType ().getSizeInBits ();
55485548 bool IsPre = ExtLd.isPre ();
55495549 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5550- bool InsertIntoXReg = false ;
5550+ unsigned InsertIntoSubReg = 0 ;
55515551 bool IsDst64 = Ty.getSizeInBits () == 64 ;
55525552
5553+ // ZExt/SExt loads are normally on gpr, but we can also handle extload and
5554+ // zextload on fpr, so long as they are scalar.
5555+ bool IsFPR = RBI.getRegBank (Dst, MRI, TRI)->getID () == AArch64::FPRRegBankID;
5556+ if ((IsSExt && IsFPR) || Ty.isVector ())
5557+ return false ;
5558+
55535559 unsigned Opc = 0 ;
55545560 LLT NewLdDstTy;
55555561 LLT s32 = LLT::scalar (32 );
@@ -5562,9 +5568,13 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55625568 else
55635569 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
55645570 NewLdDstTy = IsDst64 ? s64 : s32;
5571+ } else if (IsFPR) {
5572+ Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5573+ InsertIntoSubReg = AArch64::bsub;
5574+ NewLdDstTy = LLT::scalar (MemSizeBits);
55655575 } else {
55665576 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5567- InsertIntoXReg = IsDst64;
5577+ InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0 ;
55685578 NewLdDstTy = s32;
55695579 }
55705580 } else if (MemSizeBits == 16 ) {
@@ -5574,27 +5584,32 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55745584 else
55755585 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
55765586 NewLdDstTy = IsDst64 ? s64 : s32;
5587+ } else if (IsFPR) {
5588+ Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5589+ InsertIntoSubReg = AArch64::hsub;
5590+ NewLdDstTy = LLT::scalar (MemSizeBits);
55775591 } else {
55785592 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5579- InsertIntoXReg = IsDst64;
5593+ InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0 ;
55805594 NewLdDstTy = s32;
55815595 }
55825596 } else if (MemSizeBits == 32 ) {
55835597 if (IsSExt) {
55845598 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
55855599 NewLdDstTy = s64;
5600+ } else if (IsFPR) {
5601+ Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5602+ InsertIntoSubReg = AArch64::ssub;
5603+ NewLdDstTy = LLT::scalar (MemSizeBits);
55865604 } else {
55875605 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5588- InsertIntoXReg = IsDst64;
5606+ InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0 ;
55895607 NewLdDstTy = s32;
55905608 }
55915609 } else {
55925610 llvm_unreachable (" Unexpected size for indexed load" );
55935611 }
55945612
5595- if (RBI.getRegBank (Dst, MRI, TRI)->getID () == AArch64::FPRRegBankID)
5596- return false ; // We should be on gpr.
5597-
55985613 auto Cst = getIConstantVRegVal (Offset, MRI);
55995614 if (!Cst)
56005615 return false ; // Shouldn't happen, but just in case.
@@ -5604,15 +5619,18 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
56045619 LdMI.cloneMemRefs (ExtLd);
56055620 constrainSelectedInstRegOperands (*LdMI, TII, TRI, RBI);
56065621 // Make sure to select the load with the MemTy as the dest type, and then
5607- // insert into X reg if needed.
5608- if (InsertIntoXReg ) {
5622+ // insert into a larger reg if needed.
5623+ if (InsertIntoSubReg ) {
56095624 // Generate a SUBREG_TO_REG.
56105625 auto SubToReg = MIB.buildInstr (TargetOpcode::SUBREG_TO_REG, {Dst}, {})
56115626 .addImm (0 )
56125627 .addUse (LdMI.getReg (1 ))
5613- .addImm (AArch64::sub_32);
5614- RBI.constrainGenericRegister (SubToReg.getReg (0 ), AArch64::GPR64RegClass,
5615- MRI);
5628+ .addImm (InsertIntoSubReg);
5629+ RBI.constrainGenericRegister (
5630+ SubToReg.getReg (0 ),
5631+ *getRegClassForTypeOnBank (MRI.getType (Dst),
5632+ *RBI.getRegBank (Dst, MRI, TRI)),
5633+ MRI);
56165634 } else {
56175635 auto Copy = MIB.buildCopy (Dst, LdMI.getReg (1 ));
56185636 selectCopy (*Copy, TII, MRI, TRI, RBI);
0 commit comments