Skip to content

Commit 9b1360e

Browse files
committed
[AArch64][GlobalISel] Allow selecting FPR index loads.
We can, through legalization of certain operations, end up generating G_INDEXED_LOAD into FPR registers that require extensions. SExt and ZExt will always opt for GPR, but anyext/noext can currently be set to FPR registers in regbankselect. As writing a subregister will set higher bits in the same register to 0, we can successfully handle zext and anyext on FPR registers, which is what this patch attempts to add.
1 parent a93e55e commit 9b1360e

File tree

3 files changed

+401
-12
lines changed

3 files changed

+401
-12
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5547,9 +5547,15 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55475547
unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
55485548
bool IsPre = ExtLd.isPre();
55495549
bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5550-
bool InsertIntoXReg = false;
5550+
unsigned InsertIntoSubReg = 0;
55515551
bool IsDst64 = Ty.getSizeInBits() == 64;
55525552

5553+
// ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
5554+
// long as they are scalar.
5555+
bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5556+
if ((IsSExt && IsFPR) || Ty.isVector())
5557+
return false;
5558+
55535559
unsigned Opc = 0;
55545560
LLT NewLdDstTy;
55555561
LLT s32 = LLT::scalar(32);
@@ -5562,9 +5568,13 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55625568
else
55635569
Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
55645570
NewLdDstTy = IsDst64 ? s64 : s32;
5571+
} else if (IsFPR) {
5572+
Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5573+
InsertIntoSubReg = AArch64::bsub;
5574+
NewLdDstTy = LLT::scalar(MemSizeBits);
55655575
} else {
55665576
Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5567-
InsertIntoXReg = IsDst64;
5577+
InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
55685578
NewLdDstTy = s32;
55695579
}
55705580
} else if (MemSizeBits == 16) {
@@ -5574,27 +5584,32 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
55745584
else
55755585
Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
55765586
NewLdDstTy = IsDst64 ? s64 : s32;
5587+
} else if (IsFPR) {
5588+
Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5589+
InsertIntoSubReg = AArch64::hsub;
5590+
NewLdDstTy = LLT::scalar(MemSizeBits);
55775591
} else {
55785592
Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5579-
InsertIntoXReg = IsDst64;
5593+
InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
55805594
NewLdDstTy = s32;
55815595
}
55825596
} else if (MemSizeBits == 32) {
55835597
if (IsSExt) {
55845598
Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
55855599
NewLdDstTy = s64;
5600+
} else if (IsFPR) {
5601+
Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5602+
InsertIntoSubReg = AArch64::ssub;
5603+
NewLdDstTy = LLT::scalar(MemSizeBits);
55865604
} else {
55875605
Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5588-
InsertIntoXReg = IsDst64;
5606+
InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
55895607
NewLdDstTy = s32;
55905608
}
55915609
} else {
55925610
llvm_unreachable("Unexpected size for indexed load");
55935611
}
55945612

5595-
if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5596-
return false; // We should be on gpr.
5597-
55985613
auto Cst = getIConstantVRegVal(Offset, MRI);
55995614
if (!Cst)
56005615
return false; // Shouldn't happen, but just in case.
@@ -5604,15 +5619,18 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
56045619
LdMI.cloneMemRefs(ExtLd);
56055620
constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
56065621
// Make sure to select the load with the MemTy as the dest type, and then
5607-
// insert into X reg if needed.
5608-
if (InsertIntoXReg) {
5622+
// insert into a larger reg if needed.
5623+
if (InsertIntoSubReg) {
56095624
// Generate a SUBREG_TO_REG.
56105625
auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
56115626
.addImm(0)
56125627
.addUse(LdMI.getReg(1))
5613-
.addImm(AArch64::sub_32);
5614-
RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5615-
MRI);
5628+
.addImm(InsertIntoSubReg);
5629+
RBI.constrainGenericRegister(
5630+
SubToReg.getReg(0),
5631+
*getRegClassForTypeOnBank(MRI.getType(Dst),
5632+
*RBI.getRegBank(Dst, MRI, TRI)),
5633+
MRI);
56165634
} else {
56175635
auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
56185636
selectCopy(*Copy, TII, MRI, TRI, RBI);

0 commit comments

Comments
 (0)