diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 51b42325ef842..65ea2f850bc55 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -5547,9 +5547,15 @@ bool AArch64InstructionSelector::selectIndexedExtLoad( unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits(); bool IsPre = ExtLd.isPre(); bool IsSExt = isa(ExtLd); - bool InsertIntoXReg = false; + unsigned InsertIntoSubReg = 0; bool IsDst64 = Ty.getSizeInBits() == 64; + // Sign-extending loads are only selected on gpr. Any-extending and + // zero-extending loads can also target fpr, as long as the result is scalar. + bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID; + if ((IsSExt && IsFPR) || Ty.isVector()) + return false; + unsigned Opc = 0; LLT NewLdDstTy; LLT s32 = LLT::scalar(32); @@ -5562,9 +5568,13 @@ bool AArch64InstructionSelector::selectIndexedExtLoad( else Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; NewLdDstTy = IsDst64 ? s64 : s32; + } else if (IsFPR) { + Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost; + InsertIntoSubReg = AArch64::bsub; + NewLdDstTy = LLT::scalar(MemSizeBits); } else { Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; - InsertIntoXReg = IsDst64; + InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0; NewLdDstTy = s32; } } else if (MemSizeBits == 16) { @@ -5574,27 +5584,32 @@ bool AArch64InstructionSelector::selectIndexedExtLoad( else Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; NewLdDstTy = IsDst64 ? s64 : s32; + } else if (IsFPR) { + Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; + InsertIntoSubReg = AArch64::hsub; + NewLdDstTy = LLT::scalar(MemSizeBits); } else { Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; - InsertIntoXReg = IsDst64; + InsertIntoSubReg = IsDst64 ? 
AArch64::sub_32 : 0; NewLdDstTy = s32; } } else if (MemSizeBits == 32) { if (IsSExt) { Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; NewLdDstTy = s64; + } else if (IsFPR) { + Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; + InsertIntoSubReg = AArch64::ssub; + NewLdDstTy = LLT::scalar(MemSizeBits); } else { Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; - InsertIntoXReg = IsDst64; + InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0; NewLdDstTy = s32; } } else { llvm_unreachable("Unexpected size for indexed load"); } - if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) - return false; // We should be on gpr. - auto Cst = getIConstantVRegVal(Offset, MRI); if (!Cst) return false; // Shouldn't happen, but just in case. @@ -5604,15 +5619,18 @@ bool AArch64InstructionSelector::selectIndexedExtLoad( LdMI.cloneMemRefs(ExtLd); constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI); // Make sure to select the load with the MemTy as the dest type, and then - // insert into X reg if needed. - if (InsertIntoXReg) { + // insert into a larger reg if needed. + if (InsertIntoSubReg) { // Generate a SUBREG_TO_REG. 
auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {}) .addImm(0) .addUse(LdMI.getReg(1)) - .addImm(AArch64::sub_32); - RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass, - MRI); + .addImm(InsertIntoSubReg); + RBI.constrainGenericRegister( + SubToReg.getReg(0), + *getRegClassForTypeOnBank(MRI.getType(Dst), + *RBI.getRegBank(Dst, MRI, TRI)), + MRI); } else { auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1)); selectCopy(*Copy, TII, MRI, TRI, RBI); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir new file mode 100644 index 0000000000000..80c2f8ca08608 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir @@ -0,0 +1,328 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -mattr=+fullfp16 -o - | FileCheck %s + +... +--- +name: load_s8_s16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s8_s16 + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost [[COPY]], 4 :: (load (s8)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr16 = SUBREG_TO_REG 0, %4, %subreg.bsub + ; CHECK-NEXT: $h0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s16), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8)) + $h0 = COPY %2(s16) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... 
+--- +name: load_s8_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s8_s32 + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost [[COPY]], 4 :: (load (s8)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.bsub + ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8)) + $s0 = COPY %2(s32) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... +--- +name: load_s8_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s8_s64 + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost [[COPY]], 4 :: (load (s8)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.bsub + ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8)) + $d0 = COPY %2(s64) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... 
+--- +name: load_s16_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s16_s32 + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpost [[COPY]], 4 :: (load (s16)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.hsub + ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s16)) + $s0 = COPY %2(s32) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... +--- +name: load_s16_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s16_s64 + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpost [[COPY]], 4 :: (load (s16)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.hsub + ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s16)) + $d0 = COPY %2(s64) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... 
+--- +name: load_s32_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s32_s64 + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr32 = LDRSpost [[COPY]], 4 :: (load (s32)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.ssub + ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s32)) + $d0 = COPY %2(s64) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... +--- +name: load_s8_s16_pre +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s8_s16_pre + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpre [[COPY]], 4 :: (load (s8)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr16 = SUBREG_TO_REG 0, %4, %subreg.bsub + ; CHECK-NEXT: $h0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s16), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s8)) + $h0 = COPY %2(s16) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... 
+--- +name: load_s8_s32_pre +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s8_s32_pre + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpre [[COPY]], 4 :: (load (s8)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.bsub + ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s8)) + $s0 = COPY %2(s32) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... +--- +name: load_s8_s64_pre +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s8_s64_pre + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpre [[COPY]], 4 :: (load (s8)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.bsub + ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s8)) + $d0 = COPY %2(s64) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... 
+--- +name: load_s16_s32_pre +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s16_s32_pre + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpre [[COPY]], 4 :: (load (s16)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.hsub + ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s16)) + $s0 = COPY %2(s32) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... +--- +name: load_s16_s64_pre +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s16_s64_pre + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpre [[COPY]], 4 :: (load (s16)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.hsub + ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s16)) + $d0 = COPY %2(s64) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... 
+--- +name: load_s32_s64_pre +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x0 + ; CHECK-LABEL: name: load_s32_s64_pre + ; CHECK: liveins: $d0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr32 = LDRSpre [[COPY]], 4 :: (load (s32)) + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.ssub + ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4 + %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s32)) + $d0 = COPY %2(s64) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $d0 + +... diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll index d949f95209577..cb5df07c7ede4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -725,3 +725,46 @@ define ptr @postidx64_sw(ptr %src, ptr %out) { store i64 %sext, ptr %out, align 8 ret ptr %ptr } + +define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) { +; CHECK64-LABEL: postidx32_shalf: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: ldr h1, [x0], #4 +; CHECK64-NEXT: ; kill: def $h0 killed $h0 def $s0 +; CHECK64-NEXT: fcvt s2, h1 +; CHECK64-NEXT: fcmp s2, #0.0 +; CHECK64-NEXT: fcsel s0, s1, s0, mi +; CHECK64-NEXT: str h0, [x1] +; CHECK64-NEXT: ret +; +; GISEL-LABEL: postidx32_shalf: +; GISEL: ; %bb.0: +; GISEL-NEXT: mov w8, #0 ; =0x0 +; GISEL-NEXT: ldr h1, [x0], #4 +; GISEL-NEXT: fmov s2, w8 +; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0 +; GISEL-NEXT: fmov w9, s0 +; GISEL-NEXT: fcvt s3, h1 +; GISEL-NEXT: fmov w8, s1 +; GISEL-NEXT: fcvt s2, h2 +; GISEL-NEXT: fcmp s3, s2 +; GISEL-NEXT: csel w8, w8, w9, mi +; GISEL-NEXT: strh w8, [x1] +; GISEL-NEXT: ret +; +; CHECK32-LABEL: 
postidx32_shalf: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: ldr h1, [x0], #4 +; CHECK32-NEXT: ; kill: def $h0 killed $h0 def $s0 +; CHECK32-NEXT: fcvt s2, h1 +; CHECK32-NEXT: fcmp s2, #0.0 +; CHECK32-NEXT: fcsel s0, s1, s0, mi +; CHECK32-NEXT: str h0, [x1] +; CHECK32-NEXT: ret + %tmp = load half, ptr %src, align 2 + %ptr = getelementptr inbounds i32, ptr %src, i64 1 + %c = fcmp olt half %tmp, 0.0 + %s = select i1 %c, half %tmp, half %a + store half %s, ptr %out, align 8 + ret ptr %ptr +}