From 66e79e28dd2ba0e8017eefa6837e7533c54392cb Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Mon, 14 Aug 2023 11:16:04 +0200 Subject: [PATCH 1/2] [AArch64][GlobalISel] Adopt some Ld1Lane* patterns for GlobalISel to reduce codegen regressions Additionally, ignore G_CONSTANTs between adjacent instructions in isObviouslySafeToFold(). Without that, the new patterns don't have an impact. --- .../GlobalISel/GIMatchTableExecutor.cpp | 14 +- .../lib/Target/AArch64/AArch64InstrFormats.td | 3 + llvm/lib/Target/AArch64/AArch64InstrGISel.td | 36 ++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 + .../GISel/AArch64InstructionSelector.cpp | 11 ++ .../AArch64/arm64-indexed-vector-ldst.ll | 80 ++++----- llvm/test/CodeGen/AArch64/arm64-ld1.ll | 164 +++++------------- 7 files changed, 140 insertions(+), 170 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp index 26752369a7711..1e8dffa26f9e1 100644 --- a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #define DEBUG_TYPE "gi-match-table-executor" @@ -62,9 +63,16 @@ bool GIMatchTableExecutor::isBaseWithConstantOffset( bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI, MachineInstr &IntoMI) const { // Immediate neighbours are already folded. - if (MI.getParent() == IntoMI.getParent() && - std::next(MI.getIterator()) == IntoMI.getIterator()) - return true; + // Any G_CONSTANT between immediate neighbours can be ignored. + if (MI.getParent() == IntoMI.getParent()) { + auto IntoIt = IntoMI.getIterator(); + auto NextIt = std::next(MI.getIterator()); + while (!NextIt.isEnd() && NextIt != IntoIt && + NextIt->getOpcode() == TargetOpcode::G_CONSTANT) + ++NextIt; + if (NextIt == IntoIt) + return true; + } // Convergent instructions cannot be moved in the CFG. if (MI.isConvergent() && MI.getParent() != IntoMI.getParent()) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index e5dbfa404b3c6..430ed2612f72e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -484,6 +484,9 @@ def UImmS8XForm : SDNodeXFormgetTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i64); }]>; +def gi_UImmS1XForm : GICustomOperandRenderer<"renderUImmS1">, + GISDNodeXFormEquiv; + // uimm5sN predicate - True if the immediate is a multiple of N in the range // [0 * N, 32 * N]. def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 27338bd243933..1dd7ae90a24e6 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -271,6 +271,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; @@ -528,3 +529,38 @@ def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv2d GPR64sp:$Rn)>; def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv1d GPR64sp:$Rn)>; + +class Ld1Lane64PatGISel + : Pat<(insertelt (VTy VecListOne64:$Rd), + (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (EXTRACT_SUBREG + (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), + (UImmS1XForm VecIndex:$idx), GPR64sp:$Rn), + dsub)>; + +class Ld1Lane128PatGISel + : Pat<(insertelt (VTy VecListOne128:$Rd), + (STy (load GPR64sp:$Rn)), VecIndex:$idx), + (LD1 VecListOne128:$Rd, (UImmS1XForm VecIndex:$idx), GPR64sp:$Rn)>; + +// Enable these patterns only for GlobalISel, since +// SelectionDAG analogues only select insertelt with i32 indices. +let Predicates = [OnlyGISel] in { + def : Ld1Lane64PatGISel; + def : Ld1Lane64PatGISel; + def : Ld1Lane64PatGISel; + def : Ld1Lane64PatGISel; + def : Ld1Lane64PatGISel; + def : Ld1Lane64PatGISel; + + def : Ld1Lane128PatGISel; + def : Ld1Lane128PatGISel; + def : Ld1Lane128PatGISel; + def : Ld1Lane128PatGISel; + def : Ld1Lane128PatGISel; + def : Ld1Lane128PatGISel; + def : Ld1Lane128PatGISel; + def : Ld1Lane128PatGISel; +} diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index df59dc4ad27fa..08877329373b7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -872,6 +872,8 @@ let RecomputePerFunction = 1 in { def SLSBLRMitigation : Predicate<[{ MF->getSubtarget().hardenSlsBlr() }]>; def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget().hardenSlsBlr() }]>; + + def OnlyGISel : Predicate<"MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; // Toggles patterns which aren't beneficial in GlobalISel when we aren't // optimizing. This allows us to selectively use patterns without impacting // SelectionDAG's behaviour. diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 1c7a09696e853..212f6d2ee287a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -479,6 +479,9 @@ class AArch64InstructionSelector : public InstructionSelector { const MachineInstr &MI, int OpIdx = -1) const; + void renderUImmS1(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx = -1) const; + // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); @@ -7593,6 +7596,14 @@ void AArch64InstructionSelector::renderFPImm32SIMDModImmType4( .getZExtValue())); } +void AArch64InstructionSelector::renderUImmS1(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + MIB.addImm(MI.getOperand(1).getCImm()->getZExtValue()); +} + bool AArch64InstructionSelector::isLoadStoreOfNumBytes( const MachineInstr &MI, unsigned NumBytes) const { if (!MI.mayLoadOrStore()) diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 2cab4932def07..9d4e4e36ba83d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -14283,10 +14283,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <16 x i8> %A) ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr b1, [x0] +; CHECK-GISEL-NEXT: ld1.b { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, #1 ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.b v0[1], v1[0] ; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1 @@ -14304,10 +14303,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16 ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr b1, [x0] +; CHECK-GISEL-NEXT: ld1.b { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, x2 ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.b v0[1], v1[0] ; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1 @@ -14327,11 +14325,10 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) { ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr b1, [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-GISEL-NEXT: add x8, x0, #1 +; CHECK-GISEL-NEXT: ld1.b { v0 }[1], [x0] ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.b v0[1], v1[0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar @@ -14352,11 +14349,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr b1, [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-GISEL-NEXT: add x8, x0, x2 +; CHECK-GISEL-NEXT: ld1.b { v0 }[1], [x0] ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.b v0[1], v1[0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar @@ -14375,10 +14371,9 @@ define <8 x i16> @test_v8i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i16> %A) ; ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr h1, [x0] +; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, #2 ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.h v0[1], v1[0] ; CHECK-GISEL-NEXT: ret %tmp1 = load i16, ptr %bar %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1 @@ -14397,9 +14392,8 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x ; ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr h1, [x0] +; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GISEL-NEXT: mov.h v0[1], v1[0] ; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load i16, ptr %bar @@ -14420,11 +14414,10 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A) ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr h1, [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-GISEL-NEXT: add x8, x0, #2 +; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0] ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.h v0[1], v1[0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-GISEL-NEXT: ret %tmp1 = load i16, ptr %bar @@ -14446,12 +14439,11 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr h1, [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GISEL-NEXT: mov.h v0[1], v1[0] -; CHECK-GISEL-NEXT: str x8, [x1] +; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load i16, ptr %bar %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1 @@ -14469,10 +14461,9 @@ define <4 x i32> @test_v4i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i32> %A) ; ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr s1, [x0] +; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, #4 ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.s v0[1], v1[0] ; CHECK-GISEL-NEXT: ret %tmp1 = load i32, ptr %bar %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1 @@ -14491,9 +14482,8 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x ; ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr s1, [x0] +; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GISEL-NEXT: mov.s v0[1], v1[0] ; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load i32, ptr %bar @@ -14514,11 +14504,10 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A) ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr s1, [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-GISEL-NEXT: add x8, x0, #4 +; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0] ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.s v0[1], v1[0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-GISEL-NEXT: ret %tmp1 = load i32, ptr %bar @@ -14540,12 +14529,11 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr s1, [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GISEL-NEXT: mov.s v0[1], v1[0] -; CHECK-GISEL-NEXT: str x8, [x1] +; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load i32, ptr %bar %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1 @@ -14563,10 +14551,9 @@ define <2 x i64> @test_v2i64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i64> %A) ; ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr d1, [x0] +; CHECK-GISEL-NEXT: ld1.d { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, #8 ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.d v0[1], v1[0] ; CHECK-GISEL-NEXT: ret %tmp1 = load i64, ptr %bar %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1 @@ -14585,9 +14572,8 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x ; ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr d1, [x0] +; CHECK-GISEL-NEXT: ld1.d { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GISEL-NEXT: mov.d v0[1], v1[0] ; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load i64, ptr %bar @@ -14606,10 +14592,9 @@ define <4 x float> @test_v4f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x float> ; ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr s1, [x0] +; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, #4 ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.s v0[1], v1[0] ; CHECK-GISEL-NEXT: ret %tmp1 = load float, ptr %bar %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1 @@ -14628,9 +14613,8 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 ; ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr s1, [x0] +; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GISEL-NEXT: mov.s v0[1], v1[0] ; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load float, ptr %bar @@ -14651,11 +14635,10 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float> ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr s1, [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-GISEL-NEXT: add x8, x0, #4 +; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0] ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.s v0[1], v1[0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-GISEL-NEXT: ret %tmp1 = load float, ptr %bar @@ -14677,12 +14660,11 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr s1, [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GISEL-NEXT: mov.s v0[1], v1[0] -; CHECK-GISEL-NEXT: str x8, [x1] +; CHECK-GISEL-NEXT: ld1.s { v0 }[1], [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load float, ptr %bar %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1 @@ -14700,10 +14682,9 @@ define <2 x double> @test_v2f64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x double ; ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr d1, [x0] +; CHECK-GISEL-NEXT: ld1.d { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, #8 ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.d v0[1], v1[0] ; CHECK-GISEL-NEXT: ret %tmp1 = load double, ptr %bar %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1 @@ -14722,9 +14703,8 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, < ; ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld1lane: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr d1, [x0] +; CHECK-GISEL-NEXT: ld1.d { v0 }[1], [x0] ; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GISEL-NEXT: mov.d v0[1], v1[0] ; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load double, ptr %bar @@ -14779,15 +14759,14 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr, ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow: ; CHECK-GISEL: ; %bb.0: ; CHECK-GISEL-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GISEL-NEXT: ldr h1, [x0] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-GISEL-NEXT: ld1.h { v0 }[1], [x0] ; CHECK-GISEL-NEXT: str x8, [x1] -; CHECK-GISEL-NEXT: mov.h v0[1], v1[0] -; CHECK-GISEL-NEXT: ldr d2, [x3] ; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-GISEL-NEXT: cnt.8b v2, v2 -; CHECK-GISEL-NEXT: uaddlp.4h v2, v2 -; CHECK-GISEL-NEXT: uaddlp.2s v1, v2 +; CHECK-GISEL-NEXT: ldr d1, [x3] +; CHECK-GISEL-NEXT: cnt.8b v1, v1 +; CHECK-GISEL-NEXT: uaddlp.4h v1, v1 +; CHECK-GISEL-NEXT: uaddlp.2s v1, v1 ; CHECK-GISEL-NEXT: str d1, [x3] ; CHECK-GISEL-NEXT: ret %tmp1 = load i16, ptr %bar @@ -14987,9 +14966,8 @@ define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) { ; ; CHECK-GISEL-LABEL: test_inc_cycle: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr s1, [x0] +; CHECK-GISEL-NEXT: ld1.s { v0 }[0], [x0] ; CHECK-GISEL-NEXT: adrp x9, _var@PAGE -; CHECK-GISEL-NEXT: mov.s v0[0], v1[0] ; CHECK-GISEL-NEXT: fmov x8, d0 ; CHECK-GISEL-NEXT: add x8, x0, x8, lsl #2 ; CHECK-GISEL-NEXT: str x8, [x9, _var@PAGEOFF] diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll index 54b96520dce41..4cef1f966c838 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ -1004,16 +1004,10 @@ declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwin declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr b1, [x0] -; CHECK-GI-NEXT: mov.b v0[0], v1[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1.b { v0 }[0], [x0] +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0 @@ -1021,16 +1015,10 @@ define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) { } define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_8h: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1.h { v0 }[0], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_8h: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr h1, [x0] -; CHECK-GI-NEXT: mov.h v0[0], v1[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1.h { v0 }[0], [x0] +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load i16, ptr %bar %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0 @@ -1038,16 +1026,10 @@ define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) { } define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr s1, [x0] -; CHECK-GI-NEXT: mov.s v0[0], v1[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1.s { v0 }[0], [x0] +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load i32, ptr %bar %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0 @@ -1055,16 +1037,10 @@ define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) { } define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_4s_float: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_4s_float: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr s1, [x0] -; CHECK-GI-NEXT: mov.s v0[0], v1[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_4s_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1.s { v0 }[0], [x0] +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load float, ptr %bar %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0 @@ -1072,16 +1048,10 @@ define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) { } define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_2d: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1.d { v0 }[0], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_2d: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr d1, [x0] -; CHECK-GI-NEXT: mov.d v0[0], v1[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1.d { v0 }[0], [x0] +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load i64, ptr %bar %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0 @@ -1089,16 +1059,10 @@ define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) { } define <2 x double> @ld1_2d_double(<2 x double> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_2d_double: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1.d { v0 }[0], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_2d_double: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr d1, [x0] -; CHECK-GI-NEXT: mov.d v0[0], v1[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_2d_double: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1.d { v0 }[0], [x0] +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load double, ptr %bar %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0 @@ -1116,20 +1080,12 @@ define <1 x i64> @ld1_1d(ptr %p) { } define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr b1, [x0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov.b v0[0], v1[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.b { v0 }[0], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load i8, ptr %bar %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0 @@ -1137,20 +1093,12 @@ define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) { } define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_4h: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ld1.h { v0 }[0], [x0] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_4h: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr h1, [x0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov.h v0[0], v1[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_4h: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.h { v0 }[0], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load i16, ptr %bar %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0 @@ -1158,20 +1106,12 @@ define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) { } define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_2s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_2s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr s1, [x0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov.s v0[0], v1[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.s { v0 }[0], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load i32, ptr %bar %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0 @@ -1179,20 +1119,12 @@ define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) { } define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_2s_float: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_2s_float: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr s1, [x0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov.s v0[0], v1[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_2s_float: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.s { v0 }[0], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load float, ptr %bar %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0 From 2cc56b6eafaa26ce948ecf5f76130b04740d92cd Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Fri, 27 Oct 2023 16:58:05 +0200 Subject: [PATCH 2/2] Fix OnlyGISel condition --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 08877329373b7..571791d83b2f6 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -873,7 +873,7 @@ let RecomputePerFunction = 1 in { def SLSBLRMitigation : Predicate<[{ MF->getSubtarget().hardenSlsBlr() }]>; def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget().hardenSlsBlr() }]>; - def OnlyGISel : Predicate<"MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; + def OnlyGISel : Predicate<"!MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) && MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; // Toggles patterns which aren't beneficial in GlobalISel when we aren't // optimizing. This allows us to selectively use patterns without impacting // SelectionDAG's behaviour.