diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 278dd95cd969d..d4a114c275fb7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14010,6 +14010,23 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, dl); } + // Check for a "select shuffle", generating a BSL to pick between lanes in + // V1/V2. + if (ShuffleVectorInst::isSelectMask(ShuffleMask, NumElts)) { + assert(VT.getScalarSizeInBits() <= 32 && + "Expected larger vector element sizes to be handled already"); + SmallVector MaskElts; + for (int M : ShuffleMask) + MaskElts.push_back(DAG.getConstant( + M >= static_cast(NumElts) ? 0 : 0xffffffff, dl, MVT::i32)); + EVT IVT = VT.changeVectorElementTypeToInteger(); + SDValue MaskConst = DAG.getBuildVector(IVT, dl, MaskElts); + return DAG.getBitcast(VT, DAG.getNode(AArch64ISD::BSP, dl, IVT, MaskConst, + DAG.getBitcast(IVT, V1), + DAG.getBitcast(IVT, V2))); + } + + // Fall back to generating a TBL return GenerateTBL(Op, ShuffleMask, DAG); } diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index 79645e32074c8..9fb8e4c8fe031 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -2537,14 +2537,13 @@ entry: define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coerce) { ; CHECK-LABEL: cmplx_mul_combined_re_im: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x9, x0, #16 -; CHECK-NEXT: adrp x8, .LCPI196_0 +; CHECK-NEXT: lsr x8, x0, #16 +; CHECK-NEXT: movi v1.2d, #0xffff0000ffff0000 ; CHECK-NEXT: fmov d5, x0 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI196_0] ; CHECK-NEXT: rev32 v4.8h, v0.8h -; CHECK-NEXT: dup v1.8h, w9 -; CHECK-NEXT: sqneg v2.8h, v1.8h -; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b +; CHECK-NEXT: dup v2.8h, w8 +; CHECK-NEXT: sqneg v3.8h, v2.8h +; CHECK-NEXT: bsl v1.16b, v2.16b, v3.16b ; CHECK-NEXT: sqdmull v2.4s, v0.4h, v5.h[0] ; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v5.h[0] ; CHECK-NEXT: sqdmlal v2.4s, v4.4h, v1.4h diff --git a/llvm/test/CodeGen/AArch64/shuffle-select.ll b/llvm/test/CodeGen/AArch64/shuffle-select.ll index eeccaa170397d..f4e7b314d2001 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-select.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-select.ll @@ -4,12 +4,8 @@ define <8 x i8> @sel_v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: sel_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b +; CHECK-NEXT: movi d2, #0xff00ff00ff00ff +; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ret <8 x i8> %tmp0 @@ -18,11 +14,8 @@ define <8 x i8> @sel_v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <16 x i8> @sel_v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: sel_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: movi v2.2d, #0xff00ff00ff00ff +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ret <16 x i8> %tmp0 @@ -32,10 +25,8 @@ define <16 x i8> @sel_v16i8_poison(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: sel_v16i8_poison: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ret <16 x i8> %tmp0 @@ -45,10 +36,8 @@ define <16 x i8> @sel_v16i8_unregular(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: sel_v16i8_unregular: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ret <16 x i8> %tmp0 @@ -67,11 +56,8 @@ define <4 x i16> @sel_v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <8 x i16> @sel_v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: sel_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ret <8 x i16> %tmp0 @@ -121,11 +107,8 @@ define <4 x half> @sel_v4f16(<4 x half> %v0, <4 x half> %v1) { define <8 x half> @sel_v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: sel_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI10_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ret <8 x half> %tmp0