[AArch64] Enable using SVE2 bit-sel instructions with Neon types. #146906
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: Ricardo Jesus (rj-jesus)

Changes:
This affects EOR3/BCAX/BSL/NBSL/BSL1N/BSL2N. This had initially been discussed in #138689 (comment).

Patch is 22.04 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/146906.diff

5 Files Affected:
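As a rough illustration of what the patch enables (a sketch mirroring the nbsl_v2i64 test added below, with a made-up function name; not output copied from the compiler), fixed-width IR of this shape can now select a single SVE2 NBSL under -mattr=+sve2:

; Computes ~((x & z) | (y & ~z)) on a Neon-width vector; with +sve2 this
; can lower to one NBSL (plus register copies) instead of BIF + MVN.
define <2 x i64> @nbsl_example(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
  %a    = and <2 x i64> %z, %x                  ; x & z
  %notz = xor <2 x i64> %z, splat (i64 -1)      ; ~z
  %b    = and <2 x i64> %y, %notz               ; y & ~z
  %sel  = or  <2 x i64> %a, %b                  ; bitwise select
  %res  = xor <2 x i64> %sel, splat (i64 -1)    ; final inversion
  ret <2 x i64> %res
}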
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 848b1c30bbeb5..d5c12a9658113 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5368,6 +5368,19 @@ multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm,
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
+
+ // Allow selecting SVE2 ternary ops with Neon types.
+ foreach VT = [nxv16i8, nxv8i16, nxv4i32, nxv2i64] in {
+ def : Pat<(SVEType<VT>.DSub (op V64:$op1, V64:$op2, V64:$op3)),
+ (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $op1, dsub),
+ (INSERT_SUBREG (IMPLICIT_DEF), $op2, dsub),
+ (INSERT_SUBREG (IMPLICIT_DEF), $op3, dsub)), dsub)>;
+
+ def : Pat<(SVEType<VT>.ZSub (op V128:$op1, V128:$op2, V128:$op3)),
+ (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $op1, zsub),
+ (INSERT_SUBREG (IMPLICIT_DEF), $op2, zsub),
+ (INSERT_SUBREG (IMPLICIT_DEF), $op3, zsub)), zsub)>;
+ }
}
class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
diff --git a/llvm/test/CodeGen/AArch64/bcax.ll b/llvm/test/CodeGen/AArch64/bcax.ll
index e3c73c36e534b..e4eb608c2545f 100644
--- a/llvm/test/CodeGen/AArch64/bcax.ll
+++ b/llvm/test/CodeGen/AArch64/bcax.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s
define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
; SHA3-LABEL: bcax_64x2:
@@ -13,6 +14,15 @@ define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b
; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: bcax_64x2:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT: mov v0.16b, v2.16b
+; SVE2-NEXT: ret
%4 = xor <2 x i64> %1, <i64 -1, i64 -1>
%5 = and <2 x i64> %4, %0
%6 = xor <2 x i64> %5, %2
@@ -30,6 +40,15 @@ define <4 x i32> @bcax_32x4(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b
; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: bcax_32x4:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT: mov v0.16b, v2.16b
+; SVE2-NEXT: ret
%4 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
%5 = and <4 x i32> %4, %0
%6 = xor <4 x i32> %5, %2
@@ -47,6 +66,15 @@ define <8 x i16> @bcax_16x8(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b
; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: bcax_16x8:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT: mov v0.16b, v2.16b
+; SVE2-NEXT: ret
%4 = xor <8 x i16> %1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%5 = and <8 x i16> %4, %0
%6 = xor <8 x i16> %5, %2
@@ -64,6 +92,15 @@ define <16 x i8> @bcax_8x16(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b
; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: bcax_8x16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT: mov v0.16b, v2.16b
+; SVE2-NEXT: ret
%4 = xor <16 x i8> %1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%5 = and <16 x i8> %4, %0
%6 = xor <16 x i8> %5, %2
diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll
new file mode 100644
index 0000000000000..b672a446e579e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/bsl.ll
@@ -0,0 +1,325 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=NEON
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefix=SVE2
+
+; Test SVE2 BSL/NBSL/BSL1N/BSL2N code generation for:
+; #define BSL(x,y,z) ( ((x) & (z)) | ( (y) & ~(z)))
+; #define NBSL(x,y,z) (~(((x) & (z)) | ( (y) & ~(z))))
+; #define BSL1N(x,y,z) ( (~(x) & (z)) | ( (y) & ~(z)))
+; #define BSL2N(x,y,z) ( ((x) & (z)) | (~(y) & ~(z)))
+;
+; See also llvm/test/CodeGen/AArch64/sve2-bsl.ll.
+
+; Test basic codegen.
+
+define <1 x i64> @bsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
+; NEON-LABEL: bsl_v1i64:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: bsl_v1i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: bif v0.8b, v1.8b, v2.8b
+; SVE2-NEXT: ret
+ %4 = and <1 x i64> %2, %0
+ %5 = xor <1 x i64> %2, splat (i64 -1)
+ %6 = and <1 x i64> %1, %5
+ %7 = or <1 x i64> %4, %6
+ ret <1 x i64> %7
+}
+
+define <1 x i64> @nbsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
+; NEON-LABEL: nbsl_v1i64:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: mvn v0.8b, v0.8b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: nbsl_v1i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <1 x i64> %2, %0
+ %5 = xor <1 x i64> %2, splat (i64 -1)
+ %6 = and <1 x i64> %1, %5
+ %7 = or <1 x i64> %4, %6
+ %8 = xor <1 x i64> %7, splat (i64 -1)
+ ret <1 x i64> %8
+}
+
+define <1 x i64> @bsl1n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
+; NEON-LABEL: bsl1n_v1i64:
+; NEON: // %bb.0:
+; NEON-NEXT: mvn v0.8b, v0.8b
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: bsl1n_v1i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: ret
+ %4 = xor <1 x i64> %0, splat (i64 -1)
+ %5 = and <1 x i64> %2, %4
+ %6 = xor <1 x i64> %2, splat (i64 -1)
+ %7 = and <1 x i64> %1, %6
+ %8 = or <1 x i64> %5, %7
+ ret <1 x i64> %8
+}
+
+define <1 x i64> @bsl2n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
+; NEON-LABEL: bsl2n_v1i64:
+; NEON: // %bb.0:
+; NEON-NEXT: and v0.8b, v2.8b, v0.8b
+; NEON-NEXT: orr v1.8b, v2.8b, v1.8b
+; NEON-NEXT: orn v0.8b, v0.8b, v1.8b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: bsl2n_v1i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <1 x i64> %2, %0
+ %5 = or <1 x i64> %2, %1
+ %6 = xor <1 x i64> %5, splat (i64 -1)
+ %7 = or <1 x i64> %4, %6
+ ret <1 x i64> %7
+}
+
+define <2 x i64> @bsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
+; NEON-LABEL: bsl_v2i64:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: bsl_v2i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: bif v0.16b, v1.16b, v2.16b
+; SVE2-NEXT: ret
+ %4 = and <2 x i64> %2, %0
+ %5 = xor <2 x i64> %2, splat (i64 -1)
+ %6 = and <2 x i64> %1, %5
+ %7 = or <2 x i64> %4, %6
+ ret <2 x i64> %7
+}
+
+define <2 x i64> @nbsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
+; NEON-LABEL: nbsl_v2i64:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: nbsl_v2i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <2 x i64> %2, %0
+ %5 = xor <2 x i64> %2, splat (i64 -1)
+ %6 = and <2 x i64> %1, %5
+ %7 = or <2 x i64> %4, %6
+ %8 = xor <2 x i64> %7, splat (i64 -1)
+ ret <2 x i64> %8
+}
+
+define <2 x i64> @bsl1n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
+; NEON-LABEL: bsl1n_v2i64:
+; NEON: // %bb.0:
+; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: bsl1n_v2i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: ret
+ %4 = xor <2 x i64> %0, splat (i64 -1)
+ %5 = and <2 x i64> %2, %4
+ %6 = xor <2 x i64> %2, splat (i64 -1)
+ %7 = and <2 x i64> %1, %6
+ %8 = or <2 x i64> %5, %7
+ ret <2 x i64> %8
+}
+
+define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
+; NEON-LABEL: bsl2n_v2i64:
+; NEON: // %bb.0:
+; NEON-NEXT: and v0.16b, v2.16b, v0.16b
+; NEON-NEXT: orr v1.16b, v2.16b, v1.16b
+; NEON-NEXT: orn v0.16b, v0.16b, v1.16b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: bsl2n_v2i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <2 x i64> %2, %0
+ %5 = or <2 x i64> %2, %1
+ %6 = xor <2 x i64> %5, splat (i64 -1)
+ %7 = or <2 x i64> %4, %6
+ ret <2 x i64> %7
+}
+
+; Test other element types.
+
+define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
+; NEON-LABEL: nbsl_v8i8:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: mvn v0.8b, v0.8b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: nbsl_v8i8:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <8 x i8> %2, %0
+ %5 = xor <8 x i8> %2, splat (i8 -1)
+ %6 = and <8 x i8> %1, %5
+ %7 = or <8 x i8> %4, %6
+ %8 = xor <8 x i8> %7, splat (i8 -1)
+ ret <8 x i8> %8
+}
+
+define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
+; NEON-LABEL: nbsl_v4i16:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: mvn v0.8b, v0.8b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: nbsl_v4i16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <4 x i16> %2, %0
+ %5 = xor <4 x i16> %2, splat (i16 -1)
+ %6 = and <4 x i16> %1, %5
+ %7 = or <4 x i16> %4, %6
+ %8 = xor <4 x i16> %7, splat (i16 -1)
+ ret <4 x i16> %8
+}
+
+define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
+; NEON-LABEL: nbsl_v2i32:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: mvn v0.8b, v0.8b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: nbsl_v2i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <2 x i32> %2, %0
+ %5 = xor <2 x i32> %2, splat (i32 -1)
+ %6 = and <2 x i32> %1, %5
+ %7 = or <2 x i32> %4, %6
+ %8 = xor <2 x i32> %7, splat (i32 -1)
+ ret <2 x i32> %8
+}
+
+define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
+; NEON-LABEL: nbsl_v16i8:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: nbsl_v16i8:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <16 x i8> %2, %0
+ %5 = xor <16 x i8> %2, splat (i8 -1)
+ %6 = and <16 x i8> %1, %5
+ %7 = or <16 x i8> %4, %6
+ %8 = xor <16 x i8> %7, splat (i8 -1)
+ ret <16 x i8> %8
+}
+
+define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
+; NEON-LABEL: nbsl_v8i16:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: nbsl_v8i16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <8 x i16> %2, %0
+ %5 = xor <8 x i16> %2, splat (i16 -1)
+ %6 = and <8 x i16> %1, %5
+ %7 = or <8 x i16> %4, %6
+ %8 = xor <8 x i16> %7, splat (i16 -1)
+ ret <8 x i16> %8
+}
+
+define <4 x i32> @nbsl_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
+; NEON-LABEL: nbsl_v4i32:
+; NEON: // %bb.0:
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: ret
+;
+; SVE2-LABEL: nbsl_v4i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: ret
+ %4 = and <4 x i32> %2, %0
+ %5 = xor <4 x i32> %2, splat (i32 -1)
+ %6 = and <4 x i32> %1, %5
+ %7 = or <4 x i32> %4, %6
+ %8 = xor <4 x i32> %7, splat (i32 -1)
+ ret <4 x i32> %8
+}
diff --git a/llvm/test/CodeGen/AArch64/eor3.ll b/llvm/test/CodeGen/AArch64/eor3.ll
index 06ae6b09d002e..a83b425251c3e 100644
--- a/llvm/test/CodeGen/AArch64/eor3.ll
+++ b/llvm/test/CodeGen/AArch64/eor3.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s
define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
; SHA3-LABEL: eor3_16x8_left:
@@ -13,6 +14,15 @@ define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: eor3_16x8_left:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT: mov v0.16b, v2.16b
+; SVE2-NEXT: ret
%4 = xor <16 x i8> %0, %1
%5 = xor <16 x i8> %2, %4
ret <16 x i8> %5
@@ -29,6 +39,15 @@ define <16 x i8> @eor3_16x8_right(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b
; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: eor3_16x8_right:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT: mov v0.16b, v1.16b
+; SVE2-NEXT: ret
%4 = xor <16 x i8> %1, %2
%5 = xor <16 x i8> %4, %0
ret <16 x i8> %5
@@ -45,6 +64,15 @@ define <8 x i16> @eor3_8x16_left(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: eor3_8x16_left:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT: mov v0.16b, v2.16b
+; SVE2-NEXT: ret
%4 = xor <8 x i16> %0, %1
%5 = xor <8 x i16> %2, %4
ret <8 x i16> %5
@@ -61,6 +89,15 @@ define <8 x i16> @eor3_8x16_right(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b
; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: eor3_8x16_right:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT: mov v0.16b, v1.16b
+; SVE2-NEXT: ret
%4 = xor <8 x i16> %1, %2
%5 = xor <8 x i16> %4, %0
ret <8 x i16> %5
@@ -77,6 +114,15 @@ define <4 x i32> @eor3_4x32_left(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: eor3_4x32_left:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT: mov v0.16b, v2.16b
+; SVE2-NEXT: ret
%4 = xor <4 x i32> %0, %1
%5 = xor <4 x i32> %2, %4
ret <4 x i32> %5
@@ -93,6 +139,15 @@ define <4 x i32> @eor3_4x32_right(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b
; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: eor3_4x32_right:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT: mov v0.16b, v1.16b
+; SVE2-NEXT: ret
%4 = xor <4 x i32> %1, %2
%5 = xor <4 x i32> %4, %0
ret <4 x i32> %5
@@ -109,6 +164,15 @@ define <2 x i64> @eor3_2x64_left(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b
; NOSHA3-NEXT: ret
+;
+; SVE2-LABEL: eor3_2x64_left:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: // kill: def $q0 killed ...
[truncated]
Review thread on llvm/test/CodeGen/AArch64/bsl.ll (outdated):
Is it worth having at least one test in this file for fixed-width vectors that aren't 64 or 128 bits, e.g. <4 x i8> or <2 x i16>? I'd expect in this case they'd just get promoted to <4 x i16> or <2 x i32>.
Sure, I didn't do this initially because, as you say, those types will get promoted to something else, which should be unrelated to the patterns added in this patch. But I'm happy to add the tests if you think they're worth having!
I've added a few tests for <4 x i8>, please let me know if they're what you had in mind. :)
Oh I see. I was expecting that after type promotion the types would be legal (v4i8->v4i16 which is SVEType.DSub for VT=nxv8i16) and therefore start matching your isel patterns too, but perhaps the isel patterns are different due to masking? No worries, I guess the tests don't do any harm and maybe we can support them in future?
Yeah I think it's the masking that gets in the way, specifically after promoting v4i8->v4i16, the splat (i8 -1) is converted to a splat (i16 255), which no longer matches the main patterns defined in AArch64SVEInstrInfo. I don't think this is specific to the patterns defined in this patch, but it's certainly something we can revisit in the future. :)
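A sketch of that effect (illustrative IR with an assumed function name; the promotion itself happens on SelectionDAG nodes rather than in textual IR):

; After v4i8 -> v4i16 promotion, the inverted mask that started life as
; "xor <4 x i8> %z, splat (i8 -1)" effectively carries 255 per lane, which
; the NBSL/BSL1N patterns (matching an all-ones splat) no longer recognise.
define <4 x i16> @promoted_mask(<4 x i16> %z) {
  %notz = xor <4 x i16> %z, splat (i16 255)   ; not all-ones, so no match
  ret <4 x i16> %notz
}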
david-arm left a comment:
LGTM!
david-arm left a comment:
Actually, I just realised the tests are failing so I've temporarily removed my LGTM!
Erm I don't think the failures are related to the patch, let me try a rebase to see if that sorts it! |
This affects EOR3/BCAX/BSL/NBSL/BSL1N/BSL2N. https://godbolt.org/z/q96K5Ee53
(force-pushed from 79cc5fc to 385ab19)
david-arm left a comment:
LGTM! Looks like the linux build is passing now. :)
Review thread on llvm/lib/Target/AArch64/SVEInstrFormats.td:

  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;

  // Allow selecting SVE2 ternary ops with Neon types.
  foreach VT = [nxv16i8, nxv8i16, nxv4i32, nxv2i64] in {
Do you specifically care about the case when FEAT_SHA3 is not available? I ask because, when available, the NEON variants look to have less restrictive register requirements, especially for BCAX and EOR3, which have a dedicated result register.
If you just want to use the other instructions (bsl1n etc.), then a quick fix would be to pass the VT array into the class and only set the parameter for the instructions that are in addition to those available under FEAT_SHA3.
Thanks - passing VT into the class sounds good to me. I did check previously that the Neon BCAX/EOR3 instructions were selected over the SVE ones when FEAT_SHA3 is available (due to the less restrictive register requirements, as you point out). Since they were, I didn't see a reason not to enable the SVE2 patterns for them too.
Would you rather I do as you suggested and only enable the patterns for BSL1N/BSL2N/NBSL, or perhaps add a RUN line such as
RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3 %s
in llvm/test/CodeGen/AArch64/bcax.ll and llvm/test/CodeGen/AArch64/eor3.ll to ensure we select the SHA3 patterns even when SVE2 is available?
If this works as is then just adding the RUN lines for verification works for me.
Thanks very much - done. :)
This affects EOR3/BCAX/BSL/NBSL/BSL1N/BSL2N.
This had initially been discussed in #138689 (comment).