
Conversation

rj-jesus (Contributor) commented Jul 3, 2025

This affects EOR3/BCAX/BSL/NBSL/BSL1N/BSL2N.

This was initially discussed in #138689 (comment).
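
For context, here is one of the affected cases, taken from the bcax.ll test updated in the diff below. With -mattr=+sve2 (and without +sha3), this <2 x i64> BCAX pattern is now selected to the SVE2 bcax instruction:

; With +sve2 and without +sha3, llc now emits
;   bcax z2.d, z2.d, z0.d, z1.d
;   mov  v0.16b, v2.16b
; instead of the bic/eor sequence used when neither feature is available.
define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
  %4 = xor <2 x i64> %1, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %0
  %6 = xor <2 x i64> %5, %2
  ret <2 x i64> %6
}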

llvmbot (Member) commented Jul 3, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Ricardo Jesus (rj-jesus)

Changes

This affects EOR3/BCAX/BSL/NBSL/BSL1N/BSL2N.

This was initially discussed in #138689 (comment).


Patch is 22.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146906.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+13)
  • (modified) llvm/test/CodeGen/AArch64/bcax.ll (+37)
  • (added) llvm/test/CodeGen/AArch64/bsl.ll (+325)
  • (modified) llvm/test/CodeGen/AArch64/eor3.ll (+79)
  • (modified) llvm/test/CodeGen/AArch64/machine-combiner.ll (+4-2)
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 848b1c30bbeb5..d5c12a9658113 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5368,6 +5368,19 @@ multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm,
   def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
   def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
+
+  // Allow selecting SVE2 ternary ops with Neon types.
+  foreach VT = [nxv16i8, nxv8i16, nxv4i32, nxv2i64] in {
+    def : Pat<(SVEType<VT>.DSub (op V64:$op1, V64:$op2, V64:$op3)),
+              (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $op1, dsub),
+                                                        (INSERT_SUBREG (IMPLICIT_DEF), $op2, dsub),
+                                                        (INSERT_SUBREG (IMPLICIT_DEF), $op3, dsub)), dsub)>;
+
+    def : Pat<(SVEType<VT>.ZSub (op V128:$op1, V128:$op2, V128:$op3)),
+              (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $op1, zsub),
+                                                        (INSERT_SUBREG (IMPLICIT_DEF), $op2, zsub),
+                                                        (INSERT_SUBREG (IMPLICIT_DEF), $op3, zsub)), zsub)>;
+  }
 }
 
 class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
diff --git a/llvm/test/CodeGen/AArch64/bcax.ll b/llvm/test/CodeGen/AArch64/bcax.ll
index e3c73c36e534b..e4eb608c2545f 100644
--- a/llvm/test/CodeGen/AArch64/bcax.ll
+++ b/llvm/test/CodeGen/AArch64/bcax.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
 ; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
 ; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s
 
 define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 ; SHA3-LABEL: bcax_64x2:
@@ -13,6 +14,15 @@ define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 ; NOSHA3-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: bcax_64x2:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    bcax z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT:    mov v0.16b, v2.16b
+; SVE2-NEXT:    ret
   %4 = xor <2 x i64> %1, <i64 -1, i64 -1>
   %5 = and <2 x i64> %4, %0
   %6 = xor <2 x i64> %5, %2
@@ -30,6 +40,15 @@ define <4 x i32> @bcax_32x4(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
 ; NOSHA3-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: bcax_32x4:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    bcax z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT:    mov v0.16b, v2.16b
+; SVE2-NEXT:    ret
   %4 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
   %5 = and <4 x i32> %4, %0
   %6 = xor <4 x i32> %5, %2
@@ -47,6 +66,15 @@ define <8 x i16> @bcax_16x8(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
 ; NOSHA3-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: bcax_16x8:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    bcax z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT:    mov v0.16b, v2.16b
+; SVE2-NEXT:    ret
   %4 = xor <8 x i16> %1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   %5 = and <8 x i16> %4, %0
   %6 = xor <8 x i16> %5, %2
@@ -64,6 +92,15 @@ define <16 x i8> @bcax_8x16(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
 ; NOSHA3-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: bcax_8x16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    bcax z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT:    mov v0.16b, v2.16b
+; SVE2-NEXT:    ret
   %4 = xor <16 x i8> %1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %5 = and <16 x i8> %4, %0
   %6 = xor <16 x i8> %5, %2
diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll
new file mode 100644
index 0000000000000..b672a446e579e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/bsl.ll
@@ -0,0 +1,325 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=NEON
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefix=SVE2
+
+; Test SVE2 BSL/NBSL/BSL1N/BSL2N code generation for:
+;   #define BSL(x,y,z)   (  ((x) & (z)) | ( (y) & ~(z)))
+;   #define NBSL(x,y,z)  (~(((x) & (z)) | ( (y) & ~(z))))
+;   #define BSL1N(x,y,z) ( (~(x) & (z)) | ( (y) & ~(z)))
+;   #define BSL2N(x,y,z) (  ((x) & (z)) | (~(y) & ~(z)))
+;
+; See also llvm/test/CodeGen/AArch64/sve2-bsl.ll.
+
+; Test basic codegen.
+
+define <1 x i64> @bsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
+; NEON-LABEL: bsl_v1i64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: bsl_v1i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    bif v0.8b, v1.8b, v2.8b
+; SVE2-NEXT:    ret
+  %4 = and <1 x i64> %2, %0
+  %5 = xor <1 x i64> %2, splat (i64 -1)
+  %6 = and <1 x i64> %1, %5
+  %7 = or <1 x i64> %4, %6
+  ret <1 x i64> %7
+}
+
+define <1 x i64> @nbsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
+; NEON-LABEL: nbsl_v1i64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    mvn v0.8b, v0.8b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: nbsl_v1i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <1 x i64> %2, %0
+  %5 = xor <1 x i64> %2, splat (i64 -1)
+  %6 = and <1 x i64> %1, %5
+  %7 = or <1 x i64> %4, %6
+  %8 = xor <1 x i64> %7, splat (i64 -1)
+  ret <1 x i64> %8
+}
+
+define <1 x i64> @bsl1n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
+; NEON-LABEL: bsl1n_v1i64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    mvn v0.8b, v0.8b
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: bsl1n_v1i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = xor <1 x i64> %0, splat (i64 -1)
+  %5 = and <1 x i64> %2, %4
+  %6 = xor <1 x i64> %2, splat (i64 -1)
+  %7 = and <1 x i64> %1, %6
+  %8 = or <1 x i64> %5, %7
+  ret <1 x i64> %8
+}
+
+define <1 x i64> @bsl2n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
+; NEON-LABEL: bsl2n_v1i64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
+; NEON-NEXT:    orr v1.8b, v2.8b, v1.8b
+; NEON-NEXT:    orn v0.8b, v0.8b, v1.8b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: bsl2n_v1i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <1 x i64> %2, %0
+  %5 = or <1 x i64> %2, %1
+  %6 = xor <1 x i64> %5, splat (i64 -1)
+  %7 = or <1 x i64> %4, %6
+  ret <1 x i64> %7
+}
+
+define <2 x i64> @bsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
+; NEON-LABEL: bsl_v2i64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: bsl_v2i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    bif v0.16b, v1.16b, v2.16b
+; SVE2-NEXT:    ret
+  %4 = and <2 x i64> %2, %0
+  %5 = xor <2 x i64> %2, splat (i64 -1)
+  %6 = and <2 x i64> %1, %5
+  %7 = or <2 x i64> %4, %6
+  ret <2 x i64> %7
+}
+
+define <2 x i64> @nbsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
+; NEON-LABEL: nbsl_v2i64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: nbsl_v2i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <2 x i64> %2, %0
+  %5 = xor <2 x i64> %2, splat (i64 -1)
+  %6 = and <2 x i64> %1, %5
+  %7 = or <2 x i64> %4, %6
+  %8 = xor <2 x i64> %7, splat (i64 -1)
+  ret <2 x i64> %8
+}
+
+define <2 x i64> @bsl1n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
+; NEON-LABEL: bsl1n_v2i64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: bsl1n_v2i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = xor <2 x i64> %0, splat (i64 -1)
+  %5 = and <2 x i64> %2, %4
+  %6 = xor <2 x i64> %2, splat (i64 -1)
+  %7 = and <2 x i64> %1, %6
+  %8 = or <2 x i64> %5, %7
+  ret <2 x i64> %8
+}
+
+define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
+; NEON-LABEL: bsl2n_v2i64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
+; NEON-NEXT:    orr v1.16b, v2.16b, v1.16b
+; NEON-NEXT:    orn v0.16b, v0.16b, v1.16b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: bsl2n_v2i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <2 x i64> %2, %0
+  %5 = or <2 x i64> %2, %1
+  %6 = xor <2 x i64> %5, splat (i64 -1)
+  %7 = or <2 x i64> %4, %6
+  ret <2 x i64> %7
+}
+
+; Test other element types.
+
+define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
+; NEON-LABEL: nbsl_v8i8:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    mvn v0.8b, v0.8b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: nbsl_v8i8:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <8 x i8> %2, %0
+  %5 = xor <8 x i8> %2, splat (i8 -1)
+  %6 = and <8 x i8> %1, %5
+  %7 = or <8 x i8> %4, %6
+  %8 = xor <8 x i8> %7, splat (i8 -1)
+  ret <8 x i8> %8
+}
+
+define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
+; NEON-LABEL: nbsl_v4i16:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    mvn v0.8b, v0.8b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: nbsl_v4i16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <4 x i16> %2, %0
+  %5 = xor <4 x i16> %2, splat (i16 -1)
+  %6 = and <4 x i16> %1, %5
+  %7 = or <4 x i16> %4, %6
+  %8 = xor <4 x i16> %7, splat (i16 -1)
+  ret <4 x i16> %8
+}
+
+define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
+; NEON-LABEL: nbsl_v2i32:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    mvn v0.8b, v0.8b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: nbsl_v2i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <2 x i32> %2, %0
+  %5 = xor <2 x i32> %2, splat (i32 -1)
+  %6 = and <2 x i32> %1, %5
+  %7 = or <2 x i32> %4, %6
+  %8 = xor <2 x i32> %7, splat (i32 -1)
+  ret <2 x i32> %8
+}
+
+define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
+; NEON-LABEL: nbsl_v16i8:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: nbsl_v16i8:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <16 x i8> %2, %0
+  %5 = xor <16 x i8> %2, splat (i8 -1)
+  %6 = and <16 x i8> %1, %5
+  %7 = or <16 x i8> %4, %6
+  %8 = xor <16 x i8> %7, splat (i8 -1)
+  ret <16 x i8> %8
+}
+
+define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
+; NEON-LABEL: nbsl_v8i16:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: nbsl_v8i16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <8 x i16> %2, %0
+  %5 = xor <8 x i16> %2, splat (i16 -1)
+  %6 = and <8 x i16> %1, %5
+  %7 = or <8 x i16> %4, %6
+  %8 = xor <8 x i16> %7, splat (i16 -1)
+  ret <8 x i16> %8
+}
+
+define <4 x i32> @nbsl_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
+; NEON-LABEL: nbsl_v4i32:
+; NEON:       // %bb.0:
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    ret
+;
+; SVE2-LABEL: nbsl_v4i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    ret
+  %4 = and <4 x i32> %2, %0
+  %5 = xor <4 x i32> %2, splat (i32 -1)
+  %6 = and <4 x i32> %1, %5
+  %7 = or <4 x i32> %4, %6
+  %8 = xor <4 x i32> %7, splat (i32 -1)
+  ret <4 x i32> %8
+}
diff --git a/llvm/test/CodeGen/AArch64/eor3.ll b/llvm/test/CodeGen/AArch64/eor3.ll
index 06ae6b09d002e..a83b425251c3e 100644
--- a/llvm/test/CodeGen/AArch64/eor3.ll
+++ b/llvm/test/CodeGen/AArch64/eor3.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
 ; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
 ; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s
 
 define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
 ; SHA3-LABEL: eor3_16x8_left:
@@ -13,6 +14,15 @@ define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
 ; NOSHA3-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; NOSHA3-NEXT:    eor v0.16b, v2.16b, v0.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_16x8_left:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    eor3 z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT:    mov v0.16b, v2.16b
+; SVE2-NEXT:    ret
   %4 = xor <16 x i8> %0, %1
   %5 = xor <16 x i8> %2, %4
   ret <16 x i8> %5
@@ -29,6 +39,15 @@ define <16 x i8> @eor3_16x8_right(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
 ; NOSHA3-NEXT:    eor v1.16b, v1.16b, v2.16b
 ; NOSHA3-NEXT:    eor v0.16b, v1.16b, v0.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_16x8_right:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    mov v0.16b, v1.16b
+; SVE2-NEXT:    ret
   %4 = xor <16 x i8> %1, %2
   %5 = xor <16 x i8> %4, %0
   ret <16 x i8> %5
@@ -45,6 +64,15 @@ define <8 x i16> @eor3_8x16_left(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
 ; NOSHA3-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; NOSHA3-NEXT:    eor v0.16b, v2.16b, v0.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_8x16_left:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    eor3 z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT:    mov v0.16b, v2.16b
+; SVE2-NEXT:    ret
   %4 = xor <8 x i16> %0, %1
   %5 = xor <8 x i16> %2, %4
   ret <8 x i16> %5
@@ -61,6 +89,15 @@ define <8 x i16> @eor3_8x16_right(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
 ; NOSHA3-NEXT:    eor v1.16b, v1.16b, v2.16b
 ; NOSHA3-NEXT:    eor v0.16b, v1.16b, v0.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_8x16_right:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    mov v0.16b, v1.16b
+; SVE2-NEXT:    ret
   %4 = xor <8 x i16> %1, %2
   %5 = xor <8 x i16> %4, %0
   ret <8 x i16> %5
@@ -77,6 +114,15 @@ define <4 x i32> @eor3_4x32_left(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
 ; NOSHA3-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; NOSHA3-NEXT:    eor v0.16b, v2.16b, v0.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_4x32_left:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    eor3 z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT:    mov v0.16b, v2.16b
+; SVE2-NEXT:    ret
   %4 = xor <4 x i32> %0, %1
   %5 = xor <4 x i32> %2, %4
   ret <4 x i32> %5
@@ -93,6 +139,15 @@ define <4 x i32> @eor3_4x32_right(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
 ; NOSHA3-NEXT:    eor v1.16b, v1.16b, v2.16b
 ; NOSHA3-NEXT:    eor v0.16b, v1.16b, v0.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_4x32_right:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    mov v0.16b, v1.16b
+; SVE2-NEXT:    ret
   %4 = xor <4 x i32> %1, %2
   %5 = xor <4 x i32> %4, %0
   ret <4 x i32> %5
@@ -109,6 +164,15 @@ define <2 x i64> @eor3_2x64_left(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 ; NOSHA3-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; NOSHA3-NEXT:    eor v0.16b, v2.16b, v0.16b
 ; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_2x64_left:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    // kill: def $q0 killed ...
[truncated]

Contributor:

Is it worth having at least one test in this file for fixed-width vectors that aren't 64 or 128 bits, e.g. <4 x i8> or <2 x i16>? I'd expect that in this case they'd just get promoted to <4 x i16> or <2 x i32>.

rj-jesus (Author):

Sure, I didn't do this initially because, as you say, those types will get promoted to something else, which should be unrelated to the patterns added in this patch. But I'm happy to add the tests if you think they're worth having!

rj-jesus (Author):

I've added a few tests for <4 x i8>, please let me know if they're what you had in mind. :)

Contributor:

Oh I see. I was expecting that after type promotion the types would be legal (v4i8->v4i16 which is SVEType.DSub for VT=nxv8i16) and therefore start matching your isel patterns too, but perhaps the isel patterns are different due to masking? No worries, I guess the tests don't do any harm and maybe we can support them in future?

rj-jesus (Author):

Yeah, I think it's the masking that gets in the way: specifically, after promoting v4i8->v4i16, the splat (i8 -1) is converted to a splat (i16 255), which no longer matches the main patterns defined in AArch64SVEInstrInfo. I don't think this is specific to the patterns added in this patch, but it's certainly something we can revisit in the future. :)
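
To make the above concrete, here is a sketch of the kind of <4 x i8> test being discussed (hypothetical; modelled on nbsl_v8i8 in bsl.ll and not necessarily the exact test added to the patch):

; Sketch only: after type legalisation the <4 x i8> operands are promoted
; to <4 x i16> and the all-ones mask becomes splat (i16 255), so the NBSL
; patterns in AArch64SVEInstrInfo no longer match and plain NEON code is
; emitted rather than nbsl.
define <4 x i8> @nbsl_v4i8(<4 x i8> %0, <4 x i8> %1, <4 x i8> %2) {
  %4 = and <4 x i8> %2, %0
  %5 = xor <4 x i8> %2, splat (i8 -1)
  %6 = and <4 x i8> %1, %5
  %7 = or <4 x i8> %4, %6
  %8 = xor <4 x i8> %7, splat (i8 -1)
  ret <4 x i8> %8
}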

david-arm (Contributor) left a comment:

LGTM!

david-arm (Contributor) left a comment:

Actually, I just realised the tests are failing so I've temporarily removed my LGTM!

rj-jesus (Author) commented Jul 7, 2025

> Actually, I just realised the tests are failing so I've temporarily removed my LGTM!

Erm I don't think the failures are related to the patch, let me try a rebase to see if that sorts it!

rj-jesus force-pushed the rjj/sve-enable-bitsel-neon branch from 79cc5fc to 385ab19 on July 7, 2025 at 11:08.
david-arm (Contributor) left a comment:

LGTM! Looks like the linux build is passing now. :)

The following inline thread refers to the new pattern block in llvm/lib/Target/AArch64/SVEInstrFormats.td:

def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;

// Allow selecting SVE2 ternary ops with Neon types.
foreach VT = [nxv16i8, nxv8i16, nxv4i32, nxv2i64] in {
paulwalker-arm (Collaborator) commented Jul 7, 2025:

Do you specifically care about the case when FEAT_SHA3 is not available? I ask because, when it is available, the NEON variants look to have less restrictive register requirements, especially BCAX and EOR3, which have a dedicated result register.

If you just want to use the other instructions (bsl1n etc.), then a quick fix would be to pass the VT array into the class and only set the parameter for the instructions that are in addition to those available under FEAT_SHA3.

rj-jesus (Author):

Thanks - passing VT into the class sounds good to me. I did check previously that the Neon BCAX/EOR3 instructions were selected over the SVE ones when FEAT_SHA3 is available (due to the less restrictive register requirements, as you point out). Since they were, I didn't see a reason not to enable the SVE2 patterns for them too.

Would you rather I do as you suggested and only enable the patterns for BSL1N/BSL2N/NBSL, or perhaps add a RUN line such as

RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3 %s

in llvm/test/CodeGen/AArch64/bcax.ll and llvm/test/CodeGen/AArch64/eor3.ll to ensure we select the SHA3 patterns even when SVE2 is available?

paulwalker-arm (Collaborator) commented Jul 7, 2025:

If this works as is then just adding the RUN lines for verification works for me.

rj-jesus (Author):

Thanks very much - done. :)
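
(For reference, the RUN header of bcax.ll after this change would presumably look something like the sketch below, i.e. the existing RUN lines plus the proposed +sha3,+sve2 one, with eor3.ll updated analogously; this is not copied verbatim from the final patch.)

; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s
; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3 %s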

rj-jesus merged commit cd75c2f into llvm:main on Jul 8, 2025 (9 checks passed).
rj-jesus deleted the rjj/sve-enable-bitsel-neon branch on July 8, 2025 at 08:06.