diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 848b1c30bbeb5..d5c12a9658113 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -5368,6 +5368,19 @@ multiclass sve2_int_bitwise_ternary_op opc, string asm, def : SVE_3_Op_Pat(NAME)>; def : SVE_3_Op_Pat(NAME)>; def : SVE_3_Op_Pat(NAME)>; + + // Allow selecting SVE2 ternary ops with Neon types. + foreach VT = [nxv16i8, nxv8i16, nxv4i32, nxv2i64] in { + def : Pat<(SVEType.DSub (op V64:$op1, V64:$op2, V64:$op3)), + (EXTRACT_SUBREG (!cast(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $op1, dsub), + (INSERT_SUBREG (IMPLICIT_DEF), $op2, dsub), + (INSERT_SUBREG (IMPLICIT_DEF), $op3, dsub)), dsub)>; + + def : Pat<(SVEType.ZSub (op V128:$op1, V128:$op2, V128:$op3)), + (EXTRACT_SUBREG (!cast(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $op1, zsub), + (INSERT_SUBREG (IMPLICIT_DEF), $op2, zsub), + (INSERT_SUBREG (IMPLICIT_DEF), $op3, zsub)), zsub)>; + } } class sve2_int_rotate_right_imm tsz8_64, string asm, diff --git a/llvm/test/CodeGen/AArch64/bcax.ll b/llvm/test/CodeGen/AArch64/bcax.ll index e3c73c36e534b..e2274a9ce2f0e 100644 --- a/llvm/test/CodeGen/AArch64/bcax.ll +++ b/llvm/test/CodeGen/AArch64/bcax.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub ; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s ; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s +; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s +; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3 %s define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { ; SHA3-LABEL: bcax_64x2: @@ -13,6 +15,15 @@ define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { ; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: bcax_64x2: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d +; SVE2-NEXT: mov v0.16b, v2.16b +; SVE2-NEXT: ret %4 = xor <2 x i64> %1, %5 = and <2 x i64> %4, %0 %6 = xor <2 x i64> %5, %2 @@ -30,6 +41,15 @@ define <4 x i32> @bcax_32x4(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) { ; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: bcax_32x4: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d +; SVE2-NEXT: mov v0.16b, v2.16b +; SVE2-NEXT: ret %4 = xor <4 x i32> %1, %5 = and <4 x i32> %4, %0 %6 = xor <4 x i32> %5, %2 @@ -47,6 +67,15 @@ define <8 x i16> @bcax_16x8(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { ; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: bcax_16x8: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d +; SVE2-NEXT: mov v0.16b, v2.16b +; SVE2-NEXT: ret %4 = xor <8 x i16> %1, %5 = and <8 x i16> %4, %0 %6 = xor <8 x i16> %5, %2 @@ -64,6 +93,15 @@ define <16 x i8> @bcax_8x16(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: bcax_8x16: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d +; SVE2-NEXT: mov v0.16b, v2.16b +; SVE2-NEXT: ret %4 = xor <16 x i8> %1, %5 = and <16 x i8> %4, %0 %6 = xor <16 x i8> %5, %2 diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll new file mode 100644 index 0000000000000..5a270bc71cfc1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/bsl.ll @@ -0,0 +1,433 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=NEON +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefix=SVE2 + +; Test SVE2 BSL/NBSL/BSL1N/BSL2N code generation for: +; #define BSL(x,y,z) ( ((x) & (z)) | ( (y) & ~(z))) +; #define NBSL(x,y,z) (~(((x) & (z)) | ( (y) & ~(z)))) +; #define BSL1N(x,y,z) ( (~(x) & (z)) | ( (y) & ~(z))) +; #define BSL2N(x,y,z) ( ((x) & (z)) | (~(y) & ~(z))) +; +; See also llvm/test/CodeGen/AArch64/sve2-bsl.ll. + +; Test basic codegen. + +define <1 x i64> @bsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { +; NEON-LABEL: bsl_v1i64: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: bsl_v1i64: +; SVE2: // %bb.0: +; SVE2-NEXT: bif v0.8b, v1.8b, v2.8b +; SVE2-NEXT: ret + %4 = and <1 x i64> %2, %0 + %5 = xor <1 x i64> %2, splat (i64 -1) + %6 = and <1 x i64> %1, %5 + %7 = or <1 x i64> %4, %6 + ret <1 x i64> %7 +} + +define <1 x i64> @nbsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { +; NEON-LABEL: nbsl_v1i64: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: mvn v0.8b, v0.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: nbsl_v1i64: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 +; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: ret + %4 = and <1 x i64> %2, %0 + %5 = xor <1 x i64> %2, splat (i64 -1) + %6 = and <1 x i64> %1, %5 + %7 = or <1 x i64> %4, %6 + %8 = xor <1 x i64> %7, splat (i64 -1) + ret <1 x i64> %8 +} + +define <1 x i64> @bsl1n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { +; NEON-LABEL: bsl1n_v1i64: +; NEON: // %bb.0: +; NEON-NEXT: mvn v0.8b, v0.8b +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: bsl1n_v1i64: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 +; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: ret + %4 = xor <1 x i64> %0, splat (i64 -1) + %5 = and <1 x i64> %2, %4 + %6 = xor <1 x i64> %2, splat (i64 -1) + %7 = and <1 x i64> %1, %6 + %8 = or <1 x i64> %5, %7 + ret <1 x i64> %8 +} + +define <1 x i64> @bsl2n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { +; NEON-LABEL: bsl2n_v1i64: +; NEON: // %bb.0: +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: orr v1.8b, v2.8b, v1.8b +; NEON-NEXT: orn v0.8b, v0.8b, v1.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: bsl2n_v1i64: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 +; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: ret + %4 = and <1 x i64> %2, %0 + %5 = or <1 x i64> %2, %1 + %6 = xor <1 x i64> %5, splat (i64 -1) + %7 = or <1 x i64> %4, %6 + ret <1 x i64> %7 +} + +define <2 x i64> @bsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { +; NEON-LABEL: bsl_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: bsl_v2i64: +; SVE2: // %bb.0: +; SVE2-NEXT: bif v0.16b, v1.16b, v2.16b +; SVE2-NEXT: ret + %4 = and <2 x i64> %2, %0 + %5 = xor <2 x i64> %2, splat (i64 -1) + %6 = and <2 x i64> %1, %5 + %7 = or <2 x i64> %4, %6 + ret <2 x i64> %7 +} + +define <2 x i64> @nbsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { +; NEON-LABEL: nbsl_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: nbsl_v2i64: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: ret + %4 = and <2 x i64> %2, %0 + %5 = xor <2 x i64> %2, splat (i64 -1) + %6 = and <2 x i64> %1, %5 + %7 = or <2 x i64> %4, %6 + %8 = xor <2 x i64> %7, splat (i64 -1) + ret <2 x i64> %8 +} + +define <2 x i64> @bsl1n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { +; NEON-LABEL: bsl1n_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: bsl1n_v2i64: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: ret + %4 = xor <2 x i64> %0, splat (i64 -1) + %5 = and <2 x i64> %2, %4 + %6 = xor <2 x i64> %2, splat (i64 -1) + %7 = and <2 x i64> %1, %6 + %8 = or <2 x i64> %5, %7 + ret <2 x i64> %8 +} + +define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { +; NEON-LABEL: bsl2n_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: and v0.16b, v2.16b, v0.16b +; NEON-NEXT: orr v1.16b, v2.16b, v1.16b +; NEON-NEXT: orn v0.16b, v0.16b, v1.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: bsl2n_v2i64: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: ret + %4 = and <2 x i64> %2, %0 + %5 = or <2 x i64> %2, %1 + %6 = xor <2 x i64> %5, splat (i64 -1) + %7 = or <2 x i64> %4, %6 + ret <2 x i64> %7 +} + +; Test other element types. + +define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) { +; NEON-LABEL: nbsl_v8i8: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: mvn v0.8b, v0.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: nbsl_v8i8: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 +; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: ret + %4 = and <8 x i8> %2, %0 + %5 = xor <8 x i8> %2, splat (i8 -1) + %6 = and <8 x i8> %1, %5 + %7 = or <8 x i8> %4, %6 + %8 = xor <8 x i8> %7, splat (i8 -1) + ret <8 x i8> %8 +} + +define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) { +; NEON-LABEL: nbsl_v4i16: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: mvn v0.8b, v0.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: nbsl_v4i16: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 +; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: ret + %4 = and <4 x i16> %2, %0 + %5 = xor <4 x i16> %2, splat (i16 -1) + %6 = and <4 x i16> %1, %5 + %7 = or <4 x i16> %4, %6 + %8 = xor <4 x i16> %7, splat (i16 -1) + ret <4 x i16> %8 +} + +define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) { +; NEON-LABEL: nbsl_v2i32: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: mvn v0.8b, v0.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: nbsl_v2i32: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 +; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: ret + %4 = and <2 x i32> %2, %0 + %5 = xor <2 x i32> %2, splat (i32 -1) + %6 = and <2 x i32> %1, %5 + %7 = or <2 x i32> %4, %6 + %8 = xor <2 x i32> %7, splat (i32 -1) + ret <2 x i32> %8 +} + +define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { +; NEON-LABEL: nbsl_v16i8: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: nbsl_v16i8: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: ret + %4 = and <16 x i8> %2, %0 + %5 = xor <16 x i8> %2, splat (i8 -1) + %6 = and <16 x i8> %1, %5 + %7 = or <16 x i8> %4, %6 + %8 = xor <16 x i8> %7, splat (i8 -1) + ret <16 x i8> %8 +} + +define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { +; NEON-LABEL: nbsl_v8i16: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: nbsl_v8i16: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: ret + %4 = and <8 x i16> %2, %0 + %5 = xor <8 x i16> %2, splat (i16 -1) + %6 = and <8 x i16> %1, %5 + %7 = or <8 x i16> %4, %6 + %8 = xor <8 x i16> %7, splat (i16 -1) + ret <8 x i16> %8 +} + +define <4 x i32> @nbsl_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) { +; NEON-LABEL: nbsl_v4i32: +; NEON: // %bb.0: +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: nbsl_v4i32: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: ret + %4 = and <4 x i32> %2, %0 + %5 = xor <4 x i32> %2, splat (i32 -1) + %6 = and <4 x i32> %1, %5 + %7 = or <4 x i32> %4, %6 + %8 = xor <4 x i32> %7, splat (i32 -1) + ret <4 x i32> %8 +} + +; Test types that need promotion. + +define <4 x i8> @bsl_v4i8(<4 x i8> %0, <4 x i8> %1, <4 x i8> %2) { +; NEON-LABEL: bsl_v4i8: +; NEON: // %bb.0: +; NEON-NEXT: movi d3, #0xff00ff00ff00ff +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: eor v3.8b, v2.8b, v3.8b +; NEON-NEXT: and v1.8b, v1.8b, v3.8b +; NEON-NEXT: orr v0.8b, v0.8b, v1.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: bsl_v4i8: +; SVE2: // %bb.0: +; SVE2-NEXT: movi d3, #0xff00ff00ff00ff +; SVE2-NEXT: and v0.8b, v2.8b, v0.8b +; SVE2-NEXT: eor v3.8b, v2.8b, v3.8b +; SVE2-NEXT: and v1.8b, v1.8b, v3.8b +; SVE2-NEXT: orr v0.8b, v0.8b, v1.8b +; SVE2-NEXT: ret + %4 = and <4 x i8> %2, %0 + %5 = xor <4 x i8> %2, splat (i8 -1) + %6 = and <4 x i8> %1, %5 + %7 = or <4 x i8> %4, %6 + ret <4 x i8> %7 +} + +define <4 x i8> @nbsl_v4i8(<4 x i8> %0, <4 x i8> %1, <4 x i8> %2) { +; NEON-LABEL: nbsl_v4i8: +; NEON: // %bb.0: +; NEON-NEXT: movi d3, #0xff00ff00ff00ff +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: eor v4.8b, v2.8b, v3.8b +; NEON-NEXT: and v1.8b, v1.8b, v4.8b +; NEON-NEXT: orr v0.8b, v0.8b, v1.8b +; NEON-NEXT: eor v0.8b, v0.8b, v3.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: nbsl_v4i8: +; SVE2: // %bb.0: +; SVE2-NEXT: movi d3, #0xff00ff00ff00ff +; SVE2-NEXT: and v0.8b, v2.8b, v0.8b +; SVE2-NEXT: eor v4.8b, v2.8b, v3.8b +; SVE2-NEXT: and v1.8b, v1.8b, v4.8b +; SVE2-NEXT: orr v0.8b, v0.8b, v1.8b +; SVE2-NEXT: eor v0.8b, v0.8b, v3.8b +; SVE2-NEXT: ret + %4 = and <4 x i8> %2, %0 + %5 = xor <4 x i8> %2, splat (i8 -1) + %6 = and <4 x i8> %1, %5 + %7 = or <4 x i8> %4, %6 + %8 = xor <4 x i8> %7, splat (i8 -1) + ret <4 x i8> %8 +} + +define <4 x i8> @bsl1n_v4i8(<4 x i8> %0, <4 x i8> %1, <4 x i8> %2) { +; NEON-LABEL: bsl1n_v4i8: +; NEON: // %bb.0: +; NEON-NEXT: movi d3, #0xff00ff00ff00ff +; NEON-NEXT: eor v0.8b, v0.8b, v3.8b +; NEON-NEXT: eor v3.8b, v2.8b, v3.8b +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: and v1.8b, v1.8b, v3.8b +; NEON-NEXT: orr v0.8b, v0.8b, v1.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: bsl1n_v4i8: +; SVE2: // %bb.0: +; SVE2-NEXT: movi d3, #0xff00ff00ff00ff +; SVE2-NEXT: eor v0.8b, v0.8b, v3.8b +; SVE2-NEXT: eor v3.8b, v2.8b, v3.8b +; SVE2-NEXT: and v0.8b, v2.8b, v0.8b +; SVE2-NEXT: and v1.8b, v1.8b, v3.8b +; SVE2-NEXT: orr v0.8b, v0.8b, v1.8b +; SVE2-NEXT: ret + %4 = xor <4 x i8> %0, splat (i8 -1) + %5 = and <4 x i8> %2, %4 + %6 = xor <4 x i8> %2, splat (i8 -1) + %7 = and <4 x i8> %1, %6 + %8 = or <4 x i8> %5, %7 + ret <4 x i8> %8 +} + +define <4 x i8> @bsl2n_v4i8(<4 x i8> %0, <4 x i8> %1, <4 x i8> %2) { +; NEON-LABEL: bsl2n_v4i8: +; NEON: // %bb.0: +; NEON-NEXT: movi d3, #0xff00ff00ff00ff +; NEON-NEXT: orr v1.8b, v2.8b, v1.8b +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: eor v1.8b, v1.8b, v3.8b +; NEON-NEXT: orr v0.8b, v0.8b, v1.8b +; NEON-NEXT: ret +; +; SVE2-LABEL: bsl2n_v4i8: +; SVE2: // %bb.0: +; SVE2-NEXT: movi d3, #0xff00ff00ff00ff +; SVE2-NEXT: orr v1.8b, v2.8b, v1.8b +; SVE2-NEXT: and v0.8b, v2.8b, v0.8b +; SVE2-NEXT: eor v1.8b, v1.8b, v3.8b +; SVE2-NEXT: orr v0.8b, v0.8b, v1.8b +; SVE2-NEXT: ret + %4 = and <4 x i8> %2, %0 + %5 = or <4 x i8> %2, %1 + %6 = xor <4 x i8> %5, splat (i8 -1) + %7 = or <4 x i8> %4, %6 + ret <4 x i8> %7 +} diff --git a/llvm/test/CodeGen/AArch64/eor3.ll b/llvm/test/CodeGen/AArch64/eor3.ll index 06ae6b09d002e..b89d9d608575c 100644 --- a/llvm/test/CodeGen/AArch64/eor3.ll +++ b/llvm/test/CodeGen/AArch64/eor3.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub ; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s ; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s +; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s +; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3 %s define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; SHA3-LABEL: eor3_16x8_left: @@ -13,6 +15,15 @@ define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: eor3_16x8_left: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d +; SVE2-NEXT: mov v0.16b, v2.16b +; SVE2-NEXT: ret %4 = xor <16 x i8> %0, %1 %5 = xor <16 x i8> %2, %4 ret <16 x i8> %5 @@ -29,6 +40,15 @@ define <16 x i8> @eor3_16x8_right(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b ; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: eor3_16x8_right: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d +; SVE2-NEXT: mov v0.16b, v1.16b +; SVE2-NEXT: ret %4 = xor <16 x i8> %1, %2 %5 = xor <16 x i8> %4, %0 ret <16 x i8> %5 @@ -45,6 +65,15 @@ define <8 x i16> @eor3_8x16_left(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { ; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: eor3_8x16_left: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d +; SVE2-NEXT: mov v0.16b, v2.16b +; SVE2-NEXT: ret %4 = xor <8 x i16> %0, %1 %5 = xor <8 x i16> %2, %4 ret <8 x i16> %5 @@ -61,6 +90,15 @@ define <8 x i16> @eor3_8x16_right(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { ; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b ; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: eor3_8x16_right: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d +; SVE2-NEXT: mov v0.16b, v1.16b +; SVE2-NEXT: ret %4 = xor <8 x i16> %1, %2 %5 = xor <8 x i16> %4, %0 ret <8 x i16> %5 @@ -77,6 +115,15 @@ define <4 x i32> @eor3_4x32_left(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) { ; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: eor3_4x32_left: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d +; SVE2-NEXT: mov v0.16b, v2.16b +; SVE2-NEXT: ret %4 = xor <4 x i32> %0, %1 %5 = xor <4 x i32> %2, %4 ret <4 x i32> %5 @@ -93,6 +140,15 @@ define <4 x i32> @eor3_4x32_right(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) { ; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b ; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: eor3_4x32_right: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d +; SVE2-NEXT: mov v0.16b, v1.16b +; SVE2-NEXT: ret %4 = xor <4 x i32> %1, %2 %5 = xor <4 x i32> %4, %0 ret <4 x i32> %5 @@ -109,6 +165,15 @@ define <2 x i64> @eor3_2x64_left(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { ; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: eor3_2x64_left: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d +; SVE2-NEXT: mov v0.16b, v2.16b +; SVE2-NEXT: ret %4 = xor <2 x i64> %0, %1 %5 = xor <2 x i64> %2, %4 ret <2 x i64> %5 @@ -125,6 +190,15 @@ define <2 x i64> @eor3_2x64_right(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { ; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b ; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: eor3_2x64_right: +; SVE2: // %bb.0: +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d +; SVE2-NEXT: mov v0.16b, v1.16b +; SVE2-NEXT: ret %4 = xor <2 x i64> %1, %2 %5 = xor <2 x i64> %4, %0 ret <2 x i64> %5 @@ -142,6 +216,12 @@ define <2 x i64> @eor3_vnot(<2 x i64> %0, <2 x i64> %1) { ; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: mvn v0.16b, v0.16b ; NOSHA3-NEXT: ret +; +; SVE2-LABEL: eor3_vnot: +; SVE2: // %bb.0: +; SVE2-NEXT: eor v0.16b, v0.16b, v1.16b +; SVE2-NEXT: mvn v0.16b, v0.16b +; SVE2-NEXT: ret %3 = xor <2 x i64> %0, %4 = xor <2 x i64> %3, %1 ret <2 x i64> %4 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll index 70a638857ce4a..ec61fee1039ad 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll @@ -592,8 +592,10 @@ define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> ; CHECK-LABEL: reassociate_xors_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: eor v1.16b, v2.16b, v3.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: eor3 z0.d, z0.d, z2.d, z3.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %t0 = and <4 x i32> %x0, %x1 %t1 = xor <4 x i32> %t0, %x2