-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (9/11) #116835
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (9/11) #116835
Conversation
8554270 to
6dd2861
Compare
6dd2861 to
c54760e
Compare
|
@llvm/pr-subscribers-backend-aarch64 Author: Momchil Velikov (momchil-velikov) ChangesSVE2.2 introduces instructions with predicated forms with zeroing of This patch adds support for emitting the zeroing forms of certain Patch is 35.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116835.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7dd6d49bf20227..c805acd56fff5f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4283,10 +4283,10 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb">;
// SVE2 integer unary operations, zeroing predicate
- def URECPE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b00, "urecpe", ZPR32>;
- def URSQRTE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b01, "ursqrte", ZPR32>;
- defm SQABS_ZPzZ : sve2_int_un_pred_arit_z<0b10, "sqabs">;
- defm SQNEG_ZPzZ : sve2_int_un_pred_arit_z<0b11, "sqneg">;
+ defm URECPE_ZPzZ : sve2_int_un_pred_arit_z_S<0b00, "urecpe", int_aarch64_sve_urecpe>;
+ defm URSQRTE_ZPzZ : sve2_int_un_pred_arit_z_S<0b01, "ursqrte", int_aarch64_sve_ursqrte>;
+ defm SQABS_ZPzZ : sve2_int_un_pred_arit_z< 0b10, "sqabs", int_aarch64_sve_sqabs>;
+ defm SQNEG_ZPzZ : sve2_int_un_pred_arit_z< 0b11, "sqneg", int_aarch64_sve_sqneg>;
// Floating point round to integral fp value in integer size range
// Merging
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 0ef862fc1a27cf..7198e151e0eab3 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4180,11 +4180,22 @@ multiclass sve2_int_un_pred_arit<bits<2> opc, string asm, SDPatternOperator op>
defm : SVE_3_Op_Undef_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
-multiclass sve2_int_un_pred_arit_z<bits<2> opc, string asm> {
+multiclass sve2_int_un_pred_arit_z_S<bits<2> opc, string asm, SDPatternOperator op> {
+ def _S : sve2_int_un_pred_arit_z<0b10, opc, asm, ZPR32>;
+
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+}
+
+multiclass sve2_int_un_pred_arit_z<bits<2> opc, string asm, SDPatternOperator op> {
def _B : sve2_int_un_pred_arit_z<0b00, opc, asm, ZPR8>;
def _H : sve2_int_un_pred_arit_z<0b01, opc, asm, ZPR16>;
def _S : sve2_int_un_pred_arit_z<0b10, opc, asm, ZPR32>;
def _D : sve2_int_un_pred_arit_z<0b11, opc, asm, ZPR64>;
+
+ defm : SVE_3_Op_UndefZero_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-urecpe-ursqrte-sqabs-sqneg.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-urecpe-ursqrte-sqabs-sqneg.ll
new file mode 100644
index 00000000000000..787ac4458079c6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-urecpe-ursqrte-sqabs-sqneg.ll
@@ -0,0 +1,858 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+sme --force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 --force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 4 x i32> @test_svrecpe_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrecpe_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: urecpe z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrecpe_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: urecpe z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrecpe_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrecpe_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrecpe_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: urecpe z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrecpe_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrecpe_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrecpe_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: urecpe z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrsqrte_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrsqrte_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ursqrte z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrsqrte_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ursqrte z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrsqrte_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrsqrte_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: ursqrte z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrsqrte_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ursqrte z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrsqrte_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrsqrte_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: ursqrte z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrsqrte_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ursqrte z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 16 x i8> @test_svqabs_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svqabs_s8_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqabs z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s8_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svqabs_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svqabs_s8_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s8_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svqabs_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svqabs_s8_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s8_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svqabs_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svqabs_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqabs z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svqabs_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svqabs_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svqabs_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svqabs_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svqabs_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svqabs_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqabs z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svqabs_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svqabs_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svqabs_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svqabs_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svqabs_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svqabs_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqabs z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svqabs_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svqabs_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svqabs_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svqabs_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqabs_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqabs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svqneg_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svqneg_s8_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqneg z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s8_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svqneg_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svqneg_s8_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqneg z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s8_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svqneg_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svqneg_s8_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: sqneg z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s8_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svqneg_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svqneg_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqneg z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svqneg_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svqneg_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqneg z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svqneg_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svqneg_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: sqneg z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svqneg_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svqneg_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqneg z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svqneg_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svqneg_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqneg z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svqneg_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svqneg_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: sqneg z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svqneg_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svqneg_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqneg z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svqneg_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sqneg z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqneg.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svqneg_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABE...
[truncated]
|
jthackray
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Lukacma
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
SVE2.2 introduces instructions with predicated forms with zeroing of
the inactive lanes. This allows in some cases to save a
movprfxora
movinstruction when emitting code for_xor_zvariants ofintrinsics.
This patch adds support for emitting the zeroing forms of certain
URECPE,URSQRTE,SQABSandSQNEGinstructions.