From 87b7d61f23b8aef863d37dcb137603b866ab8c77 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Tue, 18 Feb 2025 11:02:07 +0000 Subject: [PATCH 01/13] [Clang][LLVM] Implement single-single vectors MOP4{A/S} --- clang/include/clang/Basic/arm_sme.td | 54 ++ .../sme2-intrinsics/acle_sme2_mop4_1x1.c | 465 ++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 51 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 68 +-- llvm/lib/Target/AArch64/SMEInstrFormats.td | 93 +++- .../AArch64/sme2-intrinsics-mop4a_1x1.ll | 247 ++++++++++ 6 files changed, 903 insertions(+), 75 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 288a8c04c217f..2af29ad6699b6 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -376,6 +376,19 @@ let SMETargetGuard = "sme2" in { // Outer product and accumulate/subtract // +multiclass MOP4SingleSingle { + def NAME : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; +} + +multiclass MOP4MixedSignsSingleSingle { + def NAME : SInst<"sv" # n_suffix2 # "_1x1_" # za # "[_{2}_{3}]", + "vid" # !cond(!eq(n_suffix1, "su") : "u", true: "x"), + !cond(!eq(n_suffix1, "su") : "", true: "U") # t, + MergeNone, "aarch64_sme_" # n_suffix2 # "_wide_1x1", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_3>]>; +} + let SMETargetGuard = "sme2" in { def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; @@ -387,6 +400,29 @@ let SMETargetGuard = "sme2" in { def SVBMOPS : 
Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + defm SVSMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "s", "aarch64_sme_mop4a", "_wide">; + defm SVSMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "s", "aarch64_sme_mop4s", "_wide">; + defm SVSMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "c", "aarch64_sme_mop4a", "_wide">; + defm SVSMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "c", "aarch64_sme_mop4s", "_wide">; + + defm SVUMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "Us", "aarch64_sme_mop4a", "_wide">; + defm SVUMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "Us", "aarch64_sme_mop4s", "_wide">; + defm SVUMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "Uc", "aarch64_sme_mop4a", "_wide">; + defm SVUMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "Uc", "aarch64_sme_mop4s", "_wide">; + + defm SVFMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "h", "aarch64_sme_mop4a", "_wide">; + defm SVFMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "h", "aarch64_sme_mop4s", "_wide">; + defm SVFMOP4A_MZZ_S : MOP4SingleSingle<"a", "za32", "f", "aarch64_sme_mop4a", "">; + defm SVFMOP4S_MZZ_S : MOP4SingleSingle<"s", "za32", "f", "aarch64_sme_mop4s", "">; + + defm SVBMOP4A_MZZ_S : MOP4SingleSingle<"a", "za32", "b", "aarch64_sme_mop4a", "_wide">; + defm SVBMOP4S_MZZ_S : MOP4SingleSingle<"s", "za32", "b", "aarch64_sme_mop4s", "_wide">; + + defm SVSUMOP4A_MZZ_BtoS : MOP4MixedSignsSingleSingle<"su", "mop4a", "za32", "c">; + defm SVUSMOP4A_MZZ_BtoS : MOP4MixedSignsSingleSingle<"us", "mop4a", "za32", "c">; + defm SVSUMOP4S_MZZ_BtoS : MOP4MixedSignsSingleSingle<"su", "mop4s", "za32", "c">; + defm SVUSMOP4S_MZZ_BtoS : MOP4MixedSignsSingleSingle<"us", "mop4s", "za32", "c">; + // VERTICAL DOT-PRODUCT def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; def 
SVVDOT_LANE_ZA32_VG1x4_S : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", "c", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; @@ -437,6 +473,15 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-i16i64" in { + defm SVSMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "s", "aarch64_sme_mop4a", "_wide">; + defm SVSMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "s", "aarch64_sme_mop4s", "_wide">; + defm SVUMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "Us", "aarch64_sme_mop4a", "_wide">; + defm SVUMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "Us", "aarch64_sme_mop4s", "_wide">; + defm SVSUMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su", "mop4a", "za64", "s">; + defm SVUSMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"us", "mop4a", "za64", "s">; + defm SVSUMOP4S_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su", "mop4s", "za64", "s">; + defm SVUSMOP4S_MZZ_HtoD : MOP4MixedSignsSingleSingle<"us", "mop4s", "za64", "s">; + def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; @@ -473,6 +518,9 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-f64f64" in { + defm SVFMOP4A_MZZ_D : MOP4SingleSingle<"a", "za64", "d", "aarch64_sme_mop4a", "">; + defm SVFMOP4S_MZZ_D : MOP4SingleSingle<"s", "za64", "d", "aarch64_sme_mop4s", "">; + def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, 
"aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; @@ -490,6 +538,9 @@ let SMETargetGuard = "sme2,sme-f64f64" in { } let SMETargetGuard = "sme-f16f16" in { + defm SVFMOP4A_MZZ_H : MOP4SingleSingle<"a", "za16", "h", "aarch64_sme_mop4a", "">; + defm SVFMOP4S_MZZ_H : MOP4SingleSingle<"s", "za16", "h", "aarch64_sme_mop4s", "">; + def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; def SVMLS_MULTI_VG1x2_F16 : Inst<"svmls_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; @@ -507,6 +558,9 @@ let SMETargetGuard = "sme-f16f16" in { } let SMETargetGuard = "sme-b16b16" in { + defm SVBMOP4A_MZZ_H : MOP4SingleSingle<"a", "za16", "bf", "aarch64_sme_mop4a", "">; + defm SVBMOP4S_MZZ_H : MOP4SingleSingle<"s", "za16", "bf", "aarch64_sme_mop4s", "">; + def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; def SVMLS_MULTI_VG1x2_BF16 : Inst<"svmls_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c new file mode 100644 index 0000000000000..37238053009fd --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c @@ -0,0 +1,465 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 
-target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + + +#include <arm_sme.h> + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svmop4a_1x1_za32_s8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, 
[[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_s8u10__SVInt8_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_s8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_s8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_s8u10__SVInt8_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_s8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_u8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_u8u11__SVUint8_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_u8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_u8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_u8u11__SVUint8_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_u8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_s16_s16u11__SVInt16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_s16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_s16_s16u11__SVInt16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_s16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_u16_u16u12__SVUint16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming 
__arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_u16_u16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_u16_u16u12__SVUint16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_u16_u16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_f16_f16u13__SVFloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_f16_f16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_f16_f16u13__SVFloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_f16_f16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_bf16_bf16( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4a_1x1_za32_bf16_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_bf16_bf16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4s_1x1_za32_bf16_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_bf16_bf16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za64_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_s16u11__SVInt16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za64_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z29test_svmop4s_1x1_za64_s16_s16u11__SVInt16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za64_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_u16u12__SVUint16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za64,_u16_u16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za64_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_u16u12__SVUint16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_u16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za64_s16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, 
[[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_u16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za64_s16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_u16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za64_u16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za64,_u16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za64_u16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming 
__arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_s8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_u8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_s8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_u8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_u8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_s8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_u8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call 
void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_s8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za16_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za16_f16_f16u13__SVFloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za16_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za16,_f16_f16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za16_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za16_f16_f16u13__SVFloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za16_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za16,_f16_f16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_f32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_f32_f32u13__SVFloat32_tS_( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_f32_f32,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_f32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_f32_f32u13__SVFloat32_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_f32_f32,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za64_f64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_f64_f64u13__SVFloat64_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za64_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za64,_f64_f64,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za64_f64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_f64_f64u13__SVFloat64_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void 
test_svmop4s_1x1_za64_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za64,_f64_f64,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za16_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4a_1x1_za16_bf16_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za16_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za16,_bf16_bf16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za16_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4s_1x1_za16_bf16_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za16_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za16,_bf16_bf16,)(3, zn, zm); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 6dfc3c8f2a393..0714602a2f09b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1497,7 +1497,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMSubdivide2VectorType<0>, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - + class SVE2_1VectorArgIndexed_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -1512,7 +1512,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - + class SVE2_1VectorArg_Pred_Intrinsic : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty], @@ -1522,7 +1522,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - + class SVE2_Pred_1VectorArgIndexed_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -3064,6 +3064,17 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic; def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic; + class SME_OuterProduct_QuaterTile + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>], [ImmArg>]>; + + def int_aarch64_sme_mop4a_wide_1x1 : SME_OuterProduct_QuaterTile; + def int_aarch64_sme_mop4s_wide_1x1 : SME_OuterProduct_QuaterTile; + def int_aarch64_sme_mop4a_1x1 : SME_OuterProduct_QuaterTile; + def int_aarch64_sme_mop4s_1x1 : SME_OuterProduct_QuaterTile; + class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, @@ -3319,11 +3330,11 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty], [llvm_nxv4f32_ty, llvm_nxv4f32_ty], [IntrNoMem]>; - + class SME2_CVT_WIDENING_VG2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [LLVMSubdivide2VectorType<0>], [IntrNoMem]>; - + class SME2_CVT_VG4_SINGLE_Intrinsic : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>], @@ -3564,7 +3575,7 @@ let TargetPrefix = "aarch64" in { foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in { def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects]>; } - + // Multi-vector signed saturating doubling multiply high def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic; @@ -3634,7 
+3645,7 @@ let TargetPrefix = "aarch64" in { // //Multi-vector floating-point convert from half-precision to deinterleaved single-precision. // - + def int_aarch64_sve_fcvtl_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic; // @@ -3826,7 +3837,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti4_lane_zt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, IntrReadMem]>; - + // Lookup table expand two registers // def int_aarch64_sme_luti2_lane_zt_x2 @@ -3835,7 +3846,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti4_lane_zt_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, IntrReadMem]>; - + // // Lookup table expand four registers // @@ -3853,7 +3864,7 @@ let TargetPrefix = "aarch64" in { [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; - + // // Register scaling // @@ -3901,7 +3912,7 @@ def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic; // // SVE2.1 - Move predicate to/from vector // -def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic; +def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic; def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic; @@ -3943,10 +3954,10 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_anyvector_ty, LLVMMatchType<0>], [IntrReadMem, IntrInaccessibleMemOnly]>; - + def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; - + def int_aarch64_sve_fp8_cvtnt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], @@ -3958,32 +3969,32 @@ let TargetPrefix = "aarch64" in { [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; - + class SVE2_FP8_FMLA_FDOT_Lane : 
DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], [IntrReadMem, IntrInaccessibleMemOnly, ImmArg>]>; - + def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane; // Fused multiply-add def int_aarch64_sve_fp8_fmlalb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalt_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlallbb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlallbb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlallbt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlallbt_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalltb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalltb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalltt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalltt_lane : SVE2_FP8_FMLA_FDOT_Lane; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index d2aa86f388db2..0673394d4daa9 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -148,30 +148,30 @@ defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme } let Predicates = [HasSME_MOP4] in { - defm SMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b0, "smop4a">; - defm SMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b1, "smop4s">; - defm SUMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b0, "sumop4a">; - defm SUMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b1, "sumop4s">; - defm USMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b0, "usmop4a">; - defm USMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b1, "usmop4s">; - defm UMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b0, "umop4a">; - defm UMOP4S : 
sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b1, "umop4s">; - - defm SMOP4A : sme_quarter_outer_product_i16_i32<0b0, 0b0, "smop4a">; - defm SMOP4S : sme_quarter_outer_product_i16_i32<0b0, 0b1, "smop4s">; - defm UMOP4A : sme_quarter_outer_product_i16_i32<0b1, 0b0, "umop4a">; - defm UMOP4S : sme_quarter_outer_product_i16_i32<0b1, 0b1, "umop4s">; + defm SMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b0, "smop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm SMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b1, "smop4s", int_aarch64_sme_mop4s_wide_1x1>; + defm SUMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b0, "sumop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm SUMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b1, "sumop4s", int_aarch64_sme_mop4a_wide_1x1>; + defm USMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b0, "usmop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm USMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b1, "usmop4s", int_aarch64_sme_mop4a_wide_1x1>; + defm UMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b0, "umop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm UMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b1, "umop4s", int_aarch64_sme_mop4s_wide_1x1>; + + defm SMOP4A : sme_quarter_outer_product_i16_i32<0b0, 0b0, "smop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm SMOP4S : sme_quarter_outer_product_i16_i32<0b0, 0b1, "smop4s", int_aarch64_sme_mop4s_wide_1x1>; + defm UMOP4A : sme_quarter_outer_product_i16_i32<0b1, 0b0, "umop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm UMOP4S : sme_quarter_outer_product_i16_i32<0b1, 0b1, "umop4s", int_aarch64_sme_mop4s_wide_1x1>; } let Predicates = [HasSME_MOP4, HasSMEI16I64] in { - defm SMOP4A : sme_quarter_outer_product_i64<0b0, 0b0, 0b0, "smop4a">; - defm SMOP4S : sme_quarter_outer_product_i64<0b0, 0b0, 0b1, "smop4s">; - defm SUMOP4A : sme_quarter_outer_product_i64<0b0, 0b1, 0b0, "sumop4a">; - defm SUMOP4S : sme_quarter_outer_product_i64<0b0, 0b1, 0b1, "sumop4s">; - defm UMOP4A : 
sme_quarter_outer_product_i64<0b1, 0b1, 0b0, "umop4a">; - defm UMOP4S : sme_quarter_outer_product_i64<0b1, 0b1, 0b1, "umop4s">; - defm USMOP4A : sme_quarter_outer_product_i64<0b1, 0b0, 0b0, "usmop4a">; - defm USMOP4S : sme_quarter_outer_product_i64<0b1, 0b0, 0b1, "usmop4s">; + defm SMOP4A : sme_quarter_outer_product_i64<0b0, 0b0, 0b0, "smop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm SMOP4S : sme_quarter_outer_product_i64<0b0, 0b0, 0b1, "smop4s", int_aarch64_sme_mop4s_wide_1x1>; + defm SUMOP4A : sme_quarter_outer_product_i64<0b0, 0b1, 0b0, "sumop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm SUMOP4S : sme_quarter_outer_product_i64<0b0, 0b1, 0b1, "sumop4s", int_aarch64_sme_mop4s_wide_1x1>; + defm UMOP4A : sme_quarter_outer_product_i64<0b1, 0b1, 0b0, "umop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm UMOP4S : sme_quarter_outer_product_i64<0b1, 0b1, 0b1, "umop4s", int_aarch64_sme_mop4s_wide_1x1>; + defm USMOP4A : sme_quarter_outer_product_i64<0b1, 0b0, 0b0, "usmop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm USMOP4S : sme_quarter_outer_product_i64<0b1, 0b0, 0b1, "usmop4s", int_aarch64_sme_mop4s_wide_1x1>; } let Predicates = [HasSME_TMOP] in { @@ -1054,14 +1054,14 @@ let Predicates = [HasSME2, HasSVEBFSCALE] in { } let Predicates = [HasSME_MOP4] in { - defm BFMOP4A : sme2_bfmop4as_widening<0, "bfmop4a">; - defm BFMOP4S : sme2_bfmop4as_widening<1, "bfmop4s">; + defm BFMOP4A : sme2_bfmop4as_widening<0, "bfmop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm BFMOP4S : sme2_bfmop4as_widening<1, "bfmop4s", int_aarch64_sme_mop4s_wide_1x1>; - defm FMOP4A : sme2_fmop4as_fp16_fp32_widening<0, "fmop4a">; - defm FMOP4S : sme2_fmop4as_fp16_fp32_widening<1, "fmop4s">; + defm FMOP4A : sme2_fmop4as_fp16_fp32_widening<0, "fmop4a", int_aarch64_sme_mop4a_wide_1x1>; + defm FMOP4S : sme2_fmop4as_fp16_fp32_widening<1, "fmop4s", int_aarch64_sme_mop4s_wide_1x1>; - defm FMOP4A : sme2_fmop4as_fp32_non_widening<0, "fmop4a">; - defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s">; + defm FMOP4A : 
sme2_fmop4as_fp32_non_widening<0, "fmop4a", int_aarch64_sme_mop4a_1x1>; + defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s", int_aarch64_sme_mop4s_1x1>; } let Predicates = [HasSME_TMOP] in { @@ -1084,7 +1084,7 @@ let Predicates = [HasSME_TMOP, HasSMEB16B16] in { let Predicates = [HasSME_TMOP, HasSMEF8F32], Uses = [FPMR, FPCR] in { def FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, "ftmopa">; -} +} let Predicates = [HasSME_TMOP, HasSMEF8F16], Uses = [FPMR, FPCR] in { def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">; @@ -1099,8 +1099,8 @@ let Predicates = [HasSME_TMOP, HasSMEF16F16] in { } let Predicates = [HasSME_MOP4, HasSMEF16F16] in { - defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a">; - defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s">; + defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a", int_aarch64_sme_mop4a_1x1>; + defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s", int_aarch64_sme_mop4s_1x1>; } let Predicates = [HasSME2, HasSVEBFSCALE] in { @@ -1115,11 +1115,11 @@ let Predicates = [HasSME_MOP4, HasSMEF8F32] in { } let Predicates = [HasSME_MOP4, HasSMEB16B16] in { - defm BFMOP4A : sme2_bfmop4as_non_widening<0, "bfmop4a">; - defm BFMOP4S : sme2_bfmop4as_non_widening<1, "bfmop4s">; + defm BFMOP4A : sme2_bfmop4as_non_widening<0, "bfmop4a", int_aarch64_sme_mop4a_1x1>; + defm BFMOP4S : sme2_bfmop4as_non_widening<1, "bfmop4s", int_aarch64_sme_mop4s_1x1>; } let Predicates = [HasSME_MOP4, HasSMEF64F64] in { - defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a">; - defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s">; + defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a", int_aarch64_sme_mop4a_1x1>; + defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s", int_aarch64_sme_mop4s_1x1>; } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 4f6a413ba5e5c..5a3d12e9f7b8b 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td 
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -104,6 +104,15 @@ class sme_outer_product_pseudo let usesCustomInserter = 1; } +class sme2_quarter_tile_outer_product_pseudo + : Pseudo<(outs), (ins i32imm:$tile, + zn_ty:$zn, zm_ty:$zm), []>, + Sched<[]> { + // Translated to the actual instructions in AArch64ISelLowering.cpp + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + class sme2_za_array_2op_multi_single_pseudo : SMEPseudo2Instr, @@ -251,12 +260,15 @@ class SME2_Tile_VG4_Multi_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))), - (!cast(name) $base, $offset)>; + (!cast(name) $base, $offset)>; class SME2_Tile_Movaz_Pat : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))), (!cast(name # _PSEUDO) $tile, $base, $offset)>; +class SME2_ZA_Tile_TwoVec_Pat + : Pat<(intrinsic imm_ty:$tile, vt:$Zn, vt:$Zm), + (!cast(name # _PSEUDO) $tile, $Zn, $Zm)>; //===----------------------------------------------------------------------===// // SME pattern match helpers. 
@@ -600,9 +612,14 @@ class sme_quarter_outer_product_i16_i32{ +multiclass sme_quarter_outer_product_i8_i32{ def _MZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 0}, subtr, - ZPR8Mul2_Lo, ZPR8Mul2_Hi, mnemonic>; + ZPR8Mul2_Lo, ZPR8Mul2_Hi, mnemonic>, SMEPseudo2Instr; + + def NAME # _MZZ_BToS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_TwoVec_Pat; + def _M2ZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 0}, subtr, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi, mnemonic>; def _MZ2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 1}, subtr, @@ -611,9 +628,14 @@ multiclass sme_quarter_outer_product_i8_i32; } -multiclass sme_quarter_outer_product_i16_i32{ +multiclass sme_quarter_outer_product_i16_i32{ def _MZZ_HToS : sme_quarter_outer_product_i16_i32; + ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>, SMEPseudo2Instr; + + def NAME # _MZZ_HToS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_TwoVec_Pat; + def _M2ZZ_HToS : sme_quarter_outer_product_i16_i32; def _MZ2Z_HToS : sme_quarter_outer_product_i16_i32; } -multiclass sme_quarter_outer_product_i64{ +multiclass sme_quarter_outer_product_i64{ def _MZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 0}, subtr, - ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>; + ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>, SMEPseudo2Instr; + + def NAME # _MZZ_HtoD # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_TwoVec_Pat; + def _M2ZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 0}, subtr, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>; def _MZ2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 1}, subtr, @@ -2231,7 +2258,7 @@ multiclass sme2_int_mla_long_array_vg2_single op, SDPat multiclass sme2_fp_mla_long_array_vg4_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic, list uses=[]> { - def NAME : 
sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, + def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr { let Uses = uses; } @@ -5304,7 +5331,7 @@ multiclass sme2p1_zero_matrix { def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; -} +} //===----------------------------------------------------------------------===// // SME2.1 lookup table expand two non-contiguous registers @@ -5470,9 +5497,13 @@ class sme2_bf16_fp32_quarter_tile_outer_product { +multiclass sme2_bfmop4as_widening { // Single vectors - def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; + + def NAME # _MZZ_S # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_TwoVec_Pat; // Multiple and single vectors def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5617,9 +5648,13 @@ class sme2_fp16_quarter_tile_outer_product { +multiclass sme2_fmop4as_fp16_non_widening { // Single vectors - def _MZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + def _MZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; + + def NAME # _MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_TwoVec_Pat; // Multiple and single vectors def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5689,9 +5724,13 @@ class sme2_bf16_fp16_quarter_tile_outer_product { +multiclass sme2_bfmop4as_non_widening { // Single vectors - def _MZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + 
def _MZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; + + def NAME # _MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_TwoVec_Pat; // Multiple and single vectors def _M2ZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5726,9 +5765,13 @@ class sme2_fp32_quarter_tile_outer_product { +multiclass sme2_fmop4as_fp32_non_widening { // Single vectors - def _MZZ_S : sme2_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR32Mul2_Lo, ZPR32Mul2_Hi>; + def _MZZ_S : sme2_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR32Mul2_Lo, ZPR32Mul2_Hi>, SMEPseudo2Instr; + + def NAME # _MZZ_S # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_TwoVec_Pat; // Multiple and single vectors def _M2ZZ_S : sme2_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZPR32Mul2_Hi>; @@ -5763,9 +5806,13 @@ class sme2_fp64_quarter_tile_outer_product { +multiclass sme2_fmop4as_fp64_non_widening { // Single vectors - def _MZZ_D : sme2_fp64_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR64Mul2_Lo, ZPR64Mul2_Hi>; + def _MZZ_D : sme2_fp64_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR64Mul2_Lo, ZPR64Mul2_Hi>, SMEPseudo2Instr; + + def NAME # _MZZ_D # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_TwoVec_Pat; // Multiple and single vectors def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>; @@ -5800,9 +5847,13 @@ class sme2_fp16_fp32_quarter_tile_outer_product { +multiclass sme2_fmop4as_fp16_fp32_widening { // Single vectors - def _MZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + def _MZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; + + def NAME # _MZZ_HtoS # _PSEUDO : 
sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_Tile_TwoVec_Pat; // Multiple and single vectors def _M2ZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll new file mode 100644 index 0000000000000..df985675f3070 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -force-streaming -verify-machineinstrs < %s | FileCheck %s + +target triple = "aarch64-linux" + +; Widening +define void @mop4a_za32_s8( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za32_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4a za1.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za32_s8( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za1.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za32_u8( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za32_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4a za1.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za32_u8( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za1.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za32_s16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za32_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4a za1.s, z0.h, z24.h +; CHECK-NEXT: 
ret + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za32_s16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za32_u16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za32_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4a za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za32_u16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za32_f16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za32_f16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za32_bf16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za32_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4a za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za32_bf16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 1, %zn, %zm) 
+ ret void +} + +define void @mop4a_za64_s16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za64_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4a za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za64_s16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za64_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za64_u16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za64_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4a za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za64_u16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za64_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +; Non-widening +define void @mop4a_za16_f16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za1.h, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za16_f16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za1.h, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za32_f32( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za1.s, z0.s, z24.s +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za32_f32( %zn, %zm) #0 { +; CHECK-LABEL: 
mop4s_za32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za1.s, z0.s, z24.s +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za64_f64( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za1.d, z0.d, z24.d +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za64_f64( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za1.d, z0.d, z24.d +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za16_bf16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za16_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4a za1.h, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za16_bf16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za16_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4s za1.h, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 1, %zn, %zm) + ret void +} +attributes #0 = {nounwind "target-features" = "+sme-i16i64,+sme-f64f64,+sme-b16b16,+sme2p1,+bf16,+sme-f16f16,+sme-mop4" } From 228b75729a799d929a114fbebbcad223520c7d77 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Wed, 19 Feb 2025 11:58:46 +0000 Subject: [PATCH 02/13] Add white spaces back in to simply patch --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 38 +++++++++++----------- llvm/lib/Target/AArch64/SMEInstrFormats.td | 6 ++-- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 0714602a2f09b..24052d8a45d75 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1497,7 +1497,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMSubdivide2VectorType<0>, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - + class SVE2_1VectorArgIndexed_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -1512,7 +1512,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - + class SVE2_1VectorArg_Pred_Intrinsic : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty], @@ -1522,7 +1522,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - + class SVE2_Pred_1VectorArgIndexed_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -3330,11 +3330,11 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty], [llvm_nxv4f32_ty, llvm_nxv4f32_ty], [IntrNoMem]>; - + class SME2_CVT_WIDENING_VG2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [LLVMSubdivide2VectorType<0>], [IntrNoMem]>; - + class SME2_CVT_VG4_SINGLE_Intrinsic : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>], @@ -3575,7 +3575,7 @@ let TargetPrefix = "aarch64" in { foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in { def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects]>; } - + // Multi-vector signed saturating doubling multiply high def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic; @@ -3645,7 +3645,7 @@ let TargetPrefix = "aarch64" in { // //Multi-vector floating-point convert from half-precision to deinterleaved single-precision. 
// - + def int_aarch64_sve_fcvtl_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic; // @@ -3837,7 +3837,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti4_lane_zt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, IntrReadMem]>; - + // Lookup table expand two registers // def int_aarch64_sme_luti2_lane_zt_x2 @@ -3864,7 +3864,7 @@ let TargetPrefix = "aarch64" in { [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; - + // // Register scaling // @@ -3912,7 +3912,7 @@ def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic; // // SVE2.1 - Move predicate to/from vector // -def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic; +def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic; def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic; @@ -3954,10 +3954,10 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_anyvector_ty, LLVMMatchType<0>], [IntrReadMem, IntrInaccessibleMemOnly]>; - + def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; - + def int_aarch64_sve_fp8_cvtnt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], @@ -3969,32 +3969,32 @@ let TargetPrefix = "aarch64" in { [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; - + class SVE2_FP8_FMLA_FDOT_Lane : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], [IntrReadMem, IntrInaccessibleMemOnly, ImmArg>]>; - + def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane; // Fused multiply-add def int_aarch64_sve_fp8_fmlalb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalt : 
SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalt_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlallbb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlallbb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlallbt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlallbt_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalltb : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalltb_lane : SVE2_FP8_FMLA_FDOT_Lane; - + def int_aarch64_sve_fp8_fmlalltt : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fmlalltt_lane : SVE2_FP8_FMLA_FDOT_Lane; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 5a3d12e9f7b8b..755531505636d 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -260,7 +260,7 @@ class SME2_Tile_VG4_Multi_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))), - (!cast(name) $base, $offset)>; + (!cast(name) $base, $offset)>; class SME2_Tile_Movaz_Pat : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))), @@ -2258,7 +2258,7 @@ multiclass sme2_int_mla_long_array_vg2_single op, SDPat multiclass sme2_fp_mla_long_array_vg4_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic, list uses=[]> { - def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, + def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr { let Uses = uses; } @@ -5331,7 +5331,7 @@ multiclass sme2p1_zero_matrix { def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; -} +} //===----------------------------------------------------------------------===// // SME2.1 lookup table expand two non-contiguous registers From 
e7dd7caa157b39074eaf5d09aded9708cdd794d5 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Mon, 24 Feb 2025 13:33:42 +0000 Subject: [PATCH 03/13] Add extra intrinsics to differentiate u/smop4 --- clang/include/clang/Basic/arm_sme.td | 26 +++--- .../sme2-intrinsics/acle_sme2_mop4_1x1.c | 80 +++++++++---------- llvm/include/llvm/IR/IntrinsicsAArch64.td | 26 ++++-- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 66 +++++++-------- llvm/lib/Target/AArch64/SMEInstrFormats.td | 56 ++++++------- .../AArch64/sme2-intrinsics-mop4a_1x1.ll | 36 ++++----- 6 files changed, 153 insertions(+), 137 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 2af29ad6699b6..bf76e4ce53182 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -384,7 +384,7 @@ multiclass MOP4MixedSignsSingleSingle]>; } @@ -400,15 +400,15 @@ let SMETargetGuard = "sme2" in { def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; - defm SVSMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "s", "aarch64_sme_mop4a", "_wide">; - defm SVSMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "s", "aarch64_sme_mop4s", "_wide">; - defm SVSMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "c", "aarch64_sme_mop4a", "_wide">; - defm SVSMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "c", "aarch64_sme_mop4s", "_wide">; + defm SVSMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "s", "aarch64_sme_smop4a", "_wide">; + defm SVSMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "s", "aarch64_sme_smop4s", "_wide">; + defm SVSMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "c", "aarch64_sme_smop4a", "_wide">; + defm SVSMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "c", "aarch64_sme_smop4s", "_wide">; - defm SVUMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "Us", "aarch64_sme_mop4a", "_wide">; - defm SVUMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", 
"Us", "aarch64_sme_mop4s", "_wide">; - defm SVUMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "Uc", "aarch64_sme_mop4a", "_wide">; - defm SVUMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "Uc", "aarch64_sme_mop4s", "_wide">; + defm SVUMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "Us", "aarch64_sme_umop4a", "_wide">; + defm SVUMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "Us", "aarch64_sme_umop4s", "_wide">; + defm SVUMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "Uc", "aarch64_sme_umop4a", "_wide">; + defm SVUMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "Uc", "aarch64_sme_umop4s", "_wide">; defm SVFMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "h", "aarch64_sme_mop4a", "_wide">; defm SVFMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "h", "aarch64_sme_mop4s", "_wide">; @@ -473,10 +473,10 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-i16i64" in { - defm SVSMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "s", "aarch64_sme_mop4a", "_wide">; - defm SVSMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "s", "aarch64_sme_mop4s", "_wide">; - defm SVUMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "Us", "aarch64_sme_mop4a", "_wide">; - defm SVUMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "Us", "aarch64_sme_mop4s", "_wide">; + defm SVSMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "s", "aarch64_sme_smop4a", "_wide">; + defm SVSMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "s", "aarch64_sme_smop4s", "_wide">; + defm SVUMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "Us", "aarch64_sme_umop4a", "_wide">; + defm SVUMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "Us", "aarch64_sme_umop4s", "_wide">; defm SVSUMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su", "mop4a", "za64", "s">; defm SVUSMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"us", "mop4a", "za64", "s">; defm SVSUMOP4S_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su", "mop4s", "za64", "s">; diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c 
b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c index 37238053009fd..eac3648f90368 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c @@ -18,12 +18,12 @@ // CHECK-LABEL: @test_svmop4a_1x1_za32_s8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_s8u10__SVInt8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { @@ -32,12 +32,12 @@ void test_svmop4a_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm // CHECK-LABEL: @test_svmop4s_1x1_za32_s8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_s8u10__SVInt8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { @@ -46,12 +46,12 @@ void test_svmop4s_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm // CHECK-LABEL: @test_svmop4a_1x1_za32_u8_u8( // 
CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_u8u11__SVUint8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { @@ -60,12 +60,12 @@ void test_svmop4a_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __a // CHECK-LABEL: @test_svmop4s_1x1_za32_u8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_u8u11__SVUint8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { @@ -74,12 +74,12 @@ void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __a // CHECK-LABEL: @test_svmop4a_1x1_za32_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: 
@_Z29test_svmop4a_1x1_za32_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { @@ -88,12 +88,12 @@ void test_svmop4a_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming _ // CHECK-LABEL: @test_svmop4s_1x1_za32_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { @@ -102,12 +102,12 @@ void test_svmop4s_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming _ // CHECK-LABEL: @test_svmop4a_1x1_za32_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // 
CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -116,12 +116,12 @@ void test_svmop4a_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4s_1x1_za32_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -186,12 +186,12 @@ void test_svmop4s_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_str // CHECK-LABEL: @test_svmop4a_1x1_za64_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { @@ -200,12 +200,12 @@ void test_svmop4a_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming _ // CHECK-LABEL: @test_svmop4s_1x1_za64_s16_s16( // CHECK-NEXT: 
entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { @@ -214,12 +214,12 @@ void test_svmop4s_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming _ // CHECK-LABEL: @test_svmop4a_1x1_za64_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -228,12 +228,12 @@ void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4s_1x1_za64_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: 
@_Z29test_svmop4s_1x1_za64_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -242,12 +242,12 @@ void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4a_1x1_za64_s16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -256,12 +256,12 @@ void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4s_1x1_za64_s16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv8i16(i32 
3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -270,12 +270,12 @@ void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4a_1x1_za64_u16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { @@ -284,12 +284,12 @@ void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4s_1x1_za64_u16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { @@ -298,12 +298,12 @@ void test_svmop4s_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming // 
CHECK-LABEL: @test_svmop4a_1x1_za32_s8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { @@ -312,12 +312,12 @@ void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __ar // CHECK-LABEL: @test_svmop4s_1x1_za32_s8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { @@ -326,12 +326,12 @@ void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __ar // CHECK-LABEL: @test_svmop4a_1x1_za32_u8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: 
ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { @@ -340,12 +340,12 @@ void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __ar // CHECK-LABEL: @test_svmop4s_1x1_za32_u8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 24052d8a45d75..8556ae83c79f9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3064,16 +3064,32 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic; def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic; - class SME_OuterProduct_QuaterTile + class SME_OuterProduct_QuaterTile_Single : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], [ImmArg>]>; - def int_aarch64_sme_mop4a_wide_1x1 : SME_OuterProduct_QuaterTile; - def 
int_aarch64_sme_mop4s_wide_1x1 : SME_OuterProduct_QuaterTile; - def int_aarch64_sme_mop4a_1x1 : SME_OuterProduct_QuaterTile; - def int_aarch64_sme_mop4s_1x1 : SME_OuterProduct_QuaterTile; + def int_aarch64_sme_mop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_mop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_mop4a_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_mop4s_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_smop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_smop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_smop4a_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_smop4s_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_umop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_umop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_umop4a_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_umop4s_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_sumop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_sumop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_sumop4a_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_sumop4s_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_usmop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_usmop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_usmop4a_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_usmop4s_1x1 : SME_OuterProduct_QuaterTile_Single; class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 0673394d4daa9..c02e72e8b4c26 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -148,30 
+148,30 @@ defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme } let Predicates = [HasSME_MOP4] in { - defm SMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b0, "smop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm SMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b1, "smop4s", int_aarch64_sme_mop4s_wide_1x1>; - defm SUMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b0, "sumop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm SUMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b1, "sumop4s", int_aarch64_sme_mop4a_wide_1x1>; - defm USMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b0, "usmop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm USMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b1, "usmop4s", int_aarch64_sme_mop4a_wide_1x1>; - defm UMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b0, "umop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm UMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b1, "umop4s", int_aarch64_sme_mop4s_wide_1x1>; - - defm SMOP4A : sme_quarter_outer_product_i16_i32<0b0, 0b0, "smop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm SMOP4S : sme_quarter_outer_product_i16_i32<0b0, 0b1, "smop4s", int_aarch64_sme_mop4s_wide_1x1>; - defm UMOP4A : sme_quarter_outer_product_i16_i32<0b1, 0b0, "umop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm UMOP4S : sme_quarter_outer_product_i16_i32<0b1, 0b1, "umop4s", int_aarch64_sme_mop4s_wide_1x1>; + defm SMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b0, "smop4a", "int_aarch64_sme_smop4a_wide">; + defm SMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b1, "smop4s", "int_aarch64_sme_smop4s_wide">; + defm SUMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b0, "sumop4a", "int_aarch64_sme_sumop4a_wide">; + defm SUMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b1, "sumop4s", "int_aarch64_sme_sumop4s_wide">; + defm USMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b0, "usmop4a", "int_aarch64_sme_usmop4a_wide">; + defm USMOP4S : 
sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b1, "usmop4s", "int_aarch64_sme_usmop4s_wide">; + defm UMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b0, "umop4a", "int_aarch64_sme_umop4a_wide">; + defm UMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b1, "umop4s", "int_aarch64_sme_umop4s_wide">; + + defm SMOP4A : sme_quarter_outer_product_i16_i32<0b0, 0b0, "smop4a", "int_aarch64_sme_smop4a_wide">; + defm SMOP4S : sme_quarter_outer_product_i16_i32<0b0, 0b1, "smop4s", "int_aarch64_sme_smop4s_wide">; + defm UMOP4A : sme_quarter_outer_product_i16_i32<0b1, 0b0, "umop4a", "int_aarch64_sme_umop4a_wide">; + defm UMOP4S : sme_quarter_outer_product_i16_i32<0b1, 0b1, "umop4s", "int_aarch64_sme_umop4s_wide">; } let Predicates = [HasSME_MOP4, HasSMEI16I64] in { - defm SMOP4A : sme_quarter_outer_product_i64<0b0, 0b0, 0b0, "smop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm SMOP4S : sme_quarter_outer_product_i64<0b0, 0b0, 0b1, "smop4s", int_aarch64_sme_mop4s_wide_1x1>; - defm SUMOP4A : sme_quarter_outer_product_i64<0b0, 0b1, 0b0, "sumop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm SUMOP4S : sme_quarter_outer_product_i64<0b0, 0b1, 0b1, "sumop4s", int_aarch64_sme_mop4s_wide_1x1>; - defm UMOP4A : sme_quarter_outer_product_i64<0b1, 0b1, 0b0, "umop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm UMOP4S : sme_quarter_outer_product_i64<0b1, 0b1, 0b1, "umop4s", int_aarch64_sme_mop4s_wide_1x1>; - defm USMOP4A : sme_quarter_outer_product_i64<0b1, 0b0, 0b0, "usmop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm USMOP4S : sme_quarter_outer_product_i64<0b1, 0b0, 0b1, "usmop4s", int_aarch64_sme_mop4s_wide_1x1>; + defm SMOP4A : sme_quarter_outer_product_i64<0b0, 0b0, 0b0, "smop4a", "int_aarch64_sme_smop4a_wide">; + defm SMOP4S : sme_quarter_outer_product_i64<0b0, 0b0, 0b1, "smop4s", "int_aarch64_sme_smop4s_wide">; + defm SUMOP4A : sme_quarter_outer_product_i64<0b0, 0b1, 0b0, "sumop4a", "int_aarch64_sme_sumop4a_wide">; + defm SUMOP4S : sme_quarter_outer_product_i64<0b0, 0b1, 0b1, "sumop4s", 
"int_aarch64_sme_sumop4s_wide">; + defm UMOP4A : sme_quarter_outer_product_i64<0b1, 0b1, 0b0, "umop4a", "int_aarch64_sme_umop4a_wide">; + defm UMOP4S : sme_quarter_outer_product_i64<0b1, 0b1, 0b1, "umop4s", "int_aarch64_sme_umop4s_wide">; + defm USMOP4A : sme_quarter_outer_product_i64<0b1, 0b0, 0b0, "usmop4a", "int_aarch64_sme_usmop4a_wide">; + defm USMOP4S : sme_quarter_outer_product_i64<0b1, 0b0, 0b1, "usmop4s", "int_aarch64_sme_usmop4s_wide">; } let Predicates = [HasSME_TMOP] in { @@ -1054,14 +1054,14 @@ let Predicates = [HasSME2, HasSVEBFSCALE] in { } let Predicates = [HasSME_MOP4] in { - defm BFMOP4A : sme2_bfmop4as_widening<0, "bfmop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm BFMOP4S : sme2_bfmop4as_widening<1, "bfmop4s", int_aarch64_sme_mop4s_wide_1x1>; + defm BFMOP4A : sme2_bfmop4as_widening<0, "bfmop4a", "int_aarch64_sme_mop4a_wide">; + defm BFMOP4S : sme2_bfmop4as_widening<1, "bfmop4s", "int_aarch64_sme_mop4s_wide">; - defm FMOP4A : sme2_fmop4as_fp16_fp32_widening<0, "fmop4a", int_aarch64_sme_mop4a_wide_1x1>; - defm FMOP4S : sme2_fmop4as_fp16_fp32_widening<1, "fmop4s", int_aarch64_sme_mop4s_wide_1x1>; + defm FMOP4A : sme2_fmop4as_fp16_fp32_widening<0, "fmop4a", "int_aarch64_sme_mop4a_wide">; + defm FMOP4S : sme2_fmop4as_fp16_fp32_widening<1, "fmop4s", "int_aarch64_sme_mop4s_wide">; - defm FMOP4A : sme2_fmop4as_fp32_non_widening<0, "fmop4a", int_aarch64_sme_mop4a_1x1>; - defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s", int_aarch64_sme_mop4s_1x1>; + defm FMOP4A : sme2_fmop4as_fp32_non_widening<0, "fmop4a", "int_aarch64_sme_mop4a">; + defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">; } let Predicates = [HasSME_TMOP] in { @@ -1099,8 +1099,8 @@ let Predicates = [HasSME_TMOP, HasSMEF16F16] in { } let Predicates = [HasSME_MOP4, HasSMEF16F16] in { - defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a", int_aarch64_sme_mop4a_1x1>; - defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s", int_aarch64_sme_mop4s_1x1>; 
+ defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a", "int_aarch64_sme_mop4a">; + defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">; } let Predicates = [HasSME2, HasSVEBFSCALE] in { @@ -1115,11 +1115,11 @@ let Predicates = [HasSME_MOP4, HasSMEF8F32] in { } let Predicates = [HasSME_MOP4, HasSMEB16B16] in { - defm BFMOP4A : sme2_bfmop4as_non_widening<0, "bfmop4a", int_aarch64_sme_mop4a_1x1>; - defm BFMOP4S : sme2_bfmop4as_non_widening<1, "bfmop4s", int_aarch64_sme_mop4s_1x1>; + defm BFMOP4A : sme2_bfmop4as_non_widening<0, "bfmop4a", "int_aarch64_sme_mop4a">; + defm BFMOP4S : sme2_bfmop4as_non_widening<1, "bfmop4s", "int_aarch64_sme_mop4s">; } let Predicates = [HasSME_MOP4, HasSMEF64F64] in { - defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a", int_aarch64_sme_mop4a_1x1>; - defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s", int_aarch64_sme_mop4s_1x1>; + defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a", "int_aarch64_sme_mop4a">; + defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">; } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 755531505636d..9840d36b2c0fc 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -104,7 +104,7 @@ class sme_outer_product_pseudo let usesCustomInserter = 1; } -class sme2_quarter_tile_outer_product_pseudo +class sme2_quarter_tile_outer_product_pseudo_single_single : Pseudo<(outs), (ins i32imm:$tile, zn_ty:$zn, zm_ty:$zm), []>, Sched<[]> { @@ -612,13 +612,13 @@ class sme_quarter_outer_product_i16_i32{ +multiclass sme_quarter_outer_product_i8_i32{ def _MZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 0}, subtr, ZPR8Mul2_Lo, ZPR8Mul2_Hi, mnemonic>, SMEPseudo2Instr; - def NAME # _MZZ_BToS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _MZZ_BToS # _PSEUDO : 
sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv16i8>; def _M2ZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 0}, subtr, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi, mnemonic>; @@ -628,13 +628,13 @@ multiclass sme_quarter_outer_product_i8_i32; } -multiclass sme_quarter_outer_product_i16_i32{ +multiclass sme_quarter_outer_product_i16_i32{ def _MZZ_HToS : sme_quarter_outer_product_i16_i32, SMEPseudo2Instr; - def NAME # _MZZ_HToS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _MZZ_HToS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8i16>; def _M2ZZ_HToS : sme_quarter_outer_product_i16_i32; @@ -644,13 +644,13 @@ multiclass sme_quarter_outer_product_i16_i32; } -multiclass sme_quarter_outer_product_i64{ +multiclass sme_quarter_outer_product_i64{ def _MZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 0}, subtr, ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>, SMEPseudo2Instr; - def NAME # _MZZ_HtoD # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _MZZ_HtoD # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8i16>; def _M2ZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 0}, subtr, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>; @@ -5497,13 +5497,13 @@ class sme2_bf16_fp32_quarter_tile_outer_product { +multiclass sme2_bfmop4as_widening { // Single vectors def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_S # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _MZZ_S # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, 
SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8bf16>; // Multiple and single vectors def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5648,13 +5648,13 @@ class sme2_fp16_quarter_tile_outer_product { +multiclass sme2_fmop4as_fp16_non_widening { // Single vectors def _MZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8f16>; // Multiple and single vectors def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5724,13 +5724,13 @@ class sme2_bf16_fp16_quarter_tile_outer_product { +multiclass sme2_bfmop4as_non_widening { // Single vectors def _MZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8bf16>; // Multiple and single vectors def _M2ZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5765,13 +5765,13 @@ class sme2_fp32_quarter_tile_outer_product { +multiclass sme2_fmop4as_fp32_non_widening { // Single vectors def _MZZ_S : sme2_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR32Mul2_Lo, ZPR32Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_S # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _MZZ_S # _PSEUDO : 
sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv4f32>; // Multiple and single vectors def _M2ZZ_S : sme2_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZPR32Mul2_Hi>; @@ -5806,13 +5806,13 @@ class sme2_fp64_quarter_tile_outer_product { +multiclass sme2_fmop4as_fp64_non_widening { // Single vectors def _MZZ_D : sme2_fp64_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR64Mul2_Lo, ZPR64Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_D # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _MZZ_D # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv2f64>; // Multiple and single vectors def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>; @@ -5847,13 +5847,13 @@ class sme2_fp16_fp32_quarter_tile_outer_product { +multiclass sme2_fmop4as_fp16_fp32_widening { // Single vectors def _MZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_HtoS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _MZZ_HtoS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8f16>; // Multiple and single vectors def _M2ZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll index df985675f3070..730bdb2d720d2 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll @@ -10,7 +10,7 @@ define void 
@mop4a_za32_s8( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4a za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) ret void } @@ -20,7 +20,7 @@ define void @mop4s_za32_s8( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4s za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) ret void } @@ -28,9 +28,9 @@ define void @mop4a_za32_u8( %zn, %zm) #0 { ; CHECK-LABEL: mop4a_za32_u8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4a za1.s, z0.b, z24.b +; CHECK-NEXT: umop4a za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) ret void } @@ -38,9 +38,9 @@ define void @mop4s_za32_u8( %zn, %zm) #0 { ; CHECK-LABEL: mop4s_za32_u8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4s za1.s, z0.b, z24.b +; CHECK-NEXT: umop4s za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) ret void } @@ -50,7 +50,7 @@ define void @mop4a_za32_s16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) ret void } @@ -60,7 +60,7 @@ define void @mop4s_za32_s16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4s za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) ret void } @@ -68,9 +68,9 @@ define void 
@mop4a_za32_u16( %zn, %zm) #0 { ; CHECK-LABEL: mop4a_za32_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4a za1.s, z0.h, z24.h +; CHECK-NEXT: umop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) ret void } @@ -78,9 +78,9 @@ define void @mop4s_za32_u16( %zn, %zm) #0 { ; CHECK-LABEL: mop4s_za32_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: umop4s za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) ret void } @@ -130,7 +130,7 @@ define void @mop4a_za64_s16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) ret void } @@ -140,7 +140,7 @@ define void @mop4s_za64_s16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4s za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) ret void } @@ -148,9 +148,9 @@ define void @mop4a_za64_u16( %zn, %zm) #0 { ; CHECK-LABEL: mop4a_za64_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4a za1.s, z0.h, z24.h +; CHECK-NEXT: umop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) ret void } @@ -158,9 +158,9 @@ define void @mop4s_za64_u16( %zn, %zm) #0 { ; CHECK-LABEL: mop4s_za64_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: umop4s za1.s, z0.h, z24.h ; 
CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) ret void } From 88e9c0e2be48c6a013dd110ce87b3531986b7203 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Tue, 25 Feb 2025 13:47:45 +0000 Subject: [PATCH 04/13] Simplify clang multiclasses --- clang/include/clang/Basic/arm_sme.td | 79 ++++++++++--------- .../AArch64/sme2-intrinsics-mop4a_1x1.ll | 40 ++++++++++ 2 files changed, 80 insertions(+), 39 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index bf76e4ce53182..123a1c7a007ba 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -376,15 +376,20 @@ let SMETargetGuard = "sme2" in { // Outer product and accumulate/subtract // -multiclass MOP4SingleSingle { - def NAME : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; +multiclass MOP4 { + def NAME # "_1x1" : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; } -multiclass MOP4MixedSignsSingleSingle { - def NAME : SInst<"sv" # n_suffix2 # "_1x1_" # za # "[_{2}_{3}]", - "vid" # !cond(!eq(n_suffix1, "su") : "u", true: "x"), - !cond(!eq(n_suffix1, "su") : "", true: "U") # t, - MergeNone, "aarch64_sme_" # n_suffix1 # n_suffix2 # "_wide_1x1", +multiclass SUMOP4 { + def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]", + "vidu", t, MergeNone, "aarch64_sme_sumop4" # s # "_wide_1x1", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_3>]>; +} + +multiclass USMOP4 { + def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]", + "vidx", t, MergeNone, "aarch64_sme_usmop4" # s # "_wide_1x1", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>]>; } @@ -400,28 +405,24 @@ let SMETargetGuard = "sme2" in { def SVBMOPS : 
Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; - defm SVSMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "s", "aarch64_sme_smop4a", "_wide">; - defm SVSMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "s", "aarch64_sme_smop4s", "_wide">; - defm SVSMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "c", "aarch64_sme_smop4a", "_wide">; - defm SVSMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "c", "aarch64_sme_smop4s", "_wide">; + defm SVSMOP4A_H : MOP4<"a", "za32", "cs", "aarch64_sme_smop4a", "_wide">; + defm SVSMOP4S_H : MOP4<"s", "za32", "cs", "aarch64_sme_smop4s", "_wide">; - defm SVUMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "Us", "aarch64_sme_umop4a", "_wide">; - defm SVUMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "Us", "aarch64_sme_umop4s", "_wide">; - defm SVUMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "Uc", "aarch64_sme_umop4a", "_wide">; - defm SVUMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "Uc", "aarch64_sme_umop4s", "_wide">; + defm SVUMOP4A_H : MOP4<"a", "za32", "UcUs", "aarch64_sme_umop4a", "_wide">; + defm SVUMOP4S_H : MOP4<"s", "za32", "UcUs", "aarch64_sme_umop4s", "_wide">; - defm SVFMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "h", "aarch64_sme_mop4a", "_wide">; - defm SVFMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "h", "aarch64_sme_mop4s", "_wide">; - defm SVFMOP4A_MZZ_S : MOP4SingleSingle<"a", "za32", "f", "aarch64_sme_mop4a", "">; - defm SVFMOP4S_MZZ_S : MOP4SingleSingle<"s", "za32", "f", "aarch64_sme_mop4s", "">; + defm SVFMOP4A_HtoS : MOP4<"a", "za32", "h", "aarch64_sme_mop4a", "_wide">; + defm SVFMOP4S_HtoS : MOP4<"s", "za32", "h", "aarch64_sme_mop4s", "_wide">; + defm SVFMOP4A_S : MOP4<"a", "za32", "f", "aarch64_sme_mop4a", "">; + defm SVFMOP4S_S : MOP4<"s", "za32", "f", "aarch64_sme_mop4s", "">; - defm SVBMOP4A_MZZ_S : MOP4SingleSingle<"a", "za32", "b", "aarch64_sme_mop4a", "_wide">; - defm SVBMOP4S_MZZ_S : MOP4SingleSingle<"s", "za32", "b", 
"aarch64_sme_mop4s", "_wide">; + defm SVBMOP4A_S : MOP4<"a", "za32", "b", "aarch64_sme_mop4a", "_wide">; + defm SVBMOP4S_S : MOP4<"s", "za32", "b", "aarch64_sme_mop4s", "_wide">; - defm SVSUMOP4A_MZZ_BtoS : MOP4MixedSignsSingleSingle<"su", "mop4a", "za32", "c">; - defm SVUSMOP4A_MZZ_BtoS : MOP4MixedSignsSingleSingle<"us", "mop4a", "za32", "c">; - defm SVSUMOP4S_MZZ_BtoS : MOP4MixedSignsSingleSingle<"su", "mop4s", "za32", "c">; - defm SVUSMOP4S_MZZ_BtoS : MOP4MixedSignsSingleSingle<"us", "mop4s", "za32", "c">; + defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs">; + defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs">; + defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs">; + defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs">; // VERTICAL DOT-PRODUCT def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; @@ -473,14 +474,14 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-i16i64" in { - defm SVSMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "s", "aarch64_sme_smop4a", "_wide">; - defm SVSMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "s", "aarch64_sme_smop4s", "_wide">; - defm SVUMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "Us", "aarch64_sme_umop4a", "_wide">; - defm SVUMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "Us", "aarch64_sme_umop4s", "_wide">; - defm SVSUMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su", "mop4a", "za64", "s">; - defm SVUSMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"us", "mop4a", "za64", "s">; - defm SVSUMOP4S_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su", "mop4s", "za64", "s">; - defm SVUSMOP4S_MZZ_HtoD : MOP4MixedSignsSingleSingle<"us", "mop4s", "za64", "s">; + defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a", "_wide">; + defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s", "_wide">; + defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a", "_wide">; + defm SVUMOP4S_HtoD : 
MOP4<"s", "za64", "Us", "aarch64_sme_umop4s", "_wide">; + defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s">; + defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s">; + defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us">; + defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us">; def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; @@ -518,8 +519,8 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-f64f64" in { - defm SVFMOP4A_MZZ_D : MOP4SingleSingle<"a", "za64", "d", "aarch64_sme_mop4a", "">; - defm SVFMOP4S_MZZ_D : MOP4SingleSingle<"s", "za64", "d", "aarch64_sme_mop4s", "">; + defm SVFMOP4A_D : MOP4<"a", "za64", "d", "aarch64_sme_mop4a", "">; + defm SVFMOP4S_D : MOP4<"s", "za64", "d", "aarch64_sme_mop4s", "">; def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; @@ -538,8 +539,8 @@ let SMETargetGuard = "sme2,sme-f64f64" in { } let SMETargetGuard = "sme-f16f16" in { - defm SVFMOP4A_MZZ_H : MOP4SingleSingle<"a", "za16", "h", "aarch64_sme_mop4a", "">; - defm SVFMOP4S_MZZ_H : MOP4SingleSingle<"s", "za16", "h", "aarch64_sme_mop4s", "">; + defm SVFMOP4A_H : MOP4<"a", "za16", "h", "aarch64_sme_mop4a", "">; + defm SVFMOP4S_H : MOP4<"s", "za16", "h", "aarch64_sme_mop4s", "">; def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; @@ 
-558,8 +559,8 @@ let SMETargetGuard = "sme-f16f16" in { } let SMETargetGuard = "sme-b16b16" in { - defm SVBMOP4A_MZZ_H : MOP4SingleSingle<"a", "za16", "bf", "aarch64_sme_mop4a", "">; - defm SVBMOP4S_MZZ_H : MOP4SingleSingle<"s", "za16", "bf", "aarch64_sme_mop4s", "">; + defm SVBMOP4A_H : MOP4<"a", "za16", "bf", "aarch64_sme_mop4a", "">; + defm SVBMOP4S_H : MOP4<"s", "za16", "bf", "aarch64_sme_mop4s", "">; def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll index 730bdb2d720d2..938c57ae89200 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll @@ -44,6 +44,46 @@ define void @mop4s_za32_u8( %zn, %zm) #0 { ret void } +define void @mop4a_za32_s8_u8( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za32_s8_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sumop4a za1.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za32_s8_u8( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_s8_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sumop4s za1.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za32_u8_s8( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za32_u8_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: usmop4a za1.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za32_u8_s8( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_u8_s8: +; CHECK: // %bb.0: 
+; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: usmop4s za1.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + ret void +} + define void @mop4a_za32_s16( %zn, %zm) #0 { ; CHECK-LABEL: mop4a_za32_s16: ; CHECK: // %bb.0: From d29ca213e2cea26e06cc43e2d6695e3bff1e273b Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Wed, 26 Feb 2025 09:24:38 +0000 Subject: [PATCH 05/13] Add instrinsics to differentiate za64 and za32 --- clang/include/clang/Basic/arm_sme.td | 32 ++-- .../sme2-intrinsics/acle_sme2_mop4_1x1.c | 148 +++++++++--------- llvm/include/llvm/IR/IntrinsicsAArch64.td | 8 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-mop4a_1x1.ll | 40 +++++ 5 files changed, 146 insertions(+), 98 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 123a1c7a007ba..7cf1ae427b072 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -380,16 +380,16 @@ multiclass MOP4 { def NAME # "_1x1" : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; } -multiclass SUMOP4 { +multiclass SUMOP4 { def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]", - "vidu", t, MergeNone, "aarch64_sme_sumop4" # s # "_wide_1x1", + "vidu", t, MergeNone, "aarch64_sme_sumop4" # s # i # "_wide_1x1", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>]>; } -multiclass USMOP4 { +multiclass USMOP4 { def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]", - "vidx", t, MergeNone, "aarch64_sme_usmop4" # s # "_wide_1x1", + "vidx", t, MergeNone, "aarch64_sme_usmop4" # s # i # "_wide_1x1", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>]>; } @@ -419,10 +419,10 @@ let SMETargetGuard = "sme2" in { defm SVBMOP4A_S : MOP4<"a", "za32", "b", "aarch64_sme_mop4a", "_wide">; defm SVBMOP4S_S : MOP4<"s", "za32", "b", 
"aarch64_sme_mop4s", "_wide">; - defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs">; - defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs">; - defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs">; - defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs">; + defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs", "">; + defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs", "">; + defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs", "">; + defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs", "">; // VERTICAL DOT-PRODUCT def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; @@ -474,14 +474,14 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-i16i64" in { - defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a", "_wide">; - defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s", "_wide">; - defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a", "_wide">; - defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s", "_wide">; - defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s">; - defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s">; - defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us">; - defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us">; + defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a_za64", "_wide">; + defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s_za64", "_wide">; + defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a_za64", "_wide">; + defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s_za64", "_wide">; + defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s", "_za64">; + defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s", "_za64">; + defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us", "_za64">; + defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us", "_za64">; def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; 
def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c index eac3648f90368..34a9633374d3f 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c @@ -72,6 +72,62 @@ void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __a SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_u8,)(3, zn, zm); } +// CHECK-LABEL: @test_svmop4a_1x1_za32_s8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_u8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_s8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_u8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_u8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_s8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_u8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_s8,)(3, zn, zm); +} + // CHECK-LABEL: @test_svmop4a_1x1_za32_s16_s16( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) @@ -186,40 +242,40 @@ void test_svmop4s_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_str // CHECK-LABEL: @test_svmop4a_1x1_za64_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x1.nxv8i16(i32 
3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_s16_s16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_s16,)(3, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_s16_s16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_s16,)(3, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -228,12 +284,12 @@ void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) 
__arm_streaming // CHECK-LABEL: @test_svmop4s_1x1_za64_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -242,12 +298,12 @@ void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4a_1x1_za64_s16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -256,12 +312,12 @@ void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4s_1x1_za64_s16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -270,12 +326,12 @@ void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4a_1x1_za64_u16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { @@ -284,74 +340,18 @@ void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming // CHECK-LABEL: @test_svmop4s_1x1_za64_u16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail 
call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_s16,)(3, zn, zm); } -// CHECK-LABEL: @test_svmop4a_1x1_za32_s8_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: ret void -// -void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_u8,)(3, zn, zm); -} - -// CHECK-LABEL: @test_svmop4s_1x1_za32_s8_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: ret void -// -void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_u8,)(3, zn, zm); -} - -// CHECK-LABEL: @test_svmop4a_1x1_za32_u8_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: ret void -// -void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_s8,)(3, zn, zm); -} - -// CHECK-LABEL: @test_svmop4s_1x1_za32_u8_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: ret void -// -void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_s8,)(3, zn, zm); -} - // CHECK-LABEL: @test_svmop4a_1x1_za16_f16_f16( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 8556ae83c79f9..eeea8d77d1e16 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3090,6 +3090,14 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_usmop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; def int_aarch64_sme_usmop4a_1x1 : SME_OuterProduct_QuaterTile_Single; def int_aarch64_sme_usmop4s_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_smop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_smop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_umop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_umop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_sumop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_sumop4s_za64_wide_1x1 
: SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_usmop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_usmop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index c02e72e8b4c26..f992f73171e0e 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -164,14 +164,14 @@ let Predicates = [HasSME_MOP4] in { } let Predicates = [HasSME_MOP4, HasSMEI16I64] in { - defm SMOP4A : sme_quarter_outer_product_i64<0b0, 0b0, 0b0, "smop4a", "int_aarch64_sme_smop4a_wide">; - defm SMOP4S : sme_quarter_outer_product_i64<0b0, 0b0, 0b1, "smop4s", "int_aarch64_sme_smop4s_wide">; - defm SUMOP4A : sme_quarter_outer_product_i64<0b0, 0b1, 0b0, "sumop4a", "int_aarch64_sme_sumop4a_wide">; - defm SUMOP4S : sme_quarter_outer_product_i64<0b0, 0b1, 0b1, "sumop4s", "int_aarch64_sme_sumop4s_wide">; - defm UMOP4A : sme_quarter_outer_product_i64<0b1, 0b1, 0b0, "umop4a", "int_aarch64_sme_umop4a_wide">; - defm UMOP4S : sme_quarter_outer_product_i64<0b1, 0b1, 0b1, "umop4s", "int_aarch64_sme_umop4s_wide">; - defm USMOP4A : sme_quarter_outer_product_i64<0b1, 0b0, 0b0, "usmop4a", "int_aarch64_sme_usmop4a_wide">; - defm USMOP4S : sme_quarter_outer_product_i64<0b1, 0b0, 0b1, "usmop4s", "int_aarch64_sme_usmop4s_wide">; + defm SMOP4A : sme_quarter_outer_product_i64<0b0, 0b0, 0b0, "smop4a", "int_aarch64_sme_smop4a_za64_wide">; + defm SMOP4S : sme_quarter_outer_product_i64<0b0, 0b0, 0b1, "smop4s", "int_aarch64_sme_smop4s_za64_wide">; + defm SUMOP4A : sme_quarter_outer_product_i64<0b0, 0b1, 0b0, "sumop4a", "int_aarch64_sme_sumop4a_za64_wide">; + defm SUMOP4S : sme_quarter_outer_product_i64<0b0, 0b1, 0b1, "sumop4s", "int_aarch64_sme_sumop4s_za64_wide">; + defm UMOP4A : sme_quarter_outer_product_i64<0b1, 0b1, 0b0, "umop4a", 
"int_aarch64_sme_umop4a_za64_wide">; + defm UMOP4S : sme_quarter_outer_product_i64<0b1, 0b1, 0b1, "umop4s", "int_aarch64_sme_umop4s_za64_wide">; + defm USMOP4A : sme_quarter_outer_product_i64<0b1, 0b0, 0b0, "usmop4a", "int_aarch64_sme_usmop4a_za64_wide">; + defm USMOP4S : sme_quarter_outer_product_i64<0b1, 0b0, 0b1, "usmop4s", "int_aarch64_sme_usmop4s_za64_wide">; } let Predicates = [HasSME_TMOP] in { diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll index 938c57ae89200..7bcf407d23297 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll @@ -204,6 +204,46 @@ define void @mop4s_za64_u16( %zn, %zm) #0 { ret void } +define void @mop4a_za64_s16_u16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za64_s16_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sumop4a za1.d, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za64_s16_u16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za64_s16_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sumop4s za1.d, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4a_za64_u16_s16( %zn, %zm) #0 { +; CHECK-LABEL: mop4a_za64_u16_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: usmop4a za1.d, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za64_u16_s16( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za64_u16_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: usmop4s za1.d, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 1, %zn, %zm) + ret void +} + ; Non-widening define void @mop4a_za16_f16( %zn, %zm) #0 { ; 
CHECK-LABEL: mop4a_za16_f16: From d69c8ed10a8092fb28871a28a438e02cf9a74fea Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Wed, 26 Feb 2025 14:12:46 +0000 Subject: [PATCH 06/13] Fix immediates and add more tests --- clang/include/clang/Basic/arm_sme.td | 72 +++---- .../sme2-intrinsics/acle_sme2_mop4_1x1.c | 192 +++++++++--------- llvm/lib/Target/AArch64/SMEInstrFormats.td | 8 +- .../AArch64/sme2-intrinsics-mop4a_1x1.ll | 154 +++++++++++--- 4 files changed, 259 insertions(+), 167 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 7cf1ae427b072..ff42f110f72cb 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -376,22 +376,22 @@ let SMETargetGuard = "sme2" in { // Outer product and accumulate/subtract // -multiclass MOP4 { - def NAME # "_1x1" : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; +multiclass MOP4 checks> { + def NAME # "_1x1" : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], checks>; } -multiclass SUMOP4 { - def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]", +multiclass SUMOP4 checks> { + def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{d}_{3}]", "vidu", t, MergeNone, "aarch64_sme_sumop4" # s # i # "_wide_1x1", [IsStreaming, IsInOutZA], - [ImmCheck<0, ImmCheck0_3>]>; + checks>; } -multiclass USMOP4 { - def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]", +multiclass USMOP4 checks> { + def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{d}_{3}]", "vidx", t, MergeNone, "aarch64_sme_usmop4" # s # i # "_wide_1x1", [IsStreaming, IsInOutZA], - [ImmCheck<0, ImmCheck0_3>]>; + checks>; } let SMETargetGuard = "sme2" in { @@ -405,24 +405,24 @@ let SMETargetGuard = "sme2" in { def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; - defm SVSMOP4A_H : MOP4<"a", "za32", "cs", "aarch64_sme_smop4a", "_wide">; - defm SVSMOP4S_H : MOP4<"s", "za32", "cs", "aarch64_sme_smop4s", "_wide">; + defm SVSMOP4A_H : MOP4<"a", "za32", "cs", "aarch64_sme_smop4a", "_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVSMOP4S_H : MOP4<"s", "za32", "cs", "aarch64_sme_smop4s", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVUMOP4A_H : MOP4<"a", "za32", "UcUs", "aarch64_sme_umop4a", "_wide">; - defm SVUMOP4S_H : MOP4<"s", "za32", "UcUs", "aarch64_sme_umop4s", "_wide">; + defm SVUMOP4A_H : MOP4<"a", "za32", "UcUs", "aarch64_sme_umop4a", "_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVUMOP4S_H : MOP4<"s", "za32", "UcUs", "aarch64_sme_umop4s", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVFMOP4A_HtoS : MOP4<"a", "za32", "h", "aarch64_sme_mop4a", "_wide">; - defm SVFMOP4S_HtoS : MOP4<"s", "za32", "h", "aarch64_sme_mop4s", "_wide">; - defm SVFMOP4A_S : MOP4<"a", "za32", "f", "aarch64_sme_mop4a", "">; - defm SVFMOP4S_S : MOP4<"s", "za32", "f", "aarch64_sme_mop4s", "">; + defm SVFMOP4A_HtoS : MOP4<"a", "za32", "h", "aarch64_sme_mop4a", "_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4S_HtoS : MOP4<"s", "za32", "h", "aarch64_sme_mop4s", "_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4A_S : MOP4<"a", "za32", "f", "aarch64_sme_mop4a", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4S_S : MOP4<"s", "za32", "f", "aarch64_sme_mop4s", "", [ImmCheck<0, ImmCheck0_3>]>; - defm SVBMOP4A_S : MOP4<"a", "za32", "b", "aarch64_sme_mop4a", "_wide">; - defm SVBMOP4S_S : MOP4<"s", "za32", "b", "aarch64_sme_mop4s", "_wide">; + defm SVBMOP4A_S : MOP4<"a", "za32", "b", "aarch64_sme_mop4a", "_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVBMOP4S_S : MOP4<"s", "za32", "b", "aarch64_sme_mop4s", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs", "">; - defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs", "">; - defm SVUSMOP4A_S : USMOP4<"a", 
"za32", "UcUs", "">; - defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs", "">; + defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs", "", [ImmCheck<0, ImmCheck0_3>]>; // VERTICAL DOT-PRODUCT def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; @@ -474,14 +474,14 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-i16i64" in { - defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a_za64", "_wide">; - defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s_za64", "_wide">; - defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a_za64", "_wide">; - defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s_za64", "_wide">; - defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s", "_za64">; - defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s", "_za64">; - defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us", "_za64">; - defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us", "_za64">; + defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a_za64", "_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s_za64", "_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a_za64", "_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s_za64", "_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUSMOP4S_D : 
USMOP4<"s", "za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; @@ -519,8 +519,8 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-f64f64" in { - defm SVFMOP4A_D : MOP4<"a", "za64", "d", "aarch64_sme_mop4a", "">; - defm SVFMOP4S_D : MOP4<"s", "za64", "d", "aarch64_sme_mop4s", "">; + defm SVFMOP4A_D : MOP4<"a", "za64", "d", "aarch64_sme_mop4a", "", [ImmCheck<0, ImmCheck0_7>]>; + defm SVFMOP4S_D : MOP4<"s", "za64", "d", "aarch64_sme_mop4s", "", [ImmCheck<0, ImmCheck0_7>]>; def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; @@ -539,8 +539,8 @@ let SMETargetGuard = "sme2,sme-f64f64" in { } let SMETargetGuard = "sme-f16f16" in { - defm SVFMOP4A_H : MOP4<"a", "za16", "h", "aarch64_sme_mop4a", "">; - defm SVFMOP4S_H : MOP4<"s", "za16", "h", "aarch64_sme_mop4s", "">; + defm SVFMOP4A_H : MOP4<"a", "za16", "h", "aarch64_sme_mop4a", "", [ImmCheck<0, ImmCheck0_1>]>; + defm SVFMOP4S_H : MOP4<"s", "za16", "h", "aarch64_sme_mop4s", "", [ImmCheck<0, ImmCheck0_1>]>; def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; @@ -559,8 +559,8 @@ let SMETargetGuard = "sme-f16f16" in { } let SMETargetGuard = "sme-b16b16" in { - defm SVBMOP4A_H : MOP4<"a", 
"za16", "bf", "aarch64_sme_mop4a", "">; - defm SVBMOP4S_H : MOP4<"s", "za16", "bf", "aarch64_sme_mop4s", "">; + defm SVBMOP4A_H : MOP4<"a", "za16", "bf", "aarch64_sme_mop4a", "", [ImmCheck<0, ImmCheck0_1>]>; + defm SVBMOP4S_H : MOP4<"s", "za16", "bf", "aarch64_sme_mop4s", "", [ImmCheck<0, ImmCheck0_1>]>; def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c index 34a9633374d3f..521f7900bacd2 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c @@ -18,448 +18,448 @@ // CHECK-LABEL: @test_svmop4a_1x1_za32_s8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_s8u10__SVInt8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_s8,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_s8,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_s8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_s8u10__SVInt8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_s8,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_s8,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_u8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_u8u11__SVUint8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_u8,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_u8,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_u8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_u8u11__SVUint8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], 
[[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_u8,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_u8,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_s8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_u8,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_u8,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_s8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - 
SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_u8,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_u8,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_u8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_s8,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_s8,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_u8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_s8,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_s8,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, 
[[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_s16_s16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za32,_s16_s16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_s16_s16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za32,_s16_s16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_u16_u16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za32,_u16_u16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_u16_u16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za32,_u16_u16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_f16_f16u13__SVFloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void 
test_svmop4a_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_f16_f16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za32,_f16_f16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_f16_f16u13__SVFloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_f16_f16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za32,_f16_f16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z31test_svmop4a_1x1_za32_bf16_bf16u14__SVBfloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_bf16_bf16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za32,_bf16_bf16,)(1, zn, zm); } // CHECK-LABEL: 
@test_svmop4s_1x1_za32_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z31test_svmop4s_1x1_za32_bf16_bf16u14__SVBfloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_bf16_bf16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za32,_bf16_bf16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_s16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_s16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_s16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_s16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_u16_u16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za64,_u16_u16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail 
call void @llvm.aarch64.sme.umop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_u16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_u16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_s16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_u16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_u16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_s16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], 
[[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_u16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_u16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_u16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_u16_s16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za64,_u16_s16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_u16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_s16,)(3, zn, 
zm); + SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_s16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za16_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za16_f16_f16u13__SVFloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za16_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za16,_f16_f16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za16,_f16_f16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za16_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za16_f16_f16u13__SVFloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za16_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za16,_f16_f16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za16,_f16_f16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_f32_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 1, 
[[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_f32_f32u13__SVFloat32_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_f32_f32,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za32,_f32_f32,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_f32_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_f32_f32u13__SVFloat32_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_f32_f32,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za32,_f32_f32,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_f64_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_f64_f64u13__SVFloat64_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_f64_f64,)(3, zn, zm); + SME_ACLE_FUNC(svmop4a_1x1_za64,_f64_f64,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_f64_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_f64_f64u13__SVFloat64_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za64,_f64_f64,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za64,_f64_f64,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za16_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z31test_svmop4a_1x1_za16_bf16_bf16u14__SVBfloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za16_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za16,_bf16_bf16,)(3, zn, zm); + 
SME_ACLE_FUNC(svmop4a_1x1_za16,_bf16_bf16,)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za16_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z31test_svmop4s_1x1_za16_bf16_bf16u14__SVBfloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 3, [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 1, [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za16_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za16,_bf16_bf16,)(3, zn, zm); + SME_ACLE_FUNC(svmop4s_1x1_za16,_bf16_bf16,)(1, zn, zm); } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 9840d36b2c0fc..d9a25bd51ddfc 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -650,7 +650,7 @@ multiclass sme_quarter_outer_product_i64, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8i16>; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_7, nxv8i16>; def _M2ZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 0}, subtr, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>; @@ -5654,7 +5654,7 @@ multiclass sme2_fmop4as_fp16_non_widening { def NAME # _MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8f16>; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_1, nxv8f16>; // Multiple and single vectors def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5730,7 +5730,7 @@ multiclass sme2_bfmop4as_non_widening { def NAME # 
_MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8bf16>; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_1, nxv8bf16>; // Multiple and single vectors def _M2ZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5812,7 +5812,7 @@ multiclass sme2_fmop4as_fp64_non_widening { def NAME # _MZZ_D # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv2f64>; + def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_7, nxv2f64>; // Multiple and single vectors def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>; diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll index 7bcf407d23297..6b878c827efaa 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll @@ -10,7 +10,7 @@ define void @mop4a_za32_s8( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4a za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void } @@ -20,7 +20,7 @@ define void @mop4s_za32_s8( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4s za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void } @@ -30,7 +30,7 @@ define void @mop4a_za32_u8( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: umop4a za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void } @@ 
-40,7 +40,7 @@ define void @mop4s_za32_u8( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: umop4s za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void } @@ -50,7 +50,7 @@ define void @mop4a_za32_s8_u8( %zn, %zm) #0 ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: sumop4a za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void } @@ -60,7 +60,7 @@ define void @mop4s_za32_s8_u8( %zn, %zm) #0 ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: sumop4s za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void } @@ -70,7 +70,7 @@ define void @mop4a_za32_u8_s8( %zn, %zm) #0 ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: usmop4a za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void } @@ -80,7 +80,7 @@ define void @mop4s_za32_u8_s8( %zn, %zm) #0 ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: usmop4s za1.s, z0.b, z24.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void } @@ -90,7 +90,7 @@ define void @mop4a_za32_s16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -100,7 +100,7 @@ define void @mop4s_za32_s16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4s za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void 
@llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -110,7 +110,7 @@ define void @mop4a_za32_u16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: umop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -120,7 +120,7 @@ define void @mop4s_za32_u16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: umop4s za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -130,7 +130,7 @@ define void @mop4a_za32_f16( %zn, %zm) #0 ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: fmop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 0, %zn, %zm) ret void } @@ -140,7 +140,7 @@ define void @mop4s_za32_f16( %zn, %zm) #0 ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: fmop4s za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 0, %zn, %zm) ret void } @@ -150,7 +150,7 @@ define void @mop4a_za32_bf16( %zn, %z ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: bfmop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 0, %zn, %zm) ret void } @@ -160,7 +160,7 @@ define void @mop4s_za32_bf16( %zn, %z ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: bfmop4s za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 0, %zn, %zm) ret void } @@ -170,7 +170,7 @@ define void 
@mop4a_za64_s16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -180,7 +180,7 @@ define void @mop4s_za64_s16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: smop4s za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -190,7 +190,7 @@ define void @mop4a_za64_u16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: umop4a za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -200,7 +200,7 @@ define void @mop4s_za64_u16( %zn, %zm) #0 { ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: umop4s za1.s, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -210,7 +210,7 @@ define void @mop4a_za64_s16_u16( %zn, %zm) ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: sumop4a za1.d, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -220,7 +220,7 @@ define void @mop4s_za64_s16_u16( %zn, %zm) ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: sumop4s za1.d, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -230,7 +230,7 @@ define void @mop4a_za64_u16_s16( %zn, %zm) ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: usmop4a za1.d, z0.h, z24.h ; CHECK-NEXT: ret - call void 
@llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -240,7 +240,7 @@ define void @mop4s_za64_u16_s16( %zn, %zm) ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: usmop4s za1.d, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -251,7 +251,7 @@ define void @mop4a_za16_f16( %zn, %zm) #0 ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: fmop4a za1.h, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 0, %zn, %zm) ret void } @@ -261,7 +261,7 @@ define void @mop4s_za16_f16( %zn, %zm) #0 ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: fmop4s za1.h, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 0, %zn, %zm) ret void } @@ -271,7 +271,7 @@ define void @mop4a_za32_f32( %zn, %zm) ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: fmop4a za1.s, z0.s, z24.s ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 0, %zn, %zm) ret void } @@ -281,7 +281,7 @@ define void @mop4s_za32_f32( %zn, %zm) ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: fmop4s za1.s, z0.s, z24.s ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 0, %zn, %zm) ret void } @@ -291,7 +291,7 @@ define void @mop4a_za64_f64( %zn, %zm ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: fmop4a za1.d, z0.d, z24.d ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 0, %zn, %zm) ret void } @@ -301,7 +301,7 @@ define void @mop4s_za64_f64( %zn, %zm ; CHECK-NEXT: mov z24.d, 
z1.d ; CHECK-NEXT: fmop4s za1.d, z0.d, z24.d ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 0, %zn, %zm) ret void } @@ -311,7 +311,7 @@ define void @mop4a_za16_bf16( %zn, %z ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: bfmop4a za1.h, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 1, %zn, %zm) + call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 0, %zn, %zm) ret void } @@ -320,8 +320,100 @@ define void @mop4s_za16_bf16( %zn, %z ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: bfmop4s za1.h, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 0, %zn, %zm) + ret void +} + +; Tile limits +define void @mop4s_za32_s8_limit( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za1.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 3, %zn, %zm) + ret void +} + +define void @mop4s_za32_s16_limit( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, %zn, %zm) + ret void +} + +define void @mop4s_za32_f16_limit( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, %zn, %zm) + ret void +} + +define void @mop4s_za32_bf16_limit( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 3, %zn, %zm) + ret void +} + +define void @mop4s_za64_s16_limit( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za64_s16: +; CHECK: // %bb.0: +; 
CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 7, %zn, %zm) + ret void +} + +define void @mop4s_za64_f64_limit( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za64_f64_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za7.d, z0.d, z24.d +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 7, %zn, %zm) + ret void +} + +define void @mop4s_za32_f32_limit( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za32_f32_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za3.s, z0.s, z24.s +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 3, %zn, %zm) + ret void +} + +define void @mop4s_za16_f16_limit( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za16_f16_limit: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4s za1.h, z0.h, z24.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 1, %zn, %zm) + ret void +} + +define void @mop4s_za16_bf16_limit( %zn, %zm) #0 { +; CHECK-LABEL: mop4s_za16_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: bfmop4s za1.h, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 1, %zn, %zm) ret void } + attributes #0 = {nounwind "target-features" = "+sme-i16i64,+sme-f64f64,+sme-b16b16,+sme2p1,+bf16,+sme-f16f16,+sme-mop4" } From eb39b7113c928ae739307d4c684dbf887842645b Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Wed, 26 Feb 2025 16:13:26 +0000 Subject: [PATCH 07/13] Fix llvm test --- .../AArch64/sme2-intrinsics-mop4a_1x1.ll | 96 +++++++++---------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll index 6b878c827efaa..ec899fab7cf21 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll @@ 
-8,7 +8,7 @@ define void @mop4a_za32_s8( %zn, %zm) #0 { ; CHECK-LABEL: mop4a_za32_s8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4a za1.s, z0.b, z24.b +; CHECK-NEXT: smop4a za0.s, z0.b, z24.b ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void @@ -18,7 +18,7 @@ define void @mop4s_za32_s8( %zn, %zm) #0 { ; CHECK-LABEL: mop4s_za32_s8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4s za1.s, z0.b, z24.b +; CHECK-NEXT: smop4s za0.s, z0.b, z24.b ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void @@ -28,7 +28,7 @@ define void @mop4a_za32_u8( %zn, %zm) #0 { ; CHECK-LABEL: mop4a_za32_u8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umop4a za1.s, z0.b, z24.b +; CHECK-NEXT: umop4a za0.s, z0.b, z24.b ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void @@ -38,7 +38,7 @@ define void @mop4s_za32_u8( %zn, %zm) #0 { ; CHECK-LABEL: mop4s_za32_u8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umop4s za1.s, z0.b, z24.b +; CHECK-NEXT: umop4s za0.s, z0.b, z24.b ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void @@ -48,7 +48,7 @@ define void @mop4a_za32_s8_u8( %zn, %zm) #0 ; CHECK-LABEL: mop4a_za32_s8_u8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sumop4a za1.s, z0.b, z24.b +; CHECK-NEXT: sumop4a za0.s, z0.b, z24.b ; CHECK-NEXT: ret call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void @@ -58,7 +58,7 @@ define void @mop4s_za32_s8_u8( %zn, %zm) #0 ; CHECK-LABEL: mop4s_za32_s8_u8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sumop4s za1.s, z0.b, z24.b +; CHECK-NEXT: sumop4s za0.s, z0.b, z24.b ; CHECK-NEXT: ret call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void @@ -68,7 +68,7 @@ define void @mop4a_za32_u8_s8( %zn, %zm) #0 ; CHECK-LABEL: 
mop4a_za32_u8_s8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: usmop4a za1.s, z0.b, z24.b +; CHECK-NEXT: usmop4a za0.s, z0.b, z24.b ; CHECK-NEXT: ret call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void @@ -78,7 +78,7 @@ define void @mop4s_za32_u8_s8( %zn, %zm) #0 ; CHECK-LABEL: mop4s_za32_u8_s8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: usmop4s za1.s, z0.b, z24.b +; CHECK-NEXT: usmop4s za0.s, z0.b, z24.b ; CHECK-NEXT: ret call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 0, %zn, %zm) ret void @@ -88,7 +88,7 @@ define void @mop4a_za32_s16( %zn, %zm) #0 { ; CHECK-LABEL: mop4a_za32_s16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4a za1.s, z0.h, z24.h +; CHECK-NEXT: smop4a za0.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void @@ -98,7 +98,7 @@ define void @mop4s_za32_s16( %zn, %zm) #0 { ; CHECK-LABEL: mop4s_za32_s16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: smop4s za0.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void @@ -108,7 +108,7 @@ define void @mop4a_za32_u16( %zn, %zm) #0 { ; CHECK-LABEL: mop4a_za32_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umop4a za1.s, z0.h, z24.h +; CHECK-NEXT: umop4a za0.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void @@ -118,7 +118,7 @@ define void @mop4s_za32_u16( %zn, %zm) #0 { ; CHECK-LABEL: mop4s_za32_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umop4s za1.s, z0.h, z24.h +; CHECK-NEXT: umop4s za0.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void @@ -128,7 +128,7 @@ define void @mop4a_za32_f16( %zn, %zm) #0 ; CHECK-LABEL: mop4a_za32_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d 
-; CHECK-NEXT: fmop4a za1.s, z0.h, z24.h +; CHECK-NEXT: fmop4a za0.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 0, %zn, %zm) ret void @@ -138,7 +138,7 @@ define void @mop4s_za32_f16( %zn, %zm) #0 ; CHECK-LABEL: mop4s_za32_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmop4s za1.s, z0.h, z24.h +; CHECK-NEXT: fmop4s za0.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 0, %zn, %zm) ret void @@ -148,7 +148,7 @@ define void @mop4a_za32_bf16( %zn, %z ; CHECK-LABEL: mop4a_za32_bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: bfmop4a za1.s, z0.h, z24.h +; CHECK-NEXT: bfmop4a za0.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 0, %zn, %zm) ret void @@ -158,7 +158,7 @@ define void @mop4s_za32_bf16( %zn, %z ; CHECK-LABEL: mop4s_za32_bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: bfmop4s za1.s, z0.h, z24.h +; CHECK-NEXT: bfmop4s za0.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 0, %zn, %zm) ret void @@ -168,9 +168,9 @@ define void @mop4a_za64_s16( %zn, %zm) #0 { ; CHECK-LABEL: mop4a_za64_s16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4a za1.s, z0.h, z24.h +; CHECK-NEXT: smop4a za0.d, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 0, %zn, %zm) + call void @llvm.aarch64.sme.smop4a.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -178,9 +178,9 @@ define void @mop4s_za64_s16( %zn, %zm) #0 { ; CHECK-LABEL: mop4s_za64_s16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: smop4s za0.d, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 0, %zn, %zm) + call void @llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -188,9 +188,9 @@ define void @mop4a_za64_u16( %zn, %zm) #0 
{ ; CHECK-LABEL: mop4a_za64_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umop4a za1.s, z0.h, z24.h +; CHECK-NEXT: umop4a za0.d, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 0, %zn, %zm) + call void @llvm.aarch64.sme.umop4a.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -198,9 +198,9 @@ define void @mop4s_za64_u16( %zn, %zm) #0 { ; CHECK-LABEL: mop4s_za64_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umop4s za1.s, z0.h, z24.h +; CHECK-NEXT: umop4s za0.d, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 0, %zn, %zm) + call void @llvm.aarch64.sme.umop4s.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void } @@ -208,7 +208,7 @@ define void @mop4a_za64_s16_u16( %zn, %zm) ; CHECK-LABEL: mop4a_za64_s16_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sumop4a za1.d, z0.h, z24.h +; CHECK-NEXT: sumop4a za0.d, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void @@ -218,7 +218,7 @@ define void @mop4s_za64_s16_u16( %zn, %zm) ; CHECK-LABEL: mop4s_za64_s16_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sumop4s za1.d, z0.h, z24.h +; CHECK-NEXT: sumop4s za0.d, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void @@ -228,7 +228,7 @@ define void @mop4a_za64_u16_s16( %zn, %zm) ; CHECK-LABEL: mop4a_za64_u16_s16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: usmop4a za1.d, z0.h, z24.h +; CHECK-NEXT: usmop4a za0.d, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void @@ -238,7 +238,7 @@ define void @mop4s_za64_u16_s16( %zn, %zm) ; CHECK-LABEL: mop4s_za64_u16_s16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: usmop4s za1.d, z0.h, z24.h +; CHECK-NEXT: usmop4s za0.d, z0.h, z24.h ; CHECK-NEXT: ret call void 
@llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 0, %zn, %zm) ret void @@ -249,7 +249,7 @@ define void @mop4a_za16_f16( %zn, %zm) #0 ; CHECK-LABEL: mop4a_za16_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmop4a za1.h, z0.h, z24.h +; CHECK-NEXT: fmop4a za0.h, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 0, %zn, %zm) ret void @@ -259,7 +259,7 @@ define void @mop4s_za16_f16( %zn, %zm) #0 ; CHECK-LABEL: mop4s_za16_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmop4s za1.h, z0.h, z24.h +; CHECK-NEXT: fmop4s za0.h, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4s.1x1.nxv8f16(i32 0, %zn, %zm) ret void @@ -269,7 +269,7 @@ define void @mop4a_za32_f32( %zn, %zm) ; CHECK-LABEL: mop4a_za32_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmop4a za1.s, z0.s, z24.s +; CHECK-NEXT: fmop4a za0.s, z0.s, z24.s ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4a.1x1.nxv4f32(i32 0, %zn, %zm) ret void @@ -279,7 +279,7 @@ define void @mop4s_za32_f32( %zn, %zm) ; CHECK-LABEL: mop4s_za32_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmop4s za1.s, z0.s, z24.s +; CHECK-NEXT: fmop4s za0.s, z0.s, z24.s ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4s.1x1.nxv4f32(i32 0, %zn, %zm) ret void @@ -289,7 +289,7 @@ define void @mop4a_za64_f64( %zn, %zm ; CHECK-LABEL: mop4a_za64_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmop4a za1.d, z0.d, z24.d +; CHECK-NEXT: fmop4a za0.d, z0.d, z24.d ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4a.1x1.nxv2f64(i32 0, %zn, %zm) ret void @@ -299,7 +299,7 @@ define void @mop4s_za64_f64( %zn, %zm ; CHECK-LABEL: mop4s_za64_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmop4s za1.d, z0.d, z24.d +; CHECK-NEXT: fmop4s za0.d, z0.d, z24.d ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4s.1x1.nxv2f64(i32 0, %zn, %zm) ret void @@ -309,7 +309,7 @@ define void @mop4a_za16_bf16( %zn, 
%z ; CHECK-LABEL: mop4a_za16_bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: bfmop4a za1.h, z0.h, z24.h +; CHECK-NEXT: bfmop4a za0.h, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4a.1x1.nxv8bf16(i32 0, %zn, %zm) ret void @@ -319,7 +319,7 @@ define void @mop4s_za16_bf16( %zn, %z ; CHECK-LABEL: mop4s_za16_bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: bfmop4s za1.h, z0.h, z24.h +; CHECK-NEXT: bfmop4s za0.h, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4s.1x1.nxv8bf16(i32 0, %zn, %zm) ret void @@ -327,52 +327,52 @@ define void @mop4s_za16_bf16( %zn, %z ; Tile limits define void @mop4s_za32_s8_limit( %zn, %zm) #0 { -; CHECK-LABEL: mop4s_za32_s8: +; CHECK-LABEL: mop4s_za32_s8_limit: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4s za1.s, z0.b, z24.b +; CHECK-NEXT: smop4s za3.s, z0.b, z24.b ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv16i8(i32 3, %zn, %zm) ret void } define void @mop4s_za32_s16_limit( %zn, %zm) #0 { -; CHECK-LABEL: mop4s_za32_s16: +; CHECK-LABEL: mop4s_za32_s16_limit: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: smop4s za3.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, %zn, %zm) ret void } define void @mop4s_za32_f16_limit( %zn, %zm) #0 { -; CHECK-LABEL: mop4s_za32_f16: +; CHECK-LABEL: mop4s_za32_f16_limit: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmop4s za1.s, z0.h, z24.h +; CHECK-NEXT: fmop4s za3.s, z0.h, z24.h ; CHECK-NEXT: ret call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, %zn, %zm) ret void } define void @mop4s_za32_bf16_limit( %zn, %zm) #0 { -; CHECK-LABEL: mop4s_za32_bf16: +; CHECK-LABEL: mop4s_za32_bf16_limit: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: bfmop4s za1.s, z0.h, z24.h +; CHECK-NEXT: bfmop4s za3.s, z0.h, z24.h ; CHECK-NEXT: ret call void 
@llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 3, %zn, %zm) ret void } define void @mop4s_za64_s16_limit( %zn, %zm) #0 { -; CHECK-LABEL: mop4s_za64_s16: +; CHECK-LABEL: mop4s_za64_s16_limit: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smop4s za1.s, z0.h, z24.h +; CHECK-NEXT: smop4s za7.d, z0.h, z24.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 7, %zn, %zm) + call void @llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 7, %zn, %zm) ret void } @@ -407,7 +407,7 @@ define void @mop4s_za16_f16_limit( %zn, % } define void @mop4s_za16_bf16_limit( %zn, %zm) #0 { -; CHECK-LABEL: mop4s_za16_bf16: +; CHECK-LABEL: mop4s_za16_bf16_limit: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: bfmop4s za1.h, z0.h, z24.h From 3b1f6674b21fe7c8a24f271be25b72da889f29ff Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Fri, 28 Feb 2025 09:25:53 +0000 Subject: [PATCH 08/13] Fix typo in QuarterTile --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 58 +++++++++++------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index eeea8d77d1e16..f8ae83cb2e96b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3064,40 +3064,40 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic; def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic; - class SME_OuterProduct_QuaterTile_Single + class SME_OuterProduct_QuarterTile_Single_Single : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], [ImmArg>]>; - def int_aarch64_sme_mop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_mop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_mop4a_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_mop4s_1x1 : SME_OuterProduct_QuaterTile_Single; - def 
int_aarch64_sme_smop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_smop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_smop4a_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_smop4s_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_umop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_umop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_umop4a_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_umop4s_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_sumop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_sumop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_sumop4a_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_sumop4s_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_usmop4a_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_usmop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_usmop4a_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_usmop4s_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_smop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_smop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_umop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_umop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_sumop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_sumop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_usmop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; - def int_aarch64_sme_usmop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single; + def int_aarch64_sme_mop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_mop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + 
def int_aarch64_sme_mop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_mop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_smop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_smop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_smop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_smop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_umop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_umop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_umop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_umop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_sumop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_sumop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_sumop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_sumop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_usmop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_usmop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_usmop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_usmop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_smop4a_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_smop4s_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_umop4a_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_umop4s_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_sumop4a_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_sumop4s_za64_wide_1x1 : 
SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_usmop4a_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_usmop4s_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], From 51ab5858aa6bb870d1bac3879244fa53adbffe76 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Wed, 5 Mar 2025 15:13:03 +0000 Subject: [PATCH 09/13] Restructure files and add negative tests --- clang/include/clang/Basic/arm_sme.td | 137 +++++++++++------- .../sme2-intrinsics/acle_sme2_mop4_1x1.c | 10 +- .../acle_sme2p2_imm.cpp | 84 +++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 35 ++--- llvm/lib/Target/AArch64/SMEInstrFormats.td | 20 +-- 5 files changed, 191 insertions(+), 95 deletions(-) create mode 100644 clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index ff42f110f72cb..6f127c7f4af61 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -289,6 +289,88 @@ multiclass ZAFPOuterProd { defm SVMOPA : ZAFPOuterProd<"mopa">; defm SVMOPS : ZAFPOuterProd<"mops">; +//////////////////////////////////////////////////////////////////////////////// +// SME2 - FMOP4A, FMOP4S, BFMOP4A, BFMOP4S + +multiclass MOP4 checks> { + def _1x1 : Inst<"svmop4" # mode # "[_1x1]_" # za # "[_{d}_{d}]", "vidd", t, MergeNone, i # "_1x1", [IsInOutZA, IsStreaming], checks>; +} + +let SMETargetGuard = "sme2,sme-mop4" in { + + defm SVFMOP4A_HtoS : MOP4<"a", "za32", "hb", "aarch64_sme_mop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4S_HtoS : MOP4<"s", "za32", "hb", "aarch64_sme_mop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4A_S : MOP4<"a", "za32", "f", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4S_S : MOP4<"s", "za32", "f", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_3>]>; +} + +let SMETargetGuard = 
"sme2,sme-mop4,sme-f64f64" in { + defm SVFMOP4A_D : MOP4<"a", "za64", "d", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_7>]>; + defm SVFMOP4S_D : MOP4<"s", "za64", "d", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_7>]>; +} + +let SMETargetGuard = "sme2,sme-mop4,sme-f16f16" in { + defm SVFMOP4A_H : MOP4<"a", "za16", "h", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_1>]>; + defm SVFMOP4S_H : MOP4<"s", "za16", "h", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>; +} + +let SMETargetGuard = "sme2,sme-mop4,sme-b16b16" in { + defm SVBMOP4A_H : MOP4<"a", "za16", "b", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_1>]>; + defm SVBMOP4S_H : MOP4<"s", "za16", "b", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>; +} + +//////////////////////////////////////////////////////////////////////////////// +// SME2 - SMOP4A, SMOP4S, UMOP4A, UMOP4S + +let SMETargetGuard = "sme2,sme-mop4" in { + defm SVSMOP4A_H : MOP4<"a", "za32", "cs", "aarch64_sme_smop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVSMOP4S_H : MOP4<"s", "za32", "cs", "aarch64_sme_smop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; + + defm SVUMOP4A_H : MOP4<"a", "za32", "UcUs", "aarch64_sme_umop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVUMOP4S_H : MOP4<"s", "za32", "UcUs", "aarch64_sme_umop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; +} + +let SMETargetGuard = "sme2,sme-mop4,sme-i16i64" in { + defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; + + defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; +} + +//////////////////////////////////////////////////////////////////////////////// +// SME2 - SUMOP4A, SUMOP4S, USMOP4A, USMOP4S + +multiclass SUMOP4 checks> { + def _1x1 : SInst<"svmop4" # mode # "[_1x1]_" # za # 
"[_{d}_{3}]", + "vidu", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x1", + [IsStreaming, IsInOutZA], + checks>; +} + +multiclass USMOP4 checks> { + def _1x1 : SInst<"svmop4" # mode # "[_1x1]_" # za # "[_{d}_{3}]", + "vidx", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x1", + [IsStreaming, IsInOutZA], + checks>; +} + +let SMETargetGuard = "sme2,sme-mop4" in { + defm SVSUMOP4A_S : SUMOP4<"a", "za32", "c", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVSUMOP4S_S : SUMOP4<"s", "za32", "c", "", [ImmCheck<0, ImmCheck0_3>]>; + + defm SVUSMOP4A_S : USMOP4<"a", "za32", "Uc", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVUSMOP4S_S : USMOP4<"s", "za32", "Uc", "", [ImmCheck<0, ImmCheck0_3>]>; +} + +let SMETargetGuard = "sme2,sme-mop4,sme-i16i64" in { + defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + + defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; +} + //////////////////////////////////////////////////////////////////////////////// // SME2 - ADD, SUB @@ -376,24 +458,6 @@ let SMETargetGuard = "sme2" in { // Outer product and accumulate/subtract // -multiclass MOP4 checks> { - def NAME # "_1x1" : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], checks>; -} - -multiclass SUMOP4 checks> { - def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{d}_{3}]", - "vidu", t, MergeNone, "aarch64_sme_sumop4" # s # i # "_wide_1x1", - [IsStreaming, IsInOutZA], - checks>; -} - -multiclass USMOP4 checks> { - def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{d}_{3}]", - "vidx", t, MergeNone, "aarch64_sme_usmop4" # s # i # "_wide_1x1", - [IsStreaming, IsInOutZA], - checks>; -} - let SMETargetGuard = "sme2" in { def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, 
"aarch64_sme_smopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; @@ -405,25 +469,6 @@ let SMETargetGuard = "sme2" in { def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; - defm SVSMOP4A_H : MOP4<"a", "za32", "cs", "aarch64_sme_smop4a", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVSMOP4S_H : MOP4<"s", "za32", "cs", "aarch64_sme_smop4s", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - - defm SVUMOP4A_H : MOP4<"a", "za32", "UcUs", "aarch64_sme_umop4a", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVUMOP4S_H : MOP4<"s", "za32", "UcUs", "aarch64_sme_umop4s", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - - defm SVFMOP4A_HtoS : MOP4<"a", "za32", "h", "aarch64_sme_mop4a", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVFMOP4S_HtoS : MOP4<"s", "za32", "h", "aarch64_sme_mop4s", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVFMOP4A_S : MOP4<"a", "za32", "f", "aarch64_sme_mop4a", "", [ImmCheck<0, ImmCheck0_3>]>; - defm SVFMOP4S_S : MOP4<"s", "za32", "f", "aarch64_sme_mop4s", "", [ImmCheck<0, ImmCheck0_3>]>; - - defm SVBMOP4A_S : MOP4<"a", "za32", "b", "aarch64_sme_mop4a", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVBMOP4S_S : MOP4<"s", "za32", "b", "aarch64_sme_mop4s", "_wide", [ImmCheck<0, ImmCheck0_3>]>; - - defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs", "", [ImmCheck<0, ImmCheck0_3>]>; - defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs", "", [ImmCheck<0, ImmCheck0_3>]>; - defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs", "", [ImmCheck<0, ImmCheck0_3>]>; - defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs", "", [ImmCheck<0, ImmCheck0_3>]>; - // VERTICAL DOT-PRODUCT def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, 
ImmCheck0_3>]>; def SVVDOT_LANE_ZA32_VG1x4_S : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", "c", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; @@ -474,15 +519,6 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-i16i64" in { - defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a_za64", "_wide", [ImmCheck<0, ImmCheck0_7>]>; - defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s_za64", "_wide", [ImmCheck<0, ImmCheck0_7>]>; - defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a_za64", "_wide", [ImmCheck<0, ImmCheck0_7>]>; - defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s_za64", "_wide", [ImmCheck<0, ImmCheck0_7>]>; - defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; - defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; - defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; - defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; - def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; @@ -519,9 +555,6 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-f64f64" in { - defm SVFMOP4A_D : MOP4<"a", "za64", "d", "aarch64_sme_mop4a", "", [ImmCheck<0, ImmCheck0_7>]>; - defm SVFMOP4S_D : MOP4<"s", "za64", "d", "aarch64_sme_mop4s", "", [ImmCheck<0, ImmCheck0_7>]>; - def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, 
"aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; @@ -539,9 +572,6 @@ let SMETargetGuard = "sme2,sme-f64f64" in { } let SMETargetGuard = "sme-f16f16" in { - defm SVFMOP4A_H : MOP4<"a", "za16", "h", "aarch64_sme_mop4a", "", [ImmCheck<0, ImmCheck0_1>]>; - defm SVFMOP4S_H : MOP4<"s", "za16", "h", "aarch64_sme_mop4s", "", [ImmCheck<0, ImmCheck0_1>]>; - def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; def SVMLS_MULTI_VG1x2_F16 : Inst<"svmls_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; @@ -559,9 +589,6 @@ let SMETargetGuard = "sme-f16f16" in { } let SMETargetGuard = "sme-b16b16" in { - defm SVBMOP4A_H : MOP4<"a", "za16", "bf", "aarch64_sme_mop4a", "", [ImmCheck<0, ImmCheck0_1>]>; - defm SVBMOP4S_H : MOP4<"s", "za16", "bf", "aarch64_sme_mop4s", "", [ImmCheck<0, ImmCheck0_1>]>; - def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; def SVMLS_MULTI_VG1x2_BF16 : Inst<"svmls_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c index 521f7900bacd2..58cc758311f20 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c @@ -1,11 +1,11 @@ // NOTE: 
Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature 
+sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp b/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp new file mode 100644 index 0000000000000..556cb1742dbbd --- /dev/null +++ b/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp @@ -0,0 +1,84 @@ +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -target-feature +sme2p2 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -fsyntax-only -verify %s + +// REQUIRES: aarch64-registered-target + +#include + +void tests_mop4_imm_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x1_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x1_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x1_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4a_1x1_za32_u8_s8(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x1_za32_u8_s8(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x1_za32_s16_s16(-1, zn, zm); 
// expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svmop4a_1x1_za64_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x1_za64_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + return; +} + +void tests_mop4_imm_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x1_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svmop4a_1x1_za64_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x1_za64_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + return; +} + +void tests_mop4_imm_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x1_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4a_1x1_za64_u16_s16(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x1_za64_u16_s16(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + return; +} + +void tests_mop4_imm_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x1_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid 
range [0, 3]}} + + svmop4a_1x1_za16_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4s_1x1_za16_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + return; +} + +void tests_mop4_imm_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x1_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svmop4a_1x1_za16_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4s_1x1_za16_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + return; + +} + +void tests_mop4_imm_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svmop4s_1x1_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svmop4s_1x1_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + return; +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index f8ae83cb2e96b..211e9ce2f08a6 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3068,36 +3068,21 @@ let TargetPrefix = "aarch64" in { : 
DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, - LLVMMatchType<0>], [ImmArg>]>; + LLVMMatchType<0>], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; + + // 2-way and 4-way multi-vector signed/unsigned Quater Tile Quarter Product A/S + foreach mode = ["s", "a"] in { + foreach za = ["", "_za64"] in { + foreach ty = ["s", "u", "su", "us"] in { + def int_aarch64_sme_ # ty # "mop4" # mode # za # "_wide_1x1" : SME_OuterProduct_QuarterTile_Single_Single; + } + } + } def int_aarch64_sme_mop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; def int_aarch64_sme_mop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; def int_aarch64_sme_mop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; def int_aarch64_sme_mop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_smop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_smop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_smop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_smop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_umop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_umop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_umop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_umop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_sumop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_sumop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_sumop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_sumop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_usmop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_usmop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_usmop4a_1x1 : 
SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_usmop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_smop4a_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_smop4s_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_umop4a_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_umop4s_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_sumop4a_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_sumop4s_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_usmop4a_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_usmop4s_za64_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index d9a25bd51ddfc..1544650be9be4 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -104,7 +104,7 @@ class sme_outer_product_pseudo let usesCustomInserter = 1; } -class sme2_quarter_tile_outer_product_pseudo_single_single +class sme2_quarter_tile_outer_product_pseudo : Pseudo<(outs), (ins i32imm:$tile, zn_ty:$zn, zm_ty:$zm), []>, Sched<[]> { @@ -616,7 +616,7 @@ multiclass sme_quarter_outer_product_i8_i32, SMEPseudo2Instr; - def NAME # _MZZ_BToS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; + def NAME # _MZZ_BToS_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv16i8>; @@ -632,7 +632,7 @@ multiclass sme_quarter_outer_product_i16_i32, SMEPseudo2Instr; - def NAME # _MZZ_HToS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; + def NAME # _MZZ_HToS_PSEUDO : 
sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8i16>; @@ -648,7 +648,7 @@ multiclass sme_quarter_outer_product_i64, SMEPseudo2Instr; - def NAME # _MZZ_HtoD # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; + def NAME # _MZZ_HtoD_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_7, nxv8i16>; @@ -5501,7 +5501,7 @@ multiclass sme2_bfmop4as_widening { // Single vectors def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_S # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; + def NAME # _MZZ_S_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8bf16>; @@ -5652,7 +5652,7 @@ multiclass sme2_fmop4as_fp16_non_widening { // Single vectors def _MZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; + def NAME # _MZZ_H_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_1, nxv8f16>; @@ -5728,7 +5728,7 @@ multiclass sme2_bfmop4as_non_widening { // Single vectors def _MZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_H # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; + def NAME # _MZZ_H_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_1, nxv8bf16>; @@ -5769,7 +5769,7 @@ multiclass sme2_fmop4as_fp32_non_widening { // Single vectors def _MZZ_S : sme2_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR32Mul2_Lo, 
ZPR32Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_S # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; + def NAME # _MZZ_S_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv4f32>; @@ -5810,7 +5810,7 @@ multiclass sme2_fmop4as_fp64_non_widening { // Single vectors def _MZZ_D : sme2_fp64_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR64Mul2_Lo, ZPR64Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_D # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; + def NAME # _MZZ_D_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_7, nxv2f64>; @@ -5851,7 +5851,7 @@ multiclass sme2_fmop4as_fp16_fp32_widening { // Single vectors def _MZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>, SMEPseudo2Instr; - def NAME # _MZZ_HtoS # _PSEUDO : sme2_quarter_tile_outer_product_pseudo_single_single, SMEPseudo2Instr; + def NAME # _MZZ_HtoS_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8f16>; From 6e8e25f1dead05d85bc631eb5f11594efb62022f Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Wed, 5 Mar 2025 16:05:05 +0000 Subject: [PATCH 10/13] For loop the intrinsics --- clang/include/clang/Basic/arm_sme.td | 1 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 12 +++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 6f127c7f4af61..e9cff245c48db 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -297,7 +297,6 @@ multiclass MOP4 check } let SMETargetGuard = "sme2,sme-mop4" in { - defm SVFMOP4A_HtoS : MOP4<"a", "za32", "hb", "aarch64_sme_mop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; defm SVFMOP4S_HtoS : MOP4<"s", "za32", "hb", 
"aarch64_sme_mop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; defm SVFMOP4A_S : MOP4<"a", "za32", "f", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_3>]>; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 211e9ce2f08a6..fe8769154b1da 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3070,7 +3070,7 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, LLVMMatchType<0>], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; - // 2-way and 4-way multi-vector signed/unsigned Quater Tile Quarter Product A/S + // 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S foreach mode = ["s", "a"] in { foreach za = ["", "_za64"] in { foreach ty = ["s", "u", "su", "us"] in { @@ -3079,10 +3079,12 @@ let TargetPrefix = "aarch64" in { } } - def int_aarch64_sme_mop4a_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_mop4s_wide_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_mop4a_1x1 : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_mop4s_1x1 : SME_OuterProduct_QuarterTile_Single_Single; + // 2-way and 4-way multi-vector floating point Quarter Tile Quarter Product A/S + foreach mode = ["s", "a"] in { + foreach wide = ["", "_wide"] in { + def int_aarch64_sme_mop4 # mode # wide # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single; + } + } class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], From 1fa3a353f8ac5fc4e79e33a604ec203a4f94b3ff Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Fri, 7 Mar 2025 10:33:15 +0000 Subject: [PATCH 11/13] Change pattern name --- llvm/lib/Target/AArch64/SMEInstrFormats.td | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 1544650be9be4..54c63ead059ae 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ 
b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -266,7 +266,7 @@ class SME2_Tile_Movaz_Pat(name # _PSEUDO) $tile, $base, $offset)>; -class SME2_ZA_Tile_TwoVec_Pat +class SME2_ZA_Tile_Vec_Single_Single_Pat : Pat<(intrinsic imm_ty:$tile, vt:$Zn, vt:$Zm), (!cast(name # _PSEUDO) $tile, $Zn, $Zm)>; @@ -618,7 +618,7 @@ multiclass sme_quarter_outer_product_i8_i32, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv16i8>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat(op # "_1x1"), timm32_0_3, nxv16i8>; def _M2ZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 0}, subtr, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi, mnemonic>; @@ -634,7 +634,7 @@ multiclass sme_quarter_outer_product_i16_i32, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8i16>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat(op # "_1x1"), timm32_0_3, nxv8i16>; def _M2ZZ_HToS : sme_quarter_outer_product_i16_i32; @@ -650,7 +650,7 @@ multiclass sme_quarter_outer_product_i64, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_7, nxv8i16>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat(op # "_1x1"), timm32_0_7, nxv8i16>; def _M2ZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 0}, subtr, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>; @@ -5503,7 +5503,7 @@ multiclass sme2_bfmop4as_widening { def NAME # _MZZ_S_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8bf16>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat(op # "_1x1"), timm32_0_3, nxv8bf16>; // Multiple and single vectors def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5654,7 +5654,7 @@ multiclass sme2_fmop4as_fp16_non_widening { def NAME # _MZZ_H_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_1, nxv8f16>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat(op # "_1x1"), timm32_0_1, nxv8f16>; // 
Multiple and single vectors def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5730,7 +5730,7 @@ multiclass sme2_bfmop4as_non_widening { def NAME # _MZZ_H_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_1, nxv8bf16>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat(op # "_1x1"), timm32_0_1, nxv8bf16>; // Multiple and single vectors def _M2ZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; @@ -5771,7 +5771,7 @@ multiclass sme2_fmop4as_fp32_non_widening { def NAME # _MZZ_S_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv4f32>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat(op # "_1x1"), timm32_0_3, nxv4f32>; // Multiple and single vectors def _M2ZZ_S : sme2_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZPR32Mul2_Hi>; @@ -5812,7 +5812,7 @@ multiclass sme2_fmop4as_fp64_non_widening { def NAME # _MZZ_D_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_7, nxv2f64>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat(op # "_1x1"), timm32_0_7, nxv2f64>; // Multiple and single vectors def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>; @@ -5853,7 +5853,7 @@ multiclass sme2_fmop4as_fp16_fp32_widening { def NAME # _MZZ_HtoS_PSEUDO : sme2_quarter_tile_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_Tile_TwoVec_Pat(op # "_1x1"), timm32_0_3, nxv8f16>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat(op # "_1x1"), timm32_0_3, nxv8f16>; // Multiple and single vectors def _M2ZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; From 9cc55eb3adeaa586f89e242c75c55a1bdd70e4ba Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Tue, 25 Mar 2025 17:56:03 +0000 Subject: 
[PATCH 12/13] fix optional 1x1 in tests --- clang/include/clang/Basic/arm_sme.td | 38 +++++------ .../sme2-intrinsics/acle_sme2_mop4_1x1.c | 68 +++++++++---------- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index e9cff245c48db..065a86ae90536 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -293,48 +293,48 @@ defm SVMOPS : ZAFPOuterProd<"mops">; // SME2 - FMOP4A, FMOP4S, BFMOP4A, BFMOP4S multiclass MOP4 checks> { - def _1x1 : Inst<"svmop4" # mode # "[_1x1]_" # za # "[_{d}_{d}]", "vidd", t, MergeNone, i # "_1x1", [IsInOutZA, IsStreaming], checks>; + def _1x1 : Inst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{d}]", "vidd", t, MergeNone, i # "_1x1", [IsInOutZA, IsStreaming], checks>; } let SMETargetGuard = "sme2,sme-mop4" in { - defm SVFMOP4A_HtoS : MOP4<"a", "za32", "hb", "aarch64_sme_mop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVFMOP4S_HtoS : MOP4<"s", "za32", "hb", "aarch64_sme_mop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVFMOP4A_S : MOP4<"a", "za32", "f", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_3>]>; - defm SVFMOP4S_S : MOP4<"s", "za32", "f", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4A_HtoS : MOP4<"a", "_za32", "hb", "aarch64_sme_mop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4S_HtoS : MOP4<"s", "_za32", "hb", "aarch64_sme_mop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4A_S : MOP4<"a", "_za32", "f", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_3>]>; + defm SVFMOP4S_S : MOP4<"s", "_za32", "f", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-mop4,sme-f64f64" in { - defm SVFMOP4A_D : MOP4<"a", "za64", "d", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_7>]>; - defm SVFMOP4S_D : MOP4<"s", "za64", "d", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_7>]>; + defm SVFMOP4A_D : MOP4<"a", "_za64", "d", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_7>]>; + 
defm SVFMOP4S_D : MOP4<"s", "_za64", "d", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_7>]>; } let SMETargetGuard = "sme2,sme-mop4,sme-f16f16" in { - defm SVFMOP4A_H : MOP4<"a", "za16", "h", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_1>]>; - defm SVFMOP4S_H : MOP4<"s", "za16", "h", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>; + defm SVFMOP4A_H : MOP4<"a", "_za16", "h", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_1>]>; + defm SVFMOP4S_H : MOP4<"s", "_za16", "h", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>; } let SMETargetGuard = "sme2,sme-mop4,sme-b16b16" in { - defm SVBMOP4A_H : MOP4<"a", "za16", "b", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_1>]>; - defm SVBMOP4S_H : MOP4<"s", "za16", "b", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>; + defm SVBMOP4A_H : MOP4<"a", "_za16", "b", "aarch64_sme_mop4a", [ImmCheck<0, ImmCheck0_1>]>; + defm SVBMOP4S_H : MOP4<"s", "_za16", "b", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>; } //////////////////////////////////////////////////////////////////////////////// // SME2 - SMOP4A, SMOP4S, UMOP4A, UMOP4S let SMETargetGuard = "sme2,sme-mop4" in { - defm SVSMOP4A_H : MOP4<"a", "za32", "cs", "aarch64_sme_smop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVSMOP4S_H : MOP4<"s", "za32", "cs", "aarch64_sme_smop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVSMOP4A_H : MOP4<"a", "_za32", "cs", "aarch64_sme_smop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVSMOP4S_H : MOP4<"s", "_za32", "cs", "aarch64_sme_smop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVUMOP4A_H : MOP4<"a", "za32", "UcUs", "aarch64_sme_umop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; - defm SVUMOP4S_H : MOP4<"s", "za32", "UcUs", "aarch64_sme_umop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVUMOP4A_H : MOP4<"a", "_za32", "UcUs", "aarch64_sme_umop4a_wide", [ImmCheck<0, ImmCheck0_3>]>; + defm SVUMOP4S_H : MOP4<"s", "_za32", "UcUs", "aarch64_sme_umop4s_wide", [ImmCheck<0, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-mop4,sme-i16i64" in { - defm 
SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; - defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVSMOP4A_HtoD : MOP4<"a", "_za64", "s", "aarch64_sme_smop4a_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVSMOP4S_HtoD : MOP4<"s", "_za64", "s", "aarch64_sme_smop4s_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; - defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; - defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUMOP4A_HtoD : MOP4<"a", "_za64", "Us", "aarch64_sme_umop4a_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUMOP4S_HtoD : MOP4<"s", "_za64", "Us", "aarch64_sme_umop4s_za64_wide", [ImmCheck<0, ImmCheck0_7>]>; } //////////////////////////////////////////////////////////////////////////////// diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c index 58cc758311f20..94a839d053479 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c @@ -11,9 +11,9 @@ #include #ifdef SME_OVERLOADED_FORMS -#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3, A4_UNUSED) A1##A3 #else -#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#define SME_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif // CHECK-LABEL: @test_svmop4a_1x1_za32_s8_s8( @@ -27,7 +27,7 @@ // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_s8,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_s8_s8)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_s8_s8( @@ -41,7 +41,7 @@ void test_svmop4a_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm // CPP-CHECK-NEXT: 
ret void // void test_svmop4s_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_s8,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za32,_s8_s8)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_u8_u8( @@ -55,7 +55,7 @@ void test_svmop4s_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_u8,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_u8_u8)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_u8_u8( @@ -69,7 +69,7 @@ void test_svmop4a_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __a // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_u8,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za32,_u8_u8)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_s8_u8( @@ -83,7 +83,7 @@ void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __a // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_u8,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_s8_u8)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_s8_u8( @@ -97,7 +97,7 @@ void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __ar // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_u8,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za32,_s8_u8)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_u8_s8( @@ -111,7 +111,7 @@ void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __ar // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) 
__arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_s8,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_u8_s8)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_u8_s8( @@ -125,7 +125,7 @@ void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __ar // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_s8,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za32,_u8_s8)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_s16_s16( @@ -139,7 +139,7 @@ void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __ar // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_s16_s16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_s16_s16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_s16_s16( @@ -153,7 +153,7 @@ void test_svmop4a_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming _ // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_s16_s16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za32,_s16_s16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_u16_u16( @@ -167,7 +167,7 @@ void test_svmop4s_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming _ // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_u16_u16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_u16_u16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_u16_u16( @@ -181,7 +181,7 @@ void test_svmop4a_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - 
SME_ACLE_FUNC(svmop4s_1x1_za32,_u16_u16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za32,_u16_u16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_f16_f16( @@ -195,7 +195,7 @@ void test_svmop4s_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_f16_f16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_f16_f16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_f16_f16( @@ -209,7 +209,7 @@ void test_svmop4a_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streami // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_f16_f16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za32,_f16_f16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_bf16_bf16( @@ -223,7 +223,7 @@ void test_svmop4s_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streami // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_bf16_bf16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_bf16_bf16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_bf16_bf16( @@ -237,7 +237,7 @@ void test_svmop4a_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_str // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_bf16_bf16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za32,_bf16_bf16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_s16_s16( @@ -251,7 +251,7 @@ void test_svmop4s_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_str // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming 
__arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_s16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za64,_s16_s16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_s16_s16( @@ -265,7 +265,7 @@ void test_svmop4a_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming _ // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_s16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za64,_s16_s16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_u16_u16( @@ -279,7 +279,7 @@ void test_svmop4s_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming _ // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_u16_u16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za64,_u16_u16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_u16_u16( @@ -293,7 +293,7 @@ void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_u16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za64,_u16_u16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_s16_u16( @@ -307,7 +307,7 @@ void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_u16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za64,_s16_u16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_s16_u16( @@ -321,7 +321,7 @@ void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { - 
SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_u16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za64,_s16_u16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_u16_s16( @@ -335,7 +335,7 @@ void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_u16_s16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za64,_u16_s16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_u16_s16( @@ -349,7 +349,7 @@ void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_s16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za64,_u16_s16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za16_f16_f16( @@ -363,7 +363,7 @@ void test_svmop4s_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za16_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za16,_f16_f16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za16,_f16_f16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za16_f16_f16( @@ -377,7 +377,7 @@ void test_svmop4a_1x1_za16_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streami // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za16_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za16,_f16_f16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za16,_f16_f16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za32_f32_f32( @@ -391,7 +391,7 @@ void test_svmop4s_1x1_za16_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streami // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { - 
SME_ACLE_FUNC(svmop4a_1x1_za32,_f32_f32,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_f32_f32)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za32_f32_f32( @@ -405,7 +405,7 @@ void test_svmop4a_1x1_za32_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streami // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za32_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za32,_f32_f32,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za32,_f32_f32)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za64_f64_f64( @@ -419,7 +419,7 @@ void test_svmop4s_1x1_za32_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streami // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za64_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za64,_f64_f64,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za64,_f64_f64)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za64_f64_f64( @@ -433,7 +433,7 @@ void test_svmop4a_1x1_za64_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streami // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za64_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za64,_f64_f64,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za64,_f64_f64)(1, zn, zm); } // CHECK-LABEL: @test_svmop4a_1x1_za16_bf16_bf16( @@ -447,7 +447,7 @@ void test_svmop4s_1x1_za64_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streami // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za16_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za16,_bf16_bf16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4a,_1x1,_za16,_bf16_bf16)(1, zn, zm); } // CHECK-LABEL: @test_svmop4s_1x1_za16_bf16_bf16( @@ -461,5 +461,5 @@ void test_svmop4a_1x1_za16_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_str // CPP-CHECK-NEXT: ret void // void test_svmop4s_1x1_za16_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming 
__arm_inout("za") { - SME_ACLE_FUNC(svmop4s_1x1_za16,_bf16_bf16,)(1, zn, zm); + SME_ACLE_FUNC(svmop4s,_1x1,_za16,_bf16_bf16)(1, zn, zm); } From 41e9484d2e3141e7bece4831d13c2906460443a6 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Fri, 28 Mar 2025 17:37:32 +0000 Subject: [PATCH 13/13] Fix SUMOP and USMOP [_1x1] --- clang/include/clang/Basic/arm_sme.td | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 065a86ae90536..5012874a08790 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -341,33 +341,33 @@ let SMETargetGuard = "sme2,sme-mop4,sme-i16i64" in { // SME2 - SUMOP4A, SUMOP4S, USMOP4A, USMOP4S multiclass SUMOP4 checks> { - def _1x1 : SInst<"svmop4" # mode # "[_1x1]_" # za # "[_{d}_{3}]", + def _1x1 : SInst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{3}]", "vidu", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x1", [IsStreaming, IsInOutZA], checks>; } multiclass USMOP4 checks> { - def _1x1 : SInst<"svmop4" # mode # "[_1x1]_" # za # "[_{d}_{3}]", + def _1x1 : SInst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{3}]", "vidx", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x1", [IsStreaming, IsInOutZA], checks>; } let SMETargetGuard = "sme2,sme-mop4" in { - defm SVSUMOP4A_S : SUMOP4<"a", "za32", "c", "", [ImmCheck<0, ImmCheck0_3>]>; - defm SVSUMOP4S_S : SUMOP4<"s", "za32", "c", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVSUMOP4A_S : SUMOP4<"a", "_za32", "c", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVSUMOP4S_S : SUMOP4<"s", "_za32", "c", "", [ImmCheck<0, ImmCheck0_3>]>; - defm SVUSMOP4A_S : USMOP4<"a", "za32", "Uc", "", [ImmCheck<0, ImmCheck0_3>]>; - defm SVUSMOP4S_S : USMOP4<"s", "za32", "Uc", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVUSMOP4A_S : USMOP4<"a", "_za32", "Uc", "", [ImmCheck<0, ImmCheck0_3>]>; + defm SVUSMOP4S_S : USMOP4<"s", "_za32", "Uc", "", [ImmCheck<0, ImmCheck0_3>]>; } 
let SMETargetGuard = "sme2,sme-mop4,sme-i16i64" in { - defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; - defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + defm SVSUMOP4A_D : SUMOP4<"a", "_za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + defm SVSUMOP4S_D : SUMOP4<"s", "_za64", "s", "_za64", [ImmCheck<0, ImmCheck0_7>]>; - defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; - defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUSMOP4A_D : USMOP4<"a", "_za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; + defm SVUSMOP4S_D : USMOP4<"s", "_za64", "Us", "_za64", [ImmCheck<0, ImmCheck0_7>]>; } ////////////////////////////////////////////////////////////////////////////////