From c0bff6fe1d1630ba93361ac29bca798810a64178 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Mon, 3 Mar 2025 16:16:46 +0000 Subject: [PATCH 01/15] [AArch64][clang][llvm] Add structured sparsity outer product (TMOP) instrinics Implement all {BF/F/S/U/SU/US}TMOP intrinsics in clang and llvm following the ACLE in https://github.com/ARM-software/acle/pull/380/files --- clang/include/clang/Basic/arm_sme.td | 42 +++++ .../AArch64/sme2-intrinsics/acle_sme2_tmop.c | 112 +++++++++++ .../acle_sme2_tmop.cpp | 176 ++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 17 ++ .../lib/Target/AArch64/AArch64RegisterInfo.td | 6 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 42 ++--- llvm/lib/Target/AArch64/SMEInstrFormats.td | 66 ++++--- .../AArch64/GlobalISel/regbank-inlineasm.mir | 6 +- .../emit_fneg_with_non_register_operand.mir | 4 +- .../CodeGen/AArch64/peephole-insvigpr.mir | 4 +- .../CodeGen/AArch64/sme2-intrinsics-tmop.ll | 138 ++++++++++++++ 11 files changed, 552 insertions(+), 61 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1bfcf4c31d552..d425c2c1e38bb 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -907,6 +907,48 @@ let SMETargetGuard = "sme-f16f16" in { } +//////////////////////////////////////////////////////////////////////////////// +// SME2 - TMOP, SUTMOP, USTMOP + +multiclass USTMOP checks> { + def _ : SInst<"svtmopa_lane_" # za # "[_{d}_{3}]", + "vi2x[i", t, MergeNone, "aarch64_sme_ustmopa", + [IsStreaming, IsInOutZA], + checks>; +} + +multiclass SUTMOP checks> { + def _ : SInst<"svtmopa_lane_" # za # "[_{d}_{3}]", + "vi2u[i", t, MergeNone, "aarch64_sme_sutmopa", + [IsStreaming, IsInOutZA], + checks>; +} + +let SMETargetGuard = "sme2,sme-tmop" in { + def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + + defm SVSUTMOPA_S : SUTMOP<"za32", "c", [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + defm SVUSTMOPA_S : USTMOP<"za32", "Uc", [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; +} + +let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in { + def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "hb", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; +} + +let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in { + def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "U", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; +} + +let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in { + def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.MM[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; +} + +let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in { + def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.MM[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; +} + multiclass ZAReadz ch> { let SMETargetGuard = "sme2p1" in { def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t, diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c new file mode 100644 index 0000000000000..a1456275dd810 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c @@ -0,0 +1,112 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svtmopa_lane_za32_s8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_s8_s8,)(1, zn, zm, zk, 3); +} + +// CHECK-LABEL: @test_svtmopa_lane_za32_u8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_u8_u8,)(1, zn, zm, zk, 3); +} + +// CHECK-LABEL: @test_svtmopa_lane_za32_s8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_s8_u8,)(1, zn, zm, zk, 3); +} + +// CHECK-LABEL: @test_svtmopa_lane_za32_u8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_u8_s8,)(1, zn, zm, zk, 3); +} + +// CHECK-LABEL: @test_svtmopa_lane_za32_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_s16_s16,)(1, zn, zm, zk, 3); +} + +// CHECK-LABEL: @test_svtmopa_lane_za32_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_u16_u16,)(1, zn, zm, zk, 3); +} + +// CHECK-LABEL: @test_svtmopa_lane_za32_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_f16_f16,)(1, zn, zm, zk, 3); +} + +// CHECK-LABEL: @test_svtmopa_lane_za32_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_bf16_bf16,)(1, zn, zm, zk, 3); +} + +// CHECK-LABEL: @test_svtmopa_lane_za16_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za16,_f16_f16,)(1, zn, zm, zk, 3); +} + +// CHECK-LABEL: @test_svtmopa_lane_za16_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za16,_bf16_bf16,)(1, zn, zm, zk, 3); +} + +//void test_svtmopa_lane_za16_mf8_mf8_fpm(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { +// SME_ACLE_FUNC(svtmopa_lane_za16,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr); +//} + +//void test_svtmopa_lane_za32_mf8_mf8_fpm(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { +// SME_ACLE_FUNC(svtmopa_lane_za32,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr); +//} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp new file mode 100644 index 0000000000000..6ccd3acaf749f --- /dev/null +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp @@ -0,0 +1,176 @@ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -target-feature +sme-f16f16 -target-feature +sme-b16b16 -verify -emit-llvm -o - %s + +// REQUIRES: aarch64-registered-target + +#include + +void test_features() __arm_streaming __arm_inout("za") { + svuint8x2_t zn_u8; + svint8x2_t zn_s8; + svuint8_t zm_u8; + svint8_t zm_s8; + svuint16x2_t zn_u16; + svint16x2_t zn_s16; + svuint16_t zm_u16; + svint16_t zm_s16; + svbfloat16x2_t zn_bf16; + svfloat16x2_t zn_f16; + svbfloat16_t zm_bf16; + svfloat16_t zm_f16; + svfloat32x2_t zn_f32; + svfloat32_t zm_f32; + fpm_t fpm = 0; + svuint8_t zk; + +// expected-error@+1 {{'svtmopa_lane_za32_s8_s8' needs target feature sme,sme2,sme-tmop}} + svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za32_u8_u8' needs target feature sme,sme2,sme-tmop}} + svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za32_s8_u8' needs target feature sme,sme2,sme-tmop}} + svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za32_u8_s8' needs target feature sme,sme2,sme-tmop}} + svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za32_s16_s16' needs target feature sme,sme2,sme-tmop}} + svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za32_u16_u16' needs target feature sme,sme2,sme-tmop}} + svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za32_f16_f16' needs target feature sme,sme2,sme-tmop}} + svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za32_bf16_bf16' needs target feature sme,sme2,sme-tmop}} + svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za16_f16_f16' needs target feature sme,sme2,sme-tmop,sme-f16f16}} + svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za16_bf16_bf16' needs target feature sme,sme2,sme-tmop,sme-f16f16}} + svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za16_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f16}} + svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, 0, fpm); +// expected-error@+1 {{'svtmopa_lane_za32_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f32}} + svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, 0, fpm); +} + +void test_imm() __arm_streaming __arm_inout("za") { + svuint8x2_t zn_u8; + svint8x2_t zn_s8; + svuint8_t zm_u8; + svint8_t zm_s8; + svuint16x2_t zn_u16; + svint16x2_t zn_s16; + svuint16_t zm_u16; + svint16_t zm_s16; + svbfloat16x2_t zn_bf16; + svfloat16x2_t zn_f16; + svbfloat16_t zm_bf16; + svfloat16_t zm_f16; + svfloat32x2_t zn_f32; + svfloat32_t zm_f32; + fpm_t fpm; + svuint8_t zk; + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s8_s8(3, zn_s8, zm_s8, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s8_s8(4, zn_s8, zm_s8, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s8_s8(-1, zn_s8, zm_s8, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u8_u8(3, zn_u8, zm_u8, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u8_u8(4, zn_u8, zm_u8, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u8_u8(-1, zn_u8, zm_u8, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s8_u8(3, zn_s8, zm_u8, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s8_u8(4, zn_s8, zm_u8, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s8_u8(-1, zn_s8, zm_u8, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u8_s8(3, zn_u8, zm_s8, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u8_s8(4, zn_u8, zm_s8, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u8_s8(-1, zn_u8, zm_s8, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s16_s16(3, zn_s16, zm_s16, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s16_s16(4, zn_s16, zm_s16, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_s16_s16(-1, zn_s16, zm_s16, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u16_u16(3, zn_u16, zm_u16, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u16_u16(4, zn_u16, zm_u16, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_u16_u16(-1, zn_u16, zm_u16, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_f16_f16(3, zn_f16, zm_f16, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_f16_f16(4, zn_f16, zm_f16, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_f16_f16(-1, zn_f16, zm_f16, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_bf16_bf16(3, zn_bf16, zm_bf16, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_bf16_bf16(4, zn_bf16, zm_bf16, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za16_f16_f16(3, zn_f16, zm_f16, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za16_f16_f16(4, zn_f16, zm_f16, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za16_f16_f16(-1, zn_f16, zm_f16, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za16_bf16_bf16(3, zn_bf16, zm_bf16, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za16_bf16_bf16(4, zn_bf16, zm_bf16, zk, 3); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za16_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za16_mf8_mf8_fpm(3, zn_f32, zm_f32, zk, 4, fpm); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za16_mf8_mf8_fpm(4, zn_f32, zm_f32, zk, 3, fpm); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, -1, fpm); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za16_mf8_mf8_fpm(-1, zn_f32, zm_f32, zk, 0, fpm); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_mf8_mf8_fpm(3, zn_f32, zm_f32, zk, 4, fpm); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_mf8_mf8_fpm(4, zn_f32, zm_f32, zk, 3, fpm); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, -1, fpm); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_mf8_mf8_fpm(-1, zn_f32, zm_f32, zk, 0, fpm); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 77ea0bcaa4b5f..349d05628261a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3107,6 +3107,23 @@ let TargetPrefix = "aarch64" in { } } + class SME_OuterProduct_TMOP_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_nxv16i8_ty, + llvm_i32_ty], + [ImmArg>, ImmArg>, + IntrNoMem, IntrHasSideEffects]>; + + def int_aarch64_sme_tmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_stmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_utmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_sutmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_ustmopa : SME_OuterProduct_TMOP_Intrinsic; + class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index fed9b7b173e9c..40553aff04919 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -1269,8 +1269,10 @@ def ZPRMul2AsmOp32_Hi : ZPRAsmOperand<"VectorS_Hi", 32, "Mul2_Hi">; def ZPRMul2AsmOp64_Lo : ZPRAsmOperand<"VectorD_Lo", 64, "Mul2_Lo">; def ZPRMul2AsmOp64_Hi : ZPRAsmOperand<"VectorD_Hi", 64, "Mul2_Hi">; -def ZPR_K : RegisterClass<"AArch64", [untyped], 128, - (add Z20, Z21, Z22, Z23, Z28, Z29, Z30, Z31)>; +def ZPR_K : RegisterClass<"AArch64", [nxv16i8], 128, + (add Z20, Z21, Z22, Z23, Z28, Z29, Z30, Z31)>{ + let Size = 128; +} def ZK : RegisterOperand">{ let EncoderMethod = "EncodeZK"; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index f992f73171e0e..330dce018daa4 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -175,12 +175,12 @@ let Predicates = [HasSME_MOP4, HasSMEI16I64] in { } let Predicates = [HasSME_TMOP] in { -def STMOPA_M2ZZZI_BtoS : sme_int_sparse_outer_product_i32<0b00100, ZZ_b_mul_r, ZPR8, "stmopa">; -def STMOPA_M2ZZZI_HtoS : sme_int_sparse_outer_product_i32<0b00101, ZZ_h_mul_r, ZPR16, "stmopa">; -def UTMOPA_M2ZZZI_BtoS : sme_int_sparse_outer_product_i32<0b11100, ZZ_b_mul_r, ZPR8, "utmopa">; -def UTMOPA_M2ZZZI_HtoS : sme_int_sparse_outer_product_i32<0b10101, ZZ_h_mul_r, ZPR16, "utmopa">; -def SUTMOPA_M2ZZZI_BtoS : sme_int_sparse_outer_product_i32<0b01100, ZZ_b_mul_r, ZPR8, "sutmopa">; -def USTMOPA_M2ZZZI_BtoS : sme_int_sparse_outer_product_i32<0b10100, ZZ_b_mul_r, ZPR8, "ustmopa">; + defm STMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b00100, ZZ_b_mul_r, ZPR8, nxv16i8, "stmopa", "int_aarch64_sme_stmopa">; + defm STMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b00101, ZZ_h_mul_r, ZPR16, nxv8i16, "stmopa", "int_aarch64_sme_stmopa">; + defm UTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b11100, ZZ_b_mul_r, ZPR8, nxv16i8, "utmopa", "int_aarch64_sme_utmopa">; + defm UTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10101, ZZ_h_mul_r, ZPR16, nxv8i16, "utmopa", "int_aarch64_sme_utmopa">; + defm SUTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01100, ZZ_b_mul_r, ZPR8, nxv16i8, "sutmopa", "int_aarch64_sme_sutmopa">; + defm USTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b10100, ZZ_b_mul_r, ZPR8, nxv16i8, "ustmopa", "int_aarch64_sme_ustmopa">; } let Predicates = [HasSME] in { @@ -1064,12 +1064,6 @@ let Predicates = [HasSME_MOP4] in { defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">; } -let Predicates = [HasSME_TMOP] in { - def FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, "ftmopa">; - def FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, "ftmopa">; - def BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, "bftmopa">; -} - let Predicates = [HasSME2p2] in { defm FMUL_2ZZ : sme2_multi2_fmul_sm<"fmul">; defm FMUL_2Z2Z : sme2_multi2_fmul_mm< "fmul">; @@ -1078,26 +1072,32 @@ let Predicates = [HasSME2p2] in { } // [HasSME2p2] +let Predicates = [HasSME_TMOP] in { + defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", "int_aarch64_sme_tmopa">; + defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", "int_aarch64_sme_tmopa">; + defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", "int_aarch64_sme_tmopa">; +} + +let Predicates = [HasSME_TMOP, HasSMEF16F16] in { + defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", "int_aarch64_sme_tmopa">; +} + let Predicates = [HasSME_TMOP, HasSMEB16B16] in { - def BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, "bftmopa">; + defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", "int_aarch64_sme_tmopa">; } -let Predicates = [HasSME_TMOP, HasSMEF8F32], Uses = [FPMR, FPCR] in { - def FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, "ftmopa">; +let Predicates = [HasSME_TMOP, HasSMEF8F32] in { + defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", "int_aarch64_sme_tmopa">; } -let Predicates = [HasSME_TMOP, HasSMEF8F16], Uses = [FPMR, FPCR] in { - def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">; +let Predicates = [HasSME_TMOP, HasSMEF8F16] in { + defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", "int_aarch64_sme_tmopa">; } let Predicates = [HasSME_MOP4, HasSMEF8F16], Uses = [FPMR, FPCR] in { defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a">; } -let Predicates = [HasSME_TMOP, HasSMEF16F16] in { - def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">; -} - let Predicates = [HasSME_MOP4, HasSMEF16F16] in { defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a", "int_aarch64_sme_mop4a">; defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index c008cda21cf05..c42299b050eb6 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -104,6 +104,14 @@ class sme_outer_product_pseudo let usesCustomInserter = 1; } +class sme_sparse_outer_product_pseudo + : Pseudo<(outs), (ins i32imm:$tile, zn_ty:$zn, zm_ty:$zm, ZK:$zk, i32imm:$idx), []>, + Sched<[]> { + // Translated to the actual instructions in AArch64ISelLowering.cpp + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + class sme2_quarter_tile_outer_product_pseudo : Pseudo<(outs), (ins i32imm:$tile, zn_ty:$zn, zm_ty:$zm), []>, @@ -281,6 +289,12 @@ class SME2_ZA_Tile_Vec_Multi_Single_Pat : Pat<(intrinsic imm_ty:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2), (!cast(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>; + +class SME2_ZA_TMOP_Pat + : Pat<(intrinsic imm_ty:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm, (nxv16i8 ZK:$Zk), imm_ty:$idx), + (!cast(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), $Zm, $Zk, $idx)>; + + //===----------------------------------------------------------------------===// // SME pattern match helpers. //===----------------------------------------------------------------------===// @@ -474,35 +488,6 @@ multiclass sme_int_outer_product_i64 opc, string mnemonic, def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } -class sme_int_sparse_outer_product_i32 opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic> - : I<(outs TileOp32:$ZAda), - (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm), - mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm", - "", []>, - Sched<[]> { - bits<2> ZAda; - bits<4> Zn; - bits<5> Zm; - bits<3> Zk; - bits<2> imm; - let Inst{31-25} = 0b1000000; - let Inst{24} = opc{4}; - let Inst{23-22} = 0b01; - let Inst{21} = opc{3}; - let Inst{20-16} = Zm; - let Inst{15} = opc{2}; - let Inst{14} = 0b0; - let Inst{13} = opc{1}; - let Inst{12-10} = Zk; - let Inst{9-6} = Zn; - let Inst{5-4} = imm; - let Inst{3} = opc{0}; - let Inst{2} = 0b0; - let Inst{1-0} = ZAda; - - let Constraints = "$ZAda = $_ZAda"; -} - class sme_outer_product_widening_inst opc, ZPRRegOp zpr_ty, string mnemonic> : I<(outs TileOp32:$ZAda), (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), @@ -3535,7 +3520,7 @@ multiclass sme2_int_bmopx_tile op, SDPatternOperator i //===----------------------------------------------------------------------===// // SME2 Sparse Outer Product and Accumulate -class sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic> +class sme_int_sparse_outer_product_i16 opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic> : I<(outs TileOp16:$ZAda), (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm), mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm", @@ -3564,7 +3549,7 @@ class sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, s let Constraints = "$ZAda = $_ZAda"; } -class sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic> +class sme_int_sparse_outer_product_i32 opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic> : I<(outs TileOp32:$ZAda), (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm), mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm", @@ -3593,6 +3578,25 @@ class sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, s let Constraints = "$ZAda = $_ZAda"; } +multiclass sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, string intrinsic> { + def NAME : sme_int_sparse_outer_product_i16, SMEPseudo2Instr { + let Uses = [FPMR, FPCR]; + } + + def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; + + def _ : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_3, vt>; +} + +multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, string intrinsic> { + def NAME : sme_int_sparse_outer_product_i32, SMEPseudo2Instr { + let Uses = [FPMR, FPCR]; + } + + def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; + + def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_3, vt>; +} //===----------------------------------------------------------------------===/// // SME2 Zero Lookup Table. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir index 216f94b2b51e3..12c015f30ec33 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir @@ -57,11 +57,11 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_reg_output - ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0 + ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 - INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0:gpr32common + INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0:gpr32common %1:_(s32) = COPY %0 $w0 = COPY %1(s32) RET_ReallyLR implicit $w0 @@ -75,7 +75,7 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_mixed_types - ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0, 3735562 /* regdef:FPR64 */, def %1 + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2883594 /* regdef:FPR32_with_hsub_in_FPR16_lo */, def %0, 3735562 /* regdef:GPR64 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1 ; CHECK-NEXT: $d0 = COPY [[COPY1]](s64) diff --git a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir index 253e6a9c076c6..251828b0b4f9b 100644 --- a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir +++ b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir @@ -91,10 +91,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:GPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2 ; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:GPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) ; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2 ; CHECK-NEXT: nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr ; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv diff --git a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir index 3174d3c8c1a73..aef01e42ed7cc 100644 --- a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir +++ b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir @@ -487,7 +487,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[DEF]] - ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3735562 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]] + ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3670026 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]] ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF @@ -505,7 +505,7 @@ body: | %0:gpr64common = COPY $x0 %2:gpr64all = IMPLICIT_DEF %3:gpr64sp = COPY %2 - INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3735562 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3 + INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3670026 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3 %4:fpr128 = MOVIv2d_ns 0 %5:fpr64 = COPY %4.dsub %7:fpr128 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll new file mode 100644 index 0000000000000..856b1780c508c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -force-streaming -verify-machineinstrs < %s | FileCheck %s + +target triple = "aarch64-linux" + +define void @tmopa_za32_s8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za32_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za32_u8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za32_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: utmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za32_u8_s8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za32_u8_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: ustmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za32_s8_u8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za32_s8_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: sutmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za32_s16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za32_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za32_u16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za32_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: utmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za32_f16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za32_bf16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za32_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za32_f32( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: ftmopa za0.s, { z0.s, z1.s }, z2.s, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.tmopa.nxv4f32(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za16_f16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @tmopa_za16_bf16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: tmopa_za16_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +attributes #0 = {nounwind "target-features" = "+sme2,+sme-tmop,+sme-f16f16,+sme-b16b16,+sme-f8f16,+sme-f8f32,+sme2p1,+bf16" } From 2624dc6222b93efb603163575d547893c0995876 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Fri, 11 Apr 2025 09:44:22 +0000 Subject: [PATCH 02/15] fixup! Fixup code review issues for TMOP --- clang/include/clang/Basic/arm_sme.td | 8 +- .../AArch64/sme2-intrinsics/acle_sme2_tmop.c | 33 +++++-- .../acle_sme2_tmop.cpp | 87 +++++++++++-------- llvm/lib/Target/AArch64/SMEInstrFormats.td | 2 +- 4 files changed, 83 insertions(+), 47 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index d425c2c1e38bb..a0d66bb038108 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -934,19 +934,19 @@ let SMETargetGuard = "sme2,sme-tmop" in { } let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in { - def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "hb", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "hb", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in { - def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "U", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "U", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in { - def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.MM[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.~~[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in { - def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.MM[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.~~[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; } multiclass ZAReadz ch> { diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c index a1456275dd810..85c24c0843409 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c @@ -76,6 +76,15 @@ void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t SME_ACLE_FUNC(svtmopa_lane_za32,_f16_f16,)(1, zn, zm, zk, 3); } +// CHECK-LABEL: @test_svtmopa_lane_za32_f32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_f32_f32,)(1, zn, zm, zk, 3); +} + // CHECK-LABEL: @test_svtmopa_lane_za32_bf16_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) @@ -103,10 +112,22 @@ void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint SME_ACLE_FUNC(svtmopa_lane_za16,_bf16_bf16,)(1, zn, zm, zk, 3); } -//void test_svtmopa_lane_za16_mf8_mf8_fpm(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { -// SME_ACLE_FUNC(svtmopa_lane_za16,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr); -//} +// CHECK-LABEL: @test_svtmopa_lane_za16_mf8_mf8_fpm( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za16,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr); +} -//void test_svtmopa_lane_za32_mf8_mf8_fpm(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { -// SME_ACLE_FUNC(svtmopa_lane_za32,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr); -//} +// CHECK-LABEL: @test_svtmopa_lane_za32_mf8_mf8_fpm( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: ret void +// +void test_svtmopa_lane_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svtmopa_lane_za32,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr); +} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp index 6ccd3acaf749f..afa650950d61e 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp @@ -19,6 +19,8 @@ void test_features() __arm_streaming __arm_inout("za") { svfloat16_t zm_f16; svfloat32x2_t zn_f32; svfloat32_t zm_f32; + svmfloat8x2_t zn_f8; + svmfloat8_t zm_f8; fpm_t fpm = 0; svuint8_t zk; @@ -36,6 +38,8 @@ void test_features() __arm_streaming __arm_inout("za") { svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, 0); // expected-error@+1 {{'svtmopa_lane_za32_f16_f16' needs target feature sme,sme2,sme-tmop}} svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, 0); +// expected-error@+1 {{'svtmopa_lane_za32_f32_f32' needs target feature sme,sme2,sme-tmop}} + svtmopa_lane_za32_f32_f32(0, zn_f32, zm_f32, zk, 0); // expected-error@+1 {{'svtmopa_lane_za32_bf16_bf16' needs target feature sme,sme2,sme-tmop}} svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{'svtmopa_lane_za16_f16_f16' needs target feature sme,sme2,sme-tmop,sme-f16f16}} @@ -43,9 +47,9 @@ void test_features() __arm_streaming __arm_inout("za") { // expected-error@+1 {{'svtmopa_lane_za16_bf16_bf16' needs target feature sme,sme2,sme-tmop,sme-f16f16}} svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{'svtmopa_lane_za16_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f16}} - svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, 0, fpm); + svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 0, fpm); // expected-error@+1 {{'svtmopa_lane_za32_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f32}} - svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, 0, fpm); + svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 0, fpm); } void test_imm() __arm_streaming __arm_inout("za") { @@ -63,114 +67,125 @@ void test_imm() __arm_streaming __arm_inout("za") { svfloat16_t zm_f16; svfloat32x2_t zn_f32; svfloat32_t zm_f32; + svmfloat8x2_t zn_f8; + svmfloat8_t zm_f8; fpm_t fpm; svuint8_t zk; // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s8_s8(3, zn_s8, zm_s8, zk, 4); + svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s8_s8(4, zn_s8, zm_s8, zk, 3); + svtmopa_lane_za32_s8_s8(4, zn_s8, zm_s8, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, -1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_s8_s8(-1, zn_s8, zm_s8, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u8_u8(3, zn_u8, zm_u8, zk, 4); + svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u8_u8(4, zn_u8, zm_u8, zk, 3); + svtmopa_lane_za32_u8_u8(4, zn_u8, zm_u8, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, -1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_u8_u8(-1, zn_u8, zm_u8, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s8_u8(3, zn_s8, zm_u8, zk, 4); + svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s8_u8(4, zn_s8, zm_u8, zk, 3); + svtmopa_lane_za32_s8_u8(4, zn_s8, zm_u8, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, -1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_s8_u8(-1, zn_s8, zm_u8, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u8_s8(3, zn_u8, zm_s8, zk, 4); + svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u8_s8(4, zn_u8, zm_s8, zk, 3); + svtmopa_lane_za32_u8_s8(4, zn_u8, zm_s8, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, -1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_u8_s8(-1, zn_u8, zm_s8, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s16_s16(3, zn_s16, zm_s16, zk, 4); + svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s16_s16(4, zn_s16, zm_s16, zk, 3); + svtmopa_lane_za32_s16_s16(4, zn_s16, zm_s16, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, -1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_s16_s16(-1, zn_s16, zm_s16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u16_u16(3, zn_u16, zm_u16, zk, 4); + svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u16_u16(4, zn_u16, zm_u16, zk, 3); + svtmopa_lane_za32_u16_u16(4, zn_u16, zm_u16, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, -1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_u16_u16(-1, zn_u16, zm_u16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_f16_f16(3, zn_f16, zm_f16, zk, 4); + svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_f16_f16(4, zn_f16, zm_f16, zk, 3); + svtmopa_lane_za32_f16_f16(4, zn_f16, zm_f16, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, -1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_f16_f16(-1, zn_f16, zm_f16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_bf16_bf16(3, zn_bf16, zm_bf16, zk, 4); + svtmopa_lane_za32_f32_f32(0, zn_f32, zm_f32, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_bf16_bf16(4, zn_bf16, zm_bf16, zk, 3); + svtmopa_lane_za32_f32_f32(4, zn_f32, zm_f32, zk, 0); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_f32_f32(0, zn_f32, zm_f32, zk, -1); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svtmopa_lane_za32_f32_f32(-1, zn_f32, zm_f32, zk, 0); + +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, 4); +// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svtmopa_lane_za32_bf16_bf16(4, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za32_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za16_f16_f16(3, zn_f16, zm_f16, zk, 4); -// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za16_f16_f16(4, zn_f16, zm_f16, zk, 3); + svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, 4); +// expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svtmopa_lane_za16_f16_f16(2, zn_f16, zm_f16, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} svtmopa_lane_za16_f16_f16(-1, zn_f16, zm_f16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za16_bf16_bf16(3, zn_bf16, zm_bf16, zk, 4); -// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za16_bf16_bf16(4, zn_bf16, zm_bf16, zk, 3); + svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, 4); +// expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svtmopa_lane_za16_bf16_bf16(2, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} svtmopa_lane_za16_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za16_mf8_mf8_fpm(3, zn_f32, zm_f32, zk, 4, fpm); -// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za16_mf8_mf8_fpm(4, zn_f32, zm_f32, zk, 3, fpm); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, -1, fpm); + svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 4, fpm); +// expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svtmopa_lane_za16_mf8_mf8_fpm(2, zn_f8, zm_f8, zk, 0, fpm); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za16_mf8_mf8_fpm(-1, zn_f32, zm_f32, zk, 0, fpm); + svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, -1, fpm); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svtmopa_lane_za16_mf8_mf8_fpm(-1, zn_f8, zm_f8, zk, 0, fpm); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_mf8_mf8_fpm(3, zn_f32, zm_f32, zk, 4, fpm); + svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 4, fpm); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - svtmopa_lane_za32_mf8_mf8_fpm(4, zn_f32, zm_f32, zk, 3, fpm); + svtmopa_lane_za32_mf8_mf8_fpm(4, zn_f8, zm_f8, zk, 0, fpm); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, -1, fpm); + svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, -1, fpm); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_mf8_mf8_fpm(-1, zn_f32, zm_f32, zk, 0, fpm); + svtmopa_lane_za32_mf8_mf8_fpm(-1, zn_f8, zm_f8, zk, 0, fpm); } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index c42299b050eb6..a85bc344220f2 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -3585,7 +3585,7 @@ multiclass sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; - def _ : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_3, vt>; + def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_3, vt>; } multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, string intrinsic> { From bf4bb103febe23b2d94397f1d1ab5c19cbcfc1e9 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Fri, 11 Apr 2025 10:54:57 +0000 Subject: [PATCH 03/15] fixup! Add extra parameter to SME2_ZA_TMOP_Pat for TileOp16 and VectorIndexS32b --- llvm/lib/Target/AArch64/SMEInstrFormats.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index a85bc344220f2..20f40b54a3122 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -290,8 +290,8 @@ class SME2_ZA_Tile_Vec_Multi_Multi_Pat(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>; -class SME2_ZA_TMOP_Pat - : Pat<(intrinsic imm_ty:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm, (nxv16i8 ZK:$Zk), imm_ty:$idx), +class SME2_ZA_TMOP_Pat + : Pat<(intrinsic tile_imm:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm, (nxv16i8 ZK:$Zk), imm_ty:$idx), (!cast(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), $Zm, $Zk, $idx)>; @@ -3585,7 +3585,7 @@ multiclass sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_3, vt>; + def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_1, timm32_0_3, vt>; } multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, string intrinsic> { @@ -3595,7 +3595,7 @@ multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_3, vt>; + def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_1, timm32_0_3, vt>; } //===----------------------------------------------------------------------===/// From 51462fa9deb4547e1528ba49cf4caa8a8a04314b Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Fri, 11 Apr 2025 21:51:28 +0000 Subject: [PATCH 04/15] fixup! Fix Marian's code review comments --- clang/include/clang/Basic/arm_sme.td | 33 +++----- .../AArch64/sme2-intrinsics/acle_sme2_tmop.c | 75 ++++++++++++++++++- .../acle_sme2_tmop.cpp | 61 ++++++--------- llvm/lib/Target/AArch64/SMEInstrFormats.td | 2 +- 4 files changed, 103 insertions(+), 68 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index a0d66bb038108..037a7eaa39a34 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -910,43 +910,28 @@ let SMETargetGuard = "sme-f16f16" in { //////////////////////////////////////////////////////////////////////////////// // SME2 - TMOP, SUTMOP, USTMOP -multiclass USTMOP checks> { - def _ : SInst<"svtmopa_lane_" # za # "[_{d}_{3}]", - "vi2x[i", t, MergeNone, "aarch64_sme_ustmopa", - [IsStreaming, IsInOutZA], - checks>; -} - -multiclass SUTMOP checks> { - def _ : SInst<"svtmopa_lane_" # za # "[_{d}_{3}]", - "vi2u[i", t, MergeNone, "aarch64_sme_sutmopa", - [IsStreaming, IsInOutZA], - checks>; -} - let SMETargetGuard = "sme2,sme-tmop" in { - def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - - defm SVSUTMOPA_S : SUTMOP<"za32", "c", [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - defm SVUSTMOPA_S : USTMOP<"za32", "Uc", [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVSUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2u[i", "c", MergeNone, "aarch64_sme_sutmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVUSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2x[i", "Uc", MergeNone, "aarch64_sme_ustmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in { - def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "hb", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "h", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in { - def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "U", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in { - def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.~~[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in { - def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.~~[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; } multiclass ZAReadz ch> { diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c index 85c24c0843409..37c24c1672911 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c @@ -1,9 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include @@ -18,6 +20,11 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_s810svint8x2_tu10__SVInt8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_s8_s8,)(1, zn, zm, zk, 3); } @@ -27,6 +34,11 @@ void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __ar // CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_u811svuint8x2_tu11__SVUint8_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_u8_u8,)(1, zn, zm, zk, 3); } @@ -36,6 +48,11 @@ void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __ // CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_u810svint8x2_tu11__SVUint8_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_s8_u8,)(1, zn, zm, zk, 3); } @@ -45,6 +62,11 @@ void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __a // CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_s811svuint8x2_tu10__SVInt8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_u8_s8,)(1, zn, zm, zk, 3); } @@ -54,6 +76,11 @@ void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __a // CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_s16_s1611svint16x2_tu11__SVInt16_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_s16_s16,)(1, zn, zm, zk, 3); } @@ -63,6 +90,11 @@ void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) // CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_u16_u1612svuint16x2_tu12__SVUint16_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_u16_u16,)(1, zn, zm, zk, 3); } @@ -72,6 +104,11 @@ void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk // CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_f16_f16,)(1, zn, zm, zk, 3); } @@ -81,6 +118,11 @@ void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f32_f3213svfloat32x2_tu13__SVFloat32_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_f32_f32,)(1, zn, zm, zk, 3); } @@ -90,6 +132,11 @@ void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za32_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_bf16_bf16,)(1, zn, zm, zk, 3); } @@ -99,6 +146,11 @@ void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint // CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za16_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za16,_f16_f16,)(1, zn, zm, zk, 3); } @@ -108,6 +160,11 @@ void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za16_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za16,_bf16_bf16,)(1, zn, zm, zk, 3); } @@ -118,6 +175,12 @@ void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint // CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za16,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr); } @@ -128,6 +191,12 @@ void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint // CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // +// CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: ret void +// void test_svtmopa_lane_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svtmopa_lane_za32,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr); } diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp index afa650950d61e..953169b0f55b9 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp @@ -1,28 +1,19 @@ -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -target-feature +sme-f16f16 -target-feature +sme-b16b16 -verify -emit-llvm -o - %s +// RUN: %clang_cc1 -triple aarch64 \ +// RUN: -target-feature +sme -target-feature +sme2 -verify -emit-llvm -o - %s // REQUIRES: aarch64-registered-target #include -void test_features() __arm_streaming __arm_inout("za") { - svuint8x2_t zn_u8; - svint8x2_t zn_s8; - svuint8_t zm_u8; - svint8_t zm_s8; - svuint16x2_t zn_u16; - svint16x2_t zn_s16; - svuint16_t zm_u16; - svint16_t zm_s16; - svbfloat16x2_t zn_bf16; - svfloat16x2_t zn_f16; - svbfloat16_t zm_bf16; - svfloat16_t zm_f16; - svfloat32x2_t zn_f32; - svfloat32_t zm_f32; - svmfloat8x2_t zn_f8; - svmfloat8_t zm_f8; - fpm_t fpm = 0; - svuint8_t zk; +void test_features(svuint8x2_t zn_u8, svuint8_t zm_u8, + svint8x2_t zn_s8, svint8_t zm_s8, + svint16x2_t zn_s16, svint16_t zm_s16, + svuint16x2_t zn_u16, svuint16_t zm_u16, + svfloat16x2_t zn_f16, svfloat16_t zm_f16, + svbfloat16x2_t zn_bf16, svbfloat16_t zm_bf16, + svfloat32x2_t zn_f32, svfloat32_t zm_f32, + svmfloat8x2_t zn_f8, svmfloat8_t zm_f8, + svuint8_t zk, fpm_t fpm) __arm_streaming __arm_inout("za") { // expected-error@+1 {{'svtmopa_lane_za32_s8_s8' needs target feature sme,sme2,sme-tmop}} svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, 0); @@ -44,7 +35,7 @@ void test_features() __arm_streaming __arm_inout("za") { svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{'svtmopa_lane_za16_f16_f16' needs target feature sme,sme2,sme-tmop,sme-f16f16}} svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, 0); -// expected-error@+1 {{'svtmopa_lane_za16_bf16_bf16' needs target feature sme,sme2,sme-tmop,sme-f16f16}} +// expected-error@+1 {{'svtmopa_lane_za16_bf16_bf16' needs target feature sme,sme2,sme-tmop,sme-b16b16}} svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{'svtmopa_lane_za16_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f16}} svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 0, fpm); @@ -52,25 +43,15 @@ void test_features() __arm_streaming __arm_inout("za") { svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 0, fpm); } -void test_imm() __arm_streaming __arm_inout("za") { - svuint8x2_t zn_u8; - svint8x2_t zn_s8; - svuint8_t zm_u8; - svint8_t zm_s8; - svuint16x2_t zn_u16; - svint16x2_t zn_s16; - svuint16_t zm_u16; - svint16_t zm_s16; - svbfloat16x2_t zn_bf16; - svfloat16x2_t zn_f16; - svbfloat16_t zm_bf16; - svfloat16_t zm_f16; - svfloat32x2_t zn_f32; - svfloat32_t zm_f32; - svmfloat8x2_t zn_f8; - svmfloat8_t zm_f8; - fpm_t fpm; - svuint8_t zk; +void test_imm(svuint8x2_t zn_u8, svuint8_t zm_u8, + svint8x2_t zn_s8, svint8_t zm_s8, + svint16x2_t zn_s16, svint16_t zm_s16, + svuint16x2_t zn_u16, svuint16_t zm_u16, + svfloat16x2_t zn_f16, svfloat16_t zm_f16, + svbfloat16x2_t zn_bf16, svbfloat16_t zm_bf16, + svfloat32x2_t zn_f32, svfloat32_t zm_f32, + svmfloat8x2_t zn_f8, svmfloat8_t zm_f8, + svuint8_t zk, fpm_t fpm) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, 4); diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 20f40b54a3122..86129eb53eb11 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -291,7 +291,7 @@ class SME2_ZA_Tile_Vec_Multi_Multi_Pat(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>; class SME2_ZA_TMOP_Pat - : Pat<(intrinsic tile_imm:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm, (nxv16i8 ZK:$Zk), imm_ty:$idx), + : Pat<(intrinsic tile_imm:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm, nxv16i8:$Zk, imm_ty:$idx), (!cast(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), $Zm, $Zk, $idx)>; From 66c5ca0e0270a8148e484eb075bb40fd0b86e3fb Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Fri, 11 Apr 2025 22:24:38 +0000 Subject: [PATCH 05/15] fixup! Remove some range tests, and improve SME2_ZA_TMOP_Pat --- .../acle_sme2_tmop.cpp | 52 ------------------- llvm/lib/Target/AArch64/SMEInstrFormats.td | 8 +-- 2 files changed, 4 insertions(+), 56 deletions(-) diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp index 953169b0f55b9..5c229d0825366 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp @@ -57,116 +57,64 @@ void test_imm(svuint8x2_t zn_u8, svuint8_t zm_u8, svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_s8_s8(4, zn_s8, zm_s8, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s8_s8(-1, zn_s8, zm_s8, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_u8_u8(4, zn_u8, zm_u8, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u8_u8(-1, zn_u8, zm_u8, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_s8_u8(4, zn_s8, zm_u8, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s8_u8(-1, zn_s8, zm_u8, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_u8_s8(4, zn_u8, zm_s8, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u8_s8(-1, zn_u8, zm_s8, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_s16_s16(4, zn_s16, zm_s16, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_s16_s16(-1, zn_s16, zm_s16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_u16_u16(4, zn_u16, zm_u16, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_u16_u16(-1, zn_u16, zm_u16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_f16_f16(4, zn_f16, zm_f16, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_f16_f16(-1, zn_f16, zm_f16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_f32_f32(0, zn_f32, zm_f32, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_f32_f32(4, zn_f32, zm_f32, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_f32_f32(0, zn_f32, zm_f32, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_f32_f32(-1, zn_f32, zm_f32, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, 4); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_bf16_bf16(4, zn_bf16, zm_bf16, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, 4); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} svtmopa_lane_za16_f16_f16(2, zn_f16, zm_f16, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - svtmopa_lane_za16_f16_f16(-1, zn_f16, zm_f16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, 4); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} svtmopa_lane_za16_bf16_bf16(2, zn_bf16, zm_bf16, zk, 0); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - svtmopa_lane_za16_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 4, fpm); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} svtmopa_lane_za16_mf8_mf8_fpm(2, zn_f8, zm_f8, zk, 0, fpm); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, -1, fpm); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - svtmopa_lane_za16_mf8_mf8_fpm(-1, zn_f8, zm_f8, zk, 0, fpm); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 4, fpm); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} svtmopa_lane_za32_mf8_mf8_fpm(4, zn_f8, zm_f8, zk, 0, fpm); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, -1, fpm); -// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - svtmopa_lane_za32_mf8_mf8_fpm(-1, zn_f8, zm_f8, zk, 0, fpm); } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 86129eb53eb11..8777cdc9a86d1 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -290,8 +290,8 @@ class SME2_ZA_Tile_Vec_Multi_Multi_Pat(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>; -class SME2_ZA_TMOP_Pat - : Pat<(intrinsic tile_imm:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm, nxv16i8:$Zk, imm_ty:$idx), +class SME2_ZA_TMOP_Pat + : Pat<(intrinsic tile_imm:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm, nxv16i8:$Zk, timm32_0_3:$idx), (!cast(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), $Zm, $Zk, $idx)>; @@ -3585,7 +3585,7 @@ multiclass sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_1, timm32_0_3, vt>; + def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_1, vt>; } multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, string intrinsic> { @@ -3595,7 +3595,7 @@ multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; - def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_1, timm32_0_3, vt>; + def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_3, vt>; } //===----------------------------------------------------------------------===/// From 155ce5d1d7573329bbe0cba20fc2025fc9973ec0 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Sun, 13 Apr 2025 21:14:52 +0000 Subject: [PATCH 06/15] fixup! Fix remaining CR comments --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 26 +++++++++---------- llvm/lib/Target/AArch64/SMEInstrFormats.td | 20 ++++++++------ 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 349d05628261a..85658fa7f6e0b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3116,7 +3116,7 @@ let TargetPrefix = "aarch64" in { llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, - IntrNoMem, IntrHasSideEffects]>; + IntrInaccessibleMemOnly]>; def int_aarch64_sme_tmopa : SME_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_stmopa : SME_OuterProduct_TMOP_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 330dce018daa4..aa793aabbe5ef 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -175,12 +175,12 @@ let Predicates = [HasSME_MOP4, HasSMEI16I64] in { } let Predicates = [HasSME_TMOP] in { - defm STMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b00100, ZZ_b_mul_r, ZPR8, nxv16i8, "stmopa", "int_aarch64_sme_stmopa">; - defm STMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b00101, ZZ_h_mul_r, ZPR16, nxv8i16, "stmopa", "int_aarch64_sme_stmopa">; - defm UTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b11100, ZZ_b_mul_r, ZPR8, nxv16i8, "utmopa", "int_aarch64_sme_utmopa">; - defm UTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10101, ZZ_h_mul_r, ZPR16, nxv8i16, "utmopa", "int_aarch64_sme_utmopa">; - defm SUTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01100, ZZ_b_mul_r, ZPR8, nxv16i8, "sutmopa", "int_aarch64_sme_sutmopa">; - defm USTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b10100, ZZ_b_mul_r, ZPR8, nxv16i8, "ustmopa", "int_aarch64_sme_ustmopa">; + defm STMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b00100, ZZ_b_mul_r, ZPR8, nxv16i8, "stmopa", int_aarch64_sme_stmopa>; + defm STMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b00101, ZZ_h_mul_r, ZPR16, nxv8i16, "stmopa", int_aarch64_sme_stmopa>; + defm UTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b11100, ZZ_b_mul_r, ZPR8, nxv16i8, "utmopa", int_aarch64_sme_utmopa>; + defm UTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10101, ZZ_h_mul_r, ZPR16, nxv8i16, "utmopa", int_aarch64_sme_utmopa>; + defm SUTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01100, ZZ_b_mul_r, ZPR8, nxv16i8, "sutmopa", int_aarch64_sme_sutmopa>; + defm USTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b10100, ZZ_b_mul_r, ZPR8, nxv16i8, "ustmopa", int_aarch64_sme_ustmopa>; } let Predicates = [HasSME] in { @@ -1073,25 +1073,25 @@ let Predicates = [HasSME2p2] in { } // [HasSME2p2] let Predicates = [HasSME_TMOP] in { - defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", "int_aarch64_sme_tmopa">; - defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", "int_aarch64_sme_tmopa">; - defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", "int_aarch64_sme_tmopa">; + defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa>; + defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_tmopa>; + defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa>; } let Predicates = [HasSME_TMOP, HasSMEF16F16] in { - defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", "int_aarch64_sme_tmopa">; + defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa>; } let Predicates = [HasSME_TMOP, HasSMEB16B16] in { - defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", "int_aarch64_sme_tmopa">; + defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa>; } let Predicates = [HasSME_TMOP, HasSMEF8F32] in { - defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", "int_aarch64_sme_tmopa">; + defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_tmopa, [FPMR, FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEF8F16] in { - defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", "int_aarch64_sme_tmopa">; + defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_tmopa, [FPMR, FPCR]>; } let Predicates = [HasSME_MOP4, HasSMEF8F16], Uses = [FPMR, FPCR] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 8777cdc9a86d1..e039ae31fdb88 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -3578,24 +3578,28 @@ class sme_int_sparse_outer_product_i32 opc, RegisterOperand zn_ty, Regis let Constraints = "$ZAda = $_ZAda"; } -multiclass sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, string intrinsic> { +multiclass sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, SDPatternOperator intrinsic, list uses=[]> { def NAME : sme_int_sparse_outer_product_i16, SMEPseudo2Instr { - let Uses = [FPMR, FPCR]; + let Uses = uses; } - def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr { + let mayStore = 1; + } - def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_1, vt>; + def : SME2_ZA_TMOP_Pat; } -multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, string intrinsic> { +multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, SDPatternOperator intrinsic, list uses=[]> { def NAME : sme_int_sparse_outer_product_i32, SMEPseudo2Instr { - let Uses = [FPMR, FPCR]; + let Uses = uses; } - def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr { + let mayStore = 1; + } - def : SME2_ZA_TMOP_Pat(intrinsic), timm32_0_3, vt>; + def : SME2_ZA_TMOP_Pat; } //===----------------------------------------------------------------------===/// From eddca2a3c2041180a23ac8880f991c0895ca1065 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Sun, 13 Apr 2025 22:25:13 +0000 Subject: [PATCH 07/15] fixup! Numbers in test weren't correctly updated - fix. --- .../test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir | 4 ++-- .../AArch64/emit_fneg_with_non_register_operand.mir | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir index 12c015f30ec33..3bbd1b5cd216d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir @@ -75,12 +75,12 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_mixed_types - ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2883594 /* regdef:FPR32_with_hsub_in_FPR16_lo */, def %0, 3735562 /* regdef:GPR64 */, def %1 + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0, 3735562 /* regdef:GPR64 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1 ; CHECK-NEXT: $d0 = COPY [[COPY1]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $d0 - INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0:gpr32common, 3735562 /* regdef:FPR64 */, def %1:fpr64 + INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0:gpr32common, 3735562 /* regdef:FPR64 */, def %1:fpr64 %3:_(s32) = COPY %0 %4:_(s64) = COPY %1 $d0 = COPY %4(s64) diff --git a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir index 251828b0b4f9b..615accc7f75e5 100644 --- a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir +++ b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir @@ -91,10 +91,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:GPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3670026 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2 ; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:GPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3670026 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) ; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2 ; CHECK-NEXT: nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr ; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv @@ -111,10 +111,10 @@ body: | %6:gpr64common = LOADgot target-flags(aarch64-got) @c %3:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c) - INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3) + INLINEASM &"", 1 /* sideeffect attdialect */, 3670026 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3) %0:fpr64 = COPY %2 %5:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c) - INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3) + INLINEASM &"", 1 /* sideeffect attdialect */, 3670026 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3) %7:fpr64 = FNEGDr %2 nofpexcept FCMPDrr %4, killed %7, implicit-def $nzcv, implicit $fpcr Bcc 1, %bb.2, implicit $nzcv From 95a51321aa31647c3e243dc12d42b36dd2f29ac0 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Sun, 13 Apr 2025 22:28:50 +0000 Subject: [PATCH 08/15] fixup! Bother, another number requiring bit-twiddling --- llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir index 3bbd1b5cd216d..37d6e852429fe 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir @@ -75,12 +75,12 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_mixed_types - ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0, 3735562 /* regdef:GPR64 */, def %1 + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0, 3670026 /* regdef:FPR64 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1 ; CHECK-NEXT: $d0 = COPY [[COPY1]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $d0 - INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0:gpr32common, 3735562 /* regdef:FPR64 */, def %1:fpr64 + INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0:gpr32common, 3670026 /* regdef:FPR64 */, def %1:fpr64 %3:_(s32) = COPY %0 %4:_(s64) = COPY %1 $d0 = COPY %4(s64) From 25bcd216078579c79a4711801e82c68417041801 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Mon, 14 Apr 2025 11:26:54 +0000 Subject: [PATCH 09/15] fixup! Also set "mayLoad" --- llvm/lib/Target/AArch64/SMEInstrFormats.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index e039ae31fdb88..8e79fb0850f20 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -3584,6 +3584,7 @@ multiclass sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ } def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr { + let mayLoad = 1; let mayStore = 1; } @@ -3596,6 +3597,7 @@ multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ } def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr { + let mayLoad = 1; let mayStore = 1; } From 66ff772e1343f852551100fc68f90d710d8588d5 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Mon, 14 Apr 2025 12:00:22 +0000 Subject: [PATCH 10/15] fixup! Move mayLoad/mayStore to reduce code duplication --- llvm/lib/Target/AArch64/SMEInstrFormats.td | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 8e79fb0850f20..3b0e068cd3c1a 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -110,6 +110,8 @@ class sme_sparse_outer_product_pseudo @@ -3583,10 +3585,7 @@ multiclass sme_tmopa_16b opc, RegisterOperand zn_ty, RegisterOperand zm_ let Uses = uses; } - def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr { - let mayLoad = 1; - let mayStore = 1; - } + def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_TMOP_Pat; } @@ -3596,10 +3595,7 @@ multiclass sme_tmopa_32b opc, RegisterOperand zn_ty, RegisterOperand zm_ let Uses = uses; } - def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr { - let mayLoad = 1; - let mayStore = 1; - } + def NAME # _PSEUDO : sme_sparse_outer_product_pseudo, SMEPseudo2Instr; def : SME2_ZA_TMOP_Pat; } From b7bfd9b376e8bfa771127761e92b55f6fda7457b Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Tue, 15 Apr 2025 09:04:24 +0000 Subject: [PATCH 11/15] fixup! Add FPCR for all FP TMOP instructions --- llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index aa793aabbe5ef..d969f43657f4b 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1073,17 +1073,17 @@ let Predicates = [HasSME2p2] in { } // [HasSME2p2] let Predicates = [HasSME_TMOP] in { - defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa>; - defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_tmopa>; - defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa>; + defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa, [FPCR]>; + defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_tmopa, [FPCR]>; + defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa, [FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEF16F16] in { - defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa>; + defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa, [FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEB16B16] in { - defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa>; + defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa, [FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEF8F32] in { From 8ff30861c5df054550e6951b9797eeb425a63f4c Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Tue, 15 Apr 2025 11:38:58 +0000 Subject: [PATCH 12/15] fixup! Address Carol's CR comments --- clang/include/clang/Basic/arm_sme.td | 19 ++-- .../AArch64/sme2-intrinsics/acle_sme2_tmop.c | 52 +++++----- llvm/include/llvm/IR/IntrinsicsAArch64.td | 13 ++- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 53 +++++------ .../CodeGen/AArch64/sme2-intrinsics-tmop.ll | 94 ++++++++++++------- 5 files changed, 128 insertions(+), 103 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 037a7eaa39a34..7998822483205 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -911,27 +911,28 @@ let SMETargetGuard = "sme-f16f16" in { // SME2 - TMOP, SUTMOP, USTMOP let SMETargetGuard = "sme2,sme-tmop" in { - def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVSUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2u[i", "c", MergeNone, "aarch64_sme_sutmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVUSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2x[i", "Uc", MergeNone, "aarch64_sme_ustmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVFTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hf", MergeNone, "aarch64_sme_za32_ftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVBTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_za32_bftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_za32_stmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_za32_utmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVSUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2u[i", "c", MergeNone, "aarch64_sme_za32_sutmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVUSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2x[i", "Uc", MergeNone, "aarch64_sme_za32_ustmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in { - def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "h", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "h", MergeNone, "aarch64_sme_za16_ftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in { - def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_za16_bftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in { - def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_za16_ftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in { - def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_za32_ftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; } multiclass ZAReadz ch> { diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c index 37c24c1672911..eae2533f9bfa8 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c @@ -17,12 +17,12 @@ // CHECK-LABEL: @test_svtmopa_lane_za32_s8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.stmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_s810svint8x2_tu10__SVInt8_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.stmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -31,12 +31,12 @@ void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __ar // CHECK-LABEL: @test_svtmopa_lane_za32_u8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.utmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_u811svuint8x2_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.utmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -45,12 +45,12 @@ void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __ // CHECK-LABEL: @test_svtmopa_lane_za32_s8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.sutmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_u810svint8x2_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.sutmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -59,12 +59,12 @@ void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __a // CHECK-LABEL: @test_svtmopa_lane_za32_u8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ustmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_s811svuint8x2_tu10__SVInt8_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ustmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -73,12 +73,12 @@ void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __a // CHECK-LABEL: @test_svtmopa_lane_za32_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.stmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_s16_s1611svint16x2_tu11__SVInt16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.stmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -87,12 +87,12 @@ void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) // CHECK-LABEL: @test_svtmopa_lane_za32_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.utmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_u16_u1612svuint16x2_tu12__SVUint16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.utmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -101,12 +101,12 @@ void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk // CHECK-LABEL: @test_svtmopa_lane_za32_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -115,12 +115,12 @@ void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t // CHECK-LABEL: @test_svtmopa_lane_za32_f32_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f32_f3213svfloat32x2_tu13__SVFloat32_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -129,12 +129,12 @@ void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t // CHECK-LABEL: @test_svtmopa_lane_za32_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.bftmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za32_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.bftmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -143,12 +143,12 @@ void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint // CHECK-LABEL: @test_svtmopa_lane_za16_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.ftmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za16_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.ftmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -157,12 +157,12 @@ void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t // CHECK-LABEL: @test_svtmopa_lane_za16_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.bftmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za16_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.bftmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -172,13 +172,13 @@ void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint // CHECK-LABEL: @test_svtmopa_lane_za16_mf8_mf8_fpm( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.ftmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.ftmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { @@ -188,13 +188,13 @@ void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint // CHECK-LABEL: @test_svtmopa_lane_za32_mf8_mf8_fpm( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 85658fa7f6e0b..adb1adf868767 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3118,11 +3118,14 @@ let TargetPrefix = "aarch64" in { [ImmArg>, ImmArg>, IntrInaccessibleMemOnly]>; - def int_aarch64_sme_tmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_stmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_utmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_sutmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_ustmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_za16_ftmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_za16_bftmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_za32_ftmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_za32_bftmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_za32_stmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_za32_utmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_za32_sutmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_za32_ustmopa : SME_OuterProduct_TMOP_Intrinsic; class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index d969f43657f4b..86857834f7b81 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -175,12 +175,31 @@ let Predicates = [HasSME_MOP4, HasSMEI16I64] in { } let Predicates = [HasSME_TMOP] in { - defm STMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b00100, ZZ_b_mul_r, ZPR8, nxv16i8, "stmopa", int_aarch64_sme_stmopa>; - defm STMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b00101, ZZ_h_mul_r, ZPR16, nxv8i16, "stmopa", int_aarch64_sme_stmopa>; - defm UTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b11100, ZZ_b_mul_r, ZPR8, nxv16i8, "utmopa", int_aarch64_sme_utmopa>; - defm UTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10101, ZZ_h_mul_r, ZPR16, nxv8i16, "utmopa", int_aarch64_sme_utmopa>; - defm SUTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01100, ZZ_b_mul_r, ZPR8, nxv16i8, "sutmopa", int_aarch64_sme_sutmopa>; - defm USTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b10100, ZZ_b_mul_r, ZPR8, nxv16i8, "ustmopa", int_aarch64_sme_ustmopa>; + defm STMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b00100, ZZ_b_mul_r, ZPR8, nxv16i8, "stmopa", int_aarch64_sme_za32_stmopa>; + defm STMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b00101, ZZ_h_mul_r, ZPR16, nxv8i16, "stmopa", int_aarch64_sme_za32_stmopa>; + defm UTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b11100, ZZ_b_mul_r, ZPR8, nxv16i8, "utmopa", int_aarch64_sme_za32_utmopa>; + defm UTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10101, ZZ_h_mul_r, ZPR16, nxv8i16, "utmopa", int_aarch64_sme_za32_utmopa>; + defm SUTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01100, ZZ_b_mul_r, ZPR8, nxv16i8, "sutmopa", int_aarch64_sme_za32_sutmopa>; + defm USTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b10100, ZZ_b_mul_r, ZPR8, nxv16i8, "ustmopa", int_aarch64_sme_za32_ustmopa>; + defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_za32_ftmopa, [FPCR]>; + defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_za32_ftmopa, [FPCR]>; + defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_za32_bftmopa, [FPCR]>; +} + +let Predicates = [HasSME_TMOP, HasSMEF16F16] in { + defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_za16_ftmopa, [FPCR]>; +} + +let Predicates = [HasSME_TMOP, HasSMEB16B16] in { + defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_za16_bftmopa, [FPCR]>; +} + +let Predicates = [HasSME_TMOP, HasSMEF8F32] in { + defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_za32_ftmopa, [FPMR, FPCR]>; +} + +let Predicates = [HasSME_TMOP, HasSMEF8F16] in { + defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_za16_ftmopa, [FPMR, FPCR]>; } let Predicates = [HasSME] in { @@ -1072,28 +1091,6 @@ let Predicates = [HasSME2p2] in { } // [HasSME2p2] -let Predicates = [HasSME_TMOP] in { - defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa, [FPCR]>; - defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_tmopa, [FPCR]>; - defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa, [FPCR]>; -} - -let Predicates = [HasSME_TMOP, HasSMEF16F16] in { - defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa, [FPCR]>; -} - -let Predicates = [HasSME_TMOP, HasSMEB16B16] in { - defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa, [FPCR]>; -} - -let Predicates = [HasSME_TMOP, HasSMEF8F32] in { - defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_tmopa, [FPMR, FPCR]>; -} - -let Predicates = [HasSME_TMOP, HasSMEF8F16] in { - defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_tmopa, [FPMR, FPCR]>; -} - let Predicates = [HasSME_MOP4, HasSMEF8F16], Uses = [FPMR, FPCR] in { defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a">; } diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll index 856b1780c508c..da44f2446a18a 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll @@ -3,135 +3,159 @@ target triple = "aarch64-linux" -define void @tmopa_za32_s8( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za32_s8: +define void @stmopa_za32_s8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: stmopa_za32_s8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: stmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.stmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za32_u8( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za32_u8: +define void @utmopa_za32_u8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: utmopa_za32_u8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: utmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.utmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za32_u8_s8( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za32_u8_s8: +define void @ustmopa_za32_u8_s8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: ustmopa_za32_u8_s8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ustmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.ustmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za32_s8_u8( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za32_s8_u8: +define void @sutmopa_za32_s8_u8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: sutmopa_za32_s8_u8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sutmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.sutmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za32_s16( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za32_s16: +define void @stmopa_za32_s16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: stmopa_za32_s16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: stmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.stmopa.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za32_u16( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za32_u16: +define void @utmopa_za32_u16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: utmopa_za32_u16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: utmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.utmopa.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za32_f16( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za32_f16: +define void @ftmopa_za32_f16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: ftmopa_za32_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.ftmopa.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za32_bf16( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za32_bf16: +define void @bftmopa_za32_bf16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: bftmopa_za32_bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.bftmopa.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za32_f32( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za32_f32: +define void @ftmopa_za32_f32( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: ftmopa_za32_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ftmopa za0.s, { z0.s, z1.s }, z2.s, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.nxv4f32(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.ftmopa.nxv4f32(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za16_f16( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za16_f16: +define void @ftmopa_za16_f16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: ftmopa_za16_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] +; CHECK-NEXT: ftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za16.ftmopa.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } -define void @tmopa_za16_bf16( %zn1, %zn2, %zm, %zk) #0 { -; CHECK-LABEL: tmopa_za16_bf16: +define void @bftmopa_za16_bf16( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: bftmopa_za16_bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z28.d, z3.d ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] +; CHECK-NEXT: bftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.za16.bftmopa.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @ftmopa_za16_f8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: ftmopa_za16_f8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: ftmopa za0.h, { z0.b, z1.b }, z2.b, z28[0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.za16.ftmopa.nxv16if8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + ret void +} + +define void @ftmopa_za32_f8( %zn1, %zn2, %zm, %zk) #0 { +; CHECK-LABEL: ftmopa_za32_f8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z28.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: ftmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.za32.ftmopa.nxv16if8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } From b6b98f2915e27d10622d12be01fe3bf989a6fbaa Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Tue, 15 Apr 2025 15:12:25 +0000 Subject: [PATCH 13/15] fixup! Rename intrinsics --- clang/include/clang/Basic/arm_sme.td | 19 ++++--- .../AArch64/sme2-intrinsics/acle_sme2_tmop.c | 52 +++++++++---------- llvm/include/llvm/IR/IntrinsicsAArch64.td | 14 +++-- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 30 +++++------ .../CodeGen/AArch64/sme2-intrinsics-tmop.ll | 26 +++++----- 5 files changed, 69 insertions(+), 72 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 7998822483205..41bbe8eb64ed5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -911,28 +911,27 @@ let SMETargetGuard = "sme-f16f16" in { // SME2 - TMOP, SUTMOP, USTMOP let SMETargetGuard = "sme2,sme-tmop" in { - def SVFTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hf", MergeNone, "aarch64_sme_za32_ftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVBTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_za32_bftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_za32_stmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_za32_utmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVSUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2u[i", "c", MergeNone, "aarch64_sme_za32_sutmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; - def SVUSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2x[i", "Uc", MergeNone, "aarch64_sme_za32_ustmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_tmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVSUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2u[i", "c", MergeNone, "aarch64_sme_sutmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVUSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2x[i", "Uc", MergeNone, "aarch64_sme_ustmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in { - def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "h", MergeNone, "aarch64_sme_za16_ftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "h", MergeNone, "aarch64_sme_tmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in { - def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_za16_bftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_tmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in { - def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_za16_ftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_tmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in { - def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_za32_ftmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_tmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; } multiclass ZAReadz ch> { diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c index eae2533f9bfa8..a1f37d4fa8492 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c @@ -17,12 +17,12 @@ // CHECK-LABEL: @test_svtmopa_lane_za32_s8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.stmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_s810svint8x2_tu10__SVInt8_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.stmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -31,12 +31,12 @@ void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __ar // CHECK-LABEL: @test_svtmopa_lane_za32_u8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.utmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_u811svuint8x2_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.utmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -45,12 +45,12 @@ void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __ // CHECK-LABEL: @test_svtmopa_lane_za32_s8_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.sutmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_u810svint8x2_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.sutmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -59,12 +59,12 @@ void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __a // CHECK-LABEL: @test_svtmopa_lane_za32_u8_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ustmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_s811svuint8x2_tu10__SVInt8_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ustmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -73,12 +73,12 @@ void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __a // CHECK-LABEL: @test_svtmopa_lane_za32_s16_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.stmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_s16_s1611svint16x2_tu11__SVInt16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.stmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -87,12 +87,12 @@ void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) // CHECK-LABEL: @test_svtmopa_lane_za32_u16_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.utmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_u16_u1612svuint16x2_tu12__SVUint16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.utmopa.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv8i16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -101,12 +101,12 @@ void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk // CHECK-LABEL: @test_svtmopa_lane_za32_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -115,12 +115,12 @@ void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t // CHECK-LABEL: @test_svtmopa_lane_za32_f32_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f32_f3213svfloat32x2_tu13__SVFloat32_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -129,12 +129,12 @@ void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t // CHECK-LABEL: @test_svtmopa_lane_za32_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.bftmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za32_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.bftmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -143,12 +143,12 @@ void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint // CHECK-LABEL: @test_svtmopa_lane_za16_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.ftmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za16_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.ftmopa.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -157,12 +157,12 @@ void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t // CHECK-LABEL: @test_svtmopa_lane_za16_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.bftmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za16_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.bftmopa.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -172,13 +172,13 @@ void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint // CHECK-LABEL: @test_svtmopa_lane_za16_mf8_mf8_fpm( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.ftmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za16.ftmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { @@ -188,13 +188,13 @@ void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint // CHECK-LABEL: @test_svtmopa_lane_za32_mf8_mf8_fpm( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.za32.ftmopa.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index adb1adf868767..668b7447e849b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3118,14 +3118,12 @@ let TargetPrefix = "aarch64" in { [ImmArg>, ImmArg>, IntrInaccessibleMemOnly]>; - def int_aarch64_sme_za16_ftmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_za16_bftmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_za32_ftmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_za32_bftmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_za32_stmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_za32_utmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_za32_sutmopa : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_za32_ustmopa : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_tmopa_za16 : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_tmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_stmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_utmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_sutmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_ustmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 86857834f7b81..b1f906ae8db5a 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -175,31 +175,31 @@ let Predicates = [HasSME_MOP4, HasSMEI16I64] in { } let Predicates = [HasSME_TMOP] in { - defm STMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b00100, ZZ_b_mul_r, ZPR8, nxv16i8, "stmopa", int_aarch64_sme_za32_stmopa>; - defm STMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b00101, ZZ_h_mul_r, ZPR16, nxv8i16, "stmopa", int_aarch64_sme_za32_stmopa>; - defm UTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b11100, ZZ_b_mul_r, ZPR8, nxv16i8, "utmopa", int_aarch64_sme_za32_utmopa>; - defm UTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10101, ZZ_h_mul_r, ZPR16, nxv8i16, "utmopa", int_aarch64_sme_za32_utmopa>; - defm SUTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01100, ZZ_b_mul_r, ZPR8, nxv16i8, "sutmopa", int_aarch64_sme_za32_sutmopa>; - defm USTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b10100, ZZ_b_mul_r, ZPR8, nxv16i8, "ustmopa", int_aarch64_sme_za32_ustmopa>; - defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_za32_ftmopa, [FPCR]>; - defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_za32_ftmopa, [FPCR]>; - defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_za32_bftmopa, [FPCR]>; + defm STMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b00100, ZZ_b_mul_r, ZPR8, nxv16i8, "stmopa", int_aarch64_sme_stmopa_za32>; + defm STMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b00101, ZZ_h_mul_r, ZPR16, nxv8i16, "stmopa", int_aarch64_sme_stmopa_za32>; + defm UTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b11100, ZZ_b_mul_r, ZPR8, nxv16i8, "utmopa", int_aarch64_sme_utmopa_za32>; + defm UTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10101, ZZ_h_mul_r, ZPR16, nxv8i16, "utmopa", int_aarch64_sme_utmopa_za32>; + defm SUTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01100, ZZ_b_mul_r, ZPR8, nxv16i8, "sutmopa", int_aarch64_sme_sutmopa_za32>; + defm USTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b10100, ZZ_b_mul_r, ZPR8, nxv16i8, "ustmopa", int_aarch64_sme_ustmopa_za32>; + defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa_za32, [FPCR]>; + defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_tmopa_za32, [FPCR]>; + defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa_za32, [FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEF16F16] in { - defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_za16_ftmopa, [FPCR]>; + defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa_za16, [FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEB16B16] in { - defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_za16_bftmopa, [FPCR]>; + defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa_za16, [FPCR]>; } -let Predicates = [HasSME_TMOP, HasSMEF8F32] in { - defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_za32_ftmopa, [FPMR, FPCR]>; +let Predicates = [HasSME_TMOP, HasSMEF8F16] in { + defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_tmopa_za16, [FPMR, FPCR]>; } -let Predicates = [HasSME_TMOP, HasSMEF8F16] in { - defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_za16_ftmopa, [FPMR, FPCR]>; +let Predicates = [HasSME_TMOP, HasSMEF8F32] in { + defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_tmopa_za32, [FPMR, FPCR]>; } let Predicates = [HasSME] in { diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll index da44f2446a18a..ed249c984a2f8 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll @@ -11,7 +11,7 @@ define void @stmopa_za32_s8( %zn1, %zn2, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.stmopa.za32.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -23,7 +23,7 @@ define void @utmopa_za32_u8( %zn1, %zn2, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.utmopa.za32.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -35,7 +35,7 @@ define void @ustmopa_za32_u8_s8( %zn1, %zn2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ustmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.za32.ustmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.ustmopa.za32.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -47,7 +47,7 @@ define void @sutmopa_za32_s8_u8( %zn1, %zn2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sutmopa za0.s, { z0.b, z1.b }, z2.b, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.za32.sutmopa.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.sutmopa.za32.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -59,7 +59,7 @@ define void @stmopa_za32_s16( %zn1, %zn2, < ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: stmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.za32.stmopa.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.stmopa.za32.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -71,7 +71,7 @@ define void @utmopa_za32_u16( %zn1, %zn2, < ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: utmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.za32.utmopa.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.utmopa.za32.nxv8i16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -83,7 +83,7 @@ define void @ftmopa_za32_f16( %zn1, %zn2, ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.za32.ftmopa.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.tmopa.za32.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -95,7 +95,7 @@ define void @bftmopa_za32_bf16( %zn1, ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.za32.bftmopa.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.tmopa.za32.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -107,7 +107,7 @@ define void @ftmopa_za32_f32( %zn1, %zn ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ftmopa za0.s, { z0.s, z1.s }, z2.s, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.za32.ftmopa.nxv4f32(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.tmopa.za32.nxv4f32(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -119,7 +119,7 @@ define void @ftmopa_za16_f16( %zn1, %zn2, ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.za16.ftmopa.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.tmopa.za16.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -131,7 +131,7 @@ define void @bftmopa_za16_bf16( %zn1, ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.za16.bftmopa.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.tmopa.za16.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -143,7 +143,7 @@ define void @ftmopa_za16_f8( %zn1, %zn2, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.tmopa.za16.nxv16if8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -155,7 +155,7 @@ define void @ftmopa_za32_f8( %zn1, %zn2, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.tmopa.za32.nxv16if8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } From ea5d22befcde1c782e29e189fa6d822aeab088df Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Wed, 16 Apr 2025 08:49:46 +0000 Subject: [PATCH 14/15] fixup! Fix minor nits --- clang/include/clang/Basic/arm_sme.td | 6 +++--- .../CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c | 12 ++++++------ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td | 6 +++--- llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll | 8 ++++---- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 41bbe8eb64ed5..403c9c6e92c8d 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -919,15 +919,15 @@ let SMETargetGuard = "sme2,sme-tmop" in { } let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in { - def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "h", MergeNone, "aarch64_sme_tmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "h", MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in { - def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_tmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in { - def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_tmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in { diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c index a1f37d4fa8492..f3b3892dfe6a8 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c @@ -143,12 +143,12 @@ void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint // CHECK-LABEL: @test_svtmopa_lane_za16_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za16_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -157,12 +157,12 @@ void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t // CHECK-LABEL: @test_svtmopa_lane_za16_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za16_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -172,13 +172,13 @@ void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint // CHECK-LABEL: @test_svtmopa_lane_za16_mf8_mf8_fpm( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za16.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 668b7447e849b..ce552f3f01b07 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3118,7 +3118,7 @@ let TargetPrefix = "aarch64" in { [ImmArg>, ImmArg>, IntrInaccessibleMemOnly]>; - def int_aarch64_sme_tmopa_za16 : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_ftmopa_za16 : SME_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_tmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_stmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_utmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index b1f906ae8db5a..ace6361895a64 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -187,15 +187,15 @@ let Predicates = [HasSME_TMOP] in { } let Predicates = [HasSME_TMOP, HasSMEF16F16] in { - defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa_za16, [FPCR]>; + defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_ftmopa_za16, [FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEB16B16] in { - defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa_za16, [FPCR]>; + defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_ftmopa_za16, [FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEF8F16] in { - defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_tmopa_za16, [FPMR, FPCR]>; + defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_ftmopa_za16, [FPMR, FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEF8F32] in { diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll index ed249c984a2f8..61f8f7be7ef5f 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll @@ -119,7 +119,7 @@ define void @ftmopa_za16_f16( %zn1, %zn2, ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.za16.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.ftmopa.za16.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -131,7 +131,7 @@ define void @bftmopa_za16_bf16( %zn1, ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.za16.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.ftmopa.za16.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -143,7 +143,7 @@ define void @ftmopa_za16_f8( %zn1, %zn2, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -155,7 +155,7 @@ define void @ftmopa_za32_f8( %zn1, %zn2, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.tmopa.za32.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } From 8f3e036051d982bf49c264f6bf9592aff2af5b9a Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Wed, 16 Apr 2025 09:03:44 +0000 Subject: [PATCH 15/15] fixup! One more intrinsic rename --- clang/include/clang/Basic/arm_sme.td | 4 ++-- .../AArch64/sme2-intrinsics/acle_sme2_tmop.c | 16 ++++++++-------- llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td | 8 ++++---- .../test/CodeGen/AArch64/sme2-intrinsics-tmop.ll | 8 ++++---- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 403c9c6e92c8d..3647fc7a27d83 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -911,7 +911,7 @@ let SMETargetGuard = "sme-f16f16" in { // SME2 - TMOP, SUTMOP, USTMOP let SMETargetGuard = "sme2,sme-tmop" in { - def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_tmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_ftmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; def SVSUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2u[i", "c", MergeNone, "aarch64_sme_sutmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; @@ -931,7 +931,7 @@ let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in { } let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in { - def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_tmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; + def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_ftmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; } multiclass ZAReadz ch> { diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c index f3b3892dfe6a8..55d0074663bc9 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c @@ -101,12 +101,12 @@ void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk // CHECK-LABEL: @test_svtmopa_lane_za32_f16_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8f16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -115,12 +115,12 @@ void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t // CHECK-LABEL: @test_svtmopa_lane_za32_f32_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f32_f3213svfloat32x2_tu13__SVFloat32_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv4f32(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -129,12 +129,12 @@ void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t // CHECK-LABEL: @test_svtmopa_lane_za32_bf16_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za32_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8bf16(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") { @@ -188,13 +188,13 @@ void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint // CHECK-LABEL: @test_svtmopa_lane_za32_mf8_mf8_fpm( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 1, [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], [[ZK:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svtmopa_lane_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index ce552f3f01b07..519e92c8b340d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3119,7 +3119,7 @@ let TargetPrefix = "aarch64" in { IntrInaccessibleMemOnly]>; def int_aarch64_sme_ftmopa_za16 : SME_OuterProduct_TMOP_Intrinsic; - def int_aarch64_sme_tmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; + def int_aarch64_sme_ftmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_stmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_utmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_sutmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index ace6361895a64..a83b17f7c9000 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -181,9 +181,9 @@ let Predicates = [HasSME_TMOP] in { defm UTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10101, ZZ_h_mul_r, ZPR16, nxv8i16, "utmopa", int_aarch64_sme_utmopa_za32>; defm SUTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01100, ZZ_b_mul_r, ZPR8, nxv16i8, "sutmopa", int_aarch64_sme_sutmopa_za32>; defm USTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b10100, ZZ_b_mul_r, ZPR8, nxv16i8, "ustmopa", int_aarch64_sme_ustmopa_za32>; - defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_tmopa_za32, [FPCR]>; - defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_tmopa_za32, [FPCR]>; - defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_tmopa_za32, [FPCR]>; + defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_ftmopa_za32, [FPCR]>; + defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_ftmopa_za32, [FPCR]>; + defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_ftmopa_za32, [FPCR]>; } let Predicates = [HasSME_TMOP, HasSMEF16F16] in { @@ -199,7 +199,7 @@ let Predicates = [HasSME_TMOP, HasSMEF8F16] in { } let Predicates = [HasSME_TMOP, HasSMEF8F32] in { - defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_tmopa_za32, [FPMR, FPCR]>; + defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_ftmopa_za32, [FPMR, FPCR]>; } let Predicates = [HasSME] in { diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll index 61f8f7be7ef5f..efd5f951eced3 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll @@ -83,7 +83,7 @@ define void @ftmopa_za32_f16( %zn1, %zn2, ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.za32.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.ftmopa.za32.nxv8f16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -95,7 +95,7 @@ define void @bftmopa_za32_bf16( %zn1, ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.za32.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.ftmopa.za32.nxv8bf16(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -107,7 +107,7 @@ define void @ftmopa_za32_f32( %zn1, %zn ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ftmopa za0.s, { z0.s, z1.s }, z2.s, z28[0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.tmopa.za32.nxv4f32(i32 0, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.ftmopa.za32.nxv4f32(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void } @@ -155,7 +155,7 @@ define void @ftmopa_za32_f8( %zn1, %zn2, %zn1, %zn2, %zm, %zk, i32 0) + call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 0, %zn1, %zn2, %zm, %zk, i32 0) ret void }