9 changes: 9 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
@@ -295,6 +295,7 @@ defm SVMOPS : ZAFPOuterProd<"mops">;
multiclass MOP4<string mode, string za, string t, string i, list<ImmCheck> checks> {
def _1x1 : Inst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{d}]", "vidd", t, MergeNone, i # "_1x1", [IsInOutZA, IsStreaming], checks>;
def _1x2 : Inst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{d}]", "vid2", t, MergeNone, i # "_1x2", [IsInOutZA, IsStreaming], checks>;
def _2x1 : Inst<"svmop4" # mode # "[_2x1]" # za # "[_{d}_{d}]", "vi2d", t, MergeNone, i # "_2x1", [IsInOutZA, IsStreaming], checks>;
}

let SMETargetGuard = "sme2,sme-mop4" in {
@@ -350,6 +351,10 @@ multiclass SUMOP4<string mode, string za, string t, string i, list<ImmCheck> che
"vid2.u", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x2",
[IsStreaming, IsInOutZA],
checks>;
def _2x1 : SInst<"svmop4" # mode # "[_2x1]" # za # "[_{d}_{3}]",
"vi2u", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_2x1",
[IsStreaming, IsInOutZA],
checks>;
}

multiclass USMOP4<string mode, string za, string t, string i, list<ImmCheck> checks> {
@@ -361,6 +366,10 @@ multiclass USMOP4<string mode, string za, string t, string i, list<ImmCheck> che
"vid2.x", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x2",
[IsStreaming, IsInOutZA],
checks>;
def _2x1 : SInst<"svmop4" # mode # "[_2x1]" # za # "[_{d}_{3}]",
"vi2x", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_2x1",
[IsStreaming, IsInOutZA],
checks>;
}

let SMETargetGuard = "sme2,sme-mop4" in {
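For orientation, here is a minimal sketch of the C-level declarations the new _2x1 entries above produce once arm_sme.h is generated. The names follow the TableGen name strings (bracketed parts are dropped in the type-overloaded spelling) and the argument order matches the tests below; the exact prototypes are emitted by the SVE/SME emitter, so the spellings here are illustrative, not authoritative.

// Illustrative prototypes for the new "two-vector tuple by single vector"
// (_2x1) MOP4 outer-product intrinsics: the first argument is the ZA tile
// immediate, zn is a two-vector tuple, and zm is a single vector.
void svmop4a_2x1_za32_s8_s8(uint64_t tile, svint8x2_t zn, svint8_t zm)
    __arm_streaming __arm_inout("za");
// Widening mixed-signedness form from the SUMOP4 multiclass:
void svmop4a_2x1_za32_s8_u8(uint64_t tile, svint8x2_t zn, svuint8_t zm)
    __arm_streaming __arm_inout("za");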
304 changes: 304 additions & 0 deletions clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_2x1.c
@@ -0,0 +1,304 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py

// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s


#include <arm_sme.h>

#ifdef SME_OVERLOADED_FORMS
#define SME_ACLE_FUNC(A1,A2_UNUSED,A3, A4_UNUSED) A1##A3
#else
#define SME_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
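// Editorial note (illustrative, not part of the checked test): with
// SME_OVERLOADED_FORMS defined, the macro keeps only its first and third
// arguments, producing the short overloaded spelling, e.g.
//   SME_ACLE_FUNC(svmop4a,_2x1,_za32,_s8_s8) -> svmop4a_za32
// without it, all four arguments are concatenated into the full name:
//   SME_ACLE_FUNC(svmop4a,_2x1,_za32,_s8_s8) -> svmop4a_2x1_za32_s8_s8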

// CHECK-LABEL: @test_svmop4a_2x1_za32_s8_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za32_s8_s8(svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_s8_s8)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za32_s8_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za32_s8_s8(svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_s8_s8)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za32_u8_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za32_u8_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_u8_u8)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za32_u8_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za32_u8_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_u8_u8)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za32_s8_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za32_s8_u8(svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_s8_u8)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za32_s8_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za32_s8_u8(svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_s8_u8)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za32_u8_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za32_u8_s8(svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_u8_s8)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za32_u8_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za32_u8_s8(svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_u8_s8)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za32_s16_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za32_s16_s16(svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_s16_s16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za32_s16_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za32_s16_s16(svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_s16_s16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za32_u16_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za32_u16_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_u16_u16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za32_u16_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za32_u16_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_u16_u16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za32_f16_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.2x1.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_f16_f16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za32_f16_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.2x1.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_f16_f16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za32_bf16_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.2x1.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_bf16_bf16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za32_bf16_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.2x1.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_bf16_bf16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za64_s16_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za64_s16_s16(svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_s16_s16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za64_s16_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za64_s16_s16(svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_s16_s16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za64_u16_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za64_u16_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_u16_u16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za64_u16_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za64_u16_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_u16_u16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za64_s16_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za64_s16_u16(svint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_s16_u16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za64_s16_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za64_s16_u16(svint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_s16_u16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za64_u16_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za64_u16_s16(svuint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_u16_s16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za64_u16_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za64_u16_s16(svuint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_u16_s16)(1, zn, zm);
}


// CHECK-LABEL: @test_svmop4a_2x1_za16_f16_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.2x1.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za16,_f16_f16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za16_f16_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.2x1.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za16,_f16_f16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za32_f32_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.2x1.nxv4f32(i32 1, <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_f32_f32)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za32_f32_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.2x1.nxv4f32(i32 1, <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_f32_f32)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za64_f64_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.2x1.nxv2f64(i32 1, <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za64_f64_f64(svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_f64_f64)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za64_f64_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.2x1.nxv2f64(i32 1, <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za64_f64_f64(svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_f64_f64)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4a_2x1_za16_bf16_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.2x1.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4a_2x1_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4a,_2x1,_za16,_bf16_bf16)(1, zn, zm);
}

// CHECK-LABEL: @test_svmop4s_2x1_za16_bf16_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.2x1.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
void test_svmop4s_2x1_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmop4s,_2x1,_za16,_bf16_bf16)(1, zn, zm);
}