Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -991,9 +991,10 @@ let SMETargetGuard = "sme-f8f32" in {
[IsStreaming, IsInOutZA, IsOverloadNone], [ImmCheck<3, ImmCheck0_15>]>;
// NOTE(review): the record name says ZA16, but the intrinsic name ("svmla_lane_za32...") and the
// LLVM builtin ("..._fmlall_lane_za32_vg4x4") both target za32, and the sme-f8f16 block already
// uses the SVMLA_FP8_LANE_ZA16_* names. This looks like a copy-paste slip for
// SVMLA_FP8_LANE_ZA32_VG4x4 - confirm nothing refers to the record name before renaming.
def SVMLA_FP8_LANE_ZA16_VG4x4 : Inst<"svmla_lane_za32[_mf8]_vg4x4", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fmlall_lane_za32_vg4x4",
[IsStreaming, IsInOutZA, IsOverloadNone], [ImmCheck<3, ImmCheck0_15>]>;
// FMLALL (single vector by single vector; no _single infix)
def SVMLA_FP8_ZA32_VG4x1 : Inst<"svmla_za32[_mf8]_vg4x1", "vmdd>", "m", MergeNone, "aarch64_sme_fp8_fmlall_za32_vg4x1",
[IsStreaming, IsInOutZA, IsOverloadNone], []>;
// FMLALL (single)
def SVMLA_FP8_SINGLE_ZA32_VG4x1 : Inst<"svmla[_single]_za32[_mf8]_vg4x1", "vmdd>", "m", MergeNone, "aarch64_sme_fp8_fmlall_single_za32_vg4x1",
[IsStreaming, IsInOutZA, IsOverloadNone], []>;
def SVMLA_FP8_SINGLE_ZA32_VG4x2 : Inst<"svmla[_single]_za32[_mf8]_vg4x2", "vm2d>", "m", MergeNone, "aarch64_sme_fp8_fmlall_single_za32_vg4x2",
[IsStreaming, IsInOutZA, IsOverloadNone], []>;
def SVMLA_FP8_SINGLE_ZA32_VG4x4 : Inst<"svmla[_single]_za32[_mf8]_vg4x4", "vm4d>", "m", MergeNone, "aarch64_sme_fp8_fmlall_single_za32_vg4x4",
Expand All @@ -1015,9 +1016,10 @@ let SMETargetGuard = "sme-f8f16" in {
[IsStreaming, IsInOutZA, IsOverloadNone], [ImmCheck<3, ImmCheck0_15>]>;
def SVMLA_FP8_LANE_ZA16_VG2x4 : Inst<"svmla_lane_za16[_mf8]_vg2x4", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fmlal_lane_za16_vg2x4",
[IsStreaming, IsInOutZA, IsOverloadNone], [ImmCheck<3, ImmCheck0_15>]>;
// FMLAL (single vector by single vector; no _single infix)
def SVMLA_FP8_ZA16_VG2x1 : Inst<"svmla_za16[_mf8]_vg2x1", "vmdd>", "m", MergeNone, "aarch64_sme_fp8_fmlal_za16_vg2x1",
[IsStreaming, IsInOutZA, IsOverloadNone], []>;
// FMLAL (single)
def SVMLA_FP8_SINGLE_ZA16_VG2x1 : Inst<"svmla[_single]_za16[_mf8]_vg2x1", "vmdd>", "m", MergeNone, "aarch64_sme_fp8_fmlal_single_za16_vg2x1",
[IsStreaming, IsInOutZA, IsOverloadNone], []>;
def SVMLA_FP8_SINGLE_ZA16_VG2x2 : Inst<"svmla[_single]_za16[_mf8]_vg2x2", "vm2d>", "m", MergeNone, "aarch64_sme_fp8_fmlal_single_za16_vg2x2",
[IsStreaming, IsInOutZA, IsOverloadNone], []>;
def SVMLA_FP8_SINGLE_ZA16_VG2x4 : Inst<"svmla[_single]_za16[_mf8]_vg2x4", "vm4d>", "m", MergeNone, "aarch64_sme_fp8_fmlal_single_za16_vg2x4",
Expand Down
12 changes: 6 additions & 6 deletions clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,18 +133,18 @@ void test_svmla_lane_za32_vg4x4(uint32_t slice, svmfloat8x4_t zn, svmfloat8_t zm
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.za16.vg2x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: define dso_local void @_Z28test_svmla_single_za16_vg2x1ju13__SVMfloat8_tS_m(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.za16.vg2x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CPP-CHECK-NEXT: ret void
//
void test_svmla_single_za16_vg2x1(uint32_t slice, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmla,_single,_za16,_mf8,_vg2x1_fpm)(slice, zn, zm, fpm);
SME_ACLE_FUNC(svmla_za16,_mf8,_vg2x1_fpm,,)(slice, zn, zm, fpm);
}

// CHECK-LABEL: define dso_local void @test_svmla_single_za16_vg2x2(
Expand Down Expand Up @@ -189,18 +189,18 @@ void test_svmla_single_za16_vg2x4(uint32_t slice, svmfloat8x4_t zn, svmfloat8_t
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.za32.vg4x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: define dso_local void @_Z28test_svmla_single_za32_vg4x1ju13__SVMfloat8_tS_m(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.za32.vg4x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CPP-CHECK-NEXT: ret void
//
void test_svmla_single_za32_vg4x1(uint32_t slice, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmla,_single,_za32,_mf8,_vg4x1_fpm)(slice, zn, zm, fpm);
SME_ACLE_FUNC(svmla_za32,_mf8,_vg4x1_fpm,,)(slice, zn, zm, fpm);
}

// CHECK-LABEL: define dso_local void @test_svmla_single_za32_vg4x2(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void test(void) __arm_inout("za"){
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_lane_za16_vg2x4_fpm(uint32_t_val, svmfloat8x4_t_val, svmfloat8_t_val, 2, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_single_za16_mf8_vg2x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
svmla_za16_mf8_vg2x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_single_za16_mf8_vg2x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8_t_val, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
Expand Down Expand Up @@ -107,7 +107,7 @@ void test_streaming(void) __arm_streaming __arm_inout("za"){
svmla_lane_za16_vg2x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, 2, fpm_t_val);
svmla_lane_za16_vg2x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8_t_val, 2, fpm_t_val);
svmla_lane_za16_vg2x4_fpm(uint32_t_val, svmfloat8x4_t_val, svmfloat8_t_val, 2, fpm_t_val);
svmla_single_za16_mf8_vg2x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
svmla_za16_mf8_vg2x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
svmla_single_za16_mf8_vg2x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8_t_val, fpm_t_val);
svmla_single_za16_mf8_vg2x4_fpm(uint32_t_val, svmfloat8x4_t_val, svmfloat8_t_val, fpm_t_val);
svmla_za16_mf8_vg2x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8x2_t_val, fpm_t_val);
Expand Down Expand Up @@ -168,7 +168,7 @@ void test_streaming_compatible(void) __arm_streaming_compatible __arm_inout("za"
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_lane_za16_vg2x4_fpm(uint32_t_val, svmfloat8x4_t_val, svmfloat8_t_val, 2, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_single_za16_mf8_vg2x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
svmla_za16_mf8_vg2x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_single_za16_mf8_vg2x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8_t_val, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void test(void) __arm_inout("za"){
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_lane_za32_vg4x4_fpm(uint32_t_val, svmfloat8x4_t_val, svmfloat8_t_val, 2, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_single_za32_mf8_vg4x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
svmla_za32_mf8_vg4x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_single_za32_mf8_vg4x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8_t_val, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
Expand Down Expand Up @@ -111,7 +111,7 @@ void test_streaming(void) __arm_streaming __arm_inout("za"){
svmla_lane_za32_vg4x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, 2, fpm_t_val);
svmla_lane_za32_vg4x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8_t_val, 2, fpm_t_val);
svmla_lane_za32_vg4x4_fpm(uint32_t_val, svmfloat8x4_t_val, svmfloat8_t_val, 2, fpm_t_val);
svmla_single_za32_mf8_vg4x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
svmla_za32_mf8_vg4x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
svmla_single_za32_mf8_vg4x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8_t_val, fpm_t_val);
svmla_single_za32_mf8_vg4x4_fpm(uint32_t_val, svmfloat8x4_t_val, svmfloat8_t_val, fpm_t_val);
svmla_za32_mf8_vg4x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8x2_t_val, fpm_t_val);
Expand Down Expand Up @@ -174,7 +174,7 @@ void test_streaming_compatible(void) __arm_streaming_compatible __arm_inout("za"
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_lane_za32_vg4x4_fpm(uint32_t_val, svmfloat8x4_t_val, svmfloat8_t_val, 2, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_single_za32_mf8_vg4x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
svmla_za32_mf8_vg4x1_fpm(uint32_t_val, svmfloat8_t_val, svmfloat8_t_val, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
svmla_single_za32_mf8_vg4x2_fpm(uint32_t_val, svmfloat8x2_t_val, svmfloat8_t_val, fpm_t_val);
// streaming-guard-error@+1 {{builtin can only be called from a streaming function}}
Expand Down
10 changes: 5 additions & 5 deletions clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_mla.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@ void test_svmla(uint32_t slice, svmfloat8_t zn, svmfloat8x2_t znx2, svmfloat8x4_
// expected-error@+1 {{'svmla_lane_za32_mf8_vg4x4_fpm' needs target feature sme,sme-f8f32}}
svmla_lane_za32_mf8_vg4x4_fpm(slice, znx4, zn, 0, fpmr);

// expected-error@+1 {{'svmla_single_za16_mf8_vg2x1_fpm' needs target feature sme,sme-f8f16}}
svmla_single_za16_mf8_vg2x1_fpm(slice, zn, zn, fpmr);
// expected-error@+1 {{'svmla_za16_mf8_vg2x1_fpm' needs target feature sme,sme-f8f16}}
svmla_za16_mf8_vg2x1_fpm(slice, zn, zn, fpmr);

// expected-error@+1 {{'svmla_single_za16_mf8_vg2x2_fpm' needs target feature sme,sme-f8f16}}
svmla_single_za16_mf8_vg2x2_fpm(slice, znx2, zn, fpmr);

// expected-error@+1 {{'svmla_single_za16_mf8_vg2x4_fpm' needs target feature sme,sme-f8f16}}
svmla_single_za16_mf8_vg2x4_fpm(slice, znx4, zn, fpmr);

// expected-error@+1 {{'svmla_single_za32_mf8_vg4x1_fpm' needs target feature sme,sme-f8f32}}
svmla_single_za32_mf8_vg4x1_fpm(slice, zn, zn, fpmr);
// expected-error@+1 {{'svmla_za32_mf8_vg4x1_fpm' needs target feature sme,sme-f8f32}}
svmla_za32_mf8_vg4x1_fpm(slice, zn, zn, fpmr);

// expected-error@+1 {{'svmla_single_za32_mf8_vg4x2_fpm' needs target feature sme,sme-f8f32}}
svmla_single_za32_mf8_vg4x2_fpm(slice, znx2, zn, fpmr);
Expand All @@ -53,4 +53,4 @@ void test_svmla(uint32_t slice, svmfloat8_t zn, svmfloat8x2_t znx2, svmfloat8x4_

// expected-error@+1 {{'svmla_za32_mf8_vg4x4_fpm' needs target feature sme,sme-f8f32}}
svmla_za32_mf8_vg4x4_fpm(slice, znx4, znx4, fpmr);
}
}
16 changes: 9 additions & 7 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -4085,7 +4085,7 @@ let TargetPrefix = "aarch64" in {
llvm_nxv16i8_ty,
llvm_i32_ty],
[IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>;
class SME_FP8_ZA_SINGLE_VGx1_Intrinsic
class SME_FP8_ZA_VGx1_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty,
llvm_nxv16i8_ty],
Expand Down Expand Up @@ -4148,23 +4148,25 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_fp8_fmlal_lane_za16_vg2x1 : SME_FP8_ZA_LANE_VGx1_Intrinsic;
def int_aarch64_sme_fp8_fmlal_lane_za16_vg2x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlal_lane_za16_vg2x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic;
// Single
def int_aarch64_sme_fp8_fmlal_single_za16_vg2x1 : SME_FP8_ZA_SINGLE_VGx1_Intrinsic;
// Single-Single
def int_aarch64_sme_fp8_fmlal_za16_vg2x1 : SME_FP8_ZA_VGx1_Intrinsic;
// Multi-Single
def int_aarch64_sme_fp8_fmlal_single_za16_vg2x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlal_single_za16_vg2x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic;
// Multi
// Multi-Multi
def int_aarch64_sme_fp8_fmlal_multi_za16_vg2x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlal_multi_za16_vg2x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic;

// Quad-vector groups (F8F32)
def int_aarch64_sme_fp8_fmlall_lane_za32_vg4x1 : SME_FP8_ZA_LANE_VGx1_Intrinsic;
def int_aarch64_sme_fp8_fmlall_lane_za32_vg4x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlall_lane_za32_vg4x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic;
// Single
def int_aarch64_sme_fp8_fmlall_single_za32_vg4x1 : SME_FP8_ZA_SINGLE_VGx1_Intrinsic;
// Single-Single
def int_aarch64_sme_fp8_fmlall_za32_vg4x1 : SME_FP8_ZA_VGx1_Intrinsic;
// Multi-Single
def int_aarch64_sme_fp8_fmlall_single_za32_vg4x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlall_single_za32_vg4x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic;
// Multi
// Multi-Multi
def int_aarch64_sme_fp8_fmlall_multi_za32_vg4x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlall_multi_za32_vg4x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic;

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,7 @@ defm FMLAL_MZZI_BtoH : sme2_fp8_fmlal_index_za16<"fmlal", int_aarch64_
defm FMLAL_VG2_M2ZZI_BtoH : sme2_fp8_fmlal_index_za16_vgx2<"fmlal", int_aarch64_sme_fp8_fmlal_lane_za16_vg2x2>;
defm FMLAL_VG4_M4ZZI_BtoH : sme2_fp8_fmlal_index_za16_vgx4<"fmlal", int_aarch64_sme_fp8_fmlal_lane_za16_vg2x4>;

defm FMLAL_VG2_MZZ_BtoH : sme2_fp8_fmlal_single_za16<"fmlal", int_aarch64_sme_fp8_fmlal_single_za16_vg2x1>;
defm FMLAL_VG2_MZZ_BtoH : sme2_fp8_fmlal_single_za16<"fmlal", int_aarch64_sme_fp8_fmlal_za16_vg2x1>;
defm FMLAL_VG2_M2ZZ_BtoH : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlal_single_za16_vg2x2, [FPMR, FPCR]>;
defm FMLAL_VG4_M4ZZ_BtoH : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, MatrixOp16, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlal_single_za16_vg2x4, [FPMR, FPCR]>;

Expand All @@ -1112,7 +1112,7 @@ defm FMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"fmlall", 0b01, 0b0
defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, int_aarch64_sme_fp8_fmlall_lane_za32_vg4x2, [FPMR, FPCR]>;
defm FMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"fmlall", 0b00, 0b1000, int_aarch64_sme_fp8_fmlall_lane_za32_vg4x4, [FPMR, FPCR]>;

defm FMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"fmlall", 0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_single_za32_vg4x1, [FPMR, FPCR]>;
defm FMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"fmlall", 0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_za32_vg4x1, [FPMR, FPCR]>;
defm FMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"fmlall", 0b000001, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_single_za32_vg4x2, [FPMR, FPCR]>;
defm FMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"fmlall", 0b010001, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_single_za32_vg4x4, [FPMR, FPCR]>;

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-mla.ll
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ define void @test_fmlal_single_vg2x1(i32 %slice, <vscale x 16 x i8> %zn, <vscale
; CHECK: fmlal za.h[w8, 0:1], z0.b, z1.b
; CHECK: fmlal za.h[w8, 14:15], z0.b, z1.b
; CHECK: ret
call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x1(i32 %slice, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
call void @llvm.aarch64.sme.fp8.fmlal.za16.vg2x1(i32 %slice, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
%add = add i32 %slice, 14
call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x1(i32 %add, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
call void @llvm.aarch64.sme.fp8.fmlal.za16.vg2x1(i32 %add, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}

Expand Down Expand Up @@ -173,9 +173,9 @@ define void @test_fmlall_single_vg4x1(i32 %slice, <vscale x 16 x i8> %zn, <vscal
; CHECK: fmlall za.s[w8, 0:3], z0.b, z1.b
; CHECK: fmlall za.s[w8, 12:15], z0.b, z1.b
; CHECK: ret
call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x1(i32 %slice, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
call void @llvm.aarch64.sme.fp8.fmlall.za32.vg4x1(i32 %slice, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
%add = add i32 %slice, 12
call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x1(i32 %add, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
call void @llvm.aarch64.sme.fp8.fmlall.za32.vg4x1(i32 %add, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}

Expand Down
Loading