Commit 4b37033

- Implemented overloading for fmmla intrinsics, replaced fixed-type intrinsics
- Prototype cleanups
- Updated ll tests to remove unnecessary IR
- Removed unused arguments in clang test macros
- Removed redundant check lines in ll tests
1 parent 9004ff2 commit 4b37033
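
At the IR level, the change collapses the fixed-name fmmla intrinsics into a single overloaded intrinsic whose name is mangled on the accumulator and source vector types. A minimal before/after sketch, reconstructed from the updated tests in this commit:

; Before: one fixed-name intrinsic per type combination, e.g. for f16 inputs:
declare <vscale x 4 x float> @llvm.aarch64.sve.fmmla.f16f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>)

; After: one overloaded intrinsic; the suffixes are derived from the overloaded types.
declare <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv8f16(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>)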

File tree

12 files changed: +53 −137 lines


clang/include/clang/Basic/arm_sve.td

Lines changed: 5 additions & 5 deletions
@@ -1190,22 +1190,22 @@ def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]", "ddqbi", "i", MergeNone, "aarc
 }
 
 let SVETargetGuard = "f32mm", SMETargetGuard = InvalidMode in {
-def SVMLLA_F32 : SInst<"svmmla[_f32]", "dddd","f", MergeNone, "aarch64_sve_fmmla">;
+def SVMLLA_F32 : SInst<"svmmla[_f32]", "dddd","f", MergeNone, "aarch64_sve_fmmla", [IsOverloadCvt]>;
 }
 
 let SVETargetGuard = "f64mm", SMETargetGuard = InvalidMode in {
-def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd", "d", MergeNone, "aarch64_sve_fmmla">;
+def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd", "d", MergeNone, "aarch64_sve_fmmla", [IsOverloadCvt]>;
 }
 
 let SVETargetGuard = "sve-f16f32mm", SMETargetGuard = InvalidMode in {
-def SVMLLA_F32_F16 : SInst<"svmmla[_f32_f16]", "MMdd", "h", MergeNone, "aarch64_sve_fmmla_f16f32", [IsOverloadNone]>;
+def SVMLLA_F32_F16 : SInst<"svmmla[_f32_f16]", "ddhh", "f", MergeNone, "aarch64_sve_fmmla", [IsOverloadCvt]>;
 }
 
 let SVETargetGuard = "sve2,f8f32mm", SMETargetGuard = InvalidMode in {
-def SVMLLA_F32_MF8 : SInst<"svmmla[_f32_mf8]", "MM~~>", "m", MergeNone, "aarch64_sve_fmmla_mf8f32", [IsOverloadNone]>;
+def SVMLLA_F32_MF8 : SInst<"svmmla[_f32_mf8]", "dd~~>", "f", MergeNone, "aarch64_sve_fmmla", [IsOverloadCvt]>;
 }
 
 let SVETargetGuard = "sve2,f8f16mm", SMETargetGuard = InvalidMode in {
-def SVMLLA_F16_MF8 : SInst<"svmmla[_f16_mf8]", "OO~~>", "m", MergeNone, "aarch64_sve_fmmla_mf8f16", [IsOverloadNone]>;
+def SVMLLA_F16_MF8 : SInst<"svmmla[_f16_mf8]", "dd~~>", "h", MergeNone, "aarch64_sve_fmmla", [IsOverloadCvt]>;
 }
 
 def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn1q">;

clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_fmmla-f32f16.c

Lines changed: 6 additions & 6 deletions
@@ -10,24 +10,24 @@
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
-// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A3) A1##A3
 #else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
+
 // CHECK-LABEL: define dso_local <vscale x 4 x float> @test_f32f16(
 // CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.f16f32(<vscale x 4 x float> [[ACC]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv8f16(<vscale x 4 x float> [[ACC]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z11test_f32f16u13__SVFloat32_tu13__SVFloat16_tS0_(
 // CPP-CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.f16f32(<vscale x 4 x float> [[ACC]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv8f16(<vscale x 4 x float> [[ACC]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_f32f16(svfloat32_t acc, svfloat16_t a, svfloat16_t b) {
-  return SVE_ACLE_FUNC(svmmla, _f32_f16, , )(acc, a, b);
+  return SVE_ACLE_FUNC(svmmla, _f32_f16)(acc, a, b);
 }

clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_matmul_fp32.c

Lines changed: 2 additions & 2 deletions
@@ -17,12 +17,12 @@
 
 // CHECK-LABEL: @test_svmmla_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x float> [[Z:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv4f32(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x float> [[Z:%.*]])
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svmmla_f32u13__SVFloat32_tS_S_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x float> [[Z:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv4f32(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x float> [[Z:%.*]])
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svmmla_f32(svfloat32_t x, svfloat32_t y, svfloat32_t z) {

clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_matmul_fp64.c

Lines changed: 2 additions & 2 deletions
@@ -17,12 +17,12 @@
 
 // CHECK-LABEL: @test_svmmla_f64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x double> [[Z:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x double> [[Z:%.*]])
 // CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svmmla_f64u13__SVFloat64_tS_S_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x double> [[Z:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x double> [[Z:%.*]])
 // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svmmla_f64(svfloat64_t x, svfloat64_t y, svfloat64_t z) {

clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_fmmla-f16mf8.c

Lines changed: 5 additions & 5 deletions
@@ -11,25 +11,25 @@
 
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3) A1##A3
 #else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2, A3) A1##A2##A3
 #endif
 
 // CHECK-LABEL: define dso_local <vscale x 8 x half> @test_f16mf8(
 // CHECK-SAME: <vscale x 8 x half> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmmla.mf8f16(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmmla.nxv8f16.nxv16i8(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
 // CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z11test_f16mf8u13__SVFloat16_tu13__SVMfloat8_tS0_m(
 // CPP-CHECK-SAME: <vscale x 8 x half> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmmla.mf8f16(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmmla.nxv8f16.nxv16i8(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
 // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_f16mf8(svfloat16_t acc, svmfloat8_t a, svmfloat8_t b, fpm_t fpmr) {
-  return SVE_ACLE_FUNC(svmmla, _f16_mf8, _fpm, )(acc, a, b, fpmr);
+  return SVE_ACLE_FUNC(svmmla, _f16_mf8, _fpm)(acc, a, b, fpmr);
 }

clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_fmmla-f32mf8.c

Lines changed: 5 additions & 5 deletions
@@ -12,25 +12,25 @@
 
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3) A1##A3
 #else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2, A3) A1##A2##A3
 #endif
 
 // CHECK-LABEL: define dso_local <vscale x 4 x float> @test_f32mf8(
 // CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.mf8f32(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv16i8(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z11test_f32mf8u13__SVFloat32_tu13__SVMfloat8_tS0_m(
 // CPP-CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.mf8f32(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv16i8(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_f32mf8(svfloat32_t acc, svmfloat8_t a, svmfloat8_t b, fpm_t fpmr) {
-  return SVE_ACLE_FUNC(svmmla, _f32_mf8, _fpm, )(acc, a, b, fpmr);
+  return SVE_ACLE_FUNC(svmmla, _f32_mf8, _fpm)(acc, a, b, fpmr);
 }

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 4 additions & 14 deletions
@@ -2805,22 +2805,12 @@ def int_aarch64_sve_sudot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
 //
 // SVE ACLE: 7.4/5. FP64/FP32 matrix multiply extensions
 //
-def int_aarch64_sve_fmmla : AdvSIMD_3VectorArg_Intrinsic;
 
-def int_aarch64_sve_fmmla_f16f32
-    : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
-                            [ llvm_nxv4f32_ty, llvm_nxv8f16_ty, llvm_nxv8f16_ty ],
-                            [IntrNoMem]>;
-
-def int_aarch64_sve_fmmla_mf8f32
-    : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
-                            [ llvm_nxv4f32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty ],
-                            [IntrNoMem]>;
+def int_aarch64_sve_fmmla
+    : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                            [ LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1> ],
+                            [ IntrNoMem ]>;
 
-def int_aarch64_sve_fmmla_mf8f16
-    : DefaultAttrsIntrinsic<[llvm_nxv8f16_ty],
-                            [ llvm_nxv8f16_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty ],
-                            [IntrNoMem]>;
 //
 // SVE ACLE: 7.2. BFloat16 extensions
 //
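
For reference, the concrete instantiations of the single overloaded definition exercised by this commit are listed below; the declaration signatures are inferred from the updated test check lines:

declare <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv8f16(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv16i8(<vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmmla.nxv8f16.nxv16i8(<vscale x 8 x half>, <vscale x 16 x i8>, <vscale x 16 x i8>)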

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 3 additions & 7 deletions
@@ -3684,7 +3684,7 @@ let Predicates = [HasSVE, HasMatMulFP32] in {
 } // End HasSVE, HasMatMulFP32
 
 let Predicates = [HasSVE_F16F32MM] in {
-  defm FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b001, "fmmla", ZPR32, ZPR16, int_aarch64_sve_fmmla_f16f32, nxv4f32, nxv8f16>;
+  defm FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b001, "fmmla", ZPR32, ZPR16, int_aarch64_sve_fmmla, nxv4f32, nxv8f16>;
 } // End HasSVE_F16F32MM
 
 let Predicates = [HasSVE, HasMatMulFP64] in {

@@ -4744,15 +4744,11 @@ defm FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt", nxv4f32, int_aarch64_
 } // End HasSSVE_FP8FMA
 
 let Predicates = [HasSVE2, HasF8F32MM] in {
-  def FMMLA_ZZZ_BtoS : sve2_fp8_mmla<0b0, ZPR32, "fmmla">;
-  def : Pat<(nxv4f32 (int_aarch64_sve_fmmla_mf8f32 nxv4f32:$acc, nxv16i8:$zn, nxv16i8:$zm)),
-            (FMMLA_ZZZ_BtoS $acc, $zn, $zm)>;
+  defm FMMLA_ZZZ_BtoS : sve2_fp8_fmmla<0b0, ZPR32, "fmmla", nxv4f32>;
 }
 
 let Predicates = [HasSVE2, HasF8F16MM] in {
-  def FMMLA_ZZZ_BtoH : sve2_fp8_mmla<0b1, ZPR16, "fmmla">;
-  def : Pat<(nxv8f16 (int_aarch64_sve_fmmla_mf8f16 nxv8f16:$acc, nxv16i8:$zn, nxv16i8:$zm)),
-            (FMMLA_ZZZ_BtoH $acc, $zn, $zm)>;
+  defm FMMLA_ZZZ_BtoH : sve2_fp8_fmmla<0b1, ZPR16, "fmmla", nxv8f16>;
 }
 
 let Predicates = [HasSSVE_FP8DOT2] in {
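
A minimal IR sketch of what the new sve2_fp8_fmmla patterns (defined in SVEInstrFormats.td below) should select. The function name is hypothetical, and the operand suffixes in the expected assembly are an assumption based on the ZPR32 destination and FP8 byte sources:

; With -mattr=+sve2,+f8f32mm this call should select FMMLA_ZZZ_BtoS,
; i.e. lower to something like: fmmla z0.s, z1.b, z2.b
define <vscale x 4 x float> @fmmla_f32mf8_example(<vscale x 4 x float> %acc, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv16i8(<vscale x 4 x float> %acc, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret <vscale x 4 x float> %r
}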

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 6 additions & 0 deletions
@@ -11143,6 +11143,12 @@ class sve2_fp8_mmla<bit opc, ZPRRegOp dst_ty, string mnemonic>
   let Uses = [FPMR, FPCR];
 }
 
+multiclass sve2_fp8_fmmla<bits<1> opc, ZPRRegOp zprty, string mnemonic, ValueType ResVT> {
+  def NAME : sve2_fp8_mmla<opc, zprty, mnemonic>;
+  def : Pat<(ResVT (int_aarch64_sve_fmmla ResVT:$acc, nxv16i8:$zn, nxv16i8:$zm)),
+            (!cast<Instruction>(NAME) $acc, $zn, $zm)>;
+}
+
 class sve_fp8_dot_indexed<bits<4> opc, ZPRRegOp dst_ty, Operand iop_ty, string mnemonic>
   : I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, iop_ty:$iop),
       mnemonic, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
Lines changed: 5 additions & 25 deletions
@@ -1,32 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve-f16f32mm < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve-f16f32mm < %s | FileCheck %s
 
-define <vscale x 4 x float> @_Z1tu13__SVFloat32_tu13__SVFloat16_tS0_(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
-; CHECK-LABEL: _Z1tu13__SVFloat32_tu13__SVFloat16_tS0_:
+define <vscale x 4 x float> @fmmla_f32f16(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fmmla_f32f16:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-3
-; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: str z0, [sp, #2, mul vl]
 ; CHECK-NEXT: fmmla z0.s, z1.h, z2.h
-; CHECK-NEXT: str z1, [sp, #1, mul vl]
-; CHECK-NEXT: str z2, [sp]
-; CHECK-NEXT: addvl sp, sp, #3
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 entry:
-  %acc.addr = alloca <vscale x 4 x float>, align 16
-  %a.addr = alloca <vscale x 8 x half>, align 16
-  %b.addr = alloca <vscale x 8 x half>, align 16
-  store <vscale x 4 x float> %acc, ptr %acc.addr, align 16
-  store <vscale x 8 x half> %a, ptr %a.addr, align 16
-  store <vscale x 8 x half> %b, ptr %b.addr, align 16
-  %0 = load <vscale x 4 x float>, ptr %acc.addr, align 16
-  %1 = load <vscale x 8 x half>, ptr %a.addr, align 16
-  %2 = load <vscale x 8 x half>, ptr %b.addr, align 16
-  %3 = call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.f16f32(<vscale x 4 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2)
-  ret <vscale x 4 x float> %3
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv8f16(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 4 x float> %out
 }
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmmla.f16f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>)
