You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Proposed in [this ACLE proposal](ARM-software/acle#409), this PR implements widening FMMLA intrinsics:
- F16 to F32
- MF8 to F32
- MF8 to F16
Additional changes:
- The IsOverloadCvt flag is renamed to IsOverloadFirstandLast for clarity, because the old name
implied a conversion. The implementation remains unchanged.
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_f32f16(
20
+
// CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
21
+
// CHECK-NEXT: [[ENTRY:.*:]]
22
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv8f16(<vscale x 4 x float> [[ACC]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
23
+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
24
+
//
25
+
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z11test_f32f16u13__SVFloat32_tu13__SVFloat16_tS0_(
26
+
// CPP-CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
27
+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
28
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv8f16(<vscale x 4 x float> [[ACC]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
29
+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
Copy file name to clipboardExpand all lines: clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_matmul_fp32.c
+2-2Lines changed: 2 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -17,12 +17,12 @@
17
17
18
18
// CHECK-LABEL: @test_svmmla_f32(
19
19
// CHECK-NEXT: entry:
20
-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x float> [[Z:%.*]])
20
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv4f32(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x float> [[Z:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x float> [[Z:%.*]])
25
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv4f32(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x float> [[Z:%.*]])
26
26
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
Copy file name to clipboardExpand all lines: clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_matmul_fp64.c
+2-2Lines changed: 2 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -17,12 +17,12 @@
17
17
18
18
// CHECK-LABEL: @test_svmmla_f64(
19
19
// CHECK-NEXT: entry:
20
-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x double> [[Z:%.*]])
20
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x double> [[Z:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x double> [[Z:%.*]])
25
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x double> [[Z:%.*]])
26
26
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
// A simple used,unused... macro, long enough to represent any SVE builtin.
14
+
#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3) A1##A3
15
+
#else
16
+
#define SVE_ACLE_FUNC(A1, A2, A3) A1##A2##A3
17
+
#endif
18
+
19
+
// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_f16mf8(
20
+
// CHECK-SAME: <vscale x 8 x half> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.fmmla.nxv8f16(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
24
+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
25
+
//
26
+
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z11test_f16mf8u13__SVFloat16_tu13__SVMfloat8_tS0_m(
27
+
// CPP-CHECK-SAME: <vscale x 8 x half> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.fmmla.nxv8f16(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
31
+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
// A simple used,unused... macro, long enough to represent any SVE builtin.
15
+
#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3) A1##A3
16
+
#else
17
+
#define SVE_ACLE_FUNC(A1, A2, A3) A1##A2##A3
18
+
#endif
19
+
20
+
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_f32mf8(
21
+
// CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmmla.nxv4f32(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
25
+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
26
+
//
27
+
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z11test_f32mf8u13__SVFloat32_tu13__SVMfloat8_tS0_m(
28
+
// CPP-CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmmla.nxv4f32(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
32
+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
0 commit comments