Skip to content

Commit d25ccdb

Browse files
CarolineConcattonavaneethshan
authored andcommitted
[Clang][LLVM][AArch64] Add support for FCVTXNT, FCVTLT, {B}FCVTNT int… (#170356)
…rinsics This patch adds support in Clang for these assembly instructions FCVTXNT, FCVTLT, {B}FCVTNT By implementing these prototypes: // Variant is available for _f64_f32 svfloat32_t svcvtlt_f32[_f16]_z (svbool_t pg, svfloat16_t op); // Variants are available for: // _f32_f64, _bf16_f32 svfloat16_t svcvtnt_f16[_f32]_z (svfloat16_t even, svbool_t pg, svfloat32_t op); svfloat32_t svcvtxnt_f32[_f64]_z (svfloat32_t even, svbool_t pg, svfloat64_t op); according to the ACLE[1] [1] ARM-software/acle#412 --------- Co-authored-by: Copilot <[email protected]> (cherry picked from commit 542d2a5)
1 parent bc1609a commit d25ccdb

File tree

7 files changed

+273
-10
lines changed

7 files changed

+273
-10
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,18 @@ def SVCVTXNT_F32_F64 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aar
978978
// SVCVTXNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp
979979
}
980980

981+
let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sve2p2|sme2p2" in {
982+
983+
def SVCVTNT_Z_F16_F32 : SInst<"svcvtnt_f16[_f32]_z", "hhPd", "f", MergeNone, "aarch64_sve_fcvtnt_z_f16f32", [IsOverloadNone, VerifyRuntimeMode]>;
984+
def SVCVTNT_Z_F32_F64 : SInst<"svcvtnt_f32[_f64]_z", "hhPd", "d", MergeNone, "aarch64_sve_fcvtnt_z_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
985+
def SVCVTNT_Z_BF16_F32 : SInst<"svcvtnt_bf16[_f32]_z", "$$Pd", "f", MergeNone, "aarch64_sve_fcvtnt_z_bf16f32", [IsOverloadNone, VerifyRuntimeMode]>;
986+
987+
def SVCVTXNT_Z_F32_F64 : SInst<"svcvtxnt_f32[_f64]_z", "MMPd", "d", MergeNone, "aarch64_sve_fcvtxnt_z_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
988+
989+
def SVCVTLT_Z_F32_F16 : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, "aarch64_sve_fcvtlt_f32f16", [IsOverloadNone, VerifyRuntimeMode]>;
990+
def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "aarch64_sve_fcvtlt_f64f32", [IsOverloadNone, VerifyRuntimeMode]>;
991+
992+
}
981993
////////////////////////////////////////////////////////////////////////////////
982994
// Permutations and selection
983995

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
4+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
5+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
6+
7+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
8+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
9+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
10+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
11+
12+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p2\
13+
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
14+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p2\
15+
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
16+
//
17+
// REQUIRES: aarch64-registered-target
18+
19+
#include <arm_sve.h>
20+
21+
#ifdef SVE_OVERLOADED_FORMS
22+
// A simple used,unused... macro, long enough to represent any SVE builtin.
23+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
24+
#else
25+
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
26+
#endif
27+
28+
#if defined __ARM_FEATURE_SME
29+
#define MODE_ATTR __arm_streaming
30+
#else
31+
#define MODE_ATTR
32+
#endif
33+
34+
35+
// CHECK-LABEL: @test_svcvtnt_f16_f32_z(
36+
// CHECK-NEXT: entry:
37+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
38+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
39+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
40+
//
41+
// CPP-CHECK-LABEL: @_Z22test_svcvtnt_f16_f32_zu13__SVFloat16_tu10__SVBool_tu13__SVFloat32_t(
42+
// CPP-CHECK-NEXT: entry:
43+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
44+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
45+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
46+
//
47+
svfloat16_t test_svcvtnt_f16_f32_z(svfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
48+
{
49+
return SVE_ACLE_FUNC(svcvtnt_f16,_f32,_z,)(inactive, pg, op);
50+
}
51+
52+
// CHECK-LABEL: @test_svcvtnt_bf16_f32_z(
53+
// CHECK-NEXT: entry:
54+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
55+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
56+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
57+
//
58+
// CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_zu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
59+
// CPP-CHECK-NEXT: entry:
60+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
61+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
62+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
63+
//
64+
svbfloat16_t test_svcvtnt_bf16_f32_z(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
65+
{
66+
return SVE_ACLE_FUNC(svcvtnt_bf16,_f32,_z,)(inactive, pg, op);
67+
}
68+
69+
// CHECK-LABEL: @test_svcvtnt_f32_f64_z(
70+
// CHECK-NEXT: entry:
71+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
72+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
73+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
74+
//
75+
// CPP-CHECK-LABEL: @_Z22test_svcvtnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t(
76+
// CPP-CHECK-NEXT: entry:
77+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
78+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
79+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
80+
//
81+
svfloat32_t test_svcvtnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
82+
{
83+
return SVE_ACLE_FUNC(svcvtnt_f32,_f64,_z,)(inactive, pg, op);
84+
}
85+
86+
87+
88+
// CHECK-LABEL: @test_svcvtxnt_f32_f64_z(
89+
// CHECK-NEXT: entry:
90+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
91+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
92+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
93+
//
94+
// CPP-CHECK-LABEL: @_Z23test_svcvtxnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t(
95+
// CPP-CHECK-NEXT: entry:
96+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
97+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
98+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
99+
//
100+
svfloat32_t test_svcvtxnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
101+
{
102+
return SVE_ACLE_FUNC(svcvtxnt_f32,_f64,_z,)(inactive, pg, op);
103+
}
104+
105+
// CHECK-LABEL: @test_svcvtlt_f32_f16_z(
106+
// CHECK-NEXT: entry:
107+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
108+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
109+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
110+
//
111+
// CPP-CHECK-LABEL: @_Z22test_svcvtlt_f32_f16_zu10__SVBool_tu13__SVFloat16_t(
112+
// CPP-CHECK-NEXT: entry:
113+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
114+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
115+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
116+
//
117+
svfloat32_t test_svcvtlt_f32_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
118+
{
119+
return SVE_ACLE_FUNC(svcvtlt_f32,_f16,_z,)(pg, op);
120+
}
121+
122+
// CHECK-LABEL: @test_svcvtlt_f64_f32_z(
123+
// CHECK-NEXT: entry:
124+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
125+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
126+
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
127+
//
128+
// CPP-CHECK-LABEL: @_Z22test_svcvtlt_f64_f32_zu10__SVBool_tu13__SVFloat32_t(
129+
// CPP-CHECK-NEXT: entry:
130+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
131+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
132+
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
133+
//
134+
svfloat64_t test_svcvtlt_f64_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
135+
{
136+
return SVE_ACLE_FUNC(svcvtlt_f64,_f32,_z,)(pg, op);
137+
}
138+

clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p2_OR_sme2p2_RP___sme_AND_LP_sve2p2_OR_sme2p2_RP.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,24 @@
1010
// Properties: guard="sve,(sve2p2|sme2p2)" streaming_guard="sme,(sve2p2|sme2p2)" flags="feature-dependent"
1111

1212
void test(void) {
13+
svbfloat16_t svbfloat16_t_val;
1314
svbool_t svbool_t_val;
15+
svfloat16_t svfloat16_t_val;
16+
svfloat32_t svfloat32_t_val;
17+
svfloat64_t svfloat64_t_val;
1418

19+
svcvtlt_f32_f16_z(svbool_t_val, svfloat16_t_val);
20+
svcvtlt_f32_z(svbool_t_val, svfloat16_t_val);
21+
svcvtlt_f64_f32_z(svbool_t_val, svfloat32_t_val);
22+
svcvtlt_f64_z(svbool_t_val, svfloat32_t_val);
23+
svcvtnt_bf16_f32_z(svbfloat16_t_val, svbool_t_val, svfloat32_t_val);
24+
svcvtnt_bf16_z(svbfloat16_t_val, svbool_t_val, svfloat32_t_val);
25+
svcvtnt_f16_f32_z(svfloat16_t_val, svbool_t_val, svfloat32_t_val);
26+
svcvtnt_f16_z(svfloat16_t_val, svbool_t_val, svfloat32_t_val);
27+
svcvtnt_f32_f64_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
28+
svcvtnt_f32_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
29+
svcvtxnt_f32_f64_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
30+
svcvtxnt_f32_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
1531
svfirstp_b8(svbool_t_val, svbool_t_val);
1632
svfirstp_b16(svbool_t_val, svbool_t_val);
1733
svfirstp_b32(svbool_t_val, svbool_t_val);
@@ -23,8 +39,24 @@ void test(void) {
2339
}
2440

2541
void test_streaming(void) __arm_streaming{
42+
svbfloat16_t svbfloat16_t_val;
2643
svbool_t svbool_t_val;
44+
svfloat16_t svfloat16_t_val;
45+
svfloat32_t svfloat32_t_val;
46+
svfloat64_t svfloat64_t_val;
2747

48+
svcvtlt_f32_f16_z(svbool_t_val, svfloat16_t_val);
49+
svcvtlt_f32_z(svbool_t_val, svfloat16_t_val);
50+
svcvtlt_f64_f32_z(svbool_t_val, svfloat32_t_val);
51+
svcvtlt_f64_z(svbool_t_val, svfloat32_t_val);
52+
svcvtnt_bf16_f32_z(svbfloat16_t_val, svbool_t_val, svfloat32_t_val);
53+
svcvtnt_bf16_z(svbfloat16_t_val, svbool_t_val, svfloat32_t_val);
54+
svcvtnt_f16_f32_z(svfloat16_t_val, svbool_t_val, svfloat32_t_val);
55+
svcvtnt_f16_z(svfloat16_t_val, svbool_t_val, svfloat32_t_val);
56+
svcvtnt_f32_f64_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
57+
svcvtnt_f32_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
58+
svcvtxnt_f32_f64_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
59+
svcvtxnt_f32_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
2860
svfirstp_b8(svbool_t_val, svbool_t_val);
2961
svfirstp_b16(svbool_t_val, svbool_t_val);
3062
svfirstp_b32(svbool_t_val, svbool_t_val);
@@ -36,8 +68,24 @@ void test_streaming(void) __arm_streaming{
3668
}
3769

3870
void test_streaming_compatible(void) __arm_streaming_compatible{
71+
svbfloat16_t svbfloat16_t_val;
3972
svbool_t svbool_t_val;
73+
svfloat16_t svfloat16_t_val;
74+
svfloat32_t svfloat32_t_val;
75+
svfloat64_t svfloat64_t_val;
4076

77+
svcvtlt_f32_f16_z(svbool_t_val, svfloat16_t_val);
78+
svcvtlt_f32_z(svbool_t_val, svfloat16_t_val);
79+
svcvtlt_f64_f32_z(svbool_t_val, svfloat32_t_val);
80+
svcvtlt_f64_z(svbool_t_val, svfloat32_t_val);
81+
svcvtnt_bf16_f32_z(svbfloat16_t_val, svbool_t_val, svfloat32_t_val);
82+
svcvtnt_bf16_z(svbfloat16_t_val, svbool_t_val, svfloat32_t_val);
83+
svcvtnt_f16_f32_z(svfloat16_t_val, svbool_t_val, svfloat32_t_val);
84+
svcvtnt_f16_z(svfloat16_t_val, svbool_t_val, svfloat32_t_val);
85+
svcvtnt_f32_f64_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
86+
svcvtnt_f32_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
87+
svcvtxnt_f32_f64_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
88+
svcvtxnt_f32_z(svfloat32_t_val, svbool_t_val, svfloat64_t_val);
4189
svfirstp_b8(svbool_t_val, svbool_t_val);
4290
svfirstp_b16(svbool_t_val, svbool_t_val);
4391
svfirstp_b32(svbool_t_val, svbool_t_val);

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2213,6 +2213,7 @@ def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1
22132213

22142214
def int_aarch64_sve_fcvt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
22152215
def int_aarch64_sve_fcvtnt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
2216+
def int_aarch64_sve_fcvtnt_z_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
22162217

22172218
def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
22182219
def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
@@ -2230,10 +2231,13 @@ def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1
22302231
def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
22312232
def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
22322233
def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
2234+
def int_aarch64_sve_fcvtnt_z_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
22332235
def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2236+
def int_aarch64_sve_fcvtnt_z_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
22342237

2235-
def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2236-
def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2238+
def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2239+
def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2240+
def int_aarch64_sve_fcvtxnt_z_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
22372241

22382242
def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
22392243
def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4582,17 +4582,17 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
45824582
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
45834583

45844584
// SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
4585-
defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">;
4586-
def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>;
4585+
defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt", "int_aarch64_sve_fcvtnt_z">;
4586+
defm FCVTXNT_ZPzZ : sve_float_convert_top<"fcvtxnt", int_aarch64_sve_fcvtxnt_z_f32f64>;
45874587
// Placing even
45884588
defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
45894589

45904590
// SVE2p2 floating-point convert precision up, zeroing predicate
45914591
defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">;
45924592

45934593
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
4594-
def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>;
4595-
defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
4594+
defm BFCVTNT_ZPzZ_StoH : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>;
4595+
defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
45964596

45974597
// Floating-point convert to integer, zeroing predicate
45984598
defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs", "int_aarch64_sve_fcvtzs", AArch64fcvtzs_mt>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2902,9 +2902,11 @@ multiclass sve2_fp_convert_up_long_z<string asm, string op> {
29022902
defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
29032903
}
29042904

2905-
multiclass sve2_fp_convert_down_narrow_z<string asm> {
2905+
multiclass sve2_fp_convert_down_narrow_z<string asm, string op> {
29062906
def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>;
29072907
def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
2908+
def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
2909+
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
29082910
}
29092911

29102912
//===----------------------------------------------------------------------===//
@@ -9556,10 +9558,16 @@ multiclass sve_bfloat_convert<string asm, SDPatternOperator op, SDPatternOperato
95569558
def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
95579559
}
95589560

9559-
multiclass sve_bfloat_convert_top<string asm, SDPatternOperator op> {
9560-
def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
9561+
multiclass sve_bfloat_convert_top<string asm, SDPatternOperator ir_op, bit op = true, bit destructive = op> {
9562+
def NAME : sve2_fp_convert_precision<0b1010, op, asm, ZPR16, ZPR32, destructive>;
95619563

9562-
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9564+
def : SVE_3_Op_Pat<nxv8bf16, ir_op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9565+
}
9566+
9567+
multiclass sve_float_convert_top<string asm, SDPatternOperator ir_op> {
9568+
def _StoD : sve2_fp_convert_precision<0b0010, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
9569+
9570+
def : SVE_3_Op_Pat<nxv4f32, ir_op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _StoD)>;
95639571
}
95649572

95659573
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)