Skip to content

Commit 2766002

Browse files
authored
[AArch64]SIMD fpcvt codegen for rounding nodes (#165546)
This is a follow-up patch to #157680; it allows SIMD fpcvt instructions to be generated from l/llround and l/llrint nodes.
1 parent 1b82c16 commit 2766002

File tree

3 files changed

+615
-0
lines changed

3 files changed

+615
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6841,6 +6841,49 @@ defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "F
68416841
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
68426842
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
68436843

6844+
// lround/llround whose integer result is immediately bitcast to an FP type of
// the same width: select an FCVTAS form that produces the f32/f64 value
// directly. The source/result combinations in this block (f16 -> i32/i64,
// f32 -> i64, f64 -> i32) are only selected when HasFPRCVT is available.
let Predicates = [HasFPRCVT] in {
6845+
def : Pat<(f32 (bitconvert (i32 (any_lround f16:$Rn)))),
6846+
(FCVTASSHr f16:$Rn)>;
6847+
def : Pat<(f64 (bitconvert (i64 (any_lround f16:$Rn)))),
6848+
(FCVTASDHr f16:$Rn)>;
6849+
def : Pat<(f64 (bitconvert (i64 (any_llround f16:$Rn)))),
6850+
(FCVTASDHr f16:$Rn)>;
6851+
def : Pat<(f64 (bitconvert (i64 (any_lround f32:$Rn)))),
6852+
(FCVTASDSr f32:$Rn)>;
6853+
def : Pat<(f32 (bitconvert (i32 (any_lround f64:$Rn)))),
6854+
(FCVTASSDr f64:$Rn)>;
6855+
def : Pat<(f64 (bitconvert (i64 (any_llround f32:$Rn)))),
6856+
(FCVTASDSr f32:$Rn)>;
6857+
}
6858+
// Same-width cases (f32 -> i32, f64 -> i64) sit outside the HasFPRCVT block
// and map to the FCVTASv1i32 / FCVTASv1i64 instructions instead.
def : Pat<(f32 (bitconvert (i32 (any_lround f32:$Rn)))),
6859+
(FCVTASv1i32 f32:$Rn)>;
6860+
def : Pat<(f64 (bitconvert (i64 (any_lround f64:$Rn)))),
6861+
(FCVTASv1i64 f64:$Rn)>;
6862+
def : Pat<(f64 (bitconvert (i64 (any_llround f64:$Rn)))),
6863+
(FCVTASv1i64 f64:$Rn)>;
6864+
6865+
// lrint/llrint whose integer result is immediately bitcast to an FP type of
// the same width: emit FRINTX on the source followed by an FCVTZS form that
// produces the f32/f64 value directly. The source/result combinations in this
// block (f16 -> i32/i64, f32 -> i64, f64 -> i32) are only selected when
// HasFPRCVT is available.
let Predicates = [HasFPRCVT] in {
6866+
def : Pat<(f32 (bitconvert (i32 (any_lrint f16:$Rn)))),
6867+
(FCVTZSSHr (FRINTXHr f16:$Rn))>;
6868+
def : Pat<(f64 (bitconvert (i64 (any_lrint f16:$Rn)))),
6869+
(FCVTZSDHr (FRINTXHr f16:$Rn))>;
6870+
def : Pat<(f64 (bitconvert (i64 (any_llrint f16:$Rn)))),
6871+
(FCVTZSDHr (FRINTXHr f16:$Rn))>;
6872+
def : Pat<(f64 (bitconvert (i64 (any_lrint f32:$Rn)))),
6873+
(FCVTZSDSr (FRINTXSr f32:$Rn))>;
6874+
def : Pat<(f32 (bitconvert (i32 (any_lrint f64:$Rn)))),
6875+
(FCVTZSSDr (FRINTXDr f64:$Rn))>;
6876+
def : Pat<(f64 (bitconvert (i64 (any_llrint f32:$Rn)))),
6877+
(FCVTZSDSr (FRINTXSr f32:$Rn))>;
6878+
}
6879+
// Same-width cases (f32 -> i32, f64 -> i64) sit outside the HasFPRCVT block
// and map to FRINTX followed by FCVTZSv1i32 / FCVTZSv1i64 instead.
def : Pat<(f32 (bitconvert (i32 (any_lrint f32:$Rn)))),
6880+
(FCVTZSv1i32 (FRINTXSr f32:$Rn))>;
6881+
def : Pat<(f64 (bitconvert (i64 (any_lrint f64:$Rn)))),
6882+
(FCVTZSv1i64 (FRINTXDr f64:$Rn))>;
6883+
def : Pat<(f64 (bitconvert (i64 (any_llrint f64:$Rn)))),
6884+
(FCVTZSv1i64 (FRINTXDr f64:$Rn))>;
6885+
6886+
68446887
// f16 -> s16 conversions
68456888
let Predicates = [HasFullFP16] in {
68466889
def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FPRCVT
3+
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFPRCVT
4+
5+
;
6+
; Lround strictfp
7+
;
8+
9+
define float @lround_i32_f16_simd_exp(half %x) {
10+
; CHECK-FPRCVT-LABEL: lround_i32_f16_simd_exp:
11+
; CHECK-FPRCVT: // %bb.0:
12+
; CHECK-FPRCVT-NEXT: fcvtas s0, h0
13+
; CHECK-FPRCVT-NEXT: ret
14+
;
15+
; CHECK-NOFPRCVT-LABEL: lround_i32_f16_simd_exp:
16+
; CHECK-NOFPRCVT: // %bb.0:
17+
; CHECK-NOFPRCVT-NEXT: fcvtas w8, h0
18+
; CHECK-NOFPRCVT-NEXT: fmov s0, w8
19+
; CHECK-NOFPRCVT-NEXT: ret
20+
%val = call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata !"fpexcept.strict")
21+
%sum = bitcast i32 %val to float
22+
ret float %sum
23+
}
24+
25+
define float @lround_i32_f32_simd_exp(float %x) {
26+
; CHECK-LABEL: lround_i32_f32_simd_exp:
27+
; CHECK: // %bb.0:
28+
; CHECK-NEXT: fcvtas s0, s0
29+
; CHECK-NEXT: ret
30+
%val = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict")
31+
%bc = bitcast i32 %val to float
32+
ret float %bc
33+
}
34+
35+
define float @lround_i32_f64_simd_exp(double %x) {
36+
; CHECK-FPRCVT-LABEL: lround_i32_f64_simd_exp:
37+
; CHECK-FPRCVT: // %bb.0:
38+
; CHECK-FPRCVT-NEXT: fcvtas s0, d0
39+
; CHECK-FPRCVT-NEXT: ret
40+
;
41+
; CHECK-NOFPRCVT-LABEL: lround_i32_f64_simd_exp:
42+
; CHECK-NOFPRCVT: // %bb.0:
43+
; CHECK-NOFPRCVT-NEXT: fcvtas w8, d0
44+
; CHECK-NOFPRCVT-NEXT: fmov s0, w8
45+
; CHECK-NOFPRCVT-NEXT: ret
46+
%val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict")
47+
%bc = bitcast i32 %val to float
48+
ret float %bc
49+
}
50+
51+
define double @lround_i64_f16_simd_exp(half %x) {
52+
; CHECK-FPRCVT-LABEL: lround_i64_f16_simd_exp:
53+
; CHECK-FPRCVT: // %bb.0:
54+
; CHECK-FPRCVT-NEXT: fcvtas d0, h0
55+
; CHECK-FPRCVT-NEXT: ret
56+
;
57+
; CHECK-NOFPRCVT-LABEL: lround_i64_f16_simd_exp:
58+
; CHECK-NOFPRCVT: // %bb.0:
59+
; CHECK-NOFPRCVT-NEXT: fcvtas x8, h0
60+
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
61+
; CHECK-NOFPRCVT-NEXT: ret
62+
%val = call i64 @llvm.experimental.constrained.lround.i64.f16(half %x, metadata !"fpexcept.strict")
63+
%bc = bitcast i64 %val to double
64+
ret double %bc
65+
}
66+
67+
define double @lround_i64_f32_simd_exp(float %x) {
68+
; CHECK-FPRCVT-LABEL: lround_i64_f32_simd_exp:
69+
; CHECK-FPRCVT: // %bb.0:
70+
; CHECK-FPRCVT-NEXT: fcvtas d0, s0
71+
; CHECK-FPRCVT-NEXT: ret
72+
;
73+
; CHECK-NOFPRCVT-LABEL: lround_i64_f32_simd_exp:
74+
; CHECK-NOFPRCVT: // %bb.0:
75+
; CHECK-NOFPRCVT-NEXT: fcvtas x8, s0
76+
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
77+
; CHECK-NOFPRCVT-NEXT: ret
78+
%val = call i64 @llvm.experimental.constrained.lround.i64.f32(float %x, metadata !"fpexcept.strict")
79+
%bc = bitcast i64 %val to double
80+
ret double %bc
81+
}
82+
83+
define double @lround_i64_f64_simd_exp(double %x) {
84+
; CHECK-LABEL: lround_i64_f64_simd_exp:
85+
; CHECK: // %bb.0:
86+
; CHECK-NEXT: fcvtas d0, d0
87+
; CHECK-NEXT: ret
88+
%val = call i64 @llvm.experimental.constrained.lround.i64.f64(double %x, metadata !"fpexcept.strict")
89+
%bc = bitcast i64 %val to double
90+
ret double %bc
91+
}
92+
93+
;
94+
; Llround strictfp
95+
;
96+
97+
define double @llround_i64_f16_simd_exp(half %x) {
98+
; CHECK-FPRCVT-LABEL: llround_i64_f16_simd_exp:
99+
; CHECK-FPRCVT: // %bb.0:
100+
; CHECK-FPRCVT-NEXT: fcvtas d0, h0
101+
; CHECK-FPRCVT-NEXT: ret
102+
;
103+
; CHECK-NOFPRCVT-LABEL: llround_i64_f16_simd_exp:
104+
; CHECK-NOFPRCVT: // %bb.0:
105+
; CHECK-NOFPRCVT-NEXT: fcvtas x8, h0
106+
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
107+
; CHECK-NOFPRCVT-NEXT: ret
108+
%val = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata !"fpexcept.strict")
109+
%sum = bitcast i64 %val to double
110+
ret double %sum
111+
}
112+
113+
define double @llround_i64_f32_simd_exp(float %x) {
114+
; CHECK-FPRCVT-LABEL: llround_i64_f32_simd_exp:
115+
; CHECK-FPRCVT: // %bb.0:
116+
; CHECK-FPRCVT-NEXT: fcvtas d0, s0
117+
; CHECK-FPRCVT-NEXT: ret
118+
;
119+
; CHECK-NOFPRCVT-LABEL: llround_i64_f32_simd_exp:
120+
; CHECK-NOFPRCVT: // %bb.0:
121+
; CHECK-NOFPRCVT-NEXT: fcvtas x8, s0
122+
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
123+
; CHECK-NOFPRCVT-NEXT: ret
124+
%val = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata !"fpexcept.strict")
125+
%bc = bitcast i64 %val to double
126+
ret double %bc
127+
}
128+
129+
define double @llround_i64_f64_simd_exp(double %x) {
130+
; CHECK-LABEL: llround_i64_f64_simd_exp:
131+
; CHECK: // %bb.0:
132+
; CHECK-NEXT: fcvtas d0, d0
133+
; CHECK-NEXT: ret
134+
%val = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata !"fpexcept.strict")
135+
%bc = bitcast i64 %val to double
136+
ret double %bc
137+
}
138+
139+
;
140+
; Lrint strictfp
141+
;
142+
143+
define float @lrint_i32_f16_simd_exp(half %x) {
144+
; CHECK-FPRCVT-LABEL: lrint_i32_f16_simd_exp:
145+
; CHECK-FPRCVT: // %bb.0:
146+
; CHECK-FPRCVT-NEXT: frintx h0, h0
147+
; CHECK-FPRCVT-NEXT: fcvtzs s0, h0
148+
; CHECK-FPRCVT-NEXT: ret
149+
;
150+
; CHECK-NOFPRCVT-LABEL: lrint_i32_f16_simd_exp:
151+
; CHECK-NOFPRCVT: // %bb.0:
152+
; CHECK-NOFPRCVT-NEXT: frintx h0, h0
153+
; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0
154+
; CHECK-NOFPRCVT-NEXT: fmov s0, w8
155+
; CHECK-NOFPRCVT-NEXT: ret
156+
%val = call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
157+
%sum = bitcast i32 %val to float
158+
ret float %sum
159+
}
160+
161+
define float @lrint_i32_f32_simd_exp(float %x) {
162+
; CHECK-LABEL: lrint_i32_f32_simd_exp:
163+
; CHECK: // %bb.0:
164+
; CHECK-NEXT: frintx s0, s0
165+
; CHECK-NEXT: fcvtzs s0, s0
166+
; CHECK-NEXT: ret
167+
%val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
168+
%bc = bitcast i32 %val to float
169+
ret float %bc
170+
}
171+
172+
define float @lrint_i32_f64_simd_exp(double %x) {
173+
; CHECK-FPRCVT-LABEL: lrint_i32_f64_simd_exp:
174+
; CHECK-FPRCVT: // %bb.0:
175+
; CHECK-FPRCVT-NEXT: frintx d0, d0
176+
; CHECK-FPRCVT-NEXT: fcvtzs s0, d0
177+
; CHECK-FPRCVT-NEXT: ret
178+
;
179+
; CHECK-NOFPRCVT-LABEL: lrint_i32_f64_simd_exp:
180+
; CHECK-NOFPRCVT: // %bb.0:
181+
; CHECK-NOFPRCVT-NEXT: frintx d0, d0
182+
; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0
183+
; CHECK-NOFPRCVT-NEXT: fmov s0, w8
184+
; CHECK-NOFPRCVT-NEXT: ret
185+
%val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
186+
%bc = bitcast i32 %val to float
187+
ret float %bc
188+
}
189+
190+
; Strict-FP lrint from half to i64 whose result is bitcast to double.
; The intrinsic's overload suffix is spelled .i64. to match the i64 return type.
define double @lrint_i64_f16_simd_exp(half %x) {
191+
; CHECK-FPRCVT-LABEL: lrint_i64_f16_simd_exp:
192+
; CHECK-FPRCVT: // %bb.0:
193+
; CHECK-FPRCVT-NEXT: frintx h0, h0
194+
; CHECK-FPRCVT-NEXT: fcvtzs d0, h0
195+
; CHECK-FPRCVT-NEXT: ret
196+
;
197+
; CHECK-NOFPRCVT-LABEL: lrint_i64_f16_simd_exp:
198+
; CHECK-NOFPRCVT: // %bb.0:
199+
; CHECK-NOFPRCVT-NEXT: frintx h0, h0
200+
; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
201+
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
202+
; CHECK-NOFPRCVT-NEXT: ret
203+
%val = call i64 @llvm.experimental.constrained.lrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
204+
%bc = bitcast i64 %val to double
205+
ret double %bc
206+
}
207+
208+
define double @lrint_i64_f32_simd_exp(float %x) {
209+
; CHECK-FPRCVT-LABEL: lrint_i64_f32_simd_exp:
210+
; CHECK-FPRCVT: // %bb.0:
211+
; CHECK-FPRCVT-NEXT: frintx s0, s0
212+
; CHECK-FPRCVT-NEXT: fcvtzs d0, s0
213+
; CHECK-FPRCVT-NEXT: ret
214+
;
215+
; CHECK-NOFPRCVT-LABEL: lrint_i64_f32_simd_exp:
216+
; CHECK-NOFPRCVT: // %bb.0:
217+
; CHECK-NOFPRCVT-NEXT: frintx s0, s0
218+
; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
219+
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
220+
; CHECK-NOFPRCVT-NEXT: ret
221+
%val = call i64 @llvm.experimental.constrained.lrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
222+
%bc = bitcast i64 %val to double
223+
ret double %bc
224+
}
225+
226+
define double @lrint_i64_f64_simd_exp(double %x) {
227+
; CHECK-LABEL: lrint_i64_f64_simd_exp:
228+
; CHECK: // %bb.0:
229+
; CHECK-NEXT: frintx d0, d0
230+
; CHECK-NEXT: fcvtzs d0, d0
231+
; CHECK-NEXT: ret
232+
%val = call i64 @llvm.experimental.constrained.lrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
233+
%bc = bitcast i64 %val to double
234+
ret double %bc
235+
}
236+
237+
;
238+
; Llrint strictfp
239+
;
240+
241+
define double @llrint_i64_f16_simd_exp(half %x) {
242+
; CHECK-FPRCVT-LABEL: llrint_i64_f16_simd_exp:
243+
; CHECK-FPRCVT: // %bb.0:
244+
; CHECK-FPRCVT-NEXT: frintx h0, h0
245+
; CHECK-FPRCVT-NEXT: fcvtzs d0, h0
246+
; CHECK-FPRCVT-NEXT: ret
247+
;
248+
; CHECK-NOFPRCVT-LABEL: llrint_i64_f16_simd_exp:
249+
; CHECK-NOFPRCVT: // %bb.0:
250+
; CHECK-NOFPRCVT-NEXT: frintx h0, h0
251+
; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0
252+
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
253+
; CHECK-NOFPRCVT-NEXT: ret
254+
%val = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
255+
%sum = bitcast i64 %val to double
256+
ret double %sum
257+
}
258+
259+
define double @llrint_i64_f32_simd_exp(float %x) {
260+
; CHECK-FPRCVT-LABEL: llrint_i64_f32_simd_exp:
261+
; CHECK-FPRCVT: // %bb.0:
262+
; CHECK-FPRCVT-NEXT: frintx s0, s0
263+
; CHECK-FPRCVT-NEXT: fcvtzs d0, s0
264+
; CHECK-FPRCVT-NEXT: ret
265+
;
266+
; CHECK-NOFPRCVT-LABEL: llrint_i64_f32_simd_exp:
267+
; CHECK-NOFPRCVT: // %bb.0:
268+
; CHECK-NOFPRCVT-NEXT: frintx s0, s0
269+
; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0
270+
; CHECK-NOFPRCVT-NEXT: fmov d0, x8
271+
; CHECK-NOFPRCVT-NEXT: ret
272+
%val = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
273+
%bc = bitcast i64 %val to double
274+
ret double %bc
275+
}
276+
277+
define double @llrint_i64_f64_simd_exp(double %x) {
278+
; CHECK-LABEL: llrint_i64_f64_simd_exp:
279+
; CHECK: // %bb.0:
280+
; CHECK-NEXT: frintx d0, d0
281+
; CHECK-NEXT: fcvtzs d0, d0
282+
; CHECK-NEXT: ret
283+
%val = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
284+
%bc = bitcast i64 %val to double
285+
ret double %bc
286+
}

0 commit comments

Comments
 (0)