Skip to content

Commit b127081

Browse files
committed
[DAGCombiner] Fold fp-uint-fp to fabs + ftrunc
1 parent 37413f3 commit b127081

File tree

5 files changed

+265
-145
lines changed

5 files changed

+265
-145
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18865,10 +18865,9 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
1886518865
// We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
1886618866
// If NoSignedZerosFPMath is enabled, this is a direct replacement.
1886718867
// Otherwise, for strict math, we must handle edge cases:
18868-
// 1. For signed conversions, clamp out-of-range values to the valid
18869-
// integer range before the trunc.
18870-
// 2. For unsigned conversions, use FABS. A negative float becomes integer 0,
18871-
// which must convert back to +0.0. FTRUNC on its own could produce -0.0.
18868+
// 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
18869+
// as an example: it first becomes integer 0, then converts back to +0.0.
18870+
// FTRUNC on its own could produce -0.0.
1887218871

1887318872
// FIXME: We should be able to use node-level FMF here.
1887418873
EVT VT = N->getValueType(0);
@@ -18882,24 +18881,6 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
1888218881
N0.getOperand(0).getValueType() == VT) {
1888318882
if (DAG.getTarget().Options.NoSignedZerosFPMath)
1888418883
return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18885-
18886-
// Strict math: clamp to the signed integer range before truncating.
18887-
unsigned IntWidth = N0.getValueSizeInBits();
18888-
APInt APMax = APInt::getSignedMaxValue(IntWidth);
18889-
APInt APMin = APInt::getSignedMinValue(IntWidth);
18890-
18891-
APFloat MaxAPF(VT.getFltSemantics());
18892-
MaxAPF.convertFromAPInt(APMax, true, APFloat::rmTowardZero);
18893-
APFloat MinAPF(VT.getFltSemantics());
18894-
MinAPF.convertFromAPInt(APMin, true, APFloat::rmTowardZero);
18895-
18896-
SDValue MaxFP = DAG.getConstantFP(MaxAPF, DL, VT);
18897-
SDValue MinFP = DAG.getConstantFP(MinAPF, DL, VT);
18898-
18899-
SDValue Clamped = DAG.getNode(
18900-
ISD::FMINNUM, DL, VT,
18901-
DAG.getNode(ISD::FMAXNUM, DL, VT, N0->getOperand(0), MinFP), MaxFP);
18902-
return DAG.getNode(ISD::FTRUNC, DL, VT, Clamped);
1890318884
}
1890418885

1890518886
if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&

llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll

Lines changed: 9 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,8 @@
44
define double @t1(double %x) {
55
; CHECK-LABEL: t1:
66
; CHECK: // %bb.0: // %entry
7-
; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000
8-
; CHECK-NEXT: fmov d1, x8
9-
; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff
10-
; CHECK-NEXT: fmaxnm d0, d0, d1
11-
; CHECK-NEXT: fmov d1, x8
12-
; CHECK-NEXT: fminnm d0, d0, d1
13-
; CHECK-NEXT: frintz d0, d0
7+
; CHECK-NEXT: fcvtzs d0, d0
8+
; CHECK-NEXT: scvtf d0, d0
149
; CHECK-NEXT: ret
1510
entry:
1611
%conv = fptosi double %x to i64
@@ -21,12 +16,8 @@ entry:
2116
define float @t2(float %x) {
2217
; CHECK-LABEL: t2:
2318
; CHECK: // %bb.0: // %entry
24-
; CHECK-NEXT: movi v1.2s, #207, lsl #24
25-
; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff
26-
; CHECK-NEXT: fmaxnm s0, s0, s1
27-
; CHECK-NEXT: fmov s1, w8
28-
; CHECK-NEXT: fminnm s0, s0, s1
29-
; CHECK-NEXT: frintz s0, s0
19+
; CHECK-NEXT: fcvtzs s0, s0
20+
; CHECK-NEXT: scvtf s0, s0
3021
; CHECK-NEXT: ret
3122
entry:
3223
%conv = fptosi float %x to i32
@@ -37,13 +28,8 @@ entry:
3728
define half @t3(half %x) {
3829
; CHECK-LABEL: t3:
3930
; CHECK: // %bb.0: // %entry
40-
; CHECK-NEXT: mov w8, #64511 // =0xfbff
41-
; CHECK-NEXT: fmov h1, w8
42-
; CHECK-NEXT: mov w8, #31743 // =0x7bff
43-
; CHECK-NEXT: fmaxnm h0, h0, h1
44-
; CHECK-NEXT: fmov h1, w8
45-
; CHECK-NEXT: fminnm h0, h0, h1
46-
; CHECK-NEXT: frintz h0, h0
31+
; CHECK-NEXT: fcvtzs h0, h0
32+
; CHECK-NEXT: scvtf h0, h0
4733
; CHECK-NEXT: ret
4834
entry:
4935
%conv = fptosi half %x to i32
@@ -184,14 +170,8 @@ entry:
184170
define i64 @tests_f64_multiuse(double %x) {
185171
; CHECK-LABEL: tests_f64_multiuse:
186172
; CHECK: // %bb.0: // %entry
187-
; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000
188-
; CHECK-NEXT: fmov d1, x8
189-
; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff
190-
; CHECK-NEXT: fmov d2, x8
191173
; CHECK-NEXT: fcvtzs x8, d0
192-
; CHECK-NEXT: fmaxnm d1, d0, d1
193-
; CHECK-NEXT: fminnm d1, d1, d2
194-
; CHECK-NEXT: frintz d1, d1
174+
; CHECK-NEXT: scvtf d1, x8
195175
; CHECK-NEXT: fcmp d0, d1
196176
; CHECK-NEXT: csel x0, x8, xzr, eq
197177
; CHECK-NEXT: ret
@@ -206,13 +186,8 @@ entry:
206186
define i32 @tests_f32_multiuse(float %x) {
207187
; CHECK-LABEL: tests_f32_multiuse:
208188
; CHECK: // %bb.0: // %entry
209-
; CHECK-NEXT: movi v1.2s, #207, lsl #24
210-
; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff
211-
; CHECK-NEXT: fmov s2, w8
212189
; CHECK-NEXT: fcvtzs w8, s0
213-
; CHECK-NEXT: fmaxnm s1, s0, s1
214-
; CHECK-NEXT: fminnm s1, s1, s2
215-
; CHECK-NEXT: frintz s1, s1
190+
; CHECK-NEXT: scvtf s1, w8
216191
; CHECK-NEXT: fcmp s0, s1
217192
; CHECK-NEXT: csel w0, w8, wzr, eq
218193
; CHECK-NEXT: ret
@@ -227,14 +202,8 @@ entry:
227202
define i32 @tests_f16_multiuse(half %x) {
228203
; CHECK-LABEL: tests_f16_multiuse:
229204
; CHECK: // %bb.0: // %entry
230-
; CHECK-NEXT: mov w8, #64511 // =0xfbff
231-
; CHECK-NEXT: fmov h1, w8
232-
; CHECK-NEXT: mov w8, #31743 // =0x7bff
233-
; CHECK-NEXT: fmov h2, w8
234205
; CHECK-NEXT: fcvtzs w8, h0
235-
; CHECK-NEXT: fmaxnm h1, h0, h1
236-
; CHECK-NEXT: fminnm h1, h1, h2
237-
; CHECK-NEXT: frintz h1, h1
206+
; CHECK-NEXT: scvtf h1, w8
238207
; CHECK-NEXT: fcmp h0, h1
239208
; CHECK-NEXT: csel w0, w8, wzr, eq
240209
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll

Lines changed: 27 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -9,35 +9,23 @@ target triple = "aarch64-unknown-linux-gnu"
99
define double @t1(double %x) {
1010
; CHECK-LABEL: t1:
1111
; CHECK: // %bb.0: // %entry
12-
; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000
13-
; CHECK-NEXT: fmov d1, x8
14-
; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff
15-
; CHECK-NEXT: fmaxnm d0, d0, d1
16-
; CHECK-NEXT: fmov d1, x8
17-
; CHECK-NEXT: fminnm d0, d0, d1
18-
; CHECK-NEXT: frintz d0, d0
12+
; CHECK-NEXT: ptrue p0.d
13+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
14+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
15+
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
16+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1917
; CHECK-NEXT: ret
2018
;
2119
; USE-NEON-NO-GPRS-LABEL: t1:
2220
; USE-NEON-NO-GPRS: // %bb.0: // %entry
23-
; USE-NEON-NO-GPRS-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000
24-
; USE-NEON-NO-GPRS-NEXT: fmov d1, x8
25-
; USE-NEON-NO-GPRS-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff
26-
; USE-NEON-NO-GPRS-NEXT: fmaxnm d0, d0, d1
27-
; USE-NEON-NO-GPRS-NEXT: fmov d1, x8
28-
; USE-NEON-NO-GPRS-NEXT: fminnm d0, d0, d1
29-
; USE-NEON-NO-GPRS-NEXT: frintz d0, d0
21+
; USE-NEON-NO-GPRS-NEXT: fcvtzs d0, d0
22+
; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0
3023
; USE-NEON-NO-GPRS-NEXT: ret
3124
;
3225
; NONEON-NOSVE-LABEL: t1:
3326
; NONEON-NOSVE: // %bb.0: // %entry
34-
; NONEON-NOSVE-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000
35-
; NONEON-NOSVE-NEXT: fmov d1, x8
36-
; NONEON-NOSVE-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff
37-
; NONEON-NOSVE-NEXT: fmaxnm d0, d0, d1
38-
; NONEON-NOSVE-NEXT: fmov d1, x8
39-
; NONEON-NOSVE-NEXT: fminnm d0, d0, d1
40-
; NONEON-NOSVE-NEXT: frintz d0, d0
27+
; NONEON-NOSVE-NEXT: fcvtzs x8, d0
28+
; NONEON-NOSVE-NEXT: scvtf d0, x8
4129
; NONEON-NOSVE-NEXT: ret
4230
entry:
4331
%conv = fptosi double %x to i64
@@ -48,35 +36,23 @@ entry:
4836
define float @t2(float %x) {
4937
; CHECK-LABEL: t2:
5038
; CHECK: // %bb.0: // %entry
51-
; CHECK-NEXT: mov w8, #-822083584 // =0xcf000000
52-
; CHECK-NEXT: fmov s1, w8
53-
; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff
54-
; CHECK-NEXT: fmaxnm s0, s0, s1
55-
; CHECK-NEXT: fmov s1, w8
56-
; CHECK-NEXT: fminnm s0, s0, s1
57-
; CHECK-NEXT: frintz s0, s0
39+
; CHECK-NEXT: ptrue p0.s
40+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
41+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
42+
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
43+
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
5844
; CHECK-NEXT: ret
5945
;
6046
; USE-NEON-NO-GPRS-LABEL: t2:
6147
; USE-NEON-NO-GPRS: // %bb.0: // %entry
62-
; USE-NEON-NO-GPRS-NEXT: mov w8, #-822083584 // =0xcf000000
63-
; USE-NEON-NO-GPRS-NEXT: fmov s1, w8
64-
; USE-NEON-NO-GPRS-NEXT: mov w8, #1325400063 // =0x4effffff
65-
; USE-NEON-NO-GPRS-NEXT: fmaxnm s0, s0, s1
66-
; USE-NEON-NO-GPRS-NEXT: fmov s1, w8
67-
; USE-NEON-NO-GPRS-NEXT: fminnm s0, s0, s1
68-
; USE-NEON-NO-GPRS-NEXT: frintz s0, s0
48+
; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
49+
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
6950
; USE-NEON-NO-GPRS-NEXT: ret
7051
;
7152
; NONEON-NOSVE-LABEL: t2:
7253
; NONEON-NOSVE: // %bb.0: // %entry
73-
; NONEON-NOSVE-NEXT: mov w8, #-822083584 // =0xcf000000
74-
; NONEON-NOSVE-NEXT: fmov s1, w8
75-
; NONEON-NOSVE-NEXT: mov w8, #1325400063 // =0x4effffff
76-
; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
77-
; NONEON-NOSVE-NEXT: fmov s1, w8
78-
; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
79-
; NONEON-NOSVE-NEXT: frintz s0, s0
54+
; NONEON-NOSVE-NEXT: fcvtzs w8, s0
55+
; NONEON-NOSVE-NEXT: scvtf s0, w8
8056
; NONEON-NOSVE-NEXT: ret
8157
entry:
8258
%conv = fptosi float %x to i32
@@ -87,36 +63,24 @@ entry:
8763
define half @t3(half %x) {
8864
; CHECK-LABEL: t3:
8965
; CHECK: // %bb.0: // %entry
90-
; CHECK-NEXT: adrp x8, .LCPI2_0
91-
; CHECK-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
92-
; CHECK-NEXT: adrp x8, .LCPI2_1
93-
; CHECK-NEXT: fmaxnm h0, h0, h1
94-
; CHECK-NEXT: ldr h1, [x8, :lo12:.LCPI2_1]
95-
; CHECK-NEXT: fminnm h0, h0, h1
96-
; CHECK-NEXT: frintz h0, h0
66+
; CHECK-NEXT: ptrue p0.s
67+
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
68+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
69+
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
70+
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
9771
; CHECK-NEXT: ret
9872
;
9973
; USE-NEON-NO-GPRS-LABEL: t3:
10074
; USE-NEON-NO-GPRS: // %bb.0: // %entry
101-
; USE-NEON-NO-GPRS-NEXT: adrp x8, .LCPI2_0
102-
; USE-NEON-NO-GPRS-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
103-
; USE-NEON-NO-GPRS-NEXT: adrp x8, .LCPI2_1
104-
; USE-NEON-NO-GPRS-NEXT: fmaxnm h0, h0, h1
105-
; USE-NEON-NO-GPRS-NEXT: ldr h1, [x8, :lo12:.LCPI2_1]
106-
; USE-NEON-NO-GPRS-NEXT: fminnm h0, h0, h1
107-
; USE-NEON-NO-GPRS-NEXT: frintz h0, h0
75+
; USE-NEON-NO-GPRS-NEXT: fcvtzs h0, h0
76+
; USE-NEON-NO-GPRS-NEXT: scvtf h0, h0
10877
; USE-NEON-NO-GPRS-NEXT: ret
10978
;
11079
; NONEON-NOSVE-LABEL: t3:
11180
; NONEON-NOSVE: // %bb.0: // %entry
11281
; NONEON-NOSVE-NEXT: fcvt s0, h0
113-
; NONEON-NOSVE-NEXT: mov w8, #-822083584 // =0xcf000000
114-
; NONEON-NOSVE-NEXT: fmov s1, w8
115-
; NONEON-NOSVE-NEXT: mov w8, #1325400063 // =0x4effffff
116-
; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
117-
; NONEON-NOSVE-NEXT: fmov s1, w8
118-
; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
119-
; NONEON-NOSVE-NEXT: frintz s0, s0
82+
; NONEON-NOSVE-NEXT: fcvtzs w8, s0
83+
; NONEON-NOSVE-NEXT: scvtf s0, w8
12084
; NONEON-NOSVE-NEXT: fcvt h0, s0
12185
; NONEON-NOSVE-NEXT: ret
12286
entry:

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,15 @@ define half @test_pow_fast_f16__integral_y(half %x, i32 %y.i) {
6060
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
6161
; CHECK-NEXT: v_log_f16_e64 v3, |v0|
6262
; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
63-
; CHECK-NEXT: v_cvt_f32_f16_e32 v2, v1
64-
; CHECK-NEXT: v_max_f16_e32 v1, 0xfbff, v1
65-
; CHECK-NEXT: v_min_f16_e32 v1, 0x7bff, v1
66-
; CHECK-NEXT: v_trunc_f16_e32 v1, v1
67-
; CHECK-NEXT: v_cvt_i32_f32_e32 v2, v2
68-
; CHECK-NEXT: v_mul_f16_e32 v1, v3, v1
69-
; CHECK-NEXT: v_exp_f16_e32 v1, v1
70-
; CHECK-NEXT: v_lshlrev_b16_e32 v2, 15, v2
71-
; CHECK-NEXT: v_and_b32_e32 v0, v2, v0
72-
; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
63+
; CHECK-NEXT: v_cvt_f32_f16_e32 v1, v1
64+
; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
65+
; CHECK-NEXT: v_cvt_f32_i32_e32 v2, v1
66+
; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v1
67+
; CHECK-NEXT: v_and_b32_e32 v0, v1, v0
68+
; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2
69+
; CHECK-NEXT: v_mul_f16_e32 v2, v3, v2
70+
; CHECK-NEXT: v_exp_f16_e32 v2, v2
71+
; CHECK-NEXT: v_or_b32_e32 v0, v0, v2
7372
; CHECK-NEXT: s_setpc_b64 s[30:31]
7473
%y = sitofp i32 %y.i to half
7574
%pow = tail call fast half @_Z3powDhDh(half %x, half %y)
@@ -80,30 +79,28 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
8079
; CHECK-LABEL: test_pow_fast_f32__integral_y:
8180
; CHECK: ; %bb.0:
8281
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82+
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
8383
; CHECK-NEXT: s_mov_b32 s4, 0x800000
8484
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
8585
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
86+
; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
8687
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
8788
; CHECK-NEXT: v_log_f32_e32 v3, v3
88-
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
8989
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
90+
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
9091
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
9192
; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
92-
; CHECK-NEXT: v_max_f32_e32 v3, 0xcf000000, v1
93-
; CHECK-NEXT: v_min_f32_e32 v3, 0x4effffff, v3
94-
; CHECK-NEXT: v_trunc_f32_e32 v3, v3
95-
; CHECK-NEXT: v_mul_f32_e32 v4, v2, v3
9693
; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
94+
; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
9795
; CHECK-NEXT: v_mov_b32_e32 v5, 0x42800000
98-
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
99-
; CHECK-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
100-
; CHECK-NEXT: v_fma_f32 v2, v2, v3, v4
96+
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
97+
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
98+
; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3
10199
; CHECK-NEXT: v_exp_f32_e32 v2, v2
102-
; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
103100
; CHECK-NEXT: v_not_b32_e32 v3, 63
104101
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
105-
; CHECK-NEXT: v_ldexp_f32 v2, v2, v3
106102
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
103+
; CHECK-NEXT: v_ldexp_f32 v2, v2, v3
107104
; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2
108105
; CHECK-NEXT: s_setpc_b64 s[30:31]
109106
%y = sitofp i32 %y.i to float

0 commit comments

Comments
 (0)