Skip to content

Commit 84e4c06

Browse files
authored
[DAGCombiner] Remove NoSignedZerosFPMath uses in visitFSUB (#160974)
Remove the uses of NoSignedZerosFPMath in visitFSUB; we should always rely on instruction-level fast-math flags instead.
1 parent c20ef94 commit 84e4c06

File tree

4 files changed

+28
-16
lines changed

4 files changed

+28
-16
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17983,8 +17983,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
1798317983

1798417984
// (fsub A, 0) -> A
1798517985
if (N1CFP && N1CFP->isZero()) {
17986-
if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17987-
Flags.hasNoSignedZeros()) {
17986+
if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
1798817987
return N0;
1798917988
}
1799017989
}
@@ -17997,8 +17996,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
1799717996

1799817997
// (fsub -0.0, N1) -> -N1
1799917998
if (N0CFP && N0CFP->isZero()) {
18000-
if (N0CFP->isNegative() ||
18001-
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
17999+
if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) {
1800218000
// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
1800318001
// flushed to zero, unless all users treat denorms as zero (DAZ).
1800418002
// FIXME: This transform will change the sign of a NaN and the behavior
@@ -18014,8 +18012,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
1801418012
}
1801518013
}
1801618014

18017-
if ((Options.NoSignedZerosFPMath ||
18018-
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
18015+
if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
1801918016
N1.getOpcode() == ISD::FADD) {
1802018017
// X - (X + Y) -> -Y
1802118018
if (N0 == N1->getOperand(0))

llvm/test/CodeGen/AMDGPU/fneg-combines.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5015,7 +5015,7 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out
50155015
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
50165016
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
50175017
%a = load volatile double, ptr addrspace(1) %a.gep
5018-
%fneg.a = fsub double -0.000000e+00, %a
5018+
%fneg.a = fsub nsz double -0.000000e+00, %a
50195019
%fpround = fptrunc double %fneg.a to float
50205020
%fneg = fneg float %fpround
50215021
store float %fneg, ptr addrspace(1) %out.gep

llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4441,25 +4441,40 @@ define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) {
44414441
ret float %i3
44424442
}
44434443

4444-
define float @v_fmul_0_fsub_0_infloop_regression(float %arg) {
4445-
; GCN-SAFE-LABEL: v_fmul_0_fsub_0_infloop_regression:
4444+
define float @v_fmul_0_fsub_0_safe_infloop_regression(float %arg) {
4445+
; GCN-SAFE-LABEL: v_fmul_0_fsub_0_safe_infloop_regression:
44464446
; GCN-SAFE: ; %bb.0: ; %bb
44474447
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44484448
; GCN-SAFE-NEXT: v_mul_f32_e32 v0, 0, v0
44494449
; GCN-SAFE-NEXT: v_sub_f32_e32 v0, 0, v0
44504450
; GCN-SAFE-NEXT: s_setpc_b64 s[30:31]
44514451
;
4452-
; GCN-NSZ-LABEL: v_fmul_0_fsub_0_infloop_regression:
4453-
; GCN-NSZ: ; %bb.0: ; %bb
4454-
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4455-
; GCN-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
4456-
; GCN-NSZ-NEXT: s_setpc_b64 s[30:31]
4452+
; SI-NSZ-LABEL: v_fmul_0_fsub_0_safe_infloop_regression:
4453+
; SI-NSZ: ; %bb.0: ; %bb
4454+
; SI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4455+
; SI-NSZ-NEXT: s_brev_b32 s4, 1
4456+
; SI-NSZ-NEXT: v_fma_f32 v0, v0, s4, 0
4457+
; SI-NSZ-NEXT: s_setpc_b64 s[30:31]
4458+
; FIXME: utils/update_llc_test_checks.py generates redundant VI check
4459+
; labels here. Remove them by hand; otherwise they cause the test to fail.
44574460
bb:
44584461
%i = fmul float %arg, 0.0
44594462
%i1 = fsub float 0.0, %i
44604463
ret float %i1
44614464
}
44624465

4466+
define float @v_fmul_0_fsub_0_nsz_infloop_regression(float %arg) {
4467+
; GCN-LABEL: v_fmul_0_fsub_0_nsz_infloop_regression:
4468+
; GCN: ; %bb.0: ; %bb
4469+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4470+
; GCN-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
4471+
; GCN-NEXT: s_setpc_b64 s[30:31]
4472+
bb:
4473+
%i = fmul float %arg, 0.0
4474+
%i1 = fsub nsz float 0.0, %i
4475+
ret float %i1
4476+
}
4477+
44634478
declare i32 @llvm.amdgcn.workitem.id.x() #1
44644479
declare float @llvm.fma.f32(float, float, float) #1
44654480
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

llvm/test/CodeGen/X86/vec_unsafe-fp-math.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -enable-unsafe-fp-math -enable-no-signed-zeros-fp-math -mtriple=x86_64-unknown-unknown | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
33

44
; Make sure that vectors get the same benefits as scalars when using unsafe-fp-math.
55

@@ -18,7 +18,7 @@ define <4 x float> @vec_fneg(<4 x float> %x) {
1818
; CHECK: # %bb.0:
1919
; CHECK-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2020
; CHECK-NEXT: retq
21-
%sub = fsub <4 x float> zeroinitializer, %x
21+
%sub = fsub nsz <4 x float> zeroinitializer, %x
2222
ret <4 x float> %sub
2323
}
2424

0 commit comments

Comments
 (0)