Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17781,7 +17781,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
if (N1C->isNegative() || Flags.hasNoSignedZeros())
if (N1C->isNegative() || Flags.hasNoSignedZeros() ||
DAG.allUsesSignedZeroInsensitive(SDValue(N, 0)))
return N0;

if (SDValue NewSel = foldBinOpIntoSelect(N))
Expand Down Expand Up @@ -17993,7 +17994,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {

// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
if (!N1CFP->isNegative() || Flags.hasNoSignedZeros() ||
DAG.allUsesSignedZeroInsensitive(SDValue(N, 0))) {
return N0;
}
}
Expand All @@ -18006,7 +18008,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {

// (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) {
if (N0CFP->isNegative() || Flags.hasNoSignedZeros() ||
DAG.allUsesSignedZeroInsensitive(SDValue(N, 0))) {
// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
// flushed to zero, unless all users treat denorms as zero (DAZ).
// FIXME: This transform will change the sign of a NaN and the behavior
Expand Down Expand Up @@ -18654,7 +18657,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}

// Fold X/Sqrt(X) -> Sqrt(X)
if (Flags.hasNoSignedZeros() && Flags.hasAllowReassociation())
if ((Flags.hasNoSignedZeros() ||
DAG.allUsesSignedZeroInsensitive(SDValue(N, 0))) &&
Flags.hasAllowReassociation())
if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
return N1;

Expand Down Expand Up @@ -18706,7 +18711,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
DAG.isKnownToBeAPowerOfTwoFP(N1)) {
bool NeedsCopySign =
!Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
!Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0) &&
!DAG.allUsesSignedZeroInsensitive(SDValue(N, 0));
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
SDValue MLA;
Expand Down
72 changes: 72 additions & 0 deletions llvm/test/CodeGen/AArch64/nsz-bypass.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

; Test that folds which normally require the nsz flag still fire when every use of the result is insensitive to the sign of zero.

; fadd of -0.0 whose only use is an fcmp: the expected output compares the
; incoming argument against 1.0 directly, with no fadd instruction emitted.
; NOTE(review): the constant here is -0.0, and the visitFADD fold already
; accepts a negative zero via N1C->isNegative() without consulting the uses.
; This case presumably folded before the change as well; `fadd float %x, 0.0`
; looks like the input that would actually depend on the new use-based
; check -- confirm.
define i1 @test_fadd_neg_zero_fcmp(float %x) {
; CHECK-LABEL: test_fadd_neg_zero_fcmp:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%add = fadd float %x, -0.0
%cmp = fcmp oeq float %add, 1.0
ret i1 %cmp
}

; fsub of +0.0 whose only use is llvm.fabs: the expected output is a single
; fabs on the incoming argument, with no subtraction emitted.
; NOTE(review): the constant here is +0.0, and the "(fsub A, 0) -> A" fold
; already accepts a non-negative zero via !N1CFP->isNegative() without
; consulting the uses. `fsub float %x, -0.0` looks like the input that would
; actually depend on the new use-based check -- confirm.
define float @test_fsub_zero_fabs(float %x) {
; CHECK-LABEL: test_fsub_zero_fabs:
; CHECK: // %bb.0:
; CHECK-NEXT: fabs s0, s0
; CHECK-NEXT: ret
%sub = fsub float %x, 0.0
%abs = call float @llvm.fabs.f32(float %sub)
ret float %abs
}

; fsub from -0.0 whose only use is the magnitude (first) operand of
; llvm.copysign: the expected output contains only the mask materialisation
; and the bit-insert, with no subtract or negate instruction.
; NOTE(review): the constant here is -0.0, and the "(fsub -0.0, N1) -> -N1"
; fold already accepts a negative zero via N0CFP->isNegative() without
; consulting the uses. `fsub float 0.0, %x` looks like the input that would
; actually depend on the new use-based check -- confirm.
define float @test_fsub_neg_zero_copysign(float %x, float %y) {
; CHECK-LABEL: test_fsub_neg_zero_copysign:
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v2.4s, #128, lsl #24
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%sub = fsub float -0.0, %x
%copysign = call float @llvm.copysign.f32(float %sub, float %y)
ret float %copysign
}

; X / sqrt(X) where the fdiv carries only `reassoc` (no `nsz`) and its only
; use is an fcmp against 0.0. The expected output keeps the fsqrt and compares
; its result with zero; no divide instruction is emitted, i.e. the
; "X/Sqrt(X) -> Sqrt(X)" fold fired even though the nsz flag is absent.
define i1 @test_div_sqrt_fcmp(float %x) {
; CHECK-LABEL: test_div_sqrt_fcmp:
; CHECK: // %bb.0:
; CHECK-NEXT: fsqrt s0, s0
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
%sqrt = call float @llvm.sqrt.f32(float %x)
%div = fdiv reassoc float %x, %sqrt
%cmp = fcmp ogt float %div, 0.0
ret i1 %cmp
}

; frem by the power-of-two constant 2.0, without `nsz`, whose only use is
; llvm.fabs. The expected output is the expanded sequence (multiply by 0.5,
; round toward zero, fused multiply-add with -2.0) followed directly by fabs;
; no additional sign-fixup instruction appears between the fmadd and the fabs.
; NOTE(review): presumably this reflects NeedsCopySign evaluating to false in
; the visitFREM expansion because the sole user ignores the sign of zero --
; confirm against the combiner.
define float @test_frem_fabs(float %x) {
; CHECK-LABEL: test_frem_fabs:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fmov s2, #-2.00000000
; CHECK-NEXT: fmul s1, s0, s1
; CHECK-NEXT: frintz s1, s1
; CHECK-NEXT: fmadd s0, s1, s2, s0
; CHECK-NEXT: fabs s0, s0
; CHECK-NEXT: ret
%rem = frem float %x, 2.0
%abs = call float @llvm.fabs.f32(float %rem)
ret float %abs
}

declare float @llvm.fabs.f32(float)
declare float @llvm.copysign.f32(float, float)
declare float @llvm.sqrt.f32(float)
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/swdev380865.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,13 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
; CHECK-NEXT: v_mov_b32_e32 v1, s7
; CHECK-NEXT: .LBB0_1: ; %for.cond4.preheader
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], 0
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_mov_b32 s7, 0x40140000
; CHECK-NEXT: s_add_i32 s1, s1, s0
; CHECK-NEXT: s_cmpk_lt_i32 s1, 0xa00
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[6:7]
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_mov_b32 s7, 0x40180000
; CHECK-NEXT: s_add_i32 s1, s1, s0
; CHECK-NEXT: s_cmpk_lt_i32 s1, 0xa00
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[6:7]
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_mov_b32 s7, 0x401c0000
Expand Down
Loading