-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[DAGCombiner] Relax nsz constraint with fp->int->fp optimizations #164503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/guy-david/dag-combine-fp-to-int-to-fp
Are you sure you want to change the base?
[DAGCombiner] Relax nsz constraint with fp->int->fp optimizations #164503
Conversation
|
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag Author: Guy David (guy-david) Changes
Based on top of #164502 but doesn't require it. Full diff: https://github.com/llvm/llvm-project/pull/164503.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 280753f3d797d..9eabec36f25dd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18871,6 +18871,37 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
return SDValue();
}
+/// Check if a use of a floating-point operation doesn't care about the sign of
+/// zero. This allows us to optimize (sitofp (fptosi x)) -> ftrunc(x) even
+/// without NoSignedZerosFPMath, as long as all uses are sign-insensitive.
+static bool isSignInsensitiveUse(SDNode *Use, unsigned OperandNo,
+ SelectionDAG &DAG) {
+ switch (Use->getOpcode()) {
+ case ISD::SETCC:
+ // Comparisons: IEEE 754 specifies +0.0 == -0.0.
+ case ISD::FABS:
+ // fabs always produces +0.0.
+ return true;
+ case ISD::FADD:
+ case ISD::FSUB: {
+ // Arithmetic with non-zero constants fixes the uncertainty around the sign
+ // bit.
+ SDValue Other = Use->getOperand(1 - OperandNo);
+ return DAG.isKnownNeverZeroFloat(Other);
+ }
+ default:
+ return false;
+ }
+}
+
+/// Check if all uses of a value are insensitive to the sign of zero.
+static bool allUsesSignInsensitive(SDValue V, SelectionDAG &DAG) {
+ return all_of(V->uses(), [&](SDUse &Use) {
+ SDNode *User = Use.getUser();
+ unsigned OperandNo = Use.getOperandNo();
+ return isSignInsensitiveUse(User, OperandNo, DAG);
+ });
+}
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -18892,13 +18923,13 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
assert(IsSigned || IsUnsigned);
- bool IsSignedZeroSafe =
- DAG.getTarget().Options.NoSignedZerosFPMath;
+ bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
+ allUsesSignInsensitive(SDValue(N, 0), DAG);
// For signed conversions: The optimization changes signed zero behavior.
if (IsSigned && !IsSignedZeroSafe)
return SDValue();
// For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
- // (unless NoSignedZerosFPMath is set).
+ // (unless outputting a signed zero is OK).
if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
return SDValue();
@@ -19376,10 +19407,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
// know it was called from a context with a nsz flag if the input fsub does
// not.
- if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
- N0.hasOneUse()) {
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
- N0.getOperand(0));
+ if (N0.getOpcode() == ISD::FSUB && N0.hasOneUse()) {
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N0.getOperand(1);
+
+ // Safe if NoSignedZeros, or if we can prove X != Y (avoiding the -0.0 vs
+ // +0.0 issue) For now, we use a conservative check: if either operand is
+ // known never zero, then X - Y can't produce a signed zero from X == Y.
+ if (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(X) ||
+ DAG.isKnownNeverZeroFloat(Y)) {
+ return DAG.getNode(ISD::FSUB, SDLoc(N), VT, Y, X);
+ }
}
if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
index 9a8c555953611..6f61e22203620 100644
--- a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
@@ -134,7 +134,66 @@ entry:
ret float %f
}
+define i1 @test_fcmp(float %x) {
+; CHECK-LABEL: test_fcmp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fcmp:
+; NO-SIGNED-ZEROS: // %bb.0:
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: fcmp s0, #0.0
+; NO-SIGNED-ZEROS-NEXT: cset w0, eq
+; NO-SIGNED-ZEROS-NEXT: ret
+ %conv1 = fptosi float %x to i32
+ %conv2 = sitofp i32 %conv1 to float
+ %cmp = fcmp oeq float %conv2, 0.0
+ ret i1 %cmp
+}
+
+define float @test_fadd(float %x) {
+; CHECK-LABEL: test_fadd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: fmov s1, #1.00000000
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fadd:
+; NO-SIGNED-ZEROS: // %bb.0:
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: fmov s1, #1.00000000
+; NO-SIGNED-ZEROS-NEXT: fadd s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: ret
+ %conv1 = fptosi float %x to i32
+ %conv2 = sitofp i32 %conv1 to float
+ %add = fadd float %conv2, 1.0
+ ret float %add
+}
+
+define float @test_fabs(float %x) {
+; CHECK-LABEL: test_fabs:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fabs:
+; NO-SIGNED-ZEROS: // %bb.0:
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: fabs s0, s0
+; NO-SIGNED-ZEROS-NEXT: ret
+ %conv1 = fptosi float %x to i32
+ %conv2 = sitofp i32 %conv1 to float
+ %abs = call float @llvm.fabs.f32(float %conv2)
+ ret float %abs
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
+declare float @llvm.fabs.f32(float)
|
|
@llvm/pr-subscribers-backend-aarch64 Author: Guy David (guy-david) Changes
Based on top of #164502 but doesn't require it. Full diff: https://github.com/llvm/llvm-project/pull/164503.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 280753f3d797d..9eabec36f25dd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18871,6 +18871,37 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
return SDValue();
}
+/// Check if a use of a floating-point operation doesn't care about the sign of
+/// zero. This allows us to optimize (sitofp (fptosi x)) -> ftrunc(x) even
+/// without NoSignedZerosFPMath, as long as all uses are sign-insensitive.
+static bool isSignInsensitiveUse(SDNode *Use, unsigned OperandNo,
+ SelectionDAG &DAG) {
+ switch (Use->getOpcode()) {
+ case ISD::SETCC:
+ // Comparisons: IEEE 754 specifies +0.0 == -0.0.
+ case ISD::FABS:
+ // fabs always produces +0.0.
+ return true;
+ case ISD::FADD:
+ case ISD::FSUB: {
+ // Arithmetic with non-zero constants fixes the uncertainty around the sign
+ // bit.
+ SDValue Other = Use->getOperand(1 - OperandNo);
+ return DAG.isKnownNeverZeroFloat(Other);
+ }
+ default:
+ return false;
+ }
+}
+
+/// Check if all uses of a value are insensitive to the sign of zero.
+static bool allUsesSignInsensitive(SDValue V, SelectionDAG &DAG) {
+ return all_of(V->uses(), [&](SDUse &Use) {
+ SDNode *User = Use.getUser();
+ unsigned OperandNo = Use.getOperandNo();
+ return isSignInsensitiveUse(User, OperandNo, DAG);
+ });
+}
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -18892,13 +18923,13 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
assert(IsSigned || IsUnsigned);
- bool IsSignedZeroSafe =
- DAG.getTarget().Options.NoSignedZerosFPMath;
+ bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
+ allUsesSignInsensitive(SDValue(N, 0), DAG);
// For signed conversions: The optimization changes signed zero behavior.
if (IsSigned && !IsSignedZeroSafe)
return SDValue();
// For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
- // (unless NoSignedZerosFPMath is set).
+ // (unless outputting a signed zero is OK).
if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
return SDValue();
@@ -19376,10 +19407,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
// know it was called from a context with a nsz flag if the input fsub does
// not.
- if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
- N0.hasOneUse()) {
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
- N0.getOperand(0));
+ if (N0.getOpcode() == ISD::FSUB && N0.hasOneUse()) {
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N0.getOperand(1);
+
+ // Safe if NoSignedZeros, or if we can prove X != Y (avoiding the -0.0 vs
+ // +0.0 issue) For now, we use a conservative check: if either operand is
+ // known never zero, then X - Y can't produce a signed zero from X == Y.
+ if (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(X) ||
+ DAG.isKnownNeverZeroFloat(Y)) {
+ return DAG.getNode(ISD::FSUB, SDLoc(N), VT, Y, X);
+ }
}
if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
index 9a8c555953611..6f61e22203620 100644
--- a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
@@ -134,7 +134,66 @@ entry:
ret float %f
}
+define i1 @test_fcmp(float %x) {
+; CHECK-LABEL: test_fcmp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fcmp:
+; NO-SIGNED-ZEROS: // %bb.0:
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: fcmp s0, #0.0
+; NO-SIGNED-ZEROS-NEXT: cset w0, eq
+; NO-SIGNED-ZEROS-NEXT: ret
+ %conv1 = fptosi float %x to i32
+ %conv2 = sitofp i32 %conv1 to float
+ %cmp = fcmp oeq float %conv2, 0.0
+ ret i1 %cmp
+}
+
+define float @test_fadd(float %x) {
+; CHECK-LABEL: test_fadd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: fmov s1, #1.00000000
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fadd:
+; NO-SIGNED-ZEROS: // %bb.0:
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: fmov s1, #1.00000000
+; NO-SIGNED-ZEROS-NEXT: fadd s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: ret
+ %conv1 = fptosi float %x to i32
+ %conv2 = sitofp i32 %conv1 to float
+ %add = fadd float %conv2, 1.0
+ ret float %add
+}
+
+define float @test_fabs(float %x) {
+; CHECK-LABEL: test_fabs:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fabs:
+; NO-SIGNED-ZEROS: // %bb.0:
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: fabs s0, s0
+; NO-SIGNED-ZEROS-NEXT: ret
+ %conv1 = fptosi float %x to i32
+ %conv2 = sitofp i32 %conv1 to float
+ %abs = call float @llvm.fabs.f32(float %conv2)
+ ret float %abs
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
+declare float @llvm.fabs.f32(float)
|
9742533 to
5d79dd0
Compare
8f9ed38 to
5f6506a
Compare
5d79dd0 to
da43b7e
Compare
0df2dcc to
7f65dea
Compare
NoSignedZerosFPMathisn't a hard requirements and in some contexts we can still apply the truncation without worrying. For example, in cases where the users of this sequence are overwriting the sign-bit (fabs) or simply ignoring it (fcmp).I think the same logic can be applied elsewhere for other DAG optimizations.
Based on top of #164502 but doesn't require it.