-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86] SimplifyDemandedBitsForTargetNode - add handling for X86ISD::FAND/FOR #136618
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
[X86] SimplifyDemandedBitsForTargetNode - add handling for X86ISD::FAND/FOR. Also add computeKnownBitsForTargetNode handling for X86ISD::FAND. Fixes llvm#136368
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Also add computeKnownBitsForTargetNode handling for X86ISD::FAND. Fixes #136368. Full diff: https://github.com/llvm/llvm-project/pull/136618.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 993118c52564e..5e46708c7e877 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38487,11 +38487,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.Zero |= Known2.One;
break;
}
+ case X86ISD::FAND: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known &= Known2;
+ break;
+ }
case X86ISD::FOR: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
Known |= Known2;
break;
}
@@ -44147,6 +44153,55 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
Known.Zero |= Known2.One;
break;
}
+ case X86ISD::FAND: {
+ KnownBits Known2;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & OriginalDemandedBits,
+ OriginalDemandedElts, Known2, TLO, Depth + 1))
+ return true;
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if (OriginalDemandedBits.isSubsetOf(Known2.Zero | Known.One))
+ return TLO.CombineTo(Op, Op0);
+ if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.One))
+ return TLO.CombineTo(Op, Op1);
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+ return TLO.CombineTo(Op, TLO.DAG.getConstantFP(0.0, SDLoc(Op), VT));
+
+ Known &= Known2;
+ break;
+ }
+ case X86ISD::FOR: {
+ KnownBits Known2;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+
+ if (SimplifyDemandedBits(Op0, ~Known.One & OriginalDemandedBits,
+ OriginalDemandedElts, Known2, TLO, Depth + 1))
+ return true;
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if (OriginalDemandedBits.isSubsetOf(Known2.One | Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (OriginalDemandedBits.isSubsetOf(Known.One | Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
+
+ Known |= Known2;
+ break;
+ }
case X86ISD::VSHLI: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll
index d7031be3addd9..0443d3aee3801 100644
--- a/llvm/test/CodeGen/X86/combine-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll
@@ -252,28 +252,22 @@ define double @PR136368(double %x) {
; SSE-LABEL: PR136368:
; SSE: # %bb.0:
; SSE-NEXT: movapd {{.*#+}} xmm1 = [NaN,NaN]
-; SSE-NEXT: movapd %xmm0, %xmm2
-; SSE-NEXT: andpd %xmm1, %xmm2
-; SSE-NEXT: movsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
-; SSE-NEXT: movapd %xmm3, %xmm4
-; SSE-NEXT: cmpltsd %xmm2, %xmm4
-; SSE-NEXT: andpd %xmm3, %xmm4
-; SSE-NEXT: andpd %xmm1, %xmm4
-; SSE-NEXT: andnpd %xmm0, %xmm1
-; SSE-NEXT: orpd %xmm4, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: andpd %xmm0, %xmm1
+; SSE-NEXT: movsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
+; SSE-NEXT: movapd %xmm2, %xmm3
+; SSE-NEXT: cmpltsd %xmm1, %xmm3
+; SSE-NEXT: andpd %xmm2, %xmm3
+; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: orpd %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: PR136368:
; AVX: # %bb.0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN]
-; AVX-NEXT: # xmm1 = mem[0,0]
-; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm2
-; AVX-NEXT: vmovsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
-; AVX-NEXT: vcmpltsd %xmm2, %xmm3, %xmm2
-; AVX-NEXT: vandpd %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vandnpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX-NEXT: vmovsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
+; AVX-NEXT: vcmpltsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vandpd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%fabs = tail call double @llvm.fabs.f64(double %x)
|
| Known.Zero |= Known2.One; | ||
| break; | ||
| } | ||
| case X86ISD::FAND: { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is there no X86ISD::AND/OR handling in this function?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
X86ISD::AND/OR are scalar (RESULT,EFLAGS) nodes - not relevant to this (I'm not sure we have any tests that need them yet either).
| ; SSE-NEXT: movapd %xmm2, %xmm3 | ||
| ; SSE-NEXT: cmpltsd %xmm1, %xmm3 | ||
| ; SSE-NEXT: andpd %xmm2, %xmm3 | ||
| ; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we loading an extra 128-bit constant here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes - it's not a bitselect pattern, so canonicalizeBitSelect doesn't match; we just happen to end up with both the 0x80...0 and 0x7f...f constants in codegen.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't look good. We're only removing one andnpd here (along with 2 movapd instructions and one extra register use), but we have to perform one more 128-bit constant load.
Also add computeKnownBitsForTargetNode handling for X86ISD::FAND
Fixes #136368