diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 993118c52564e..5e46708c7e877 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38487,10 +38487,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     Known.Zero |= Known2.One;
     break;
   }
+  case X86ISD::FAND: {
+    KnownBits Known2;
+    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known &= Known2;
+    break;
+  }
   case X86ISD::FOR: {
     KnownBits Known2;
     Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known |= Known2;
     break;
   }
@@ -44147,6 +44154,55 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     Known.Zero |= Known2.One;
     break;
   }
+  case X86ISD::FAND: {
+    KnownBits Known2;
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
+                             Known, TLO, Depth + 1))
+      return true;
+
+    if (SimplifyDemandedBits(Op0, ~Known.Zero & OriginalDemandedBits,
+                             OriginalDemandedElts, Known2, TLO, Depth + 1))
+      return true;
+
+    // If all of the demanded bits are known one on one side, return the other.
+    // These bits cannot contribute to the result of the 'and'.
+    if (OriginalDemandedBits.isSubsetOf(Known2.Zero | Known.One))
+      return TLO.CombineTo(Op, Op0);
+    if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.One))
+      return TLO.CombineTo(Op, Op1);
+    // If all of the demanded bits in the inputs are known zeros, return zero.
+    if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+      return TLO.CombineTo(Op, TLO.DAG.getConstantFP(0.0, SDLoc(Op), VT));
+
+    Known &= Known2;
+    break;
+  }
+  case X86ISD::FOR: {
+    KnownBits Known2;
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
+                             Known, TLO, Depth + 1))
+      return true;
+
+    if (SimplifyDemandedBits(Op0, ~Known.One & OriginalDemandedBits,
+                             OriginalDemandedElts, Known2, TLO, Depth + 1))
+      return true;
+
+    // If all of the demanded bits are known zero on one side, return the other.
+    // These bits cannot contribute to the result of the 'or'.
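For review context, not part of the patch: a minimal standalone model of the known-bits transfer functions the two hunks rely on. The `KnownBitsModel` struct, the helper names, and the PR136368-flavoured example values are invented for illustration; LLVM's real `KnownBits` class provides the equivalent `&=`/`|=` semantics and `APInt::isSubsetOf` check.

```cpp
// Sketch only: models the known-bits bookkeeping the patch wires up for
// X86ISD::FAND/FOR. KnownBitsModel is a hypothetical stand-in for
// llvm::KnownBits; bits set in Zero are known 0, bits set in One are
// known 1, and the two masks never overlap.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct KnownBitsModel {
  uint64_t Zero = 0; // bits known to be 0
  uint64_t One = 0;  // bits known to be 1
};

// AND transfer function, matching `Known &= Known2`: a result bit is known 0
// if it is known 0 in either operand, and known 1 only if known 1 in both.
KnownBitsModel knownAnd(KnownBitsModel A, KnownBitsModel B) {
  return {A.Zero | B.Zero, A.One & B.One};
}

// OR transfer function, matching `Known |= Known2`: a result bit is known 1
// if it is known 1 in either operand, and known 0 only if known 0 in both.
KnownBitsModel knownOr(KnownBitsModel A, KnownBitsModel B) {
  return {A.Zero & B.Zero, A.One | B.One};
}

int main() {
  const uint64_t SignMask = 1ULL << 63;

  KnownBitsModel Fabs;     // models x & ~SignMask: sign bit known 0
  Fabs.Zero = SignMask;
  KnownBitsModel SignOnly; // models x & SignMask: all other bits known 0
  SignOnly.Zero = ~SignMask;

  KnownBitsModel Merged = knownOr(Fabs, SignOnly);
  assert((Merged.Zero & Merged.One) == 0 && "known masks must not overlap");

  // The FOR simplification from the patch, in model form: the OR can be
  // replaced by one operand when every demanded bit is either known 1 in
  // that operand or known 0 in the other (cf. isSubsetOf in the hunk).
  auto isSubsetOf = [](uint64_t Bits, uint64_t Mask) {
    return (Bits & ~Mask) == 0;
  };
  uint64_t Demanded = SignMask; // a user that only reads the sign bit
  bool FoldToSignHalf = isSubsetOf(Demanded, SignOnly.One | Fabs.Zero);
  assert(FoldToSignHalf && "sign bit is known 0 in the fabs half");
  std::printf("fold FOR to its sign operand: %s\n",
              FoldToSignHalf ? "yes" : "no");
  return 0;
}
```

The same subset check is what lets the FAND hunk drop a mask whose demanded bits are already covered by known-one constant bits.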
+    if (OriginalDemandedBits.isSubsetOf(Known2.One | Known.Zero))
+      return TLO.CombineTo(Op, Op0);
+    if (OriginalDemandedBits.isSubsetOf(Known.One | Known2.Zero))
+      return TLO.CombineTo(Op, Op1);
+
+    Known |= Known2;
+    break;
+  }
   case X86ISD::VSHLI: {
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll
index d7031be3addd9..0443d3aee3801 100644
--- a/llvm/test/CodeGen/X86/combine-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll
@@ -252,28 +252,22 @@ define double @PR136368(double %x) {
 ; SSE-LABEL: PR136368:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movapd {{.*#+}} xmm1 = [NaN,NaN]
-; SSE-NEXT:    movapd %xmm0, %xmm2
-; SSE-NEXT:    andpd %xmm1, %xmm2
-; SSE-NEXT:    movsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
-; SSE-NEXT:    movapd %xmm3, %xmm4
-; SSE-NEXT:    cmpltsd %xmm2, %xmm4
-; SSE-NEXT:    andpd %xmm3, %xmm4
-; SSE-NEXT:    andpd %xmm1, %xmm4
-; SSE-NEXT:    andnpd %xmm0, %xmm1
-; SSE-NEXT:    orpd %xmm4, %xmm1
-; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    andpd %xmm0, %xmm1
+; SSE-NEXT:    movsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
+; SSE-NEXT:    movapd %xmm2, %xmm3
+; SSE-NEXT:    cmpltsd %xmm1, %xmm3
+; SSE-NEXT:    andpd %xmm2, %xmm3
+; SSE-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    orpd %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: PR136368:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [NaN,NaN]
-; AVX-NEXT:    # xmm1 = mem[0,0]
-; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm2
-; AVX-NEXT:    vmovsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
-; AVX-NEXT:    vcmpltsd %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vandpd %xmm3, %xmm2, %xmm2
-; AVX-NEXT:    vandnpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vandpd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
+; AVX-NEXT:    vcmpltsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vandpd %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    vorpd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %fabs = tail call double @llvm.fabs.f64(double %x)
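The codegen delta above is the motivating case: PR136368 clamps a magnitude to pi/2 and reapplies the sign of `%x` via `llvm.copysign`, which lowers to FAND/FOR nodes. A function of roughly the following shape reproduces the pattern; the body is an illustrative reconstruction from the CHECK lines, not the verbatim test, and the function name is invented.

```llvm
; Illustrative sketch reconstructed from the CHECK lines above; the real
; test body may differ. 0x3FF921FB54442D18 is pi/2.
define double @clamp_magnitude(double %x) {
  %fabs = tail call double @llvm.fabs.f64(double %x)
  %cmp = fcmp ogt double %fabs, 0x3FF921FB54442D18
  %sel = select i1 %cmp, double 0x3FF921FB54442D18, double 0.000000e+00
  %res = tail call double @llvm.copysign.f64(double %sel, double %x)
  ret double %res
}

declare double @llvm.fabs.f64(double)
declare double @llvm.copysign.f64(double, double)
```

Both selected values have a clear sign bit, so with the new FAND known-bits handling SimplifyDemandedBits can prove the copysign's magnitude-side mask redundant: one `andpd` disappears outright, the sign extraction becomes a direct constant-pool `andpd` instead of an `andnpd` that kept the NaN-mask register live, and the SSE sequence shrinks from ten instructions to seven (eight to five for AVX).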