Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38487,11 +38487,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.Zero |= Known2.One;
break;
}
// X86ISD::FAND: derive the known bits of the node from the known bits of its
// two operands.
case X86ISD::FAND: {
KnownBits Known2;
// Query both operands for the demanded elements, one recursion level deeper.
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Merge with KnownBits' AND-combine operator.
// NOTE(review): presumably a bit is known zero if it is zero in either
// operand and known one only if it is one in both - confirm against
// KnownBits::operator&=.
Known &= Known2;
break;
}
// X86ISD::FOR: derive the known bits of the node from the known bits of its
// two operands.
case X86ISD::FOR: {
KnownBits Known2;
// Query both operands for the demanded elements, one recursion level deeper.
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

// Merge with KnownBits' OR-combine operator.
// NOTE(review): presumably a bit is known one if it is one in either operand
// and known zero only if it is zero in both - confirm against
// KnownBits::operator|=.
Known |= Known2;
break;
}
Expand Down Expand Up @@ -44147,6 +44153,55 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
Known.Zero |= Known2.One;
break;
}
case X86ISD::FAND: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is there no X86ISD::AND/OR handling in this function?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

X86ISD::AND/OR are scalar (RESULT,EFLAGS) nodes - not relevant to this (I'm not sure we have any tests that need them yet either).

KnownBits Known2;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);

// Simplify Op1 first against the full set of demanded bits and elements; its
// known bits are written to Known. A true result is propagated immediately.
if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
Known, TLO, Depth + 1))
return true;

// Bits already known zero in Op1 are removed from the demand placed on Op0:
// the 'and' result is zero there whatever Op0 holds. Op0's known bits are
// written to Known2.
if (SimplifyDemandedBits(Op0, ~Known.Zero & OriginalDemandedBits,
OriginalDemandedElts, Known2, TLO, Depth + 1))
return true;

// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
// (Known describes Op1, Known2 describes Op0.)
if (OriginalDemandedBits.isSubsetOf(Known2.Zero | Known.One))
return TLO.CombineTo(Op, Op0);
if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.One))
return TLO.CombineTo(Op, Op1);
// If all of the demanded bits in the inputs are known zeros, return zero.
// The replacement is a floating-point 0.0 constant of type VT.
if (OriginalDemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstantFP(0.0, SDLoc(Op), VT));

// No replacement was made: report the combined known bits of both operands.
Known &= Known2;
break;
}
// X86ISD::FOR: simplify both operands against the demanded bits, and replace
// the node with one operand when the other cannot affect any demanded bit.
case X86ISD::FOR: {
KnownBits Known2;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);

// Simplify Op1 first against the full set of demanded bits and elements; its
// known bits are written to Known. A true result is propagated immediately.
if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
Known, TLO, Depth + 1))
return true;

// Bits already known one in Op1 are removed from the demand placed on Op0:
// the 'or' result is one there whatever Op0 holds. Op0's known bits are
// written to Known2.
if (SimplifyDemandedBits(Op0, ~Known.One & OriginalDemandedBits,
OriginalDemandedElts, Known2, TLO, Depth + 1))
return true;

// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
// (Known describes Op1, Known2 describes Op0.)
if (OriginalDemandedBits.isSubsetOf(Known2.One | Known.Zero))
return TLO.CombineTo(Op, Op0);
if (OriginalDemandedBits.isSubsetOf(Known.One | Known2.Zero))
return TLO.CombineTo(Op, Op1);

// No replacement was made: report the combined known bits of both operands.
Known |= Known2;
break;
}
case X86ISD::VSHLI: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
Expand Down
30 changes: 12 additions & 18 deletions llvm/test/CodeGen/X86/combine-fcopysign.ll
Original file line number Diff line number Diff line change
Expand Up @@ -252,28 +252,22 @@ define double @PR136368(double %x) {
; SSE-LABEL: PR136368:
; SSE: # %bb.0:
; SSE-NEXT: movapd {{.*#+}} xmm1 = [NaN,NaN]
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: andpd %xmm1, %xmm2
; SSE-NEXT: movsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
; SSE-NEXT: movapd %xmm3, %xmm4
; SSE-NEXT: cmpltsd %xmm2, %xmm4
; SSE-NEXT: andpd %xmm3, %xmm4
; SSE-NEXT: andpd %xmm1, %xmm4
; SSE-NEXT: andnpd %xmm0, %xmm1
; SSE-NEXT: orpd %xmm4, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: andpd %xmm0, %xmm1
; SSE-NEXT: movsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
; SSE-NEXT: movapd %xmm2, %xmm3
; SSE-NEXT: cmpltsd %xmm1, %xmm3
; SSE-NEXT: andpd %xmm2, %xmm3
; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we loading an extra 128-bit constant?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - it's not a bitselect pattern, so canonicalizeBitSelect doesn't match; we just happen to have 0x80...0 and 0x7f...f in codegen.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't look good. We are just saving one andnpd here (omitting 2 movapd and one more register usage) but have to do one more 128-bit load.

; SSE-NEXT: orpd %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: PR136368:
; AVX: # %bb.0:
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm2
; AVX-NEXT: vmovsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
; AVX-NEXT: vcmpltsd %xmm2, %xmm3, %xmm2
; AVX-NEXT: vandpd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vandnpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandpd %xmm1, %xmm2, %xmm1
; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX-NEXT: vmovsd {{.*#+}} xmm2 = [1.5707963267948966E+0,0.0E+0]
; AVX-NEXT: vcmpltsd %xmm1, %xmm2, %xmm1
; AVX-NEXT: vandpd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%fabs = tail call double @llvm.fabs.f64(double %x)
Expand Down
Loading