Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 703b827

Browse files
committed
[X86][SSE] Canonicalize OR(AND(X,C),AND(Y,~C)) -> OR(AND(X,C),ANDNP(C,Y))
For constant bit select patterns, replace one AND with an ANDNP, allowing us to reuse the constant mask. Only do this if the mask has multiple uses (to avoid losing load folding) or if we have XOP as its VPCMOV can handle most folding commutations. This also requires computeKnownBitsForTargetNode support for X86ISD::ANDNP and X86ISD::FOR to prevent regressions in fabs/fcopysign patterns. Differential Revision: https://reviews.llvm.org/D55935 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351819 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent cf74016 commit 703b827

16 files changed

+1012
-969
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30233,6 +30233,27 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3023330233
Known = Known.trunc(BitWidth);
3023430234
break;
3023530235
}
30236+
case X86ISD::ANDNP: {
30237+
KnownBits Known2;
30238+
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
30239+
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
30240+
30241+
// ANDNP = (~X & Y);
30242+
Known.One &= Known2.Zero;
30243+
Known.Zero |= Known2.One;
30244+
break;
30245+
}
30246+
case X86ISD::FOR: {
30247+
KnownBits Known2;
30248+
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
30249+
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
30250+
30251+
// Output known-0 bits are only known if clear in both the LHS & RHS.
30252+
Known.Zero &= Known2.Zero;
30253+
// Output known-1 are known to be set if set in either the LHS | RHS.
30254+
Known.One |= Known2.One;
30255+
break;
30256+
}
3023630257
case X86ISD::CMOV: {
3023730258
Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
3023830259
// If we don't know any bits, early out.
@@ -36519,6 +36540,52 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
3651936540
return SDValue();
3652036541
}
3652136542

36543+
// Canonicalize OR(AND(X,C),AND(Y,~C)) -> OR(AND(X,C),ANDNP(C,Y))
//
// N must be an ISD::OR node (asserted). Returns the rewritten OR, or an empty
// SDValue when the pattern does not apply: N is not a vector, either operand
// (looking through bitcasts) is not an AND, the use-count/XOP gate fails, the
// AND masks cannot be extracted as constants, or the masks are not exact
// complements of each other.
36544+
static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
36545+
const X86Subtarget &Subtarget) {
36546+
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
36547+
36548+
EVT VT = N->getValueType(0);
36549+
if (!VT.isVector())
36550+
return SDValue();
36551+
// Match the two AND operands of the OR, ignoring any bitcasts wrapped
// around them.
36552+
SDValue N0 = peekThroughBitcasts(N->getOperand(0));
36553+
SDValue N1 = peekThroughBitcasts(N->getOperand(1));
36554+
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
36555+
return SDValue();
36556+
36557+
// On XOP we'll lower to PCMOV so accept one use, otherwise only
36558+
// do this if either mask has multiple uses already.
36559+
if (!(Subtarget.hasXOP() || !N0.getOperand(1).hasOneUse() ||
36560+
!N1.getOperand(1).hasOneUse()))
36561+
return SDValue();
36562+
36563+
// Attempt to extract constant byte masks.
// Operand 1 of each AND is treated as the mask; the element width passed to
// the helper is 8 bits.
36564+
APInt UndefElts0, UndefElts1;
36565+
SmallVector<APInt, 32> EltBits0, EltBits1;
36566+
if (!getTargetConstantBitsFromNode(N0.getOperand(1), 8, UndefElts0, EltBits0,
36567+
false, false))
36568+
return SDValue();
36569+
if (!getTargetConstantBitsFromNode(N1.getOperand(1), 8, UndefElts1, EltBits1,
36570+
false, false))
36571+
return SDValue();
36572+
// Require every mask byte to be defined and to be the bitwise inverse of the
// corresponding byte in the other mask.
36573+
for (unsigned i = 0, e = EltBits0.size(); i != e; ++i) {
36574+
// TODO - add UNDEF elts support.
36575+
if (UndefElts0[i] || UndefElts1[i])
36576+
return SDValue();
36577+
if (EltBits0[i] != ~EltBits1[i])
36578+
return SDValue();
36579+
}
36580+
// Keep the OR's first operand as-is and rebuild the second half as
// ANDNP(mask of first AND, non-mask operand of second AND), so that only the
// first AND's constant mask is referenced.
36581+
SDLoc DL(N);
36582+
SDValue X = N->getOperand(0);
36583+
SDValue Y =
36584+
DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)),
36585+
DAG.getBitcast(VT, N1.getOperand(0)));
36586+
return DAG.getNode(ISD::OR, DL, VT, X, Y);
36587+
}
36588+
3652236589
// Try to match OR(AND(~MASK,X),AND(MASK,Y)) logic pattern.
3652336590
static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) {
3652436591
if (N->getOpcode() != ISD::OR)
@@ -36781,6 +36848,9 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
3678136848
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
3678236849
return FPLogic;
3678336850

36851+
if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
36852+
return R;
36853+
3678436854
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
3678536855
return R;
3678636856

test/CodeGen/X86/bitreverse.ll

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,17 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
6161
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6262
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
6363
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
64-
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
65-
; X64-NEXT: packuswb %xmm2, %xmm0
66-
; X64-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
67-
; X64-NEXT: movdqa %xmm0, %xmm2
68-
; X64-NEXT: pand %xmm1, %xmm2
64+
; X64-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
65+
; X64-NEXT: packuswb %xmm2, %xmm1
66+
; X64-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
67+
; X64-NEXT: movdqa %xmm1, %xmm2
68+
; X64-NEXT: pand %xmm0, %xmm2
6969
; X64-NEXT: psllw $4, %xmm2
70-
; X64-NEXT: movdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
71-
; X64-NEXT: pand %xmm3, %xmm2
72-
; X64-NEXT: pand %xmm3, %xmm0
73-
; X64-NEXT: psrlw $4, %xmm0
74-
; X64-NEXT: pand %xmm1, %xmm0
75-
; X64-NEXT: por %xmm2, %xmm0
70+
; X64-NEXT: pand {{.*}}(%rip), %xmm1
71+
; X64-NEXT: psrlw $4, %xmm1
72+
; X64-NEXT: pand %xmm0, %xmm1
73+
; X64-NEXT: pandn %xmm2, %xmm0
74+
; X64-NEXT: por %xmm1, %xmm0
7675
; X64-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7776
; X64-NEXT: pand %xmm0, %xmm1
7877
; X64-NEXT: psllw $2, %xmm1

0 commit comments

Comments
 (0)