Skip to content

Commit cc4f462

Browse files
authored
[X86][ISel] Improve VPTERNLOG matching for negated logic trees (#164863)
This patch extends VPTERNLOG pattern matching to handle cases where an outer NOT wraps a pure logical tree, such as `~(A | B | C)`. By recognizing these negated logic trees, the instruction selector can now emit a single `vpternlog` instruction for them. The change preserves the existing match for patterns like `(x != C1) & (x != C2)`, which also have an outer XOR-with-all-ones. The patch conservatively peels the outer XOR-with-all-ones only when it directly wraps a foldable logical operator (AND, OR, XOR, or ANDNP). Resolves #163738.
1 parent 5fda2a5 commit cc4f462

File tree

2 files changed

+58
-11
lines changed

2 files changed

+58
-11
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4721,9 +4721,6 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
47214721
if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
47224722
return false;
47234723

4724-
SDValue N0 = N->getOperand(0);
4725-
SDValue N1 = N->getOperand(1);
4726-
47274724
auto getFoldableLogicOp = [](SDValue Op) {
47284725
// Peek through single use bitcast.
47294726
if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
@@ -4740,13 +4737,47 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
47404737
return SDValue();
47414738
};
47424739

4743-
SDValue A, FoldableOp;
4744-
if ((FoldableOp = getFoldableLogicOp(N1))) {
4745-
A = N0;
4746-
} else if ((FoldableOp = getFoldableLogicOp(N0))) {
4747-
A = N1;
4748-
} else
4749-
return false;
4740+
SDValue N0, N1, A, FoldableOp;
4741+
4742+
// Identify and (optionally) peel an outer NOT that wraps a pure logic tree
4743+
auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) {
4744+
if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() &&
4745+
ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) {
4746+
SDValue InnerOp = Op->getOperand(0);
4747+
4748+
if (!getFoldableLogicOp(InnerOp))
4749+
return SDValue();
4750+
4751+
N0 = InnerOp.getOperand(0);
4752+
N1 = InnerOp.getOperand(1);
4753+
if ((FoldableOp = getFoldableLogicOp(N1))) {
4754+
A = N0;
4755+
return InnerOp;
4756+
}
4757+
if ((FoldableOp = getFoldableLogicOp(N0))) {
4758+
A = N1;
4759+
return InnerOp;
4760+
}
4761+
}
4762+
return SDValue();
4763+
};
4764+
4765+
bool PeeledOuterNot = false;
4766+
SDNode *OriN = N;
4767+
if (SDValue InnerOp = tryPeelOuterNotWrappingLogic(N)) {
4768+
PeeledOuterNot = true;
4769+
N = InnerOp.getNode();
4770+
} else {
4771+
N0 = N->getOperand(0);
4772+
N1 = N->getOperand(1);
4773+
4774+
if ((FoldableOp = getFoldableLogicOp(N1)))
4775+
A = N0;
4776+
else if ((FoldableOp = getFoldableLogicOp(N0)))
4777+
A = N1;
4778+
else
4779+
return false;
4780+
}
47504781

47514782
SDValue B = FoldableOp.getOperand(0);
47524783
SDValue C = FoldableOp.getOperand(1);
@@ -4798,7 +4829,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
47984829
case ISD::XOR: Imm ^= TernlogMagicA; break;
47994830
}
48004831

4801-
return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);
4832+
if (PeeledOuterNot)
4833+
Imm = ~Imm;
4834+
4835+
return matchVPTERNLOG(OriN, ParentA, ParentB, ParentC, A, B, C, Imm);
48024836
}
48034837

48044838
/// If the high bits of an 'and' operand are known zero, try setting the
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK
3+
4+
define <8 x i64> @foo(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c) {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vpternlogq {{.*#+}} zmm0 = ~(zmm0 | zmm2 | zmm1)
8+
; CHECK-NEXT: retq
9+
%and.demorgan = or <8 x i64> %b, %a
10+
%and3.demorgan = or <8 x i64> %and.demorgan, %c
11+
%and3 = xor <8 x i64> %and3.demorgan, splat (i64 -1)
12+
ret <8 x i64> %and3
13+
}

0 commit comments

Comments
 (0)