-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[X86][ISel] Improve VPTERNLOG matching for negated logic trees #164863
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4721,9 +4721,6 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { | |||||
| if (!(Subtarget->hasVLX() || NVT.is512BitVector())) | ||||||
| return false; | ||||||
|
|
||||||
| SDValue N0 = N->getOperand(0); | ||||||
| SDValue N1 = N->getOperand(1); | ||||||
|
|
||||||
| auto getFoldableLogicOp = [](SDValue Op) { | ||||||
| // Peek through single use bitcast. | ||||||
| if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) | ||||||
|
|
@@ -4740,6 +4737,34 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { | |||||
| return SDValue(); | ||||||
| }; | ||||||
|
|
||||||
| // Identify and (optionally) peel an outer NOT that wraps a pure logic tree | ||||||
| auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) { | ||||||
| if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() && | ||||||
| ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) { | ||||||
| SDValue InnerOp = Op->getOperand(0); | ||||||
|
|
||||||
| if (!getFoldableLogicOp(InnerOp)) { | ||||||
| return SDValue(); | ||||||
| } | ||||||
yichi170 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
|
|
||||||
| SDValue InnerN0 = InnerOp.getOperand(0); | ||||||
| SDValue InnerN1 = InnerOp.getOperand(1); | ||||||
| if (getFoldableLogicOp(InnerN1) || getFoldableLogicOp(InnerN0)) | ||||||
yichi170 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| return InnerOp; | ||||||
| } | ||||||
| return SDValue(); | ||||||
| }; | ||||||
|
|
||||||
| bool PeeledOuterNot = false; | ||||||
| SDNode *OriN = N; | ||||||
| if (SDValue InnerOp = tryPeelOuterNotWrappingLogic(N)) { | ||||||
| PeeledOuterNot = true; | ||||||
| N = InnerOp.getNode(); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unfortunately, we still need
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we peel the outer NOT off, the immediate (Imm) must be computed from the inner node (InnerOp), because the negation is applied afterward by inverting Imm. Therefore, this line should not be changed here. |
||||||
| } | ||||||
yichi170 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
|
|
||||||
| SDValue N0 = N->getOperand(0); | ||||||
| SDValue N1 = N->getOperand(1); | ||||||
|
|
||||||
| SDValue A, FoldableOp; | ||||||
| if ((FoldableOp = getFoldableLogicOp(N1))) { | ||||||
| A = N0; | ||||||
|
|
@@ -4798,7 +4823,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { | |||||
| case ISD::XOR: Imm ^= TernlogMagicA; break; | ||||||
| } | ||||||
|
|
||||||
| return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm); | ||||||
| if (PeeledOuterNot) | ||||||
| Imm = ~Imm; | ||||||
|
|
||||||
| return matchVPTERNLOG(OriN, ParentA, ParentB, ParentC, A, B, C, Imm); | ||||||
| } | ||||||
|
|
||||||
| /// If the high bits of an 'and' operand are known zero, try setting the | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 | ||
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK | ||
|
|
||
| define <8 x i64> @foo(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c) { | ||
| ; CHECK-LABEL: foo: | ||
| ; CHECK: # %bb.0: | ||
| ; CHECK-NEXT: vpternlogq {{.*#+}} zmm0 = ~(zmm0 | zmm2 | zmm1) | ||
| ; CHECK-NEXT: retq | ||
| %and.demorgan = or <8 x i64> %b, %a | ||
| %and3.demorgan = or <8 x i64> %and.demorgan, %c | ||
| %and3 = xor <8 x i64> %and3.demorgan, splat (i64 -1) | ||
| ret <8 x i64> %and3 | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@yichi170 Should this be:
getFoldableLogicOp might peek through a bitcast, so the original InnerOp might be that bitcast rather than a logic binop, in which case reading InnerOp's operands 0 and 1 is not safe.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you're right. Thanks for pointing it out! Should I submit another PR to fix it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes please - I've managed to get a test case to crash here, but you might have ideas for a better one: