-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[X86]: Rewrite demorgan rule for ANDN #163789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: None (kper) ChangesThe pattern Fixes: #163516 Full diff: https://github.com/llvm/llvm-project/pull/163789.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0b64ff370b10..e2632d114ce0b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51651,6 +51651,31 @@ static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
return AndN;
}
+// fold (not (or A, B)) -> nand(A, not(B)) if BMI
+static SDValue
+combineReassocDemorganWithNANDWithBMI(SDNode *Xor, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ using namespace llvm::SDPatternMatch;
+
+ EVT VT = Xor->getValueType(0);
+ // Make sure this node is a candidate for BMI instructions.
+ if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+ return SDValue();
+
+ SDValue A;
+ SDValue B;
+ APInt Cst;
+ if (!(sd_match(Xor, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
+ Cst.isAllOnes()))
+ return SDValue();
+
+ auto Opcode =
+ Subtarget.is64Bit() && VT == MVT::i64 ? X86::ANDN64rr : X86::ANDN32rr;
+ auto AndN = DAG.getMachineNode(Opcode, DL, VT, A, DAG.getNOT(DL, B, VT));
+ return SDValue(AndN, 0);
+}
+
static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -55150,6 +55175,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
return R;
+ if (SDValue R = combineReassocDemorganWithNANDWithBMI(N, DL, DAG, Subtarget))
+ return R;
+
return combineFneg(N, DAG, DCI, Subtarget);
}
diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
new file mode 100644
index 0000000000000..ea81d08cd2e6d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+
+define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: reassoc_demorgan_i32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: notl %ecx
+; X86-NEXT: andnl %ecx, %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: reassoc_demorgan_i32:
+; X64: # %bb.0:
+; X64-NEXT: notl %edi
+; X64-NEXT: andnl %edi, %esi, %eax
+; X64-NEXT: retq
+ %temp = or i32 %b, %a
+ %res = xor i32 %temp, -1
+ ret i32 %res
+}
+
+define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; X86-LABEL: reassoc_demorgan_three_arguments_i32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: notl %eax
+; X86-NEXT: andnl %eax, %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: reassoc_demorgan_three_arguments_i32:
+; X64: # %bb.0:
+; X64-NEXT: orl %esi, %edi
+; X64-NEXT: notl %edx
+; X64-NEXT: andnl %edx, %edi, %eax
+; X64-NEXT: retq
+ %and.demorgan = or i32 %b, %a
+ %and3.demorgan = or i32 %and.demorgan, %c
+ %and3 = xor i32 %and3.demorgan, -1
+ ret i32 %and3
+}
+
+define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: reassoc_demorgan_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: notl %edx
+; X86-NEXT: andnl %edx, %eax, %eax
+; X86-NEXT: notl %esi
+; X86-NEXT: andnl %esi, %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: reassoc_demorgan_i64:
+; X64: # %bb.0:
+; X64-NEXT: notq %rdi
+; X64-NEXT: andnq %rdi, %rsi, %rax
+; X64-NEXT: retq
+ %temp = or i64 %b, %a
+ %res = xor i64 %temp, -1
+ ret i64 %res
+}
+
+define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; X86-LABEL: reassoc_demorgan_three_arguments_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: notl %eax
+; X86-NEXT: andnl %eax, %edx, %eax
+; X86-NEXT: notl %ecx
+; X86-NEXT: andnl %ecx, %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: reassoc_demorgan_three_arguments_i64:
+; X64: # %bb.0:
+; X64-NEXT: orq %rsi, %rdi
+; X64-NEXT: notq %rdx
+; X64-NEXT: andnq %rdx, %rdi, %rax
+; X64-NEXT: retq
+ %and.demorgan = or i64 %b, %a
+ %and3.demorgan = or i64 %and.demorgan, %c
+ %and3 = xor i64 %and3.demorgan, -1
+ ret i64 %and3
+}
|
fyi @RKSimon |
|
||
EVT VT = Xor->getValueType(0); | ||
// Make sure this node is a candidate for BMI instructions. | ||
if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can this be handled generically in DAGCombine
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But there is no ISD::ANDN
(at least that I know of)
So do you want to generally reassociate it? For all cases?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Search DAGCombiner.cpp for hasAndNot
At the moment, there is an infinity loop with
Once the demorgan is applied and we have an |
This should not be described as "reassociation". |
// If we have AndNot then it is profitable to apply demorgan to make use | ||
// of the machine instruction. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would argue that this fold is not profitable at all on its own. It is only profitable when combined into a larger pattern like ~(A|B|C)
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah that's also my impression :/
I have been updating the rest of the .ll after I "fixed" the infinite loop. And, it doesn't look so good.
I will try the larger pattern and let's see what the result is.
IIRC this is to help with register pressure as we can only fold the second operand on ANDNP which can cause headaches - X86's IsNOT attempts to peek though chains to see if we can remove a NOT entirely (or in this case move it after the OR) - if you're going to improve this patch to look for more complex patterns this fold shouldn't infinite loop any more. |
Let's the disregard the fact that I had to update some test files needlessly for now, e.g. Looking at |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Failed Tests (4):
LLVM :: CodeGen/LoongArch/ctlz-cttz-ctpop.ll
LLVM :: CodeGen/PowerPC/xxeval-and-nand.ll
LLVM :: CodeGen/SystemZ/scalar-ctlz-02.ll
LLVM :: CodeGen/SystemZ/vec-eval.ll
The pattern
~(a | b | c)
should use theANDN
instruction whenBMI
is available. We do that by rewritting it to~a & ~b & ~c
.Fixes: #163516
Proof: https://alive2.llvm.org/ce/z/vwoZo6