diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5ffdc4efd21ea..b414270641443 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1262,6 +1262,18 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
     if (N1 == N00 || N1 == N01)
       return N0;
   }
+
+  // Reassociate (X & B) & ~C --> X & (B & ~C) when the target has ANDN, so
+  // the and-not of B and C no longer depends on X and can be computed in
+  // parallel with it. Only done when the inner AND (N0) has a single use.
+  // NOTE(review): unlike the visitAND fold, this does not reject a NOT among
+  // N00/N01 - confirm (~a & ~b) & ~c cannot be reassociated back and forth.
+  if (Opc == ISD::AND && (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) &&
+      sd_match(N1, m_Not(m_Value())) && N0.getOpcode() == ISD::AND &&
+      N0->hasOneUse()) {
+    SDValue AndBC = DAG.getNode(ISD::AND, DL, VT, N01, N1);
+    return DAG.getNode(ISD::AND, DL, VT, N00, AndBC);
+  }
   if (Opc == ISD::XOR) {
     // (N00 ^ N01) ^ N00 --> N01
     if (N1 == N00)
@@ -7518,6 +7530,43 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
+  // Fold ((C ^ a) & b) & ~c --> (C ^ a) & (b & ~c) when the target has ANDN,
+  // where C is a constant other than all-ones. This allows the and-not to be
+  // done in parallel with the xor.
+  if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
+    SDValue InnerAndOp0, InnerAndOp1;
+
+    // Match AND(AND(Op0, Op1), NOT(x)) where the inner AND has one use. m_And
+    // is commutative, so the NOT may be either N0 or N1.
+    if (sd_match(N, m_And(m_OneUse(m_And(m_Value(InnerAndOp0),
+                                         m_Value(InnerAndOp1))),
+                          m_Not(m_Value())))) {
+      // Pick the inner operand of the form XOR(C, x) whose constant C is not
+      // all-ones (that would be a NOT). m_Xor matches either operand order.
+      SDValue XorOp, OtherOp;
+      APInt XorConst;
+      if (sd_match(InnerAndOp0, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+          !XorConst.isAllOnes()) {
+        XorOp = InnerAndOp0;
+        OtherOp = InnerAndOp1;
+      } else if (sd_match(InnerAndOp1,
+                          m_Xor(m_ConstInt(XorConst), m_Value())) &&
+                 !XorConst.isAllOnes()) {
+        XorOp = InnerAndOp1;
+        OtherOp = InnerAndOp0;
+      }
+
+      // XorOp is still null if neither operand matched. Prevent infinite loops
+      // by also checking that OtherOp is not itself a NOT.
+      if (XorOp && !sd_match(OtherOp, m_Not(m_Value()))) {
+        // The NOT is whichever outer operand matches; the other is the AND.
+        SDValue NotOp = sd_match(N0, m_Not(m_Value())) ? N0 : N1;
+        SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+        return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+      }
+    }
+  }
+
   // reassociate and
   if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
     return RAND;
diff --git a/llvm/test/CodeGen/X86/andn-reassociate.ll b/llvm/test/CodeGen/X86/andn-reassociate.ll
new file mode 100644
index 0000000000000..720702ddc1045
--- /dev/null
+++ b/llvm/test/CodeGen/X86/andn-reassociate.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=-bmi < %s | FileCheck %s --check-prefix=NOBMI
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefix=BMI
+
+; Test the optimization: X & b & ~c -> X & (b & ~c)
+; This reassociation allows ANDN to execute in parallel with computing X
+
+define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
+; NOBMI-LABEL: test_constant_xor_and_andnot:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movq %rdx, %rax
+; NOBMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; NOBMI-NEXT:    andq %rsi, %rdi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_constant_xor_and_andnot:
+; BMI:       # %bb.0:
+; BMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; BMI-NEXT:    andnq %rsi, %rdx, %rax
+; BMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:    retq
+  %xor = xor i64 %a, 1234
+  %and1 = and i64 %xor, %b
+  %not_c = xor i64 %c, -1
+  %result = and i64 %and1, %not_c
+  ret i64 %result
+}
+
+define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
+; NOBMI-LABEL: test_constant_xor_and_andnot_32:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movl %edx, %eax
+; NOBMI-NEXT:    xorl $5678, %edi # imm = 0x162E
+; NOBMI-NEXT:    andl %esi, %edi
+; NOBMI-NEXT:    notl %eax
+; NOBMI-NEXT:    andl %edi, %eax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_constant_xor_and_andnot_32:
+; BMI:       # %bb.0:
+; BMI-NEXT:    xorl $5678, %edi # imm = 0x162E
+; BMI-NEXT:    andnl %esi, %edx, %eax
+; BMI-NEXT:    andl %edi, %eax
+; BMI-NEXT:    retq
+  %xor = xor i32 %a, 5678
+  %and1 = and i32 %xor, %b
+  %not_c = xor i32 %c, -1
+  %result = and i32 %and1, %not_c
+  ret i32 %result
+}
+
+; Test with different operand order
+define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
+; NOBMI-LABEL: test_constant_xor_and_andnot_swapped:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movq %rdx, %rax
+; NOBMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; NOBMI-NEXT:    andq %rsi, %rdi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_constant_xor_and_andnot_swapped:
+; BMI:       # %bb.0:
+; BMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; BMI-NEXT:    andnq %rsi, %rdx, %rax
+; BMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:    retq
+  %xor = xor i64 %a, 1234
+  %and1 = and i64 %b, %xor
+  %not_c = xor i64 %c, -1
+  %result = and i64 %and1, %not_c
+  ret i64 %result
+}
+
+; Test with different operand order for the final AND
+define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
+; NOBMI-LABEL: test_constant_xor_and_andnot_final_swapped:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movq %rdx, %rax
+; NOBMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; NOBMI-NEXT:    andq %rsi, %rdi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_constant_xor_and_andnot_final_swapped:
+; BMI:       # %bb.0:
+; BMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; BMI-NEXT:    andnq %rsi, %rdx, %rax
+; BMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:    retq
+  %xor = xor i64 %a, 1234
+  %and1 = and i64 %xor, %b
+  %not_c = xor i64 %c, -1
+  %result = and i64 %not_c, %and1
+  ret i64 %result
+}
+
+define i64 @test_add_and_andnot(i64 %a, i64 %b, i64 %c) {
+; NOBMI-LABEL: test_add_and_andnot:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    leaq 5678(%rdi), %rax
+; NOBMI-NEXT:    andq %rsi, %rax
+; NOBMI-NEXT:    notq %rdx
+; NOBMI-NEXT:    andq %rdx, %rax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_add_and_andnot:
+; BMI:       # %bb.0:
+; BMI-NEXT:    leaq 5678(%rdi), %rcx
+; BMI-NEXT:    andnq %rsi, %rdx, %rax
+; BMI-NEXT:    andq %rcx, %rax
+; BMI-NEXT:    retq
+  %add = add i64 %a, 5678
+  %and1 = and i64 %add, %b
+  %not_c = xor i64 %c, -1
+  %result = and i64 %and1, %not_c
+  ret i64 %result
+}
diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
index 2983d108eaedd..bda90117a1be4 100644
--- a/llvm/test/CodeGen/X86/pr108731.ll
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -17,9 +17,9 @@ define i64 @test_i64(i64 %w, i64 %x, i64 %y, i64 %z) {
 ; BMI-LABEL: test_i64:
 ; BMI:       # %bb.0: # %Entry
 ; BMI-NEXT:    andq %rdx, %rsi
-; BMI-NEXT:    andnq %rdi, %rsi, %rax
-; BMI-NEXT:    andnq %rcx, %rdx, %rcx
-; BMI-NEXT:    andnq %rax, %rcx, %rax
+; BMI-NEXT:    andnq %rcx, %rdx, %rax
+; BMI-NEXT:    andnq %rdi, %rax, %rax
+; BMI-NEXT:    andnq %rax, %rsi, %rax
 ; BMI-NEXT:    retq
 Entry:
   %and1 = and i64 %y, %x
@@ -46,9 +46,9 @@ define i32 @test_i32(i32 %w, i32 %x, i32 %y, i32 %z) {
 ; BMI-LABEL: test_i32:
 ; BMI:       # %bb.0: # %Entry
 ; BMI-NEXT:    andl %edx, %esi
-; BMI-NEXT:    andnl %edi, %esi, %eax
-; BMI-NEXT:    andnl %ecx, %edx, %ecx
-; BMI-NEXT:    andnl %eax, %ecx, %eax
+; BMI-NEXT:    andnl %ecx, %edx, %eax
+; BMI-NEXT:    andnl %edi, %eax, %eax
+; BMI-NEXT:    andnl %eax, %esi, %eax
 ; BMI-NEXT:    retq
 Entry:
   %and1 = and i32 %y, %x