Skip to content
67 changes: 67 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1262,6 +1262,27 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N1 == N00 || N1 == N01)
return N0;
}

// Reassociate (X & b) & ~c into X & (b & ~c) when the target reports an
// and-not instruction for either operand. Grouping b with ~c lets the and-not
// be formed without depending on X, so it can be computed in parallel with X.
//
// The fold only fires when the inner AND (N0) has a single use; N00 and N01
// are used as its two operands (X and b respectively).
//
// NOTE(review): unlike the related fold in visitAND, nothing here rejects the
// case where b (N01) is itself a NOT - confirm this cannot ping-pong with
// other reassociations.
if (Opc == ISD::AND && (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) &&
    sd_match(N1, m_Not(m_Value())) && N0.getOpcode() == ISD::AND &&
    N0->hasOneUse()) {
  // Build the inner b & ~c first, then AND it with X.
  SDValue AndNotPart = DAG.getNode(ISD::AND, DL, VT, N01, N1);
  return DAG.getNode(ISD::AND, DL, VT, N00, AndNotPart);
}
if (Opc == ISD::XOR) {
// (N00 ^ N01) ^ N00 --> N01
if (N1 == N00)
Expand Down Expand Up @@ -7518,6 +7539,52 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;

// Reassociate (C ^ a) & b & ~c into (C ^ a) & (b & ~c) when the target
// reports an and-not instruction for either operand, where C is a constant
// integer other than all-ones. This lets the and-not be computed in parallel
// with the xor.
if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
  // True when V is a bitwise NOT, represented as an XOR with all-ones.
  auto IsBitwiseNot = [](SDValue V) {
    return sd_match(V, m_Xor(m_Value(), m_AllOnes()));
  };
  // True when V is an XOR with a constant integer that is not all-ones
  // (an all-ones constant would make V a NOT instead).
  auto IsXorWithPlainConst = [](SDValue V) {
    APInt C;
    return sd_match(V, m_Xor(m_ConstInt(C), m_Value())) && !C.isAllOnes();
  };

  // Match AND(AND(LHS, RHS), NOT(...)) where the inner AND has one use.
  // The original author notes that m_And handles operand commutativity.
  SDValue LHS, RHS;
  if (sd_match(N, m_And(m_OneUse(m_And(m_Value(LHS), m_Value(RHS))),
                        m_Xor(m_Value(), m_AllOnes())))) {
    // Pick whichever inner operand is the xor-with-constant; the first
    // operand takes priority when both qualify.
    SDValue XorSide, Remaining;
    if (IsXorWithPlainConst(LHS)) {
      XorSide = LHS;
      Remaining = RHS;
    } else if (IsXorWithPlainConst(RHS)) {
      XorSide = RHS;
      Remaining = LHS;
    }

    // Skip the fold when the remaining operand is itself a NOT; the original
    // code documents this as a guard against infinite combine loops.
    if (XorSide && !IsBitwiseNot(Remaining)) {
      // The NOT is whichever of the outer AND's operands matches it.
      SDValue NotOperand = IsBitwiseNot(N0) ? N0 : N1;

      // AND(AND(XOR(C, a), b), NOT(c)) --> AND(XOR(C, a), AND(b, NOT(c)))
      SDValue Inner = DAG.getNode(ISD::AND, DL, VT, Remaining, NotOperand);
      return DAG.getNode(ISD::AND, DL, VT, XorSide, Inner);
    }
  }
}

// reassociate and
if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
return RAND;
Expand Down
124 changes: 124 additions & 0 deletions llvm/test/CodeGen/X86/andn-reassociate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=-bmi < %s | FileCheck %s --check-prefixes=CHECK,NOBMI
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefixes=CHECK,BMI

; Test the optimization: X & b & ~c -> X & (b & ~c)
; This reassociation allows ANDN to execute in parallel with computing X

; Base i64 case: ((%a ^ 1234) & %b) & ~%c, with the xor as the first operand
; of the inner AND and the NOT as the second operand of the outer AND.
; With BMI the expected code forms andnq from %rsi/%rdx without using the
; xorq result, then combines the two with a final andq.
define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
; NOBMI-LABEL: test_constant_xor_and_andnot:
; NOBMI: # %bb.0:
; NOBMI-NEXT: movq %rdx, %rax
; NOBMI-NEXT: xorq $1234, %rdi # imm = 0x4D2
; NOBMI-NEXT: andq %rsi, %rdi
; NOBMI-NEXT: notq %rax
; NOBMI-NEXT: andq %rdi, %rax
; NOBMI-NEXT: retq
;
; BMI-LABEL: test_constant_xor_and_andnot:
; BMI: # %bb.0:
; BMI-NEXT: xorq $1234, %rdi # imm = 0x4D2
; BMI-NEXT: andnq %rsi, %rdx, %rax
; BMI-NEXT: andq %rdi, %rax
; BMI-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %xor, %b
%not_c = xor i64 %c, -1
%result = and i64 %and1, %not_c
ret i64 %result
}

; i32 variant of the base case: ((%a ^ 5678) & %b) & ~%c.
; With BMI the expected code uses andnl on %esi/%edx independently of the
; xorl result, followed by a final andl.
define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
; NOBMI-LABEL: test_constant_xor_and_andnot_32:
; NOBMI: # %bb.0:
; NOBMI-NEXT: movl %edx, %eax
; NOBMI-NEXT: xorl $5678, %edi # imm = 0x162E
; NOBMI-NEXT: andl %esi, %edi
; NOBMI-NEXT: notl %eax
; NOBMI-NEXT: andl %edi, %eax
; NOBMI-NEXT: retq
;
; BMI-LABEL: test_constant_xor_and_andnot_32:
; BMI: # %bb.0:
; BMI-NEXT: xorl $5678, %edi # imm = 0x162E
; BMI-NEXT: andnl %esi, %edx, %eax
; BMI-NEXT: andl %edi, %eax
; BMI-NEXT: retq
%xor = xor i32 %a, 5678
%and1 = and i32 %xor, %b
%not_c = xor i32 %c, -1
%result = and i32 %and1, %not_c
ret i32 %result
}

; Test with the inner AND's operands swapped: (%b & (%a ^ 1234)) & ~%c.
; The expected output is the same instruction sequence as the base i64 case.
define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
; NOBMI-LABEL: test_constant_xor_and_andnot_swapped:
; NOBMI: # %bb.0:
; NOBMI-NEXT: movq %rdx, %rax
; NOBMI-NEXT: xorq $1234, %rdi # imm = 0x4D2
; NOBMI-NEXT: andq %rsi, %rdi
; NOBMI-NEXT: notq %rax
; NOBMI-NEXT: andq %rdi, %rax
; NOBMI-NEXT: retq
;
; BMI-LABEL: test_constant_xor_and_andnot_swapped:
; BMI: # %bb.0:
; BMI-NEXT: xorq $1234, %rdi # imm = 0x4D2
; BMI-NEXT: andnq %rsi, %rdx, %rax
; BMI-NEXT: andq %rdi, %rax
; BMI-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %b, %xor
%not_c = xor i64 %c, -1
%result = and i64 %and1, %not_c
ret i64 %result
}

; Test with the outer (final) AND's operands swapped, so the NOT comes first:
; ~%c & ((%a ^ 1234) & %b).
; The expected output is the same instruction sequence as the base i64 case.
define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
; NOBMI-LABEL: test_constant_xor_and_andnot_final_swapped:
; NOBMI: # %bb.0:
; NOBMI-NEXT: movq %rdx, %rax
; NOBMI-NEXT: xorq $1234, %rdi # imm = 0x4D2
; NOBMI-NEXT: andq %rsi, %rdi
; NOBMI-NEXT: notq %rax
; NOBMI-NEXT: andq %rdi, %rax
; NOBMI-NEXT: retq
;
; BMI-LABEL: test_constant_xor_and_andnot_final_swapped:
; BMI: # %bb.0:
; BMI-NEXT: xorq $1234, %rdi # imm = 0x4D2
; BMI-NEXT: andnq %rsi, %rdx, %rax
; BMI-NEXT: andq %rdi, %rax
; BMI-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %xor, %b
%not_c = xor i64 %c, -1
%result = and i64 %and1, %not_c
ret i64 %result
}

; Variant where the inner AND's first operand is an add of a constant rather
; than an xor with a constant: ((%a + 5678) & %b) & ~%c.
; With BMI the expected code still forms andnq from %rsi/%rdx independently of
; the leaq result; without BMI the NOT is applied to %rdx in place and ANDed
; into the (%a + 5678) & %b result.
define i64 @test_add_and_andnot(i64 %a, i64 %b, i64 %c) {
; NOBMI-LABEL: test_add_and_andnot:
; NOBMI: # %bb.0:
; NOBMI-NEXT: leaq 5678(%rdi), %rax
; NOBMI-NEXT: andq %rsi, %rax
; NOBMI-NEXT: notq %rdx
; NOBMI-NEXT: andq %rdx, %rax
; NOBMI-NEXT: retq
;
; BMI-LABEL: test_add_and_andnot:
; BMI: # %bb.0:
; BMI-NEXT: leaq 5678(%rdi), %rcx
; BMI-NEXT: andnq %rsi, %rdx, %rax
; BMI-NEXT: andq %rcx, %rax
; BMI-NEXT: retq
%add = add i64 %a, 5678
%and1 = and i64 %add, %b
%not_c = xor i64 %c, -1
%result = and i64 %and1, %not_c
ret i64 %result
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/pr108731.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ define i64 @test_i64(i64 %w, i64 %x, i64 %y, i64 %z) {
; BMI-LABEL: test_i64:
; BMI: # %bb.0: # %Entry
; BMI-NEXT: andq %rdx, %rsi
; BMI-NEXT: andnq %rdi, %rsi, %rax
; BMI-NEXT: andnq %rcx, %rdx, %rcx
; BMI-NEXT: andnq %rax, %rcx, %rax
; BMI-NEXT: andnq %rcx, %rdx, %rax
; BMI-NEXT: andnq %rdi, %rax, %rax
; BMI-NEXT: andnq %rax, %rsi, %rax
; BMI-NEXT: retq
Entry:
%and1 = and i64 %y, %x
Expand All @@ -46,9 +46,9 @@ define i32 @test_i32(i32 %w, i32 %x, i32 %y, i32 %z) {
; BMI-LABEL: test_i32:
; BMI: # %bb.0: # %Entry
; BMI-NEXT: andl %edx, %esi
; BMI-NEXT: andnl %edi, %esi, %eax
; BMI-NEXT: andnl %ecx, %edx, %ecx
; BMI-NEXT: andnl %eax, %ecx, %eax
; BMI-NEXT: andnl %ecx, %edx, %eax
; BMI-NEXT: andnl %edi, %eax, %eax
; BMI-NEXT: andnl %eax, %esi, %eax
; BMI-NEXT: retq
Entry:
%and1 = and i32 %y, %x
Expand Down
Loading