From af5dcb54f7888cbc07183f6be852f29e321afc6b Mon Sep 17 00:00:00 2001 From: Manik Mukherjee Date: Fri, 3 Oct 2025 01:03:52 -0400 Subject: [PATCH 1/9] added optimization and tests --- llvm/lib/Target/X86/X86ISelLowering.cpp | 63 +++++++++++++++++++ .../CodeGen/X86/constant-xor-and-andnot.ll | 63 +++++++++++++++++++ llvm/test/CodeGen/X86/pr108731.ll | 12 ++-- 3 files changed, 132 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/X86/constant-xor-and-andnot.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cdc97faf394ca..6c562ccf5b363 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51541,6 +51541,64 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) +/// This allows the andn operation to be done in parallel with the xor +static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + using namespace llvm::SDPatternMatch; + + EVT VT = N->getValueType(0); + // Only handle scalar integer types that support BMI instructions + if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64)) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Check if N0 is AND(XOR(Constant, a), b) + if (N0.getOpcode() != ISD::AND) + return SDValue(); + + SDValue AndLHS = N0.getOperand(0); + SDValue AndRHS = N0.getOperand(1); + + // Check if one operand is XOR(Constant, a) + SDValue XorOp, OtherOp; + if (AndLHS.getOpcode() == ISD::XOR) { + XorOp = AndLHS; + OtherOp = AndRHS; + } else if (AndRHS.getOpcode() == ISD::XOR) { + XorOp = AndRHS; + OtherOp = AndLHS; + } else { + return SDValue(); + } + + // Check if XOR has a constant operand + if (!isa<ConstantSDNode>(XorOp.getOperand(0)) && + !isa<ConstantSDNode>(XorOp.getOperand(1))) { + return SDValue(); + } + + // Check if N1 is NOT(c) -
i.e., XOR(c, -1) + SDValue NotOp; + if (N1.getOpcode() == ISD::XOR && isAllOnesConstant(N1.getOperand(1))) { + NotOp = N1.getOperand(0); + } else { + return SDValue(); + } + + // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) + // To: AND(XOR(Constant, a), AND(b, NOT(c))) + // This allows the andn (b & ~c) to be done in parallel with the xor + + // Create AND(b, NOT(c)) - this will become andn + SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1); + // Create final AND(XOR(Constant, a), AND(b, NOT(c))) + return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); +} + /// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available. static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL, SelectionDAG &DAG, @@ -51833,6 +51891,11 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG)) return R; + // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) + // This allows the andn operation to be done in parallel with the xor + if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget)) + return R; + // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2)) // iff c2 is all/no bits mask - i.e. a select-with-zero mask. // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW? 
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll new file mode 100644 index 0000000000000..5a4d931d29896 --- /dev/null +++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s + +; Test the optimization described in issue #161630: +; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor + +define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: test_constant_xor_and_andnot: +; CHECK: # %bb.0: +; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 +; CHECK-NEXT: andnq %rsi, %rdx, %rax +; CHECK-NEXT: andq %rdi, %rax +; CHECK-NEXT: retq + %xor = xor i64 %a, 1234 + %and1 = and i64 %xor, %b + %not_c = xor i64 %c, -1 + %result = and i64 %and1, %not_c + ret i64 %result +} + +define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: test_constant_xor_and_andnot_32: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl $5678, %edi # imm = 0x162E +; CHECK-NEXT: andnl %esi, %edx, %eax +; CHECK-NEXT: andl %edi, %eax +; CHECK-NEXT: retq + %xor = xor i32 %a, 5678 + %and1 = and i32 %xor, %b + %not_c = xor i32 %c, -1 + %result = and i32 %and1, %not_c + ret i32 %result +} + +; Test with different operand order +define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: test_constant_xor_and_andnot_swapped: +; CHECK: # %bb.0: +; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 +; CHECK-NEXT: andnq %rsi, %rdx, %rax +; CHECK-NEXT: andq %rdi, %rax +; CHECK-NEXT: retq + %xor = xor i64 %a, 1234 + %and1 = and i64 %b, %xor + %not_c = xor i64 %c, -1 + %result = and i64 %and1, %not_c + ret i64 %result +} + +; Test with different operand order for the final AND +define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: 
test_constant_xor_and_andnot_final_swapped: +; CHECK: # %bb.0: +; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 +; CHECK-NEXT: andq %rsi, %rdi +; CHECK-NEXT: andnq %rdi, %rdx, %rax +; CHECK-NEXT: retq + %xor = xor i64 %a, 1234 + %and1 = and i64 %xor, %b + %not_c = xor i64 %c, -1 + %result = and i64 %not_c, %and1 + ret i64 %result +} \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll index 2983d108eaedd..bda90117a1be4 100644 --- a/llvm/test/CodeGen/X86/pr108731.ll +++ b/llvm/test/CodeGen/X86/pr108731.ll @@ -17,9 +17,9 @@ define i64 @test_i64(i64 %w, i64 %x, i64 %y, i64 %z) { ; BMI-LABEL: test_i64: ; BMI: # %bb.0: # %Entry ; BMI-NEXT: andq %rdx, %rsi -; BMI-NEXT: andnq %rdi, %rsi, %rax -; BMI-NEXT: andnq %rcx, %rdx, %rcx -; BMI-NEXT: andnq %rax, %rcx, %rax +; BMI-NEXT: andnq %rcx, %rdx, %rax +; BMI-NEXT: andnq %rdi, %rax, %rax +; BMI-NEXT: andnq %rax, %rsi, %rax ; BMI-NEXT: retq Entry: %and1 = and i64 %y, %x @@ -46,9 +46,9 @@ define i32 @test_i32(i32 %w, i32 %x, i32 %y, i32 %z) { ; BMI-LABEL: test_i32: ; BMI: # %bb.0: # %Entry ; BMI-NEXT: andl %edx, %esi -; BMI-NEXT: andnl %edi, %esi, %eax -; BMI-NEXT: andnl %ecx, %edx, %ecx -; BMI-NEXT: andnl %eax, %ecx, %eax +; BMI-NEXT: andnl %ecx, %edx, %eax +; BMI-NEXT: andnl %edi, %eax, %eax +; BMI-NEXT: andnl %eax, %esi, %eax ; BMI-NEXT: retq Entry: %and1 = and i32 %y, %x From b176fd6e56d22c8f06190246ab5b5a2871776060 Mon Sep 17 00:00:00 2001 From: Manik Mukherjee Date: Sat, 4 Oct 2025 16:32:10 -0400 Subject: [PATCH 2/9] Move constant XOR AND ANDNOT optimization to generic DAG combiner This moves the optimization from X86-specific code to the generic reassociateOpsCommutative function in DAGCombiner.cpp. The optimization transforms (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) to allow ANDNOT operations to be done in parallel with XOR operations. This benefits all targets that have ANDNOT instructions (X86 BMI, ARM BIC, RISC-V, etc.) 
rather than being limited to X86 only. - Remove X86-specific combineConstantXorAndAndNot function - Add generic optimization to reassociateOpsCommutative with TLI.hasAndNot check - Update test expectations for the new optimized output --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 +++++++++++ llvm/lib/Target/X86/X86ISelLowering.cpp | 62 ------------------- .../CodeGen/X86/constant-xor-and-andnot.ll | 4 +- 3 files changed, 37 insertions(+), 64 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1ef2b35952833..4241019d47ec1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1262,6 +1262,41 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, if (N1 == N00 || N1 == N01) return N0; } + + // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) + // This allows the andn operation to be done in parallel with the xor + if (Opc == ISD::AND && TLI.hasAndNot(N1)) { + // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c)) + // Transform to: AND(XOR(Constant, a), AND(b, NOT(c))) + + // Check if N1 is NOT(c) - i.e., XOR(c, -1) + if (N1.getOpcode() == ISD::XOR && + DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) && + isAllOnesConstant(N1.getOperand(1))) { + + // Check if one operand of N0 is XOR(Constant, a) + SDValue XorOp, OtherOp; + if (N00.getOpcode() == ISD::XOR) { + XorOp = N00; + OtherOp = N01; + } else if (N01.getOpcode() == ISD::XOR) { + XorOp = N01; + OtherOp = N00; + } else { + return SDValue(); + } + + // Check if XOR has a constant operand + if (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) || + DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1))) { + // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) + // To: AND(XOR(Constant, a), AND(b, NOT(c))) + // This allows the andn (b & ~c) to be done in parallel with the xor + SDValue NewAnd = 
DAG.getNode(ISD::AND, DL, VT, OtherOp, N1); + return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); + } + } + } if (Opc == ISD::XOR) { // (N00 ^ N01) ^ N00 --> N01 if (N1 == N00) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6c562ccf5b363..d32cb680594c6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51541,63 +51541,6 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) -/// This allows the andn operation to be done in parallel with the xor -static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL, - SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - using namespace llvm::SDPatternMatch; - - EVT VT = N->getValueType(0); - // Only handle scalar integer types that support BMI instructions - if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64)) - return SDValue(); - - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - // Check if N0 is AND(XOR(Constant, a), b) - if (N0.getOpcode() != ISD::AND) - return SDValue(); - - SDValue AndLHS = N0.getOperand(0); - SDValue AndRHS = N0.getOperand(1); - - // Check if one operand is XOR(Constant, a) - SDValue XorOp, OtherOp; - if (AndLHS.getOpcode() == ISD::XOR) { - XorOp = AndLHS; - OtherOp = AndRHS; - } else if (AndRHS.getOpcode() == ISD::XOR) { - XorOp = AndRHS; - OtherOp = AndLHS; - } else { - return SDValue(); - } - - // Check if XOR has a constant operand - if (!isa<ConstantSDNode>(XorOp.getOperand(0)) && - !isa<ConstantSDNode>(XorOp.getOperand(1))) { - return SDValue(); - } - - // Check if N1 is NOT(c) - i.e., XOR(c, -1) - SDValue NotOp; - if (N1.getOpcode() == ISD::XOR && isAllOnesConstant(N1.getOperand(1))) { - NotOp = N1.getOperand(0); - } else { - return SDValue(); - } - - // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) - // To: AND(XOR(Constant, a), AND(b, NOT(c))) - // This allows the andn (b &
~c) to be done in parallel with the xor - - // Create AND(b, NOT(c)) - this will become andn - SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1); - // Create final AND(XOR(Constant, a), AND(b, NOT(c))) - return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); -} /// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available. static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL, @@ -51891,11 +51834,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG)) return R; - // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) - // This allows the andn operation to be done in parallel with the xor - if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget)) - return R; - // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2)) // iff c2 is all/no bits mask - i.e. a select-with-zero mask. // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW? diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll index 5a4d931d29896..923d065962081 100644 --- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll +++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll @@ -52,8 +52,8 @@ define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped: ; CHECK: # %bb.0: ; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 -; CHECK-NEXT: andq %rsi, %rdi -; CHECK-NEXT: andnq %rdi, %rdx, %rax +; CHECK-NEXT: andnq %rsi, %rdx, %rax +; CHECK-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %xor = xor i64 %a, 1234 %and1 = and i64 %xor, %b From 4a2e54661ca9758d6277e63992b80b065ff64588 Mon Sep 17 00:00:00 2001 From: Manik Mukherjee Date: Sat, 4 Oct 2025 19:30:44 -0400 Subject: [PATCH 3/9] changed combiner logic to account for infinite loops --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git 
a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4241019d47ec1..9b30f7a672c7f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7498,6 +7498,69 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; + // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) + // This allows the andn operation to be done in parallel with the xor + if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) { + // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c)) + // Transform to: AND(XOR(Constant, a), AND(b, NOT(c))) + + // Handle both operand orders: N0=AND, N1=NOT and N0=NOT, N1=AND + SDValue AndOp, NotOp; + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::XOR && + DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) && + isAllOnesConstant(N1.getOperand(1))) { + AndOp = N0; + NotOp = N1; + } else if (N1.getOpcode() == ISD::AND && + N0.getOpcode() == ISD::XOR && + DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + isAllOnesConstant(N0.getOperand(1))) { + AndOp = N1; + NotOp = N0; + } else { + goto skip_optimization; + } + + // Prevent infinite loops: only apply if the AND node has one use + if (!AndOp.hasOneUse()) + goto skip_optimization; + + SDValue AndOp0 = AndOp.getOperand(0); + SDValue AndOp1 = AndOp.getOperand(1); + + // Check if one operand of AndOp is XOR(Constant, a) + SDValue XorOp, OtherOp; + if (AndOp0.getOpcode() == ISD::XOR) { + XorOp = AndOp0; + OtherOp = AndOp1; + } else if (AndOp1.getOpcode() == ISD::XOR) { + XorOp = AndOp1; + OtherOp = AndOp0; + } else { + goto skip_optimization; + } + + // Check if XOR has a constant operand (and not all-ones constant to avoid NOT) + if ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) && + !isAllOnesConstant(XorOp.getOperand(0))) || + (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) && + 
!isAllOnesConstant(XorOp.getOperand(1)))) { + // Prevent infinite loops: only apply if OtherOp is not also a NOT + if (OtherOp.getOpcode() == ISD::XOR && + DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) && + isAllOnesConstant(OtherOp.getOperand(1))) { + goto skip_optimization; + } + // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) + // To: AND(XOR(Constant, a), AND(b, NOT(c))) + // This allows the andn (b & ~c) to be done in parallel with the xor + SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp); + return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); + } + } +skip_optimization: + // reassociate and if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags())) return RAND; From 7fb0e39bcb733850a671a8c922719cda80d05e31 Mon Sep 17 00:00:00 2001 From: Manik Mukherjee Date: Sat, 4 Oct 2025 20:13:32 -0400 Subject: [PATCH 4/9] refactored to remove goto --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 70 +++++++++---------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9b30f7a672c7f..e92e1319bb0f8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7519,47 +7519,45 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AndOp = N1; NotOp = N0; } else { - goto skip_optimization; + // Pattern doesn't match, continue to next optimization } - // Prevent infinite loops: only apply if the AND node has one use - if (!AndOp.hasOneUse()) - goto skip_optimization; - - SDValue AndOp0 = AndOp.getOperand(0); - SDValue AndOp1 = AndOp.getOperand(1); - - // Check if one operand of AndOp is XOR(Constant, a) - SDValue XorOp, OtherOp; - if (AndOp0.getOpcode() == ISD::XOR) { - XorOp = AndOp0; - OtherOp = AndOp1; - } else if (AndOp1.getOpcode() == ISD::XOR) { - XorOp = AndOp1; - OtherOp = AndOp0; - } else { - goto skip_optimization; - } - - // Check if XOR has a constant operand 
(and not all-ones constant to avoid NOT) - if ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) && - !isAllOnesConstant(XorOp.getOperand(0))) || - (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) && - !isAllOnesConstant(XorOp.getOperand(1)))) { - // Prevent infinite loops: only apply if OtherOp is not also a NOT - if (OtherOp.getOpcode() == ISD::XOR && - DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) && - isAllOnesConstant(OtherOp.getOperand(1))) { - goto skip_optimization; + // If we found a valid pattern, check if the AND node has one use + if (AndOp && NotOp && AndOp.hasOneUse()) { + SDValue AndOp0 = AndOp.getOperand(0); + SDValue AndOp1 = AndOp.getOperand(1); + + // Check if one operand of AndOp is XOR(Constant, a) + SDValue XorOp, OtherOp; + if (AndOp0.getOpcode() == ISD::XOR) { + XorOp = AndOp0; + OtherOp = AndOp1; + } else if (AndOp1.getOpcode() == ISD::XOR) { + XorOp = AndOp1; + OtherOp = AndOp0; + } else { + // No XOR found in AND operands, continue to next optimization + } + + // If we found XOR, check if it has a constant operand (and not all-ones constant to avoid NOT) + if (XorOp && OtherOp && + ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) && + !isAllOnesConstant(XorOp.getOperand(0))) || + (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) && + !isAllOnesConstant(XorOp.getOperand(1))))) { + // Prevent infinite loops: only apply if OtherOp is not also a NOT + if (!(OtherOp.getOpcode() == ISD::XOR && + DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) && + isAllOnesConstant(OtherOp.getOperand(1)))) { + // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) + // To: AND(XOR(Constant, a), AND(b, NOT(c))) + // This allows the andn (b & ~c) to be done in parallel with the xor + SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp); + return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); + } } - // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) - // To: 
AND(XOR(Constant, a), AND(b, NOT(c))) - // This allows the andn (b & ~c) to be done in parallel with the xor - SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp); - return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); } } -skip_optimization: // reassociate and if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags())) From d9c1a7568fb75a69c7dcc7f2fef25d637e0b4f00 Mon Sep 17 00:00:00 2001 From: Manik Mukherjee Date: Fri, 10 Oct 2025 00:26:23 -0400 Subject: [PATCH 5/9] addressed comments --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 79 ++++++++----------- llvm/lib/Target/X86/X86ISelLowering.cpp | 1 - .../CodeGen/X86/constant-xor-and-andnot.ll | 35 +++++--- 3 files changed, 57 insertions(+), 58 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e92e1319bb0f8..45ddb02a96d92 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7501,60 +7501,43 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) // This allows the andn operation to be done in parallel with the xor if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) { - // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c)) - // Transform to: AND(XOR(Constant, a), AND(b, NOT(c))) - - // Handle both operand orders: N0=AND, N1=NOT and N0=NOT, N1=AND - SDValue AndOp, NotOp; - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::XOR && - DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) && - isAllOnesConstant(N1.getOperand(1))) { - AndOp = N0; - NotOp = N1; - } else if (N1.getOpcode() == ISD::AND && - N0.getOpcode() == ISD::XOR && - DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && - isAllOnesConstant(N0.getOperand(1))) { - AndOp = N1; - NotOp = N0; - } else { - // Pattern doesn't match, continue to next optimization - } + SDValue InnerAndOp0, InnerAndOp1, NotArg; - // If we 
found a valid pattern, check if the AND node has one use - if (AndOp && NotOp && AndOp.hasOneUse()) { - SDValue AndOp0 = AndOp.getOperand(0); - SDValue AndOp1 = AndOp.getOperand(1); + // Match: AND(AND(Op0, Op1), NOT(NotArg)) + // where NOT is represented as XOR with all-ones + // m_And automatically handles commutativity + if (sd_match(N, m_And(m_OneUse(m_And(m_Value(InnerAndOp0), + m_Value(InnerAndOp1))), + m_Xor(m_Value(NotArg), m_AllOnes())))) { - // Check if one operand of AndOp is XOR(Constant, a) + // Determine which operand is XOR(Constant, X) where Constant is not all-ones SDValue XorOp, OtherOp; - if (AndOp0.getOpcode() == ISD::XOR) { - XorOp = AndOp0; - OtherOp = AndOp1; - } else if (AndOp1.getOpcode() == ISD::XOR) { - XorOp = AndOp1; - OtherOp = AndOp0; + APInt XorConst; + + // Try first operand - m_Xor handles commutativity for XOR operands + if (sd_match(InnerAndOp0, m_Xor(m_ConstInt(XorConst), m_Value())) && + !XorConst.isAllOnes()) { + XorOp = InnerAndOp0; + OtherOp = InnerAndOp1; + } else if (sd_match(InnerAndOp1, m_Xor(m_ConstInt(XorConst), m_Value())) && + !XorConst.isAllOnes()) { + XorOp = InnerAndOp1; + OtherOp = InnerAndOp0; } else { - // No XOR found in AND operands, continue to next optimization + // Pattern doesn't match - no XOR(Constant, X) found + XorOp = SDValue(); } - // If we found XOR, check if it has a constant operand (and not all-ones constant to avoid NOT) - if (XorOp && OtherOp && - ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) && - !isAllOnesConstant(XorOp.getOperand(0))) || - (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) && - !isAllOnesConstant(XorOp.getOperand(1))))) { - // Prevent infinite loops: only apply if OtherOp is not also a NOT - if (!(OtherOp.getOpcode() == ISD::XOR && - DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) && - isAllOnesConstant(OtherOp.getOperand(1)))) { - // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) - // To: AND(XOR(Constant, a), AND(b, 
NOT(c))) - // This allows the andn (b & ~c) to be done in parallel with the xor - SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp); - return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); - } + // If we found the pattern, apply the transformation + // Prevent infinite loops by checking OtherOp is not also a NOT + if (XorOp && !sd_match(OtherOp, m_Xor(m_Value(), m_AllOnes()))) { + // Get the NOT node (either N0 or N1) + SDValue NotOp = sd_match(N0, m_Xor(m_Value(), m_AllOnes())) ? N0 : N1; + + // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) + // To: AND(XOR(Constant, a), AND(b, NOT(c))) + SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp); + return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); } } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d32cb680594c6..cdc97faf394ca 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51541,7 +51541,6 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG, return SDValue(); } - /// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available. 
static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL, SelectionDAG &DAG, diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll index 923d065962081..76056a413f904 100644 --- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll +++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=-bmi < %s | FileCheck %s --check-prefixes=CHECK,NOBMI +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefixes=CHECK,BMI ; Test the optimization described in issue #161630: ; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor @@ -7,9 +8,13 @@ define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test_constant_xor_and_andnot: ; CHECK: # %bb.0: +; NOBMI-NEXT: movq %rdx, %rax ; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 -; CHECK-NEXT: andnq %rsi, %rdx, %rax -; CHECK-NEXT: andq %rdi, %rax +; NOBMI-NEXT: andq %rsi, %rdi +; NOBMI-NEXT: notq %rax +; NOBMI-NEXT: andq %rdi, %rax +; BMI-NEXT: andnq %rsi, %rdx, %rax +; BMI-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %xor = xor i64 %a, 1234 %and1 = and i64 %xor, %b @@ -21,9 +26,13 @@ define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) { define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test_constant_xor_and_andnot_32: ; CHECK: # %bb.0: +; NOBMI-NEXT: movl %edx, %eax ; CHECK-NEXT: xorl $5678, %edi # imm = 0x162E -; CHECK-NEXT: andnl %esi, %edx, %eax -; CHECK-NEXT: andl %edi, %eax +; NOBMI-NEXT: andl %esi, %edi +; NOBMI-NEXT: notl %eax +; NOBMI-NEXT: andl %edi, %eax +; BMI-NEXT: andnl %esi, %edx, %eax +; BMI-NEXT: andl %edi, %eax ; CHECK-NEXT: retq %xor = xor i32 %a, 5678 %and1 = and i32 %xor, %b @@ -36,9 +45,13 @@ define 
i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) { define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test_constant_xor_and_andnot_swapped: ; CHECK: # %bb.0: +; NOBMI-NEXT: movq %rdx, %rax ; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 -; CHECK-NEXT: andnq %rsi, %rdx, %rax -; CHECK-NEXT: andq %rdi, %rax +; NOBMI-NEXT: andq %rsi, %rdi +; NOBMI-NEXT: notq %rax +; NOBMI-NEXT: andq %rdi, %rax +; BMI-NEXT: andnq %rsi, %rdx, %rax +; BMI-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %xor = xor i64 %a, 1234 %and1 = and i64 %b, %xor @@ -51,9 +64,13 @@ define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) { define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped: ; CHECK: # %bb.0: +; NOBMI-NEXT: movq %rdx, %rax ; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 -; CHECK-NEXT: andnq %rsi, %rdx, %rax -; CHECK-NEXT: andq %rdi, %rax +; NOBMI-NEXT: andq %rsi, %rdi +; NOBMI-NEXT: notq %rax +; NOBMI-NEXT: andq %rdi, %rax +; BMI-NEXT: andnq %rsi, %rdx, %rax +; BMI-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %xor = xor i64 %a, 1234 %and1 = and i64 %xor, %b From 6fcb51fe8f0d9e275f92f142af7d663c5a267b4a Mon Sep 17 00:00:00 2001 From: Manik Mukherjee Date: Fri, 10 Oct 2025 02:46:22 -0400 Subject: [PATCH 6/9] only changed modified lines of code --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 36c1721ef7331..f00d458a2eff8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1268,12 +1268,12 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, if (Opc == ISD::AND && TLI.hasAndNot(N1)) { // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c)) // Transform to: AND(XOR(Constant, 
a), AND(b, NOT(c))) - + // Check if N1 is NOT(c) - i.e., XOR(c, -1) - if (N1.getOpcode() == ISD::XOR && + if (N1.getOpcode() == ISD::XOR && DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) && isAllOnesConstant(N1.getOperand(1))) { - + // Check if one operand of N0 is XOR(Constant, a) SDValue XorOp, OtherOp; if (N00.getOpcode() == ISD::XOR) { @@ -1285,7 +1285,7 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, } else { return SDValue(); } - + // Check if XOR has a constant operand if (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) || DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1))) { @@ -7557,24 +7557,26 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // This allows the andn operation to be done in parallel with the xor if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) { SDValue InnerAndOp0, InnerAndOp1, NotArg; - + // Match: AND(AND(Op0, Op1), NOT(NotArg)) // where NOT is represented as XOR with all-ones // m_And automatically handles commutativity - if (sd_match(N, m_And(m_OneUse(m_And(m_Value(InnerAndOp0), + if (sd_match(N, m_And(m_OneUse(m_And(m_Value(InnerAndOp0), m_Value(InnerAndOp1))), m_Xor(m_Value(NotArg), m_AllOnes())))) { - - // Determine which operand is XOR(Constant, X) where Constant is not all-ones + + // Determine which operand is XOR(Constant, X) where Constant is not + // all-ones SDValue XorOp, OtherOp; APInt XorConst; - + // Try first operand - m_Xor handles commutativity for XOR operands if (sd_match(InnerAndOp0, m_Xor(m_ConstInt(XorConst), m_Value())) && !XorConst.isAllOnes()) { XorOp = InnerAndOp0; OtherOp = InnerAndOp1; - } else if (sd_match(InnerAndOp1, m_Xor(m_ConstInt(XorConst), m_Value())) && + } else if (sd_match(InnerAndOp1, + m_Xor(m_ConstInt(XorConst), m_Value())) && !XorConst.isAllOnes()) { XorOp = InnerAndOp1; OtherOp = InnerAndOp0; @@ -7582,13 +7584,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Pattern doesn't match - no XOR(Constant, X) found XorOp = SDValue(); 
} - + // If we found the pattern, apply the transformation // Prevent infinite loops by checking OtherOp is not also a NOT if (XorOp && !sd_match(OtherOp, m_Xor(m_Value(), m_AllOnes()))) { // Get the NOT node (either N0 or N1) SDValue NotOp = sd_match(N0, m_Xor(m_Value(), m_AllOnes())) ? N0 : N1; - + // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) // To: AND(XOR(Constant, a), AND(b, NOT(c))) SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp); From 9022deb5a772f3e05bd309b24b23af53079e9402 Mon Sep 17 00:00:00 2001 From: Manik Mukherjee Date: Fri, 10 Oct 2025 03:42:40 -0400 Subject: [PATCH 7/9] refactored to use sd match --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 48 ++++++++----------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f00d458a2eff8..9cff8380b2a1e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1265,37 +1265,31 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) // This allows the andn operation to be done in parallel with the xor - if (Opc == ISD::AND && TLI.hasAndNot(N1)) { + if (Opc == ISD::AND && (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) && + sd_match(N1, m_Xor(m_Value(), m_AllOnes()))) { // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c)) // Transform to: AND(XOR(Constant, a), AND(b, NOT(c))) - // Check if N1 is NOT(c) - i.e., XOR(c, -1) - if (N1.getOpcode() == ISD::XOR && - DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) && - isAllOnesConstant(N1.getOperand(1))) { - - // Check if one operand of N0 is XOR(Constant, a) - SDValue XorOp, OtherOp; - if (N00.getOpcode() == ISD::XOR) { - XorOp = N00; - OtherOp = N01; - } else if (N01.getOpcode() == ISD::XOR) { - XorOp = N01; - OtherOp = N00; - } else { - return SDValue(); - } - - // Check 
if XOR has a constant operand - if (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) || - DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1))) { - // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) - // To: AND(XOR(Constant, a), AND(b, NOT(c))) - // This allows the andn (b & ~c) to be done in parallel with the xor - SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1); - return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); - } + SDValue XorOp, OtherOp; + APInt XorConst; + + // Check which operand of N0 is XOR(Constant, X) + if (sd_match(N00, m_Xor(m_ConstInt(XorConst), m_Value())) && + !XorConst.isAllOnes()) { + XorOp = N00; + OtherOp = N01; + } else if (sd_match(N01, m_Xor(m_ConstInt(XorConst), m_Value())) && + !XorConst.isAllOnes()) { + XorOp = N01; + OtherOp = N00; + } else { + return SDValue(); } + + // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) + // To: AND(XOR(Constant, a), AND(b, NOT(c))) + SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1); + return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); } if (Opc == ISD::XOR) { // (N00 ^ N01) ^ N00 --> N01 From 7e6ffa78246c809a2562f188b6eb420c0e1047ba Mon Sep 17 00:00:00 2001 From: Manik Mukherjee Date: Mon, 13 Oct 2025 02:08:45 -0400 Subject: [PATCH 8/9] addressed comments --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++-- .../CodeGen/X86/constant-xor-and-andnot.ll | 72 ++++++++++++------- 2 files changed, 55 insertions(+), 35 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9cff8380b2a1e..af97a7dc76c66 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1273,16 +1273,14 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue XorOp, OtherOp; APInt XorConst; - // Check which operand of N0 is XOR(Constant, X) - if (sd_match(N00, m_Xor(m_ConstInt(XorConst), m_Value())) && - 
!XorConst.isAllOnes()) { - XorOp = N00; - OtherOp = N01; - } else if (sd_match(N01, m_Xor(m_ConstInt(XorConst), m_Value())) && - !XorConst.isAllOnes()) { - XorOp = N01; - OtherOp = N00; - } else { + // Match AND(XOR(X, Constant), b) in either operand order + // Constants are canonicalized to RHS, so we can rely on that + // Use m_c_BinOp to handle commutativity of the AND + if (!sd_match(N0, m_c_BinOp(ISD::AND, + m_AllOf(m_Xor(m_Value(), m_ConstInt(XorConst)), + m_Value(XorOp)), + m_Value(OtherOp))) || + XorConst.isAllOnes()) { return SDValue(); } diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll index 76056a413f904..150f47423c705 100644 --- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll +++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll @@ -6,16 +6,21 @@ ; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) { -; CHECK-LABEL: test_constant_xor_and_andnot: -; CHECK: # %bb.0: +; NOBMI-LABEL: test_constant_xor_and_andnot: +; NOBMI: # %bb.0: ; NOBMI-NEXT: movq %rdx, %rax -; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 +; NOBMI-NEXT: xorq $1234, %rdi # imm = 0x4D2 ; NOBMI-NEXT: andq %rsi, %rdi ; NOBMI-NEXT: notq %rax ; NOBMI-NEXT: andq %rdi, %rax -; BMI-NEXT: andnq %rsi, %rdx, %rax -; BMI-NEXT: andq %rdi, %rax -; CHECK-NEXT: retq +; NOBMI-NEXT: retq +; +; BMI-LABEL: test_constant_xor_and_andnot: +; BMI: # %bb.0: +; BMI-NEXT: xorq $1234, %rdi # imm = 0x4D2 +; BMI-NEXT: andnq %rsi, %rdx, %rax +; BMI-NEXT: andq %rdi, %rax +; BMI-NEXT: retq %xor = xor i64 %a, 1234 %and1 = and i64 %xor, %b %not_c = xor i64 %c, -1 @@ -24,16 +29,21 @@ define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) { } define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: test_constant_xor_and_andnot_32: -; CHECK: # %bb.0: +; NOBMI-LABEL: test_constant_xor_and_andnot_32: +; NOBMI: # %bb.0: ; 
NOBMI-NEXT: movl %edx, %eax -; CHECK-NEXT: xorl $5678, %edi # imm = 0x162E +; NOBMI-NEXT: xorl $5678, %edi # imm = 0x162E ; NOBMI-NEXT: andl %esi, %edi ; NOBMI-NEXT: notl %eax ; NOBMI-NEXT: andl %edi, %eax -; BMI-NEXT: andnl %esi, %edx, %eax -; BMI-NEXT: andl %edi, %eax -; CHECK-NEXT: retq +; NOBMI-NEXT: retq +; +; BMI-LABEL: test_constant_xor_and_andnot_32: +; BMI: # %bb.0: +; BMI-NEXT: xorl $5678, %edi # imm = 0x162E +; BMI-NEXT: andnl %esi, %edx, %eax +; BMI-NEXT: andl %edi, %eax +; BMI-NEXT: retq %xor = xor i32 %a, 5678 %and1 = and i32 %xor, %b %not_c = xor i32 %c, -1 @@ -43,16 +53,21 @@ define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) { ; Test with different operand order define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) { -; CHECK-LABEL: test_constant_xor_and_andnot_swapped: -; CHECK: # %bb.0: +; NOBMI-LABEL: test_constant_xor_and_andnot_swapped: +; NOBMI: # %bb.0: ; NOBMI-NEXT: movq %rdx, %rax -; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 +; NOBMI-NEXT: xorq $1234, %rdi # imm = 0x4D2 ; NOBMI-NEXT: andq %rsi, %rdi ; NOBMI-NEXT: notq %rax ; NOBMI-NEXT: andq %rdi, %rax -; BMI-NEXT: andnq %rsi, %rdx, %rax -; BMI-NEXT: andq %rdi, %rax -; CHECK-NEXT: retq +; NOBMI-NEXT: retq +; +; BMI-LABEL: test_constant_xor_and_andnot_swapped: +; BMI: # %bb.0: +; BMI-NEXT: xorq $1234, %rdi # imm = 0x4D2 +; BMI-NEXT: andnq %rsi, %rdx, %rax +; BMI-NEXT: andq %rdi, %rax +; BMI-NEXT: retq %xor = xor i64 %a, 1234 %and1 = and i64 %b, %xor %not_c = xor i64 %c, -1 @@ -62,19 +77,26 @@ define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) { ; Test with different operand order for the final AND define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) { -; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped: -; CHECK: # %bb.0: +; NOBMI-LABEL: test_constant_xor_and_andnot_final_swapped: +; NOBMI: # %bb.0: ; NOBMI-NEXT: movq %rdx, %rax -; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2 +; NOBMI-NEXT: xorq $1234, 
%rdi # imm = 0x4D2 ; NOBMI-NEXT: andq %rsi, %rdi ; NOBMI-NEXT: notq %rax ; NOBMI-NEXT: andq %rdi, %rax -; BMI-NEXT: andnq %rsi, %rdx, %rax -; BMI-NEXT: andq %rdi, %rax -; CHECK-NEXT: retq +; NOBMI-NEXT: retq +; +; BMI-LABEL: test_constant_xor_and_andnot_final_swapped: +; BMI: # %bb.0: +; BMI-NEXT: xorq $1234, %rdi # imm = 0x4D2 +; BMI-NEXT: andnq %rsi, %rdx, %rax +; BMI-NEXT: andq %rdi, %rax +; BMI-NEXT: retq %xor = xor i64 %a, 1234 %and1 = and i64 %xor, %b %not_c = xor i64 %c, -1 %result = and i64 %not_c, %and1 ret i64 %result -} \ No newline at end of file +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} From de88c96008de1b326a6a9c0d60d06d6d6f532c21 Mon Sep 17 00:00:00 2001 From: Manik Mukherjee Date: Tue, 14 Oct 2025 23:04:48 -0400 Subject: [PATCH 9/9] made use case more generalizable to all similar patterns --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 40 ++++++++----------- ...-xor-and-andnot.ll => andn-reassociate.ll} | 26 +++++++++++- 2 files changed, 41 insertions(+), 25 deletions(-) rename llvm/test/CodeGen/X86/{constant-xor-and-andnot.ll => andn-reassociate.ll} (81%) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index af97a7dc76c66..b414270641443 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1263,31 +1263,25 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, return N0; } - // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c) - // This allows the andn operation to be done in parallel with the xor + // Optimize X & b & ~c -> X & (b & ~c) when ANDN is available + // This allows the ANDN operation to be done in parallel with computing X if (Opc == ISD::AND && (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) && - sd_match(N1, m_Xor(m_Value(), m_AllOnes()))) { - // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c)) 
- // Transform to: AND(XOR(Constant, a), AND(b, NOT(c))) - - SDValue XorOp, OtherOp; - APInt XorConst; - - // Match AND(XOR(X, Constant), b) in either operand order - // Constants are canonicalized to RHS, so we can rely on that - // Use m_c_BinOp to handle commutativity of the AND - if (!sd_match(N0, m_c_BinOp(ISD::AND, - m_AllOf(m_Xor(m_Value(), m_ConstInt(XorConst)), - m_Value(XorOp)), - m_Value(OtherOp))) || - XorConst.isAllOnes()) { - return SDValue(); - } + sd_match(N1, m_Not(m_Value()))) { + // Look for pattern: AND(AND(X, b), NOT(c)) + // Transform to: AND(X, AND(b, NOT(c))) - // Transform: AND(AND(XOR(Constant, a), b), NOT(c)) - // To: AND(XOR(Constant, a), AND(b, NOT(c))) - SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1); - return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd); + SDValue X, B; + + // Match AND(X, b) - check that N0 is an AND with one use + if (N0.getOpcode() == ISD::AND && N0->hasOneUse()) { + X = N00; + B = N01; + + // Transform: AND(AND(X, b), NOT(c)) + // To: AND(X, AND(b, NOT(c))) + SDValue AndBC = DAG.getNode(ISD::AND, DL, VT, B, N1); + return DAG.getNode(ISD::AND, DL, VT, X, AndBC); + } } if (Opc == ISD::XOR) { // (N00 ^ N01) ^ N00 --> N01 diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/andn-reassociate.ll similarity index 81% rename from llvm/test/CodeGen/X86/constant-xor-and-andnot.ll rename to llvm/test/CodeGen/X86/andn-reassociate.ll index 150f47423c705..720702ddc1045 100644 --- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll +++ b/llvm/test/CodeGen/X86/andn-reassociate.ll @@ -2,8 +2,8 @@ ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=-bmi < %s | FileCheck %s --check-prefixes=CHECK,NOBMI ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefixes=CHECK,BMI -; Test the optimization described in issue #161630: -; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor +; Test the optimization: X & b & ~c -> X & (b & 
~c) +; This reassociation allows ANDN to execute in parallel with computing X define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) { ; NOBMI-LABEL: test_constant_xor_and_andnot: @@ -98,5 +98,27 @@ define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) { %result = and i64 %not_c, %and1 ret i64 %result } + +define i64 @test_add_and_andnot(i64 %a, i64 %b, i64 %c) { +; NOBMI-LABEL: test_add_and_andnot: +; NOBMI: # %bb.0: +; NOBMI-NEXT: leaq 5678(%rdi), %rax +; NOBMI-NEXT: andq %rsi, %rax +; NOBMI-NEXT: notq %rdx +; NOBMI-NEXT: andq %rdx, %rax +; NOBMI-NEXT: retq +; +; BMI-LABEL: test_add_and_andnot: +; BMI: # %bb.0: +; BMI-NEXT: leaq 5678(%rdi), %rcx +; BMI-NEXT: andnq %rsi, %rdx, %rax +; BMI-NEXT: andq %rcx, %rax +; BMI-NEXT: retq + %add = add i64 %a, 5678 + %and1 = and i64 %add, %b + %not_c = xor i64 %c, -1 + %result = and i64 %and1, %not_c + ret i64 %result +} ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK: {{.*}}