Skip to content
63 changes: 63 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51541,6 +51541,64 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
/// This allows the andn operation to be done in parallel with the xor
static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL,
                                           SelectionDAG &DAG,
                                           const X86Subtarget &Subtarget) {
  using namespace llvm::SDPatternMatch;

  // ANDN is provided by BMI and only exists for 32/64-bit GPRs.
  EVT VT = N->getValueType(0);
  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Check if N0 is AND(XOR(Constant, a), b). Require a single use: if the
  // inner AND has other users it survives the rewrite, so reassociating
  // would only add a node instead of re-shaping the dependency chain.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  SDValue AndLHS = N0.getOperand(0);
  SDValue AndRHS = N0.getOperand(1);

  // One operand of the inner AND must be a XOR (the constant may sit on
  // either side of it).
  SDValue XorOp, OtherOp;
  if (AndLHS.getOpcode() == ISD::XOR) {
    XorOp = AndLHS;
    OtherOp = AndRHS;
  } else if (AndRHS.getOpcode() == ISD::XOR) {
    XorOp = AndRHS;
    OtherOp = AndLHS;
  } else {
    return SDValue();
  }

  // The XOR must have a constant operand.
  if (!isa<ConstantSDNode>(XorOp.getOperand(0)) &&
      !isa<ConstantSDNode>(XorOp.getOperand(1)))
    return SDValue();

  // N1 must be NOT(c) - i.e. XOR(c, all-ones) - which lowers to ANDN.
  if (!sd_match(N1, m_Not(m_Value())))
    return SDValue();

  // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
  //        To: AND(XOR(Constant, a), AND(b, NOT(c)))
  // The new AND(b, NOT(c)) becomes an andn that no longer depends on the
  // xor result, so the two can execute in parallel.
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
  return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
}

/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
SelectionDAG &DAG,
Expand Down Expand Up @@ -51833,6 +51891,11 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG))
return R;

// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
// This allows the andn operation to be done in parallel with the xor
if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget))
return R;

// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s

; Test the optimization described in issue #161630:
; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor

; i64 case: (a ^ 1234) & b & ~c. The andn (b & ~c) reads only %rsi/%rdx,
; so it is independent of the xor on %rdi and the two can issue in parallel.
define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot:
; CHECK: # %bb.0:
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
; CHECK-NEXT: andnq %rsi, %rdx, %rax
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %xor, %b
%not_c = xor i64 %c, -1
%result = and i64 %and1, %not_c
ret i64 %result
}

; i32 variant of the same pattern: xor and andn are independent of each other.
define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_32:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl $5678, %edi # imm = 0x162E
; CHECK-NEXT: andnl %esi, %edx, %eax
; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%xor = xor i32 %a, 5678
%and1 = and i32 %xor, %b
%not_c = xor i32 %c, -1
%result = and i32 %and1, %not_c
ret i32 %result
}

; Test with different operand order
; Test with different operand order
; Here the xor is the RHS of the inner and (%b & %xor); the combine still
; fires and produces the same parallel xor/andn schedule as the first test.
define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
; CHECK: # %bb.0:
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
; CHECK-NEXT: andnq %rsi, %rdx, %rax
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %b, %xor
%not_c = xor i64 %c, -1
%result = and i64 %and1, %not_c
ret i64 %result
}

; Test with different operand order for the final AND
; Test with different operand order for the final AND
; NOTE(review): with the not on the LHS of the final and (~c & (xor & b)),
; the checked output is xor -> and -> andn (a serial chain) rather than the
; parallel schedule above — confirm whether the commuted form should also be
; reassociated by the combine.
define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
; CHECK: # %bb.0:
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
; CHECK-NEXT: andq %rsi, %rdi
; CHECK-NEXT: andnq %rdi, %rdx, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %xor, %b
%not_c = xor i64 %c, -1
%result = and i64 %not_c, %and1
ret i64 %result
}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/pr108731.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ define i64 @test_i64(i64 %w, i64 %x, i64 %y, i64 %z) {
; BMI-LABEL: test_i64:
; BMI: # %bb.0: # %Entry
; BMI-NEXT: andq %rdx, %rsi
; BMI-NEXT: andnq %rdi, %rsi, %rax
; BMI-NEXT: andnq %rcx, %rdx, %rcx
; BMI-NEXT: andnq %rax, %rcx, %rax
; BMI-NEXT: andnq %rcx, %rdx, %rax
; BMI-NEXT: andnq %rdi, %rax, %rax
; BMI-NEXT: andnq %rax, %rsi, %rax
; BMI-NEXT: retq
Entry:
%and1 = and i64 %y, %x
Expand All @@ -46,9 +46,9 @@ define i32 @test_i32(i32 %w, i32 %x, i32 %y, i32 %z) {
; BMI-LABEL: test_i32:
; BMI: # %bb.0: # %Entry
; BMI-NEXT: andl %edx, %esi
; BMI-NEXT: andnl %edi, %esi, %eax
; BMI-NEXT: andnl %ecx, %edx, %ecx
; BMI-NEXT: andnl %eax, %ecx, %eax
; BMI-NEXT: andnl %ecx, %edx, %eax
; BMI-NEXT: andnl %edi, %eax, %eax
; BMI-NEXT: andnl %eax, %esi, %eax
; BMI-NEXT: retq
Entry:
%and1 = and i32 %y, %x
Expand Down
Loading