Skip to content

Commit af5dcb5

Browse files
committed
added optimization and tests
1 parent 27af6bc commit af5dcb5

File tree

3 files changed

+132
-6
lines changed

3 files changed

+132
-6
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51541,6 +51541,64 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
5154151541
return SDValue();
5154251542
}
5154351543

51544+
/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
51545+
/// This allows the andn operation to be done in parallel with the xor
51546+
static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL,
51547+
SelectionDAG &DAG,
51548+
const X86Subtarget &Subtarget) {
51549+
using namespace llvm::SDPatternMatch;
51550+
51551+
EVT VT = N->getValueType(0);
51552+
// Only handle scalar integer types that support BMI instructions
51553+
if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
51554+
return SDValue();
51555+
51556+
SDValue N0 = N->getOperand(0);
51557+
SDValue N1 = N->getOperand(1);
51558+
51559+
// Check if N0 is AND(XOR(Constant, a), b)
51560+
if (N0.getOpcode() != ISD::AND)
51561+
return SDValue();
51562+
51563+
SDValue AndLHS = N0.getOperand(0);
51564+
SDValue AndRHS = N0.getOperand(1);
51565+
51566+
// Check if one operand is XOR(Constant, a)
51567+
SDValue XorOp, OtherOp;
51568+
if (AndLHS.getOpcode() == ISD::XOR) {
51569+
XorOp = AndLHS;
51570+
OtherOp = AndRHS;
51571+
} else if (AndRHS.getOpcode() == ISD::XOR) {
51572+
XorOp = AndRHS;
51573+
OtherOp = AndLHS;
51574+
} else {
51575+
return SDValue();
51576+
}
51577+
51578+
// Check if XOR has a constant operand
51579+
if (!isa<ConstantSDNode>(XorOp.getOperand(0)) &&
51580+
!isa<ConstantSDNode>(XorOp.getOperand(1))) {
51581+
return SDValue();
51582+
}
51583+
51584+
// Check if N1 is NOT(c) - i.e., XOR(c, -1)
51585+
SDValue NotOp;
51586+
if (N1.getOpcode() == ISD::XOR && isAllOnesConstant(N1.getOperand(1))) {
51587+
NotOp = N1.getOperand(0);
51588+
} else {
51589+
return SDValue();
51590+
}
51591+
51592+
// Transform: AND(AND(XOR(Constant, a), b), NOT(c))
51593+
// To: AND(XOR(Constant, a), AND(b, NOT(c)))
51594+
// This allows the andn (b & ~c) to be done in parallel with the xor
51595+
51596+
// Create AND(b, NOT(c)) - this will become andn
51597+
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
51598+
// Create final AND(XOR(Constant, a), AND(b, NOT(c)))
51599+
return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
51600+
}
51601+
5154451602
/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
5154551603
static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
5154651604
SelectionDAG &DAG,
@@ -51833,6 +51891,11 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5183351891
if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG))
5183451892
return R;
5183551893

51894+
// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
51895+
// This allows the andn operation to be done in parallel with the xor
51896+
if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget))
51897+
return R;
51898+
5183651899
// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
5183751900
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
5183851901
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s
3+
4+
; Test the optimization described in issue #161630:
5+
; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor
6+
7+
; i64 case: ((a ^ 1234) & b) & ~c. The expected andn reads only %rsi and %rdx,
; not the xor result in %rdi, so it has no data dependency on the xor.
define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
8+
; CHECK-LABEL: test_constant_xor_and_andnot:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
11+
; CHECK-NEXT: andnq %rsi, %rdx, %rax
12+
; CHECK-NEXT: andq %rdi, %rax
13+
; CHECK-NEXT: retq
14+
%xor = xor i64 %a, 1234
15+
%and1 = and i64 %xor, %b
16+
%not_c = xor i64 %c, -1
17+
%result = and i64 %and1, %not_c
18+
ret i64 %result
19+
}
20+
21+
; i32 variant of ((a ^ C) & b) & ~c, using the constant 5678; the expected
; output uses the 32-bit xorl/andnl/andl forms.
define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
22+
; CHECK-LABEL: test_constant_xor_and_andnot_32:
23+
; CHECK: # %bb.0:
24+
; CHECK-NEXT: xorl $5678, %edi # imm = 0x162E
25+
; CHECK-NEXT: andnl %esi, %edx, %eax
26+
; CHECK-NEXT: andl %edi, %eax
27+
; CHECK-NEXT: retq
28+
%xor = xor i32 %a, 5678
29+
%and1 = and i32 %xor, %b
30+
%not_c = xor i32 %c, -1
31+
%result = and i32 %and1, %not_c
32+
ret i32 %result
33+
}
34+
35+
; Test with the operands of the inner AND swapped: (b & (a ^ 1234)) & ~c.
; The expected instructions are the same as for test_constant_xor_and_andnot.
36+
define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
37+
; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
38+
; CHECK: # %bb.0:
39+
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
40+
; CHECK-NEXT: andnq %rsi, %rdx, %rax
41+
; CHECK-NEXT: andq %rdi, %rax
42+
; CHECK-NEXT: retq
43+
%xor = xor i64 %a, 1234
44+
%and1 = and i64 %b, %xor
45+
%not_c = xor i64 %c, -1
46+
%result = and i64 %and1, %not_c
47+
ret i64 %result
48+
}
49+
50+
; Test with the operands of the final AND swapped: ~c & ((a ^ 1234) & b).
; NOTE: in the expected output below the andn consumes the result of the and
; (%rdi), so for this operand order the andn is still serialised behind the
; xor rather than running in parallel with it.
50+
define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
51+
; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
52+
; CHECK: # %bb.0:
53+
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
54+
; CHECK-NEXT: andq %rsi, %rdi
55+
; CHECK-NEXT: andnq %rdi, %rdx, %rax
56+
; CHECK-NEXT: retq
57+
%xor = xor i64 %a, 1234
58+
%and1 = and i64 %xor, %b
59+
%not_c = xor i64 %c, -1
60+
%result = and i64 %not_c, %and1
61+
ret i64 %result
62+
}

llvm/test/CodeGen/X86/pr108731.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ define i64 @test_i64(i64 %w, i64 %x, i64 %y, i64 %z) {
1717
; BMI-LABEL: test_i64:
1818
; BMI: # %bb.0: # %Entry
1919
; BMI-NEXT: andq %rdx, %rsi
20-
; BMI-NEXT: andnq %rdi, %rsi, %rax
21-
; BMI-NEXT: andnq %rcx, %rdx, %rcx
22-
; BMI-NEXT: andnq %rax, %rcx, %rax
20+
; BMI-NEXT: andnq %rcx, %rdx, %rax
21+
; BMI-NEXT: andnq %rdi, %rax, %rax
22+
; BMI-NEXT: andnq %rax, %rsi, %rax
2323
; BMI-NEXT: retq
2424
Entry:
2525
%and1 = and i64 %y, %x
@@ -46,9 +46,9 @@ define i32 @test_i32(i32 %w, i32 %x, i32 %y, i32 %z) {
4646
; BMI-LABEL: test_i32:
4747
; BMI: # %bb.0: # %Entry
4848
; BMI-NEXT: andl %edx, %esi
49-
; BMI-NEXT: andnl %edi, %esi, %eax
50-
; BMI-NEXT: andnl %ecx, %edx, %ecx
51-
; BMI-NEXT: andnl %eax, %ecx, %eax
49+
; BMI-NEXT: andnl %ecx, %edx, %eax
50+
; BMI-NEXT: andnl %edi, %eax, %eax
51+
; BMI-NEXT: andnl %eax, %esi, %eax
5252
; BMI-NEXT: retq
5353
Entry:
5454
%and1 = and i32 %y, %x

0 commit comments

Comments
 (0)