Skip to content

Commit e58edd4

Browse files
committed
[X86] EmitCmp - use existing XOR node to check for equality
Normally, we use the flag result of a SUB for scalar comparisons, as it's more compatible with CMP, but if we're testing for equality and already have an XOR of the same operands, we can reuse that instead. Fixes #6146
1 parent 9216419 commit e58edd4

File tree

3 files changed

+29
-34
lines changed

3 files changed

+29
-34
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23157,10 +23157,17 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
2315723157
return Add.getValue(1);
2315823158
}
2315923159

23160-
// Use SUB instead of CMP to enable CSE between SUB and CMP.
23160+
// If we already have an XOR of the ops, use that to check for equality.
23161+
// Else use SUB instead of CMP to enable CSE between SUB and CMP.
23162+
unsigned X86Opc = X86ISD::SUB;
23163+
if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
23164+
(DAG.doesNodeExist(ISD::XOR, DAG.getVTList({CmpVT}), {Op0, Op1}) ||
23165+
DAG.doesNodeExist(ISD::XOR, DAG.getVTList({CmpVT}), {Op1, Op0})))
23166+
X86Opc = X86ISD::XOR;
23167+
2316123168
SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
23162-
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
23163-
return Sub.getValue(1);
23169+
SDValue CmpOp = DAG.getNode(X86Opc, dl, VTs, Op0, Op1);
23170+
return CmpOp.getValue(1);
2316423171
}
2316523172

2316623173
bool X86TargetLowering::isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,

llvm/test/CodeGen/X86/cmp-xor.ll

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,18 @@
99
define i32 @cmp_xor_i32(i32 %a, i32 %b, i32 %c)
1010
; X86-LABEL: cmp_xor_i32:
1111
; X86: # %bb.0:
12-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1312
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
14-
; X86-NEXT: cmpl %ecx, %eax
15-
; X86-NEXT: je .LBB0_1
16-
; X86-NEXT: # %bb.2:
17-
; X86-NEXT: xorl %ecx, %eax
18-
; X86-NEXT: retl
19-
; X86-NEXT: .LBB0_1:
13+
; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
14+
; X86-NEXT: jne .LBB0_2
15+
; X86-NEXT: # %bb.1:
2016
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
17+
; X86-NEXT: .LBB0_2:
2118
; X86-NEXT: retl
2219
;
2320
; X64-LABEL: cmp_xor_i32:
2421
; X64: # %bb.0:
2522
; X64-NEXT: movl %edi, %eax
2623
; X64-NEXT: xorl %esi, %eax
27-
; X64-NEXT: cmpl %esi, %edi
2824
; X64-NEXT: cmovel %edx, %eax
2925
; X64-NEXT: retq
3026
{
@@ -37,22 +33,18 @@ define i32 @cmp_xor_i32(i32 %a, i32 %b, i32 %c)
3733
define i32 @cmp_xor_i32_commute(i32 %a, i32 %b, i32 %c)
3834
; X86-LABEL: cmp_xor_i32_commute:
3935
; X86: # %bb.0:
40-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
4136
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
42-
; X86-NEXT: cmpl %eax, %ecx
43-
; X86-NEXT: je .LBB1_1
44-
; X86-NEXT: # %bb.2:
45-
; X86-NEXT: xorl %ecx, %eax
46-
; X86-NEXT: retl
47-
; X86-NEXT: .LBB1_1:
37+
; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
38+
; X86-NEXT: jne .LBB1_2
39+
; X86-NEXT: # %bb.1:
4840
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
41+
; X86-NEXT: .LBB1_2:
4942
; X86-NEXT: retl
5043
;
5144
; X64-LABEL: cmp_xor_i32_commute:
5245
; X64: # %bb.0:
53-
; X64-NEXT: movl %esi, %eax
54-
; X64-NEXT: xorl %edi, %eax
55-
; X64-NEXT: cmpl %esi, %edi
46+
; X64-NEXT: movl %edi, %eax
47+
; X64-NEXT: xorl %esi, %eax
5648
; X64-NEXT: cmovel %edx, %eax
5749
; X64-NEXT: retq
5850
{

llvm/test/CodeGen/X86/pr32284.ll

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -321,11 +321,9 @@ define void @f2() {
321321
; X64-NEXT: xorl %ecx, %ecx
322322
; X64-NEXT: testl %eax, %eax
323323
; X64-NEXT: sete %cl
324-
; X64-NEXT: movl %eax, %edx
325-
; X64-NEXT: xorl %ecx, %edx
326-
; X64-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
327324
; X64-NEXT: xorl %edx, %edx
328-
; X64-NEXT: cmpl %eax, %ecx
325+
; X64-NEXT: xorl %eax, %ecx
326+
; X64-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
329327
; X64-NEXT: sete %dl
330328
; X64-NEXT: movw %dx, (%rax)
331329
; X64-NEXT: retq
@@ -366,17 +364,15 @@ define void @f2() {
366364
; X86: # %bb.0: # %entry
367365
; X86-NEXT: subl $2, %esp
368366
; X86-NEXT: .cfi_def_cfa_offset 6
369-
; X86-NEXT: movzbl var_7, %ecx
367+
; X86-NEXT: movzbl var_7, %edx
370368
; X86-NEXT: xorl %eax, %eax
371-
; X86-NEXT: testl %ecx, %ecx
369+
; X86-NEXT: testl %edx, %edx
372370
; X86-NEXT: sete %al
373-
; X86-NEXT: movl %ecx, %edx
374-
; X86-NEXT: xorl %eax, %edx
375-
; X86-NEXT: movw %dx, (%esp)
376-
; X86-NEXT: xorl %edx, %edx
377-
; X86-NEXT: cmpl %ecx, %eax
378-
; X86-NEXT: sete %dl
379-
; X86-NEXT: movw %dx, (%eax)
371+
; X86-NEXT: xorl %ecx, %ecx
372+
; X86-NEXT: xorl %edx, %eax
373+
; X86-NEXT: movw %ax, (%esp)
374+
; X86-NEXT: sete %cl
375+
; X86-NEXT: movw %cx, (%eax)
380376
; X86-NEXT: addl $2, %esp
381377
; X86-NEXT: .cfi_def_cfa_offset 4
382378
; X86-NEXT: retl

0 commit comments

Comments
 (0)