diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8f904209d8a3a..e3931e4790b5e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23157,10 +23157,17 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return Add.getValue(1); } - // Use SUB instead of CMP to enable CSE between SUB and CMP. + // If we already have an XOR of the ops, use that to check for equality. + // Else use SUB instead of CMP to enable CSE between SUB and CMP. + unsigned X86Opc = X86ISD::SUB; + if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && + (DAG.doesNodeExist(ISD::XOR, DAG.getVTList({CmpVT}), {Op0, Op1}) || + DAG.doesNodeExist(ISD::XOR, DAG.getVTList({CmpVT}), {Op1, Op0}))) + X86Opc = X86ISD::XOR; + SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32); - SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1); - return Sub.getValue(1); + SDValue CmpOp = DAG.getNode(X86Opc, dl, VTs, Op0, Op1); + return CmpOp.getValue(1); } bool X86TargetLowering::isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, diff --git a/llvm/test/CodeGen/X86/cmp-xor.ll b/llvm/test/CodeGen/X86/cmp-xor.ll index 6be5cea383032..a8a7da4871ba9 100644 --- a/llvm/test/CodeGen/X86/cmp-xor.ll +++ b/llvm/test/CodeGen/X86/cmp-xor.ll @@ -9,22 +9,18 @@ define i32 @cmp_xor_i32(i32 %a, i32 %b, i32 %c) ; X86-LABEL: cmp_xor_i32: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: je .LBB0_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: retl -; X86-NEXT: .LBB0_1: +; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-NEXT: jne .LBB0_2 +; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: .LBB0_2: ; X86-NEXT: retl ; ; X64-LABEL: cmp_xor_i32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: xorl %esi, %eax -; X64-NEXT: cmpl %esi, %edi ; X64-NEXT: cmovel %edx, %eax ; X64-NEXT: retq { @@ -37,22 +33,18 @@ define i32 @cmp_xor_i32(i32 %a, i32 %b, i32 %c) define i32 @cmp_xor_i32_commute(i32 %a, i32 %b, i32 %c) ; X86-LABEL: cmp_xor_i32_commute: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: je .LBB1_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: retl -; X86-NEXT: .LBB1_1: +; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-NEXT: jne .LBB1_2 +; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: .LBB1_2: ; X86-NEXT: retl ; ; X64-LABEL: cmp_xor_i32_commute: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: xorl %edi, %eax -; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %eax ; X64-NEXT: cmovel %edx, %eax ; X64-NEXT: retq { diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll index 90fb76a23e9a5..8a726a469b61e 100644 --- a/llvm/test/CodeGen/X86/pr32284.ll +++ b/llvm/test/CodeGen/X86/pr32284.ll @@ -321,11 +321,9 @@ define void @f2() { ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: testl %eax, %eax ; X64-NEXT: sete %cl -; X64-NEXT: movl %eax, %edx -; X64-NEXT: xorl %ecx, %edx -; X64-NEXT: movw %dx, -{{[0-9]+}}(%rsp) ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpl %eax, %ecx +; X64-NEXT: xorl %eax, %ecx +; X64-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; X64-NEXT: sete %dl ; X64-NEXT: movw %dx, (%rax) ; X64-NEXT: retq @@ -366,17 +364,15 @@ define void @f2() { ; X86: # %bb.0: # %entry ; X86-NEXT: subl $2, %esp ; X86-NEXT: .cfi_def_cfa_offset 6 -; X86-NEXT: movzbl var_7, %ecx +; X86-NEXT: movzbl var_7, %edx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: testl %edx, %edx ; X86-NEXT: sete %al -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: xorl %eax, %edx -; X86-NEXT: movw %dx, (%esp) -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: sete %dl -; X86-NEXT: movw %dx, (%eax) +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: movw %ax, (%esp) +; X86-NEXT: sete %cl +; X86-NEXT: movw %cx, (%eax) ; X86-NEXT: addl $2, %esp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl