diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8136f1794775e..8eb3f95a30989 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6427,6 +6427,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
     }
   }
 
+  if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR && // equal operand pairs,
+      LHS0.getValueType() == RHS0.getValueType() && // matching types, and
+      ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) || // AND+SETO
+       (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO))) // or OR+SETUO:
+    return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL); // fold to a single setcc.
+
   if (TargetPreference == AndOrSETCCFoldKind::None)
     return SDValue();
 
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
new file mode 100644
index 0000000000000..21be9df94d553
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @and_ord(float %a, float %b) {
+; CHECK-LABEL: and_ord(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [and_ord_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [and_ord_param_1];
+; CHECK-NEXT:    setp.num.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp ord float %a, 0.0
+  %d = fcmp ord float %b, 0.0
+  %e = and i1 %c, %d
+  ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; CHECK-LABEL: or_uno(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [or_uno_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [or_uno_param_1];
+; CHECK-NEXT:    setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp uno float %a, 0.0
+  %d = fcmp uno float %b, 0.0
+  %e = or i1 %c, %d
+  ret i1 %e
+}
diff --git a/llvm/test/CodeGen/X86/and-or-setcc.ll b/llvm/test/CodeGen/X86/and-or-setcc.ll
new file mode 100644
index 0000000000000..cb8ecca9348e6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/and-or-setcc.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+define i1 @and_ord(float %a, float %b) {
+; X86-LABEL: and_ord:
+; X86:       # %bb.0:
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    fucompp
+; X86-NEXT:    fnstsw %ax
+; X86-NEXT:    # kill: def $ah killed $ah killed $ax
+; X86-NEXT:    sahf
+; X86-NEXT:    setnp %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: and_ord:
+; X64:       # %bb.0:
+; X64-NEXT:    xorps %xmm2, %xmm2
+; X64-NEXT:    cmpordps %xmm2, %xmm1
+; X64-NEXT:    cmpordps %xmm2, %xmm0
+; X64-NEXT:    andps %xmm1, %xmm0
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %c = fcmp ord float %a, 0.0
+  %d = fcmp ord float %b, 0.0
+  %e = and i1 %c, %d
+  ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; X86-LABEL: or_uno:
+; X86:       # %bb.0:
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    fucompp
+; X86-NEXT:    fnstsw %ax
+; X86-NEXT:    # kill: def $ah killed $ah killed $ax
+; X86-NEXT:    sahf
+; X86-NEXT:    setp %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: or_uno:
+; X64:       # %bb.0:
+; X64-NEXT:    xorps %xmm2, %xmm2
+; X64-NEXT:    cmpunordps %xmm2, %xmm1
+; X64-NEXT:    cmpunordps %xmm2, %xmm0
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %c = fcmp uno float %a, 0.0
+  %d = fcmp uno float %b, 0.0
+  %e = or i1 %c, %d
+  ret i1 %e
+}