From 88249c6c66bf4ef60b38e06834bd49cb9c7b9aa1 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Fri, 11 Apr 2025 19:08:50 +0000 Subject: [PATCH 1/2] pre-commit tests --- llvm/test/CodeGen/NVPTX/and-or-setcc.ll | 49 ++++++++++++++++++ llvm/test/CodeGen/X86/and-or-setcc.ll | 69 +++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 llvm/test/CodeGen/NVPTX/and-or-setcc.ll create mode 100644 llvm/test/CodeGen/X86/and-or-setcc.ll diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll new file mode 100644 index 0000000000000..494c823ea2110 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %} + +target triple = "nvptx64-nvidia-cuda" + +define i1 @and_ord(float %a, float %b) { +; CHECK-LABEL: and_ord( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [and_ord_param_0]; +; CHECK-NEXT: setp.num.f32 %p1, %f1, %f1; +; CHECK-NEXT: ld.param.f32 %f2, [and_ord_param_1]; +; CHECK-NEXT: setp.num.f32 %p2, %f2, %f2; +; CHECK-NEXT: and.pred %p3, %p1, %p2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %c = fcmp ord float %a, 0.0 + %d = fcmp ord float %b, 0.0 + %e = and i1 %c, %d + ret i1 %e +} + +define i1 @or_uno(float %a, float %b) { +; CHECK-LABEL: or_uno( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [or_uno_param_0]; +; CHECK-NEXT: setp.nan.f32 %p1, %f1, %f1; +; CHECK-NEXT: ld.param.f32 %f2, [or_uno_param_1]; +; CHECK-NEXT: setp.nan.f32 %p2, %f2, %f2; +; CHECK-NEXT: or.pred %p3, %p1, %p2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %c = fcmp uno float %a, 0.0 + %d = fcmp uno float %b, 0.0 + %e = or i1 %c, %d + ret i1 %e +} diff --git a/llvm/test/CodeGen/X86/and-or-setcc.ll b/llvm/test/CodeGen/X86/and-or-setcc.ll new file mode 100644 index 0000000000000..d4e663f605263 --- /dev/null +++ b/llvm/test/CodeGen/X86/and-or-setcc.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 + +define i1 @and_ord(float %a, float %b) { +; X86-LABEL: and_ord: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setnp %cl +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setnp %al +; X86-NEXT: andb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: and_ord: +; X64: # %bb.0: +; X64-NEXT: xorps %xmm2, %xmm2 +; X64-NEXT: cmpordps %xmm2, %xmm1 +; X64-NEXT: cmpordps %xmm2, %xmm0 +; X64-NEXT: andps %xmm1, %xmm0 +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + %c = fcmp ord float %a, 0.0 + %d = fcmp ord float %b, 0.0 + %e = and i1 %c, %d + ret i1 %e +} + +define i1 @or_uno(float %a, float %b) { +; X86-LABEL: or_uno: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %cl +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: or_uno: +; X64: # %bb.0: +; X64-NEXT: xorps %xmm2, %xmm2 +; X64-NEXT: cmpunordps %xmm2, %xmm1 +; X64-NEXT: cmpunordps %xmm2, %xmm0 +; X64-NEXT: orps %xmm1, %xmm0 +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + %c = fcmp uno float %a, 0.0 + %d = fcmp uno float %b, 0.0 + %e = or i1 %c, %d + ret i1 %e +} From 238ca4b8fcb929a79aee6ad477e37335eb3676c5 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Fri, 11 Apr 2025 19:12:10 +0000 Subject: [PATCH 2/2] [DAGCombiner] Fold and/or of NaN SETCC --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++++ llvm/test/CodeGen/NVPTX/and-or-setcc.ll | 16 ++++++---------- llvm/test/CodeGen/X86/and-or-setcc.ll | 16 ++-------------- 3 files changed, 14 insertions(+), 24 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8136f1794775e..8eb3f95a30989 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6427,6 +6427,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) { } } + if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR && + LHS0.getValueType() == RHS0.getValueType() && + ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) || + (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO))) + return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL); + if (TargetPreference == AndOrSETCCFoldKind::None) return SDValue(); diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll index 494c823ea2110..21be9df94d553 100644 --- a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll +++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll @@ -7,17 +7,15 @@ target triple = "nvptx64-nvidia-cuda" define i1 @and_ord(float %a, float %b) { ; CHECK-LABEL: and_ord( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .pred %p<2>; ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .f32 %f<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.f32 %f1, [and_ord_param_0]; -; CHECK-NEXT: setp.num.f32 %p1, %f1, %f1; ; CHECK-NEXT: ld.param.f32 %f2, [and_ord_param_1]; -; CHECK-NEXT: setp.num.f32 %p2, %f2, %f2; -; CHECK-NEXT: and.pred %p3, %p1, %p2; -; CHECK-NEXT: selp.b32 %r1, 1, 0, %p3; +; CHECK-NEXT: setp.num.f32 %p1, %f1, %f2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %c = fcmp ord float %a, 0.0 @@ -29,17 +27,15 @@ define i1 @and_ord(float %a, float %b) { define i1 @or_uno(float %a, float %b) { ; CHECK-LABEL: or_uno( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .pred %p<2>; ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .f32 %f<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.f32 %f1, [or_uno_param_0]; -; CHECK-NEXT: setp.nan.f32 %p1, %f1, %f1; ; CHECK-NEXT: ld.param.f32 %f2, [or_uno_param_1]; -; CHECK-NEXT: setp.nan.f32 %p2, %f2, %f2; -; CHECK-NEXT: or.pred %p3, %p1, %p2; -; CHECK-NEXT: selp.b32 %r1, 1, 0, %p3; +; CHECK-NEXT: setp.nan.f32 %p1, %f1, %f2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %c = fcmp uno float %a, 0.0 diff --git a/llvm/test/CodeGen/X86/and-or-setcc.ll b/llvm/test/CodeGen/X86/and-or-setcc.ll index d4e663f605263..cb8ecca9348e6 100644 --- a/llvm/test/CodeGen/X86/and-or-setcc.ll +++ b/llvm/test/CodeGen/X86/and-or-setcc.ll @@ -7,17 +7,11 @@ define i1 @and_ord(float %a, float %b) { ; X86: # %bb.0: ; X86-NEXT: flds {{[0-9]+}}(%esp) ; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: fucomp %st(0) -; X86-NEXT: fnstsw %ax -; X86-NEXT: # kill: def $ah killed $ah killed $ax -; X86-NEXT: sahf -; X86-NEXT: setnp %cl -; X86-NEXT: fucomp %st(0) +; X86-NEXT: fucompp ; X86-NEXT: fnstsw %ax ; X86-NEXT: # kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: setnp %al -; X86-NEXT: andb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: and_ord: @@ -40,17 +34,11 @@ define i1 @or_uno(float %a, float %b) { ; X86: # %bb.0: ; X86-NEXT: flds {{[0-9]+}}(%esp) ; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: fucomp %st(0) -; X86-NEXT: fnstsw %ax -; X86-NEXT: # kill: def $ah killed $ah killed $ax -; X86-NEXT: sahf -; X86-NEXT: setp %cl -; X86-NEXT: fucomp %st(0) +; X86-NEXT: fucompp ; X86-NEXT: fnstsw %ax ; X86-NEXT: # kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: setp %al -; X86-NEXT: orb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: or_uno: