Skip to content

Conversation

@AlexMaclean
Copy link
Member

Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine but adding in here as well can allow for additional folding if more logical operations are exposed.

@AlexMaclean AlexMaclean self-assigned this Apr 14, 2025
@llvmbot llvmbot added backend:NVPTX llvm:SelectionDAG SelectionDAGISel as well labels Apr 14, 2025
@llvmbot
Copy link
Member

llvmbot commented Apr 14, 2025

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-llvm-selectiondag

Author: Alex MacLean (AlexMaclean)

Changes

Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine but adding in here as well can allow for additional folding if more logical operations are exposed.


Full diff: https://github.com/llvm/llvm-project/pull/135645.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6)
  • (added) llvm/test/CodeGen/NVPTX/and-or-setcc.ll (+45)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8136f1794775e..8eb3f95a30989 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6427,6 +6427,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
     }
   }
 
+  if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
+      LHS0.getValueType() == RHS0.getValueType() &&
+      ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
+       (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
+    return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
+
   if (TargetPreference == AndOrSETCCFoldKind::None)
     return SDValue();
 
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
new file mode 100644
index 0000000000000..21be9df94d553
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @and_ord(float %a, float %b) {
+; CHECK-LABEL: and_ord(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [and_ord_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [and_ord_param_1];
+; CHECK-NEXT:    setp.num.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp ord float %a, 0.0
+  %d = fcmp ord float %b, 0.0
+  %e = and i1 %c, %d
+  ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; CHECK-LABEL: or_uno(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [or_uno_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [or_uno_param_1];
+; CHECK-NEXT:    setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp uno float %a, 0.0
+  %d = fcmp uno float %b, 0.0
+  %e = or i1 %c, %d
+  ret i1 %e
+}

@llvmbot
Copy link
Member

llvmbot commented Apr 14, 2025

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine but adding in here as well can allow for additional folding if more logical operations are exposed.


Full diff: https://github.com/llvm/llvm-project/pull/135645.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6)
  • (added) llvm/test/CodeGen/NVPTX/and-or-setcc.ll (+45)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8136f1794775e..8eb3f95a30989 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6427,6 +6427,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
     }
   }
 
+  if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
+      LHS0.getValueType() == RHS0.getValueType() &&
+      ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
+       (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
+    return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
+
   if (TargetPreference == AndOrSETCCFoldKind::None)
     return SDValue();
 
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
new file mode 100644
index 0000000000000..21be9df94d553
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @and_ord(float %a, float %b) {
+; CHECK-LABEL: and_ord(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [and_ord_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [and_ord_param_1];
+; CHECK-NEXT:    setp.num.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp ord float %a, 0.0
+  %d = fcmp ord float %b, 0.0
+  %e = and i1 %c, %d
+  ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; CHECK-LABEL: or_uno(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [or_uno_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [or_uno_param_1];
+; CHECK-NEXT:    setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp uno float %a, 0.0
+  %d = fcmp uno float %b, 0.0
+  %e = or i1 %c, %d
+  ret i1 %e
+}

@AlexMaclean AlexMaclean force-pushed the dev/amaclean/upstream/and-or-fcmp branch from 84c760a to 238ca4b Compare April 15, 2025 18:49
Copy link
Member

@Artem-B Artem-B left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@AlexMaclean AlexMaclean merged commit 1bfd444 into llvm:main Apr 16, 2025
12 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Apr 16, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-amdgpu-runtime-2 running on rocm-worker-hw-02 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/3573

Here is the relevant piece of the build log for the reference
Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp   -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test/ompt /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test/ompt /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# note: command had no output on stdout or stderr
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************


%d = fcmp uno float %b, 0.0
%e = or i1 %c, %d
ret i1 %e
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should also do vector tests

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies, looks like I committed this change prematurely. I've addressed this comment in a follow up here #136168. Please take a look when you have a minute.

; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}

target triple = "nvptx64-nvidia-cuda"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant with the mtriple

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also addressed in #136168

AlexMaclean added a commit that referenced this pull request Apr 18, 2025
IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants