Skip to content

Conversation

@LU-JOHN
Copy link
Contributor

@LU-JOHN LU-JOHN commented Oct 20, 2025

Fold (smax x, (neg x)) to (abs x) in DAGCombiner's visitIMINMAX when ISD::ABS is legal for the value type.

@llvmbot llvmbot added backend:AMDGPU llvm:SelectionDAG SelectionDAGISel as well labels Oct 20, 2025
@llvmbot
Copy link
Member

llvmbot commented Oct 20, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: None (LU-JOHN)

Changes

Fold (smax x (neg x)) to (abs x).


Full diff: https://github.com/llvm/llvm-project/pull/164322.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+8)
  • (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll (+4-4)
  • (modified) llvm/test/CodeGen/AMDGPU/sminmax.ll (+8-8)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bf9008c3d677..5f5cf34057069 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6181,6 +6181,14 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
       return S;
 
+  // Fold max(x, neg(x)) -> abs(x)
+  if (Opcode == ISD::SMAX &&
+      TLI.isOperationLegal(ISD::ABS, VT)) {
+    SDValue Value;
+    if (sd_match(N, m_SMax(m_Value(Value), m_Neg(m_Deferred(Value)))))
+      return DAG.getNode(ISD::ABS, DL, VT, Value);
+  }
+
   // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
   auto ReductionOpcode = [](unsigned Opcode) {
     switch (Opcode) {
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
index 68ae9854bd7d2..f72c164d6ff80 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
@@ -15,8 +15,8 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
 }
 
 ; GCN-LABEL: name: v_abs_i32
-; SI:  V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI:  V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
 ; GCN: V_MAX_I32_e64
 define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -47,8 +47,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
 }
 
 ; GCN-LABEL: name: v_abs_v2i32
-; SI:  V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI:  V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
 ; GCN: V_MAX_I32_e64
 ; GCN: V_MAX_I32_e64
 define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll
index dbcb4b75e7818..002efac8039d5 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -21,7 +21,7 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
 ; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
 
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
 ; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2
@@ -42,7 +42,7 @@ define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %sr
 ; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
 ; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[NEG]], [[SRC]]
 ; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
 define amdgpu_kernel void @v_abs_i32_repeat_user(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -84,8 +84,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
 ; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
 ; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
 
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
@@ -156,10 +156,10 @@ define amdgpu_kernel void @s_abs_v4i32(ptr addrspace(1) %out, <4 x i32> %val) no
 ; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
 ; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]
 
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,

@llvmbot
Copy link
Member

llvmbot commented Oct 20, 2025

@llvm/pr-subscribers-llvm-selectiondag

Author: None (LU-JOHN)

Changes

Fold (smax x (neg x)) to (abs x).


Full diff: https://github.com/llvm/llvm-project/pull/164322.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+8)
  • (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll (+4-4)
  • (modified) llvm/test/CodeGen/AMDGPU/sminmax.ll (+8-8)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bf9008c3d677..5f5cf34057069 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6181,6 +6181,14 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
       return S;
 
+  // Fold max(x, neg(x)) -> abs(x)
+  if (Opcode == ISD::SMAX &&
+      TLI.isOperationLegal(ISD::ABS, VT)) {
+    SDValue Value;
+    if (sd_match(N, m_SMax(m_Value(Value), m_Neg(m_Deferred(Value)))))
+      return DAG.getNode(ISD::ABS, DL, VT, Value);
+  }
+
   // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
   auto ReductionOpcode = [](unsigned Opcode) {
     switch (Opcode) {
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
index 68ae9854bd7d2..f72c164d6ff80 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
@@ -15,8 +15,8 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
 }
 
 ; GCN-LABEL: name: v_abs_i32
-; SI:  V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI:  V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
 ; GCN: V_MAX_I32_e64
 define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -47,8 +47,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
 }
 
 ; GCN-LABEL: name: v_abs_v2i32
-; SI:  V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI:  V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
 ; GCN: V_MAX_I32_e64
 ; GCN: V_MAX_I32_e64
 define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll
index dbcb4b75e7818..002efac8039d5 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -21,7 +21,7 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
 ; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
 
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
 ; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2
@@ -42,7 +42,7 @@ define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %sr
 ; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
 ; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[NEG]], [[SRC]]
 ; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
 define amdgpu_kernel void @v_abs_i32_repeat_user(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -84,8 +84,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
 ; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
 ; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
 
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
@@ -156,10 +156,10 @@ define amdgpu_kernel void @s_abs_v4i32(ptr addrspace(1) %out, <4 x i32> %val) no
 ; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
 ; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]
 
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,

@github-actions
Copy link

github-actions bot commented Oct 20, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Signed-off-by: John Lu <[email protected]>
; GCN-LABEL: name: v_abs_i32
; SI: V_SUB_CO_U32_e64
; GFX900: V_SUB_U32_e64
; SI: V_SUB_CO_U32_e32
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an encoding shrink, which is good, but why do none of these changes show a new abs use?

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like all the changed tests use non-canonical IR -- the middle-end already folds this. It would be good to demonstrate that this fold is useful in DAGCombine due to an interaction with legalization.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:SelectionDAG SelectionDAGISel as well

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants