[DAGCombiner] Fold (smax x (neg x)) to (abs x) #164322

LU-JOHN · 2025-10-20T21:14:13Z

Fold (smax x (neg x)) to (abs x).

Signed-off-by: John Lu <[email protected]>

llvmbot · 2025-10-20T21:14:57Z

@llvm/pr-subscribers-backend-amdgpu

Author: None (LU-JOHN)

Changes

Fold (smax x (neg x)) to (abs x).

Full diff: https://github.com/llvm/llvm-project/pull/164322.diff

3 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+8)
(modified) llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll (+4-4)
(modified) llvm/test/CodeGen/AMDGPU/sminmax.ll (+8-8)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bf9008c3d677..5f5cf34057069 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6181,6 +6181,14 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
       return S;
 
+  // Fold max(x, neg(x)) -> abs(x)
+  if (Opcode == ISD::SMAX &&
+      TLI.isOperationLegal(ISD::ABS, VT)) {
+    SDValue Value;
+    if (sd_match(N, m_SMax(m_Value(Value), m_Neg(m_Deferred(Value)))))
+      return DAG.getNode(ISD::ABS, DL, VT, Value);
+  }
+
   // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
   auto ReductionOpcode = [](unsigned Opcode) {
     switch (Opcode) {
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
index 68ae9854bd7d2..f72c164d6ff80 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
@@ -15,8 +15,8 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
 }
 
 ; GCN-LABEL: name: v_abs_i32
-; SI:  V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI:  V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
 ; GCN: V_MAX_I32_e64
 define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -47,8 +47,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
 }
 
 ; GCN-LABEL: name: v_abs_v2i32
-; SI:  V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI:  V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
 ; GCN: V_MAX_I32_e64
 ; GCN: V_MAX_I32_e64
 define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll
index dbcb4b75e7818..002efac8039d5 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -21,7 +21,7 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
 ; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
 
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
 ; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2
@@ -42,7 +42,7 @@ define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %sr
 ; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
 ; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[NEG]], [[SRC]]
 ; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
 define amdgpu_kernel void @v_abs_i32_repeat_user(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -84,8 +84,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
 ; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
 ; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
 
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
@@ -156,10 +156,10 @@ define amdgpu_kernel void @s_abs_v4i32(ptr addrspace(1) %out, <4 x i32> %val) no
 ; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
 ; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]
 
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,

llvmbot · 2025-10-20T21:14:58Z

@llvm/pr-subscribers-llvm-selectiondag

Author: None (LU-JOHN)

Changes

Fold (smax x (neg x)) to (abs x).

Full diff: https://github.com/llvm/llvm-project/pull/164322.diff

3 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+8)
(modified) llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll (+4-4)
(modified) llvm/test/CodeGen/AMDGPU/sminmax.ll (+8-8)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bf9008c3d677..5f5cf34057069 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6181,6 +6181,14 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
       return S;
 
+  // Fold max(x, neg(x)) -> abs(x)
+  if (Opcode == ISD::SMAX &&
+      TLI.isOperationLegal(ISD::ABS, VT)) {
+    SDValue Value;
+    if (sd_match(N, m_SMax(m_Value(Value), m_Neg(m_Deferred(Value)))))
+      return DAG.getNode(ISD::ABS, DL, VT, Value);
+  }
+
   // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
   auto ReductionOpcode = [](unsigned Opcode) {
     switch (Opcode) {
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
index 68ae9854bd7d2..f72c164d6ff80 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
@@ -15,8 +15,8 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
 }
 
 ; GCN-LABEL: name: v_abs_i32
-; SI:  V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI:  V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
 ; GCN: V_MAX_I32_e64
 define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -47,8 +47,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
 }
 
 ; GCN-LABEL: name: v_abs_v2i32
-; SI:  V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI:  V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
 ; GCN: V_MAX_I32_e64
 ; GCN: V_MAX_I32_e64
 define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll
index dbcb4b75e7818..002efac8039d5 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -21,7 +21,7 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
 ; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
 
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
 ; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2
@@ -42,7 +42,7 @@ define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %sr
 ; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
 ; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[NEG]], [[SRC]]
 ; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
 define amdgpu_kernel void @v_abs_i32_repeat_user(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -84,8 +84,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
 ; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
 ; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
 
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
@@ -156,10 +156,10 @@ define amdgpu_kernel void @s_abs_v4i32(ptr addrspace(1) %out, <4 x i32> %val) no
 ; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
 ; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]
 
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
 
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,

github-actions · 2025-10-20T21:16:09Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Signed-off-by: John Lu <[email protected]>

arsenm · 2025-10-21T02:58:29Z

llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll

 ; GCN-LABEL: name: v_abs_i32
-; SI:  V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI:  V_SUB_CO_U32_e32


This is an encoding shrink, which is good, but why do none of these changes show a new abs use?

nikic

It looks like all the changed tests use non-canonical IR -- the middle-end already folds this. It would be good to demonstrate that this fold is useful in DAGCombine due to an interaction with legalization.

Fold (smax x (neg x)) to (abs x)

01f7949

Signed-off-by: John Lu <[email protected]>

llvmbot added the backend:AMDGPU and llvm:SelectionDAG (SelectionDAGISel as well) labels on Oct 20, 2025

Apply clang-format

a534443

Signed-off-by: John Lu <[email protected]>

arsenm reviewed Oct 21, 2025

View reviewed changes

nikic reviewed Oct 21, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[DAGCombiner] Fold (smax x (neg x)) to (abs x) #164322

[DAGCombiner] Fold (smax x (neg x)) to (abs x) #164322

Uh oh!

LU-JOHN commented Oct 20, 2025

Uh oh!

llvmbot commented Oct 20, 2025

Uh oh!

llvmbot commented Oct 20, 2025

Uh oh!

github-actions bot commented Oct 20, 2025 •

edited

Loading

Uh oh!

arsenm Oct 21, 2025

Uh oh!

nikic left a comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[DAGCombiner] Fold (smax x (neg x)) to (abs x) #164322

Are you sure you want to change the base?

[DAGCombiner] Fold (smax x (neg x)) to (abs x) #164322

Uh oh!

Conversation

LU-JOHN commented Oct 20, 2025

Uh oh!

llvmbot commented Oct 20, 2025

Uh oh!

llvmbot commented Oct 20, 2025

Uh oh!

github-actions bot commented Oct 20, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm Oct 21, 2025

Choose a reason for hiding this comment

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

github-actions bot commented Oct 20, 2025 •

edited

Loading