-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[DAGCombiner] Fold (smax x (neg x)) to (abs x) #164322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Signed-off-by: John Lu <[email protected]>
|
@llvm/pr-subscribers-backend-amdgpu Author: None (LU-JOHN) Changes: Fold (smax x (neg x)) to (abs x). Full diff: https://github.com/llvm/llvm-project/pull/164322.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bf9008c3d677..5f5cf34057069 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6181,6 +6181,14 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
return S;
+ // Fold max(x, neg(x)) -> abs(x)
+ if (Opcode == ISD::SMAX &&
+ TLI.isOperationLegal(ISD::ABS, VT)) {
+ SDValue Value;
+ if (sd_match(N, m_SMax(m_Value(Value), m_Neg(m_Deferred(Value)))))
+ return DAG.getNode(ISD::ABS, DL, VT, Value);
+ }
+
// Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
auto ReductionOpcode = [](unsigned Opcode) {
switch (Opcode) {
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
index 68ae9854bd7d2..f72c164d6ff80 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
@@ -15,8 +15,8 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
}
; GCN-LABEL: name: v_abs_i32
-; SI: V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI: V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
; GCN: V_MAX_I32_e64
define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -47,8 +47,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
}
; GCN-LABEL: name: v_abs_v2i32
-; SI: V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI: V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
; GCN: V_MAX_I32_e64
; GCN: V_MAX_I32_e64
define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll
index dbcb4b75e7818..002efac8039d5 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -21,7 +21,7 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2
@@ -42,7 +42,7 @@ define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %sr
; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[NEG]], [[SRC]]
; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
define amdgpu_kernel void @v_abs_i32_repeat_user(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -84,8 +84,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
@@ -156,10 +156,10 @@ define amdgpu_kernel void @s_abs_v4i32(ptr addrspace(1) %out, <4 x i32> %val) no
; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
|
|
@llvm/pr-subscribers-llvm-selectiondag Author: None (LU-JOHN) Changes: Fold (smax x (neg x)) to (abs x). Full diff: https://github.com/llvm/llvm-project/pull/164322.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bf9008c3d677..5f5cf34057069 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6181,6 +6181,14 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
return S;
+ // Fold max(x, neg(x)) -> abs(x)
+ if (Opcode == ISD::SMAX &&
+ TLI.isOperationLegal(ISD::ABS, VT)) {
+ SDValue Value;
+ if (sd_match(N, m_SMax(m_Value(Value), m_Neg(m_Deferred(Value)))))
+ return DAG.getNode(ISD::ABS, DL, VT, Value);
+ }
+
// Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
auto ReductionOpcode = [](unsigned Opcode) {
switch (Opcode) {
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
index 68ae9854bd7d2..f72c164d6ff80 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
@@ -15,8 +15,8 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
}
; GCN-LABEL: name: v_abs_i32
-; SI: V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI: V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
; GCN: V_MAX_I32_e64
define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -47,8 +47,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
}
; GCN-LABEL: name: v_abs_v2i32
-; SI: V_SUB_CO_U32_e64
-; GFX900: V_SUB_U32_e64
+; SI: V_SUB_CO_U32_e32
+; GFX900: V_SUB_U32_e32
; GCN: V_MAX_I32_e64
; GCN: V_MAX_I32_e64
define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll
index dbcb4b75e7818..002efac8039d5 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -21,7 +21,7 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2
@@ -42,7 +42,7 @@ define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %sr
; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
+; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[NEG]], [[SRC]]
; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
define amdgpu_kernel void @v_abs_i32_repeat_user(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -84,8 +84,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
@@ -156,10 +156,10 @@ define amdgpu_kernel void @s_abs_v4i32(ptr addrspace(1) %out, <4 x i32> %val) no
; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Signed-off-by: John Lu <[email protected]>
| ; GCN-LABEL: name: v_abs_i32 | ||
| ; SI: V_SUB_CO_U32_e64 | ||
| ; GFX900: V_SUB_U32_e64 | ||
| ; SI: V_SUB_CO_U32_e32 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is an encoding shrink, which is good, but why do none of these changes show a new abs use?
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like all the changed tests use non-canonical IR -- the middle-end already folds this. It would be good to demonstrate that this fold is useful in DAGCombine due to an interaction with legalization.
Fold (smax x (neg x)) to (abs x).