Skip to content

Commit 5a10d95

Browse files
committed
Do smax transform in separate PR
Signed-off-by: John Lu <[email protected]>
1 parent 1893053 commit 5a10d95

File tree

3 files changed: 12 additions and 19 deletions.

3 files changed

+12
-19
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14945,13 +14945,6 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
1494514945
}
1494614946
}
1494714947

14948-
// max(x, neg(x)) -> abs(x)
14949-
if (Opc == ISD::SMAX && VT == MVT::i32) {
14950-
SDValue Value;
14951-
if (sd_match(N, m_SMax(m_Value(Value), m_Neg(m_Deferred(Value)))))
14952-
return DAG.getNode(ISD::ABS, SDLoc(N), VT, Value);
14953-
}
14954-
1495514948
// min(max(x, K0), K1), K0 < K1 -> med3(x, K0, K1)
1495614949
// max(min(x, K0), K1), K1 < K0 -> med3(x, K1, K0)
1495714950
if (Opc == ISD::SMIN && Op0.getOpcode() == ISD::SMAX && Op0.hasOneUse()) {

llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
1515
}
1616

1717
; GCN-LABEL: name: v_abs_i32
18-
; SI: V_SUB_CO_U32_e32
19-
; GFX900: V_SUB_U32_e32
18+
; SI: V_SUB_CO_U32_e64
19+
; GFX900: V_SUB_U32_e64
2020
; GCN: V_MAX_I32_e64
2121
define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
2222
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -47,8 +47,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
4747
}
4848

4949
; GCN-LABEL: name: v_abs_v2i32
50-
; SI: V_SUB_CO_U32_e32
51-
; GFX900: V_SUB_U32_e32
50+
; SI: V_SUB_CO_U32_e64
51+
; GFX900: V_SUB_U32_e64
5252
; GCN: V_MAX_I32_e64
5353
; GCN: V_MAX_I32_e64
5454
define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {

llvm/test/CodeGen/AMDGPU/sminmax.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
2121
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
2222
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
2323

24-
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
24+
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
2525

2626
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
2727
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2
@@ -42,7 +42,7 @@ define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %sr
4242
; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
4343
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
4444
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
45-
; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[NEG]], [[SRC]]
45+
; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
4646
; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
4747
define amdgpu_kernel void @v_abs_i32_repeat_user(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
4848
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -84,8 +84,8 @@ define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) no
8484
; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
8585
; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
8686

87-
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
88-
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
87+
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
88+
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
8989

9090
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
9191
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
@@ -156,10 +156,10 @@ define amdgpu_kernel void @s_abs_v4i32(ptr addrspace(1) %out, <4 x i32> %val) no
156156
; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
157157
; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]
158158

159-
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
160-
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
161-
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
162-
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
159+
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
160+
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
161+
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
162+
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
163163

164164
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
165165
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,

0 commit comments

Comments
 (0)