Skip to content

Commit 6df10cc

Browse files
committed
Adjust patterns for AMDGPU
1 parent: 224f0a3 · commit: 6df10cc

File tree

1 file changed

+25
-20
lines changed

1 file changed

+25
-20
lines changed

llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll

Lines changed: 25 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -26,16 +26,16 @@ define i32 @v_underflow_compare_fold_i32(i32 %a, i32 %b) #0 {
2626
; GFX9-LABEL: v_underflow_compare_fold_i32:
2727
; GFX9: ; %bb.0:
2828
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29-
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
30-
; GFX9-NEXT: v_min_u32_e32 v0, v1, v0
29+
; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, v0, v1
30+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
3131
; GFX9-NEXT: s_setpc_b64 s[30:31]
3232
;
3333
; GFX11-LABEL: v_underflow_compare_fold_i32:
3434
; GFX11: ; %bb.0:
3535
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36-
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
36+
; GFX11-NEXT: v_sub_co_u32 v1, vcc_lo, v0, v1
3737
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
38-
; GFX11-NEXT: v_min_u32_e32 v0, v1, v0
38+
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
3939
; GFX11-NEXT: s_setpc_b64 s[30:31]
4040
%sub = sub i32 %a, %b
4141
%cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
@@ -46,16 +46,16 @@ define i32 @v_underflow_compare_fold_i32_commute(i32 %a, i32 %b) #0 {
4646
; GFX9-LABEL: v_underflow_compare_fold_i32_commute:
4747
; GFX9: ; %bb.0:
4848
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49-
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
50-
; GFX9-NEXT: v_min_u32_e32 v0, v0, v1
49+
; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, v0, v1
50+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
5151
; GFX9-NEXT: s_setpc_b64 s[30:31]
5252
;
5353
; GFX11-LABEL: v_underflow_compare_fold_i32_commute:
5454
; GFX11: ; %bb.0:
5555
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56-
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
56+
; GFX11-NEXT: v_sub_co_u32 v1, vcc_lo, v0, v1
5757
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
58-
; GFX11-NEXT: v_min_u32_e32 v0, v0, v1
58+
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
5959
; GFX11-NEXT: s_setpc_b64 s[30:31]
6060
%sub = sub i32 %a, %b
6161
%cond = call i32 @llvm.umin.i32(i32 %a, i32 %sub)
@@ -66,19 +66,20 @@ define i32 @v_underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace
6666
; GFX9-LABEL: v_underflow_compare_fold_i32_multi_use:
6767
; GFX9: ; %bb.0:
6868
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69-
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
70-
; GFX9-NEXT: v_min_u32_e32 v0, v1, v0
71-
; GFX9-NEXT: global_store_dword v[2:3], v1, off
69+
; GFX9-NEXT: v_sub_u32_e32 v4, v0, v1
70+
; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, v0, v1
71+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
72+
; GFX9-NEXT: global_store_dword v[2:3], v4, off
7273
; GFX9-NEXT: s_waitcnt vmcnt(0)
7374
; GFX9-NEXT: s_setpc_b64 s[30:31]
7475
;
7576
; GFX11-LABEL: v_underflow_compare_fold_i32_multi_use:
7677
; GFX11: ; %bb.0:
7778
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78-
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
79-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
80-
; GFX11-NEXT: v_min_u32_e32 v0, v1, v0
81-
; GFX11-NEXT: global_store_b32 v[2:3], v1, off
79+
; GFX11-NEXT: v_sub_nc_u32_e32 v4, v0, v1
80+
; GFX11-NEXT: v_sub_co_u32 v1, vcc_lo, v0, v1
81+
; GFX11-NEXT: global_store_b32 v[2:3], v4, off
82+
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
8283
; GFX11-NEXT: s_setpc_b64 s[30:31]
8384
%sub = sub i32 %a, %b
8485
store i32 %sub, ptr addrspace(1) %ptr
@@ -190,15 +191,19 @@ define amdgpu_ps i16 @s_underflow_compare_fold_i16(i16 inreg %a, i16 inreg %b) #
190191
define amdgpu_ps i32 @s_underflow_compare_fold_i32(i32 inreg %a, i32 inreg %b) #0 {
191192
; GFX9-LABEL: s_underflow_compare_fold_i32:
192193
; GFX9: ; %bb.0:
193-
; GFX9-NEXT: s_sub_i32 s1, s0, s1
194-
; GFX9-NEXT: s_min_u32 s0, s1, s0
194+
; GFX9-NEXT: v_mov_b32_e32 v0, s1
195+
; GFX9-NEXT: v_mov_b32_e32 v1, s0
196+
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s0, v0
197+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
198+
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
195199
; GFX9-NEXT: ; return to shader part epilog
196200
;
197201
; GFX11-LABEL: s_underflow_compare_fold_i32:
198202
; GFX11: ; %bb.0:
199-
; GFX11-NEXT: s_sub_i32 s1, s0, s1
200-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
201-
; GFX11-NEXT: s_min_u32 s0, s1, s0
203+
; GFX11-NEXT: v_sub_co_u32 v0, s1, s0, s1
204+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
205+
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
206+
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
202207
; GFX11-NEXT: ; return to shader part epilog
203208
%sub = sub i32 %a, %b
204209
%cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)

0 commit comments

Comments
 (0)