Skip to content

Commit 1195022

Browse files
committed
Calc IsVALU correctly during UADDO/USUBO selection
Signed-off-by: John Lu <[email protected]>
1 parent f98e651 commit 1195022

File tree

11 files changed

+838
-760
lines changed

11 files changed

+838
-760
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1111,8 +1111,7 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
11111111
{N->getOperand(0), N->getOperand(1),
11121112
CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
11131113
} else {
1114-
unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
1115-
: AMDGPU::S_USUBO_PSEUDO;
1114+
unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
11161115

11171116
CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
11181117
{N->getOperand(0), N->getOperand(1)});

llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll

Lines changed: 56 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -8760,8 +8760,9 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87608760
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87618761
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
87628762
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8763-
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8763+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
87648764
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8765+
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
87658766
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
87668767
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87678768
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8780,19 +8781,20 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87808781
; GFX90A-NEXT: s_cbranch_execz .LBB113_6
87818782
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
87828783
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
8783-
; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v4, vcc
8784-
; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
8785-
; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4
8784+
; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc
8785+
; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
8786+
; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
87868787
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8787-
; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v6
8788+
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
87888789
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8789-
; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v7, vcc
8790-
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
8791-
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
8792-
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
8793-
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
8794-
; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
8795-
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
8790+
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8791+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
8792+
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
8793+
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
8794+
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
8795+
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8796+
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen offset:4
8797+
; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
87968798
; GFX90A-NEXT: .LBB113_6: ; %atomicrmw.phi
87978799
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
87988800
; GFX90A-NEXT: ;;#ASMSTART
@@ -8826,9 +8828,10 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88268828
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
88278829
; GFX950-NEXT: s_nop 1
88288830
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8831+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
88298832
; GFX950-NEXT: s_nop 1
8830-
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
88318833
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8834+
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
88328835
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0
88338836
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
88348837
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8854,11 +8857,11 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88548857
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
88558858
; GFX950-NEXT: s_nop 1
88568859
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8860+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
88578861
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
8858-
; GFX950-NEXT: s_nop 0
8862+
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
88598863
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
88608864
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8861-
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
88628865
; GFX950-NEXT: scratch_store_dwordx2 v4, v[2:3], off
88638866
; GFX950-NEXT: .LBB113_6: ; %atomicrmw.phi
88648867
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
@@ -8898,8 +8901,9 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
88988901
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
88998902
; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
89008903
; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
8901-
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
8904+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
89028905
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
8906+
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
89038907
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
89048908
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89058909
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -8915,17 +8919,18 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89158919
; GFX90A-NEXT: s_cbranch_execz .LBB114_6
89168920
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
89178921
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
8918-
; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
8919-
; GFX90A-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
8920-
; GFX90A-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:4
8922+
; GFX90A-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc
8923+
; GFX90A-NEXT: buffer_load_dword v4, v6, s[0:3], 0 offen
8924+
; GFX90A-NEXT: buffer_load_dword v5, v6, s[0:3], 0 offen offset:4
89218925
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8922-
; GFX90A-NEXT: v_sub_co_u32_e32 v1, vcc, v4, v2
8926+
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v4, v2
89238927
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8924-
; GFX90A-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v3, vcc
8928+
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v3, vcc
8929+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[4:5]
8930+
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
89258931
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8926-
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8927-
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
8928-
; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
8932+
; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
8933+
; GFX90A-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
89298934
; GFX90A-NEXT: .LBB114_6: ; %atomicrmw.phi
89308935
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
89318936
; GFX90A-NEXT: ;;#ASMSTART
@@ -8958,9 +8963,10 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89588963
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
89598964
; GFX950-NEXT: s_nop 1
89608965
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
8966+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
89618967
; GFX950-NEXT: s_nop 1
8962-
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
89638968
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
8969+
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
89648970
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
89658971
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89668972
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -8983,6 +8989,7 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89838989
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
89848990
; GFX950-NEXT: s_nop 1
89858991
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
8992+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
89868993
; GFX950-NEXT: s_nop 1
89878994
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
89888995
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
@@ -17058,8 +17065,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1705817065
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1705917066
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1706017067
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17061-
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17068+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1706217069
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17070+
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1706317071
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
1706417072
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1706517073
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17078,19 +17086,20 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1707817086
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
1707917087
; GFX90A-NEXT: s_cmp_lg_u64 s[4:5], 0
1708017088
; GFX90A-NEXT: s_cselect_b32 s4, s4, -1
17081-
; GFX90A-NEXT: v_mov_b32_e32 v0, s4
17082-
; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
17083-
; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4
17089+
; GFX90A-NEXT: v_mov_b32_e32 v6, s4
17090+
; GFX90A-NEXT: buffer_load_dword v0, v6, s[0:3], 0 offen
17091+
; GFX90A-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
1708417092
; GFX90A-NEXT: s_waitcnt vmcnt(1)
17085-
; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v4
17093+
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
1708617094
; GFX90A-NEXT: s_waitcnt vmcnt(0)
17087-
; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
17088-
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
17089-
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
17090-
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
17091-
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
17092-
; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
17093-
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
17095+
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17096+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
17097+
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
17098+
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
17099+
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
17100+
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17101+
; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen offset:4
17102+
; GFX90A-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
1709417103
; GFX90A-NEXT: .LBB221_6: ; %atomicrmw.phi
1709517104
; GFX90A-NEXT: ;;#ASMSTART
1709617105
; GFX90A-NEXT: ; use a[0:1]
@@ -17123,9 +17132,10 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1712317132
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1712417133
; GFX950-NEXT: s_nop 1
1712517134
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17135+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1712617136
; GFX950-NEXT: s_nop 1
17127-
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1712817137
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17138+
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1712917139
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
1713017140
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1713117141
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17149,11 +17159,11 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1714917159
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
1715017160
; GFX950-NEXT: s_nop 1
1715117161
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17162+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
1715217163
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
17153-
; GFX950-NEXT: s_nop 0
17164+
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
1715417165
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
1715517166
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17156-
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
1715717167
; GFX950-NEXT: scratch_store_dwordx2 off, v[2:3], s0
1715817168
; GFX950-NEXT: .LBB221_6: ; %atomicrmw.phi
1715917169
; GFX950-NEXT: ;;#ASMSTART
@@ -17192,8 +17202,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1719217202
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
1719317203
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1719417204
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17195-
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17205+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
1719617206
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17207+
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
1719717208
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] glc
1719817209
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1719917210
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17216,6 +17227,7 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1721617227
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1721717228
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1721817229
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17230+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1721917231
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1722017232
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1722117233
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen
@@ -17251,9 +17263,10 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1725117263
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1725217264
; GFX950-NEXT: s_nop 1
1725317265
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17266+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
1725417267
; GFX950-NEXT: s_nop 1
17255-
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
1725617268
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17269+
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
1725717270
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
1725817271
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1725917272
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17274,6 +17287,7 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1727417287
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1727517288
; GFX950-NEXT: s_nop 1
1727617289
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17290+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1727717291
; GFX950-NEXT: s_nop 1
1727817292
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1727917293
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc

llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5804,8 +5804,9 @@ define void @global_atomic_usub_sat_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
58045804
; GFX90A-NEXT: s_waitcnt vmcnt(0)
58055805
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v4, v6
58065806
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v7, vcc
5807-
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
5807+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[4:5]
58085808
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
5809+
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
58095810
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 glc
58105811
; GFX90A-NEXT: s_waitcnt vmcnt(0)
58115812
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -5838,9 +5839,10 @@ define void @global_atomic_usub_sat_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
58385839
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v4, v6
58395840
; GFX950-NEXT: s_nop 1
58405841
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v7, vcc
5842+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[4:5]
58415843
; GFX950-NEXT: s_nop 1
5842-
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
58435844
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
5845+
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
58445846
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 sc0
58455847
; GFX950-NEXT: s_waitcnt vmcnt(0)
58465848
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -5878,8 +5880,9 @@ define void @global_atomic_usub_sat_i64_ret_av_av(ptr addrspace(1) %ptr) #0 {
58785880
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
58795881
; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
58805882
; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
5881-
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
5883+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
58825884
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
5885+
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
58835886
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:80 glc
58845887
; GFX90A-NEXT: s_waitcnt vmcnt(0)
58855888
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -5908,9 +5911,10 @@ define void @global_atomic_usub_sat_i64_ret_av_av(ptr addrspace(1) %ptr) #0 {
59085911
; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
59095912
; GFX950-NEXT: s_nop 1
59105913
; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
5914+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
59115915
; GFX950-NEXT: s_nop 1
5912-
; GFX950-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
59135916
; GFX950-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
5917+
; GFX950-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
59145918
; GFX950-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:80 sc0
59155919
; GFX950-NEXT: s_waitcnt vmcnt(0)
59165920
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -11569,8 +11573,9 @@ define void @global_atomic_usub_sat_i64_saddr_ret_a_a(ptr addrspace(1) inreg %pt
1156911573
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1157011574
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1157111575
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
11572-
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
11576+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1157311577
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
11578+
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1157411579
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[16:17] offset:80 glc
1157511580
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1157611581
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -11604,9 +11609,10 @@ define void @global_atomic_usub_sat_i64_saddr_ret_a_a(ptr addrspace(1) inreg %pt
1160411609
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1160511610
; GFX950-NEXT: s_nop 1
1160611611
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
11612+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1160711613
; GFX950-NEXT: s_nop 1
11608-
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1160911614
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
11615+
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1161011616
; GFX950-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:80 sc0
1161111617
; GFX950-NEXT: s_waitcnt vmcnt(0)
1161211618
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -11645,8 +11651,9 @@ define void @global_atomic_usub_sat_i64_saddr_ret_av_av(ptr addrspace(1) inreg %
1164511651
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
1164611652
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1164711653
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
11648-
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
11654+
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
1164911655
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
11656+
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
1165011657
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v4, v[6:9], s[16:17] offset:80 glc
1165111658
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1165211659
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -11676,9 +11683,10 @@ define void @global_atomic_usub_sat_i64_saddr_ret_av_av(ptr addrspace(1) inreg %
1167611683
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1167711684
; GFX950-NEXT: s_nop 1
1167811685
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
11686+
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
1167911687
; GFX950-NEXT: s_nop 1
11680-
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
1168111688
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
11689+
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
1168211690
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v4, v[6:9], s[0:1] offset:80 sc0
1168311691
; GFX950-NEXT: s_waitcnt vmcnt(0)
1168411692
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]

0 commit comments

Comments
 (0)