Skip to content

Commit f98e651

Browse files
committed
Update new tests
Signed-off-by: John Lu <[email protected]>
1 parent 54ab1cc commit f98e651

File tree

6 files changed

+78
-100
lines changed

6 files changed

+78
-100
lines changed

llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll

Lines changed: 42 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8760,9 +8760,8 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87608760
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87618761
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
87628762
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8763-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8764-
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
87658763
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8764+
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
87668765
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
87678766
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87688767
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8781,20 +8780,19 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87818780
; GFX90A-NEXT: s_cbranch_execz .LBB113_6
87828781
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
87838782
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
8784-
; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc
8785-
; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
8786-
; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
8783+
; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v4, vcc
8784+
; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
8785+
; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4
87878786
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8788-
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
8787+
; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v6
87898788
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8790-
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8791-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
8792-
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
8793-
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
8794-
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
8795-
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8796-
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen offset:4
8797-
; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
8789+
; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v7, vcc
8790+
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
8791+
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
8792+
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
8793+
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
8794+
; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
8795+
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
87988796
; GFX90A-NEXT: .LBB113_6: ; %atomicrmw.phi
87998797
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
88008798
; GFX90A-NEXT: ;;#ASMSTART
@@ -8828,10 +8826,9 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88288826
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
88298827
; GFX950-NEXT: s_nop 1
88308828
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8831-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
88328829
; GFX950-NEXT: s_nop 1
8833-
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
88348830
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8831+
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
88358832
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0
88368833
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
88378834
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8857,11 +8854,11 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88578854
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
88588855
; GFX950-NEXT: s_nop 1
88598856
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8860-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
88618857
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
8862-
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
8858+
; GFX950-NEXT: s_nop 0
88638859
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
88648860
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8861+
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
88658862
; GFX950-NEXT: scratch_store_dwordx2 v4, v[2:3], off
88668863
; GFX950-NEXT: .LBB113_6: ; %atomicrmw.phi
88678864
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
@@ -8901,9 +8898,8 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89018898
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
89028899
; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
89038900
; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
8904-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
8905-
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
89068901
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
8902+
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
89078903
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
89088904
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89098905
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -8919,18 +8915,17 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89198915
; GFX90A-NEXT: s_cbranch_execz .LBB114_6
89208916
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
89218917
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
8922-
; GFX90A-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc
8923-
; GFX90A-NEXT: buffer_load_dword v4, v6, s[0:3], 0 offen
8924-
; GFX90A-NEXT: buffer_load_dword v5, v6, s[0:3], 0 offen offset:4
8918+
; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
8919+
; GFX90A-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
8920+
; GFX90A-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:4
89258921
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8926-
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v4, v2
8922+
; GFX90A-NEXT: v_sub_co_u32_e32 v1, vcc, v4, v2
89278923
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8928-
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v3, vcc
8929-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[4:5]
8930-
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8924+
; GFX90A-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v3, vcc
89318925
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8932-
; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
8933-
; GFX90A-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
8926+
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8927+
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
8928+
; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
89348929
; GFX90A-NEXT: .LBB114_6: ; %atomicrmw.phi
89358930
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
89368931
; GFX90A-NEXT: ;;#ASMSTART
@@ -8963,10 +8958,9 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89638958
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
89648959
; GFX950-NEXT: s_nop 1
89658960
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
8966-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
89678961
; GFX950-NEXT: s_nop 1
8968-
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
89698962
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
8963+
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
89708964
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
89718965
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89728966
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -8989,7 +8983,6 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89898983
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
89908984
; GFX950-NEXT: s_nop 1
89918985
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
8992-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
89938986
; GFX950-NEXT: s_nop 1
89948987
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
89958988
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
@@ -17065,9 +17058,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1706517058
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1706617059
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1706717060
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17068-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17069-
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1707017061
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17062+
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1707117063
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
1707217064
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1707317065
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17086,20 +17078,19 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1708617078
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
1708717079
; GFX90A-NEXT: s_cmp_lg_u64 s[4:5], 0
1708817080
; GFX90A-NEXT: s_cselect_b32 s4, s4, -1
17089-
; GFX90A-NEXT: v_mov_b32_e32 v6, s4
17090-
; GFX90A-NEXT: buffer_load_dword v0, v6, s[0:3], 0 offen
17091-
; GFX90A-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
17081+
; GFX90A-NEXT: v_mov_b32_e32 v0, s4
17082+
; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
17083+
; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4
1709217084
; GFX90A-NEXT: s_waitcnt vmcnt(1)
17093-
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
17085+
; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v4
1709417086
; GFX90A-NEXT: s_waitcnt vmcnt(0)
17095-
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17096-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
17097-
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
17098-
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
17099-
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
17100-
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17101-
; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen offset:4
17102-
; GFX90A-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
17087+
; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
17088+
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
17089+
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
17090+
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
17091+
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
17092+
; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
17093+
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
1710317094
; GFX90A-NEXT: .LBB221_6: ; %atomicrmw.phi
1710417095
; GFX90A-NEXT: ;;#ASMSTART
1710517096
; GFX90A-NEXT: ; use a[0:1]
@@ -17132,10 +17123,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1713217123
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1713317124
; GFX950-NEXT: s_nop 1
1713417125
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17135-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1713617126
; GFX950-NEXT: s_nop 1
17137-
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1713817127
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17128+
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1713917129
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
1714017130
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1714117131
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17159,11 +17149,11 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1715917149
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
1716017150
; GFX950-NEXT: s_nop 1
1716117151
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17162-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
1716317152
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
17164-
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
17153+
; GFX950-NEXT: s_nop 0
1716517154
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
1716617155
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17156+
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
1716717157
; GFX950-NEXT: scratch_store_dwordx2 off, v[2:3], s0
1716817158
; GFX950-NEXT: .LBB221_6: ; %atomicrmw.phi
1716917159
; GFX950-NEXT: ;;#ASMSTART
@@ -17202,9 +17192,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1720217192
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
1720317193
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1720417194
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17205-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
17206-
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1720717195
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17196+
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1720817197
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] glc
1720917198
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1721017199
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17227,7 +17216,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1722717216
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1722817217
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1722917218
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17230-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1723117219
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1723217220
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1723317221
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen
@@ -17263,10 +17251,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1726317251
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1726417252
; GFX950-NEXT: s_nop 1
1726517253
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17266-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
1726717254
; GFX950-NEXT: s_nop 1
17268-
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1726917255
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17256+
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1727017257
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
1727117258
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1727217259
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17287,7 +17274,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1728717274
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1728817275
; GFX950-NEXT: s_nop 1
1728917276
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17290-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1729117277
; GFX950-NEXT: s_nop 1
1729217278
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1729317279
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc

llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5804,9 +5804,8 @@ define void @global_atomic_usub_sat_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
58045804
; GFX90A-NEXT: s_waitcnt vmcnt(0)
58055805
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v4, v6
58065806
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v7, vcc
5807-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[4:5]
5808-
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
58095807
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
5808+
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
58105809
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 glc
58115810
; GFX90A-NEXT: s_waitcnt vmcnt(0)
58125811
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -5839,10 +5838,9 @@ define void @global_atomic_usub_sat_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
58395838
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v4, v6
58405839
; GFX950-NEXT: s_nop 1
58415840
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v7, vcc
5842-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[4:5]
58435841
; GFX950-NEXT: s_nop 1
5844-
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
58455842
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
5843+
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
58465844
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 sc0
58475845
; GFX950-NEXT: s_waitcnt vmcnt(0)
58485846
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
@@ -5880,9 +5878,8 @@ define void @global_atomic_usub_sat_i64_ret_av_av(ptr addrspace(1) %ptr) #0 {
58805878
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
58815879
; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
58825880
; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
5883-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
5884-
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
58855881
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
5882+
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
58865883
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:80 glc
58875884
; GFX90A-NEXT: s_waitcnt vmcnt(0)
58885885
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -5911,10 +5908,9 @@ define void @global_atomic_usub_sat_i64_ret_av_av(ptr addrspace(1) %ptr) #0 {
59115908
; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
59125909
; GFX950-NEXT: s_nop 1
59135910
; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
5914-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
59155911
; GFX950-NEXT: s_nop 1
5916-
; GFX950-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
59175912
; GFX950-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
5913+
; GFX950-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
59185914
; GFX950-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:80 sc0
59195915
; GFX950-NEXT: s_waitcnt vmcnt(0)
59205916
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -11573,9 +11569,8 @@ define void @global_atomic_usub_sat_i64_saddr_ret_a_a(ptr addrspace(1) inreg %pt
1157311569
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1157411570
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1157511571
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
11576-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
11577-
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1157811572
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
11573+
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1157911574
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[16:17] offset:80 glc
1158011575
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1158111576
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -11609,10 +11604,9 @@ define void @global_atomic_usub_sat_i64_saddr_ret_a_a(ptr addrspace(1) inreg %pt
1160911604
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1161011605
; GFX950-NEXT: s_nop 1
1161111606
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
11612-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1161311607
; GFX950-NEXT: s_nop 1
11614-
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1161511608
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
11609+
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1161611610
; GFX950-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:80 sc0
1161711611
; GFX950-NEXT: s_waitcnt vmcnt(0)
1161811612
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -11651,9 +11645,8 @@ define void @global_atomic_usub_sat_i64_saddr_ret_av_av(ptr addrspace(1) inreg %
1165111645
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
1165211646
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1165311647
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
11654-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
11655-
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1165611648
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
11649+
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1165711650
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v4, v[6:9], s[16:17] offset:80 glc
1165811651
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1165911652
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -11683,10 +11676,9 @@ define void @global_atomic_usub_sat_i64_saddr_ret_av_av(ptr addrspace(1) inreg %
1168311676
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1168411677
; GFX950-NEXT: s_nop 1
1168511678
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
11686-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
1168711679
; GFX950-NEXT: s_nop 1
11688-
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1168911680
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
11681+
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1169011682
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v4, v[6:9], s[0:1] offset:80 sc0
1169111683
; GFX950-NEXT: s_waitcnt vmcnt(0)
1169211684
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]

llvm/test/CodeGen/AMDGPU/sdiv64.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1800,14 +1800,14 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
18001800
; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
18011801
; GCN-IR-NEXT: s_cbranch_execz .LBB13_5
18021802
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
1803-
; GCN-IR-NEXT: v_lshr_b64 v[6:7], v[4:5], v6
1804-
; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 0xffffffcf, v8
1805-
; GCN-IR-NEXT: v_mov_b32_e32 v8, 0
1806-
; GCN-IR-NEXT: v_addc_u32_e64 v5, s[4:5], 0, -1, vcc
1807-
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
1808-
; GCN-IR-NEXT: v_mov_b32_e32 v9, 0
1803+
; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 0xffffffcf, v6
1804+
; GCN-IR-NEXT: v_lshr_b64 v[4:5], v[4:5], v7
1805+
; GCN-IR-NEXT: v_addc_u32_e64 v11, s[8:9], 0, -1, vcc
1806+
; GCN-IR-NEXT: v_mov_b32_e32 v6, 0
1807+
; GCN-IR-NEXT: s_mov_b64 s[8:9], 0
1808+
; GCN-IR-NEXT: v_mov_b32_e32 v7, 0
18091809
; GCN-IR-NEXT: v_mov_b32_e32 v3, 0
1810-
; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
1810+
; GCN-IR-NEXT: s_movk_i32 s10, 0x7fff
18111811
; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while
18121812
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
18131813
; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1

llvm/test/CodeGen/AMDGPU/srem64.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1970,14 +1970,14 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) {
19701970
; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
19711971
; GCN-IR-NEXT: s_cbranch_execz .LBB13_5
19721972
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
1973-
; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[0:1], v6
1974-
; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 0xffffffcf, v10
1975-
; GCN-IR-NEXT: v_mov_b32_e32 v10, 0
1976-
; GCN-IR-NEXT: v_addc_u32_e64 v7, s[4:5], 0, -1, vcc
1977-
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
1978-
; GCN-IR-NEXT: v_mov_b32_e32 v11, 0
1973+
; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 0xffffffcf, v8
1974+
; GCN-IR-NEXT: v_lshr_b64 v[6:7], v[0:1], v6
1975+
; GCN-IR-NEXT: v_addc_u32_e64 v13, s[8:9], 0, -1, vcc
1976+
; GCN-IR-NEXT: v_mov_b32_e32 v8, 0
1977+
; GCN-IR-NEXT: s_mov_b64 s[8:9], 0
1978+
; GCN-IR-NEXT: v_mov_b32_e32 v9, 0
19791979
; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
1980-
; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
1980+
; GCN-IR-NEXT: s_movk_i32 s10, 0x7fff
19811981
; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while
19821982
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
19831983
; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1

0 commit comments

Comments
 (0)