@@ -8760,9 +8760,8 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87608760; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87618761; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
87628762; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8763- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8764- ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
87658763; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8764+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
87668765; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
87678766; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87688767; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8781,20 +8780,19 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87818780; GFX90A-NEXT: s_cbranch_execz .LBB113_6
87828781; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
87838782; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
8784- ; GFX90A-NEXT: v_cndmask_b32_e32 v4 , -1, v4, vcc
8785- ; GFX90A-NEXT: buffer_load_dword v0, v4 , s[0:3], 0 offen
8786- ; GFX90A-NEXT: buffer_load_dword v1, v4 , s[0:3], 0 offen offset:4
8783+ ; GFX90A-NEXT: v_cndmask_b32_e32 v0 , -1, v4, vcc
8784+ ; GFX90A-NEXT: buffer_load_dword v1, v0 , s[0:3], 0 offen
8785+ ; GFX90A-NEXT: buffer_load_dword v2, v0 , s[0:3], 0 offen offset:4
87878786; GFX90A-NEXT: s_waitcnt vmcnt(1)
8788- ; GFX90A-NEXT: v_sub_co_u32_e32 v2 , vcc, v0 , v6
8787+ ; GFX90A-NEXT: v_sub_co_u32_e32 v3 , vcc, v1 , v6
87898788; GFX90A-NEXT: s_waitcnt vmcnt(0)
8790- ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8791- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
8792- ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
8793- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
8794- ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
8795- ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8796- ; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen offset:4
8797- ; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
8789+ ; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v7, vcc
8790+ ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
8791+ ; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
8792+ ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
8793+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
8794+ ; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
8795+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
87988796; GFX90A-NEXT: .LBB113_6: ; %atomicrmw.phi
87998797; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
88008798; GFX90A-NEXT: ;;#ASMSTART
@@ -8828,10 +8826,9 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88288826; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
88298827; GFX950-NEXT: s_nop 1
88308828; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8831- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
88328829; GFX950-NEXT: s_nop 1
8833- ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
88348830; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8831+ ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
88358832; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0
88368833; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
88378834; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8857,11 +8854,11 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88578854; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
88588855; GFX950-NEXT: s_nop 1
88598856; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8860- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
88618857; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
8862- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
8858+ ; GFX950-NEXT: s_nop 0
88638859; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
88648860; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8861+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
88658862; GFX950-NEXT: scratch_store_dwordx2 v4, v[2:3], off
88668863; GFX950-NEXT: .LBB113_6: ; %atomicrmw.phi
88678864; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
@@ -8901,9 +8898,8 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89018898; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
89028899; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
89038900; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
8904- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
8905- ; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
89068901; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
8902+ ; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
89078903; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
89088904; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89098905; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -8919,18 +8915,17 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89198915; GFX90A-NEXT: s_cbranch_execz .LBB114_6
89208916; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
89218917; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
8922- ; GFX90A-NEXT: v_cndmask_b32_e32 v6 , -1, v0, vcc
8923- ; GFX90A-NEXT: buffer_load_dword v4, v6 , s[0:3], 0 offen
8924- ; GFX90A-NEXT: buffer_load_dword v5, v6 , s[0:3], 0 offen offset:4
8918+ ; GFX90A-NEXT: v_cndmask_b32_e32 v0 , -1, v0, vcc
8919+ ; GFX90A-NEXT: buffer_load_dword v4, v0 , s[0:3], 0 offen
8920+ ; GFX90A-NEXT: buffer_load_dword v5, v0 , s[0:3], 0 offen offset:4
89258921; GFX90A-NEXT: s_waitcnt vmcnt(1)
8926- ; GFX90A-NEXT: v_sub_co_u32_e32 v0 , vcc, v4, v2
8922+ ; GFX90A-NEXT: v_sub_co_u32_e32 v1 , vcc, v4, v2
89278923; GFX90A-NEXT: s_waitcnt vmcnt(0)
8928- ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v3, vcc
8929- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[4:5]
8930- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8924+ ; GFX90A-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v3, vcc
89318925; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8932- ; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
8933- ; GFX90A-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
8926+ ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8927+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
8928+ ; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
89348929; GFX90A-NEXT: .LBB114_6: ; %atomicrmw.phi
89358930; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
89368931; GFX90A-NEXT: ;;#ASMSTART
@@ -8963,10 +8958,9 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89638958; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
89648959; GFX950-NEXT: s_nop 1
89658960; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
8966- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
89678961; GFX950-NEXT: s_nop 1
8968- ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
89698962; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
8963+ ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
89708964; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
89718965; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89728966; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -8989,7 +8983,6 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89898983; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
89908984; GFX950-NEXT: s_nop 1
89918985; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
8992- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
89938986; GFX950-NEXT: s_nop 1
89948987; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
89958988; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
@@ -17065,9 +17058,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1706517058; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1706617059; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1706717060; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17068- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17069- ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1707017061; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17062+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1707117063; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
1707217064; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1707317065; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17086,20 +17078,19 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1708617078; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
1708717079; GFX90A-NEXT: s_cmp_lg_u64 s[4:5], 0
1708817080; GFX90A-NEXT: s_cselect_b32 s4, s4, -1
17089- ; GFX90A-NEXT: v_mov_b32_e32 v6 , s4
17090- ; GFX90A-NEXT: buffer_load_dword v0, v6 , s[0:3], 0 offen
17091- ; GFX90A-NEXT: buffer_load_dword v1, v6 , s[0:3], 0 offen offset:4
17081+ ; GFX90A-NEXT: v_mov_b32_e32 v0 , s4
17082+ ; GFX90A-NEXT: buffer_load_dword v1, v0 , s[0:3], 0 offen
17083+ ; GFX90A-NEXT: buffer_load_dword v2, v0 , s[0:3], 0 offen offset:4
1709217084; GFX90A-NEXT: s_waitcnt vmcnt(1)
17093- ; GFX90A-NEXT: v_sub_co_u32_e32 v2 , vcc, v0 , v4
17085+ ; GFX90A-NEXT: v_sub_co_u32_e32 v3 , vcc, v1 , v4
1709417086; GFX90A-NEXT: s_waitcnt vmcnt(0)
17095- ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17096- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
17097- ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
17098- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
17099- ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
17100- ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17101- ; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen offset:4
17102- ; GFX90A-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
17087+ ; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
17088+ ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
17089+ ; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
17090+ ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
17091+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
17092+ ; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
17093+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
1710317094; GFX90A-NEXT: .LBB221_6: ; %atomicrmw.phi
1710417095; GFX90A-NEXT: ;;#ASMSTART
1710517096; GFX90A-NEXT: ; use a[0:1]
@@ -17132,10 +17123,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1713217123; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1713317124; GFX950-NEXT: s_nop 1
1713417125; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17135- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1713617126; GFX950-NEXT: s_nop 1
17137- ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1713817127; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17128+ ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1713917129; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
1714017130; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1714117131; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17159,11 +17149,11 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1715917149; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
1716017150; GFX950-NEXT: s_nop 1
1716117151; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17162- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
1716317152; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
17164- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
17153+ ; GFX950-NEXT: s_nop 0
1716517154; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
1716617155; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17156+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
1716717157; GFX950-NEXT: scratch_store_dwordx2 off, v[2:3], s0
1716817158; GFX950-NEXT: .LBB221_6: ; %atomicrmw.phi
1716917159; GFX950-NEXT: ;;#ASMSTART
@@ -17202,9 +17192,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1720217192; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
1720317193; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1720417194; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17205- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
17206- ; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1720717195; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17196+ ; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1720817197; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] glc
1720917198; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1721017199; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17227,7 +17216,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1722717216; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1722817217; GFX90A-NEXT: s_waitcnt vmcnt(0)
1722917218; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17230- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1723117219; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1723217220; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1723317221; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen
@@ -17263,10 +17251,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1726317251; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1726417252; GFX950-NEXT: s_nop 1
1726517253; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17266- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
1726717254; GFX950-NEXT: s_nop 1
17268- ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1726917255; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17256+ ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1727017257; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
1727117258; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1727217259; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17287,7 +17274,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1728717274; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1728817275; GFX950-NEXT: s_nop 1
1728917276; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17290- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1729117277; GFX950-NEXT: s_nop 1
1729217278; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1729317279; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
0 commit comments