@@ -8759,9 +8759,8 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87598759; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87608760; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
87618761; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8762- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8763- ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
87648762; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8763+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
87658764; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
87668765; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87678766; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8780,20 +8779,19 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87808779; GFX90A-NEXT: s_cbranch_execz .LBB113_6
87818780; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
87828781; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
8783- ; GFX90A-NEXT: v_cndmask_b32_e32 v4 , -1, v4, vcc
8784- ; GFX90A-NEXT: buffer_load_dword v0, v4 , s[0:3], 0 offen
8785- ; GFX90A-NEXT: buffer_load_dword v1, v4 , s[0:3], 0 offen offset:4
8782+ ; GFX90A-NEXT: v_cndmask_b32_e32 v0 , -1, v4, vcc
8783+ ; GFX90A-NEXT: buffer_load_dword v1, v0 , s[0:3], 0 offen
8784+ ; GFX90A-NEXT: buffer_load_dword v2, v0 , s[0:3], 0 offen offset:4
87868785; GFX90A-NEXT: s_waitcnt vmcnt(1)
8787- ; GFX90A-NEXT: v_sub_co_u32_e32 v2 , vcc, v0 , v6
8786+ ; GFX90A-NEXT: v_sub_co_u32_e32 v3 , vcc, v1 , v6
87888787; GFX90A-NEXT: s_waitcnt vmcnt(0)
8789- ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8790- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
8791- ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
8792- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
8793- ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
8794- ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8795- ; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen offset:4
8796- ; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
8788+ ; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v7, vcc
8789+ ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
8790+ ; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
8791+ ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
8792+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
8793+ ; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
8794+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
87978795; GFX90A-NEXT: .LBB113_6: ; %atomicrmw.phi
87988796; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
87998797; GFX90A-NEXT: ;;#ASMSTART
@@ -8827,10 +8825,9 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88278825; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
88288826; GFX950-NEXT: s_nop 1
88298827; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8830- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
88318828; GFX950-NEXT: s_nop 1
8832- ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
88338829; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8830+ ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
88348831; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0
88358832; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
88368833; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8856,11 +8853,11 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88568853; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
88578854; GFX950-NEXT: s_nop 1
88588855; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8859- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
88608856; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
8861- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
8857+ ; GFX950-NEXT: s_nop 0
88628858; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
88638859; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8860+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
88648861; GFX950-NEXT: scratch_store_dwordx2 v4, v[2:3], off
88658862; GFX950-NEXT: .LBB113_6: ; %atomicrmw.phi
88668863; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
@@ -8900,9 +8897,8 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89008897; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
89018898; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
89028899; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
8903- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
8904- ; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
89058900; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
8901+ ; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
89068902; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
89078903; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89088904; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -8918,18 +8914,17 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89188914; GFX90A-NEXT: s_cbranch_execz .LBB114_6
89198915; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
89208916; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
8921- ; GFX90A-NEXT: v_cndmask_b32_e32 v6 , -1, v0, vcc
8922- ; GFX90A-NEXT: buffer_load_dword v4, v6 , s[0:3], 0 offen
8923- ; GFX90A-NEXT: buffer_load_dword v5, v6 , s[0:3], 0 offen offset:4
8917+ ; GFX90A-NEXT: v_cndmask_b32_e32 v0 , -1, v0, vcc
8918+ ; GFX90A-NEXT: buffer_load_dword v4, v0 , s[0:3], 0 offen
8919+ ; GFX90A-NEXT: buffer_load_dword v5, v0 , s[0:3], 0 offen offset:4
89248920; GFX90A-NEXT: s_waitcnt vmcnt(1)
8925- ; GFX90A-NEXT: v_sub_co_u32_e32 v0 , vcc, v4, v2
8921+ ; GFX90A-NEXT: v_sub_co_u32_e32 v1 , vcc, v4, v2
89268922; GFX90A-NEXT: s_waitcnt vmcnt(0)
8927- ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v3, vcc
8928- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[4:5]
8929- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8923+ ; GFX90A-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v3, vcc
89308924; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8931- ; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
8932- ; GFX90A-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
8925+ ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8926+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
8927+ ; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
89338928; GFX90A-NEXT: .LBB114_6: ; %atomicrmw.phi
89348929; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
89358930; GFX90A-NEXT: ;;#ASMSTART
@@ -8962,10 +8957,9 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89628957; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
89638958; GFX950-NEXT: s_nop 1
89648959; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
8965- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
89668960; GFX950-NEXT: s_nop 1
8967- ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
89688961; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
8962+ ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
89698963; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
89708964; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89718965; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -8988,7 +8982,6 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89888982; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
89898983; GFX950-NEXT: s_nop 1
89908984; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
8991- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
89928985; GFX950-NEXT: s_nop 1
89938986; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
89948987; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
@@ -17064,9 +17057,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1706417057; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1706517058; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1706617059; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17067- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17068- ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1706917060; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17061+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1707017062; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
1707117063; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1707217064; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17085,20 +17077,19 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1708517077; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
1708617078; GFX90A-NEXT: s_cmp_lg_u64 s[4:5], 0
1708717079; GFX90A-NEXT: s_cselect_b32 s4, s4, -1
17088- ; GFX90A-NEXT: v_mov_b32_e32 v6 , s4
17089- ; GFX90A-NEXT: buffer_load_dword v0, v6 , s[0:3], 0 offen
17090- ; GFX90A-NEXT: buffer_load_dword v1, v6 , s[0:3], 0 offen offset:4
17080+ ; GFX90A-NEXT: v_mov_b32_e32 v0 , s4
17081+ ; GFX90A-NEXT: buffer_load_dword v1, v0 , s[0:3], 0 offen
17082+ ; GFX90A-NEXT: buffer_load_dword v2, v0 , s[0:3], 0 offen offset:4
1709117083; GFX90A-NEXT: s_waitcnt vmcnt(1)
17092- ; GFX90A-NEXT: v_sub_co_u32_e32 v2 , vcc, v0 , v4
17084+ ; GFX90A-NEXT: v_sub_co_u32_e32 v3 , vcc, v1 , v4
1709317085; GFX90A-NEXT: s_waitcnt vmcnt(0)
17094- ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17095- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
17096- ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
17097- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
17098- ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
17099- ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17100- ; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen offset:4
17101- ; GFX90A-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
17086+ ; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
17087+ ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
17088+ ; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
17089+ ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
17090+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
17091+ ; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
17092+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
1710217093; GFX90A-NEXT: .LBB221_6: ; %atomicrmw.phi
1710317094; GFX90A-NEXT: ;;#ASMSTART
1710417095; GFX90A-NEXT: ; use a[0:1]
@@ -17131,10 +17122,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1713117122; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1713217123; GFX950-NEXT: s_nop 1
1713317124; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17134- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1713517125; GFX950-NEXT: s_nop 1
17136- ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1713717126; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17127+ ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1713817128; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
1713917129; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1714017130; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17158,11 +17148,11 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1715817148; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
1715917149; GFX950-NEXT: s_nop 1
1716017150; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17161- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
1716217151; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
17163- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
17152+ ; GFX950-NEXT: s_nop 0
1716417153; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
1716517154; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17155+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
1716617156; GFX950-NEXT: scratch_store_dwordx2 off, v[2:3], s0
1716717157; GFX950-NEXT: .LBB221_6: ; %atomicrmw.phi
1716817158; GFX950-NEXT: ;;#ASMSTART
@@ -17201,9 +17191,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1720117191; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
1720217192; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1720317193; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17204- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
17205- ; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1720617194; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17195+ ; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1720717196; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] glc
1720817197; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1720917198; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17226,7 +17215,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1722617215; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1722717216; GFX90A-NEXT: s_waitcnt vmcnt(0)
1722817217; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17229- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1723017218; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1723117219; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1723217220; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen
@@ -17262,10 +17250,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1726217250; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1726317251; GFX950-NEXT: s_nop 1
1726417252; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17265- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
1726617253; GFX950-NEXT: s_nop 1
17267- ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1726817254; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17255+ ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1726917256; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
1727017257; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1727117258; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17286,7 +17273,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1728617273; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1728717274; GFX950-NEXT: s_nop 1
1728817275; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17289- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1729017276; GFX950-NEXT: s_nop 1
1729117277; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1729217278; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
0 commit comments