@@ -8760,9 +8760,8 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
8760
8760
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8761
8761
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
8762
8762
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8763
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8764
- ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8765
8763
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8764
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8766
8765
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
8767
8766
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8768
8767
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8781,20 +8780,19 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
8781
8780
; GFX90A-NEXT: s_cbranch_execz .LBB113_6
8782
8781
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
8783
8782
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
8784
- ; GFX90A-NEXT: v_cndmask_b32_e32 v4 , -1, v4, vcc
8785
- ; GFX90A-NEXT: buffer_load_dword v0, v4 , s[0:3], 0 offen
8786
- ; GFX90A-NEXT: buffer_load_dword v1, v4 , s[0:3], 0 offen offset:4
8783
+ ; GFX90A-NEXT: v_cndmask_b32_e32 v0 , -1, v4, vcc
8784
+ ; GFX90A-NEXT: buffer_load_dword v1, v0 , s[0:3], 0 offen
8785
+ ; GFX90A-NEXT: buffer_load_dword v2, v0 , s[0:3], 0 offen offset:4
8787
8786
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8788
- ; GFX90A-NEXT: v_sub_co_u32_e32 v2 , vcc, v0 , v6
8787
+ ; GFX90A-NEXT: v_sub_co_u32_e32 v3 , vcc, v1 , v6
8789
8788
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8790
- ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8791
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
8792
- ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
8793
- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
8794
- ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
8795
- ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8796
- ; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen offset:4
8797
- ; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
8789
+ ; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v7, vcc
8790
+ ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
8791
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
8792
+ ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
8793
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
8794
+ ; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
8795
+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
8798
8796
; GFX90A-NEXT: .LBB113_6: ; %atomicrmw.phi
8799
8797
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
8800
8798
; GFX90A-NEXT: ;;#ASMSTART
@@ -8828,10 +8826,9 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
8828
8826
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
8829
8827
; GFX950-NEXT: s_nop 1
8830
8828
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8831
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8832
8829
; GFX950-NEXT: s_nop 1
8833
- ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8834
8830
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8831
+ ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8835
8832
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0
8836
8833
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8837
8834
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8857,11 +8854,11 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
8857
8854
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
8858
8855
; GFX950-NEXT: s_nop 1
8859
8856
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8860
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
8861
8857
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
8862
- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
8858
+ ; GFX950-NEXT: s_nop 0
8863
8859
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
8864
8860
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8861
+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
8865
8862
; GFX950-NEXT: scratch_store_dwordx2 v4, v[2:3], off
8866
8863
; GFX950-NEXT: .LBB113_6: ; %atomicrmw.phi
8867
8864
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
@@ -8901,9 +8898,8 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
8901
8898
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
8902
8899
; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
8903
8900
; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
8904
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
8905
- ; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
8906
8901
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
8902
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
8907
8903
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
8908
8904
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8909
8905
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -8919,18 +8915,17 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
8919
8915
; GFX90A-NEXT: s_cbranch_execz .LBB114_6
8920
8916
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
8921
8917
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
8922
- ; GFX90A-NEXT: v_cndmask_b32_e32 v6 , -1, v0, vcc
8923
- ; GFX90A-NEXT: buffer_load_dword v4, v6 , s[0:3], 0 offen
8924
- ; GFX90A-NEXT: buffer_load_dword v5, v6 , s[0:3], 0 offen offset:4
8918
+ ; GFX90A-NEXT: v_cndmask_b32_e32 v0 , -1, v0, vcc
8919
+ ; GFX90A-NEXT: buffer_load_dword v4, v0 , s[0:3], 0 offen
8920
+ ; GFX90A-NEXT: buffer_load_dword v5, v0 , s[0:3], 0 offen offset:4
8925
8921
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8926
- ; GFX90A-NEXT: v_sub_co_u32_e32 v0 , vcc, v4, v2
8922
+ ; GFX90A-NEXT: v_sub_co_u32_e32 v1 , vcc, v4, v2
8927
8923
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8928
- ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v3, vcc
8929
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[4:5]
8930
- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8924
+ ; GFX90A-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v3, vcc
8931
8925
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8932
- ; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
8933
- ; GFX90A-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
8926
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8927
+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
8928
+ ; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
8934
8929
; GFX90A-NEXT: .LBB114_6: ; %atomicrmw.phi
8935
8930
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
8936
8931
; GFX90A-NEXT: ;;#ASMSTART
@@ -8963,10 +8958,9 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
8963
8958
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
8964
8959
; GFX950-NEXT: s_nop 1
8965
8960
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
8966
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
8967
8961
; GFX950-NEXT: s_nop 1
8968
- ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
8969
8962
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
8963
+ ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
8970
8964
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
8971
8965
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8972
8966
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -8989,7 +8983,6 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
8989
8983
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
8990
8984
; GFX950-NEXT: s_nop 1
8991
8985
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
8992
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8993
8986
; GFX950-NEXT: s_nop 1
8994
8987
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8995
8988
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
@@ -17065,9 +17058,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
17065
17058
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17066
17059
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
17067
17060
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17068
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17069
- ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17070
17061
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17062
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17071
17063
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
17072
17064
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17073
17065
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17086,20 +17078,19 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
17086
17078
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
17087
17079
; GFX90A-NEXT: s_cmp_lg_u64 s[4:5], 0
17088
17080
; GFX90A-NEXT: s_cselect_b32 s4, s4, -1
17089
- ; GFX90A-NEXT: v_mov_b32_e32 v6 , s4
17090
- ; GFX90A-NEXT: buffer_load_dword v0, v6 , s[0:3], 0 offen
17091
- ; GFX90A-NEXT: buffer_load_dword v1, v6 , s[0:3], 0 offen offset:4
17081
+ ; GFX90A-NEXT: v_mov_b32_e32 v0 , s4
17082
+ ; GFX90A-NEXT: buffer_load_dword v1, v0 , s[0:3], 0 offen
17083
+ ; GFX90A-NEXT: buffer_load_dword v2, v0 , s[0:3], 0 offen offset:4
17092
17084
; GFX90A-NEXT: s_waitcnt vmcnt(1)
17093
- ; GFX90A-NEXT: v_sub_co_u32_e32 v2 , vcc, v0 , v4
17085
+ ; GFX90A-NEXT: v_sub_co_u32_e32 v3 , vcc, v1 , v4
17094
17086
; GFX90A-NEXT: s_waitcnt vmcnt(0)
17095
- ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17096
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
17097
- ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
17098
- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
17099
- ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
17100
- ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17101
- ; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen offset:4
17102
- ; GFX90A-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
17087
+ ; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
17088
+ ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
17089
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
17090
+ ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
17091
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
17092
+ ; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
17093
+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
17103
17094
; GFX90A-NEXT: .LBB221_6: ; %atomicrmw.phi
17104
17095
; GFX90A-NEXT: ;;#ASMSTART
17105
17096
; GFX90A-NEXT: ; use a[0:1]
@@ -17132,10 +17123,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
17132
17123
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
17133
17124
; GFX950-NEXT: s_nop 1
17134
17125
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17135
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17136
17126
; GFX950-NEXT: s_nop 1
17137
- ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17138
17127
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17128
+ ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17139
17129
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
17140
17130
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17141
17131
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17159,11 +17149,11 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
17159
17149
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
17160
17150
; GFX950-NEXT: s_nop 1
17161
17151
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17162
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
17163
17152
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
17164
- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
17153
+ ; GFX950-NEXT: s_nop 0
17165
17154
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
17166
17155
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17156
+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
17167
17157
; GFX950-NEXT: scratch_store_dwordx2 off, v[2:3], s0
17168
17158
; GFX950-NEXT: .LBB221_6: ; %atomicrmw.phi
17169
17159
; GFX950-NEXT: ;;#ASMSTART
@@ -17202,9 +17192,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
17202
17192
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
17203
17193
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
17204
17194
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17205
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
17206
- ; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17207
17195
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17196
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17208
17197
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] glc
17209
17198
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17210
17199
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17227,7 +17216,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
17227
17216
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
17228
17217
; GFX90A-NEXT: s_waitcnt vmcnt(0)
17229
17218
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17230
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17231
17219
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17232
17220
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17233
17221
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen
@@ -17263,10 +17251,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
17263
17251
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
17264
17252
; GFX950-NEXT: s_nop 1
17265
17253
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17266
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
17267
17254
; GFX950-NEXT: s_nop 1
17268
- ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17269
17255
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17256
+ ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17270
17257
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
17271
17258
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17272
17259
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17287,7 +17274,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
17287
17274
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
17288
17275
; GFX950-NEXT: s_nop 1
17289
17276
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17290
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17291
17277
; GFX950-NEXT: s_nop 1
17292
17278
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17293
17279
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
0 commit comments