@@ -8759,9 +8759,8 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
8759
8759
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8760
8760
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
8761
8761
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8762
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8763
- ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8764
8762
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8763
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8765
8764
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
8766
8765
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8767
8766
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8780,20 +8779,19 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
8780
8779
; GFX90A-NEXT: s_cbranch_execz .LBB113_6
8781
8780
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
8782
8781
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
8783
- ; GFX90A-NEXT: v_cndmask_b32_e32 v4 , -1, v4, vcc
8784
- ; GFX90A-NEXT: buffer_load_dword v0, v4 , s[0:3], 0 offen
8785
- ; GFX90A-NEXT: buffer_load_dword v1, v4 , s[0:3], 0 offen offset:4
8782
+ ; GFX90A-NEXT: v_cndmask_b32_e32 v0 , -1, v4, vcc
8783
+ ; GFX90A-NEXT: buffer_load_dword v1, v0 , s[0:3], 0 offen
8784
+ ; GFX90A-NEXT: buffer_load_dword v2, v0 , s[0:3], 0 offen offset:4
8786
8785
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8787
- ; GFX90A-NEXT: v_sub_co_u32_e32 v2 , vcc, v0 , v6
8786
+ ; GFX90A-NEXT: v_sub_co_u32_e32 v3 , vcc, v1 , v6
8788
8787
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8789
- ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8790
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
8791
- ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
8792
- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
8793
- ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
8794
- ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8795
- ; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen offset:4
8796
- ; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
8788
+ ; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v7, vcc
8789
+ ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
8790
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
8791
+ ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
8792
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
8793
+ ; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
8794
+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
8797
8795
; GFX90A-NEXT: .LBB113_6: ; %atomicrmw.phi
8798
8796
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
8799
8797
; GFX90A-NEXT: ;;#ASMSTART
@@ -8827,10 +8825,9 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
8827
8825
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
8828
8826
; GFX950-NEXT: s_nop 1
8829
8827
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8830
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8831
8828
; GFX950-NEXT: s_nop 1
8832
- ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8833
8829
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8830
+ ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8834
8831
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0
8835
8832
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8836
8833
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8856,11 +8853,11 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
8856
8853
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
8857
8854
; GFX950-NEXT: s_nop 1
8858
8855
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8859
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
8860
8856
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
8861
- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
8857
+ ; GFX950-NEXT: s_nop 0
8862
8858
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
8863
8859
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8860
+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
8864
8861
; GFX950-NEXT: scratch_store_dwordx2 v4, v[2:3], off
8865
8862
; GFX950-NEXT: .LBB113_6: ; %atomicrmw.phi
8866
8863
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
@@ -8900,9 +8897,8 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
8900
8897
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
8901
8898
; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
8902
8899
; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
8903
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
8904
- ; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
8905
8900
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
8901
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
8906
8902
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
8907
8903
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8908
8904
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -8918,18 +8914,17 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
8918
8914
; GFX90A-NEXT: s_cbranch_execz .LBB114_6
8919
8915
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
8920
8916
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
8921
- ; GFX90A-NEXT: v_cndmask_b32_e32 v6 , -1, v0, vcc
8922
- ; GFX90A-NEXT: buffer_load_dword v4, v6 , s[0:3], 0 offen
8923
- ; GFX90A-NEXT: buffer_load_dword v5, v6 , s[0:3], 0 offen offset:4
8917
+ ; GFX90A-NEXT: v_cndmask_b32_e32 v0 , -1, v0, vcc
8918
+ ; GFX90A-NEXT: buffer_load_dword v4, v0 , s[0:3], 0 offen
8919
+ ; GFX90A-NEXT: buffer_load_dword v5, v0 , s[0:3], 0 offen offset:4
8924
8920
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8925
- ; GFX90A-NEXT: v_sub_co_u32_e32 v0 , vcc, v4, v2
8921
+ ; GFX90A-NEXT: v_sub_co_u32_e32 v1 , vcc, v4, v2
8926
8922
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8927
- ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v3, vcc
8928
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[4:5]
8929
- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8923
+ ; GFX90A-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v3, vcc
8930
8924
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8931
- ; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
8932
- ; GFX90A-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
8925
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8926
+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
8927
+ ; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
8933
8928
; GFX90A-NEXT: .LBB114_6: ; %atomicrmw.phi
8934
8929
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
8935
8930
; GFX90A-NEXT: ;;#ASMSTART
@@ -8962,10 +8957,9 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
8962
8957
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
8963
8958
; GFX950-NEXT: s_nop 1
8964
8959
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
8965
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
8966
8960
; GFX950-NEXT: s_nop 1
8967
- ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
8968
8961
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
8962
+ ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
8969
8963
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
8970
8964
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8971
8965
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -8988,7 +8982,6 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
8988
8982
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
8989
8983
; GFX950-NEXT: s_nop 1
8990
8984
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
8991
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8992
8985
; GFX950-NEXT: s_nop 1
8993
8986
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8994
8987
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
@@ -17064,9 +17057,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
17064
17057
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17065
17058
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
17066
17059
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17067
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17068
- ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17069
17060
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17061
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17070
17062
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
17071
17063
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17072
17064
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17085,20 +17077,19 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
17085
17077
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
17086
17078
; GFX90A-NEXT: s_cmp_lg_u64 s[4:5], 0
17087
17079
; GFX90A-NEXT: s_cselect_b32 s4, s4, -1
17088
- ; GFX90A-NEXT: v_mov_b32_e32 v6 , s4
17089
- ; GFX90A-NEXT: buffer_load_dword v0, v6 , s[0:3], 0 offen
17090
- ; GFX90A-NEXT: buffer_load_dword v1, v6 , s[0:3], 0 offen offset:4
17080
+ ; GFX90A-NEXT: v_mov_b32_e32 v0 , s4
17081
+ ; GFX90A-NEXT: buffer_load_dword v1, v0 , s[0:3], 0 offen
17082
+ ; GFX90A-NEXT: buffer_load_dword v2, v0 , s[0:3], 0 offen offset:4
17091
17083
; GFX90A-NEXT: s_waitcnt vmcnt(1)
17092
- ; GFX90A-NEXT: v_sub_co_u32_e32 v2 , vcc, v0 , v4
17084
+ ; GFX90A-NEXT: v_sub_co_u32_e32 v3 , vcc, v1 , v4
17093
17085
; GFX90A-NEXT: s_waitcnt vmcnt(0)
17094
- ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17095
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
17096
- ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
17097
- ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
17098
- ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
17099
- ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17100
- ; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen offset:4
17101
- ; GFX90A-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
17086
+ ; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
17087
+ ; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
17088
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
17089
+ ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
17090
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
17091
+ ; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
17092
+ ; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
17102
17093
; GFX90A-NEXT: .LBB221_6: ; %atomicrmw.phi
17103
17094
; GFX90A-NEXT: ;;#ASMSTART
17104
17095
; GFX90A-NEXT: ; use a[0:1]
@@ -17131,10 +17122,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
17131
17122
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
17132
17123
; GFX950-NEXT: s_nop 1
17133
17124
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17134
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17135
17125
; GFX950-NEXT: s_nop 1
17136
- ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17137
17126
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17127
+ ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17138
17128
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
17139
17129
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17140
17130
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17158,11 +17148,11 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
17158
17148
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
17159
17149
; GFX950-NEXT: s_nop 1
17160
17150
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17161
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
17162
17151
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
17163
- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
17152
+ ; GFX950-NEXT: s_nop 0
17164
17153
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
17165
17154
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17155
+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
17166
17156
; GFX950-NEXT: scratch_store_dwordx2 off, v[2:3], s0
17167
17157
; GFX950-NEXT: .LBB221_6: ; %atomicrmw.phi
17168
17158
; GFX950-NEXT: ;;#ASMSTART
@@ -17201,9 +17191,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
17201
17191
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
17202
17192
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
17203
17193
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17204
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
17205
- ; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17206
17194
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17195
+ ; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17207
17196
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] glc
17208
17197
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17209
17198
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17226,7 +17215,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
17226
17215
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
17227
17216
; GFX90A-NEXT: s_waitcnt vmcnt(0)
17228
17217
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17229
- ; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17230
17218
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17231
17219
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17232
17220
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen
@@ -17262,10 +17250,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
17262
17250
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
17263
17251
; GFX950-NEXT: s_nop 1
17264
17252
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17265
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
17266
17253
; GFX950-NEXT: s_nop 1
17267
- ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17268
17254
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17255
+ ; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
17269
17256
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
17270
17257
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
17271
17258
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17286,7 +17273,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
17286
17273
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
17287
17274
; GFX950-NEXT: s_nop 1
17288
17275
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17289
- ; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17290
17276
; GFX950-NEXT: s_nop 1
17291
17277
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
17292
17278
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
0 commit comments