@@ -8946,8 +8946,7 @@ define void @flat_atomic_udec_wrap_i32_noret(ptr %ptr, i32 %in) {
; GCN1-NEXT: .LBB141_1: ; %atomicrmw.start
; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN1-NEXT: v_add_i32_e32 v3, vcc, -1, v4
- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN1-NEXT: v_subrev_i32_e32 v3, vcc, 1, v4
; GCN1-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
; GCN1-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN1-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -8971,8 +8970,7 @@ define void @flat_atomic_udec_wrap_i32_noret(ptr %ptr, i32 %in) {
; GCN2-NEXT: .LBB141_1: ; %atomicrmw.start
; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN2-NEXT: v_add_u32_e32 v3, vcc, -1, v4
- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN2-NEXT: v_subrev_u32_e32 v3, vcc, 1, v4
; GCN2-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
; GCN2-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN2-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -8996,9 +8994,8 @@ define void @flat_atomic_udec_wrap_i32_noret(ptr %ptr, i32 %in) {
; GCN3-NEXT: .LBB141_1: ; %atomicrmw.start
; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN3-NEXT: v_subrev_co_u32_e32 v3, vcc, 1, v4
; GCN3-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
- ; GCN3-NEXT: v_add_u32_e32 v3, -1, v4
; GCN3-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN3-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
@@ -9027,8 +9024,7 @@ define void @flat_atomic_udec_wrap_i32_noret_offset(ptr %out, i32 %in) {
; GCN1-NEXT: .LBB142_1: ; %atomicrmw.start
; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN1-NEXT: v_add_i32_e32 v3, vcc, -1, v4
- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN1-NEXT: v_subrev_i32_e32 v3, vcc, 1, v4
; GCN1-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
; GCN1-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN1-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -9054,8 +9050,7 @@ define void @flat_atomic_udec_wrap_i32_noret_offset(ptr %out, i32 %in) {
; GCN2-NEXT: .LBB142_1: ; %atomicrmw.start
; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN2-NEXT: v_add_u32_e32 v3, vcc, -1, v4
- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN2-NEXT: v_subrev_u32_e32 v3, vcc, 1, v4
; GCN2-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
; GCN2-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN2-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -9079,9 +9074,8 @@ define void @flat_atomic_udec_wrap_i32_noret_offset(ptr %out, i32 %in) {
; GCN3-NEXT: .LBB142_1: ; %atomicrmw.start
; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN3-NEXT: v_subrev_co_u32_e32 v3, vcc, 1, v4
; GCN3-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
- ; GCN3-NEXT: v_add_u32_e32 v3, -1, v4
; GCN3-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN3-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc
@@ -9110,8 +9104,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret(ptr %ptr, i32 %in) {
; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN1-NEXT: v_mov_b32_e32 v4, v3
- ; GCN1-NEXT: v_add_i32_e32 v3, vcc, -1, v4
- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN1-NEXT: v_subrev_i32_e32 v3, vcc, 1, v4
; GCN1-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
; GCN1-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN1-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -9136,8 +9129,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret(ptr %ptr, i32 %in) {
; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN2-NEXT: v_mov_b32_e32 v4, v3
- ; GCN2-NEXT: v_add_u32_e32 v3, vcc, -1, v4
- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN2-NEXT: v_subrev_u32_e32 v3, vcc, 1, v4
; GCN2-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
; GCN2-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN2-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -9162,9 +9154,8 @@ define i32 @flat_atomic_udec_wrap_i32_ret(ptr %ptr, i32 %in) {
; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_mov_b32_e32 v4, v3
- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN3-NEXT: v_subrev_co_u32_e32 v3, vcc, 1, v4
; GCN3-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
- ; GCN3-NEXT: v_add_u32_e32 v3, -1, v4
; GCN3-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN3-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
@@ -9194,8 +9185,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset(ptr %out, i32 %in) {
; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN1-NEXT: v_mov_b32_e32 v1, v0
- ; GCN1-NEXT: v_add_i32_e32 v0, vcc, -1, v1
- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+ ; GCN1-NEXT: v_subrev_i32_e32 v0, vcc, 1, v1
; GCN1-NEXT: v_cmp_gt_u32_e64 s[4:5], v1, v2
; GCN1-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN1-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
@@ -9221,8 +9211,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset(ptr %out, i32 %in) {
; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN2-NEXT: v_mov_b32_e32 v1, v0
- ; GCN2-NEXT: v_add_u32_e32 v0, vcc, -1, v1
- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+ ; GCN2-NEXT: v_subrev_u32_e32 v0, vcc, 1, v1
; GCN2-NEXT: v_cmp_gt_u32_e64 s[4:5], v1, v2
; GCN2-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN2-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
@@ -9246,9 +9235,8 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset(ptr %out, i32 %in) {
; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_mov_b32_e32 v4, v3
- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+ ; GCN3-NEXT: v_subrev_co_u32_e32 v3, vcc, 1, v4
; GCN3-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
- ; GCN3-NEXT: v_add_u32_e32 v3, -1, v4
; GCN3-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN3-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc
@@ -9279,8 +9267,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_scalar(ptr inreg %ptr, i
; GCN1-NEXT: .LBB145_1: ; %atomicrmw.start
; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN1-NEXT: v_add_i32_e32 v2, vcc, -1, v3
- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+ ; GCN1-NEXT: v_subrev_i32_e32 v2, vcc, 1, v3
; GCN1-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
; GCN1-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN1-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -9307,8 +9294,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_scalar(ptr inreg %ptr, i
; GCN2-NEXT: .LBB145_1: ; %atomicrmw.start
; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN2-NEXT: v_add_u32_e32 v2, vcc, -1, v3
- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+ ; GCN2-NEXT: v_subrev_u32_e32 v2, vcc, 1, v3
; GCN2-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
; GCN2-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN2-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -9335,9 +9321,8 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_scalar(ptr inreg %ptr, i
; GCN3-NEXT: .LBB145_1: ; %atomicrmw.start
; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+ ; GCN3-NEXT: v_subrev_co_u32_e32 v2, vcc, 1, v3
; GCN3-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
- ; GCN3-NEXT: v_add_u32_e32 v2, -1, v3
; GCN3-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN3-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN3-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
@@ -9369,8 +9354,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_offset_scalar(ptr inreg
; GCN1-NEXT: .LBB146_1: ; %atomicrmw.start
; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN1-NEXT: v_add_i32_e32 v2, vcc, -1, v3
- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+ ; GCN1-NEXT: v_subrev_i32_e32 v2, vcc, 1, v3
; GCN1-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
; GCN1-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN1-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -9399,8 +9383,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_offset_scalar(ptr inreg
; GCN2-NEXT: .LBB146_1: ; %atomicrmw.start
; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN2-NEXT: v_add_u32_e32 v2, vcc, -1, v3
- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+ ; GCN2-NEXT: v_subrev_u32_e32 v2, vcc, 1, v3
; GCN2-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
; GCN2-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN2-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -9427,9 +9410,8 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_offset_scalar(ptr inreg
; GCN3-NEXT: .LBB146_1: ; %atomicrmw.start
; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+ ; GCN3-NEXT: v_subrev_co_u32_e32 v2, vcc, 1, v3
; GCN3-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
- ; GCN3-NEXT: v_add_u32_e32 v2, -1, v3
; GCN3-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN3-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN3-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
@@ -9463,8 +9445,7 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_scalar(ptr inreg %ptr, i32
; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN1-NEXT: v_mov_b32_e32 v5, v0
- ; GCN1-NEXT: v_add_i32_e32 v0, vcc, -1, v5
- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+ ; GCN1-NEXT: v_subrev_i32_e32 v0, vcc, 1, v5
; GCN1-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
; GCN1-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN1-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
@@ -9493,8 +9474,7 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_scalar(ptr inreg %ptr, i32
; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN2-NEXT: v_mov_b32_e32 v5, v0
- ; GCN2-NEXT: v_add_u32_e32 v0, vcc, -1, v5
- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+ ; GCN2-NEXT: v_subrev_u32_e32 v0, vcc, 1, v5
; GCN2-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
; GCN2-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN2-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
@@ -9523,9 +9503,8 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_scalar(ptr inreg %ptr, i32
; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_mov_b32_e32 v5, v0
- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+ ; GCN3-NEXT: v_subrev_co_u32_e32 v0, vcc, 1, v5
; GCN3-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
- ; GCN3-NEXT: v_add_u32_e32 v0, -1, v5
; GCN3-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN3-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
; GCN3-NEXT: flat_atomic_cmpswap v0, v[1:2], v[4:5] glc
@@ -9557,8 +9536,7 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_offset_scalar(ptr inreg %ou
; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN1-NEXT: v_mov_b32_e32 v5, v0
- ; GCN1-NEXT: v_add_i32_e32 v0, vcc, -1, v5
- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+ ; GCN1-NEXT: v_subrev_i32_e32 v0, vcc, 1, v5
; GCN1-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
; GCN1-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN1-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
@@ -9587,8 +9565,7 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_offset_scalar(ptr inreg %ou
; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN2-NEXT: v_mov_b32_e32 v5, v0
- ; GCN2-NEXT: v_add_u32_e32 v0, vcc, -1, v5
- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+ ; GCN2-NEXT: v_subrev_u32_e32 v0, vcc, 1, v5
; GCN2-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
; GCN2-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN2-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
@@ -9617,9 +9594,8 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_offset_scalar(ptr inreg %ou
; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_mov_b32_e32 v5, v0
- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+ ; GCN3-NEXT: v_subrev_co_u32_e32 v0, vcc, 1, v5
; GCN3-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
- ; GCN3-NEXT: v_add_u32_e32 v0, -1, v5
; GCN3-NEXT: s_or_b64 vcc, vcc, s[34:35]
; GCN3-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
; GCN3-NEXT: flat_atomic_cmpswap v0, v[1:2], v[4:5] offset:16 glc
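For reference, the FileCheck lines above belong to the flat `atomicrmw udec_wrap` i32 tests, whose IR bodies are not part of these hunks. A minimal sketch of what such a test body typically looks like is given below; the exact bodies and the `seq_cst` ordering are assumptions based on the test names, and the checked compare-and-swap loops are the backend's expansion of this single operation.

; Hypothetical IR for the first pair of tests (noret/ret); the actual test bodies may differ.
define void @flat_atomic_udec_wrap_i32_noret(ptr %ptr, i32 %in) {
  %result = atomicrmw udec_wrap ptr %ptr, i32 %in seq_cst
  ret void
}

define i32 @flat_atomic_udec_wrap_i32_ret(ptr %ptr, i32 %in) {
  %result = atomicrmw udec_wrap ptr %ptr, i32 %in seq_cst
  ret i32 %result
}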