@@ -8946,8 +8946,7 @@ define void @flat_atomic_udec_wrap_i32_noret(ptr %ptr, i32 %in) {
89468946; GCN1-NEXT: .LBB141_1: ; %atomicrmw.start
89478947; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
89488948; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8949- ; GCN1-NEXT: v_add_i32_e32 v3, vcc, -1, v4
8950- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
8949+ ; GCN1-NEXT: v_subrev_i32_e32 v3, vcc, 1, v4
89518950; GCN1-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
89528951; GCN1-NEXT: s_or_b64 vcc, vcc, s[4:5]
89538952; GCN1-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -8971,8 +8970,7 @@ define void @flat_atomic_udec_wrap_i32_noret(ptr %ptr, i32 %in) {
89718970; GCN2-NEXT: .LBB141_1: ; %atomicrmw.start
89728971; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
89738972; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8974- ; GCN2-NEXT: v_add_u32_e32 v3, vcc, -1, v4
8975- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
8973+ ; GCN2-NEXT: v_subrev_u32_e32 v3, vcc, 1, v4
89768974; GCN2-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
89778975; GCN2-NEXT: s_or_b64 vcc, vcc, s[4:5]
89788976; GCN2-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -8996,9 +8994,8 @@ define void @flat_atomic_udec_wrap_i32_noret(ptr %ptr, i32 %in) {
89968994; GCN3-NEXT: .LBB141_1: ; %atomicrmw.start
89978995; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
89988996; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8999- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0 , v4
8997+ ; GCN3-NEXT: v_subrev_co_u32_e32 v3, vcc, 1 , v4
90008998; GCN3-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
9001- ; GCN3-NEXT: v_add_u32_e32 v3, -1, v4
90028999; GCN3-NEXT: s_or_b64 vcc, vcc, s[4:5]
90039000; GCN3-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
90049001; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
@@ -9027,8 +9024,7 @@ define void @flat_atomic_udec_wrap_i32_noret_offset(ptr %out, i32 %in) {
90279024; GCN1-NEXT: .LBB142_1: ; %atomicrmw.start
90289025; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
90299026; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9030- ; GCN1-NEXT: v_add_i32_e32 v3, vcc, -1, v4
9031- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
9027+ ; GCN1-NEXT: v_subrev_i32_e32 v3, vcc, 1, v4
90329028; GCN1-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
90339029; GCN1-NEXT: s_or_b64 vcc, vcc, s[4:5]
90349030; GCN1-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -9054,8 +9050,7 @@ define void @flat_atomic_udec_wrap_i32_noret_offset(ptr %out, i32 %in) {
90549050; GCN2-NEXT: .LBB142_1: ; %atomicrmw.start
90559051; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
90569052; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9057- ; GCN2-NEXT: v_add_u32_e32 v3, vcc, -1, v4
9058- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
9053+ ; GCN2-NEXT: v_subrev_u32_e32 v3, vcc, 1, v4
90599054; GCN2-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
90609055; GCN2-NEXT: s_or_b64 vcc, vcc, s[4:5]
90619056; GCN2-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -9079,9 +9074,8 @@ define void @flat_atomic_udec_wrap_i32_noret_offset(ptr %out, i32 %in) {
90799074; GCN3-NEXT: .LBB142_1: ; %atomicrmw.start
90809075; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
90819076; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9082- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0 , v4
9077+ ; GCN3-NEXT: v_subrev_co_u32_e32 v3, vcc, 1 , v4
90839078; GCN3-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
9084- ; GCN3-NEXT: v_add_u32_e32 v3, -1, v4
90859079; GCN3-NEXT: s_or_b64 vcc, vcc, s[4:5]
90869080; GCN3-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
90879081; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc
@@ -9110,8 +9104,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret(ptr %ptr, i32 %in) {
91109104; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
91119105; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
91129106; GCN1-NEXT: v_mov_b32_e32 v4, v3
9113- ; GCN1-NEXT: v_add_i32_e32 v3, vcc, -1, v4
9114- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
9107+ ; GCN1-NEXT: v_subrev_i32_e32 v3, vcc, 1, v4
91159108; GCN1-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
91169109; GCN1-NEXT: s_or_b64 vcc, vcc, s[4:5]
91179110; GCN1-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -9136,8 +9129,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret(ptr %ptr, i32 %in) {
91369129; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
91379130; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
91389131; GCN2-NEXT: v_mov_b32_e32 v4, v3
9139- ; GCN2-NEXT: v_add_u32_e32 v3, vcc, -1, v4
9140- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
9132+ ; GCN2-NEXT: v_subrev_u32_e32 v3, vcc, 1, v4
91419133; GCN2-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
91429134; GCN2-NEXT: s_or_b64 vcc, vcc, s[4:5]
91439135; GCN2-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
@@ -9162,9 +9154,8 @@ define i32 @flat_atomic_udec_wrap_i32_ret(ptr %ptr, i32 %in) {
91629154; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
91639155; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
91649156; GCN3-NEXT: v_mov_b32_e32 v4, v3
9165- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0 , v4
9157+ ; GCN3-NEXT: v_subrev_co_u32_e32 v3, vcc, 1 , v4
91669158; GCN3-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
9167- ; GCN3-NEXT: v_add_u32_e32 v3, -1, v4
91689159; GCN3-NEXT: s_or_b64 vcc, vcc, s[4:5]
91699160; GCN3-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
91709161; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
@@ -9194,8 +9185,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset(ptr %out, i32 %in) {
91949185; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
91959186; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
91969187; GCN1-NEXT: v_mov_b32_e32 v1, v0
9197- ; GCN1-NEXT: v_add_i32_e32 v0, vcc, -1, v1
9198- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
9188+ ; GCN1-NEXT: v_subrev_i32_e32 v0, vcc, 1, v1
91999189; GCN1-NEXT: v_cmp_gt_u32_e64 s[4:5], v1, v2
92009190; GCN1-NEXT: s_or_b64 vcc, vcc, s[4:5]
92019191; GCN1-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
@@ -9221,8 +9211,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset(ptr %out, i32 %in) {
92219211; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
92229212; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
92239213; GCN2-NEXT: v_mov_b32_e32 v1, v0
9224- ; GCN2-NEXT: v_add_u32_e32 v0, vcc, -1, v1
9225- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
9214+ ; GCN2-NEXT: v_subrev_u32_e32 v0, vcc, 1, v1
92269215; GCN2-NEXT: v_cmp_gt_u32_e64 s[4:5], v1, v2
92279216; GCN2-NEXT: s_or_b64 vcc, vcc, s[4:5]
92289217; GCN2-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
@@ -9246,9 +9235,8 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset(ptr %out, i32 %in) {
92469235; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
92479236; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
92489237; GCN3-NEXT: v_mov_b32_e32 v4, v3
9249- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0 , v4
9238+ ; GCN3-NEXT: v_subrev_co_u32_e32 v3, vcc, 1 , v4
92509239; GCN3-NEXT: v_cmp_gt_u32_e64 s[4:5], v4, v2
9251- ; GCN3-NEXT: v_add_u32_e32 v3, -1, v4
92529240; GCN3-NEXT: s_or_b64 vcc, vcc, s[4:5]
92539241; GCN3-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc
92549242; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc
@@ -9279,8 +9267,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_scalar(ptr inreg %ptr, i
92799267; GCN1-NEXT: .LBB145_1: ; %atomicrmw.start
92809268; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
92819269; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9282- ; GCN1-NEXT: v_add_i32_e32 v2, vcc, -1, v3
9283- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
9270+ ; GCN1-NEXT: v_subrev_i32_e32 v2, vcc, 1, v3
92849271; GCN1-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
92859272; GCN1-NEXT: s_or_b64 vcc, vcc, s[34:35]
92869273; GCN1-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -9307,8 +9294,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_scalar(ptr inreg %ptr, i
93079294; GCN2-NEXT: .LBB145_1: ; %atomicrmw.start
93089295; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
93099296; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9310- ; GCN2-NEXT: v_add_u32_e32 v2, vcc, -1, v3
9311- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
9297+ ; GCN2-NEXT: v_subrev_u32_e32 v2, vcc, 1, v3
93129298; GCN2-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
93139299; GCN2-NEXT: s_or_b64 vcc, vcc, s[34:35]
93149300; GCN2-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -9335,9 +9321,8 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_scalar(ptr inreg %ptr, i
93359321; GCN3-NEXT: .LBB145_1: ; %atomicrmw.start
93369322; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
93379323; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9338- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0 , v3
9324+ ; GCN3-NEXT: v_subrev_co_u32_e32 v2, vcc, 1 , v3
93399325; GCN3-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
9340- ; GCN3-NEXT: v_add_u32_e32 v2, -1, v3
93419326; GCN3-NEXT: s_or_b64 vcc, vcc, s[34:35]
93429327; GCN3-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
93439328; GCN3-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
@@ -9369,8 +9354,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_offset_scalar(ptr inreg
93699354; GCN1-NEXT: .LBB146_1: ; %atomicrmw.start
93709355; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
93719356; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9372- ; GCN1-NEXT: v_add_i32_e32 v2, vcc, -1, v3
9373- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
9357+ ; GCN1-NEXT: v_subrev_i32_e32 v2, vcc, 1, v3
93749358; GCN1-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
93759359; GCN1-NEXT: s_or_b64 vcc, vcc, s[34:35]
93769360; GCN1-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -9399,8 +9383,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_offset_scalar(ptr inreg
93999383; GCN2-NEXT: .LBB146_1: ; %atomicrmw.start
94009384; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
94019385; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9402- ; GCN2-NEXT: v_add_u32_e32 v2, vcc, -1, v3
9403- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
9386+ ; GCN2-NEXT: v_subrev_u32_e32 v2, vcc, 1, v3
94049387; GCN2-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
94059388; GCN2-NEXT: s_or_b64 vcc, vcc, s[34:35]
94069389; GCN2-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -9427,9 +9410,8 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_offset_scalar(ptr inreg
94279410; GCN3-NEXT: .LBB146_1: ; %atomicrmw.start
94289411; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
94299412; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9430- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0 , v3
9413+ ; GCN3-NEXT: v_subrev_co_u32_e32 v2, vcc, 1 , v3
94319414; GCN3-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v3
9432- ; GCN3-NEXT: v_add_u32_e32 v2, -1, v3
94339415; GCN3-NEXT: s_or_b64 vcc, vcc, s[34:35]
94349416; GCN3-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
94359417; GCN3-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
@@ -9463,8 +9445,7 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_scalar(ptr inreg %ptr, i32
94639445; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
94649446; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
94659447; GCN1-NEXT: v_mov_b32_e32 v5, v0
9466- ; GCN1-NEXT: v_add_i32_e32 v0, vcc, -1, v5
9467- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
9448+ ; GCN1-NEXT: v_subrev_i32_e32 v0, vcc, 1, v5
94689449; GCN1-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
94699450; GCN1-NEXT: s_or_b64 vcc, vcc, s[34:35]
94709451; GCN1-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
@@ -9493,8 +9474,7 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_scalar(ptr inreg %ptr, i32
94939474; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
94949475; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
94959476; GCN2-NEXT: v_mov_b32_e32 v5, v0
9496- ; GCN2-NEXT: v_add_u32_e32 v0, vcc, -1, v5
9497- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
9477+ ; GCN2-NEXT: v_subrev_u32_e32 v0, vcc, 1, v5
94989478; GCN2-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
94999479; GCN2-NEXT: s_or_b64 vcc, vcc, s[34:35]
95009480; GCN2-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
@@ -9523,9 +9503,8 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_scalar(ptr inreg %ptr, i32
95239503; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
95249504; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
95259505; GCN3-NEXT: v_mov_b32_e32 v5, v0
9526- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0 , v5
9506+ ; GCN3-NEXT: v_subrev_co_u32_e32 v0, vcc, 1 , v5
95279507; GCN3-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
9528- ; GCN3-NEXT: v_add_u32_e32 v0, -1, v5
95299508; GCN3-NEXT: s_or_b64 vcc, vcc, s[34:35]
95309509; GCN3-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
95319510; GCN3-NEXT: flat_atomic_cmpswap v0, v[1:2], v[4:5] glc
@@ -9557,8 +9536,7 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_offset_scalar(ptr inreg %ou
95579536; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
95589537; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
95599538; GCN1-NEXT: v_mov_b32_e32 v5, v0
9560- ; GCN1-NEXT: v_add_i32_e32 v0, vcc, -1, v5
9561- ; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
9539+ ; GCN1-NEXT: v_subrev_i32_e32 v0, vcc, 1, v5
95629540; GCN1-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
95639541; GCN1-NEXT: s_or_b64 vcc, vcc, s[34:35]
95649542; GCN1-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
@@ -9587,8 +9565,7 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_offset_scalar(ptr inreg %ou
95879565; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
95889566; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
95899567; GCN2-NEXT: v_mov_b32_e32 v5, v0
9590- ; GCN2-NEXT: v_add_u32_e32 v0, vcc, -1, v5
9591- ; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
9568+ ; GCN2-NEXT: v_subrev_u32_e32 v0, vcc, 1, v5
95929569; GCN2-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
95939570; GCN2-NEXT: s_or_b64 vcc, vcc, s[34:35]
95949571; GCN2-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
@@ -9617,9 +9594,8 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_offset_scalar(ptr inreg %ou
96179594; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
96189595; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
96199596; GCN3-NEXT: v_mov_b32_e32 v5, v0
9620- ; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, 0 , v5
9597+ ; GCN3-NEXT: v_subrev_co_u32_e32 v0, vcc, 1 , v5
96219598; GCN3-NEXT: v_cmp_lt_u32_e64 s[34:35], s6, v5
9622- ; GCN3-NEXT: v_add_u32_e32 v0, -1, v5
96239599; GCN3-NEXT: s_or_b64 vcc, vcc, s[34:35]
96249600; GCN3-NEXT: v_cndmask_b32_e32 v4, v0, v3, vcc
96259601; GCN3-NEXT: flat_atomic_cmpswap v0, v[1:2], v[4:5] offset:16 glc
0 commit comments