@@ -75,10 +75,10 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
7575; GCN-O0-NEXT: s_waitcnt expcnt(0)
7676; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
7777; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
78- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
79- ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
78+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
8079; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0
8180; GCN-O0-NEXT: v_readlane_b32 s5, v0, 1
81+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
8282; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
8383; GCN-O0-NEXT: s_mov_b32 s0, 0
8484; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
@@ -104,15 +104,16 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
104104; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
105105; GCN-O0-NEXT: s_cbranch_execz .LBB0_3
106106; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
107- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
108107; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
109108; GCN-O0-NEXT: s_waitcnt expcnt(0)
110109; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
111110; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
112111; GCN-O0-NEXT: s_waitcnt vmcnt(0)
113112; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
114113; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
114+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
115115; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
116+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
116117; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
117118; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
118119; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -248,10 +249,10 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
248249; GCN-O0-NEXT: s_waitcnt expcnt(0)
249250; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
250251; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
251- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
252- ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
252+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
253253; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0
254254; GCN-O0-NEXT: v_readlane_b32 s5, v0, 1
255+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
255256; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
256257; GCN-O0-NEXT: s_mov_b32 s0, 0
257258; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
@@ -277,15 +278,16 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
277278; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
278279; GCN-O0-NEXT: s_cbranch_execz .LBB1_4
279280; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
280- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
281281; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
282282; GCN-O0-NEXT: s_waitcnt expcnt(0)
283283; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
284284; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
285285; GCN-O0-NEXT: s_waitcnt vmcnt(0)
286286; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
287287; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
288+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
288289; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
290+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
289291; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
290292; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
291293; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -311,7 +313,6 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
311313; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
312314; GCN-O0-NEXT: s_branch .LBB1_5
313315; GCN-O0-NEXT: .LBB1_4: ; %bb.inner.end
314- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
315316; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
316317; GCN-O0-NEXT: s_waitcnt expcnt(0)
317318; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -322,7 +323,9 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
322323; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3]
323324; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
324325; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
326+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
325327; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
328+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
326329; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
327330; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
328331; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -508,15 +511,16 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
508511; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1]
509512; GCN-O0-NEXT: s_cbranch_execz .LBB2_5
510513; GCN-O0-NEXT: ; %bb.3: ; %bb.then
511- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
512514; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
513515; GCN-O0-NEXT: s_waitcnt expcnt(0)
514516; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
515517; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
516518; GCN-O0-NEXT: s_waitcnt vmcnt(0)
517519; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
518520; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
521+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
519522; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
523+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
520524; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
521525; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
522526; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -532,15 +536,16 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
532536; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
533537; GCN-O0-NEXT: s_branch .LBB2_5
534538; GCN-O0-NEXT: .LBB2_4: ; %bb.else
535- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
536539; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
537540; GCN-O0-NEXT: s_waitcnt expcnt(0)
538541; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
539542; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
540543; GCN-O0-NEXT: s_waitcnt vmcnt(0)
541544; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
542545; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
546+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
543547; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
548+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
544549; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
545550; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
546551; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -953,20 +958,21 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
953958; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
954959; GCN-O0-NEXT: s_cbranch_execz .LBB4_2
955960; GCN-O0-NEXT: ; %bb.1: ; %bb.then
956- ; GCN-O0-NEXT: s_waitcnt expcnt(0)
957- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
958961; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
959962; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
960963; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
961964; GCN-O0-NEXT: s_waitcnt vmcnt(0)
962965; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
963966; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
967+ ; GCN-O0-NEXT: s_waitcnt expcnt(0)
968+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
964969; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
965970; GCN-O0-NEXT: s_mov_b32 s4, 0
966971; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
967972; GCN-O0-NEXT: s_mov_b32 s5, s2
968973; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
969974; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
975+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
970976; GCN-O0-NEXT: v_ashrrev_i32_e64 v2, 31, v0
971977; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
972978; GCN-O0-NEXT: v_mov_b32_e32 v1, v2
@@ -1102,14 +1108,14 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
11021108; GCN-O0-NEXT: s_waitcnt expcnt(0)
11031109; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
11041110; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
1105- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1106- ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
1111+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
11071112; GCN-O0-NEXT: v_readlane_b32 s8, v0, 2
11081113; GCN-O0-NEXT: v_readlane_b32 s9, v0, 3
11091114; GCN-O0-NEXT: v_readlane_b32 s6, v0, 0
11101115; GCN-O0-NEXT: v_readlane_b32 s7, v0, 1
11111116; GCN-O0-NEXT: v_writelane_b32 v0, s6, 4
11121117; GCN-O0-NEXT: v_writelane_b32 v0, s7, 5
1118+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
11131119; GCN-O0-NEXT: s_mov_b32 s4, 0x207
11141120; GCN-O0-NEXT: s_waitcnt vmcnt(0)
11151121; GCN-O0-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, s4
@@ -1132,11 +1138,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
11321138; GCN-O0-NEXT: s_waitcnt expcnt(0)
11331139; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
11341140; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
1135- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1136- ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
1141+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
11371142; GCN-O0-NEXT: v_readlane_b32 s4, v0, 6
11381143; GCN-O0-NEXT: v_readlane_b32 s5, v0, 7
11391144; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
1145+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
11401146; GCN-O0-NEXT: s_mov_b32 s6, 0
11411147; GCN-O0-NEXT: s_waitcnt vmcnt(0)
11421148; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], v1, s6
@@ -1226,18 +1232,20 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
12261232; GCN-O0-NEXT: s_branch .LBB5_6
12271233; GCN-O0-NEXT: .LBB5_5: ; %Flow2
12281234; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
1229- ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1230- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
1231- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
1232- ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
1233- ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
12341235; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
1236+ ; GCN-O0-NEXT: s_waitcnt expcnt(1)
12351237; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
12361238; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
12371239; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12381240; GCN-O0-NEXT: v_readlane_b32 s4, v4, 10
12391241; GCN-O0-NEXT: v_readlane_b32 s5, v4, 11
12401242; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
1243+ ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1244+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
1245+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
1246+ ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
1247+ ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1248+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12411249; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
12421250; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12431251; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
@@ -1246,18 +1254,20 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
12461254; GCN-O0-NEXT: s_branch .LBB5_7
12471255; GCN-O0-NEXT: .LBB5_6: ; %Flow
12481256; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
1249- ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1250- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1251- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1252- ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1253- ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
12541257; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
1258+ ; GCN-O0-NEXT: s_waitcnt expcnt(1)
12551259; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
12561260; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
12571261; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12581262; GCN-O0-NEXT: v_readlane_b32 s4, v4, 12
12591263; GCN-O0-NEXT: v_readlane_b32 s5, v4, 13
12601264; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
1265+ ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1266+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1267+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1268+ ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1269+ ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1270+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12611271; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
12621272; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12631273; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
@@ -1301,11 +1311,6 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
13011311; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
13021312; GCN-O0-NEXT: .LBB5_9: ; %Flow3
13031313; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
1304- ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1305- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1306- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1307- ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1308- ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
13091314; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
13101315; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
13111316; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
@@ -1317,6 +1322,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
13171322; GCN-O0-NEXT: v_readlane_b32 s7, v4, 5
13181323; GCN-O0-NEXT: v_readlane_b32 s4, v4, 14
13191324; GCN-O0-NEXT: v_readlane_b32 s5, v4, 15
1325+ ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1326+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1327+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1328+ ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1329+ ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
13201330; GCN-O0-NEXT: s_and_b64 s[4:5], exec, s[4:5]
13211331; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
13221332; GCN-O0-NEXT: s_mov_b64 s[6:7], 0
@@ -1331,6 +1341,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
13311341; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
13321342; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill
13331343; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
1344+ ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
13341345; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
13351346; GCN-O0-NEXT: s_waitcnt vmcnt(0)
13361347; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
0 commit comments