@@ -67,21 +67,20 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
6767; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
6868; GCN-O0-NEXT: s_cbranch_execz .LBB0_4
6969; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
70+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
7071; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
7172; GCN-O0-NEXT: s_waitcnt expcnt(0)
7273; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
7374; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
7475; GCN-O0-NEXT: s_waitcnt vmcnt(0)
7576; GCN-O0-NEXT: v_readlane_b32 s4, v4, 0
7677; GCN-O0-NEXT: v_readlane_b32 s5, v4, 1
77- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
7878; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
7979; GCN-O0-NEXT: s_mov_b32 s0, 0
8080; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
8181; GCN-O0-NEXT: s_mov_b32 s1, s2
8282; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
8383; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1]
84- ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
8584; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v0
8685; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
8786; GCN-O0-NEXT: v_mov_b32_e32 v2, v3
@@ -100,16 +99,16 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
10099; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
101100; GCN-O0-NEXT: s_cbranch_execz .LBB0_3
102101; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
102+ ; GCN-O0-NEXT: s_waitcnt expcnt(1)
103+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
103104; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
104105; GCN-O0-NEXT: s_waitcnt expcnt(0)
105106; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
106107; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
107108; GCN-O0-NEXT: s_waitcnt vmcnt(0)
108109; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
109110; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
110- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
111111; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
112- ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
113112; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
114113; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
115114; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -236,21 +235,20 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
236235; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
237236; GCN-O0-NEXT: s_cbranch_execz .LBB1_3
238237; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
238+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
239239; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
240240; GCN-O0-NEXT: s_waitcnt expcnt(0)
241241; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
242242; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
243243; GCN-O0-NEXT: s_waitcnt vmcnt(0)
244244; GCN-O0-NEXT: v_readlane_b32 s4, v4, 0
245245; GCN-O0-NEXT: v_readlane_b32 s5, v4, 1
246- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
247246; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
248247; GCN-O0-NEXT: s_mov_b32 s0, 0
249248; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
250249; GCN-O0-NEXT: s_mov_b32 s1, s2
251250; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
252251; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1]
253- ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
254252; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v0
255253; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
256254; GCN-O0-NEXT: v_mov_b32_e32 v2, v3
@@ -269,16 +267,16 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
269267; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
270268; GCN-O0-NEXT: s_cbranch_execz .LBB1_4
271269; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
270+ ; GCN-O0-NEXT: s_waitcnt expcnt(1)
271+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
272272; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
273273; GCN-O0-NEXT: s_waitcnt expcnt(0)
274274; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
275275; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
276276; GCN-O0-NEXT: s_waitcnt vmcnt(0)
277277; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
278278; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
279- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
280279; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
281- ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
282280; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
283281; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
284282; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -312,9 +310,9 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
312310; GCN-O0-NEXT: v_readlane_b32 s2, v4, 4
313311; GCN-O0-NEXT: v_readlane_b32 s3, v4, 5
314312; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3]
313+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
315314; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
316315; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
317- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
318316; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
319317; GCN-O0-NEXT: s_waitcnt vmcnt(0)
320318; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
@@ -456,17 +454,18 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
456454; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
457455; GCN-O0-NEXT: s_cbranch_execz .LBB2_6
458456; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
457+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
459458; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
460459; GCN-O0-NEXT: s_waitcnt expcnt(0)
461460; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
462461; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
463- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
464462; GCN-O0-NEXT: s_mov_b32 s0, 2
465- ; GCN-O0-NEXT: s_waitcnt vmcnt(0 )
463+ ; GCN-O0-NEXT: s_waitcnt vmcnt(1 )
466464; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[0:1], v0, s0
467465; GCN-O0-NEXT: s_mov_b64 s[2:3], exec
468466; GCN-O0-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
469467; GCN-O0-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
468+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
470469; GCN-O0-NEXT: v_writelane_b32 v4, s2, 4
471470; GCN-O0-NEXT: v_writelane_b32 v4, s3, 5
472471; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
@@ -493,16 +492,15 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
493492; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1]
494493; GCN-O0-NEXT: s_cbranch_execz .LBB2_5
495494; GCN-O0-NEXT: ; %bb.3: ; %bb.then
495+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
496496; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
497497; GCN-O0-NEXT: s_waitcnt expcnt(0)
498498; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
499499; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
500500; GCN-O0-NEXT: s_waitcnt vmcnt(0)
501501; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
502502; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
503- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
504503; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
505- ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
506504; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
507505; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
508506; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -518,16 +516,15 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
518516; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
519517; GCN-O0-NEXT: s_branch .LBB2_5
520518; GCN-O0-NEXT: .LBB2_4: ; %bb.else
519+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
521520; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
522521; GCN-O0-NEXT: s_waitcnt expcnt(0)
523522; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
524523; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
525524; GCN-O0-NEXT: s_waitcnt vmcnt(0)
526525; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
527526; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
528- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
529527; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
530- ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
531528; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
532529; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
533530; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -724,13 +721,13 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
724721; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1]
725722; GCN-O0-NEXT: s_cbranch_execz .LBB3_8
726723; GCN-O0-NEXT: ; %bb.2: ; %bb.outer.then
724+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
725+ ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
726+ ; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
727727; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
728728; GCN-O0-NEXT: s_waitcnt expcnt(0)
729729; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
730730; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
731- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
732- ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
733- ; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
734731; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
735732; GCN-O0-NEXT: s_mov_b32 s2, 0
736733; GCN-O0-NEXT: s_mov_b32 s4, s2
@@ -740,11 +737,12 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
740737; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
741738; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
742739; GCN-O0-NEXT: v_mov_b32_e32 v1, 1
743- ; GCN-O0-NEXT: s_waitcnt vmcnt(0 )
740+ ; GCN-O0-NEXT: s_waitcnt vmcnt(1 )
744741; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[0:3], 0 addr64 offset:4
745742; GCN-O0-NEXT: s_mov_b32 s0, 2
746743; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
747744; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
745+ ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
748746; GCN-O0-NEXT: v_writelane_b32 v6, s0, 4
749747; GCN-O0-NEXT: v_writelane_b32 v6, s1, 5
750748; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
@@ -770,13 +768,13 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
770768; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:8
771769; GCN-O0-NEXT: s_branch .LBB3_7
772770; GCN-O0-NEXT: .LBB3_4: ; %bb.outer.else
771+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
772+ ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
773+ ; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
773774; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
774775; GCN-O0-NEXT: s_waitcnt expcnt(0)
775776; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
776777; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
777- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
778- ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
779- ; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
780778; GCN-O0-NEXT: s_mov_b32 s1, 0xf000
781779; GCN-O0-NEXT: s_mov_b32 s0, 0
782780; GCN-O0-NEXT: s_mov_b32 s2, s0
@@ -786,10 +784,11 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
786784; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
787785; GCN-O0-NEXT: s_mov_b64 s[6:7], s[2:3]
788786; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
789- ; GCN-O0-NEXT: s_waitcnt vmcnt(0 )
787+ ; GCN-O0-NEXT: s_waitcnt vmcnt(1 )
790788; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64 offset:12
791789; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
792790; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
791+ ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
793792; GCN-O0-NEXT: v_writelane_b32 v6, s0, 6
794793; GCN-O0-NEXT: v_writelane_b32 v6, s1, 7
795794; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
@@ -927,21 +926,20 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
927926; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
928927; GCN-O0-NEXT: s_cbranch_execz .LBB4_2
929928; GCN-O0-NEXT: ; %bb.1: ; %bb.then
929+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
930930; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
931931; GCN-O0-NEXT: s_waitcnt expcnt(0)
932932; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 ; 4-byte Folded Reload
933933; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
934934; GCN-O0-NEXT: s_waitcnt vmcnt(0)
935935; GCN-O0-NEXT: v_readlane_b32 s0, v3, 0
936936; GCN-O0-NEXT: v_readlane_b32 s1, v3, 1
937- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
938937; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
939938; GCN-O0-NEXT: s_mov_b32 s4, 0
940939; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
941940; GCN-O0-NEXT: s_mov_b32 s5, s2
942941; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
943942; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
944- ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
945943; GCN-O0-NEXT: v_ashrrev_i32_e64 v2, 31, v0
946944; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
947945; GCN-O0-NEXT: v_mov_b32_e32 v1, v2
@@ -1066,6 +1064,8 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
10661064; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
10671065; GCN-O0-NEXT: .LBB5_1: ; %bb1
10681066; GCN-O0-NEXT: ; =>This Inner Loop Header: Depth=1
1067+ ; GCN-O0-NEXT: s_waitcnt expcnt(1)
1068+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
10691069; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
10701070; GCN-O0-NEXT: s_waitcnt expcnt(0)
10711071; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
@@ -1077,9 +1077,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
10771077; GCN-O0-NEXT: v_readlane_b32 s7, v6, 1
10781078; GCN-O0-NEXT: v_writelane_b32 v6, s6, 4
10791079; GCN-O0-NEXT: v_writelane_b32 v6, s7, 5
1080- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
10811080; GCN-O0-NEXT: s_mov_b32 s4, 0x207
1082- ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
10831081; GCN-O0-NEXT: v_cmp_lt_i32_e64 s[4:5], v0, s4
10841082; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
10851083; GCN-O0-NEXT: v_writelane_b32 v6, s4, 6
@@ -1279,14 +1277,14 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
12791277; GCN-O0-NEXT: v_readlane_b32 s8, v6, 16
12801278; GCN-O0-NEXT: v_readlane_b32 s9, v6, 17
12811279; GCN-O0-NEXT: s_or_b64 exec, exec, s[8:9]
1282- ; GCN-O0-NEXT: v_readlane_b32 s6, v6, 4
1283- ; GCN-O0-NEXT: v_readlane_b32 s7, v6, 5
1284- ; GCN-O0-NEXT: v_readlane_b32 s4, v6, 14
1285- ; GCN-O0-NEXT: v_readlane_b32 s5, v6, 15
12861280; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
12871281; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
12881282; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
12891283; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
1284+ ; GCN-O0-NEXT: v_readlane_b32 s6, v6, 4
1285+ ; GCN-O0-NEXT: v_readlane_b32 s7, v6, 5
1286+ ; GCN-O0-NEXT: v_readlane_b32 s4, v6, 14
1287+ ; GCN-O0-NEXT: v_readlane_b32 s5, v6, 15
12901288; GCN-O0-NEXT: s_and_b64 s[4:5], exec, s[4:5]
12911289; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
12921290; GCN-O0-NEXT: s_mov_b64 s[6:7], 0
0 commit comments