@@ -7,17 +7,15 @@ define amdgpu_kernel void @break_inserted_outside_of_loop(ptr addrspace(1) %out,
77; SI: ; %bb.0: ; %main_body
88; SI-NEXT: s_load_dword s0, s[2:3], 0xb
99; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
10+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
11+ ; SI-NEXT: v_and_b32_e32 v0, s0, v0
1012; SI-NEXT: v_and_b32_e32 v0, 1, v0
1113; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
12- ; SI-NEXT: s_waitcnt lgkmcnt(0)
13- ; SI-NEXT: s_bitcmp1_b32 s0, 0
14- ; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
15- ; SI-NEXT: s_and_b64 s[4:5], s[0:1], vcc
1614; SI-NEXT: s_mov_b64 s[0:1], 0
1715; SI-NEXT: .LBB0_1: ; %ENDIF
1816; SI-NEXT: ; =>This Inner Loop Header: Depth=1
19- ; SI-NEXT: s_and_b64 s[6:7 ], exec, s[4:5]
20- ; SI-NEXT: s_or_b64 s[0:1], s[6:7 ], s[0:1]
17+ ; SI-NEXT: s_and_b64 s[4:5 ], exec, vcc
18+ ; SI-NEXT: s_or_b64 s[0:1], s[4:5 ], s[0:1]
2119; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
2220; SI-NEXT: s_cbranch_execnz .LBB0_1
2321; SI-NEXT: ; %bb.2: ; %ENDLOOP
@@ -34,17 +32,15 @@ define amdgpu_kernel void @break_inserted_outside_of_loop(ptr addrspace(1) %out,
3432; FLAT: ; %bb.0: ; %main_body
3533; FLAT-NEXT: s_load_dword s0, s[2:3], 0x2c
3634; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
35+ ; FLAT-NEXT: s_waitcnt lgkmcnt(0)
36+ ; FLAT-NEXT: v_and_b32_e32 v0, s0, v0
3737; FLAT-NEXT: v_and_b32_e32 v0, 1, v0
3838; FLAT-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
39- ; FLAT-NEXT: s_waitcnt lgkmcnt(0)
40- ; FLAT-NEXT: s_bitcmp1_b32 s0, 0
41- ; FLAT-NEXT: s_cselect_b64 s[0:1], -1, 0
42- ; FLAT-NEXT: s_and_b64 s[4:5], s[0:1], vcc
4339; FLAT-NEXT: s_mov_b64 s[0:1], 0
4440; FLAT-NEXT: .LBB0_1: ; %ENDIF
4541; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
46- ; FLAT-NEXT: s_and_b64 s[6:7 ], exec, s[4:5]
47- ; FLAT-NEXT: s_or_b64 s[0:1], s[6:7 ], s[0:1]
42+ ; FLAT-NEXT: s_and_b64 s[4:5 ], exec, vcc
43+ ; FLAT-NEXT: s_or_b64 s[0:1], s[4:5 ], s[0:1]
4844; FLAT-NEXT: s_andn2_b64 exec, exec, s[0:1]
4945; FLAT-NEXT: s_cbranch_execnz .LBB0_1
5046; FLAT-NEXT: ; %bb.2: ; %ENDLOOP
0 commit comments