44define amdgpu_kernel void @copy_to_vreg_1 (i32 %0 ) {
55; GCN-LABEL: copy_to_vreg_1:
66; GCN: ; %bb.0: ; %._crit_edge
7- ; GCN-NEXT: s_load_dword s4, s[4:5], 0x24
7+ ; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
88; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
99; GCN-NEXT: v_mov_b64_e32 v[2:3], 0
1010; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -29,7 +29,7 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
2929; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
3030; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
3131; GCN-NEXT: s_xor_b64 s[8:9], exec, -1
32- ; GCN-NEXT: .LBB0_2: ; %Flow2
32+ ; GCN-NEXT: .LBB0_2: ; %Flow3
3333; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
3434; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
3535; GCN-NEXT: s_and_b64 s[4:5], exec, s[8:9]
@@ -49,7 +49,7 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
4949; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
5050; GCN-NEXT: s_or_b64 s[6:7], s[6:7], exec
5151; GCN-NEXT: global_store_byte v[2:3], v1, off
52- ; GCN-NEXT: .LBB0_5: ; %Flow1
52+ ; GCN-NEXT: .LBB0_5: ; %Flow2
5353; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
5454; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
5555; GCN-NEXT: s_mov_b64 s[8:9], -1
@@ -67,28 +67,28 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
6767; GCN-NEXT: .LBB0_8: ; %DummyReturnBlock
6868; GCN-NEXT: s_endpgm
6969._crit_edge:
70- %1 = tail call i32 @llvm.amdgcn.workitem.id.x ()
71- %2 = udiv i32 1 , %0
70+ %id.x = tail call i32 @llvm.amdgcn.workitem.id.x ()
71+ %div = udiv i32 1 , %0
7272 br label %.lr.ph27
7373
7474.lr.ph27: ; preds = %pred.store.if41, %pred.store.continue, %._crit_edge
75- %3 = phi i32 [ %2 , %._crit_edge ], [ 0 , %pred.store.if41 ], [ 0 , %pred.store.continue ]
76- %4 = icmp ugt i32 %3 , 0
77- %broadcast.splatinsert37 = insertelement <4 x i1 > zeroinitializer , i1 %4 , i64 0
78- %.zext = zext i32 %1 to i64
75+ %iv = phi i32 [ %div , %._crit_edge ], [ 0 , %pred.store.if41 ], [ 0 , %pred.store.continue ]
76+ %cmp = icmp ugt i32 %iv , 0
77+ %broadcast.splatinsert37 = insertelement <4 x i1 > zeroinitializer , i1 %cmp , i64 0
78+ %.zext = zext i32 %id.x to i64
7979 %broadcast.splatinsert39 = insertelement <4 x i64 > zeroinitializer , i64 %.zext , i64 0
80- %5 = icmp uge <4 x i64 > %broadcast.splatinsert39 , splat (i64 1 )
81- %6 = or <4 x i1 > %5 , %broadcast.splatinsert37
82- %7 = extractelement <4 x i1 > %6 , i64 0
83- br i1 %7 , label %pred.store.if , label %pred.store.continue
80+ %cmp.1 = icmp uge <4 x i64 > %broadcast.splatinsert39 , splat (i64 1 )
81+ %or = or <4 x i1 > %cmp.1 , %broadcast.splatinsert37
82+ %extract = extractelement <4 x i1 > %or , i64 0
83+ br i1 %extract , label %pred.store.if , label %pred.store.continue
8484
8585pred.store .if: ; preds = %.lr.ph27
8686 store i8 0 , ptr addrspace (1 ) null , align 64
8787 br label %pred.store.continue
8888
8989pred.store .continue: ; preds = %pred.store.if, %.lr.ph27
90- %8 = extractelement <4 x i1 > %6 , i64 1
91- br i1 %8 , label %pred.store.if41 , label %.lr.ph27
90+ %extract.1 = extractelement <4 x i1 > %or , i64 1
91+ br i1 %extract.1 , label %pred.store.if41 , label %.lr.ph27
9292
9393pred.store .if41: ; preds = %pred.store.continue
9494 store i8 0 , ptr addrspace (1 ) null , align 64
0 commit comments