Skip to content

Commit e25b0ef

Browse files
committed
Address comments and fix test
1 parent f3b9f33 commit e25b0ef

File tree

1 file changed

+15
-15
lines changed

1 file changed

+15
-15
lines changed

llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
55
; GCN-LABEL: copy_to_vreg_1:
66
; GCN: ; %bb.0: ; %._crit_edge
7-
; GCN-NEXT: s_load_dword s4, s[4:5], 0x24
7+
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
88
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
99
; GCN-NEXT: v_mov_b64_e32 v[2:3], 0
1010
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -29,7 +29,7 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
2929
; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
3030
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
3131
; GCN-NEXT: s_xor_b64 s[8:9], exec, -1
32-
; GCN-NEXT: .LBB0_2: ; %Flow2
32+
; GCN-NEXT: .LBB0_2: ; %Flow3
3333
; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
3434
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
3535
; GCN-NEXT: s_and_b64 s[4:5], exec, s[8:9]
@@ -49,7 +49,7 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
4949
; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
5050
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], exec
5151
; GCN-NEXT: global_store_byte v[2:3], v1, off
52-
; GCN-NEXT: .LBB0_5: ; %Flow1
52+
; GCN-NEXT: .LBB0_5: ; %Flow2
5353
; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
5454
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
5555
; GCN-NEXT: s_mov_b64 s[8:9], -1
@@ -67,28 +67,28 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
6767
; GCN-NEXT: .LBB0_8: ; %DummyReturnBlock
6868
; GCN-NEXT: s_endpgm
6969
._crit_edge:
70-
%1 = tail call i32 @llvm.amdgcn.workitem.id.x()
71-
%2 = udiv i32 1, %0
70+
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
71+
%div = udiv i32 1, %0
7272
br label %.lr.ph27
7373

7474
.lr.ph27: ; preds = %pred.store.if41, %pred.store.continue, %._crit_edge
75-
%3 = phi i32 [ %2, %._crit_edge ], [ 0, %pred.store.if41 ], [ 0, %pred.store.continue ]
76-
%4 = icmp ugt i32 %3, 0
77-
%broadcast.splatinsert37 = insertelement <4 x i1> zeroinitializer, i1 %4, i64 0
78-
%.zext = zext i32 %1 to i64
75+
%iv = phi i32 [ %div, %._crit_edge ], [ 0, %pred.store.if41 ], [ 0, %pred.store.continue ]
76+
%cmp = icmp ugt i32 %iv, 0
77+
%broadcast.splatinsert37 = insertelement <4 x i1> zeroinitializer, i1 %cmp, i64 0
78+
%.zext = zext i32 %id.x to i64
7979
%broadcast.splatinsert39 = insertelement <4 x i64> zeroinitializer, i64 %.zext, i64 0
80-
%5 = icmp uge <4 x i64> %broadcast.splatinsert39, splat (i64 1)
81-
%6 = or <4 x i1> %5, %broadcast.splatinsert37
82-
%7 = extractelement <4 x i1> %6, i64 0
83-
br i1 %7, label %pred.store.if, label %pred.store.continue
80+
%cmp.1 = icmp uge <4 x i64> %broadcast.splatinsert39, splat (i64 1)
81+
%or = or <4 x i1> %cmp.1, %broadcast.splatinsert37
82+
%extract = extractelement <4 x i1> %or, i64 0
83+
br i1 %extract, label %pred.store.if, label %pred.store.continue
8484

8585
pred.store.if: ; preds = %.lr.ph27
8686
store i8 0, ptr addrspace(1) null, align 64
8787
br label %pred.store.continue
8888

8989
pred.store.continue: ; preds = %pred.store.if, %.lr.ph27
90-
%8 = extractelement <4 x i1> %6, i64 1
91-
br i1 %8, label %pred.store.if41, label %.lr.ph27
90+
%extract.1 = extractelement <4 x i1> %or, i64 1
91+
br i1 %extract.1, label %pred.store.if41, label %.lr.ph27
9292

9393
pred.store.if41: ; preds = %pred.store.continue
9494
store i8 0, ptr addrspace(1) null, align 64

0 commit comments

Comments (0)