Skip to content

Commit c8f37bb

Browse files
committed
[NFC] Precommit test.
1 parent 4e175b8 commit c8f37bb

File tree

1 file changed

+82
-0
lines changed

1 file changed

+82
-0
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefix=GCN %s
3+
4+
%"struct.cub::BlockRadixSort<unsigned char, 32, 1, cub::NullType, 1, true, cub::BLOCK_SCAN_RAKING>::TempStorage" = type { %"struct.cub::Uninitialized" }
5+
%"struct.cub::Uninitialized" = type { [26 x %struct.ulonglong2.0] }
6+
%struct.ulonglong2.0 = type { i64, i64 }
7+
8+
@tmp_storage = external dso_local local_unnamed_addr addrspace(3) global %"struct.cub::BlockRadixSort<unsigned char, 32, 1, cub::NullType, 1, true, cub::BLOCK_SCAN_RAKING>::TempStorage", align 16
9+
10+
declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
11+
12+
declare void @llvm.amdgcn.s.barrier()
13+
14+
define amdgpu_kernel void @Kernel_func(i8 %a, i32 %b, i32 %c, i32 %end_bit) {
15+
; GCN-LABEL: Kernel_func:
16+
; GCN: ; %bb.0: ; %entry
17+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
18+
; GCN-NEXT: s_load_dword s6, s[4:5], 0x30
19+
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
20+
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
21+
; GCN-NEXT: s_mov_b64 s[2:3], 0
22+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
23+
; GCN-NEXT: s_and_b32 s0, s0, 0xff
24+
; GCN-NEXT: v_mov_b32_e32 v3, s1
25+
; GCN-NEXT: v_lshl_or_b32 v2, v0, 2, 2
26+
; GCN-NEXT: .LBB0_1: ; %while.cond
27+
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
28+
; GCN-NEXT: v_sub_u32_e32 v4, s6, v3
29+
; GCN-NEXT: v_lshrrev_b32_e64 v3, v3, s0
30+
; GCN-NEXT: v_min_i32_e32 v4, 1, v4
31+
; GCN-NEXT: v_bfe_u32 v3, v3, 0, v4
32+
; GCN-NEXT: v_lshlrev_b32_e32 v3, 1, v3
33+
; GCN-NEXT: v_sub_u32_e32 v4, v2, v3
34+
; GCN-NEXT: ds_read_u16 v3, v4
35+
; GCN-NEXT: ds_write_b32 v1, v0
36+
; GCN-NEXT: s_waitcnt lgkmcnt(1)
37+
; GCN-NEXT: v_add_u16_e32 v3, 1, v3
38+
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, s1, v3
39+
; GCN-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
40+
; GCN-NEXT: ds_write_b16 v4, v3
41+
; GCN-NEXT: s_barrier
42+
; GCN-NEXT: s_andn2_b64 exec, exec, s[2:3]
43+
; GCN-NEXT: s_cbranch_execnz .LBB0_1
44+
; GCN-NEXT: ; %bb.2: ; %end
45+
; GCN-NEXT: s_endpgm
46+
entry:
47+
%tid = tail call noundef i32 @llvm.amdgcn.workitem.id.x()
48+
%idx = getelementptr inbounds [2 x [32 x [2 x i16]]], ptr addrspace(3) @tmp_storage, i32 0, i32 0, i32 %tid
49+
br label %while.cond
50+
51+
while.cond: ; preds = %while.cond, %entry
52+
%begin_bit = phi i32 [ %b, %entry ], [ %conv, %while.cond ]
53+
%diff = sub nsw i32 %end_bit, %begin_bit
54+
%smin = tail call i32 @llvm.smin.i32(i32 %diff, i32 1)
55+
store i32 %tid, ptr addrspace(3) %idx, align 4, !tbaa !0
56+
%notmask = shl nsw i32 -1, %smin
57+
%not = xor i32 %notmask, -1
58+
%conv.iii = zext i8 %a to i32
59+
%shr.iii = lshr i32 %conv.iii, %begin_bit
60+
%and.iii = and i32 %shr.iii, %not
61+
%sub = sub nsw i32 1, %and.iii
62+
%arrayidx = getelementptr inbounds [32 x [2 x i16]], ptr addrspace(3) @tmp_storage, i32 0, i32 %tid, i32 %sub
63+
%value = load i16, ptr addrspace(3) %arrayidx, align 2, !tbaa !4
64+
%add = add i16 %value, 1
65+
store i16 %add, ptr addrspace(3) %arrayidx, align 2, !tbaa !4
66+
tail call void @llvm.amdgcn.s.barrier()
67+
%conv = zext i16 %add to i32
68+
%cmp = icmp sgt i32 %conv, %b
69+
br i1 %cmp, label %while.cond, label %end
70+
71+
end: ; preds = %while.cond
72+
ret void
73+
}
74+
75+
declare i32 @llvm.smin.i32(i32, i32)
76+
77+
!0 = !{!1, !1, i64 0}
78+
!1 = !{!"int", !2, i64 0}
79+
!2 = !{!"omnipotent char", !3, i64 0}
80+
!3 = !{!"Simple C++ TBAA"}
81+
!4 = !{!5, !5, i64 0}
82+
!5 = !{!"short", !2, i64 0}

0 commit comments

Comments
 (0)