Skip to content

Commit ac8f3cd

Browse files
committed
[AMDGPU] Precommit test for memory intrinsics CGP handling
Change-Id: Id229f849b1d8552bbe59d6e18114042ef1614fad
1 parent c7054d9 commit ac8f3cd

File tree

1 file changed

+166
-0
lines changed

1 file changed

+166
-0
lines changed
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s
3+
4+
; Kernel with an if/else diamond in which both arms call
; llvm.amdgcn.ds.read.tr16.b64 on the same address %gep0. %gep0 is formed in the
; entry block as %inptr advanced by 2048 elements of type ptr addrspace(3).
; Each arm applies a different shufflevector mask to its loaded <4 x half>; the
; phi in %end merges the two results and the merged value is stored to %outptr.
; In the expected output below, the offset add (s_add_i32 with 0x2000) appears
; once in %bb.0 and both arms copy that scalar into v0 for ds_read_b64_tr_b16.
; NOTE(review): presumably this records baseline codegen ahead of a
; CodeGenPrepare change for memory intrinsics - confirm against the follow-up.
define amdgpu_kernel void @memoryIntrinstic(ptr addrspace(3) %inptr, i1 %cond, ptr addrspace(3) %outptr) {
5+
; CHECK-LABEL: memoryIntrinstic:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
8+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
9+
; CHECK-NEXT: s_and_b32 s1, s1, 1
10+
; CHECK-NEXT: s_add_i32 s3, s0, 0x2000
11+
; CHECK-NEXT: s_cmp_eq_u32 s1, 0
12+
; CHECK-NEXT: s_cbranch_scc0 .LBB0_2
13+
; CHECK-NEXT: ; %bb.1: ; %else
14+
; CHECK-NEXT: v_mov_b32_e32 v0, s3
15+
; CHECK-NEXT: ds_read_b64_tr_b16 v[2:3], v0
16+
; CHECK-NEXT: s_mov_b32 s0, 0x7060302
17+
; CHECK-NEXT: s_mov_b32 s1, 0x5040100
18+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
19+
; CHECK-NEXT: v_perm_b32 v0, v3, v2, s0
20+
; CHECK-NEXT: v_perm_b32 v1, v3, v2, s1
21+
; CHECK-NEXT: s_cbranch_execz .LBB0_3
22+
; CHECK-NEXT: s_branch .LBB0_4
23+
; CHECK-NEXT: .LBB0_2:
24+
; CHECK-NEXT: ; implicit-def: $vgpr1
25+
; CHECK-NEXT: .LBB0_3: ; %then
26+
; CHECK-NEXT: v_mov_b32_e32 v0, s3
27+
; CHECK-NEXT: ds_read_b64_tr_b16 v[2:3], v0
28+
; CHECK-NEXT: s_mov_b32 s0, 0x5040100
29+
; CHECK-NEXT: s_mov_b32 s1, 0x7060302
30+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
31+
; CHECK-NEXT: v_perm_b32 v0, v3, v2, s0
32+
; CHECK-NEXT: v_perm_b32 v1, v3, v2, s1
33+
; CHECK-NEXT: .LBB0_4: ; %end
34+
; CHECK-NEXT: v_mov_b32_e32 v2, s2
35+
; CHECK-NEXT: ds_write_b64 v2, v[0:1]
36+
; CHECK-NEXT: s_endpgm
37+
%gep0 = getelementptr ptr addrspace(3), ptr addrspace(3) %inptr, i32 2048
38+
br i1 %cond, label %then, label %else
39+
40+
then:
41+
%load0 = tail call <4 x half> @llvm.amdgcn.ds.read.tr16.b64.v4f16(ptr addrspace(3) %gep0)
42+
%shuf0 = shufflevector <4 x half> %load0, <4 x half> %load0, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
43+
br label %end
44+
45+
else:
46+
%load1 = tail call <4 x half> @llvm.amdgcn.ds.read.tr16.b64.v4f16(ptr addrspace(3) %gep0)
47+
%shuf1 = shufflevector <4 x half> %load1, <4 x half> %load1, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
48+
br label %end
49+
50+
end:
51+
%res = phi <4 x half> [ %shuf0, %then ], [ %shuf1, %else ]
52+
store <4 x half> %res, ptr addrspace(3) %outptr
53+
ret void
54+
}
55+
56+
; Kernel with an if/else diamond in which both arms call
; llvm.amdgcn.ds.read.tr16.b64 on %gep0 (%inptr advanced by 2048 elements of
; type ptr addrspace(3)), and the %else arm has a second user of %gep0: it is
; passed as the first operand of llvm.amdgcn.raw.buffer.store together with the
; %rsrc argument. The shuffled load results are merged by the phi in %end and
; stored to %outptr.
; In the expected output below, the offset add (s_add_i32 with 0x2000) appears
; once in %bb.0 and its result s3 is reused by both arms.
; NOTE(review): presumably "bad" means this extra user is not an address
; operand of a memory intrinsic - confirm against the CodeGenPrepare change.
define amdgpu_kernel void @badIntrinsicUse(ptr addrspace(3) %inptr, i1 %cond, ptr addrspace(3) %outptr, <4 x i32> %rsrc) {
57+
; CHECK-LABEL: badIntrinsicUse:
58+
; CHECK: ; %bb.0:
59+
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
60+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
61+
; CHECK-NEXT: s_and_b32 s1, s1, 1
62+
; CHECK-NEXT: s_add_i32 s3, s0, 0x2000
63+
; CHECK-NEXT: s_cmp_eq_u32 s1, 0
64+
; CHECK-NEXT: s_cbranch_scc0 .LBB1_2
65+
; CHECK-NEXT: ; %bb.1: ; %else
66+
; CHECK-NEXT: v_mov_b32_e32 v0, s3
67+
; CHECK-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x10
68+
; CHECK-NEXT: ds_read_b64_tr_b16 v[2:3], v0
69+
; CHECK-NEXT: s_mov_b32 s0, 0x7060302
70+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
71+
; CHECK-NEXT: buffer_store_dword v0, off, s[4:7], 0
72+
; CHECK-NEXT: v_perm_b32 v0, v3, v2, s0
73+
; CHECK-NEXT: s_mov_b32 s0, 0x5040100
74+
; CHECK-NEXT: v_perm_b32 v1, v3, v2, s0
75+
; CHECK-NEXT: s_cbranch_execz .LBB1_3
76+
; CHECK-NEXT: s_branch .LBB1_4
77+
; CHECK-NEXT: .LBB1_2:
78+
; CHECK-NEXT: ; implicit-def: $vgpr1
79+
; CHECK-NEXT: .LBB1_3: ; %then
80+
; CHECK-NEXT: v_mov_b32_e32 v0, s3
81+
; CHECK-NEXT: ds_read_b64_tr_b16 v[2:3], v0
82+
; CHECK-NEXT: s_mov_b32 s0, 0x5040100
83+
; CHECK-NEXT: s_mov_b32 s1, 0x7060302
84+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
85+
; CHECK-NEXT: v_perm_b32 v0, v3, v2, s0
86+
; CHECK-NEXT: v_perm_b32 v1, v3, v2, s1
87+
; CHECK-NEXT: .LBB1_4: ; %end
88+
; CHECK-NEXT: v_mov_b32_e32 v2, s2
89+
; CHECK-NEXT: ds_write_b64 v2, v[0:1]
90+
; CHECK-NEXT: s_endpgm
91+
%gep0 = getelementptr ptr addrspace(3), ptr addrspace(3) %inptr, i32 2048
92+
br i1 %cond, label %then, label %else
93+
94+
then:
95+
%load0 = tail call <4 x half> @llvm.amdgcn.ds.read.tr16.b64.v4f16(ptr addrspace(3) %gep0)
96+
%shuf0 = shufflevector <4 x half> %load0, <4 x half> %load0, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
97+
br label %end
98+
99+
else:
100+
%load1 = tail call <4 x half> @llvm.amdgcn.ds.read.tr16.b64.v4f16(ptr addrspace(3) %gep0)
101+
; %gep0 is the first operand here; the expected buffer_store_dword stores v0,
; which was copied from s3 (the computed address), through s[4:7].
call void @llvm.amdgcn.raw.buffer.store(ptr addrspace(3) %gep0, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
102+
%shuf1 = shufflevector <4 x half> %load1, <4 x half> %load1, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
103+
br label %end
104+
105+
end:
106+
%res = phi <4 x half> [ %shuf0, %then ], [ %shuf1, %else ]
107+
store <4 x half> %res, ptr addrspace(3) %outptr
108+
ret void
109+
}
110+
111+
; Kernel with an if/else diamond in which both arms call
; llvm.amdgcn.ds.read.tr16.b64 on %gep0 (%inptr advanced by 2048 elements of
; type ptr addrspace(3)), and the %else arm has a second user of %gep0: it is
; passed to llvm.amdgcn.readfirstlane and the returned pointer is stored to
; %outptr1. The shuffled load results are merged by the phi in %end and stored
; to %outptr.
; In the expected output below, the offset add (s_add_i32 with 0x2000) appears
; once in %bb.0 and its result s4 is reused by both arms.
; NOTE(review): presumably "bad" means this extra user is an intrinsic that
; takes the pointer as a plain value rather than as an address - confirm
; against the CodeGenPrepare change.
define amdgpu_kernel void @badIntrinsicUse2(ptr addrspace(3) %inptr, i1 %cond, ptr addrspace(3) %outptr, ptr addrspace(3) %outptr1) {
112+
; CHECK-LABEL: badIntrinsicUse2:
113+
; CHECK: ; %bb.0:
114+
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
115+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
116+
; CHECK-NEXT: s_and_b32 s1, s1, 1
117+
; CHECK-NEXT: s_add_i32 s4, s0, 0x2000
118+
; CHECK-NEXT: s_cmp_eq_u32 s1, 0
119+
; CHECK-NEXT: s_cbranch_scc0 .LBB2_2
120+
; CHECK-NEXT: ; %bb.1: ; %else
121+
; CHECK-NEXT: v_mov_b32_e32 v0, s4
122+
; CHECK-NEXT: ds_read_b64_tr_b16 v[2:3], v0
123+
; CHECK-NEXT: v_mov_b32_e32 v0, s3
124+
; CHECK-NEXT: v_mov_b32_e32 v1, s4
125+
; CHECK-NEXT: s_mov_b32 s0, 0x7060302
126+
; CHECK-NEXT: ds_write_b32 v0, v1
127+
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
128+
; CHECK-NEXT: v_perm_b32 v0, v3, v2, s0
129+
; CHECK-NEXT: s_mov_b32 s0, 0x5040100
130+
; CHECK-NEXT: v_perm_b32 v1, v3, v2, s0
131+
; CHECK-NEXT: s_cbranch_execz .LBB2_3
132+
; CHECK-NEXT: s_branch .LBB2_4
133+
; CHECK-NEXT: .LBB2_2:
134+
; CHECK-NEXT: ; implicit-def: $vgpr1
135+
; CHECK-NEXT: .LBB2_3: ; %then
136+
; CHECK-NEXT: v_mov_b32_e32 v0, s4
137+
; CHECK-NEXT: ds_read_b64_tr_b16 v[2:3], v0
138+
; CHECK-NEXT: s_mov_b32 s0, 0x5040100
139+
; CHECK-NEXT: s_mov_b32 s1, 0x7060302
140+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
141+
; CHECK-NEXT: v_perm_b32 v0, v3, v2, s0
142+
; CHECK-NEXT: v_perm_b32 v1, v3, v2, s1
143+
; CHECK-NEXT: .LBB2_4: ; %end
144+
; CHECK-NEXT: v_mov_b32_e32 v2, s2
145+
; CHECK-NEXT: ds_write_b64 v2, v[0:1]
146+
; CHECK-NEXT: s_endpgm
147+
%gep0 = getelementptr ptr addrspace(3), ptr addrspace(3) %inptr, i32 2048
148+
br i1 %cond, label %then, label %else
149+
150+
then:
151+
%load0 = tail call <4 x half> @llvm.amdgcn.ds.read.tr16.b64.v4f16(ptr addrspace(3) %gep0)
152+
%shuf0 = shufflevector <4 x half> %load0, <4 x half> %load0, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
153+
br label %end
154+
155+
else:
156+
%load1 = tail call <4 x half> @llvm.amdgcn.ds.read.tr16.b64.v4f16(ptr addrspace(3) %gep0)
157+
; The result is only used as the value stored to %outptr1. The expected output
; contains no v_readfirstlane; ds_write_b32 writes v1, a copy of s4 (the
; computed address).
%gep1 = call ptr addrspace(3) @llvm.amdgcn.readfirstlane(ptr addrspace(3) %gep0)
158+
store ptr addrspace(3) %gep1, ptr addrspace(3) %outptr1
159+
%shuf1 = shufflevector <4 x half> %load1, <4 x half> %load1, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
160+
br label %end
161+
162+
end:
163+
%res = phi <4 x half> [ %shuf0, %then ], [ %shuf1, %else ]
164+
store <4 x half> %res, ptr addrspace(3) %outptr
165+
ret void
166+
}

0 commit comments

Comments
 (0)