Skip to content

Commit 2024d67

Browse files
committed
[NFC][AMDGPU] modify lit test to use update_llc_test_checks
1 parent 87d3795 commit 2024d67

File tree

1 file changed

+285
-42
lines changed

1 file changed

+285
-42
lines changed

llvm/test/CodeGen/AMDGPU/lds-dma-waits.ll

Lines changed: 285 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=GCN,GFX9
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck %s --check-prefixes=GCN,GFX10
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=GFX9
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck %s --check-prefixes=GFX10
34

45
@lds.0 = internal addrspace(3) global [64 x float] poison, align 16
56
@lds.1 = internal addrspace(3) global [64 x float] poison, align 16
@@ -15,13 +16,60 @@
1516
declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
1617
declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)
1718

18-
; GCN-LABEL: {{^}}buffer_load_lds_dword_2_arrays:
19-
; GCN-COUNT-4: buffer_load_dword
20-
; GCN: s_waitcnt vmcnt(2)
21-
; GCN: ds_read_b32
22-
; GCN: s_waitcnt vmcnt(0)
23-
; GCN: ds_read_b32
2419
define amdgpu_kernel void @buffer_load_lds_dword_2_arrays(<4 x i32> %rsrc, i32 %i1, i32 %i2, ptr addrspace(1) %out) {
20+
; GFX9-LABEL: buffer_load_lds_dword_2_arrays:
21+
; GFX9: ; %bb.0: ; %main_body
22+
; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
23+
; GFX9-NEXT: v_mov_b32_e32 v0, 4
24+
; GFX9-NEXT: s_mov_b32 m0, 0
25+
; GFX9-NEXT: v_mov_b32_e32 v1, 8
26+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
27+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
28+
; GFX9-NEXT: buffer_load_dword off, s[8:11], 0 lds
29+
; GFX9-NEXT: buffer_load_dword v0, s[8:11], 0 offen lds
30+
; GFX9-NEXT: s_movk_i32 m0, 0x100
31+
; GFX9-NEXT: v_mov_b32_e32 v0, 12
32+
; GFX9-NEXT: buffer_load_dword v1, s[8:11], 0 offen lds
33+
; GFX9-NEXT: buffer_load_dword v0, s[8:11], 0 offen lds
34+
; GFX9-NEXT: s_lshl_b32 s0, s12, 2
35+
; GFX9-NEXT: s_lshl_b32 s1, s13, 2
36+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
37+
; GFX9-NEXT: v_mov_b32_e32 v1, s1
38+
; GFX9-NEXT: s_waitcnt vmcnt(2)
39+
; GFX9-NEXT: ds_read_b32 v0, v0
40+
; GFX9-NEXT: ; wave barrier
41+
; GFX9-NEXT: s_waitcnt vmcnt(0)
42+
; GFX9-NEXT: ds_read_b32 v1, v1 offset:256
43+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
44+
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[14:15]
45+
; GFX9-NEXT: s_endpgm
46+
;
47+
; GFX10-LABEL: buffer_load_lds_dword_2_arrays:
48+
; GFX10: ; %bb.0: ; %main_body
49+
; GFX10-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
50+
; GFX10-NEXT: v_mov_b32_e32 v0, 4
51+
; GFX10-NEXT: v_mov_b32_e32 v1, 8
52+
; GFX10-NEXT: v_mov_b32_e32 v2, 12
53+
; GFX10-NEXT: s_mov_b32 m0, 0
54+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
55+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
56+
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen lds
57+
; GFX10-NEXT: s_movk_i32 m0, 0x100
58+
; GFX10-NEXT: buffer_load_dword v1, s[0:3], 0 offen lds
59+
; GFX10-NEXT: buffer_load_dword v2, s[0:3], 0 offen lds
60+
; GFX10-NEXT: s_lshl_b32 s0, s4, 2
61+
; GFX10-NEXT: s_lshl_b32 s1, s5, 2
62+
; GFX10-NEXT: v_mov_b32_e32 v0, s0
63+
; GFX10-NEXT: v_mov_b32_e32 v1, s1
64+
; GFX10-NEXT: v_mov_b32_e32 v2, 0
65+
; GFX10-NEXT: s_waitcnt vmcnt(2)
66+
; GFX10-NEXT: ds_read_b32 v0, v0
67+
; GFX10-NEXT: ; wave barrier
68+
; GFX10-NEXT: s_waitcnt vmcnt(0)
69+
; GFX10-NEXT: ds_read_b32 v1, v1 offset:256
70+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
71+
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
72+
; GFX10-NEXT: s_endpgm
2573
main_body:
2674
call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) @lds.0, i32 4, i32 0, i32 0, i32 0, i32 0)
2775
call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) @lds.0, i32 4, i32 4, i32 0, i32 0, i32 0)
@@ -41,15 +89,56 @@ main_body:
4189
; On gfx9, if a FLAT operation is pending, this is a VMem or LGKM waitcnt, and
4290
; the target can report early completion, a waitcnt of 0 must be forced (gfx900 waits with a single vmcnt(0) below; gfx1030 uses vmcnt(2), then vmcnt(0)).
4391

44-
; GCN-LABEL: {{^}}global_load_lds_dword_2_arrays:
45-
; GCN-COUNT-4: global_load_dword
46-
; GFX9: s_waitcnt vmcnt(0)
47-
; GFX9-COUNT-2: ds_read_b32
48-
; GFX10: s_waitcnt vmcnt(2)
49-
; GFX10: ds_read_b32
50-
; GFX10: s_waitcnt vmcnt(0)
51-
; GFX10: ds_read_b32
5292
define amdgpu_kernel void @global_load_lds_dword_2_arrays(ptr addrspace(1) nocapture %gptr, i32 %i1, i32 %i2, ptr addrspace(1) %out) {
93+
; GFX9-LABEL: global_load_lds_dword_2_arrays:
94+
; GFX9: ; %bb.0: ; %main_body
95+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
96+
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
97+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
98+
; GFX9-NEXT: s_mov_b32 m0, 0
99+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
100+
; GFX9-NEXT: global_load_dword v2, s[0:1] lds
101+
; GFX9-NEXT: global_load_dword v2, s[0:1] offset:4 lds
102+
; GFX9-NEXT: s_movk_i32 m0, 0x100
103+
; GFX9-NEXT: s_nop 0
104+
; GFX9-NEXT: global_load_dword v2, s[0:1] offset:8 lds
105+
; GFX9-NEXT: global_load_dword v2, s[0:1] offset:12 lds
106+
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
107+
; GFX9-NEXT: s_lshl_b32 s1, s3, 2
108+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
109+
; GFX9-NEXT: v_mov_b32_e32 v1, s1
110+
; GFX9-NEXT: s_waitcnt vmcnt(0)
111+
; GFX9-NEXT: ds_read_b32 v0, v0
112+
; GFX9-NEXT: ; wave barrier
113+
; GFX9-NEXT: ds_read_b32 v1, v1 offset:256
114+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
115+
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
116+
; GFX9-NEXT: s_endpgm
117+
;
118+
; GFX10-LABEL: global_load_lds_dword_2_arrays:
119+
; GFX10: ; %bb.0: ; %main_body
120+
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
121+
; GFX10-NEXT: v_mov_b32_e32 v2, 0
122+
; GFX10-NEXT: s_mov_b32 m0, 0
123+
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
124+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
125+
; GFX10-NEXT: global_load_dword v2, s[0:1] lds
126+
; GFX10-NEXT: global_load_dword v2, s[0:1] offset:4 lds
127+
; GFX10-NEXT: s_movk_i32 m0, 0x100
128+
; GFX10-NEXT: global_load_dword v2, s[0:1] offset:8 lds
129+
; GFX10-NEXT: global_load_dword v2, s[0:1] offset:12 lds
130+
; GFX10-NEXT: s_lshl_b32 s0, s2, 2
131+
; GFX10-NEXT: s_lshl_b32 s1, s3, 2
132+
; GFX10-NEXT: v_mov_b32_e32 v0, s0
133+
; GFX10-NEXT: v_mov_b32_e32 v1, s1
134+
; GFX10-NEXT: s_waitcnt vmcnt(2)
135+
; GFX10-NEXT: ds_read_b32 v0, v0
136+
; GFX10-NEXT: ; wave barrier
137+
; GFX10-NEXT: s_waitcnt vmcnt(0)
138+
; GFX10-NEXT: ds_read_b32 v1, v1 offset:256
139+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
140+
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
141+
; GFX10-NEXT: s_endpgm
53142
main_body:
54143
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) @lds.0, i32 4, i32 0, i32 0)
55144
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) @lds.0, i32 4, i32 4, i32 0)
@@ -68,25 +157,144 @@ main_body:
68157

69158
; There are 8 pseudo-registers defined to track LDS DMA dependencies; this test issues 10 LDS DMA loads, more than that.
70159

71-
; GCN-LABEL: {{^}}buffer_load_lds_dword_10_arrays:
72-
; GCN-COUNT-10: buffer_load_dword
73-
; GCN: s_waitcnt vmcnt(8)
74-
; GCN: ds_read_b32
75-
; GCN: s_waitcnt vmcnt(7)
76-
; GCN: ds_read_b32
77-
; GCN: s_waitcnt vmcnt(6)
78-
; GCN: ds_read_b32
79-
; GCN: s_waitcnt vmcnt(5)
80-
; GCN: ds_read_b32
81-
; GCN: s_waitcnt vmcnt(4)
82-
; GCN: ds_read_b32
83-
; GCN: s_waitcnt vmcnt(3)
84-
; GCN: ds_read_b32
85-
; GCN: s_waitcnt vmcnt(2)
86-
; GCN-NOT: s_waitcnt vmcnt
87-
; GCN: ds_read_b32
88-
; GCN: ds_read_b32
89160
define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, ptr addrspace(1) %out) {
161+
; GFX9-LABEL: buffer_load_lds_dword_10_arrays:
162+
; GFX9: ; %bb.0: ; %main_body
163+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
164+
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
165+
; GFX9-NEXT: s_mov_b32 m0, 0
166+
; GFX9-NEXT: v_mov_b32_e32 v10, 0
167+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
168+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
169+
; GFX9-NEXT: s_movk_i32 m0, 0x100
170+
; GFX9-NEXT: s_nop 0
171+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
172+
; GFX9-NEXT: s_movk_i32 m0, 0x200
173+
; GFX9-NEXT: s_nop 0
174+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
175+
; GFX9-NEXT: s_movk_i32 m0, 0x300
176+
; GFX9-NEXT: s_nop 0
177+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
178+
; GFX9-NEXT: s_movk_i32 m0, 0x400
179+
; GFX9-NEXT: s_nop 0
180+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
181+
; GFX9-NEXT: s_movk_i32 m0, 0x500
182+
; GFX9-NEXT: s_nop 0
183+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
184+
; GFX9-NEXT: s_movk_i32 m0, 0x600
185+
; GFX9-NEXT: s_nop 0
186+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
187+
; GFX9-NEXT: s_movk_i32 m0, 0x700
188+
; GFX9-NEXT: s_nop 0
189+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
190+
; GFX9-NEXT: s_movk_i32 m0, 0x800
191+
; GFX9-NEXT: s_nop 0
192+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
193+
; GFX9-NEXT: s_movk_i32 m0, 0x900
194+
; GFX9-NEXT: s_nop 0
195+
; GFX9-NEXT: buffer_load_dword off, s[0:3], 0 lds
196+
; GFX9-NEXT: s_lshl_b32 s2, s6, 2
197+
; GFX9-NEXT: s_lshl_b32 s3, s7, 2
198+
; GFX9-NEXT: v_mov_b32_e32 v0, s2
199+
; GFX9-NEXT: v_mov_b32_e32 v9, s3
200+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x5c
201+
; GFX9-NEXT: s_waitcnt vmcnt(9)
202+
; GFX9-NEXT: ds_read_b32 v0, v0
203+
; GFX9-NEXT: ; wave barrier
204+
; GFX9-NEXT: s_waitcnt vmcnt(8)
205+
; GFX9-NEXT: ds_read_b32 v1, v9 offset:256
206+
; GFX9-NEXT: ; wave barrier
207+
; GFX9-NEXT: s_waitcnt vmcnt(7)
208+
; GFX9-NEXT: ds_read_b32 v2, v9 offset:512
209+
; GFX9-NEXT: ; wave barrier
210+
; GFX9-NEXT: s_waitcnt vmcnt(6)
211+
; GFX9-NEXT: ds_read_b32 v3, v9 offset:768
212+
; GFX9-NEXT: ; wave barrier
213+
; GFX9-NEXT: s_waitcnt vmcnt(5)
214+
; GFX9-NEXT: ds_read_b32 v4, v9 offset:1024
215+
; GFX9-NEXT: ; wave barrier
216+
; GFX9-NEXT: s_waitcnt vmcnt(4)
217+
; GFX9-NEXT: ds_read_b32 v5, v9 offset:1280
218+
; GFX9-NEXT: ; wave barrier
219+
; GFX9-NEXT: s_waitcnt vmcnt(3)
220+
; GFX9-NEXT: ds_read_b32 v6, v9 offset:1536
221+
; GFX9-NEXT: ; wave barrier
222+
; GFX9-NEXT: s_waitcnt vmcnt(2)
223+
; GFX9-NEXT: ds_read_b32 v7, v9 offset:1792
224+
; GFX9-NEXT: ; wave barrier
225+
; GFX9-NEXT: ds_read_b32 v8, v9 offset:2048
226+
; GFX9-NEXT: ; wave barrier
227+
; GFX9-NEXT: ds_read_b32 v9, v9 offset:2304
228+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
229+
; GFX9-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
230+
; GFX9-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
231+
; GFX9-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:32
232+
; GFX9-NEXT: s_endpgm
233+
;
234+
; GFX10-LABEL: buffer_load_lds_dword_10_arrays:
235+
; GFX10: ; %bb.0: ; %main_body
236+
; GFX10-NEXT: s_clause 0x1
237+
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
238+
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
239+
; GFX10-NEXT: s_mov_b32 m0, 0
240+
; GFX10-NEXT: v_mov_b32_e32 v10, 0
241+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
242+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
243+
; GFX10-NEXT: s_movk_i32 m0, 0x100
244+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
245+
; GFX10-NEXT: s_movk_i32 m0, 0x200
246+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
247+
; GFX10-NEXT: s_movk_i32 m0, 0x300
248+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
249+
; GFX10-NEXT: s_movk_i32 m0, 0x400
250+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
251+
; GFX10-NEXT: s_movk_i32 m0, 0x500
252+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
253+
; GFX10-NEXT: s_movk_i32 m0, 0x600
254+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
255+
; GFX10-NEXT: s_movk_i32 m0, 0x700
256+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
257+
; GFX10-NEXT: s_movk_i32 m0, 0x800
258+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
259+
; GFX10-NEXT: s_movk_i32 m0, 0x900
260+
; GFX10-NEXT: buffer_load_dword off, s[0:3], 0 lds
261+
; GFX10-NEXT: s_lshl_b32 s0, s6, 2
262+
; GFX10-NEXT: s_lshl_b32 s1, s7, 2
263+
; GFX10-NEXT: v_mov_b32_e32 v0, s0
264+
; GFX10-NEXT: v_mov_b32_e32 v9, s1
265+
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x5c
266+
; GFX10-NEXT: s_waitcnt vmcnt(9)
267+
; GFX10-NEXT: ds_read_b32 v0, v0
268+
; GFX10-NEXT: ; wave barrier
269+
; GFX10-NEXT: s_waitcnt vmcnt(8)
270+
; GFX10-NEXT: ds_read_b32 v1, v9 offset:256
271+
; GFX10-NEXT: ; wave barrier
272+
; GFX10-NEXT: s_waitcnt vmcnt(7)
273+
; GFX10-NEXT: ds_read_b32 v2, v9 offset:512
274+
; GFX10-NEXT: ; wave barrier
275+
; GFX10-NEXT: s_waitcnt vmcnt(6)
276+
; GFX10-NEXT: ds_read_b32 v3, v9 offset:768
277+
; GFX10-NEXT: ; wave barrier
278+
; GFX10-NEXT: s_waitcnt vmcnt(5)
279+
; GFX10-NEXT: ds_read_b32 v4, v9 offset:1024
280+
; GFX10-NEXT: ; wave barrier
281+
; GFX10-NEXT: s_waitcnt vmcnt(4)
282+
; GFX10-NEXT: ds_read_b32 v5, v9 offset:1280
283+
; GFX10-NEXT: ; wave barrier
284+
; GFX10-NEXT: s_waitcnt vmcnt(3)
285+
; GFX10-NEXT: ds_read_b32 v6, v9 offset:1536
286+
; GFX10-NEXT: ; wave barrier
287+
; GFX10-NEXT: s_waitcnt vmcnt(2)
288+
; GFX10-NEXT: ds_read_b32 v7, v9 offset:1792
289+
; GFX10-NEXT: ; wave barrier
290+
; GFX10-NEXT: ds_read_b32 v8, v9 offset:2048
291+
; GFX10-NEXT: ; wave barrier
292+
; GFX10-NEXT: ds_read_b32 v9, v9 offset:2304
293+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
294+
; GFX10-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
295+
; GFX10-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
296+
; GFX10-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:32
297+
; GFX10-NEXT: s_endpgm
90298
main_body:
91299
call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) @lds.0, i32 4, i32 0, i32 0, i32 0, i32 0)
92300
call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) @lds.1, i32 4, i32 0, i32 0, i32 0, i32 0)
@@ -151,14 +359,49 @@ main_body:
151359

152360
define amdgpu_kernel void @global_load_lds_no_alias_ds_read(ptr addrspace(1) nocapture %gptr, i32 %i1, i32 %i2, ptr addrspace(1) %out) {
153361
; GFX9-LABEL: global_load_lds_no_alias_ds_read:
154-
; GFX9: global_load_dword
155-
; GFX9: global_load_dword
156-
; GFX9: s_waitcnt vmcnt(1)
157-
; GFX9-NOT: s_waitcnt vmcnt(0)
158-
; GFX9: ds_read_b32
159-
; GFX9: s_waitcnt vmcnt(0)
160-
; GFX9: ds_read_b32
161-
; GFX9: s_endpgm
362+
; GFX9: ; %bb.0: ; %body
363+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
364+
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
365+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
366+
; GFX9-NEXT: s_mov_b32 m0, 0
367+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
368+
; GFX9-NEXT: global_load_dword v2, s[0:1] lds
369+
; GFX9-NEXT: s_movk_i32 m0, 0x100
370+
; GFX9-NEXT: s_nop 0
371+
; GFX9-NEXT: global_load_dword v2, s[0:1] offset:4 lds
372+
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
373+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
374+
; GFX9-NEXT: s_lshl_b32 s0, s3, 2
375+
; GFX9-NEXT: v_mov_b32_e32 v1, s0
376+
; GFX9-NEXT: s_waitcnt vmcnt(1)
377+
; GFX9-NEXT: ds_read_b32 v0, v0 offset:512
378+
; GFX9-NEXT: s_waitcnt vmcnt(0)
379+
; GFX9-NEXT: ds_read_b32 v1, v1 offset:768
380+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
381+
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
382+
; GFX9-NEXT: s_endpgm
383+
;
384+
; GFX10-LABEL: global_load_lds_no_alias_ds_read:
385+
; GFX10: ; %bb.0: ; %body
386+
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
387+
; GFX10-NEXT: v_mov_b32_e32 v2, 0
388+
; GFX10-NEXT: s_mov_b32 m0, 0
389+
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
390+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
391+
; GFX10-NEXT: global_load_dword v2, s[0:1] lds
392+
; GFX10-NEXT: s_movk_i32 m0, 0x100
393+
; GFX10-NEXT: global_load_dword v2, s[0:1] offset:4 lds
394+
; GFX10-NEXT: s_lshl_b32 s0, s2, 2
395+
; GFX10-NEXT: s_waitcnt vmcnt(1) lgkmcnt(15)
396+
; GFX10-NEXT: v_mov_b32_e32 v0, s0
397+
; GFX10-NEXT: s_lshl_b32 s0, s3, 2
398+
; GFX10-NEXT: v_mov_b32_e32 v1, s0
399+
; GFX10-NEXT: ds_read_b32 v0, v0 offset:512
400+
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(15)
401+
; GFX10-NEXT: ds_read_b32 v1, v1 offset:768
402+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
403+
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
404+
; GFX10-NEXT: s_endpgm
162405
body:
163406
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) @lds.0, i32 4, i32 0, i32 0)
164407
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) @lds.1, i32 4, i32 4, i32 0)

0 commit comments

Comments
 (0)