Skip to content

Commit 76cb5fc

Browse files
authored
AMDGPU: Define agpr versions of ds permute instructions (#156695)
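Model the ds_permute_b32 and ds_bpermute_b32 instructions correctly by defining separate VGPR and AGPR (`_agpr`) variants instead of relying on AV_* operands. This is another step towards removing the special casing in TargetInstrInfo::getRegClass. Also add tests that exercise these instructions with AGPR, VGPR and AV inputs.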
1 parent 573627f commit 76cb5fc

File tree

3 files changed

+365
-18
lines changed

3 files changed

+365
-18
lines changed

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,19 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
520520
let has_gds = 0;
521521
}
522522

523+
// Defines a DS_1A1D_PERMUTE-based pseudo in two flavours:
//   <NAME>       - uses the VGPR operand `data_op` and carries the `node`
//                  selection pattern.
//   <NAME>_agpr  - the same instruction with the equivalent AGPR operand; it
//                  is given null_frag instead of `node` and is only defined
//                  under the isGFX90APlus subtarget predicate.
multiclass DS_1A1D_PERMUTE_mc <string opName, SDPatternOperator node = null_frag,
524+
                               RegisterOperand data_op = VGPROp_32> {
525+
  // The AGPR twin is derived from data_op below, so data_op itself must be
  // the VGPR operand.
  assert OperandIsVGPR<data_op>.ret,
526+
         "DS permute data operand should be declared with VGPRs";
527+
  def "" : DS_1A1D_PERMUTE<opName, node, data_op>;
528+
529+
  let SubtargetPredicate = isGFX90APlus in {
530+
    def _agpr : DS_1A1D_PERMUTE<opName, null_frag,
531+
                                getEquivalentAGPROperand<data_op>.ret>;
532+
  }
533+
}
534+
535+
523536
class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
524537
bit gds=0> : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
525538
(inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))> {
@@ -837,10 +850,10 @@ def DS_NOP : DS_VOID<"ds_nop">;
837850
let SubtargetPredicate = isGFX8Plus in {
838851

839852
let Uses = [EXEC] in {
840-
def DS_PERMUTE_B32 : DS_1A1D_PERMUTE <"ds_permute_b32",
841-
int_amdgcn_ds_permute>;
842-
def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
843-
int_amdgcn_ds_bpermute>;
853+
defm DS_PERMUTE_B32 : DS_1A1D_PERMUTE_mc<"ds_permute_b32",
854+
int_amdgcn_ds_permute>;
855+
defm DS_BPERMUTE_B32 : DS_1A1D_PERMUTE_mc<"ds_bpermute_b32",
856+
int_amdgcn_ds_bpermute>;
844857
}
845858

846859
} // let SubtargetPredicate = isGFX8Plus
Lines changed: 334 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,334 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
3+
4+
; Try to stress ds.bpermute and ds.permute instructions with AGPR/AV
5+
; inputs. It's not permissible to mix AGPR and VGPR data operands.
6+
7+
; Both ds.bpermute operands come from "=a" inline-asm defs and the result is
; consumed through an "a" constraint. The expected output copies a0/a1 into
; v0/v1 before the instruction and writes the result back to a0 afterwards.
define void @ds_bpermute_b32_a_a__use_a(ptr addrspace(3) %lds) #0 {
8+
; CHECK-LABEL: ds_bpermute_b32_a_a__use_a:
9+
; CHECK: ; %bb.0:
10+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+
; CHECK-NEXT: ;;#ASMSTART
12+
; CHECK-NEXT: ; def a0
13+
; CHECK-NEXT: ;;#ASMEND
14+
; CHECK-NEXT: ;;#ASMSTART
15+
; CHECK-NEXT: ; def a1
16+
; CHECK-NEXT: ;;#ASMEND
17+
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
18+
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
19+
; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1
20+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
21+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
22+
; CHECK-NEXT: ;;#ASMSTART
23+
; CHECK-NEXT: ; use a0
24+
; CHECK-NEXT: ;;#ASMEND
25+
; CHECK-NEXT: s_setpc_b64 s[30:31]
26+
%op0 = call i32 asm "; def $0", "=a"()
27+
%op1 = call i32 asm "; def $0", "=a"()
28+
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1)
29+
call void asm sideeffect "; use $0", "a"(i32 %bpermute)
30+
ret void
31+
}
32+
33+
; First ds.bpermute operand is defined with "=v", the second with "=a", and
; the result is consumed through an "a" constraint. The expected output only
; copies the second operand (a0 -> v1) and writes the result back to a0.
define void @ds_bpermute_b32_v_a__use_a(ptr addrspace(3) %lds) #0 {
34+
; CHECK-LABEL: ds_bpermute_b32_v_a__use_a:
35+
; CHECK: ; %bb.0:
36+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37+
; CHECK-NEXT: ;;#ASMSTART
38+
; CHECK-NEXT: ; def v0
39+
; CHECK-NEXT: ;;#ASMEND
40+
; CHECK-NEXT: ;;#ASMSTART
41+
; CHECK-NEXT: ; def a0
42+
; CHECK-NEXT: ;;#ASMEND
43+
; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
44+
; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1
45+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
46+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
47+
; CHECK-NEXT: ;;#ASMSTART
48+
; CHECK-NEXT: ; use a0
49+
; CHECK-NEXT: ;;#ASMEND
50+
; CHECK-NEXT: s_setpc_b64 s[30:31]
51+
%op0 = call i32 asm "; def $0", "=v"()
52+
%op1 = call i32 asm "; def $0", "=a"()
53+
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1)
54+
call void asm sideeffect "; use $0", "a"(i32 %bpermute)
55+
ret void
56+
}
57+
58+
; First ds.bpermute operand is defined with "=a", the second with "=v", and
; the result is consumed through an "a" constraint. The expected output only
; copies the first operand (a0 -> v1) and writes the result back to a0.
define void @ds_bpermute_b32_a_v__use_a(ptr addrspace(3) %lds) #0 {
59+
; CHECK-LABEL: ds_bpermute_b32_a_v__use_a:
60+
; CHECK: ; %bb.0:
61+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62+
; CHECK-NEXT: ;;#ASMSTART
63+
; CHECK-NEXT: ; def a0
64+
; CHECK-NEXT: ;;#ASMEND
65+
; CHECK-NEXT: ;;#ASMSTART
66+
; CHECK-NEXT: ; def v0
67+
; CHECK-NEXT: ;;#ASMEND
68+
; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
69+
; CHECK-NEXT: ds_bpermute_b32 v0, v1, v0
70+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
71+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
72+
; CHECK-NEXT: ;;#ASMSTART
73+
; CHECK-NEXT: ; use a0
74+
; CHECK-NEXT: ;;#ASMEND
75+
; CHECK-NEXT: s_setpc_b64 s[30:31]
76+
%op0 = call i32 asm "; def $0", "=a"()
77+
%op1 = call i32 asm "; def $0", "=v"()
78+
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1)
79+
call void asm sideeffect "; use $0", "a"(i32 %bpermute)
80+
ret void
81+
}
82+
83+
; Both ds.bpermute operands come from "=a" inline-asm defs but the result is
; consumed through a "v" constraint. The expected output copies a0/a1 into
; v0/v1 and uses the v0 result directly, with no write-back to an a-register.
define void @ds_bpermute_b32_a_a__use_v(ptr addrspace(3) %lds) #0 {
84+
; CHECK-LABEL: ds_bpermute_b32_a_a__use_v:
85+
; CHECK: ; %bb.0:
86+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87+
; CHECK-NEXT: ;;#ASMSTART
88+
; CHECK-NEXT: ; def a0
89+
; CHECK-NEXT: ;;#ASMEND
90+
; CHECK-NEXT: ;;#ASMSTART
91+
; CHECK-NEXT: ; def a1
92+
; CHECK-NEXT: ;;#ASMEND
93+
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
94+
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
95+
; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1
96+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
97+
; CHECK-NEXT: ;;#ASMSTART
98+
; CHECK-NEXT: ; use v0
99+
; CHECK-NEXT: ;;#ASMEND
100+
; CHECK-NEXT: s_setpc_b64 s[30:31]
101+
%op0 = call i32 asm "; def $0", "=a"()
102+
%op1 = call i32 asm "; def $0", "=a"()
103+
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1)
104+
call void asm sideeffect "; use $0", "v"(i32 %bpermute)
105+
ret void
106+
}
107+
108+
; Both ds.bpermute operands come from "=v" inline-asm defs and the result is
; consumed through an "a" constraint. The expected output needs no operand
; copies; only the result is moved from v0 to a0.
define void @ds_bpermute_b32_v_v__use_a(ptr addrspace(3) %lds) #0 {
109+
; CHECK-LABEL: ds_bpermute_b32_v_v__use_a:
110+
; CHECK: ; %bb.0:
111+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112+
; CHECK-NEXT: ;;#ASMSTART
113+
; CHECK-NEXT: ; def v0
114+
; CHECK-NEXT: ;;#ASMEND
115+
; CHECK-NEXT: ;;#ASMSTART
116+
; CHECK-NEXT: ; def v1
117+
; CHECK-NEXT: ;;#ASMEND
118+
; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1
119+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
120+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
121+
; CHECK-NEXT: ;;#ASMSTART
122+
; CHECK-NEXT: ; use a0
123+
; CHECK-NEXT: ;;#ASMEND
124+
; CHECK-NEXT: s_setpc_b64 s[30:31]
125+
%op0 = call i32 asm "; def $0", "=v"()
126+
%op1 = call i32 asm "; def $0", "=v"()
127+
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1)
128+
call void asm sideeffect "; use $0", "a"(i32 %bpermute)
129+
ret void
130+
}
131+
132+
; Operands and result of ds.bpermute all use the "^VA" inline-asm constraint.
; The expected output places everything in v-registers (v0/v1), so no
; v_accvgpr copies appear.
define void @ds_bpermute_b32_av_av__use_av(ptr addrspace(3) %lds) #0 {
133+
; CHECK-LABEL: ds_bpermute_b32_av_av__use_av:
134+
; CHECK: ; %bb.0:
135+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136+
; CHECK-NEXT: ;;#ASMSTART
137+
; CHECK-NEXT: ; def v0
138+
; CHECK-NEXT: ;;#ASMEND
139+
; CHECK-NEXT: ;;#ASMSTART
140+
; CHECK-NEXT: ; def v1
141+
; CHECK-NEXT: ;;#ASMEND
142+
; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1
143+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
144+
; CHECK-NEXT: ;;#ASMSTART
145+
; CHECK-NEXT: ; use v0
146+
; CHECK-NEXT: ;;#ASMEND
147+
; CHECK-NEXT: s_setpc_b64 s[30:31]
148+
%op0 = call i32 asm "; def $0", "=^VA"()
149+
%op1 = call i32 asm "; def $0", "=^VA"()
150+
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1)
151+
call void asm sideeffect "; use $0", "^VA"(i32 %bpermute)
152+
ret void
153+
}
154+
155+
; ds.bpermute with "^VA"-constrained operands while two <32 x i32> values
; pinned to v[0:31] and v[32:63] are defined before and used after the
; intrinsic call in the IR. The expected output defines the operands in a0/a1
; and copies them into v0/v1 immediately before ds_bpermute_b32; v40-v47 and
; v56-v63 are saved to and restored from a2-a17 around the body.
; NOTE(review): %gep.0 and %gep.1 have no users in this function; they look
; like leftovers - confirm whether they can be dropped.
define i32 @ds_bpermute_b32_av_av_no_vgprs(ptr addrspace(3) %lds) #0 {
156+
; CHECK-LABEL: ds_bpermute_b32_av_av_no_vgprs:
157+
; CHECK: ; %bb.0:
158+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159+
; CHECK-NEXT: v_accvgpr_write_b32 a2, v40 ; Reload Reuse
160+
; CHECK-NEXT: v_accvgpr_write_b32 a3, v41 ; Reload Reuse
161+
; CHECK-NEXT: v_accvgpr_write_b32 a4, v42 ; Reload Reuse
162+
; CHECK-NEXT: v_accvgpr_write_b32 a5, v43 ; Reload Reuse
163+
; CHECK-NEXT: v_accvgpr_write_b32 a6, v44 ; Reload Reuse
164+
; CHECK-NEXT: v_accvgpr_write_b32 a7, v45 ; Reload Reuse
165+
; CHECK-NEXT: v_accvgpr_write_b32 a8, v46 ; Reload Reuse
166+
; CHECK-NEXT: v_accvgpr_write_b32 a9, v47 ; Reload Reuse
167+
; CHECK-NEXT: v_accvgpr_write_b32 a10, v56 ; Reload Reuse
168+
; CHECK-NEXT: v_accvgpr_write_b32 a11, v57 ; Reload Reuse
169+
; CHECK-NEXT: v_accvgpr_write_b32 a12, v58 ; Reload Reuse
170+
; CHECK-NEXT: v_accvgpr_write_b32 a13, v59 ; Reload Reuse
171+
; CHECK-NEXT: v_accvgpr_write_b32 a14, v60 ; Reload Reuse
172+
; CHECK-NEXT: v_accvgpr_write_b32 a15, v61 ; Reload Reuse
173+
; CHECK-NEXT: v_accvgpr_write_b32 a16, v62 ; Reload Reuse
174+
; CHECK-NEXT: v_accvgpr_write_b32 a17, v63 ; Reload Reuse
175+
; CHECK-NEXT: ;;#ASMSTART
176+
; CHECK-NEXT: ; def a0
177+
; CHECK-NEXT: ;;#ASMEND
178+
; CHECK-NEXT: ;;#ASMSTART
179+
; CHECK-NEXT: ; def a1
180+
; CHECK-NEXT: ;;#ASMEND
181+
; CHECK-NEXT: ;;#ASMSTART
182+
; CHECK-NEXT: ; def v[0:31]
183+
; CHECK-NEXT: ;;#ASMEND
184+
; CHECK-NEXT: ;;#ASMSTART
185+
; CHECK-NEXT: ; use v[0:31]
186+
; CHECK-NEXT: ;;#ASMEND
187+
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
188+
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
189+
; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1
190+
; CHECK-NEXT: v_accvgpr_read_b32 v63, a17 ; Reload Reuse
191+
; CHECK-NEXT: v_accvgpr_read_b32 v62, a16 ; Reload Reuse
192+
; CHECK-NEXT: v_accvgpr_read_b32 v61, a15 ; Reload Reuse
193+
; CHECK-NEXT: v_accvgpr_read_b32 v60, a14 ; Reload Reuse
194+
; CHECK-NEXT: v_accvgpr_read_b32 v59, a13 ; Reload Reuse
195+
; CHECK-NEXT: v_accvgpr_read_b32 v58, a12 ; Reload Reuse
196+
; CHECK-NEXT: v_accvgpr_read_b32 v57, a11 ; Reload Reuse
197+
; CHECK-NEXT: v_accvgpr_read_b32 v56, a10 ; Reload Reuse
198+
; CHECK-NEXT: v_accvgpr_read_b32 v47, a9 ; Reload Reuse
199+
; CHECK-NEXT: v_accvgpr_read_b32 v46, a8 ; Reload Reuse
200+
; CHECK-NEXT: v_accvgpr_read_b32 v45, a7 ; Reload Reuse
201+
; CHECK-NEXT: v_accvgpr_read_b32 v44, a6 ; Reload Reuse
202+
; CHECK-NEXT: v_accvgpr_read_b32 v43, a5 ; Reload Reuse
203+
; CHECK-NEXT: v_accvgpr_read_b32 v42, a4 ; Reload Reuse
204+
; CHECK-NEXT: v_accvgpr_read_b32 v41, a3 ; Reload Reuse
205+
; CHECK-NEXT: v_accvgpr_read_b32 v40, a2 ; Reload Reuse
206+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
207+
; CHECK-NEXT: s_setpc_b64 s[30:31]
208+
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %lds, i32 0, i32 10
209+
%gep.1 = getelementptr inbounds [512 x i32], ptr addrspace(3) %lds, i32 0, i32 24
210+
%op0 = call i32 asm sideeffect "; def $0", "=^VA"()
211+
%op1 = call i32 asm sideeffect "; def $0", "=^VA"()
212+
%vgpr.def = call { <32 x i32>, <32 x i32> } asm sideeffect "; def $0", "=${v[0:31]},=${v[32:63]}"()
213+
%vgpr.0 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 0
214+
%vgpr.1 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 1
215+
%permute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1)
216+
call void asm sideeffect "; use $0", "{v[0:31]},{v[32:63]}"(<32 x i32> %vgpr.0, <32 x i32> %vgpr.1)
217+
ret i32 %permute
218+
}
219+
220+
; Both ds.permute operands come from "=a" inline-asm defs and the result is
; consumed through an "a" constraint. The expected output copies a0/a1 into
; v0/v1 before the instruction and writes the result back to a0 afterwards.
define void @ds_permute_b32_a_a__use_a(ptr addrspace(3) %lds) #0 {
221+
; CHECK-LABEL: ds_permute_b32_a_a__use_a:
222+
; CHECK: ; %bb.0:
223+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224+
; CHECK-NEXT: ;;#ASMSTART
225+
; CHECK-NEXT: ; def a0
226+
; CHECK-NEXT: ;;#ASMEND
227+
; CHECK-NEXT: ;;#ASMSTART
228+
; CHECK-NEXT: ; def a1
229+
; CHECK-NEXT: ;;#ASMEND
230+
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
231+
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
232+
; CHECK-NEXT: ds_permute_b32 v0, v0, v1
233+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
234+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
235+
; CHECK-NEXT: ;;#ASMSTART
236+
; CHECK-NEXT: ; use a0
237+
; CHECK-NEXT: ;;#ASMEND
238+
; CHECK-NEXT: s_setpc_b64 s[30:31]
239+
%op0 = call i32 asm "; def $0", "=a"()
240+
%op1 = call i32 asm "; def $0", "=a"()
241+
%permute = call i32 @llvm.amdgcn.ds.permute(i32 %op0, i32 %op1)
242+
call void asm sideeffect "; use $0", "a"(i32 %permute)
243+
ret void
244+
}
245+
246+
; Operands and result of ds.permute all use the "^VA" inline-asm constraint.
; The expected output places everything in v-registers (v0/v1), so no
; v_accvgpr copies appear.
define void @ds_permute_b32_av_av__use_av(ptr addrspace(3) %lds) #0 {
247+
; CHECK-LABEL: ds_permute_b32_av_av__use_av:
248+
; CHECK: ; %bb.0:
249+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250+
; CHECK-NEXT: ;;#ASMSTART
251+
; CHECK-NEXT: ; def v0
252+
; CHECK-NEXT: ;;#ASMEND
253+
; CHECK-NEXT: ;;#ASMSTART
254+
; CHECK-NEXT: ; def v1
255+
; CHECK-NEXT: ;;#ASMEND
256+
; CHECK-NEXT: ds_permute_b32 v0, v0, v1
257+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
258+
; CHECK-NEXT: ;;#ASMSTART
259+
; CHECK-NEXT: ; use v0
260+
; CHECK-NEXT: ;;#ASMEND
261+
; CHECK-NEXT: s_setpc_b64 s[30:31]
262+
%op0 = call i32 asm "; def $0", "=^VA"()
263+
%op1 = call i32 asm "; def $0", "=^VA"()
264+
%permute = call i32 @llvm.amdgcn.ds.permute(i32 %op0, i32 %op1)
265+
call void asm sideeffect "; use $0", "^VA"(i32 %permute)
266+
ret void
267+
}
268+
269+
; ds.permute with "^VA"-constrained operands while two <32 x i32> values
; pinned to v[0:31] and v[32:63] are defined before and used after the
; intrinsic call in the IR. The expected output defines the operands in a0/a1
; and copies them into v0/v1 immediately before ds_permute_b32; v40-v47 and
; v56-v63 are saved to and restored from a2-a17 around the body.
; NOTE(review): %gep.0 and %gep.1 have no users in this function; they look
; like leftovers - confirm whether they can be dropped.
define i32 @ds_permute_b32_av_av_no_vgprs(ptr addrspace(3) %lds) #0 {
270+
; CHECK-LABEL: ds_permute_b32_av_av_no_vgprs:
271+
; CHECK: ; %bb.0:
272+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273+
; CHECK-NEXT: v_accvgpr_write_b32 a2, v40 ; Reload Reuse
274+
; CHECK-NEXT: v_accvgpr_write_b32 a3, v41 ; Reload Reuse
275+
; CHECK-NEXT: v_accvgpr_write_b32 a4, v42 ; Reload Reuse
276+
; CHECK-NEXT: v_accvgpr_write_b32 a5, v43 ; Reload Reuse
277+
; CHECK-NEXT: v_accvgpr_write_b32 a6, v44 ; Reload Reuse
278+
; CHECK-NEXT: v_accvgpr_write_b32 a7, v45 ; Reload Reuse
279+
; CHECK-NEXT: v_accvgpr_write_b32 a8, v46 ; Reload Reuse
280+
; CHECK-NEXT: v_accvgpr_write_b32 a9, v47 ; Reload Reuse
281+
; CHECK-NEXT: v_accvgpr_write_b32 a10, v56 ; Reload Reuse
282+
; CHECK-NEXT: v_accvgpr_write_b32 a11, v57 ; Reload Reuse
283+
; CHECK-NEXT: v_accvgpr_write_b32 a12, v58 ; Reload Reuse
284+
; CHECK-NEXT: v_accvgpr_write_b32 a13, v59 ; Reload Reuse
285+
; CHECK-NEXT: v_accvgpr_write_b32 a14, v60 ; Reload Reuse
286+
; CHECK-NEXT: v_accvgpr_write_b32 a15, v61 ; Reload Reuse
287+
; CHECK-NEXT: v_accvgpr_write_b32 a16, v62 ; Reload Reuse
288+
; CHECK-NEXT: v_accvgpr_write_b32 a17, v63 ; Reload Reuse
289+
; CHECK-NEXT: ;;#ASMSTART
290+
; CHECK-NEXT: ; def a0
291+
; CHECK-NEXT: ;;#ASMEND
292+
; CHECK-NEXT: ;;#ASMSTART
293+
; CHECK-NEXT: ; def a1
294+
; CHECK-NEXT: ;;#ASMEND
295+
; CHECK-NEXT: ;;#ASMSTART
296+
; CHECK-NEXT: ; def v[0:31]
297+
; CHECK-NEXT: ;;#ASMEND
298+
; CHECK-NEXT: ;;#ASMSTART
299+
; CHECK-NEXT: ; use v[0:31]
300+
; CHECK-NEXT: ;;#ASMEND
301+
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
302+
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
303+
; CHECK-NEXT: ds_permute_b32 v0, v0, v1
304+
; CHECK-NEXT: v_accvgpr_read_b32 v63, a17 ; Reload Reuse
305+
; CHECK-NEXT: v_accvgpr_read_b32 v62, a16 ; Reload Reuse
306+
; CHECK-NEXT: v_accvgpr_read_b32 v61, a15 ; Reload Reuse
307+
; CHECK-NEXT: v_accvgpr_read_b32 v60, a14 ; Reload Reuse
308+
; CHECK-NEXT: v_accvgpr_read_b32 v59, a13 ; Reload Reuse
309+
; CHECK-NEXT: v_accvgpr_read_b32 v58, a12 ; Reload Reuse
310+
; CHECK-NEXT: v_accvgpr_read_b32 v57, a11 ; Reload Reuse
311+
; CHECK-NEXT: v_accvgpr_read_b32 v56, a10 ; Reload Reuse
312+
; CHECK-NEXT: v_accvgpr_read_b32 v47, a9 ; Reload Reuse
313+
; CHECK-NEXT: v_accvgpr_read_b32 v46, a8 ; Reload Reuse
314+
; CHECK-NEXT: v_accvgpr_read_b32 v45, a7 ; Reload Reuse
315+
; CHECK-NEXT: v_accvgpr_read_b32 v44, a6 ; Reload Reuse
316+
; CHECK-NEXT: v_accvgpr_read_b32 v43, a5 ; Reload Reuse
317+
; CHECK-NEXT: v_accvgpr_read_b32 v42, a4 ; Reload Reuse
318+
; CHECK-NEXT: v_accvgpr_read_b32 v41, a3 ; Reload Reuse
319+
; CHECK-NEXT: v_accvgpr_read_b32 v40, a2 ; Reload Reuse
320+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
321+
; CHECK-NEXT: s_setpc_b64 s[30:31]
322+
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %lds, i32 0, i32 10
323+
%gep.1 = getelementptr inbounds [512 x i32], ptr addrspace(3) %lds, i32 0, i32 24
324+
%op0 = call i32 asm sideeffect "; def $0", "=^VA"()
325+
%op1 = call i32 asm sideeffect "; def $0", "=^VA"()
326+
%vgpr.def = call { <32 x i32>, <32 x i32> } asm sideeffect "; def $0", "=${v[0:31]},=${v[32:63]}"()
327+
%vgpr.0 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 0
328+
%vgpr.1 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 1
329+
%permute = call i32 @llvm.amdgcn.ds.permute(i32 %op0, i32 %op1)
330+
call void asm sideeffect "; use $0", "{v[0:31]},{v[32:63]}"(<32 x i32> %vgpr.0, <32 x i32> %vgpr.1)
331+
ret i32 %permute
332+
}
333+
334+
; Attribute group #0, referenced by every function definition in this test:
; nounwind plus the string attribute "amdgpu-waves-per-eu"="10,10".
; NOTE(review): presumably the waves-per-eu setting is what limits the
; register budget in the *_no_vgprs tests - confirm against the AMDGPU docs.
attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" }

0 commit comments

Comments
 (0)