Skip to content

Commit bcf41e0

Browse files
authored
AMDGPU: Add baseline test for vgpr mfma with copied-from AGPR (llvm#153020)
1 parent eefad74 commit bcf41e0

File tree

2 files changed

+648
-0
lines changed

2 files changed

+648
-0
lines changed
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s | FileCheck %s
3+
4+
# src2 of the MFMA is a VGPR copy of the live-in physical AGPR pair
# $agpr0_agpr1 rather than of a virtual AGPR. The CHECK lines mirror the
# input instruction-for-instruction, i.e. the MFMA keeps its vgprcd form.
---
5+
name: test_rewrite_mfma_copy_from_agpr_physreg
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $agpr0_agpr1
10+
11+
; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_physreg
12+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $agpr0_agpr1
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
15+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
16+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
17+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY $agpr0_agpr1
18+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]], 0, 0, 0, implicit $mode, implicit $exec
19+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
20+
; CHECK-NEXT: SI_RETURN
21+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
22+
%1:av_64_align2 = COPY $vgpr0_vgpr1
23+
%2:av_64_align2 = COPY $vgpr2_vgpr3
24+
%3:vreg_64_align2 = COPY $agpr0_agpr1
25+
%4:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec
26+
GLOBAL_STORE_DWORDX2 %0, %4, 0, 0, implicit $exec :: (store (s64), addrspace 1)
27+
SI_RETURN
28+
...
29+
30+
# src2 is sub0_sub1 of a VGPR copy of an AGPR load result, and the MFMA
# result is used by an INLINEASM operand annotated reguse:VReg_64_Align2.
# The CHECK lines mirror the input, i.e. the MFMA keeps its vgprcd form.
---
31+
name: test_rewrite_mfma_copy_from_agpr_unrewritable_use
32+
tracksRegLiveness: true
33+
body: |
34+
bb.0:
35+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
36+
37+
; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_unrewritable_use
38+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
39+
; CHECK-NEXT: {{ $}}
40+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
41+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
42+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
43+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
44+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
45+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
46+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3866633 /* reguse:VReg_64_Align2 */, [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]]
47+
; CHECK-NEXT: SI_RETURN
48+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
49+
%1:av_64_align2 = COPY $vgpr0_vgpr1
50+
%2:av_64_align2 = COPY $vgpr2_vgpr3
51+
%3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
52+
%4:vreg_128_align2 = COPY %3
53+
%5:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
54+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3866633 /* reguse:VReg_64_Align2 */, %5
55+
SI_RETURN
56+
...
57+
58+
# src2 reads only sub0_sub1 of a 128-bit VGPR copy of an AGPR load result,
# and the MFMA result is stored with GLOBAL_STORE_DWORDX2. The CHECK lines
# mirror the input, i.e. the MFMA keeps its vgprcd form.
---
59+
name: test_rewrite_mfma_copy_from_agpr_src2_subreg_use
60+
tracksRegLiveness: true
61+
body: |
62+
bb.0:
63+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
64+
65+
; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_src2_subreg_use
66+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
69+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
70+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
71+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
72+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
73+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
74+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
75+
; CHECK-NEXT: SI_RETURN
76+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
77+
%1:av_64_align2 = COPY $vgpr0_vgpr1
78+
%2:av_64_align2 = COPY $vgpr2_vgpr3
79+
%3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
80+
%4:vreg_128_align2 = COPY %3
81+
%5:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
82+
GLOBAL_STORE_DWORDX2 %0, %5, 0, 0, implicit $exec :: (store (s64), addrspace 1)
83+
SI_RETURN
84+
...
85+
86+
# The MFMA defines sub0_sub1 of the same 128-bit VGPR copy whose sub2_sub3
# it reads as src2; the full register is then stored with
# GLOBAL_STORE_DWORDX4. The CHECK lines mirror the input, i.e. the MFMA
# keeps its vgprcd form.
---
87+
name: test_rewrite_mfma_copy_from_agpr_vdst_subreg_use
88+
tracksRegLiveness: true
89+
body: |
90+
bb.0:
91+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
92+
93+
; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_vdst_subreg_use
94+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
95+
; CHECK-NEXT: {{ $}}
96+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
97+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
98+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
99+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
100+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
101+
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
102+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
103+
; CHECK-NEXT: SI_RETURN
104+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
105+
%1:av_64_align2 = COPY $vgpr0_vgpr1
106+
%2:av_64_align2 = COPY $vgpr2_vgpr3
107+
%3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
108+
%4:vreg_128_align2 = COPY %3
109+
%4.sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4.sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
110+
GLOBAL_STORE_DWORDX4 %0, %4, 0, 0, implicit $exec :: (store (s128), addrspace 1)
111+
SI_RETURN
112+
...
113+
114+
# The A-to-V copy is performed one subregister at a time (sub0, then sub1)
# instead of as a single full-register COPY. The CHECK lines mirror the
# input, i.e. the MFMA keeps its vgprcd form.
115+
---
116+
name: test_rewrite_mfma_copy_from_agpr_split_copy
117+
tracksRegLiveness: true
118+
body: |
119+
bb.0:
120+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
121+
122+
; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_split_copy
123+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
124+
; CHECK-NEXT: {{ $}}
125+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
126+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
127+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
128+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
129+
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
130+
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
131+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]], 0, 0, 0, implicit $mode, implicit $exec
132+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
133+
; CHECK-NEXT: SI_RETURN
134+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
135+
%1:av_64_align2 = COPY $vgpr0_vgpr1
136+
%2:av_64_align2 = COPY $vgpr2_vgpr3
137+
%3:areg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
138+
undef %4.sub0:vreg_64_align2 = COPY %3.sub0
139+
%4.sub1:vreg_64_align2 = COPY %3.sub1
140+
%5:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4, 0, 0, 0, implicit $mode, implicit $exec
141+
GLOBAL_STORE_DWORDX2 %0, %5, 0, 0, implicit $exec :: (store (s64), addrspace 1)
142+
SI_RETURN
143+
...
144+
145+
# src2 is a full VGPR copy of an AGPR load result, and the MFMA result is
# copied back into an AGPR before being stored. Unlike the other cases in
# this file, the CHECK lines expect a rewrite: the MFMA becomes
# V_MFMA_F64_4X4X4F64_e64, and both its src2 copy and its result are in
# areg_64_align2.
---
146+
name: test_rewrite_mfma_copy_from_agpr_copyback
147+
tracksRegLiveness: true
148+
body: |
149+
bb.0:
150+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
151+
152+
; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_copyback
153+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
154+
; CHECK-NEXT: {{ $}}
155+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
156+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
157+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
158+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
159+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]]
160+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]], 0, 0, 0, implicit $mode, implicit $exec
161+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]]
162+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY4]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
163+
; CHECK-NEXT: SI_RETURN
164+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
165+
%1:av_64_align2 = COPY $vgpr0_vgpr1
166+
%2:av_64_align2 = COPY $vgpr2_vgpr3
167+
%3:areg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
168+
%4:vreg_64_align2 = COPY %3
169+
%5:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4, 0, 0, 0, implicit $mode, implicit $exec
170+
%6:areg_64_align2 = COPY %5
171+
GLOBAL_STORE_DWORDX2 %0, %6, 0, 0, implicit $exec :: (store (s64), addrspace 1)
172+
SI_RETURN
173+
...
174+
175+
# src2 is an immediate here. The dst operand of the MFMA is a non-undef def
# of sub0_sub1 of the VGPR that was copied from the AGPR, so there is still
# a read of the copy from AGPR to VGPR in the dst operand of the MFMA.
# The CHECK lines mirror the input, i.e. the MFMA keeps its vgprcd form.
176+
---
177+
name: test_rewrite_mfma_copy_from_agpr_vdst_subreg_use_imm_src2
178+
tracksRegLiveness: true
179+
body: |
180+
bb.0:
181+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
182+
183+
; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_vdst_subreg_use_imm_src2
184+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
185+
; CHECK-NEXT: {{ $}}
186+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
187+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
188+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
189+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
190+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
191+
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
192+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
193+
; CHECK-NEXT: SI_RETURN
194+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
195+
%1:av_64_align2 = COPY $vgpr0_vgpr1
196+
%2:av_64_align2 = COPY $vgpr2_vgpr3
197+
%3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
198+
%4:vreg_128_align2 = COPY %3
199+
%4.sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, 0, 0, 0, 0, implicit $mode, implicit $exec
200+
GLOBAL_STORE_DWORDX4 %0, %4, 0, 0, implicit $exec :: (store (s128), addrspace 1)
201+
SI_RETURN
202+
...
203+
204+
# Degenerate case: the copy from AGPR to VGPR is dead, because the MFMA's
# dst operand is an undef subregister def (sub0_sub1) of the same register.
# The CHECK lines expect the COPY to be marked dead and the MFMA result to
# be captured under a separate FileCheck variable, which the store then uses.
205+
---
206+
name: test_rewrite_mfma_copy_from_agpr_undef_vdst_subreg_use_imm_src2
207+
tracksRegLiveness: true
208+
body: |
209+
bb.0:
210+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
211+
212+
; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_undef_vdst_subreg_use_imm_src2
213+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
214+
; CHECK-NEXT: {{ $}}
215+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
216+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
217+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
218+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
219+
; CHECK-NEXT: dead [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
220+
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
221+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
222+
; CHECK-NEXT: SI_RETURN
223+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
224+
%1:av_64_align2 = COPY $vgpr0_vgpr1
225+
%2:av_64_align2 = COPY $vgpr2_vgpr3
226+
%3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
227+
%4:vreg_128_align2 = COPY %3
228+
undef %4.sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, 0, 0, 0, 0, implicit $mode, implicit $exec
229+
GLOBAL_STORE_DWORDX4 %0, %4, 0, 0, implicit $exec :: (store (s128), addrspace 1)
230+
SI_RETURN
231+
...
232+
233+
# Degenerate case: the copy from AGPR to VGPR is dead, but the same virtual
# register is then redefined as a whole register by the MFMA. The CHECK
# lines expect the COPY to be marked dead and the MFMA result to be captured
# under a separate FileCheck variable, which the store then uses.
233+
234+
---
236+
name: test_rewrite_mfma_copy_from_agpr_to_vdst_def_imm_src2
237+
tracksRegLiveness: true
238+
body: |
239+
bb.0:
240+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
241+
242+
; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_to_vdst_def_imm_src2
243+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
244+
; CHECK-NEXT: {{ $}}
245+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
246+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
247+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
248+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
249+
; CHECK-NEXT: dead [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]]
250+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
251+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
252+
; CHECK-NEXT: SI_RETURN
253+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
254+
%1:av_64_align2 = COPY $vgpr0_vgpr1
255+
%2:av_64_align2 = COPY $vgpr2_vgpr3
256+
%3:areg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
257+
%4:vreg_64_align2 = COPY %3
258+
%4:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, 0, 0, 0, 0, implicit $mode, implicit $exec
259+
GLOBAL_STORE_DWORDX2 %0, %4, 0, 0, implicit $exec :: (store (s64), addrspace 1)
260+
SI_RETURN
261+
...

0 commit comments

Comments
 (0)