|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | 2 | ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-SDAG %s |
3 | 3 | ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-GISEL %s |
4 | | -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942-STRESS,GFX942-SDAG-STRESS %s |
5 | | -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942-STRESS,GFX942-GISEL-STRESS %s |
6 | 4 |
|
7 | 5 | declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32) |
8 | 6 | declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32) |
@@ -51,50 +49,6 @@ define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) #0 { |
51 | 49 | ; GFX942-GISEL-NEXT: s_nop 5 |
52 | 50 | ; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] |
53 | 51 | ; GFX942-GISEL-NEXT: s_endpgm |
54 | | -; |
55 | | -; GFX942-SDAG-STRESS-LABEL: test_mfma_f32_16x16x8xf32: |
56 | | -; GFX942-SDAG-STRESS: ; %bb.0: ; %bb |
57 | | -; GFX942-SDAG-STRESS-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 |
58 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 1.0 |
59 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v1, 2.0 |
60 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v2, 0x40400000 |
61 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v3, 4.0 |
62 | | -; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
63 | | -; GFX942-SDAG-STRESS-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 |
64 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v4, 0 |
65 | | -; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
66 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 |
67 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 |
68 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 |
69 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 |
70 | | -; GFX942-SDAG-STRESS-NEXT: s_nop 1 |
71 | | -; GFX942-SDAG-STRESS-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 |
72 | | -; GFX942-SDAG-STRESS-NEXT: s_nop 6 |
73 | | -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v4, a[0:3], s[6:7] |
74 | | -; GFX942-SDAG-STRESS-NEXT: s_endpgm |
75 | | -; |
76 | | -; GFX942-GISEL-STRESS-LABEL: test_mfma_f32_16x16x8xf32: |
77 | | -; GFX942-GISEL-STRESS: ; %bb.0: ; %bb |
78 | | -; GFX942-GISEL-STRESS-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 |
79 | | -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 1.0 |
80 | | -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s2, 0x40400000 |
81 | | -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 2.0 |
82 | | -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s3, 4.0 |
83 | | -; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
84 | | -; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[2:3], s[2:3] |
85 | | -; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
86 | | -; GFX942-GISEL-STRESS-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 |
87 | | -; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
88 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 |
89 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 |
90 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 |
91 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 |
92 | | -; GFX942-GISEL-STRESS-NEXT: s_nop 1 |
93 | | -; GFX942-GISEL-STRESS-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 |
94 | | -; GFX942-GISEL-STRESS-NEXT: v_mov_b32_e32 v0, 0 |
95 | | -; GFX942-GISEL-STRESS-NEXT: s_nop 5 |
96 | | -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] |
97 | | -; GFX942-GISEL-STRESS-NEXT: s_endpgm |
98 | 52 | bb: |
99 | 53 | %in.1 = load <4 x float>, ptr addrspace(1) %arg |
100 | 54 | %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3) |
@@ -178,82 +132,6 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 { |
178 | 132 | ; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 |
179 | 133 | ; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 |
180 | 134 | ; GFX942-GISEL-NEXT: s_endpgm |
181 | | -; |
182 | | -; GFX942-SDAG-STRESS-LABEL: test_mfma_f32_32x32x4xf32: |
183 | | -; GFX942-SDAG-STRESS: ; %bb.0: ; %bb |
184 | | -; GFX942-SDAG-STRESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 |
185 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 1.0 |
186 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v1, 2.0 |
187 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v2, 0x40400000 |
188 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v3, 4.0 |
189 | | -; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
190 | | -; GFX942-SDAG-STRESS-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 |
191 | | -; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
192 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 |
193 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 |
194 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 |
195 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 |
196 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a4, s4 |
197 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a5, s5 |
198 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a6, s6 |
199 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a7, s7 |
200 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a8, s8 |
201 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a9, s9 |
202 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a10, s10 |
203 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a11, s11 |
204 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a12, s12 |
205 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a13, s13 |
206 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a14, s14 |
207 | | -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a15, s15 |
208 | | -; GFX942-SDAG-STRESS-NEXT: s_nop 1 |
209 | | -; GFX942-SDAG-STRESS-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 |
210 | | -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 0 |
211 | | -; GFX942-SDAG-STRESS-NEXT: s_nop 7 |
212 | | -; GFX942-SDAG-STRESS-NEXT: s_nop 1 |
213 | | -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 |
214 | | -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 |
215 | | -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 |
216 | | -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17] |
217 | | -; GFX942-SDAG-STRESS-NEXT: s_endpgm |
218 | | -; |
219 | | -; GFX942-GISEL-STRESS-LABEL: test_mfma_f32_32x32x4xf32: |
220 | | -; GFX942-GISEL-STRESS: ; %bb.0: ; %bb |
221 | | -; GFX942-GISEL-STRESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 |
222 | | -; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
223 | | -; GFX942-GISEL-STRESS-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 |
224 | | -; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
225 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 |
226 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 |
227 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 |
228 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 |
229 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a4, s4 |
230 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a5, s5 |
231 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a6, s6 |
232 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a7, s7 |
233 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a8, s8 |
234 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a9, s9 |
235 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a10, s10 |
236 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a11, s11 |
237 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a12, s12 |
238 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a13, s13 |
239 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a14, s14 |
240 | | -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a15, s15 |
241 | | -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 1.0 |
242 | | -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 2.0 |
243 | | -; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
244 | | -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 0x40400000 |
245 | | -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 4.0 |
246 | | -; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
247 | | -; GFX942-GISEL-STRESS-NEXT: s_nop 1 |
248 | | -; GFX942-GISEL-STRESS-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 |
249 | | -; GFX942-GISEL-STRESS-NEXT: v_mov_b32_e32 v0, 0 |
250 | | -; GFX942-GISEL-STRESS-NEXT: s_nop 7 |
251 | | -; GFX942-GISEL-STRESS-NEXT: s_nop 1 |
252 | | -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17] |
253 | | -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 |
254 | | -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 |
255 | | -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 |
256 | | -; GFX942-GISEL-STRESS-NEXT: s_endpgm |
257 | 135 | bb: |
258 | 136 | %in.1 = load <16 x float>, ptr addrspace(1) %arg |
259 | 137 | %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3) |
|
264 | 142 | attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } |
265 | 143 | ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
266 | 144 | ; GFX942: {{.*}} |
267 | | -; GFX942-STRESS: {{.*}} |
0 commit comments