Skip to content

Commit c6f8acd

Browse files
committed
AMDGPU: Test VGPR and AGPR case for xf32 mfmas
1 parent 29fc34e commit c6f8acd

File tree

1 file changed

+115
-0
lines changed

1 file changed

+115
-0
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,52 @@ bb:
5555
store <4 x float> %mai.1, ptr addrspace(1) %arg
5656
ret void
5757
}
58+
; 16x16x8 xf32 MFMA test using attribute group #1 (which sets
; "amdgpu-agpr-alloc"="0,0" and a 512,512 flat work-group size).
;
; The kernel loads a <4 x float> accumulator from %arg, passes it to
; llvm.amdgcn.mfma.f32.16x16x8.xf32 together with the constant <2 x float>
; operands <1.0, 2.0> and <3.0, 4.0>, and stores the result back to %arg.
; The trailing i32 operands 1, 2, 3 appear in the expected assembly as
; cbsz:1 abid:2 blgp:3. In both the SDAG and GISEL expectations the
; accumulator input and the result of the MFMA are v[0:3]; the constant 3.0
; is materialized as the literal 0x40400000.
define amdgpu_kernel void @test_mfma_f32_16x16x8xf32_vgprcd(ptr addrspace(1) %arg) #1 {
59+
; GFX942-SDAG-LABEL: test_mfma_f32_16x16x8xf32_vgprcd:
60+
; GFX942-SDAG: ; %bb.0: ; %bb
61+
; GFX942-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
62+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, 1.0
63+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, 2.0
64+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, 0x40400000
65+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, 4.0
66+
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
67+
; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
68+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, 0
69+
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
70+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
71+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
72+
; GFX942-SDAG-NEXT: s_nop 1
73+
; GFX942-SDAG-NEXT: v_mfma_f32_16x16x8_xf32 v[0:3], v[4:5], v[6:7], v[0:3] cbsz:1 abid:2 blgp:3
74+
; GFX942-SDAG-NEXT: s_nop 6
75+
; GFX942-SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7]
76+
; GFX942-SDAG-NEXT: s_endpgm
77+
;
78+
; GFX942-GISEL-LABEL: test_mfma_f32_16x16x8xf32_vgprcd:
79+
; GFX942-GISEL: ; %bb.0: ; %bb
80+
; GFX942-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
81+
; GFX942-GISEL-NEXT: s_mov_b32 s4, 1.0
82+
; GFX942-GISEL-NEXT: s_mov_b32 s5, 2.0
83+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
84+
; GFX942-GISEL-NEXT: s_mov_b32 s4, 0x40400000
85+
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
86+
; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
87+
; GFX942-GISEL-NEXT: s_mov_b32 s5, 4.0
88+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
89+
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
90+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
91+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
92+
; GFX942-GISEL-NEXT: s_nop 1
93+
; GFX942-GISEL-NEXT: v_mfma_f32_16x16x8_xf32 v[0:3], v[4:5], v[6:7], v[0:3] cbsz:1 abid:2 blgp:3
94+
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, 0
95+
; GFX942-GISEL-NEXT: s_nop 5
96+
; GFX942-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
97+
; GFX942-GISEL-NEXT: s_endpgm
98+
bb:
99+
%in.1 = load <4 x float>, ptr addrspace(1) %arg
100+
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3)
101+
store <4 x float> %mai.1, ptr addrspace(1) %arg
102+
ret void
103+
}
58104

59105
define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 {
60106
; GFX942-SDAG-LABEL: test_mfma_f32_32x32x4xf32:
@@ -139,6 +185,75 @@ bb:
139185
ret void
140186
}
141187

188+
; 32x32x4 xf32 MFMA test using attribute group #1 (which sets
; "amdgpu-agpr-alloc"="0,0" and a 512,512 flat work-group size).
;
; The kernel loads a <16 x float> accumulator from %arg, passes it to
; llvm.amdgcn.mfma.f32.32x32x4.xf32 together with the constant <2 x float>
; operands <1.0, 2.0> and <3.0, 4.0>, and stores the result back to %arg.
; The trailing i32 operands 1, 2, 3 appear in the expected assembly as
; cbsz:1 abid:2 blgp:3. In both the SDAG and GISEL expectations the
; accumulator input and the result of the MFMA are v[0:15], and the result is
; written back with four global_store_dwordx4 instructions (offsets 0, 16, 32
; and 48); SDAG is expected to emit them in descending offset order and GISEL
; in ascending order.
define amdgpu_kernel void @test_mfma_f32_32x32x4xf32_vgprcd(ptr addrspace(1) %arg) #1 {
189+
; GFX942-SDAG-LABEL: test_mfma_f32_32x32x4xf32_vgprcd:
190+
; GFX942-SDAG: ; %bb.0: ; %bb
191+
; GFX942-SDAG-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
192+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, 1.0
193+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, 2.0
194+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, 0x40400000
195+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, 4.0
196+
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
197+
; GFX942-SDAG-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
198+
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
199+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
200+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
201+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
202+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
203+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
204+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
205+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
206+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
207+
; GFX942-SDAG-NEXT: s_nop 1
208+
; GFX942-SDAG-NEXT: v_mfma_f32_32x32x4_xf32 v[0:15], v[16:17], v[18:19], v[0:15] cbsz:1 abid:2 blgp:3
209+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, 0
210+
; GFX942-SDAG-NEXT: s_nop 7
211+
; GFX942-SDAG-NEXT: s_nop 1
212+
; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
213+
; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:32
214+
; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
215+
; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
216+
; GFX942-SDAG-NEXT: s_endpgm
217+
;
218+
; GFX942-GISEL-LABEL: test_mfma_f32_32x32x4xf32_vgprcd:
219+
; GFX942-GISEL: ; %bb.0: ; %bb
220+
; GFX942-GISEL-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
221+
; GFX942-GISEL-NEXT: s_mov_b32 s18, 1.0
222+
; GFX942-GISEL-NEXT: s_mov_b32 s19, 2.0
223+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[18:19]
224+
; GFX942-GISEL-NEXT: s_mov_b32 s18, 0x40400000
225+
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
226+
; GFX942-GISEL-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
227+
; GFX942-GISEL-NEXT: s_mov_b32 s19, 4.0
228+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
229+
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
230+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
231+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
232+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
233+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
234+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
235+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
236+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
237+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
238+
; GFX942-GISEL-NEXT: s_nop 1
239+
; GFX942-GISEL-NEXT: v_mfma_f32_32x32x4_xf32 v[0:15], v[16:17], v[18:19], v[0:15] cbsz:1 abid:2 blgp:3
240+
; GFX942-GISEL-NEXT: v_mov_b32_e32 v16, 0
241+
; GFX942-GISEL-NEXT: s_nop 7
242+
; GFX942-GISEL-NEXT: s_nop 1
243+
; GFX942-GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
244+
; GFX942-GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
245+
; GFX942-GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:32
246+
; GFX942-GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
247+
; GFX942-GISEL-NEXT: s_endpgm
248+
bb:
249+
%in.1 = load <16 x float>, ptr addrspace(1) %arg
250+
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3)
251+
store <16 x float> %mai.1, ptr addrspace(1) %arg
252+
ret void
253+
}
254+
142255
; #0: attribute group referenced by kernels such as @test_mfma_f32_32x32x4xf32;
; it only sets the flat work-group size range to 1,256.
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
256+
; #1: attribute group referenced by the *_vgprcd kernels; fixes the flat
; work-group size at 512,512 and sets "amdgpu-agpr-alloc" to 0,0.
; NOTE(review): presumably the 0,0 AGPR allocation is what forces the MFMA
; accumulator/result into VGPRs (the checks for those kernels expect v[...]
; operands) -- confirm against the AMDGPU backend documentation.
attributes #1 = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-agpr-alloc"="0,0" }
257+
143258
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
144259
; GFX942: {{.*}}

0 commit comments

Comments
 (0)