Skip to content

Commit 7476360

Browse files
authored
AMDGPU: Test VGPR and AGPR case for xf32 mfmas (#150891)
1 parent 7f47058 commit 7476360

File tree

1 file changed

+116
-0
lines changed

1 file changed

+116
-0
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,53 @@ bb:
5656
ret void
5757
}
5858

59+
; Kernel test for @llvm.amdgcn.mfma.f32.16x16x8.xf32 compiled with attribute
; group #1. It loads a <4 x float> accumulator from %arg, runs the MFMA with
; constant A/B operands, and stores the <4 x float> result back to %arg.
; In both the SDAG and GISEL expectations below, the MFMA's C input and D
; output are the same VGPR tuple v[0:3]; no AGPR operands appear.
; The constant 3.0 shows up in the expectations as its bit pattern 0x40400000.
define amdgpu_kernel void @test_mfma_f32_16x16x8xf32_vgprcd(ptr addrspace(1) %arg) #1 {
60+
; GFX942-SDAG-LABEL: test_mfma_f32_16x16x8xf32_vgprcd:
61+
; GFX942-SDAG: ; %bb.0: ; %bb
62+
; GFX942-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
63+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, 1.0
64+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, 2.0
65+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, 0x40400000
66+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, 4.0
67+
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
68+
; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
69+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, 0
70+
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
71+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
72+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
73+
; GFX942-SDAG-NEXT: s_nop 1
74+
; GFX942-SDAG-NEXT: v_mfma_f32_16x16x8_xf32 v[0:3], v[4:5], v[6:7], v[0:3] cbsz:1 abid:2 blgp:3
75+
; GFX942-SDAG-NEXT: s_nop 6
76+
; GFX942-SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7]
77+
; GFX942-SDAG-NEXT: s_endpgm
78+
;
79+
; GFX942-GISEL-LABEL: test_mfma_f32_16x16x8xf32_vgprcd:
80+
; GFX942-GISEL: ; %bb.0: ; %bb
81+
; GFX942-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
82+
; GFX942-GISEL-NEXT: s_mov_b32 s4, 1.0
83+
; GFX942-GISEL-NEXT: s_mov_b32 s5, 2.0
84+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
85+
; GFX942-GISEL-NEXT: s_mov_b32 s4, 0x40400000
86+
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
87+
; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
88+
; GFX942-GISEL-NEXT: s_mov_b32 s5, 4.0
89+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
90+
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
91+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
92+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
93+
; GFX942-GISEL-NEXT: s_nop 1
94+
; GFX942-GISEL-NEXT: v_mfma_f32_16x16x8_xf32 v[0:3], v[4:5], v[6:7], v[0:3] cbsz:1 abid:2 blgp:3
95+
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, 0
96+
; GFX942-GISEL-NEXT: s_nop 5
97+
; GFX942-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
98+
; GFX942-GISEL-NEXT: s_endpgm
99+
bb:
100+
%in.1 = load <4 x float>, ptr addrspace(1) %arg
101+
; Operands: A = <1.0, 2.0>, B = <3.0, 4.0>, C = %in.1. The trailing i32
; immediates 1, 2, 3 are the values printed as cbsz:1 abid:2 blgp:3 in the
; expected v_mfma_f32_16x16x8_xf32 instruction.
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3)
102+
store <4 x float> %mai.1, ptr addrspace(1) %arg
103+
ret void
104+
}
105+
59106
define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 {
60107
; GFX942-SDAG-LABEL: test_mfma_f32_32x32x4xf32:
61108
; GFX942-SDAG: ; %bb.0: ; %bb
@@ -139,6 +186,75 @@ bb:
139186
ret void
140187
}
141188

189+
; Kernel test for @llvm.amdgcn.mfma.f32.32x32x4.xf32 compiled with attribute
; group #1. It loads a <16 x float> accumulator from %arg, runs the MFMA with
; constant A/B operands, and stores the <16 x float> result back to %arg.
; In both the SDAG and GISEL expectations below, the MFMA's C input and D
; output are the same VGPR tuple v[0:15]; no AGPR operands appear.
; The result is written out as four global_store_dwordx4 at offsets 0/16/32/48
; (SDAG emits them highest offset first, GISEL lowest offset first).
define amdgpu_kernel void @test_mfma_f32_32x32x4xf32_vgprcd(ptr addrspace(1) %arg) #1 {
190+
; GFX942-SDAG-LABEL: test_mfma_f32_32x32x4xf32_vgprcd:
191+
; GFX942-SDAG: ; %bb.0: ; %bb
192+
; GFX942-SDAG-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
193+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, 1.0
194+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, 2.0
195+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, 0x40400000
196+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, 4.0
197+
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
198+
; GFX942-SDAG-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
199+
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
200+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
201+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
202+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
203+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
204+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
205+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
206+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
207+
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
208+
; GFX942-SDAG-NEXT: s_nop 1
209+
; GFX942-SDAG-NEXT: v_mfma_f32_32x32x4_xf32 v[0:15], v[16:17], v[18:19], v[0:15] cbsz:1 abid:2 blgp:3
210+
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, 0
211+
; GFX942-SDAG-NEXT: s_nop 7
212+
; GFX942-SDAG-NEXT: s_nop 1
213+
; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
214+
; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:32
215+
; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
216+
; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
217+
; GFX942-SDAG-NEXT: s_endpgm
218+
;
219+
; GFX942-GISEL-LABEL: test_mfma_f32_32x32x4xf32_vgprcd:
220+
; GFX942-GISEL: ; %bb.0: ; %bb
221+
; GFX942-GISEL-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
222+
; GFX942-GISEL-NEXT: s_mov_b32 s18, 1.0
223+
; GFX942-GISEL-NEXT: s_mov_b32 s19, 2.0
224+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[18:19]
225+
; GFX942-GISEL-NEXT: s_mov_b32 s18, 0x40400000
226+
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
227+
; GFX942-GISEL-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
228+
; GFX942-GISEL-NEXT: s_mov_b32 s19, 4.0
229+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
230+
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
231+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
232+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
233+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
234+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
235+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
236+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
237+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
238+
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
239+
; GFX942-GISEL-NEXT: s_nop 1
240+
; GFX942-GISEL-NEXT: v_mfma_f32_32x32x4_xf32 v[0:15], v[16:17], v[18:19], v[0:15] cbsz:1 abid:2 blgp:3
241+
; GFX942-GISEL-NEXT: v_mov_b32_e32 v16, 0
242+
; GFX942-GISEL-NEXT: s_nop 7
243+
; GFX942-GISEL-NEXT: s_nop 1
244+
; GFX942-GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
245+
; GFX942-GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
246+
; GFX942-GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:32
247+
; GFX942-GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
248+
; GFX942-GISEL-NEXT: s_endpgm
249+
bb:
250+
%in.1 = load <16 x float>, ptr addrspace(1) %arg
251+
; Operands: A = <1.0, 2.0>, B = <3.0, 4.0>, C = %in.1. The trailing i32
; immediates 1, 2, 3 are the values printed as cbsz:1 abid:2 blgp:3 in the
; expected v_mfma_f32_32x32x4_xf32 instruction.
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3)
252+
store <16 x float> %mai.1, ptr addrspace(1) %arg
253+
ret void
254+
}
255+
142256
; #0: flat work-group size range 1,256.
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
257+
; #1: attribute group used by the *_vgprcd kernels in this file. It fixes the
; flat work-group size at 512,512 and sets "amdgpu-agpr-alloc" to "0,0".
; NOTE(review): presumably this is what steers the MFMA C/D operands into
; VGPRs in those tests -- confirm against the AMDGPU backend documentation.
attributes #1 = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-agpr-alloc"="0,0" }
258+
143259
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
144260
; GFX942: {{.*}}

0 commit comments

Comments
 (0)