@@ -986,6 +986,50 @@ define void @test_rewrite_mfma_f32_16x16x4f32(float %arg0, float %arg1, ptr addr
986986 ret void
987987}
988988
; Test: 32x32x4 f16 MFMA whose accumulator input (%src2) is loaded from %ptr and
; whose <32 x float> result is consumed only by inline asm through an
; "a"-constrained operand.
; The CHECK lines below expect:
;   - eight global_load_dwordx4 instructions writing a[0:31] directly,
;   - v_mfma_f32_32x32x4_2b_f16 using a[0:31] as both destination and
;     accumulator source, with v[0:1] / v[2:3] as the two <4 x half> inputs,
;   - the inline asm seeing a[0:31].
; NOTE(review): the CHECK lines look autogenerated; presumably they should be
; regenerated with the update script rather than hand-edited. Confirm against
; the file header.
989+ define void @test_rewrite_mfma_f32_32x32x4f16 (<4 x half > %arg0 , <4 x half > %arg1 , ptr addrspace (1 ) %ptr ) #0 {
990+ ; CHECK-LABEL: test_rewrite_mfma_f32_32x32x4f16:
991+ ; CHECK: ; %bb.0:
992+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
993+ ; CHECK-NEXT: global_load_dwordx4 a[28:31], v[4:5], off offset:112
994+ ; CHECK-NEXT: global_load_dwordx4 a[24:27], v[4:5], off offset:96
995+ ; CHECK-NEXT: global_load_dwordx4 a[20:23], v[4:5], off offset:80
996+ ; CHECK-NEXT: global_load_dwordx4 a[16:19], v[4:5], off offset:64
997+ ; CHECK-NEXT: global_load_dwordx4 a[12:15], v[4:5], off offset:48
998+ ; CHECK-NEXT: global_load_dwordx4 a[8:11], v[4:5], off offset:32
999+ ; CHECK-NEXT: global_load_dwordx4 a[4:7], v[4:5], off offset:16
1000+ ; CHECK-NEXT: global_load_dwordx4 a[0:3], v[4:5], off
1001+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
1002+ ; CHECK-NEXT: v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[0:31]
1003+ ; CHECK-NEXT: ;;#ASMSTART
1004+ ; CHECK-NEXT: ; use a[0:31]
1005+ ; CHECK-NEXT: ;;#ASMEND
1006+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Accumulator input: 32 floats read through the addrspace(1) pointer argument.
1007+ %src2 = load <32 x float >, ptr addrspace (1 ) %ptr
; The three trailing i32 operands are all zero (presumably the cbsz/abid/blgp
; modifiers; confirm against the intrinsic definition).
1008+ %mai = call <32 x float > @llvm.amdgcn.mfma.f32.32x32x4f16 (<4 x half > %arg0 , <4 x half > %arg1 , <32 x float > %src2 , i32 0 , i32 0 , i32 0 )
; The only use of the MFMA result is this "a"-constrained asm operand; the
; expected output above binds $0 to a[0:31].
1009+ call void asm sideeffect "; use $0" , "a" (<32 x float > %mai )
1010+ ret void
1011+ }
1012+
; Test: 16x16x4 f16 MFMA whose accumulator input (%src2) is loaded from %ptr and
; whose <16 x float> result is consumed only by inline asm through an
; "a"-constrained operand.
; The CHECK lines below expect:
;   - four global_load_dwordx4 instructions writing a[0:15] directly,
;   - v_mfma_f32_16x16x4_4b_f16 using a[0:15] as both destination and
;     accumulator source, with v[0:1] / v[2:3] as the two <4 x half> inputs,
;   - the inline asm seeing a[0:15].
; NOTE(review): the CHECK lines look autogenerated; presumably they should be
; regenerated with the update script rather than hand-edited. Confirm against
; the file header.
1013+ define void @test_rewrite_mfma_f32_16x16x4f16 (<4 x half > %arg0 , <4 x half > %arg1 , ptr addrspace (1 ) %ptr ) #0 {
1014+ ; CHECK-LABEL: test_rewrite_mfma_f32_16x16x4f16:
1015+ ; CHECK: ; %bb.0:
1016+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1017+ ; CHECK-NEXT: global_load_dwordx4 a[12:15], v[4:5], off offset:48
1018+ ; CHECK-NEXT: global_load_dwordx4 a[8:11], v[4:5], off offset:32
1019+ ; CHECK-NEXT: global_load_dwordx4 a[4:7], v[4:5], off offset:16
1020+ ; CHECK-NEXT: global_load_dwordx4 a[0:3], v[4:5], off
1021+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
1022+ ; CHECK-NEXT: v_mfma_f32_16x16x4_4b_f16 a[0:15], v[0:1], v[2:3], a[0:15]
1023+ ; CHECK-NEXT: ;;#ASMSTART
1024+ ; CHECK-NEXT: ; use a[0:15]
1025+ ; CHECK-NEXT: ;;#ASMEND
1026+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Accumulator input: 16 floats read through the addrspace(1) pointer argument.
1027+ %src2 = load <16 x float >, ptr addrspace (1 ) %ptr
; The three trailing i32 operands are all zero (presumably the cbsz/abid/blgp
; modifiers; confirm against the intrinsic definition).
1028+ %mai = call <16 x float > @llvm.amdgcn.mfma.f32.16x16x4f16 (<4 x half > %arg0 , <4 x half > %arg1 , <16 x float > %src2 , i32 0 , i32 0 , i32 0 )
; The only use of the MFMA result is this "a"-constrained asm operand; the
; expected output above binds $0 to a[0:15].
1029+ call void asm sideeffect "; use $0" , "a" (<16 x float > %mai )
1030+ ret void
1031+ }
1032+
9891033define void @test_rewrite_mfma_f32_4x4x4f16 (<4 x half > %arg0 , <4 x half > %arg1 , ptr addrspace (1 ) %ptr ) #0 {
9901034; CHECK-LABEL: test_rewrite_mfma_f32_4x4x4f16:
9911035; CHECK: ; %bb.0:
0 commit comments