@@ -395,6 +395,38 @@ llvm.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32,
395395 (vector <8 xf16 >, vector <8 xf16 >, vector <16 xf32 >,
396396 i32 , i32 , i32 ) -> vector <16 xf32 >
397397
398+ // CHECK: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half> %{{.*}}, <8 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
399+ %r34 = rocdl.smfmac.f32.16x16x32.f16 %arg14 , %arg14 , %arg5 , %csti32 , %csti32 , %csti32 :
400+ (vector <4 xf16 >, vector <8 xf16 >, vector <4 xf32 >,
401+ i32 , i32 , i32 ) -> vector <16 xf32 >
402+
403+ // CHECK: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half> %a, <8 x half> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
404+ %r35 = rocdl.smfmac.f32.32x32x16.f16 %arg14 , %arg14 , %arg5 , %csti32 , %csti32 , %csti32 :
405+ (vector <4 xf16 >, vector <8 xf16 >, vector <16 xf32 >,
406+ i32 , i32 , i32 ) -> vector <16 xf32 >
407+
408+ // CHECK: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16> %a, <8 x i16> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
409+ %r36 = rocdl.smfmac.f32.16x16x32.bf16 %arg14 , %arg14 , %arg5 , %csti32 , %csti32 , %csti32 :
410+ (vector <4 xi16 >, vector <8 xi16 >, vector <4 xi16 >,
411+ i32 , i32 , i32 ) -> vector <4 xf32 >
412+
413+ // CHECK: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16> %a, <8 x i16> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
414+ %r37 = rocdl.smfmac.f32.16x16x32.bf16 %arg14 , %arg14 , %arg5 , %csti32 , %csti32 , %csti32 :
415+ (vector <4 xi16 >, vector <8 xi16 >, vector <16 xf32 >,
416+ i32 , i32 , i32 ) -> vector <16 xf32 >
417+
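418+ // NOTE(review): the commented-out TableGen defs below appear to be placeholders for smfmac variants that still lack a test case here; confirm, then add the tests or remove the list.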
419+ //def ROCDL_smfmac_i32_16x16x64_i8 : ROCDL_Mfma_IntrOp<"smfmac.i32.16x16x64.i8">;
420+ //def ROCDL_smfmac_i32_32x32x32_i8 : ROCDL_Mfma_IntrOp<"smfmac.i32.32x32x32.i8">;
421+ //def ROCDL_smfmac_f32_16x16x64_bf8_bf8 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.bf8.bf8">;
422+ //def ROCDL_smfmac_f32_16x16x64_bf8_fp8 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.bf8.fp8">;
423+ //def ROCDL_smfmac_f32_16x16x64_fp8_bf8 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.fp8.bf8">;
424+ //def ROCDL_smfmac_f32_16x16x64_fp8_fp8 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.fp8.fp8">;
425+ //def ROCDL_smfmac_f32_32x32x32_bf8_bf8 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.bf8.bf8">;
426+ //def ROCDL_smfmac_f32_32x32x32_bf8_fp8 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.bf8.fp8">;
427+ //def ROCDL_smfmac_f32_32x32x32_fp8_bf8 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.fp8.bf8">;
428+ //def ROCDL_smfmac_f32_32x32x32_fp8_fp8 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.fp8.fp8">;
429+
398430 llvm.return %r0 : vector <32 x f32 >
399431}
400432
0 commit comments