@@ -713,6 +713,10 @@ def ROCDL_V2I16Type : FixedVectorOfLengthAndType<[2], [I16]>,
713713 BuildableType<"::mlir::VectorType::get("
714714 "{2},$_builder.getI16Type())">;
715715
// Type constraint built from FixedVectorOfLengthAndType<[2], [F32]>.
// The BuildableType snippet lets ODS materialize the type on its own by
// calling ::mlir::VectorType::get with shape {2} and the builder's f32 type.
def ROCDL_V2F32Type
    : FixedVectorOfLengthAndType<[2], [F32]>,
      BuildableType<
          "::mlir::VectorType::get({2},$_builder.getF32Type())">;
719+
// Type constraint built from FixedVectorOfLengthAndType<[2], [F16]>.
// The BuildableType snippet lets ODS materialize the type on its own by
// calling ::mlir::VectorType::get with shape {2} and the builder's f16 type.
def ROCDL_V2F16Type
    : FixedVectorOfLengthAndType<[2], [F16]>,
      BuildableType<
          "::mlir::VectorType::get({2},$_builder.getF16Type())">;
@@ -1005,6 +1009,120 @@ def ROCDL_CvtScaleF32SrBf8F32Op :
10051009 }];
10061010}
10071011
1012+ //===---------------------------------------------------------------------===//
1013+ // 4-bit float scale intrinsics
1014+ //===---------------------------------------------------------------------===//
// Scaled conversion of two f32 operands to packed fp4.
// Operands: `old` (i32), `src0` and `src1` (f32), `scale` (f32),
// `byteSel` (i32).
// Textual form: [attr-dict] src0, src1, scale -> old[byteSel] : result-type
// NOTE(review): the trailing `1` passed to ROCDL_IntrOp is presumably the
// result count (the format references `$res`); confirm against the
// ROCDL_IntrOp definition.
def ROCDL_CvtScaleF32PkFp4F32Op :
    ROCDL_IntrOp<"cvt.scalef32.pk.fp4.f32", [], [], [Pure], 1>,
    Arguments<(ins I32:$old, F32:$src0, F32:$src1, F32:$scale, I32:$byteSel)> {
  let summary = "Convert f32 to packed fp4 and scale";
  let description = [{
    Convert `src0` and `src1` based on `byteSel` to packed fp4, then scale
    the packed values by the exponent in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src0 `,` $src1 `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1026+
// Scaled conversion of a 2 x f16 vector to packed fp4.
// Operands: `old` (i32), `src` (ROCDL_V2F16Type), `scale` (f32),
// `byteSel` (i32).
// Textual form: [attr-dict] src, scale -> old[byteSel] : result-type
// NOTE(review): the trailing `1` passed to ROCDL_IntrOp is presumably the
// result count (the format references `$res`); confirm against the
// ROCDL_IntrOp definition.
def ROCDL_CvtScaleF32PkFp4F16Op :
    ROCDL_IntrOp<"cvt.scalef32.pk.fp4.f16", [], [], [Pure], 1>,
    Arguments<(ins I32:$old, ROCDL_V2F16Type:$src, F32:$scale, I32:$byteSel)> {
  let summary = "Convert f16 to packed fp4 and scale";
  let description = [{
    Convert `src` based on `byteSel` to packed fp4, then scale
    the packed values by the exponent in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1038+
// Scaled conversion of a 2 x bf16 vector to packed fp4.
// Operands: `old` (i32), `src` (ROCDL_V2BF16Type), `scale` (f32),
// `byteSel` (i32).
// Textual form: [attr-dict] src, scale -> old[byteSel] : result-type
// NOTE(review): the trailing `1` passed to ROCDL_IntrOp is presumably the
// result count (the format references `$res`); confirm against the
// ROCDL_IntrOp definition.
def ROCDL_CvtScaleF32PkFp4Bf16Op :
    ROCDL_IntrOp<"cvt.scalef32.pk.fp4.bf16", [], [], [Pure], 1>,
    Arguments<(ins I32:$old, ROCDL_V2BF16Type:$src, F32:$scale, I32:$byteSel)> {
  let summary = "Convert bf16 to packed fp4 and scale";
  let description = [{
    Convert `src` based on `byteSel` to packed fp4, then scale
    the packed values by the exponent in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1050+
// Stochastic-rounding scaled conversion of a 2 x f32 vector to packed fp4.
// Operands: `old` (i32), `src` (ROCDL_V2F32Type), `seed` (i32),
// `scale` (f32), `byteSel` (i32).
// Textual form: [attr-dict] src, seed, scale -> old[byteSel] : result-type
// NOTE(review): the trailing `1` passed to ROCDL_IntrOp is presumably the
// result count (the format references `$res`); confirm against the
// ROCDL_IntrOp definition.
def ROCDL_CvtScaleF32SrPkFp4F32Op :
    ROCDL_IntrOp<"cvt.scalef32.sr.pk.fp4.f32", [], [], [Pure], 1>,
    Arguments<(ins I32:$old, ROCDL_V2F32Type:$src, I32:$seed, F32:$scale,
                   I32:$byteSel)> {
  let summary = "Scale and convert f32 to packed fp4 using stochastic rounding";
  let description = [{
    Scale `src` by the exponent in `scale`, then convert to packed fp4 with
    stochastic rounding using seed data in `seed`. Store into the `byteSel`th
    byte of `old`, preserving the others.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1063+
// Stochastic-rounding scaled conversion of a 2 x f16 vector to packed fp4.
// Operands: `old` (i32), `src` (ROCDL_V2F16Type), `seed` (i32),
// `scale` (f32), `byteSel` (i32).
// Textual form: [attr-dict] src, seed, scale -> old[byteSel] : result-type
// NOTE(review): the trailing `1` passed to ROCDL_IntrOp is presumably the
// result count (the format references `$res`); confirm against the
// ROCDL_IntrOp definition.
def ROCDL_CvtScaleF32SrPkFp4F16Op :
    ROCDL_IntrOp<"cvt.scalef32.sr.pk.fp4.f16", [], [], [Pure], 1>,
    Arguments<(ins I32:$old, ROCDL_V2F16Type:$src, I32:$seed, F32:$scale,
                   I32:$byteSel)> {
  let summary = "Scale and convert f16 to packed fp4 using stochastic rounding";
  let description = [{
    Scale `src` by the exponent in `scale`, then convert to packed fp4 with
    stochastic rounding using seed data in `seed`. Store into the `byteSel`th
    byte of `old`, preserving the others.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1076+
// Stochastic-rounding scaled conversion of a 2 x bf16 vector to packed fp4.
// Operands: `old` (i32), `src` (ROCDL_V2BF16Type), `seed` (i32),
// `scale` (f32), `byteSel` (i32).
// Textual form: [attr-dict] src, seed, scale -> old[byteSel] : result-type
// NOTE(review): the trailing `1` passed to ROCDL_IntrOp is presumably the
// result count (the format references `$res`); confirm against the
// ROCDL_IntrOp definition.
def ROCDL_CvtScaleF32SrPkFp4Bf16Op :
    ROCDL_IntrOp<"cvt.scalef32.sr.pk.fp4.bf16", [], [], [Pure], 1>,
    Arguments<(ins I32:$old, ROCDL_V2BF16Type:$src, I32:$seed, F32:$scale,
                   I32:$byteSel)> {
  let summary = "Scale and convert bf16 to packed fp4 using stochastic rounding";
  let description = [{
    Scale `src` by the exponent in `scale`, then convert to packed fp4 with
    stochastic rounding using seed data in `seed`. Store into the `byteSel`th
    byte of `old`, preserving the others.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1089+
// Scaled conversion from fp4 (carried in an i32) to packed f32.
// Operands: `src` (i32), `scale` (f32), `byteSel` (i32).
// Textual form: [attr-dict] src[byteSel], scale : result-type
// NOTE(review): the trailing `1` passed to ROCDL_IntrOp is presumably the
// result count (the format references `$res`); confirm against the
// ROCDL_IntrOp definition.
def ROCDL_CvtScaleF32PkF32Fp4Op :
    ROCDL_IntrOp<"cvt.scalef32.pk.f32.fp4", [], [], [Pure], 1>,
    Arguments<(ins I32:$src, F32:$scale, I32:$byteSel)> {
  let summary = "Convert fp4 to packed f32 and scale";
  let description = [{
    Convert `src` based on `byteSel` to packed f32, then scale
    the packed values by the exponent in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
  }];
}
1101+
1102+
// Scaled conversion from fp4 (carried in an i32) to packed f16.
// Operands: `src` (i32), `scale` (f32), `byteSel` (i32).
// Textual form: [attr-dict] src[byteSel], scale : result-type
// NOTE(review): the trailing `1` passed to ROCDL_IntrOp is presumably the
// result count (the format references `$res`); confirm against the
// ROCDL_IntrOp definition.
def ROCDL_CvtScaleF32PkF16Fp4Op :
    ROCDL_IntrOp<"cvt.scalef32.pk.f16.fp4", [], [], [Pure], 1>,
    Arguments<(ins I32:$src, F32:$scale, I32:$byteSel)> {
  let summary = "Convert fp4 to packed f16 and scale";
  let description = [{
    Convert `src` based on `byteSel` to packed f16, then scale
    the packed values by the exponent in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
  }];
}
1114+
// Scaled conversion from fp4 (carried in an i32) to packed bf16.
// Operands: `src` (i32), `scale` (f32), `byteSel` (i32).
// Textual form: [attr-dict] src[byteSel], scale : result-type
// NOTE(review): the trailing `1` passed to ROCDL_IntrOp is presumably the
// result count (the format references `$res`); confirm against the
// ROCDL_IntrOp definition.
def ROCDL_CvtScaleF32PkBf16Fp4Op :
    ROCDL_IntrOp<"cvt.scalef32.pk.bf16.fp4", [], [], [Pure], 1>,
    Arguments<(ins I32:$src, F32:$scale, I32:$byteSel)> {
  let summary = "Convert fp4 to packed bf16 and scale";
  let description = [{
    Convert `src` based on `byteSel` to packed bf16, then scale
    the packed values by the exponent in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
  }];
}
10081126//===---------------------------------------------------------------------===//
10091127// 8-bit float intrinsics
10101128//===---------------------------------------------------------------------===//
0 commit comments