@@ -1079,7 +1079,7 @@ def CVTFP6TypeAttr : EnumAttr<NVVM_Dialect, CVTFP6Type, "cvt_fp6_type"> {
10791079 let assemblyFormat = "`<` $value `>`";
10801080}
10811081
1082- def NVVM_CvtToF6x2Op : NVVM_Op<"cvt.to.f6x2"> {
1082+ def NVVM_CvtF32x2ToF6x2Op : NVVM_Op<"cvt.f32x2.to.f6x2"> {
10831083 let summary = "Convert a pair of float inputs to f6x2";
10841084 let description = [{
10851085 This Op converts each of the given float inputs to the specified fp6 type.
@@ -1110,7 +1110,7 @@ def NVVM_CvtToF6x2Op : NVVM_Op<"cvt.to.f6x2"> {
11101110 }];
11111111
11121112 string llvmBuilder = [{
1113- auto intId = NVVM::CvtToF6x2Op ::getIntrinsicID($type, $relu);
1113+ auto intId = NVVM::CvtF32x2ToF6x2Op ::getIntrinsicID($type, $relu);
11141114 llvm::Value *packedI16 = createIntrinsicCall(builder, intId, {$a, $b});
11151115 if(op.getDst().getType().isInteger(16))
11161116 $dst = packedI16;
@@ -1120,6 +1120,153 @@ def NVVM_CvtToF6x2Op : NVVM_Op<"cvt.to.f6x2"> {
11201120 }];
11211121}
11221122
// Destination fp8 formats selectable by the f8x2 conversion ops. Each case
// pairs a C++ enumerator name (E4M3 / E5M2 / UE8M0) with its i32 value
// (0 / 1 / 2) and its lower-case string form (e4m3 / e5m2 / ue8m0).
1123+ def CVTFP8E4M3 : I32EnumAttrCase<"E4M3", 0, "e4m3">;
1124+ def CVTFP8E5M2 : I32EnumAttrCase<"E5M2", 1, "e5m2">;
1125+ def CVTFP8UE8M0 : I32EnumAttrCase<"UE8M0", 2, "ue8m0">;
1126+
// I32-backed enum grouping the three cases above, emitted into the
// ::mlir::NVVM C++ namespace. genSpecializedAttr is set to 0 here; the
// attribute itself is declared explicitly by the CVTFP8TypeAttr record below.
1127+ def CVTFP8Type : I32EnumAttr<"CVTFP8Type", "NVVM CVTFP8Type kind",
1128+ [CVTFP8E4M3, CVTFP8E5M2, CVTFP8UE8M0]> {
1129+ let genSpecializedAttr = 0;
1130+ let cppNamespace = "::mlir::NVVM";
1131+ }
// NVVM dialect attribute wrapping CVTFP8Type. Its mnemonic is `cvt_fp8_type`
// and its assembly form is the enum value enclosed in angle brackets.
1132+ def CVTFP8TypeAttr : EnumAttr<NVVM_Dialect, CVTFP8Type, "cvt_fp8_type"> {
1133+ let assemblyFormat = "`<` $value `>`";
1134+ }
1135+
// nvvm.cvt.f32x2.to.f8x2: converts two f32 scalars (`a`, `b`) into a packed
// pair of fp8 values. The target fp8 format is chosen by the `type`
// attribute; `rnd`, `sat` and `relu` further select the intrinsic variant.
1136+ def NVVM_CvtF32x2ToF8x2Op : NVVM_Op<"cvt.f32x2.to.f8x2"> {
1137+ let summary = "Convert a pair of float inputs to f8x2";
1138+ let description = [{
1139+ This Op converts each of the given float inputs to the specified fp8 type.
1140+ The result `dst` is represented as an i16 type or as a vector
1141+ of two i8 types.
1142+ If `dst` is returned as an i16 type, the converted values are packed such
1143+ that the value converted from `a` is stored in the upper 8 bits of `dst`
1144+ and the value converted from `b` is stored in the lower 8 bits of `dst`.
1145+ If `dst` is returned as a vector type, each converted value is stored as an
1146+ i8 element in the vector.
1147+ The `rnd` and `sat` attributes specify the rounding and saturation modes respectively.
1148+ The `relu` attribute, when set, lowers to the '.relu' variant of
1149+ the cvt instruction.
1150+
1151+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
1152+ }];
// Result: either a single i16 or a 2-element vector of i8.
1153+ let results = (outs AnyTypeOf<[I16, VectorOfLengthAndType<[2], [I8]>]>:$dst);
// Operands/attributes: `type` is required; `rnd`, `sat` and `relu` are
// optional and default to FPRoundingMode::NONE, SaturationMode::NONE and
// false respectively.
1154+ let arguments = (ins
1155+ CVTFP8TypeAttr:$type,
1156+ F32:$a,
1157+ F32:$b,
1158+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
1159+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
1160+ DefaultValuedAttr<BoolAttr, "false">:$relu);
// Only the result type is spelled out in the custom syntax; the optional
// attributes are carried by attr-dict.
1161+ let assemblyFormat = "$type $a `,` $b attr-dict `:` type($dst)";
1162+
// Declares the static helper that maps (type, rnd, sat, relu) to an LLVM
// intrinsic ID. Only the declaration is given here; the definition is not
// part of this view.
1163+ let extraClassDeclaration = [{
1164+ static llvm::Intrinsic::ID getIntrinsicID(NVVM::CVTFP8Type to,
1165+ NVVM::FPRoundingMode rnd,
1166+ NVVM::SaturationMode sat,
1167+ bool hasRelu);
1168+ }];
1169+
// LLVM IR translation: call the selected intrinsic on {a, b}. When `dst` is
// i16 the intrinsic's result is used directly; otherwise it is bitcast to a
// fixed vector of two i8.
1170+ string llvmBuilder = [{
1171+ auto intId = NVVM::CvtF32x2ToF8x2Op::getIntrinsicID($type, $rnd, $sat, $relu);
1172+ llvm::Value *packedI16 = createIntrinsicCall(builder, intId, {$a, $b});
1173+ if(op.getDst().getType().isInteger(16))
1174+ $dst = packedI16;
1175+ else
1176+ $dst = builder.CreateBitCast(packedI16,
1177+ llvm::FixedVectorType::get(llvm::Type::getInt8Ty(builder.getContext()), 2));
1178+ }];
1179+
// Requests a custom verifier for this op.
// NOTE(review): the verifier implementation is not visible here, so which
// type/rnd/sat/relu combinations it accepts cannot be stated from this file.
1180+ let hasVerifier = 1;
1181+ }
1182+
// nvvm.cvt.f16x2.to.f8x2: converts a 2-element f16 vector `a` into a packed
// pair of fp8 values. The target fp8 format is chosen by the `type`
// attribute; `relu` selects the relu variant of the intrinsic.
1183+ def NVVM_CvtF16x2ToF8x2Op : NVVM_Op<"cvt.f16x2.to.f8x2"> {
1184+ let summary = "Convert an f16x2 input to f8x2";
1185+ let description = [{
1186+ This Op converts the given f16 inputs in an f16x2 vector to the specified
1187+ f8 type.
1188+ The result `dst` is represented as an i16 type or as a vector
1189+ of two i8 types.
1190+ If `dst` is returned as an i16 type, the converted values from `a`
1191+ are packed such that the value converted from the first element of `a`
1192+ is stored in the upper 8 bits of `dst` and the value converted from the
1193+ second element of `a` is stored in the lower 8 bits of `dst`.
1194+ If `dst` is returned as a vector type, each converted value is stored as an
1195+ i8 element in the vector.
1196+ The `relu` attribute, when set, lowers to the '.relu' variant of
1197+ the cvt instruction.
1198+
1199+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
1200+ }];
// Result: either a single i16 or a 2-element vector of i8.
1201+ let results = (outs AnyTypeOf<[I16, VectorOfLengthAndType<[2], [I8]>]>:$dst);
// Operands/attributes: `type` is required, `a` is a 2-element f16 vector,
// and `relu` defaults to false. This op has no rounding or saturation
// attributes.
1202+ let arguments = (ins
1203+ CVTFP8TypeAttr:$type,
1204+ VectorOfLengthAndType<[2], [F16]>:$a,
1205+ DefaultValuedAttr<BoolAttr, "false">:$relu);
// Both the operand type and the result type are spelled out in the syntax.
1206+ let assemblyFormat = "$type $a attr-dict `:` type($a) `->` type($dst)";
1207+
// Declares the static helper that maps (type, relu) to an LLVM intrinsic ID.
// Only the declaration is given here; the definition is not part of this view.
1208+ let extraClassDeclaration = [{
1209+ static llvm::Intrinsic::ID getIntrinsicID(NVVM::CVTFP8Type to,
1210+ bool hasRelu);
1211+ }];
1212+
// LLVM IR translation: call the selected intrinsic on {a}. When `dst` is i16
// the intrinsic's result is used directly; otherwise it is bitcast to a
// fixed vector of two i8.
1213+ string llvmBuilder = [{
1214+ auto intId = NVVM::CvtF16x2ToF8x2Op::getIntrinsicID($type, $relu);
1215+ llvm::Value *packedI16 = createIntrinsicCall(builder, intId, {$a});
1216+ if(op.getDst().getType().isInteger(16))
1217+ $dst = packedI16;
1218+ else
1219+ $dst = builder.CreateBitCast(packedI16,
1220+ llvm::FixedVectorType::get(llvm::Type::getInt8Ty(builder.getContext()), 2));
1221+ }];
1222+
// Requests a custom verifier for this op.
// NOTE(review): the verifier implementation is not visible here, so which
// `type` values it accepts cannot be stated from this file.
1223+ let hasVerifier = 1;
1224+ }
1225+
// nvvm.cvt.bf16x2.to.f8x2: converts a 2-element bf16 vector `a` into a packed
// pair of fp8 values. The intrinsic variant is selected from `rnd` and `sat`.
// NOTE(review): `type` is a required attribute but is not passed to
// getIntrinsicID below; presumably the verifier restricts which fp8 formats
// are accepted for bf16 inputs. Confirm against the C++ verifier.
1226+ def NVVM_CvtBF16x2ToF8x2Op : NVVM_Op<"cvt.bf16x2.to.f8x2"> {
1227+ let summary = "Convert a pair of bf16 inputs to f8x2";
1228+ let description = [{
1229+ This Op converts the given bf16 inputs in a bf16x2 vector to the specified
1230+ f8 type.
1231+ The result `dst` is represented as an i16 type or as a vector
1232+ of two i8 types.
1233+ If `dst` is returned as an i16 type, the converted values from `a`
1234+ are packed such that the value converted from the first element of `a`
1235+ is stored in the upper 8 bits of `dst` and the value converted from the
1236+ second element of `a` is stored in the lower 8 bits of `dst`.
1237+ If `dst` is returned as a vector type, each converted value is stored as an
1238+ i8 element in the vector.
1239+ The `rnd` and `sat` attributes specify the rounding and saturation modes
1240+ respectively.
1241+
1242+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
1243+ }];
// Result: either a single i16 or a 2-element vector of i8.
1244+ let results = (outs AnyTypeOf<[I16, VectorOfLengthAndType<[2], [I8]>]>:$dst);
// Operands/attributes: `type` is required, `a` is a 2-element bf16 vector,
// and `rnd` / `sat` default to FPRoundingMode::NONE / SaturationMode::NONE.
// This op has no `relu` attribute.
1245+ let arguments = (ins
1246+ CVTFP8TypeAttr:$type,
1247+ VectorOfLengthAndType<[2], [BF16]>:$a,
1248+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
1249+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat);
// Both the operand type and the result type are spelled out in the syntax.
1250+ let assemblyFormat = "$type $a attr-dict `:` type($a) `->` type($dst)";
1251+
// Declares the static helper that maps (rnd, sat) to an LLVM intrinsic ID.
// Only the declaration is given here; the definition is not part of this view.
1252+ let extraClassDeclaration = [{
1253+ static llvm::Intrinsic::ID getIntrinsicID(NVVM::FPRoundingMode rnd,
1254+ NVVM::SaturationMode sat);
1255+ }];
1256+
// LLVM IR translation: call the selected intrinsic on {a}. When `dst` is i16
// the intrinsic's result is used directly; otherwise it is bitcast to a
// fixed vector of two i8.
1257+ string llvmBuilder = [{
1258+ auto intId = NVVM::CvtBF16x2ToF8x2Op::getIntrinsicID($rnd, $sat);
1259+ llvm::Value *packedI16 = createIntrinsicCall(builder, intId, {$a});
1260+ if(op.getDst().getType().isInteger(16))
1261+ $dst = packedI16;
1262+ else
1263+ $dst = builder.CreateBitCast(packedI16,
1264+ llvm::FixedVectorType::get(llvm::Type::getInt8Ty(builder.getContext()), 2));
1265+ }];
1266+
// Requests a custom verifier for this op; its implementation is not visible
// in this file.
1267+ let hasVerifier = 1;
1268+ }
1269+
11231270//===----------------------------------------------------------------------===//
11241271// NVVM MMA Ops
11251272//===----------------------------------------------------------------------===//
0 commit comments