@@ -1589,10 +1589,11 @@ def FPRoundingModeRM : I32EnumAttrCase<"RM", 2, "rm">;
// FPRoundingMode enumerators with values 3 through 6. Every case is declared
// as I32EnumAttrCase<C++ symbol, integer value, assembly keyword>.
def FPRoundingModeRP
    : I32EnumAttrCase<"RP", 3, "rp">;
def FPRoundingModeRZ
    : I32EnumAttrCase<"RZ", 4, "rz">;
def FPRoundingModeRNA
    : I32EnumAttrCase<"RNA", 5, "rna">;
// RS is the stochastic rounding (.rs) mode; it is the default `rnd` value of
// the F32x2 stochastic rounding conversion ops defined later in this file.
def FPRoundingModeRS
    : I32EnumAttrCase<"RS", 6, "rs">;
15921593
// Enum attribute listing every NVVM floating-point rounding mode case,
// including the stochastic rounding case FPRoundingModeRS.
// The C++ enum lives in the ::mlir::NVVM namespace; genSpecializedAttr is
// switched off, so no specialized attribute class is generated by this def.
def FPRoundingMode
    : I32EnumAttr<"FPRoundingMode", "NVVM FPRoundingMode kind", [
        FPRoundingModeNone, FPRoundingModeRN, FPRoundingModeRM,
        FPRoundingModeRP, FPRoundingModeRZ, FPRoundingModeRNA,
        FPRoundingModeRS
      ]> {
  let genSpecializedAttr = 0;
  let cppNamespace = "::mlir::NVVM";
}
@@ -1906,6 +1907,96 @@ def NVVM_ConvertF6x2ToF16x2Op :
// F4x2 -> F16x2 conversion op, instantiated from the shared
// NVVM_ConvertToFP16x2Op_Base class.
// NOTE(review): the template arguments are presumably (source format name,
// source storage type, destination format name) — confirm against the base
// class, which is defined outside this section.
def NVVM_ConvertF4x2ToF16x2Op
    : NVVM_ConvertToFP16x2Op_Base<"F4", I8, "F16">;
19081909
1910+ //===----------------------------------------------------------------------===//
1911+ // NVVM Stochastic Rounding Conversion Ops
1912+ //===----------------------------------------------------------------------===//
1913+
// Shared definition for the F32x2 -> packed two-element conversions
// (F16x2, BF16x2).
//
// Template parameters:
//   dstFormat - destination format name spliced into the summary/description.
//   mnemonic  - op mnemonic forwarded to NVVM_Op.
//   dstType   - type of the single result `$dst`.
//
// Operands are two F32 values (`$src_hi`, `$src_lo`) and an I32 `$rbits`.
// The `rnd`, `sat` and `relu` attributes default to FPRoundingMode::RS,
// SaturationMode::NONE and false respectively. The op has a verifier, and its
// LLVM lowering emits a call to the intrinsic returned by getIntrinsicID();
// both are implemented outside this file.
//
// TODO: Add the .rn and .rz rounding variants of this conversion in a
// separate PR; only the .rs rounding mode is supported for now.
class NVVM_ConvertF32x2ToFPx2OpBase<string dstFormat, string mnemonic,
                                    Type dstType>
    : NVVM_Op<mnemonic, [Pure, NVVMRequiresSMa<[100, 103]>]>,
      Results<(outs dstType:$dst)>,
      Arguments<(ins
        F32:$src_hi,
        F32:$src_lo,
        I32:$rbits,
        DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::RS">:$rnd,
        DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
        DefaultValuedAttr<BoolAttr, "false">:$relu)> {
  let summary = "Convert two F32 values to packed " # dstFormat # " with stochastic rounding (.rs)";

  let description = [{
    Converts two F32 values to packed }] # dstFormat # [{ format using stochastic
    rounding (.rs) mode with randomness provided by the `rbits` parameter. The
    `relu` attribute clamps negative results to 0. The `sat` attribute determines
    saturation behavior. The `src_hi` and `src_lo` parameters correspond to operands
    `a` and `b` in the PTX ISA, respectively.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  }];

  // Attribute validity is checked by a C++ verifier.
  let hasVerifier = 1;

  // Custom syntax: the three operands, the attribute dictionary, then only the
  // result type (operand types are fixed by the op definition).
  let assemblyFormat = "$src_hi `,` $src_lo `,` $rbits attr-dict `:` type($dst)";

  // Declares the hook that picks the LLVM intrinsic for this op instance.
  let extraClassDeclaration = [{
    llvm::Intrinsic::ID getIntrinsicID();
  }];

  // Translation to LLVM IR: one intrinsic call taking the operands in order.
  string llvmBuilder = [{
    auto intId = op.getIntrinsicID();
    $dst = createIntrinsicCall(builder, intId, {$src_hi, $src_lo, $rbits});
  }];
}
1949+
// Stochastic rounding conversion F32x2 -> F16x2; the result is a
// two-element vector of F16.
def NVVM_ConvertF32x2ToF16x2Op
    : NVVM_ConvertF32x2ToFPx2OpBase<"f16x2", "convert.f32x2.to.f16x2",
                                    VectorOfLengthAndType<[2], [F16]>>;
1952+
// Stochastic rounding conversion F32x2 -> BF16x2; the result is a
// two-element vector of BF16.
def NVVM_ConvertF32x2ToBF16x2Op
    : NVVM_ConvertF32x2ToFPx2OpBase<"bf16x2", "convert.f32x2.to.bf16x2",
                                    VectorOfLengthAndType<[2], [BF16]>>;
1955+
// Shared definition for the stochastic rounding conversions from F32x4 to the
// packed four-element formats (E4M3x4, E5M2x4, E2M3x4, E3M2x4, E2M1x4).
// Unlike the F32x2 ops there is no rounding or saturation attribute: these ops
// always use RS (stochastic rounding) together with SATFINITE saturation.
//
// Template parameters:
//   dstFormat - destination format name spliced into the summary/description.
//   mnemonic  - op mnemonic forwarded to NVVM_Op.
//   dstType   - type of the single result `$dst`.
//
// Operands are a vector<4xf32> `$src` and an I32 `$rbits`. `relu` defaults to
// false; `dstTy` is a required type attribute naming the target format. The
// op has a verifier, and its LLVM lowering emits a call to the intrinsic
// returned by getIntrinsicID(); both are implemented outside this file.
class NVVM_ConvertF32x4ToFPx4OpBase<string dstFormat, string mnemonic,
                                    Type dstType>
    : NVVM_Op<mnemonic, [Pure, NVVMRequiresSMa<[100, 103]>]>,
      Results<(outs dstType:$dst)>,
      Arguments<(ins
        VectorOfLengthAndType<[4], [F32]>:$src,
        I32:$rbits,
        DefaultValuedAttr<BoolAttr, "false">:$relu,
        TypeAttr:$dstTy)> {
  let summary = "Convert vector<4xf32> to packed " # dstFormat # " with stochastic rounding (.rs) and satfinite";

  let description = [{
    Converts a vector<4xf32> to packed }] # dstFormat # [{ format using
    stochastic rounding (.rs) mode with SATFINITE saturation. Randomness is
    provided by the `rbits` parameter. The `dstTy` attribute specifies the
    target floating-point format. The `relu` attribute clamps negative results to 0.

    Note: These operations always use RS rounding mode and SATFINITE saturation mode.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
  }];

  // Attribute/type validity is checked by a C++ verifier.
  let hasVerifier = 1;

  // Custom syntax: operands, attribute dictionary, `src type -> dst type`,
  // then the `dstTy` type attribute in parentheses.
  let assemblyFormat = "$src `,` $rbits attr-dict `:` type($src) `->` type($dst) `(` $dstTy `)`";

  // Declares the hook that picks the LLVM intrinsic for this op instance.
  let extraClassDeclaration = [{
    llvm::Intrinsic::ID getIntrinsicID();
  }];

  // Translation to LLVM IR: one intrinsic call taking the source vector and
  // the random bits.
  string llvmBuilder = [{
    auto intId = op.getIntrinsicID();
    $dst = createIntrinsicCall(builder, intId, {$src, $rbits});
  }];
}
1990+
// Stochastic rounding conversion F32x4 -> F8x4 (supports E4M3FN, E5M2).
// The result is a four-element vector of I8.
def NVVM_ConvertF32x4ToF8x4Op
    : NVVM_ConvertF32x4ToFPx4OpBase<"f8x4", "convert.f32x4.to.f8x4",
                                    VectorOfLengthAndType<[4], [I8]>>;
1993+
// Stochastic rounding conversion F32x4 -> F6x4 (supports E2M3FN, E3M2FN).
// The result is a four-element vector of I8.
def NVVM_ConvertF32x4ToF6x4Op
    : NVVM_ConvertF32x4ToFPx4OpBase<"f6x4", "convert.f32x4.to.f6x4",
                                    VectorOfLengthAndType<[4], [I8]>>;
1996+
// Stochastic rounding conversion F32x4 -> F4x4 (supports E2M1FN).
// The result is a single I16 value.
def NVVM_ConvertF32x4ToF4x4Op
    : NVVM_ConvertF32x4ToFPx4OpBase<"f4x4", "convert.f32x4.to.f4x4", I16>;
1999+
19092000//===----------------------------------------------------------------------===//
19102001// NVVM MMA Ops
19112002//===----------------------------------------------------------------------===//
0 commit comments