@@ -2071,34 +2071,36 @@ def : Pat<(int_nvvm_ull2d_rp i64:$a), (CVT_f64_u64 $a, CvtRP)>;
20712071def : Pat<(int_nvvm_f2h_rn_ftz f32:$a), (CVT_f16_f32 $a, CvtRN_FTZ)>;
20722072def : Pat<(int_nvvm_f2h_rn f32:$a), (CVT_f16_f32 $a, CvtRN)>;
20732073
2074- def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
2075- (CVT_e4m3x2_f32 $a, $b, CvtRN)>;
2076- def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
2077- (CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
2078- def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
2079- (CVT_e5m2x2_f32 $a, $b, CvtRN)>;
2080- def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
2081- (CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;
2082-
2083- def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn v2f16:$a),
2084- (CVT_e4m3x2_f16x2 $a, CvtRN)>;
2085- def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu v2f16:$a),
2086- (CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
2087- def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn v2f16:$a),
2088- (CVT_e5m2x2_f16x2 $a, CvtRN)>;
2089- def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu v2f16:$a),
2090- (CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;
2091-
2092- def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn i16:$a),
2093- (CVT_f16x2_e4m3x2 $a, CvtRN)>;
2094- def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu i16:$a),
2095- (CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
2096- def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn i16:$a),
2097- (CVT_f16x2_e5m2x2 $a, CvtRN)>;
2098- def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu i16:$a),
2099- (CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;
2100-
2101- let Predicates = [hasPTX<86>, hasSM<100>, hasArchAccelFeatures] in {
2074+ let Predicates = [callSubtarget<"hasFP8ConversionSupport">] in { // Every Pat up to the matching '}' carries this predicate list. NOTE(review): presumably a query on the NVPTX subtarget; confirm against the callSubtarget definition.
2075+ def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
2076+ (CVT_e4m3x2_f32 $a, $b, CvtRN)>; // two f32 operands -> CVT_e4m3x2_f32, mode operand CvtRN
2077+ def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
2078+ (CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>; // same instruction, mode operand CvtRN_RELU
2079+ def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
2080+ (CVT_e5m2x2_f32 $a, $b, CvtRN)>; // two f32 operands -> CVT_e5m2x2_f32, mode operand CvtRN
2081+ def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
2082+ (CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>; // same instruction, mode operand CvtRN_RELU
2083+
2084+ def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn v2f16:$a),
2085+ (CVT_e4m3x2_f16x2 $a, CvtRN)>; // one v2f16 operand -> CVT_e4m3x2_f16x2, mode operand CvtRN
2086+ def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu v2f16:$a),
2087+ (CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>; // same instruction, mode operand CvtRN_RELU
2088+ def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn v2f16:$a),
2089+ (CVT_e5m2x2_f16x2 $a, CvtRN)>; // one v2f16 operand -> CVT_e5m2x2_f16x2, mode operand CvtRN
2090+ def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu v2f16:$a),
2091+ (CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>; // same instruction, mode operand CvtRN_RELU
2092+
2093+ def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn i16:$a),
2094+ (CVT_f16x2_e4m3x2 $a, CvtRN)>; // one i16 operand -> CVT_f16x2_e4m3x2, mode operand CvtRN
2095+ def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu i16:$a),
2096+ (CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>; // same instruction, mode operand CvtRN_RELU
2097+ def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn i16:$a),
2098+ (CVT_f16x2_e5m2x2 $a, CvtRN)>; // one i16 operand -> CVT_f16x2_e5m2x2, mode operand CvtRN
2099+ def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu i16:$a),
2100+ (CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>; // same instruction, mode operand CvtRN_RELU
2101+ } // end Predicates = [callSubtarget<"hasFP8ConversionSupport">]
2102+
2103+ let Predicates = [callSubtarget<"hasNarrowFPConversionSupport">] in {
21022104 def : Pat<(int_nvvm_ff_to_e2m3x2_rn_satfinite f32:$a, f32:$b),
21032105 (CVT_e2m3x2_f32_sf $a, $b, CvtRN)>;
21042106 def : Pat<(int_nvvm_ff_to_e2m3x2_rn_relu_satfinite f32:$a, f32:$b),
0 commit comments