@@ -2008,34 +2008,40 @@ def : Pat<(int_nvvm_ull2d_rp i64:$a), (CVT_f64_u64 $a, CvtRP)>;
20082008def : Pat<(int_nvvm_f2h_rn_ftz f32:$a), (CVT_f16_f32 $a, CvtRN_FTZ)>;
20092009def : Pat<(int_nvvm_f2h_rn f32:$a), (CVT_f16_f32 $a, CvtRN)>;
20102010
2011- def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
2012- (CVT_e4m3x2_f32 $a, $b, CvtRN)>;
2013- def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
2014- (CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
2015- def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
2016- (CVT_e5m2x2_f32 $a, $b, CvtRN)>;
2017- def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
2018- (CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;
2019-
2020- def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn v2f16:$a),
2021- (CVT_e4m3x2_f16x2 $a, CvtRN)>;
2022- def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu v2f16:$a),
2023- (CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
2024- def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn v2f16:$a),
2025- (CVT_e5m2x2_f16x2 $a, CvtRN)>;
2026- def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu v2f16:$a),
2027- (CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;
2028-
2029- def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn i16:$a),
2030- (CVT_f16x2_e4m3x2 $a, CvtRN)>;
2031- def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu i16:$a),
2032- (CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
2033- def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn i16:$a),
2034- (CVT_f16x2_e5m2x2 $a, CvtRN)>;
2035- def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu i16:$a),
2036- (CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;
2037-
2038- let Predicates = [hasPTX<86>, hasSM<100>, hasArchAccelFeatures] in {
2011+ let Predicates = [hasPTX<81>, hasSM<89>] in { // Every pattern in this scope is gated on both predicates (presumably PTX ISA 8.1+ and sm_89+ -- confirm against the hasPTX/hasSM definitions).
2012+ def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b), // Group 1: two f32 operands select CVT_e4m3x2_f32 / CVT_e5m2x2_f32; the trailing operand is CvtRN or CvtRN_RELU to match the intrinsic's suffix.
2013+ (CVT_e4m3x2_f32 $a, $b, CvtRN)>;
2014+ def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
2015+ (CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
2016+ def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
2017+ (CVT_e5m2x2_f32 $a, $b, CvtRN)>;
2018+ def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
2019+ (CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;
2020+ // Group 2: a single v2f16 operand selects CVT_e4m3x2_f16x2 / CVT_e5m2x2_f16x2, again with CvtRN or CvtRN_RELU.
2021+ def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn v2f16:$a),
2022+ (CVT_e4m3x2_f16x2 $a, CvtRN)>;
2023+ def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu v2f16:$a),
2024+ (CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
2025+ def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn v2f16:$a),
2026+ (CVT_e5m2x2_f16x2 $a, CvtRN)>;
2027+ def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu v2f16:$a),
2028+ (CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;
2029+ // Group 3: a single i16 operand (presumably the packed fp8 pair -- confirm) selects CVT_f16x2_e4m3x2 / CVT_f16x2_e5m2x2.
2030+ def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn i16:$a),
2031+ (CVT_f16x2_e4m3x2 $a, CvtRN)>;
2032+ def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu i16:$a),
2033+ (CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
2034+ def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn i16:$a),
2035+ (CVT_f16x2_e5m2x2 $a, CvtRN)>;
2036+ def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu i16:$a),
2037+ (CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;
2038+ } // end let Predicates = [hasPTX<81>, hasSM<89>]
2039+
2040+ let Predicates = [AnyPred<[
2041+ PTXWithFamilySMs<90, [100, 110, 120]>,
2042+ PTXWithFamilySMs<88, [100, 101, 120]>,
2043+ PTXWithAccelSMs<86, [100, 101, 120]>
2044+ ]>] in {
20392045 def : Pat<(int_nvvm_ff_to_e2m3x2_rn_satfinite f32:$a, f32:$b),
20402046 (CVT_e2m3x2_f32_sf $a, $b, CvtRN)>;
20412047 def : Pat<(int_nvvm_ff_to_e2m3x2_rn_relu_satfinite f32:$a, f32:$b),
0 commit comments