@@ -1950,57 +1950,49 @@ let Predicates = [hasPTX<86>, hasSM<100>, hasArchAccelFeatures] in {
19501950            (CVT_bf16x2_ue8m0x2 $a)>;
19511951}
19521952
1953- def SDT_CVT_F32X4_TO_FP8X4_RS  :
1953+ def SDT_CVT_F32X4_TO_FPX4_RS_VEC  :
19541954  SDTypeProfile<1, 6, [SDTCisVec<0>, SDTCisFP<1>, SDTCisFP<2>, SDTCisFP<3>, 
19551955                       SDTCisFP<4>, SDTCisInt<5>, SDTCisInt<6>]>;
19561956
1957- def SDT_CVT_F32X4_TO_FP6X4_RS :
1958-   SDTypeProfile<1, 6, [SDTCisVec<0>, SDTCisFP<1>, SDTCisFP<2>, SDTCisFP<3>, 
1959-                        SDTCisFP<4>, SDTCisInt<5>, SDTCisInt<6>]>;
1960- 
1961- def SDT_CVT_F32X4_TO_FP4X4_RS :
1957+ def SDT_CVT_F32X4_TO_FPX4_RS_INT :
19621958  SDTypeProfile<1, 6, [SDTCisInt<0>, SDTCisFP<1>, SDTCisFP<2>, SDTCisFP<3>, 
19631959                       SDTCisFP<4>, SDTCisInt<5>, SDTCisInt<6>]>;
19641960
19651961class CVT_F32X4_TO_FPX4_RS_SF_NODE<string FPName, SDTypeProfile SDT> :
19661962  SDNode<"NVPTXISD::CVT_" # FPName # "X4_F32X4_RS_SF", SDT, []>;
1963+ // Maps the vector-result CVT_<FP>X4_F32X4_RS_SF node to CVT_<fp>x4_f32x4_rs_sf.
1964+ multiclass CVT_F32X4_TO_FPX4_RS_SF_VEC<string FPName, VTVec RetTy> {
1965+   def : Pat<(RetTy (CVT_F32X4_TO_FPX4_RS_SF_NODE<!toupper(FPName),
1966+                       SDT_CVT_F32X4_TO_FPX4_RS_VEC>
1967+                    f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1968+             (!cast<NVPTXInst>("CVT_" # !tolower(FPName) # "x4_f32x4_rs_sf")
1969+               $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1970+   // Same mapping for the CvtRS_RELU mode operand; FPName may be given in any case.
1971+   def : Pat<(RetTy (CVT_F32X4_TO_FPX4_RS_SF_NODE<!toupper(FPName),
1972+                       SDT_CVT_F32X4_TO_FPX4_RS_VEC>
1973+                    f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1974+             (!cast<NVPTXInst>("CVT_" # !tolower(FPName) # "x4_f32x4_rs_sf")
1975+               $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1976+ }
19671977
19681978// RS rounding mode conversions
19691979let Predicates = [hasPTX<87>, hasSM100aOrSM103a] in {
19701980// FP8x4 conversions
1971- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E4M3", SDT_CVT_F32X4_TO_FP8X4_RS>
1972-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1973-           (CVT_e4m3x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1974- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E5M2", SDT_CVT_F32X4_TO_FP8X4_RS>
1975-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1976-           (CVT_e5m2x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1977- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E4M3", SDT_CVT_F32X4_TO_FP8X4_RS>
1978-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1979-           (CVT_e4m3x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1980- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E5M2", SDT_CVT_F32X4_TO_FP8X4_RS>
1981-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1982-           (CVT_e5m2x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1981+ defm : CVT_F32X4_TO_FPX4_RS_SF_VEC<"e4m3", v4i8>;
1982+ defm : CVT_F32X4_TO_FPX4_RS_SF_VEC<"e5m2", v4i8>;
19831983
19841984// FP6x4 conversions
1985- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M3", SDT_CVT_F32X4_TO_FP6X4_RS>
1986-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1987-           (CVT_e2m3x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1988- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E3M2", SDT_CVT_F32X4_TO_FP6X4_RS>
1989-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1990-           (CVT_e3m2x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1991- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M3", SDT_CVT_F32X4_TO_FP6X4_RS>
1992-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1993-           (CVT_e2m3x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1994- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E3M2", SDT_CVT_F32X4_TO_FP6X4_RS>
1995-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1996-           (CVT_e3m2x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1985+ defm : CVT_F32X4_TO_FPX4_RS_SF_VEC<"e2m3", v4i8>;
1986+ defm : CVT_F32X4_TO_FPX4_RS_SF_VEC<"e3m2", v4i8>;
19971987
19981988// FP4x4 conversions
1999- def : Pat<(i16 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M1", SDT_CVT_F32X4_TO_FP4X4_RS>
2000-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1989+ def : Pat<(i16 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M1", 
1990+                   SDT_CVT_F32X4_TO_FPX4_RS_INT>
1991+                 f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
20011992          (CVT_e2m1x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
2002- def : Pat<(i16 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M1", SDT_CVT_F32X4_TO_FP4X4_RS>
2003-                  f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1993+ def : Pat<(i16 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M1", 
1994+                   SDT_CVT_F32X4_TO_FPX4_RS_INT>
1995+                 f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
20041996          (CVT_e2m1x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
20051997}
20061998
0 commit comments