@@ -1950,57 +1950,49 @@ let Predicates = [hasPTX<86>, hasSM<100>, hasArchAccelFeatures] in {
19501950 (CVT_bf16x2_ue8m0x2 $a)>;
19511951}
19521952
1953- def SDT_CVT_F32X4_TO_FP8X4_RS :
1953+ def SDT_CVT_F32X4_TO_FPX4_RS_VEC : // Shared type profile for the RS conversions whose result (index 0) is a vector.
19541954 SDTypeProfile<1, 6, [SDTCisVec<0>, SDTCisFP<1>, SDTCisFP<2>, SDTCisFP<3>, // 1 result, 6 operands; indices 1-4 are floating point
19551955 SDTCisFP<4>, SDTCisInt<5>, SDTCisInt<6>]>; // indices 5 and 6 are integers
19561956
1957- def SDT_CVT_F32X4_TO_FP6X4_RS :
1958- SDTypeProfile<1, 6, [SDTCisVec<0>, SDTCisFP<1>, SDTCisFP<2>, SDTCisFP<3>,
1959- SDTCisFP<4>, SDTCisInt<5>, SDTCisInt<6>]>;
1960-
1961- def SDT_CVT_F32X4_TO_FP4X4_RS :
1957+ def SDT_CVT_F32X4_TO_FPX4_RS_INT : // Same operand constraints as the _VEC profile, but the result (index 0) is an integer.
19621958 SDTypeProfile<1, 6, [SDTCisInt<0>, SDTCisFP<1>, SDTCisFP<2>, SDTCisFP<3>, // 1 result, 6 operands; indices 1-4 are floating point
19631959 SDTCisFP<4>, SDTCisInt<5>, SDTCisInt<6>]>; // indices 5 and 6 are integers
19641960
19651961class CVT_F32X4_TO_FPX4_RS_SF_NODE<string FPName, SDTypeProfile SDT> : // SDNode parameterized by format tag and type profile.
19661962 SDNode<"NVPTXISD::CVT_" # FPName # "X4_F32X4_RS_SF", SDT, []>; // FPName is spliced verbatim into the NVPTXISD opcode name; no node properties
1963+
1964+ multiclass CVT_F32X4_TO_FPX4_RS_SF_VEC<string FPName, VTVec RetTy> { // Emits the CvtRS and CvtRS_RELU selection patterns for one vector-result format; FPName may be given in either case.
1965+ def : Pat<(RetTy (CVT_F32X4_TO_FPX4_RS_SF_NODE<!toupper(FPName), // the SDNode name uses the upper-case format tag
1966+ SDT_CVT_F32X4_TO_FPX4_RS_VEC>
1967+ f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1968+ (!cast<NVPTXInst>("CVT_" # !tolower(FPName) # "x4_f32x4_rs_sf") // "CVT_" must be a string literal; instruction records use the lower-case tag (e.g. CVT_e4m3x4_f32x4_rs_sf)
1969+ $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1970+
1971+ def : Pat<(RetTy (CVT_F32X4_TO_FPX4_RS_SF_NODE<!toupper(FPName), // same node, matched with the RELU rounding-mode operand
1972+ SDT_CVT_F32X4_TO_FPX4_RS_VEC>
1973+ f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1974+ (!cast<NVPTXInst>("CVT_" # !tolower(FPName) # "x4_f32x4_rs_sf") // same instruction record as above
1975+ $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1976+ }
19671977
19681978// RS rounding mode conversions
19691979let Predicates = [hasPTX<87>, hasSM100aOrSM103a] in {
19701980// FP8x4 conversions
1971- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E4M3", SDT_CVT_F32X4_TO_FP8X4_RS>
1972- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1973- (CVT_e4m3x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1974- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E5M2", SDT_CVT_F32X4_TO_FP8X4_RS>
1975- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1976- (CVT_e5m2x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1977- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E4M3", SDT_CVT_F32X4_TO_FP8X4_RS>
1978- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1979- (CVT_e4m3x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1980- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E5M2", SDT_CVT_F32X4_TO_FP8X4_RS>
1981- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1982- (CVT_e5m2x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1981+ defm : CVT_F32X4_TO_FPX4_RS_SF_VEC<"E4M3", v4i8>; // multiclasses are instantiated with defm, not def
1982+ defm : CVT_F32X4_TO_FPX4_RS_SF_VEC<"E5M2", v4i8>; // each expands to the CvtRS and CvtRS_RELU patterns
19831983
19841984// FP6x4 conversions
1985- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M3", SDT_CVT_F32X4_TO_FP6X4_RS>
1986- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1987- (CVT_e2m3x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1988- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E3M2", SDT_CVT_F32X4_TO_FP6X4_RS>
1989- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1990- (CVT_e3m2x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>;
1991- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M3", SDT_CVT_F32X4_TO_FP6X4_RS>
1992- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1993- (CVT_e2m3x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1994- def : Pat<(v4i8 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E3M2", SDT_CVT_F32X4_TO_FP6X4_RS>
1995- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1996- (CVT_e3m2x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>;
1985+ defm : CVT_F32X4_TO_FPX4_RS_SF_VEC<"E2M3", v4i8>; // multiclasses are instantiated with defm, not def
1986+ defm : CVT_F32X4_TO_FPX4_RS_SF_VEC<"E3M2", v4i8>; // each expands to the CvtRS and CvtRS_RELU patterns
19971987
19981988// FP4x4 conversions
1999- def : Pat<(i16 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M1", SDT_CVT_F32X4_TO_FP4X4_RS>
2000- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
1989+ def : Pat<(i16 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M1", // E2M1 x4 result is matched as a scalar i16, so it is written out instead of using the vector multiclass
1990+ SDT_CVT_F32X4_TO_FPX4_RS_INT> // integer-result type profile
1991+ f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS)),
20011992 (CVT_e2m1x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS)>; // operands forwarded unchanged to the instruction
2002- def : Pat<(i16 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M1", SDT_CVT_F32X4_TO_FP4X4_RS>
2003- f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
1993+ def : Pat<(i16 (CVT_F32X4_TO_FPX4_RS_SF_NODE<"E2M1", // CvtRS_RELU counterpart of the E2M1 pattern; result is a scalar i16
1994+ SDT_CVT_F32X4_TO_FPX4_RS_INT> // integer-result type profile
1995+ f32:$f1, f32:$f2, f32:$f3, f32:$f4, i32:$rbits, CvtRS_RELU)),
20041996 (CVT_e2m1x4_f32x4_rs_sf $f1, $f2, $f3, $f4, $rbits, CvtRS_RELU)>; // operands forwarded unchanged to the instruction
20051997}
20061998
0 commit comments