@@ -2134,6 +2134,35 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
21342134 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
21352135 setOperationAction(ISD::CTPOP, VT, Legal);
21362136 }
2137+
2138+ // We can try to convert vectors to different sizes to leverage legal
2139+ // `vpcompress` cases. So we mark these supported vector sizes as Custom and
2140+ // then specialize to Legal below.
2141+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32, MVT::v4i64,
2142+ MVT::v4f64, MVT::v2i64, MVT::v2f64, MVT::v16i8, MVT::v8i16,
2143+ MVT::v16i16, MVT::v8i8})
2144+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
2145+
2146+ // Legal vpcompress depends on various AVX512 extensions.
2147+ // Legal in AVX512F
2148+ for (MVT VT : {MVT::v16i32, MVT::v16f32, MVT::v8i64, MVT::v8f64})
2149+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Legal);
2150+
2151+ // Legal in AVX512F + AVX512VL
2152+ if (Subtarget.hasVLX())
2153+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32, MVT::v4i64,
2154+ MVT::v4f64, MVT::v2i64, MVT::v2f64})
2155+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Legal);
2156+
2157+ // Legal in AVX512F + AVX512VBMI2
2158+ if (Subtarget.hasVBMI2())
2159+ for (MVT VT : {MVT::v32i16, MVT::v64i8})
2160+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Legal);
2161+
2162+ // Legal in AVX512F + AVX512VL + AVX512VBMI2
2163+ if (Subtarget.hasVBMI2() && Subtarget.hasVLX())
2164+ for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v32i8, MVT::v16i16})
2165+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Legal);
21372166 }
21382167
21392168 // This block controls legalization of v32i1/v64i1 which are available with
@@ -17795,6 +17824,68 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
1779517824 llvm_unreachable("Unimplemented!");
1779617825}
1779717826
17827+ // As legal vpcompress instructions depend on various AVX512 extensions, try to
17828+ // convert illegal vector sizes to legal ones to avoid expansion.
17829+ static SDValue lowerVECTOR_COMPRESS(SDValue Op, const X86Subtarget &Subtarget,
17830+ SelectionDAG &DAG) {
17831+ assert(Subtarget.hasAVX512() &&
17832+ "Need AVX512 for custom VECTOR_COMPRESS lowering.");
17833+
17834+ SDLoc DL(Op);
17835+ SDValue Vec = Op.getOperand(0);
17836+ SDValue Mask = Op.getOperand(1);
17837+ SDValue Passthru = Op.getOperand(2);
17838+
17839+ EVT VecVT = Vec.getValueType();
17840+ EVT ElementVT = VecVT.getVectorElementType();
17841+ unsigned NumElements = VecVT.getVectorNumElements();
17842+ unsigned NumVecBits = VecVT.getFixedSizeInBits();
17843+ unsigned NumElementBits = ElementVT.getFixedSizeInBits();
17844+
17845+ // 128- and 256-bit vectors with <= 16 elements can be converted to and
17846+ // compressed as 512-bit vectors in AVX512F.
17847+ if (NumVecBits != 128 && NumVecBits != 256)
17848+ return SDValue();
17849+
17850+ if (NumElementBits == 32 || NumElementBits == 64) {
17851+ unsigned NumLargeElements = 512 / NumElementBits;
17852+ MVT LargeVecVT =
17853+ MVT::getVectorVT(ElementVT.getSimpleVT(), NumLargeElements);
17854+ MVT LargeMaskVT = MVT::getVectorVT(MVT::i1, NumLargeElements);
17855+
17856+ Vec = widenSubVector(LargeVecVT, Vec, /*ZeroNewElements=*/false, Subtarget,
17857+ DAG, DL);
17858+ Mask = widenSubVector(LargeMaskVT, Mask, /*ZeroNewElements=*/true,
17859+ Subtarget, DAG, DL);
17860+ Passthru = Passthru.isUndef() ? DAG.getUNDEF(LargeVecVT)
17861+ : widenSubVector(LargeVecVT, Passthru,
17862+ /*ZeroNewElements=*/false,
17863+ Subtarget, DAG, DL);
17864+
17865+ SDValue Compressed =
17866+ DAG.getNode(ISD::VECTOR_COMPRESS, DL, LargeVecVT, Vec, Mask, Passthru);
17867+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Compressed,
17868+ DAG.getConstant(0, DL, MVT::i64));
17869+ }
17870+
17871+ if (VecVT == MVT::v8i16 || VecVT == MVT::v8i8 || VecVT == MVT::v16i8 ||
17872+ VecVT == MVT::v16i16) {
17873+ MVT LageElementVT = MVT::getIntegerVT(512 / NumElements);
17874+ EVT LargeVecVT = MVT::getVectorVT(LageElementVT, NumElements);
17875+
17876+ Vec = DAG.getNode(ISD::ANY_EXTEND, DL, LargeVecVT, Vec);
17877+ Passthru = Passthru.isUndef()
17878+ ? DAG.getUNDEF(LargeVecVT)
17879+ : DAG.getNode(ISD::ANY_EXTEND, DL, LargeVecVT, Passthru);
17880+
17881+ SDValue Compressed =
17882+ DAG.getNode(ISD::VECTOR_COMPRESS, DL, LargeVecVT, Vec, Mask, Passthru);
17883+ return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Compressed);
17884+ }
17885+
17886+ return SDValue();
17887+ }
17888+
1779817889/// Try to lower a VSELECT instruction to a vector shuffle.
1779917890static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
1780017891 const X86Subtarget &Subtarget,
@@ -32621,6 +32712,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3262132712 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
3262232713 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
3262332714 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG);
32715+ case ISD::VECTOR_COMPRESS: return lowerVECTOR_COMPRESS(Op, Subtarget, DAG);
3262432716 case ISD::VSELECT: return LowerVSELECT(Op, DAG);
3262532717 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3262632718 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
0 commit comments