@@ -59273,36 +59273,33 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5927359273 !(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
5927459274 SDValue ExtSrc = SubVec.getOperand(0);
5927559275 int ExtIdxVal = SubVec.getConstantOperandVal(1);
59276- if (ExtIdxVal != 0) {
59277- SmallVector<int, 64> Mask(VecNumElts);
59278- // First create an identity shuffle mask.
59279- for (int i = 0; i != VecNumElts; ++i)
59280- Mask[i] = i;
59281- // Now insert the extracted portion.
59282- for (int i = 0; i != SubVecNumElts; ++i)
59283- Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
59276+ // Create a shuffle mask matching the extraction and insertion.
59277+ SmallVector<int, 64> Mask(VecNumElts);
59278+ std::iota(Mask.begin(), Mask.end(), 0);
59279+ std::iota(Mask.begin() + IdxVal, Mask.begin() + IdxVal + SubVecNumElts,
59280+ ExtIdxVal + VecNumElts);
59281+ if (ExtIdxVal != 0)
5928459282 return DAG.getVectorShuffle(OpVT, dl, Vec, ExtSrc, Mask);
59285- }
59286- // If we're broadcasting, see if we can use a blend instead of
59287- // extract/insert pair. Ensure that the subvector is aligned with the
59288- // insertion/extractions.
59289- if ((ExtIdxVal % SubVecNumElts) == 0 && (IdxVal % SubVecNumElts) == 0 &&
59290- (ExtSrc.getOpcode() == X86ISD::VBROADCAST ||
59291- ExtSrc.getOpcode() == X86ISD::VBROADCAST_LOAD ||
59292- (ExtSrc.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
59293- cast<MemIntrinsicSDNode>(ExtSrc)->getMemoryVT() == SubVecVT))) {
59283+ // See if we can use a blend instead of extract/insert pair.
59284+ SmallVector<int, 64> BlendMask(VecNumElts);
59285+ std::iota(BlendMask.begin(), BlendMask.end(), 0);
59286+ std::iota(BlendMask.begin() + IdxVal,
59287+ BlendMask.begin() + IdxVal + SubVecNumElts, VecNumElts + IdxVal);
59288+ if (isShuffleEquivalent(Mask, BlendMask, Vec, ExtSrc)) {
59289+ assert((IdxVal == 0 || IdxVal == SubVecNumElts) &&
59290+ "Unaligned subvector insertion");
5929459291 if (OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
59295- uint64_t BlendMask = IdxVal == 0 ? 0x0F : 0xF0;
5929659292 SDValue Blend = DAG.getNode(
5929759293 X86ISD::BLENDI, dl, MVT::v8f32, DAG.getBitcast(MVT::v8f32, Vec),
5929859294 DAG.getBitcast(MVT::v8f32, ExtSrc),
59299- DAG.getTargetConstant(BlendMask , dl, MVT::i8));
59295+ DAG.getTargetConstant(IdxVal == 0 ? 0x0F : 0xF0 , dl, MVT::i8));
5930059296 return DAG.getBitcast(OpVT, Blend);
5930159297 } else if (OpVT.is512BitVector() && SubVecVT.is256BitVector()) {
59302- SDValue Lo = DAG.getBitcast(MVT::v8f64, IdxVal == 0 ? ExtSrc : Vec);
59303- SDValue Hi = DAG.getBitcast(MVT::v8f64, IdxVal == 0 ? Vec : ExtSrc);
59298+ MVT ShufVT = OpVT.isInteger() ? MVT::v8i64 : MVT::v8f64;
59299+ SDValue Lo = DAG.getBitcast(ShufVT, IdxVal == 0 ? ExtSrc : Vec);
59300+ SDValue Hi = DAG.getBitcast(ShufVT, IdxVal == 0 ? Vec : ExtSrc);
5930459301 SDValue Shuffle =
59305- DAG.getNode(X86ISD::SHUF128, dl, MVT::v8f64 , Lo, Hi,
59302+ DAG.getNode(X86ISD::SHUF128, dl, ShufVT , Lo, Hi,
5930659303 getV4X86ShuffleImm8ForMask({0, 1, 2, 3}, dl, DAG));
5930759304 return DAG.getBitcast(OpVT, Shuffle);
5930859305 }
0 commit comments