@@ -15512,14 +15512,16 @@ static void fixupShuffleMaskForPermutedSToV(
1551215512 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
1551315513 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
1551415514 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15515+ int LHSEltFixup =
15516+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15517+ int RHSEltFixup =
15518+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1551515519 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
1551615520 int Idx = ShuffV[I];
1551715521 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15518- ShuffV[I] +=
15519- Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15522+ ShuffV[I] += LHSEltFixup;
1552015523 if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15521- ShuffV[I] +=
15522- Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15524+ ShuffV[I] += RHSEltFixup;
1552315525 }
1552415526}
1552515527
@@ -15577,6 +15579,31 @@ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
1557715579 return true;
1557815580}
1557915581
15582+ static SDValue generateSToVPermutedForVecShuffle(
15583+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15584+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15585+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15586+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15587+ // Set up the values for the shuffle vector fixup.
15588+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15589+ // The last element depends on if the input comes from the LHS or RHS.
15590+ //
15591+ // For example:
15592+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15593+ //
15594+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
15595+ // because elements 1 and higher of a scalar_to_vector are undefined.
15596+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
15597+ // because elements 1 and higher of a scalar_to_vector are undefined.
15598+ // It is also not 4 because the original scalar_to_vector is wider and
15599+ // actually contains two i32 elements.
15600+ LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
15601+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15602+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15603+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15604+ return SToVPermuted;
15605+ }
15606+
1558015607// On little endian subtargets, combine shuffles such as:
1558115608// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1558215609// into:
@@ -15654,36 +15681,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1565415681 int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
1565515682 if (!IsLittleEndian && LHSScalarSize >= 64)
1565615683 return Res;
15657- // Set up the values for the shuffle vector fixup.
15658- LHSNumValidElts =
15659- LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15660- // The last element that comes from the LHS. For example:
15661- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15662- // The last element that comes from the LHS is actually 0, not 3
15663- // because elements 1 and higher of a scalar_to_vector are undefined.
15664- LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15665- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15666- if (SToVLHS.getValueType() != LHS.getValueType())
15667- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15668- LHS = SToVLHS;
15684+ LHS = generateSToVPermutedForVecShuffle(
15685+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15686+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
1566915687 }
1567015688 if (SToVRHS) {
1567115689 int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
1567215690 if (!IsLittleEndian && RHSScalarSize >= 64)
1567315691 return Res;
15674- RHSNumValidElts =
15675- RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15676- // The last element that comes from the RHS. For example:
15677- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15678- // The last element that comes from the RHS is actually 5, not 7
15679- // because elements 1 and higher of a scalar_to_vector are undefined.
15680- // It is also not 4 because the original scalar_to_vector is wider and
15681- // actually contains two i32 elements.
15682- RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15683- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15684- if (SToVRHS.getValueType() != RHS.getValueType())
15685- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15686- RHS = SToVRHS;
15692+ RHS = generateSToVPermutedForVecShuffle(
15693+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15694+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
1568715695 }
1568815696
1568915697 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
0 commit comments