@@ -15642,14 +15642,16 @@ static void fixupShuffleMaskForPermutedSToV(
1564215642 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
1564315643 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
1564415644 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15645+ int LHSEltFixup =
15646+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15647+ int RHSEltFixup =
15648+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1564515649 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
1564615650 int Idx = ShuffV[I];
1564715651 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15648- ShuffV[I] +=
15649- Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15652+ ShuffV[I] += LHSEltFixup;
1565015653 if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15651- ShuffV[I] +=
15652- Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15654+ ShuffV[I] += RHSEltFixup;
1565315655 }
1565415656}
1565515657
@@ -15707,6 +15709,31 @@ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
1570715709 return true;
1570815710}
1570915711
// Helper used for both the LHS and the RHS operand of the vector shuffle
// combine when that operand is fed by a scalar_to_vector node.
//
// Parameters:
//   ScalarSize      - scalar size in bits of SToVNode's value type (callers
//                     compute it with getValueType().getScalarSizeInBits()).
//   ShuffleEltWidth - presumably the element width in bits of the shuffle's
//                     vector type; it is defined outside this function, so
//                     confirm against the caller.
//   NumValidElts    - [out] ScalarSize divided by the element width of
//                     VecShuffOperand's type.
//   FirstElt        - first shuffle-mask index belonging to this operand.
//   LastElt         - [out] last shuffle-mask index of this operand that is
//                     backed by the scalar (see the example in the body).
//   VecShuffOperand - the shuffle operand being replaced; only its value type
//                     is read here.
//   SToVNode        - the scalar_to_vector node passed to getSToVPermuted.
//
// Returns the value produced by getSToVPermuted, bitcast to VecShuffOperand's
// type when the two types differ.
15712+ static SDValue generateSToVPermutedForVecShuffle(
15713+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15714+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15715+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15716+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15717+ // Set up the values for the shuffle vector fixup: the number of operand-typed elements that the scalar covers.
15718+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15719+ // The last element depends on whether the input comes from the LHS or RHS.
15720+ //
15721+ // For example:
15722+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15723+ //
15724+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
15725+ // because elements 1 and higher of a scalar_to_vector are undefined.
15726+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
15727+ // because elements 1 and higher of a scalar_to_vector are undefined.
15728+ // It is also not 4 because the original scalar_to_vector is wider and
15729+ // actually contains two i32 elements.
// Dividing by (ShuffleEltWidth + 1) rather than ShuffleEltWidth makes the
// integer quotient 0 when the scalar is exactly one shuffle element wide
// (32 / 33) and 1 when it is two elements wide (64 / 33), i.e. the offset of
// the last defined element from FirstElt.
15730+ LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
// NOTE(review): getSToVPermuted is defined elsewhere in the file; presumably
// it builds the permuted form of the scalar_to_vector node -- confirm there.
15731+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
// Keep the result in the same type as the operand it replaces.
15732+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15733+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15734+ return SToVPermuted;
15735+ }
15736+
1571015737// On little endian subtargets, combine shuffles such as:
1571115738// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1571215739// into:
@@ -15784,36 +15811,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1578415811 int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
1578515812 if (!IsLittleEndian && LHSScalarSize >= 64)
1578615813 return Res;
15787- // Set up the values for the shuffle vector fixup.
15788- LHSNumValidElts =
15789- LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15790- // The last element that comes from the LHS. For example:
15791- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15792- // The last element that comes from the LHS is actually 0, not 3
15793- // because elements 1 and higher of a scalar_to_vector are undefined.
15794- LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15795- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15796- if (SToVLHS.getValueType() != LHS.getValueType())
15797- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15798- LHS = SToVLHS;
15814+ LHS = generateSToVPermutedForVecShuffle(
15815+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15816+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
1579915817 }
1580015818 if (SToVRHS) {
1580115819 int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
1580215820 if (!IsLittleEndian && RHSScalarSize >= 64)
1580315821 return Res;
15804- RHSNumValidElts =
15805- RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15806- // The last element that comes from the RHS. For example:
15807- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15808- // The last element that comes from the RHS is actually 5, not 7
15809- // because elements 1 and higher of a scalar_to_vector are undefined.
15810- // It is also not 4 because the original scalar_to_vector is wider and
15811- // actually contains two i32 elements.
15812- RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15813- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15814- if (SToVRHS.getValueType() != RHS.getValueType())
15815- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15816- RHS = SToVRHS;
15822+ RHS = generateSToVPermutedForVecShuffle(
15823+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15824+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
1581715825 }
1581815826
1581915827 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
0 commit comments