@@ -15390,14 +15390,16 @@ static void fixupShuffleMaskForPermutedSToV(
1539015390 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
1539115391 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
1539215392 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15393+ int LHSEltFixup =
15394+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15395+ int RHSEltFixup =
15396+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1539315397 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
1539415398 int Idx = ShuffV[I];
1539515399 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15396- ShuffV[I] +=
15397- Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15400+ ShuffV[I] += LHSEltFixup;
1539815401 if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15399- ShuffV[I] +=
15400- Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15402+ ShuffV[I] += RHSEltFixup;
1540115403 }
1540215404}
1540315405
@@ -15455,6 +15457,31 @@ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
1545515457 return true;
1545615458}
1545715459
// Shared helper for the LHS and RHS scalar_to_vector operands of a vector
// shuffle. It fills in the two values used by the later shuffle-mask fixup and
// returns the replacement operand:
//   - NumValidElts (out): ScalarSize divided by the scalar bit width of
//     VecShuffOperand's value type.
//   - LastElt (out): the last mask index treated as defined for this operand,
//     computed relative to FirstElt.
//   - Return value: the result of getSToVPermuted(SToVNode, ...), bitcast to
//     VecShuffOperand's value type when the two types differ.
// NOTE(review): ScalarSize and ShuffleEltWidth are presumably both in bits
// (the caller passes getScalarSizeInBits() for ScalarSize) - confirm.
15460+ static SDValue generateSToVPermutedForVecShuffle(
15461+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15462+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15463+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15464+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15465+ // Set up the values for the shuffle vector fixup.
15466+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15467+ // The last element depends on whether the input comes from the LHS or RHS.
15468+ //
15469+ // For example:
15470+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15471+ //
15472+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
15473+ // because elements 1 and higher of a scalar_to_vector are undefined.
15474+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
15475+ // because elements 1 and higher of a scalar_to_vector are undefined.
15476+ // It is also not 4 because the original scalar_to_vector is wider and
15477+ // actually contains two i32 elements.
// Dividing by (ShuffleEltWidth + 1) rather than ShuffleEltWidth gives the
// zero-based offset of the last valid element instead of the element count:
// in the example above, 32 / 33 == 0 for the LHS and 64 / 33 == 1 for the RHS,
// which with a FirstElt of 0 and 4 respectively yields 0 and 5.
15478+ LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
15479+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
// Keep the returned operand's type equal to the type of the operand it
// replaces.
15480+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15481+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15482+ return SToVPermuted;
15483+ }
15484+
1545815485// On little endian subtargets, combine shuffles such as:
1545915486// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1546015487// into:
@@ -15532,36 +15559,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1553215559 int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
1553315560 if (!IsLittleEndian && LHSScalarSize >= 64)
1553415561 return Res;
15535- // Set up the values for the shuffle vector fixup.
15536- LHSNumValidElts =
15537- LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15538- // The last element that comes from the LHS. For example:
15539- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15540- // The last element that comes from the LHS is actually 0, not 3
15541- // because elements 1 and higher of a scalar_to_vector are undefined.
15542- LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15543- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15544- if (SToVLHS.getValueType() != LHS.getValueType())
15545- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15546- LHS = SToVLHS;
15562+ LHS = generateSToVPermutedForVecShuffle(
15563+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15564+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
1554715565 }
1554815566 if (SToVRHS) {
1554915567 int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
1555015568 if (!IsLittleEndian && RHSScalarSize >= 64)
1555115569 return Res;
15552- RHSNumValidElts =
15553- RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15554- // The last element that comes from the RHS. For example:
15555- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15556- // The last element that comes from the RHS is actually 5, not 7
15557- // because elements 1 and higher of a scalar_to_vector are undefined.
15558- // It is also not 4 because the original scalar_to_vector is wider and
15559- // actually contains two i32 elements.
15560- RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15561- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15562- if (SToVRHS.getValueType() != RHS.getValueType())
15563- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15564- RHS = SToVRHS;
15570+ RHS = generateSToVPermutedForVecShuffle(
15571+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15572+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
1556515573 }
1556615574
1556715575 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
0 commit comments