@@ -15687,16 +15687,20 @@ static SDValue isScalarToVec(SDValue Op) {
1568715687// On little endian, that's just the corresponding element in the other
1568815688// half of the vector. On big endian, it is in the same half but right
1568915689// justified rather than left justified in that half.
15690- static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15691- int LHSMaxIdx, int RHSMinIdx,
15692- int RHSMaxIdx, int HalfVec,
15693- unsigned ValidLaneWidth,
15694- const PPCSubtarget &Subtarget) {
15695- for (int i = 0, e = ShuffV.size(); i < e; i++) {
15696- int Idx = ShuffV[i];
15697- if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15698- ShuffV[i] +=
15699- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
// Adjusts the entries of the shuffle mask ShuffV in place.
//   ShuffV           - shuffle mask; matching entries are incremented.
//   LHSFirstElt,
//   LHSLastElt       - inclusive range of mask values that receive the LHS
//                      fixup. A range with LHSLastElt < LHSFirstElt matches
//                      nothing.
//   RHSFirstElt,
//   RHSLastElt       - inclusive range of mask values that receive the RHS
//                      fixup; only tested when the LHS range did not match.
//   HalfVec          - amount added on little endian subtargets; on big
//                      endian the per-side valid element count is subtracted
//                      from it first.
//   LHSNumValidElts,
//   RHSNumValidElts  - per-side element counts used for the big endian fixup.
//   Subtarget        - queried only for isLittleEndian().
// Mask entries that fall in neither range are left unchanged.
15690+ static void fixupShuffleMaskForPermutedSToV(
15691+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15692+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15693+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
// The fixups are loop invariant, so they are computed once up front. Each
// input gets its own fixup because the two valid element counts may differ.
15694+ int LHSEltFixup =
15695+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15696+ int RHSEltFixup =
15697+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15698+ for (int I = 0, E = ShuffV.size(); I < E; ++I) {
// Test the original mask value so an entry is adjusted at most once.
15699+ int Idx = ShuffV[I];
15700+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15701+ ShuffV[I] += LHSEltFixup;
15702+ else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15703+ ShuffV[I] += RHSEltFixup;
1570015704 }
1570115705}
1570215706
@@ -15735,6 +15739,51 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
1573515739 OrigSToV.getOperand(0));
1573615740}
1573715741
// Checks every entry of the shuffle mask ShuffV against the last defined
// element of each shuffle input.
//   ShuffV                - shuffle mask to inspect (not modified).
//   HalfVec               - upper bound (exclusive) of the indices tested by
//                           the first-input check, and the offset added to
//                           RHSLastElementDefined in the second-input check.
//   LHSLastElementDefined - last defined element for the first input; a
//                           negative value disables the first-input check.
//   RHSLastElementDefined - last defined element for the second input; a
//                           negative value disables the second-input check.
// Returns false as soon as one non-negative mask entry fails an enabled
// check, and true otherwise (including for an empty mask).
// NOTE(review): the visible caller passes an RHS value that is already
// offset by the number of shuffle elements, so HalfVec + RHSLastElementDefined
// can be at or above the largest possible mask index -- confirm this bound is
// the intended one.
15742+ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15743+ int HalfVec, int LHSLastElementDefined,
15744+ int RHSLastElementDefined) {
15745+ for (int Index : ShuffV) {
15746+ if (Index < 0) // Skip explicitly undefined mask indices.
15747+ continue;
15748+ // Handle first input vector of the vector_shuffle.
// Only indices below HalfVec are examined by this check.
15749+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15750+ (Index > LHSLastElementDefined))
15751+ return false;
15752+ // Handle second input vector of the vector_shuffle.
15753+ if ((RHSLastElementDefined >= 0) &&
15754+ (Index > HalfVec + RHSLastElementDefined))
15755+ return false;
15756+ }
15757+ return true;
15758+ }
15759+
// Builds the permuted form of a scalar_to_vector shuffle input and computes
// the values needed to fix up the shuffle mask for that input.
//   ScalarSize      - size in bits of the scalar held by SToVNode.
//   ShuffleEltWidth - width in bits of one element of the shuffle.
//   NumValidElts    - [out] ScalarSize divided by the scalar size in bits of
//                     VecShuffOperand's type.
//   FirstElt        - mask index of the first element of this input.
//   LastElt         - [out] mask index of the last element of this input that
//                     is covered by the scalar (see the example below).
//   VecShuffOperand - the shuffle operand being replaced; only its value type
//                     is read.
//   SToVNode        - the scalar_to_vector node handed to getSToVPermuted().
//   DAG, Subtarget  - forwarded to getSToVPermuted(); DAG also creates the
//                     bitcast when one is needed.
// Returns the permuted node, bitcast to VecShuffOperand's type when the two
// types differ.
15760+ static SDValue generateSToVPermutedForVecShuffle(
15761+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15762+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15763+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15764+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15765+ // Set up the values for the shuffle vector fixup.
15766+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15767+ // The last element depends on if the input comes from the LHS or RHS.
15768+ //
15769+ // For example:
15770+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15771+ //
15772+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
15773+ // because elements 1 and higher of a scalar_to_vector are undefined.
15774+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
15775+ // because elements 1 and higher of a scalar_to_vector are undefined.
15776+ // It is also not 4 because the original scalar_to_vector is wider and
15777+ // actually contains two i32 elements.
// When the scalar is no wider than a shuffle element, it covers exactly one
// element, so the last element equals the first.
15778+ LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
15779+ ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
15780+ : FirstElt;
15781+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
// Keep the replacement operand's type identical to the operand it replaces.
15782+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15783+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15784+ return SToVPermuted;
15785+ }
15786+
1573815787// On little endian subtargets, combine shuffles such as:
1573915788// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1574015789// into:
@@ -15782,71 +15831,64 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1578215831 SDValue SToVLHS = isScalarToVec(LHS);
1578315832 SDValue SToVRHS = isScalarToVec(RHS);
1578415833 if (SToVLHS || SToVRHS) {
15785- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15786- // same type and have differing element sizes, then do not perform
15787- // the following transformation. The current transformation for
15788- // SCALAR_TO_VECTOR assumes that both input vectors have the same
15789- // element size. This will be updated in the future to account for
15790- // differing sizes of the LHS and RHS.
15791- if (SToVLHS && SToVRHS &&
15792- (SToVLHS.getValueType().getScalarSizeInBits() !=
15793- SToVRHS.getValueType().getScalarSizeInBits()))
15794- return Res;
15795-
15796- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15797- : SToVRHS.getValueType().getVectorNumElements();
15798- int NumEltsOut = ShuffV.size();
15834+ EVT VT = SVN->getValueType(0);
15835+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15836+ int ShuffleNumElts = ShuffV.size();
15837+ int HalfVec = ShuffleNumElts / 2;
1579915838 // The width of the "valid lane" (i.e. the lane that contains the value that
1580015839 // is vectorized) needs to be expressed in terms of the number of elements
1580115840 // of the shuffle. It is thereby the ratio of the values before and after
15802- // any bitcast.
15803- unsigned ValidLaneWidth =
15804- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15805- LHS.getValueType().getScalarSizeInBits()
15806- : SToVRHS.getValueType().getScalarSizeInBits() /
15807- RHS.getValueType().getScalarSizeInBits();
15841+ // any bitcast, which will be set later on if the LHS or RHS are
15842+ // SCALAR_TO_VECTOR nodes.
15843+ unsigned LHSNumValidElts = HalfVec;
15844+ unsigned RHSNumValidElts = HalfVec;
1580815845
1580915846 // Initially assume that neither input is permuted. These will be adjusted
15810- // accordingly if either input is.
15811- int LHSMaxIdx = -1;
15812- int RHSMinIdx = -1;
15813- int RHSMaxIdx = -1;
15814- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15847+ // accordingly if either input is. Note, that -1 means that all elements
15848+ // are undefined.
15849+ int LHSFirstElt = 0;
15850+ int RHSFirstElt = ShuffleNumElts;
15851+ int LHSLastElt = -1;
15852+ int RHSLastElt = -1;
1581515853
1581615854 // Get the permuted scalar to vector nodes for the source(s) that come from
1581715855 // ISD::SCALAR_TO_VECTOR.
1581815856 // On big endian systems, this only makes sense for element sizes smaller
1581915857 // than 64 bits since for 64-bit elements, all instructions already put
1582015858 // the value into element zero. Since scalar size of LHS and RHS may differ
1582115859 // after isScalarToVec, this should be checked using their own sizes.
15860+ int LHSScalarSize = 0;
15861+ int RHSScalarSize = 0;
1582215862 if (SToVLHS) {
15823- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15863+ LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15864+ if (!IsLittleEndian && LHSScalarSize >= 64)
1582415865 return Res;
15825- // Set up the values for the shuffle vector fixup.
15826- LHSMaxIdx = NumEltsOut / NumEltsIn;
15827- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15828- if (SToVLHS.getValueType() != LHS.getValueType())
15829- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15830- LHS = SToVLHS;
1583115866 }
1583215867 if (SToVRHS) {
15833- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15868+ RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15869+ if (!IsLittleEndian && RHSScalarSize >= 64)
1583415870 return Res;
15835- RHSMinIdx = NumEltsOut;
15836- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15837- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15838- if (SToVRHS.getValueType() != RHS.getValueType())
15839- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15840- RHS = SToVRHS;
1584115871 }
15872+ if (LHSScalarSize != 0)
15873+ LHS = generateSToVPermutedForVecShuffle(
15874+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15875+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15876+ if (RHSScalarSize != 0)
15877+ RHS = generateSToVPermutedForVecShuffle(
15878+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15879+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15880+
15881+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15882+ return Res;
1584215883
1584315884 // Fix up the shuffle mask to reflect where the desired element actually is.
1584415885 // The minimum and maximum indices that correspond to element zero for both
1584515886 // the LHS and RHS are computed and will control which shuffle mask entries
1584615887 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15847- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15848- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15849- HalfVec, ValidLaneWidth, Subtarget);
15888+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15889+ fixupShuffleMaskForPermutedSToV(
15890+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15891+ LHSNumValidElts, RHSNumValidElts, Subtarget);
1585015892 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
1585115893
1585215894 // We may have simplified away the shuffle. We won't be able to do anything
0 commit comments