@@ -15638,16 +15638,18 @@ static SDValue isScalarToVec(SDValue Op) {
1563815638// On little endian, that's just the corresponding element in the other
1563915639// half of the vector. On big endian, it is in the same half but right
1564015640// justified rather than left justified in that half.
15641- static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15642- int LHSMaxIdx, int RHSMinIdx,
15643- int RHSMaxIdx, int HalfVec,
15644- unsigned ValidLaneWidth,
15645- const PPCSubtarget &Subtarget) {
15641+ static void fixupShuffleMaskForPermutedSToV(
15642+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15643+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15644+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
1564615645 for (int i = 0, e = ShuffV.size(); i < e; i++) {
1564715646 int Idx = ShuffV[i];
15648- if (( Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx) )
15647+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt )
1564915648 ShuffV[i] +=
15650- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15649+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15650+ if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15651+ ShuffV[i] +=
15652+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1565115653 }
1565215654}
1565315655
@@ -15686,6 +15688,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
1568615688 OrigSToV.getOperand(0));
1568715689}
1568815690
15691+ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15692+ int HalfVec, int LHSLastElementDefined,
15693+ int RHSLastElementDefined) {
15694+ for (int i : seq<int>(0, ShuffV.size())) {
15695+ int Index = ShuffV[i];
15696+ if (Index < 0) // Negative mask entries are treated as undefined and are not checked.
15697+ continue;
15698+ // First input (checked only when LHSLastElementDefined >= 0): reject an index below HalfVec that is past LHSLastElementDefined.
15699+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15700+ (Index > LHSLastElementDefined))
15701+ return false;
15702+ // Second input (checked only when RHSLastElementDefined >= 0): reject an index greater than HalfVec + RHSLastElementDefined.
15703+ if ((RHSLastElementDefined >= 0) &&
15704+ (Index > HalfVec + RHSLastElementDefined))
15705+ return false;
15706+ }
15707+ return true;
15708+ }
15709+
1568915710// On little endian subtargets, combine shuffles such as:
1569015711// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1569115712// into:
@@ -15733,36 +15754,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1573315754 SDValue SToVLHS = isScalarToVec(LHS);
1573415755 SDValue SToVRHS = isScalarToVec(RHS);
1573515756 if (SToVLHS || SToVRHS) {
15736- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15737- // same type and have differing element sizes, then do not perform
15738- // the following transformation. The current transformation for
15739- // SCALAR_TO_VECTOR assumes that both input vectors have the same
15740- // element size. This will be updated in the future to account for
15741- // differing sizes of the LHS and RHS.
15742- if (SToVLHS && SToVRHS &&
15743- (SToVLHS.getValueType().getScalarSizeInBits() !=
15744- SToVRHS.getValueType().getScalarSizeInBits()))
15745- return Res;
15746-
15747- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15748- : SToVRHS.getValueType().getVectorNumElements();
15749- int NumEltsOut = ShuffV.size();
15757+ EVT VT = SVN->getValueType(0);
15758+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15759+ int ShuffleNumElts = ShuffV.size();
15760+ int HalfVec = ShuffleNumElts / 2;
1575015761 // The width of the "valid lane" (i.e. the lane that contains the value that
1575115762 // is vectorized) needs to be expressed in terms of the number of elements
1575215763 // of the shuffle. It is thereby the ratio of the scalar sizes before and after
15753- // any bitcast.
15754- unsigned ValidLaneWidth =
15755- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15756- LHS.getValueType().getScalarSizeInBits()
15757- : SToVRHS.getValueType().getScalarSizeInBits() /
15758- RHS.getValueType().getScalarSizeInBits();
15764+ // any bitcast, which will be set later on if the LHS or RHS are
15765+ // SCALAR_TO_VECTOR nodes.
15766+ unsigned LHSNumValidElts = HalfVec;
15767+ unsigned RHSNumValidElts = HalfVec;
1575915768
1576015769 // Initially assume that neither input is permuted. These will be adjusted
15761- // accordingly if either input is.
15762- int LHSMaxIdx = -1;
15763- int RHSMinIdx = -1;
15764- int RHSMaxIdx = -1;
15765- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15770+ // accordingly if either input is. Note that -1 means that all elements
15771+ // are undefined.
15772+ int LHSFirstElt = 0;
15773+ int RHSFirstElt = ShuffleNumElts;
15774+ int LHSLastElt = -1;
15775+ int RHSLastElt = -1;
1576615776
1576715777 // Get the permuted scalar to vector nodes for the source(s) that come from
1576815778 // ISD::SCALAR_TO_VECTOR.
@@ -15771,33 +15781,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1577115781 // the value into element zero. Since scalar size of LHS and RHS may differ
1577215782 // after isScalarToVec, this should be checked using their own sizes.
1577315783 if (SToVLHS) {
15774- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15784+ int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15785+ if (!IsLittleEndian && LHSScalarSize >= 64)
1577515786 return Res;
1577615787 // Set up the values for the shuffle vector fixup.
15777- LHSMaxIdx = NumEltsOut / NumEltsIn;
15788+ LHSNumValidElts =
15789+ LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15790+ // The last element that comes from the LHS. For example:
15791+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15792+ // The last element that comes from the LHS is actually 0, not 3
15793+ // because elements 1 and higher of a scalar_to_vector are undefined.
15794+ LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
1577815795 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
1577915796 if (SToVLHS.getValueType() != LHS.getValueType())
1578015797 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
1578115798 LHS = SToVLHS;
1578215799 }
1578315800 if (SToVRHS) {
15784- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15801+ int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15802+ if (!IsLittleEndian && RHSScalarSize >= 64)
1578515803 return Res;
15786- RHSMinIdx = NumEltsOut;
15787- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15804+ RHSNumValidElts =
15805+ RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15806+ // The last element that comes from the RHS. For example:
15807+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15808+ // The last element that comes from the RHS is actually 5, not 7
15809+ // because elements 1 and higher of a scalar_to_vector are undefined.
15810+ // It is also not 4 because the original scalar_to_vector is wider and
15811+ // actually contains two i32 elements.
15812+ RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
1578815813 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
1578915814 if (SToVRHS.getValueType() != RHS.getValueType())
1579015815 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
1579115816 RHS = SToVRHS;
1579215817 }
1579315818
15819+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15820+ return Res;
15821+
1579415822 // Fix up the shuffle mask to reflect where the desired element actually is.
1579515823 // The minimum and maximum indices that correspond to element zero for both
1579615824 // the LHS and RHS are computed and will control which shuffle mask entries
1579715825 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15798- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15799- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15800- HalfVec, ValidLaneWidth, Subtarget);
15826+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15827+ fixupShuffleMaskForPermutedSToV(
15828+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15829+ LHSNumValidElts, RHSNumValidElts, Subtarget);
1580115830 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
1580215831
1580315832 // We may have simplified away the shuffle. We won't be able to do anything
0 commit comments