@@ -15687,16 +15687,18 @@ static SDValue isScalarToVec(SDValue Op) {
1568715687// On little endian, that's just the corresponding element in the other
1568815688// half of the vector. On big endian, it is in the same half but right
1568915689// justified rather than left justified in that half.
// Rewrites, in place, the entries of ShuffV that fall inside one of two
// inclusive index ranges: [LHSFirstElt, LHSLastElt] and
// [RHSFirstElt, RHSLastElt]. A matching entry is increased by HalfVec on
// little endian subtargets, and by HalfVec minus the matching input's number
// of valid elements (LHSNumValidElts or RHSNumValidElts) otherwise. Entries
// outside both ranges are left unchanged.
15690- static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15691- int LHSMaxIdx, int RHSMinIdx,
15692- int RHSMaxIdx, int HalfVec,
15693- unsigned ValidLaneWidth,
15694- const PPCSubtarget &Subtarget) {
15690+ static void fixupShuffleMaskForPermutedSToV(
15691+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15692+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15693+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
1569515694 for (int i = 0, e = ShuffV.size(); i < e; i++) {
1569615695 int Idx = ShuffV[i];
15697- if (( Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx) )
// Idx keeps the entry's original value, so the LHS and RHS range tests are
// both made against the unmodified mask entry; they are two independent
// `if` statements, each with its own adjustment.
15696+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt )
1569815697 ShuffV[i] +=
15699- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15698+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15699+ if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15700+ ShuffV[i] +=
15701+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1570015702 }
1570115703}
1570215704
@@ -15735,6 +15737,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
1573515737 OrigSToV.getOperand(0));
1573615738}
1573715739
// Checks the shuffle mask ShuffV against the last defined element of each
// shuffle input. Returns false as soon as a non-negative mask entry fails one
// of the two bound checks below, and true if no entry does. A negative
// LHSLastElementDefined or RHSLastElementDefined disables the check for that
// input.
// NOTE(review): the caller visible in this diff passes an RHS bound that
// already includes the RHSFirstElt offset, and HalfVec is added on top of it
// below - confirm that this is the intended upper limit.
15740+ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15741+ int HalfVec, int LHSLastElementDefined,
15742+ int RHSLastElementDefined) {
15743+ for (int i : seq<int>(0, ShuffV.size())) {
15744+ int Index = ShuffV[i];
15745+ if (Index < 0) // Skip explicitly undefined mask indices.
15746+ continue;
15747+ // Handle first input vector of the vector_shuffle.
// Only entries below HalfVec are tested here; such an entry is rejected when
// it lies past LHSLastElementDefined. Entries at or above HalfVec are never
// rejected by this first check.
15748+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15749+ (Index > LHSLastElementDefined))
15750+ return false;
15751+ // Handle second input vector of the vector_shuffle.
// There is no lower bound on Index in this check: any entry greater than
// HalfVec + RHSLastElementDefined is rejected.
15752+ if ((RHSLastElementDefined >= 0) &&
15753+ (Index > HalfVec + RHSLastElementDefined))
15754+ return false;
15755+ }
15756+ return true;
15757+ }
15758+
1573815759// On little endian subtargets, combine shuffles such as:
1573915760// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1574015761// into:
@@ -15782,36 +15803,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1578215803 SDValue SToVLHS = isScalarToVec(LHS);
1578315804 SDValue SToVRHS = isScalarToVec(RHS);
1578415805 if (SToVLHS || SToVRHS) {
15785- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15786- // same type and have differing element sizes, then do not perform
15787- // the following transformation. The current transformation for
15788- // SCALAR_TO_VECTOR assumes that both input vectors have the same
15789- // element size. This will be updated in the future to account for
15790- // differing sizes of the LHS and RHS.
15791- if (SToVLHS && SToVRHS &&
15792- (SToVLHS.getValueType().getScalarSizeInBits() !=
15793- SToVRHS.getValueType().getScalarSizeInBits()))
15794- return Res;
15795-
15796- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15797- : SToVRHS.getValueType().getVectorNumElements();
15798- int NumEltsOut = ShuffV.size();
15806+ EVT VT = SVN->getValueType(0);
15807+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15808+ int ShuffleNumElts = ShuffV.size();
15809+ int HalfVec = ShuffleNumElts / 2;
1579915810 // The width of the "valid lane" (i.e. the lane that contains the value that
1580015811 // is vectorized) needs to be expressed in terms of the number of elements
1580115812 // of the shuffle. It is thereby the ratio of the values before and after
15802- // any bitcast.
15803- unsigned ValidLaneWidth =
15804- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15805- LHS.getValueType().getScalarSizeInBits()
15806- : SToVRHS.getValueType().getScalarSizeInBits() /
15807- RHS.getValueType().getScalarSizeInBits();
15813+ // any bitcast, which will be set later on if the LHS or RHS are
15814+ // SCALAR_TO_VECTOR nodes.
15815+ unsigned LHSNumValidElts = HalfVec;
15816+ unsigned RHSNumValidElts = HalfVec;
1580815817
1580915818 // Initially assume that neither input is permuted. These will be adjusted
15810- // accordingly if either input is.
15811- int LHSMaxIdx = -1;
15812- int RHSMinIdx = -1;
15813- int RHSMaxIdx = -1;
15814- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15819+ // accordingly if either input is. Note, that -1 means that all elements
15820+ // are undefined.
15821+ int LHSFirstElt = 0;
15822+ int RHSFirstElt = ShuffleNumElts;
15823+ int LHSLastElt = -1;
15824+ int RHSLastElt = -1;
1581515825
1581615826 // Get the permuted scalar to vector nodes for the source(s) that come from
1581715827 // ISD::SCALAR_TO_VECTOR.
@@ -15820,33 +15830,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1582015830 // the value into element zero. Since scalar size of LHS and RHS may differ
1582115831 // after isScalarToVec, this should be checked using their own sizes.
1582215832 if (SToVLHS) {
15823- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15833+ int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15834+ if (!IsLittleEndian && LHSScalarSize >= 64)
1582415835 return Res;
1582515836 // Set up the values for the shuffle vector fixup.
15826- LHSMaxIdx = NumEltsOut / NumEltsIn;
15837+ LHSNumValidElts =
15838+ LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15839+ // The last element that comes from the LHS. For example:
15840+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15841+ // The last element that comes from the LHS is actually 0, not 3
15842+ // because elements 1 and higher of a scalar_to_vector are undefined.
15843+ LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
1582715844 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
1582815845 if (SToVLHS.getValueType() != LHS.getValueType())
1582915846 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
1583015847 LHS = SToVLHS;
1583115848 }
1583215849 if (SToVRHS) {
15833- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15850+ int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15851+ if (!IsLittleEndian && RHSScalarSize >= 64)
1583415852 return Res;
15835- RHSMinIdx = NumEltsOut;
15836- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15853+ RHSNumValidElts =
15854+ RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15855+ // The last element that comes from the RHS. For example:
15856+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15857+ // The last element that comes from the RHS is actually 5, not 7
15858+ // because elements 1 and higher of a scalar_to_vector are undefined.
15859+ // It is also not 4 because the original scalar_to_vector is wider and
15860+ // actually contains two i32 elements.
15861+ RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
1583715862 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
1583815863 if (SToVRHS.getValueType() != RHS.getValueType())
1583915864 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
1584015865 RHS = SToVRHS;
1584115866 }
1584215867
15868+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15869+ return Res;
15870+
1584315871 // Fix up the shuffle mask to reflect where the desired element actually is.
1584415872 // The minimum and maximum indices that correspond to element zero for both
1584515873 // the LHS and RHS are computed and will control which shuffle mask entries
1584615874 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15847- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15848- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15849- HalfVec, ValidLaneWidth, Subtarget);
15875+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15876+ fixupShuffleMaskForPermutedSToV(
15877+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15878+ LHSNumValidElts, RHSNumValidElts, Subtarget);
1585015879 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
1585115880
1585215881 // We may have simplified away the shuffle. We won't be able to do anything
0 commit comments