@@ -15386,16 +15386,18 @@ static SDValue isScalarToVec(SDValue Op) {
1538615386// On little endian, that's just the corresponding element in the other
1538715387// half of the vector. On big endian, it is in the same half but right
1538815388// justified rather than left justified in that half.
15389- static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15390- int LHSMaxIdx, int RHSMinIdx,
15391- int RHSMaxIdx, int HalfVec,
15392- unsigned ValidLaneWidth,
15393- const PPCSubtarget &Subtarget) {
// Updated form (the '+' lines below): rewrites ShuffV in place.
//   - An entry whose original value lies in the closed range
//     [LHSFirstElt, LHSLastElt] is increased by HalfVec on little endian,
//     otherwise by HalfVec - LHSNumValidElts.
//   - An entry whose original value lies in the closed range
//     [RHSFirstElt, RHSLastElt] is increased by HalfVec on little endian,
//     otherwise by HalfVec - RHSNumValidElts.
//   - Every other entry is left unchanged.
// The replaced form (the '-' lines) used the half-open ranges [0, LHSMaxIdx)
// and [RHSMinIdx, RHSMaxIdx) and a single ValidLaneWidth shared by both
// inputs; the updated form carries a separate valid-element count per input.
15389+ static void fixupShuffleMaskForPermutedSToV(
15390+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15391+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15392+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
1539415393 for (int i = 0, e = ShuffV.size(); i < e; i++) {
1539515394 int Idx = ShuffV[i]; // Snapshot of the entry; both range tests below use this value, not the updated ShuffV[i].
15396- if (( Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx) )
// Entry selects an element in the LHS range (bounds inclusive).
15395+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt )
1539715396 ShuffV[i] +=
15398- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15397+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
// Entry selects an element in the RHS range (bounds inclusive). This is a
// separate 'if', not an 'else if'; it tests the snapshot Idx taken above.
15398+ if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15399+ ShuffV[i] +=
15400+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1539915401 }
1540015402}
1540115403
@@ -15434,6 +15436,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
1543415436 OrigSToV.getOperand(0));
1543515437}
1543615438
// Scans the shuffle mask ShuffV and returns false as soon as an entry fails
// one of the two range tests below; returns true if no entry fails.
//   ShuffV                - the shuffle mask to inspect (not modified).
//   HalfVec               - threshold used by both tests; the caller visible
//                           in this diff passes half the mask length.
//   LHSLastElementDefined - a negative value disables the first-input test.
//   RHSLastElementDefined - a negative value disables the second-input test.
15439+ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15440+ int HalfVec, int LHSLastElementDefined,
15441+ int RHSLastElementDefined) {
15442+ for (int i : seq<int>(0, ShuffV.size())) {
15443+ int Index = ShuffV[i];
15444+ if (Index < 0) // Skip explicitly undefined mask indices.
15445+ continue;
15446+ // Handle first input vector of the vector_shuffle.
// Rejects an entry that is below HalfVec but above LHSLastElementDefined.
15447+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15448+ (Index > LHSLastElementDefined))
15449+ return false;
15450+ // Handle second input vector of the vector_shuffle.
// Rejects an entry above HalfVec + RHSLastElementDefined.
// NOTE(review): unlike the first-input test this one has no lower bound on
// Index, and the caller visible in this diff passes a value that already
// includes the second input's base offset - confirm the threshold is as
// intended.
15451+ if ((RHSLastElementDefined >= 0) &&
15452+ (Index > HalfVec + RHSLastElementDefined))
15453+ return false;
15454+ }
15455+ return true;
15456+ }
15457+
1543715458// On little endian subtargets, combine shuffles such as:
1543815459// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1543915460// into:
@@ -15481,36 +15502,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1548115502 SDValue SToVLHS = isScalarToVec(LHS);
1548215503 SDValue SToVRHS = isScalarToVec(RHS);
1548315504 if (SToVLHS || SToVRHS) {
15484- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15485- // same type and have differing element sizes, then do not perform
15486- // the following transformation. The current transformation for
15487- // SCALAR_TO_VECTOR assumes that both input vectors have the same
15488- // element size. This will be updated in the future to account for
15489- // differing sizes of the LHS and RHS.
15490- if (SToVLHS && SToVRHS &&
15491- (SToVLHS.getValueType().getScalarSizeInBits() !=
15492- SToVRHS.getValueType().getScalarSizeInBits()))
15493- return Res;
15494-
15495- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15496- : SToVRHS.getValueType().getVectorNumElements();
15497- int NumEltsOut = ShuffV.size();
15505+ EVT VT = SVN->getValueType(0);
15506+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15507+ int ShuffleNumElts = ShuffV.size();
15508+ int HalfVec = ShuffleNumElts / 2;
1549815509 // The width of the "valid lane" (i.e. the lane that contains the value that
1549915510 // is vectorized) needs to be expressed in terms of the number of elements
1550015511 // of the shuffle. It is thereby the ratio of the values before and after
15501- // any bitcast.
15502- unsigned ValidLaneWidth =
15503- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15504- LHS.getValueType().getScalarSizeInBits()
15505- : SToVRHS.getValueType().getScalarSizeInBits() /
15506- RHS.getValueType().getScalarSizeInBits();
15512+ // any bitcast, which will be set later on if the LHS or RHS are
15513+ // SCALAR_TO_VECTOR nodes.
15514+ unsigned LHSNumValidElts = HalfVec;
15515+ unsigned RHSNumValidElts = HalfVec;
1550715516
1550815517 // Initially assume that neither input is permuted. These will be adjusted
15509- // accordingly if either input is.
15510- int LHSMaxIdx = -1;
15511- int RHSMinIdx = -1;
15512- int RHSMaxIdx = -1;
15513- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15518+ // accordingly if either input is. Note that -1 means that all elements
15519+ // are undefined.
15520+ int LHSFirstElt = 0;
15521+ int RHSFirstElt = ShuffleNumElts;
15522+ int LHSLastElt = -1;
15523+ int RHSLastElt = -1;
1551415524
1551515525 // Get the permuted scalar to vector nodes for the source(s) that come from
1551615526 // ISD::SCALAR_TO_VECTOR.
@@ -15519,33 +15529,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1551915529 // the value into element zero. Since scalar size of LHS and RHS may differ
1552015530 // after isScalarToVec, this should be checked using their own sizes.
1552115531 if (SToVLHS) {
15522- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15532+ int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15533+ if (!IsLittleEndian && LHSScalarSize >= 64)
1552315534 return Res;
1552415535 // Set up the values for the shuffle vector fixup.
15525- LHSMaxIdx = NumEltsOut / NumEltsIn;
15536+ LHSNumValidElts =
15537+ LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15538+ // The last element that comes from the LHS. For example:
15539+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15540+ // The last element that comes from the LHS is actually 0, not 3
15541+ // because elements 1 and higher of a scalar_to_vector are undefined.
15542+ LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
1552615543 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
1552715544 if (SToVLHS.getValueType() != LHS.getValueType())
1552815545 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
1552915546 LHS = SToVLHS;
1553015547 }
1553115548 if (SToVRHS) {
15532- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15549+ int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15550+ if (!IsLittleEndian && RHSScalarSize >= 64)
1553315551 return Res;
15534- RHSMinIdx = NumEltsOut;
15535- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15552+ RHSNumValidElts =
15553+ RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15554+ // The last element that comes from the RHS. For example:
15555+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15556+ // The last element that comes from the RHS is actually 5, not 7
15557+ // because elements 1 and higher of a scalar_to_vector are undefined.
15558+ // It is also not 4 because the original scalar_to_vector is wider and
15559+ // actually contains two i32 elements.
15560+ RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
1553615561 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
1553715562 if (SToVRHS.getValueType() != RHS.getValueType())
1553815563 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
1553915564 RHS = SToVRHS;
1554015565 }
1554115566
15567+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15568+ return Res;
15569+
1554215570 // Fix up the shuffle mask to reflect where the desired element actually is.
1554315571 // The minimum and maximum indices that correspond to element zero for both
1554415572 // the LHS and RHS are computed and will control which shuffle mask entries
1554515573 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15546- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15547- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15548- HalfVec, ValidLaneWidth, Subtarget);
15574+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15575+ fixupShuffleMaskForPermutedSToV(
15576+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15577+ LHSNumValidElts, RHSNumValidElts, Subtarget);
1554915578 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
1555015579
1555115580 // We may have simplified away the shuffle. We won't be able to do anything
0 commit comments