@@ -15691,14 +15691,16 @@ static void fixupShuffleMaskForPermutedSToV(
1569115691 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
1569215692 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
1569315693 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15694+ int LHSEltFixup =
15695+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15696+ int RHSEltFixup =
15697+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1569415698 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
1569515699 int Idx = ShuffV[I];
1569615700 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15697- ShuffV[I] +=
15698- Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15701+ ShuffV[I] += LHSEltFixup;
1569915702 if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15700- ShuffV[I] +=
15701- Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15703+ ShuffV[I] += RHSEltFixup;
1570215704 }
1570315705}
1570415706
@@ -15756,6 +15758,31 @@ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
1575615758 return true;
1575715759}
1575815760
15761+ static SDValue generateSToVPermutedForVecShuffle(
15762+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15763+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15764+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15765+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15766+ // Set up the values for the shuffle vector fixup: how many operand-sized elements the scalar spans.
15767+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15768+ // The last element depends on whether the input comes from the LHS or RHS;
15769+ // the caller selects the side through FirstElt. For example:
15770+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15771+ // For the LHS: the last element that comes from the LHS is actually 0, not 3
15772+ // because elements 1 and higher of a scalar_to_vector are undefined.
15773+ // For the RHS: the last element that comes from the RHS is actually 5, not 7
15774+ // because elements 1 and higher of a scalar_to_vector are undefined.
15775+ // It is also not 4 because the original scalar_to_vector is wider and
15776+ // actually contains two i32 elements.
15777+ // Dividing by (ShuffleEltWidth + 1) gives 0 when the scalar is as wide as one
15778+ // shuffle element and 1 when it is twice as wide, matching the example.
15779+ LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
15780+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15781+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15782+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15783+ return SToVPermuted;
15784+ }
15785+
1575915786// On little endian subtargets, combine shuffles such as:
1576015787// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1576115788// into:
@@ -15833,36 +15860,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1583315860 int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
1583415861 if (!IsLittleEndian && LHSScalarSize >= 64)
1583515862 return Res;
15836- // Set up the values for the shuffle vector fixup.
15837- LHSNumValidElts =
15838- LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15839- // The last element that comes from the LHS. For example:
15840- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15841- // The last element that comes from the LHS is actually 0, not 3
15842- // because elements 1 and higher of a scalar_to_vector are undefined.
15843- LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15844- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15845- if (SToVLHS.getValueType() != LHS.getValueType())
15846- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15847- LHS = SToVLHS;
15863+ LHS = generateSToVPermutedForVecShuffle(
15864+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15865+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
1584815866 }
1584915867 if (SToVRHS) {
1585015868 int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
1585115869 if (!IsLittleEndian && RHSScalarSize >= 64)
1585215870 return Res;
15853- RHSNumValidElts =
15854- RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15855- // The last element that comes from the RHS. For example:
15856- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15857- // The last element that comes from the RHS is actually 5, not 7
15858- // because elements 1 and higher of a scalar_to_vector are undefined.
15859- // It is also not 4 because the original scalar_to_vector is wider and
15860- // actually contains two i32 elements.
15861- RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15862- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15863- if (SToVRHS.getValueType() != RHS.getValueType())
15864- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15865- RHS = SToVRHS;
15871+ RHS = generateSToVPermutedForVecShuffle(
15872+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15873+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
1586615874 }
1586715875
1586815876 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
0 commit comments