@@ -12214,19 +12214,23 @@ static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
1221412214 return SDValue();
1221512215}
1221612216
12217- /// Lower a vector shuffle as a zero or any extension.
12217+ /// Lower a vector shuffle as an any/signed/zero extension.
1221812218///
1221912219/// Given a specific number of elements, element bit width, and extension
12220- /// stride, produce either a zero or any extension based on the available
12220+ /// stride, produce the requested extension based on the available
1222112221/// features of the subtarget. The extended elements are consecutive and
1222212222/// can start from an offset element index in the input; to
1222312223/// avoid excess shuffling the offset must either be in the bottom lane
1222412224/// or at the start of a higher lane. All extended elements must be from
1222512225/// the same lane.
12226- static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
12227- const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
12228- ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
12226+ static SDValue lowerShuffleAsSpecificExtension(const SDLoc &DL, MVT VT,
12227+ int Scale, int Offset,
12228+ unsigned ExtOpc, SDValue InputV,
12229+ ArrayRef<int> Mask,
12230+ const X86Subtarget &Subtarget,
12231+ SelectionDAG &DAG) {
1222912232 assert(Scale > 1 && "Need a scale to extend.");
12233+ assert(ISD::isExtOpcode(ExtOpc) && "Unsupported extension");
1223012234 int EltBits = VT.getScalarSizeInBits();
1223112235 int NumElements = VT.getVectorNumElements();
1223212236 int NumEltsPerLane = 128 / EltBits;
@@ -12267,13 +12271,17 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
1226712271 NumElements / Scale);
1226812272 InputV = DAG.getBitcast(VT, InputV);
1226912273 InputV = ShuffleOffset(InputV);
12270- InputV = getEXTEND_VECTOR_INREG(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND,
12271- DL, ExtVT, InputV, DAG);
12274+ InputV = getEXTEND_VECTOR_INREG(ExtOpc, DL, ExtVT, InputV, DAG);
1227212275 return DAG.getBitcast(VT, InputV);
1227312276 }
1227412277
1227512278 assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
1227612279 InputV = DAG.getBitcast(VT, InputV);
12280+ bool AnyExt = ExtOpc == ISD::ANY_EXTEND;
12281+
12282+ // TODO: Add pre-SSE41 SIGN_EXTEND_VECTOR_INREG handling.
12283+ if (ExtOpc == ISD::SIGN_EXTEND)
12284+ return SDValue();
1227712285
1227812286 // For any extends we can cheat for larger element sizes and use shuffle
1227912287 // instructions that can fold with a load and/or copy.
@@ -12458,8 +12466,9 @@ static SDValue lowerShuffleAsZeroOrAnyExtend(
1245812466 if (Offset != 0 && Matches < 2)
1245912467 return SDValue();
1246012468
12461- return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt,
12462- InputV, Mask, Subtarget, DAG);
12469+ unsigned ExtOpc = AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND;
12470+ return lowerShuffleAsSpecificExtension(DL, VT, Scale, Offset, ExtOpc,
12471+ InputV, Mask, Subtarget, DAG);
1246312472 };
1246412473
1246512474 // The widest scale possible for extending is to a 64-bit integer.
0 commit comments