@@ -29774,20 +29774,22 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
2977429774// UnpairedInputs contains values yet to be paired, mapping an unpaired value to
2977529775// its current neighbor's value and index.
2977629776// Do not use llvm::DenseMap, as ~0 is a reserved key.
29777- template <typename InputTy,
29778- typename PermutationTy,
29779- typename MapTy = SmallMapVector<typename InputTy::value_type,
29780- std::pair<typename InputTy::value_type, typename PermutationTy::value_type>, 8>>
29781- static bool PermuteAndPairVector(const InputTy& Inputs,
29782- PermutationTy &Permutation,
29783- MapTy UnpairedInputs = SmallMapVector<typename InputTy::value_type,
29784- std::pair<typename InputTy::value_type, typename PermutationTy::value_type>, 8>()) {
29777+ template <typename InputTy, typename PermutationTy,
29778+ typename MapTy =
29779+ SmallMapVector<typename InputTy::value_type,
29780+ std::pair<typename InputTy::value_type,
29781+ typename PermutationTy::value_type>,
29782+ 8>>
29783+ static bool PermuteAndPairVector(
29784+ const InputTy &Inputs, PermutationTy &Permutation,
29785+ MapTy UnpairedInputs = MapTy()) {
2978529786 const auto Wildcard = ~typename InputTy::value_type();
2978629787 SmallVector<typename PermutationTy::value_type, 16> WildcardPairs;
2978729788
2978829789 size_t OutputOffset = Permutation.size();
2978929790 typename PermutationTy::value_type I = 0;
29790- for (auto InputIt = Inputs.begin(), InputEnd = Inputs.end(); InputIt != InputEnd;) {
29791+ for (auto InputIt = Inputs.begin(), InputEnd = Inputs.end();
29792+ InputIt != InputEnd;) {
2979129793 Permutation.push_back(OutputOffset + I);
2979229794 Permutation.push_back(OutputOffset + I + 1);
2979329795
@@ -29802,14 +29804,18 @@ static bool PermuteAndPairVector(const InputTy& Inputs,
2980229804
2980329805 // If both are equal, they are in good position.
2980429806 if (Even != Odd) {
29805- auto DoWork = [&] (auto &This, auto ThisIndex, auto Other, auto OtherIndex) {
29807+ auto DoWork = [&](auto &This, auto ThisIndex, auto Other,
29808+ auto OtherIndex) {
2980629809 if (This != Wildcard) {
2980729810 // For non-wildcard value, check if it can pair with an existing
2980829811 // unpaired value from UnpairedInputs, if so, swap with the unpaired
2980929812 // value's neighbor, otherwise the current value is added to the map.
29810- if (auto [MapIt, Inserted] = UnpairedInputs.try_emplace(This, std::make_pair(Other, OtherIndex)); !Inserted) {
29813+ if (auto [MapIt, Inserted] = UnpairedInputs.try_emplace(
29814+ This, std::make_pair(Other, OtherIndex));
29815+ !Inserted) {
2981129816 auto [SwapValue, SwapIndex] = MapIt->second;
29812- std::swap(Permutation[OutputOffset + SwapIndex], Permutation[OutputOffset + ThisIndex]);
29817+ std::swap(Permutation[OutputOffset + SwapIndex],
29818+ Permutation[OutputOffset + ThisIndex]);
2981329819 This = SwapValue;
2981429820 UnpairedInputs.erase(MapIt);
2981529821
@@ -29831,7 +29837,9 @@ static bool PermuteAndPairVector(const InputTy& Inputs,
2983129837 UnpairedInputs[This] = std::make_pair(Other, OtherIndex);
2983229838 }
2983329839 // If its neighbor is also in UnpairedInputs, update its info too.
29834- if (auto OtherMapIt = UnpairedInputs.find(Other); OtherMapIt != UnpairedInputs.end() && OtherMapIt->second.second == ThisIndex) {
29840+ if (auto OtherMapIt = UnpairedInputs.find(Other);
29841+ OtherMapIt != UnpairedInputs.end() &&
29842+ OtherMapIt->second.second == ThisIndex) {
2983529843 OtherMapIt->second.first = This;
2983629844 }
2983729845 }
@@ -29849,11 +29857,12 @@ static bool PermuteAndPairVector(const InputTy& Inputs,
2984929857 // Now check if each remaining unpaired neighboring value can be swapped with
2985029858 // a wildcard pair to form two paired values.
2985129859 for (auto &[Unpaired, V] : UnpairedInputs) {
29852- auto [Neighbor, NeighborIndex] = V;
29860+ auto [Neighbor, NeighborIndex] = V;
2985329861 if (Neighbor != Wildcard) {
2985429862 assert(UnpairedInputs.count(Neighbor));
2985529863 if (WildcardPairs.size()) {
29856- std::swap(Permutation[OutputOffset + WildcardPairs.back()], Permutation[OutputOffset + NeighborIndex]);
29864+ std::swap(Permutation[OutputOffset + WildcardPairs.back()],
29865+ Permutation[OutputOffset + NeighborIndex]);
2985729866 WildcardPairs.pop_back();
2985829867 // Mark the neighbor as processed.
2985929868 UnpairedInputs[Neighbor].first = Wildcard;
@@ -30151,8 +30160,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
3015130160 // (shift (shuffle X P1) S1) ->
3015230161 // (shuffle (shift (shuffle X (shuffle P2 P1)) S2) P2^-1) where S2 can be
3015330162 // widened, and P2^-1 is the inverse shuffle of P2.
30154- if (ConstantAmt && (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) && R.hasOneUse()
30155- && Subtarget.hasSSE3() && !Subtarget.hasAVX512()) {
30163+ if (ConstantAmt &&
30164+ (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) &&
30165+ R.hasOneUse() && Subtarget.hasSSE3() && !Subtarget.hasAVX512()) {
3015630166 constexpr size_t LaneBytes = 16;
3015730167 const size_t NumLanes = VT.getVectorNumElements() / LaneBytes;
3015830168
@@ -30169,7 +30179,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
3016930179 // if so, this transformation may be profitable.
3017030180 bool Profitable;
3017130181 for (size_t I = 0; I < NumLanes; ++I) {
30172- if (!(Profitable = PermuteAndPairVector(ArrayRef(&ShiftAmt[I * LaneBytes], LaneBytes), Permutation)))
30182+ if (!(Profitable = PermuteAndPairVector(
30183+ ArrayRef(&ShiftAmt[I * LaneBytes], LaneBytes), Permutation)))
3017330184 break;
3017430185 }
3017530186
@@ -30187,7 +30198,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
3018730198 }
3018830199 SmallVector<int, 32> Permutation2;
3018930200 for (size_t I = 0; I < NumLanes; ++I) {
30190- if (!(IsAdjacentQuads = PermuteAndPairVector(ArrayRef(&EveryOtherShiftAmt[I * LaneBytes / 2], LaneBytes / 2), Permutation2)))
30201+ if (!(IsAdjacentQuads = PermuteAndPairVector(
30202+ ArrayRef(&EveryOtherShiftAmt[I * LaneBytes / 2],
30203+ LaneBytes / 2),
30204+ Permutation2)))
3019130205 break;
3019230206 }
3019330207 if (IsAdjacentQuads) {
@@ -30235,7 +30249,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
3023530249 if (!IsAdjacentQuads || (VT == MVT::v64i8 && Opc == ISD::SHL))
3023630250 Profitable = false;
3023730251 } else {
30238- if (Opc == ISD::SHL || ((VT == MVT::v16i8 || VT == MVT::v32i8) && Opc == ISD::SRL))
30252+ if (Opc == ISD::SHL ||
30253+ ((VT == MVT::v16i8 || VT == MVT::v32i8) && Opc == ISD::SRL))
3023930254 Profitable = false;
3024030255 }
3024130256 }
0 commit comments