Skip to content

Commit 10d803d

Browse files
committed
Rebase
Created using spr 1.3.5
2 parents 635c51d + 07d284d commit 10d803d

28 files changed

+1105
-1039
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,33 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
259259
return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost;
260260
}
261261

262+
/// Checks if the provided mask \p Mask is a splat mask, i.e. it contains only -1
263+
/// or the same non -1 index value, and this index value occurs at least twice.
264+
/// So, mask <0, -1, -1, -1> is not considered a splat (it is just identity),
265+
/// same for <-1, 0, -1, -1> (just a slide), while <2, -1, 2, -1> is a splat
266+
/// with \p Index=2.
/// Every non -1 element must be less than 2 * \p NumSrcElts, otherwise the
/// mask is rejected. On success the common element value is stored in
/// \p Index; on failure \p Index is left untouched.
/// NOTE(review): an empty \p Mask appears to be accepted (all_of over an empty
/// range), leaving \p Index set to PoisonMaskElem - confirm callers never pass
/// an empty mask.
267+
static bool isSplatMask(ArrayRef<int> Mask, unsigned NumSrcElts, int &Index) {
268+
// Tracks whether the broadcast index has been seen at least twice.
269+
bool IsCompared = false;
270+
if (int SplatIdx = PoisonMaskElem;
271+
all_of(enumerate(Mask), [&](const auto &P) {
272+
// A poison element is fine anywhere, but if it is the last element the
// splat index must already have been matched a second time.
if (P.value() == PoisonMaskElem)
273+
return P.index() != Mask.size() - 1 || IsCompared;
274+
// The unsigned cast also rejects any negative value other than poison.
if (static_cast<unsigned>(P.value()) >= NumSrcElts * 2)
275+
return false;
276+
// First non-poison element: remember it as the candidate splat index.
if (SplatIdx == PoisonMaskElem) {
277+
SplatIdx = P.value();
278+
// If it is also the last element, it cannot occur a second time.
return P.index() != Mask.size() - 1;
279+
}
280+
// Any further non-poison element must repeat the candidate index.
IsCompared = true;
281+
return SplatIdx == P.value();
282+
})) {
283+
Index = SplatIdx;
284+
return true;
285+
}
286+
return false;
287+
}
288+
262289
protected:
263290
explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
264291
: BaseT(DL) {}
@@ -1014,17 +1041,20 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
10141041
return Kind;
10151042
int NumSrcElts = Ty->getElementCount().getKnownMinValue();
10161043
switch (Kind) {
1017-
case TTI::SK_PermuteSingleSrc:
1044+
case TTI::SK_PermuteSingleSrc: {
10181045
if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts))
10191046
return TTI::SK_Reverse;
10201047
if (ShuffleVectorInst::isZeroEltSplatMask(Mask, NumSrcElts))
10211048
return TTI::SK_Broadcast;
1049+
if (isSplatMask(Mask, NumSrcElts, Index))
1050+
return TTI::SK_Broadcast;
10221051
if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) &&
10231052
(Index + Mask.size()) <= (size_t)NumSrcElts) {
10241053
SubTy = FixedVectorType::get(Ty->getElementType(), Mask.size());
10251054
return TTI::SK_ExtractSubvector;
10261055
}
10271056
break;
1057+
}
10281058
case TTI::SK_PermuteTwoSrc: {
10291059
int NumSubElts;
10301060
if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 138 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13199,6 +13199,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1319913199
// No perfect match, just shuffle, so choose the first tree node from the
1320013200
// tree.
1320113201
Entries.push_back(FirstEntries.front());
13202+
VF = FirstEntries.front()->getVectorFactor();
1320213203
} else {
1320313204
// Try to find nodes with the same vector factor.
1320413205
assert(UsedTEs.size() == 2 && "Expected at max 2 permuted entries.");
@@ -13239,6 +13240,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1323913240
Entries.push_back(SecondEntries.front());
1324013241
VF = std::max(Entries.front()->getVectorFactor(),
1324113242
Entries.back()->getVectorFactor());
13243+
} else {
13244+
VF = Entries.front()->getVectorFactor();
1324213245
}
1324313246
}
1324413247

@@ -13350,17 +13353,141 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1335013353
: Entries[Pair.first]->findLaneForValue(VL[Pair.second]));
1335113354
IsIdentity &= Mask[Idx] == Pair.second;
1335213355
}
13353-
switch (Entries.size()) {
13354-
case 1:
13355-
if (IsIdentity || EntryLanes.size() > 1 || VL.size() <= 2)
13356-
return TargetTransformInfo::SK_PermuteSingleSrc;
13357-
break;
13358-
case 2:
13359-
if (EntryLanes.size() > 2 || VL.size() <= 2)
13360-
return TargetTransformInfo::SK_PermuteTwoSrc;
13361-
break;
13362-
default:
13363-
break;
13356+
if (ForOrder || IsIdentity || Entries.empty()) {
13357+
switch (Entries.size()) {
13358+
case 1:
13359+
if (IsIdentity || EntryLanes.size() > 1 || VL.size() <= 2)
13360+
return TargetTransformInfo::SK_PermuteSingleSrc;
13361+
break;
13362+
case 2:
13363+
if (EntryLanes.size() > 2 || VL.size() <= 2)
13364+
return TargetTransformInfo::SK_PermuteTwoSrc;
13365+
break;
13366+
default:
13367+
break;
13368+
}
13369+
} else if (!isa<VectorType>(VL.front()->getType()) &&
13370+
(EntryLanes.size() > Entries.size() || VL.size() <= 2)) {
13371+
// Do the cost estimation to check if the shuffle is more beneficial than a buildvector.
13372+
SmallVector<int> SubMask(std::next(Mask.begin(), Part * VL.size()),
13373+
std::next(Mask.begin(), (Part + 1) * VL.size()));
13374+
int MinElement = SubMask.front(), MaxElement = SubMask.front();
13375+
for (int Idx : SubMask) {
13376+
if (Idx == PoisonMaskElem)
13377+
continue;
13378+
if (MinElement == PoisonMaskElem || MinElement % VF > Idx % VF)
13379+
MinElement = Idx;
13380+
if (MaxElement == PoisonMaskElem || MaxElement % VF < Idx % VF)
13381+
MaxElement = Idx;
13382+
}
13383+
assert(MaxElement >= 0 && MinElement >= 0 &&
13384+
MaxElement % VF >= MinElement % VF &&
13385+
"Expected at least single element.");
13386+
unsigned NewVF = std::max<unsigned>(
13387+
VL.size(), getFullVectorNumberOfElements(*TTI, VL.front()->getType(),
13388+
(MaxElement % VF) -
13389+
(MinElement % VF) + 1));
13390+
if (NewVF < VF) {
13391+
for_each(SubMask, [&](int &Idx) {
13392+
if (Idx == PoisonMaskElem)
13393+
return;
13394+
Idx = (Idx % VF) - (MinElement % VF) +
13395+
(Idx >= static_cast<int>(VF) ? NewVF : 0);
13396+
});
13397+
VF = NewVF;
13398+
}
13399+
13400+
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
13401+
auto *VecTy = getWidenedType(VL.front()->getType(), VF);
13402+
auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size());
13403+
auto GetShuffleCost = [&,
13404+
&TTI = *TTI](ArrayRef<int> Mask,
13405+
ArrayRef<const TreeEntry *> Entries,
13406+
VectorType *VecTy) -> InstructionCost {
13407+
if (Entries.size() == 1 && Entries.front()->getInterleaveFactor() > 0 &&
13408+
ShuffleVectorInst::isDeInterleaveMaskOfFactor(
13409+
Mask, Entries.front()->getInterleaveFactor()))
13410+
return TTI::TCC_Free;
13411+
return ::getShuffleCost(TTI,
13412+
Entries.size() > 1 ? TTI::SK_PermuteTwoSrc
13413+
: TTI::SK_PermuteSingleSrc,
13414+
VecTy, Mask, CostKind);
13415+
};
13416+
InstructionCost ShuffleCost = GetShuffleCost(SubMask, Entries, VecTy);
13417+
InstructionCost FirstShuffleCost = 0;
13418+
SmallVector<int> FirstMask(SubMask.begin(), SubMask.end());
13419+
if (Entries.size() == 1 || !Entries[0]->isGather()) {
13420+
FirstShuffleCost = ShuffleCost;
13421+
} else {
13422+
// Transform mask to include only first entry.
13423+
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
13424+
bool IsIdentity = true;
13425+
for (auto [I, Idx] : enumerate(FirstMask)) {
13426+
if (Idx >= static_cast<int>(VF)) {
13427+
Idx = PoisonMaskElem;
13428+
} else {
13429+
DemandedElts.clearBit(I);
13430+
if (Idx != PoisonMaskElem)
13431+
IsIdentity &= static_cast<int>(I) == Idx;
13432+
}
13433+
}
13434+
if (!IsIdentity)
13435+
FirstShuffleCost = GetShuffleCost(FirstMask, Entries.front(), VecTy);
13436+
FirstShuffleCost += TTI->getScalarizationOverhead(
13437+
MaskVecTy, DemandedElts, /*Insert=*/true,
13438+
/*Extract=*/false, CostKind);
13439+
}
13440+
InstructionCost SecondShuffleCost = 0;
13441+
SmallVector<int> SecondMask(SubMask.begin(), SubMask.end());
13442+
if (Entries.size() == 1 || !Entries[1]->isGather()) {
13443+
SecondShuffleCost = ShuffleCost;
13444+
} else {
13445+
// Transform mask to include only second entry.
13446+
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
13447+
bool IsIdentity = true;
13448+
for (auto [I, Idx] : enumerate(SecondMask)) {
13449+
if (Idx < static_cast<int>(VF) && Idx >= 0) {
13450+
Idx = PoisonMaskElem;
13451+
} else {
13452+
DemandedElts.clearBit(I);
13453+
if (Idx != PoisonMaskElem) {
13454+
Idx -= VF;
13455+
IsIdentity &= static_cast<int>(I) == Idx;
13456+
}
13457+
}
13458+
}
13459+
if (!IsIdentity)
13460+
SecondShuffleCost = GetShuffleCost(SecondMask, Entries[1], VecTy);
13461+
SecondShuffleCost += TTI->getScalarizationOverhead(
13462+
MaskVecTy, DemandedElts, /*Insert=*/true,
13463+
/*Extract=*/false, CostKind);
13464+
}
13465+
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
13466+
for (auto [I, Idx] : enumerate(SubMask))
13467+
if (Idx == PoisonMaskElem)
13468+
DemandedElts.clearBit(I);
13469+
InstructionCost BuildVectorCost =
13470+
TTI->getScalarizationOverhead(MaskVecTy, DemandedElts, /*Insert=*/true,
13471+
/*Extract=*/false, CostKind);
13472+
const TreeEntry *BestEntry = nullptr;
13473+
if (FirstShuffleCost < ShuffleCost) {
13474+
copy(FirstMask, std::next(Mask.begin(), Part * VL.size()));
13475+
BestEntry = Entries.front();
13476+
ShuffleCost = FirstShuffleCost;
13477+
}
13478+
if (SecondShuffleCost < ShuffleCost) {
13479+
copy(SecondMask, std::next(Mask.begin(), Part * VL.size()));
13480+
BestEntry = Entries[1];
13481+
ShuffleCost = SecondShuffleCost;
13482+
}
13483+
if (BuildVectorCost >= ShuffleCost) {
13484+
if (BestEntry) {
13485+
Entries.clear();
13486+
Entries.push_back(BestEntry);
13487+
}
13488+
return Entries.size() > 1 ? TargetTransformInfo::SK_PermuteTwoSrc
13489+
: TargetTransformInfo::SK_PermuteSingleSrc;
13490+
}
1336413491
}
1336513492
Entries.clear();
1336613493
// Clear the corresponding mask elements.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1642,7 +1642,7 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
16421642
VPSlotTracker &SlotTracker) const {
16431643
O << Indent << "WIDEN-CAST ";
16441644
printAsOperand(O, SlotTracker);
1645-
O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1645+
O << " = " << Instruction::getOpcodeName(Opcode);
16461646
printFlags(O);
16471647
printOperands(O, SlotTracker);
16481648
O << " to " << *getResultType();

llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -399,13 +399,13 @@ define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) {
399399
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer
400400
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 1>
401401
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 0>
402-
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 1>
402+
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 1>
403403
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 2>
404404
; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 0>
405405
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 2>
406406
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 3>
407407
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 0>
408-
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 3>
408+
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 3>
409409
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 2>
410410
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 1>
411411
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 3>
@@ -436,13 +436,13 @@ define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) {
436436
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer
437437
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 1>
438438
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 0>
439-
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 1>
439+
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 1>
440440
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 2>
441441
; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 0>
442442
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 2>
443443
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 3>
444444
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 0>
445-
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 3>
445+
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 3>
446446
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 2>
447447
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 1>
448448
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 3>
@@ -476,13 +476,13 @@ define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) {
476476
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer
477477
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 1>
478478
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 0>
479-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 1>
479+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 1>
480480
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 2>
481481
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 0>
482482
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 2>
483483
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 3>
484484
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 0>
485-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 3>
485+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 3>
486486
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 2>
487487
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 1>
488488
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 3>
@@ -513,13 +513,13 @@ define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) {
513513
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer
514514
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 1>
515515
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 0>
516-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 1>
516+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 1>
517517
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 2>
518518
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 0>
519519
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 2>
520520
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 3>
521521
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 0>
522-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 3>
522+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 3>
523523
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 2>
524524
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 1>
525525
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 3>

0 commit comments

Comments
 (0)