Skip to content

Commit de75b1b

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.5
1 parent 2b3266c commit de75b1b

File tree

8 files changed

+166
-56
lines changed

8 files changed

+166
-56
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -504,25 +504,26 @@ void llvm::processShuffleMasks(
504504
unsigned SzSrc = Sz / NumOfSrcRegs;
505505
for (unsigned I = 0; I < NumOfDestRegs; ++I) {
506506
auto &RegMasks = Res[I];
507-
RegMasks.assign(NumOfSrcRegs, {});
507+
RegMasks.assign(2 * NumOfSrcRegs, {});
508508
// Check that the values in dest registers are in the one src
509509
// register.
510510
for (unsigned K = 0; K < SzDest; ++K) {
511511
int Idx = I * SzDest + K;
512512
if (Idx == Sz)
513513
break;
514-
if (Mask[Idx] >= Sz || Mask[Idx] == PoisonMaskElem)
514+
if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem)
515515
continue;
516-
int SrcRegIdx = Mask[Idx] / SzSrc;
516+
int MaskIdx = Mask[Idx] % Sz;
517+
int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
517518
// Add a cost of PermuteTwoSrc for each new source register permute,
518519
// if we have more than one source register.
519520
if (RegMasks[SrcRegIdx].empty())
520521
RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
521-
RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
522+
RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
522523
}
523524
}
524525
// Process split mask.
525-
for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
526+
for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
526527
auto &Dest = Res[I];
527528
int NumSrcRegs =
528529
count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
@@ -567,7 +568,7 @@ void llvm::processShuffleMasks(
567568
int FirstIdx = -1;
568569
SecondIdx = -1;
569570
MutableArrayRef<int> FirstMask, SecondMask;
570-
for (unsigned I = 0; I < NumOfDestRegs; ++I) {
571+
for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {
571572
SmallVectorImpl<int> &RegMask = Dest[I];
572573
if (RegMask.empty())
573574
continue;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 52 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -5081,7 +5081,6 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
50815081
SDValue V1 = SVN->getOperand(0);
50825082
SDValue V2 = SVN->getOperand(1);
50835083
ArrayRef<int> Mask = SVN->getMask();
5084-
unsigned NumElts = VT.getVectorNumElements();
50855084

50865085
// If we don't know exact data layout, not much we can do. If this
50875086
// is already m1 or smaller, no point in splitting further.
@@ -5098,58 +5097,70 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
50985097

50995098
MVT ElemVT = VT.getVectorElementType();
51005099
unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5101-
unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
5102-
5103-
SmallVector<std::pair<int, SmallVector<int>>>
5104-
OutMasks(VRegsPerSrc, {-1, {}});
5105-
5106-
// Check if our mask can be done as a 1-to-1 mapping from source
5107-
// to destination registers in the group without needing to
5108-
// write each destination more than once.
5109-
for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
5110-
int DstVecIdx = DstIdx / ElemsPerVReg;
5111-
int DstSubIdx = DstIdx % ElemsPerVReg;
5112-
int SrcIdx = Mask[DstIdx];
5113-
if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
5114-
continue;
5115-
int SrcVecIdx = SrcIdx / ElemsPerVReg;
5116-
int SrcSubIdx = SrcIdx % ElemsPerVReg;
5117-
if (OutMasks[DstVecIdx].first == -1)
5118-
OutMasks[DstVecIdx].first = SrcVecIdx;
5119-
if (OutMasks[DstVecIdx].first != SrcVecIdx)
5120-
// Note: This case could easily be handled by keeping track of a chain
5121-
// of source values and generating two element shuffles below. This is
5122-
// less an implementation question, and more a profitability one.
5123-
return SDValue();
5124-
5125-
OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
5126-
OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
5127-
}
51285100

51295101
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
51305102
MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
51315103
MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
51325104
assert(M1VT == getLMUL1VT(M1VT));
51335105
unsigned NumOpElts = M1VT.getVectorMinNumElements();
5134-
SDValue Vec = DAG.getUNDEF(ContainerVT);
5106+
unsigned NormalizedVF = ContainerVT.getVectorMinNumElements();
5107+
unsigned NumOfSrcRegs = NormalizedVF / NumOpElts;
5108+
unsigned NumOfDestRegs = NormalizedVF / NumOpElts;
51355109
// The following semantically builds up a fixed length concat_vector
51365110
// of the component shuffle_vectors. We eagerly lower to scalable here
51375111
// to avoid DAG combining it back to a large shuffle_vector again.
51385112
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
51395113
V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5140-
for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
5141-
auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
5142-
if (SrcVecIdx == -1)
5114+
SmallVector<SDValue> SubRegs(NumOfDestRegs);
5115+
unsigned RegCnt = 0;
5116+
unsigned PrevCnt = 0;
5117+
processShuffleMasks(
5118+
Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5119+
[&]() {
5120+
PrevCnt = RegCnt;
5121+
++RegCnt;
5122+
},
5123+
[&, &DAG = DAG](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx,
5124+
unsigned DstVecIdx) {
5125+
SDValue SrcVec = SrcVecIdx >= NumOfSrcRegs ? V2 : V1;
5126+
unsigned ExtractIdx = (SrcVecIdx % NumOfSrcRegs) * NumOpElts;
5127+
SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5128+
DAG.getVectorIdxConstant(ExtractIdx, DL));
5129+
SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5130+
SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
5131+
SubRegs[RegCnt] = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
5132+
PrevCnt = RegCnt;
5133+
++RegCnt;
5134+
},
5135+
[&, &DAG = DAG](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2) {
5136+
if (PrevCnt + 1 == RegCnt)
5137+
++RegCnt;
5138+
SDValue SubVec1 = SubRegs[PrevCnt + 1];
5139+
if (!SubVec1) {
5140+
SDValue SrcVec = Idx1 >= NumOfSrcRegs ? V2 : V1;
5141+
unsigned ExtractIdx = (Idx1 % NumOfSrcRegs) * NumOpElts;
5142+
SubVec1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5143+
DAG.getVectorIdxConstant(ExtractIdx, DL));
5144+
}
5145+
SubVec1 = convertFromScalableVector(OneRegVT, SubVec1, DAG, Subtarget);
5146+
SDValue SrcVec = Idx2 >= NumOfSrcRegs ? V2 : V1;
5147+
unsigned ExtractIdx = (Idx2 % NumOfSrcRegs) * NumOpElts;
5148+
SDValue SubVec2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5149+
DAG.getVectorIdxConstant(ExtractIdx, DL));
5150+
SubVec2 = convertFromScalableVector(OneRegVT, SubVec2, DAG, Subtarget);
5151+
SubVec1 =
5152+
DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, SrcSubMask);
5153+
SubVec1 = convertToScalableVector(M1VT, SubVec1, DAG, Subtarget);
5154+
SubRegs[PrevCnt + 1] = SubVec1;
5155+
});
5156+
assert(RegCnt == NumOfDestRegs && "Whole vector must be processed");
5157+
SDValue Vec = DAG.getUNDEF(ContainerVT);
5158+
for (auto [I, V] : enumerate(SubRegs)) {
5159+
if (!V)
51435160
continue;
5144-
unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
5145-
SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
5146-
SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5147-
DAG.getVectorIdxConstant(ExtractIdx, DL));
5148-
SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5149-
SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
5150-
SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
5151-
unsigned InsertIdx = DstVecIdx * NumOpElts;
5152-
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
5161+
unsigned InsertIdx = I * NumOpElts;
5162+
5163+
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
51535164
DAG.getVectorIdxConstant(InsertIdx, DL));
51545165
}
51555166
return convertFromScalableVector(VT, Vec, DAG, Subtarget);

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,105 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
389389
// give a more accurate cost than falling back to generic scalable codegen.
390390
// TODO: Each of these cases hints at a modeling gap around scalable vectors.
391391
if (isa<FixedVectorType>(Tp)) {
392+
MVT LegalVT = LT.second;
393+
InstructionCost NumOfDests = LT.first;
394+
if (ST->hasVInstructions() &&
395+
LT.second.getSizeInBits().getFixedValue() >
396+
ST->getRealVLen().value_or(UINT_MAX) &&
397+
!Mask.empty() && NumOfDests.isValid() && NumOfDests > 1 &&
398+
LegalVT.isFixedLengthVector() &&
399+
LegalVT.getVectorElementType().getSizeInBits() ==
400+
Tp->getElementType()->getPrimitiveSizeInBits() &&
401+
LegalVT.getVectorNumElements() <
402+
Tp->getElementCount().getFixedValue()) {
403+
unsigned VecTySize = DL.getTypeStoreSize(Tp);
404+
unsigned LegalVTSize = LegalVT.getStoreSize();
405+
// Number of source vectors after legalization:
406+
unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
407+
// Number of destination vectors after legalization:
408+
409+
auto *SingleOpTy = FixedVectorType::get(Tp->getElementType(),
410+
LegalVT.getVectorNumElements());
411+
412+
// Try to perform better estimation of the permutation.
413+
// 1. Split the source/destination vectors into real registers.
414+
// 2. Do the mask analysis to identify which real registers are
415+
// permuted. If more than one source register is used for the
416+
// destination register building, the cost for this destination register
417+
// is (Number_of_source_register - 1) * Cost_PermuteTwoSrc. If only one
418+
// source register is used, build mask and calculate the cost as a cost
419+
// of PermuteSingleSrc.
420+
// Also, for the single register permute we try to identify if the
421+
// destination register is just a copy of the source register or the
422+
// copy of the previous destination register (the cost is
423+
// TTI::TCC_Basic). If the source register is just reused, the cost for
424+
// this operation is 0.
425+
NumOfDests = getTypeLegalizationCost(
426+
FixedVectorType::get(Tp->getElementType(), Mask.size()))
427+
.first;
428+
unsigned E = *NumOfDests.getValue();
429+
unsigned NormalizedVF =
430+
LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
431+
unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
432+
unsigned NumOfDestRegs = NormalizedVF / LegalVT.getVectorNumElements();
433+
SmallVector<int> NormalizedMask(NormalizedVF, PoisonMaskElem);
434+
copy(Mask, NormalizedMask.begin());
435+
unsigned PrevSrcReg = 0;
436+
ArrayRef<int> PrevRegMask;
437+
InstructionCost Cost = 0;
438+
SmallBitVector ExtractedRegs(2 * NumOfSrcRegs);
439+
processShuffleMasks(
440+
NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
441+
[&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
442+
if (ExtractedRegs.test(SrcReg)) {
443+
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Tp, {}, CostKind,
444+
(SrcReg % NumOfSrcRegs) *
445+
SingleOpTy->getNumElements(),
446+
SingleOpTy);
447+
ExtractedRegs.set(SrcReg);
448+
}
449+
if (!ShuffleVectorInst::isIdentityMask(RegMask, RegMask.size())) {
450+
// Check if the previous register can be just copied to the next
451+
// one.
452+
if (PrevRegMask.empty() || PrevSrcReg != SrcReg ||
453+
PrevRegMask != RegMask) {
454+
Cost += getShuffleCost(TTI::SK_PermuteSingleSrc, SingleOpTy,
455+
RegMask, CostKind, 0, nullptr);
456+
} else {
457+
// Just a copy of previous destination register.
458+
Cost += TTI::TCC_Basic;
459+
}
460+
return;
461+
}
462+
if (SrcReg != DestReg &&
463+
any_of(RegMask, [](int I) { return I != PoisonMaskElem; })) {
464+
// Just a copy of the source register.
465+
Cost += TTI::TCC_Basic;
466+
}
467+
PrevSrcReg = SrcReg;
468+
PrevRegMask = RegMask;
469+
ExtractedRegs.set(DestReg);
470+
},
471+
[&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2) {
472+
if (ExtractedRegs.test(Idx1)) {
473+
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Tp, {}, CostKind,
474+
(Idx1 % NumOfSrcRegs) *
475+
SingleOpTy->getNumElements(),
476+
SingleOpTy);
477+
ExtractedRegs.set(Idx1);
478+
}
479+
if (ExtractedRegs.test(Idx2)) {
480+
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Tp, {}, CostKind,
481+
(Idx2 % NumOfSrcRegs) *
482+
SingleOpTy->getNumElements(),
483+
SingleOpTy);
484+
ExtractedRegs.set(Idx2);
485+
}
486+
Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, RegMask,
487+
CostKind, 0, nullptr);
488+
});
489+
return Cost;
490+
}
392491
switch (Kind) {
393492
default:
394493
break;

llvm/test/Analysis/CostModel/X86/shuffle-splat-codesize.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a
483483
; SSE-LABEL: 'test_upper_vXf32'
484484
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
485485
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
486-
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
486+
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
487487
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
488488
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
489489
;

llvm/test/Analysis/CostModel/X86/shuffle-splat-latency.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a
483483
; SSE-LABEL: 'test_upper_vXf32'
484484
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
485485
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
486-
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
486+
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
487487
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
488488
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
489489
;

llvm/test/Analysis/CostModel/X86/shuffle-splat-sizelatency.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a
483483
; SSE-LABEL: 'test_upper_vXf32'
484484
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
485485
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
486-
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
486+
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
487487
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
488488
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
489489
;

llvm/test/Analysis/CostModel/X86/shuffle-splat.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a
483483
; SSE-LABEL: 'test_upper_vXf32'
484484
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
485485
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
486-
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
486+
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
487487
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
488488
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
489489
;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,12 +164,11 @@ define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64>
164164
define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
165165
; CHECK-LABEL: m2_splat_into_slide_two_source:
166166
; CHECK: # %bb.0:
167-
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
168-
; CHECK-NEXT: vmv.v.i v0, 12
169-
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
167+
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
168+
; CHECK-NEXT: vslidedown.vi v13, v10, 1
169+
; CHECK-NEXT: vslideup.vi v13, v11, 1
170170
; CHECK-NEXT: vrgather.vi v12, v8, 0
171-
; CHECK-NEXT: vslideup.vi v12, v10, 1, v0.t
172-
; CHECK-NEXT: vmv.v.v v8, v12
171+
; CHECK-NEXT: vmv2r.v v8, v12
173172
; CHECK-NEXT: ret
174173
%res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 5, i32 6>
175174
ret <4 x i64> %res

0 commit comments

Comments
 (0)