File tree Expand file tree Collapse file tree 2 files changed +29
-4
lines changed
test/Transforms/SLPVectorizer/X86 Expand file tree Collapse file tree 2 files changed +29
-4
lines changed Original file line number Diff line number Diff line change @@ -19724,10 +19724,18 @@ bool SLPVectorizerPass::vectorizeStores(
1972419724 Type *ValueTy = StoreTy;
1972519725 if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
1972619726 ValueTy = Trunc->getSrcTy();
19727- unsigned MinVF = std::max<unsigned>(
19728- 2, PowerOf2Ceil(TTI->getStoreMinimumVF(
19729- R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy,
19730- ValueTy)));
19727+ // When REVEC is enabled, StoreTy and ValueTy may be FixedVectorType, but
19728+ // getStoreMinimumVF only supports scalar types as arguments. As a result,
19729+ // we need to use the element types of StoreTy and ValueTy to retrieve the
19730+ // VF and then transform it back.
19731+ // Remember: VF is defined as the number of values we want to vectorize,
19732+ // not the number of elements in the final vector.
19733+ Type *StoreScalarTy = StoreTy->getScalarType();
19734+ unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
19735+ R.getMinVF(DL->getTypeStoreSizeInBits(StoreScalarTy)), StoreScalarTy,
19736+ ValueTy->getScalarType()));
19737+ MinVF /= getNumElements(StoreTy);
19738+ MinVF = std::max<unsigned>(2, MinVF);
1973119739
1973219740 if (MaxVF < MinVF) {
1973319741 LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
Original file line number Diff line number Diff line change 1+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+ ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
3+
4+ define void @test () {
5+ ; CHECK-LABEL: @test(
6+ ; CHECK-NEXT: entry:
7+ ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> zeroinitializer, i64 0)
8+ ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP0]], <4 x i8> zeroinitializer, i64 4)
9+ ; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr null, align 1
10+ ; CHECK-NEXT: ret void
11+ ;
12+ entry:
13+ %0 = getelementptr i8 , ptr null , i64 4
14+ store <4 x i8 > zeroinitializer , ptr null , align 1
15+ store <4 x i8 > zeroinitializer , ptr %0 , align 1
16+ ret void
17+ }
You can’t perform that action at this time.
0 commit comments