Skip to content

Commit eae9ade

Browse files
committed
[SLP][REVEC] Fix a scalar mask being passed to getScalarizationOverhead
when the type is a vector. Fixes the "Vector size mismatch" assertion.
1 parent f4ff7fd commit eae9ade

File tree

2 files changed

+40
-4
lines changed

2 files changed

+40
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13963,15 +13963,31 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1396313963
ShuffledElements.setBit(I);
1396413964
ShuffleMask[I] = Res.first->second;
1396513965
}
13966-
if (!DemandedElements.isZero())
13967-
Cost +=
13968-
TTI->getScalarizationOverhead(VecTy, DemandedElements, /*Insert=*/true,
13969-
/*Extract=*/false, CostKind, VL);
13966+
if (!DemandedElements.isZero()) {
13967+
if (isa<FixedVectorType>(ScalarTy)) {
13968+
assert(SLPReVec && "Only supported by REVEC.");
13969+
// We don't need to insert elements one by one. Instead, we can insert the
13970+
// entire vector into the destination.
13971+
Cost = 0;
13972+
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
13973+
for (unsigned I : seq<unsigned>(VL.size()))
13974+
if (DemandedElements[I])
13975+
Cost += TTI->getShuffleCost(
13976+
TTI::SK_InsertSubvector, VecTy, std::nullopt, CostKind,
13977+
I * ScalarTyNumElements, cast<FixedVectorType>(ScalarTy));
13978+
} else {
13979+
Cost += TTI->getScalarizationOverhead(VecTy, DemandedElements,
13980+
/*Insert=*/true,
13981+
/*Extract=*/false, CostKind, VL);
13982+
}
13983+
}
1397013984
if (ForPoisonSrc) {
1397113985
if (isa<FixedVectorType>(ScalarTy)) {
1397213986
assert(SLPReVec && "Only supported by REVEC.");
1397313987
// We don't need to insert elements one by one. Instead, we can insert the
1397413988
// entire vector into the destination.
13989+
assert(DemandedElements.isZero() &&
13990+
"Need to consider the cost from DemandedElements.");
1397513991
Cost = 0;
1397613992
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
1397713993
for (unsigned I : seq<unsigned>(VL.size()))

llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,26 @@
22
; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=arch15 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
33

44
define void @e(<4 x i16> %0) {
5+
; CHECK-LABEL: @e(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
8+
; CHECK: vector.body:
9+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ]
10+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
11+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[VEC_IND]], zeroinitializer
12+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
13+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i16> [[VEC_IND]], zeroinitializer
14+
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i16> [[TMP0:%.*]], zeroinitializer
15+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i32>
16+
; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP2]], [[TMP5]]
17+
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i16> [[TMP3]], zeroinitializer
18+
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i32>
19+
; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP6]], [[TMP8]]
20+
; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i16> zeroinitializer, zeroinitializer
21+
; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i1> [[TMP10]] to <4 x i32>
22+
; CHECK-NEXT: [[TMP12]] = or <4 x i32> [[TMP9]], [[TMP11]]
23+
; CHECK-NEXT: br label [[VECTOR_BODY]]
24+
;
525
entry:
626
br label %vector.body
727

0 commit comments

Comments
 (0)