Skip to content

Commit 3f53d36

Browse files
committed
[SLP][REVEC] Fix type comparison and mask transformation for REVEC.
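When REVEC is enabled, ScalarTy may be a FixedVectorType rather than a scalar type. Compare each operand's element type against ScalarTy's scalar (element) type, not ScalarTy itself, to decide whether a cast is needed. Also expand the shuffle mask from scalar indices to vector-element indices, and scale the insertion index by the number of elements in ScalarTy.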
1 parent 1d3cbe7 commit 3f53d36

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16998,7 +16998,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1699816998
});
1699916999
return IsSigned;
1700017000
};
17001-
if (cast<VectorType>(Op1->getType())->getElementType() != ScalarTy) {
17001+
if (cast<VectorType>(Op1->getType())->getElementType() !=
17002+
ScalarTy->getScalarType()) {
1700217003
assert(ScalarTy->isIntegerTy() && "Expected item in MinBWs.");
1700317004
Op1 = Builder.CreateIntCast(
1700417005
Op1,
@@ -17007,7 +17008,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1700717008
cast<FixedVectorType>(Op1->getType())->getNumElements()),
1700817009
GetOperandSignedness(&OpTE1));
1700917010
}
17010-
if (cast<VectorType>(Op2->getType())->getElementType() != ScalarTy) {
17011+
if (cast<VectorType>(Op2->getType())->getElementType() !=
17012+
ScalarTy->getScalarType()) {
1701117013
assert(ScalarTy->isIntegerTy() && "Expected item in MinBWs.");
1701217014
Op2 = Builder.CreateIntCast(
1701317015
Op2,
@@ -17022,9 +17024,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1702217024
Mask.begin(),
1702317025
std::next(Mask.begin(), E->CombinedEntriesWithIndices.back().second),
1702417026
0);
17027+
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
17028+
if (ScalarTyNumElements != 1) {
17029+
assert(SLPReVec && "Only supported by REVEC.");
17030+
transformScalarShuffleIndiciesToVector(ScalarTyNumElements, Mask);
17031+
}
1702517032
Value *Vec = Builder.CreateShuffleVector(Op1, Mask);
1702617033
Vec = createInsertVector(Builder, Vec, Op2,
17027-
E->CombinedEntriesWithIndices.back().second);
17034+
E->CombinedEntriesWithIndices.back().second *
17035+
ScalarTyNumElements);
1702817036
E->VectorizedValue = Vec;
1702917037
return Vec;
1703017038
}

llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,31 @@
22
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
33

44
define void @test() {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[TMP0:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> poison, <4 x i32> zeroinitializer, i64 0)
8+
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP0]], <4 x i32> zeroinitializer, i64 4)
9+
; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> zeroinitializer, i64 8)
10+
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP2]], <4 x i32> zeroinitializer, i64 12)
11+
; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP3]], <4 x i32> zeroinitializer, i64 16)
12+
; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP4]], <4 x i32> zeroinitializer, i64 20)
13+
; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP5]], <4 x i32> zeroinitializer, i64 24)
14+
; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP6]], <4 x i32> zeroinitializer, i64 28)
15+
; CHECK-NEXT: [[TMP8:%.*]] = trunc <32 x i32> [[TMP7]] to <32 x i1>
16+
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> poison, <4 x i1> zeroinitializer, i64 0)
17+
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP9]], <4 x i1> zeroinitializer, i64 4)
18+
; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP10]], <4 x i1> zeroinitializer, i64 8)
19+
; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP11]], <4 x i1> zeroinitializer, i64 12)
20+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
21+
; CHECK: vector.body:
22+
; CHECK-NEXT: [[TMP13:%.*]] = phi <32 x i1> [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
23+
; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP12]], <16 x i1> [[TMP12]], <16 x i1> [[TMP12]]
24+
; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i1> [[TMP12]], [[TMP12]]
25+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i1> [[TMP14]], <16 x i1> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
26+
; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v16i1(<32 x i1> [[TMP16]], <16 x i1> [[TMP15]], i64 16)
27+
; CHECK-NEXT: [[TMP18]] = or <32 x i1> [[TMP13]], [[TMP17]]
28+
; CHECK-NEXT: br label [[VECTOR_BODY]]
29+
;
530
entry:
631
br label %vector.body
732

0 commit comments

Comments
 (0)