Skip to content

Commit dcc89b6

Browse files
alexey-bataevgithub-actions[bot]
authored andcommitted
Automerge: [SLP]Recalculate number of parts when requesting number of elements based on original scalars size
Need to recalculate number of parts, since gathered scalar size might be changed during building the buildvector shuffles. Fixes #125259
2 parents 9ae3331 + d9c9326 commit dcc89b6

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15094,7 +15094,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1509415094
}
1509515095
}
1509615096
if (!GatherShuffles.empty()) {
15097-
unsigned SliceSize = getPartNumElems(E->Scalars.size(), NumParts);
15097+
unsigned SliceSize =
15098+
getPartNumElems(E->Scalars.size(),
15099+
::getNumberOfParts(*TTI, VecTy, E->Scalars.size()));
1509815100
SmallVector<int> VecMask(Mask.size(), PoisonMaskElem);
1509915101
for (const auto [I, TEs] : enumerate(Entries)) {
1510015102
if (TEs.empty()) {

llvm/test/Transforms/SLPVectorizer/X86/gathered-shuffle-resized.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ define ptr @test(ptr %0, ptr %args_gep) {
1818
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !noalias [[META0:![0-9]+]]
1919
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8, !noalias [[META0]]
2020
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
21-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <16 x i32> poison
21+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
2222
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
23-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
23+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
2424
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <16 x i32> [[TMP13]], zeroinitializer
2525
; CHECK-NEXT: [[TMP15:%.*]] = zext <16 x i1> [[TMP14]] to <16 x i8>
2626
; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr [[TMP5]], align 1

0 commit comments

Comments
 (0)