Skip to content

Commit d8614b1

Browse files
alexey-bataev authored and KornevNikita committed
[SLP]Fix vector factor for repeated node for bv
When a node vector is being added and it is already used in the shuffle for a buildvector, the vector factor must be calculated from all of the vectors, not only from this single vector, to avoid an incorrect result. Also, the stability of the reused entries detection needs to be increased to avoid a mismatch between cost estimation and codegen. Fixes #123639
1 parent 461398e commit d8614b1

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12601,9 +12601,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1260112601
VTE = *MIt;
1260212602
}
1260312603
}
12604-
Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
12605-
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
12606-
continue;
12604+
if (none_of(TE->CombinedEntriesWithIndices,
12605+
[&](const auto &P) { return P.first == VTE->Idx; })) {
12606+
Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
12607+
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
12608+
continue;
12609+
}
1260712610
VToTEs.insert(VTE);
1260812611
}
1260912612
if (VToTEs.empty())
@@ -13737,7 +13740,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1373713740
break;
1373813741
}
1373913742
}
13740-
int VF = getVF(V1);
13743+
unsigned VF = 0;
13744+
for (Value *V : InVectors)
13745+
VF = std::max(VF, getVF(V));
1374113746
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
1374213747
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
1374313748
CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF);

llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
; YAML-NEXT: Function: test
99
; YAML-NEXT: Args:
1010
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
11-
; YAML-NEXT: - Cost: '-38'
11+
; YAML-NEXT: - Cost: '-41'
1212
; YAML-NEXT: - String: ' and with tree size '
1313
; YAML-NEXT: - TreeSize: '7'
1414
; YAML-NEXT: ...
@@ -17,7 +17,7 @@ define i64 @test() {
1717
; CHECK-LABEL: define i64 @test(
1818
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
1919
; CHECK-NEXT: [[ENTRY:.*:]]
20-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0>, i32 0, i32 6
20+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 1, i32 0>, i32 0, i32 6
2121
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8)
2222
; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> <i32 0, i32 0, i32 0, i32 1>, i64 24)
2323
; CHECK-NEXT: [[TMP3:%.*]] = sub <32 x i32> zeroinitializer, [[TMP2]]

0 commit comments

Comments
 (0)