[SLP] Fix vector factor for repeated node for buildvector

alexey-bataev · KornevNikita · commit d8614b19e2b4 · 2025-02-25T18:10:05.000+01:00
When adding a node vector that is already used in the shuffle for a
buildvector, the vector factor must be calculated from all of the input
vectors, not only from this single vector, to avoid an incorrect result.
Also, the detection of reused entries needs to be more stable to avoid a
mismatch between cost estimation and codegen.

Fixes #123639
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12601,9 +12601,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
           VTE = *MIt;
         }
       }
-      Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
-      if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
-        continue;
+      if (none_of(TE->CombinedEntriesWithIndices,
+                  [&](const auto &P) { return P.first == VTE->Idx; })) {
+        Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
+        if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
+          continue;
+      }
       VToTEs.insert(VTE);
     }
     if (VToTEs.empty())
@@ -13737,7 +13740,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
           break;
         }
     }
-    int VF = getVF(V1);
+    unsigned VF = 0;
+    for (Value *V : InVectors)
+      VF = std::max(VF, getVF(V));
     for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
       if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
         CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll
@@ -8,7 +8,7 @@
 ; YAML-NEXT: Function:        test
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'Vectorized horizontal reduction with cost '
-; YAML-NEXT:   - Cost:            '-38'
+; YAML-NEXT:   - Cost:            '-41'
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '7'
 ; YAML-NEXT: ...
@@ -17,7 +17,7 @@ define i64 @test() {
 ; CHECK-LABEL: define i64 @test(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0>, i32 0, i32 6
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 1, i32 0>, i32 0, i32 6
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> <i32 0, i32 0, i32 0, i32 1>, i64 24)
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub <32 x i32> zeroinitializer, [[TMP2]]

Original file line number	Diff line number	Diff line change
`@@ -12601,9 +12601,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(`
`12601`	`12601`	`VTE = *MIt;`
`12602`	`12602`	`}`
`12603`	`12603`	`}`
`12604`		`- Instruction &LastBundleInst = getLastInstructionInBundle(VTE);`
`12605`		`- if (&LastBundleInst == TEInsertPt \|\| !CheckOrdering(&LastBundleInst))`
`12606`		`- continue;`
	`12604`	`+ if (none_of(TE->CombinedEntriesWithIndices,`
	`12605`	`+ [&](const auto &P) { return P.first == VTE->Idx; })) {`
	`12606`	`+ Instruction &LastBundleInst = getLastInstructionInBundle(VTE);`
	`12607`	`+ if (&LastBundleInst == TEInsertPt \|\| !CheckOrdering(&LastBundleInst))`
	`12608`	`+ continue;`
	`12609`	`+ }`
`12607`	`12610`	`VToTEs.insert(VTE);`
`12608`	`12611`	`}`
`12609`	`12612`	`if (VToTEs.empty())`
`@@ -13737,7 +13740,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {`
`13737`	`13740`	`break;`
`13738`	`13741`	`}`
`13739`	`13742`	`}`
`13740`		`- int VF = getVF(V1);`
	`13743`	`+ unsigned VF = 0;`
	`13744`	`+ for (Value *V : InVectors)`
	`13745`	`+ VF = std::max(VF, getVF(V));`
`13741`	`13746`	`for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)`
`13742`	`13747`	`if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)`
`13743`	`13748`	`CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF);`