Skip to content

Commit 75ef0be

Browse files
committed
[SLP] Be careful when trying to match/vectorize copyable nodes with external uses only
We need to be careful when trying to match and/or build a copyable node from instructions that are used only outside the block and whose operands immediately precede them. In this case the insertion point might be the same, which may result in a broken def-use chain. Fixes llvm#167366
1 parent 7cd9d3d commit 75ef0be

File tree

2 files changed

+90
-1
lines changed

2 files changed

+90
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16844,6 +16844,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1684416844
}
1684516845
return false;
1684616846
};
16847+
// Predicate used to reject a candidate entry when reusing it next to a
// copyable user node could end up with a coinciding insertion point (per the
// commit description, that may break the def-use chain).
// Returns true only when ALL of the following hold:
//  - TEUseEI is set, its UserTE is non-null and has copyable elements;
//  - TEInsertPt is not a copyable element of that UserTE, and
//    isUsedOutsideBlock(TEInsertPt) is true;
//  - TEInsertPt is the instruction immediately following InsertPt;
//  - E has no copyable elements, or InsertPt is not a copyable element of E,
//    or isUsedOutsideBlock(InsertPt) is false.
// Parameters: E is the tree entry queried for copyable elements; InsertPt is
// the instruction compared against TEInsertPt. TEUseEI and TEInsertPt are
// captured by reference from the enclosing scope.
// NOTE(review): the const_casts presumably exist only because TEInsertPt is
// const-qualified here while the callees take a non-const pointer - confirm.
auto CheckNonSchedulableOrdering = [&](const TreeEntry *E,
16848+
Instruction *InsertPt) {
16849+
return TEUseEI && TEUseEI.UserTE && TEUseEI.UserTE->hasCopyableElements() &&
16850+
!TEUseEI.UserTE->isCopyableElement(
16851+
const_cast<Instruction *>(TEInsertPt)) &&
16852+
isUsedOutsideBlock(const_cast<Instruction *>(TEInsertPt)) &&
16853+
InsertPt->getNextNode() == TEInsertPt &&
16854+
(!E->hasCopyableElements() || !E->isCopyableElement(InsertPt) ||
16855+
!isUsedOutsideBlock(InsertPt));
16856+
};
1684716857
for (Value *V : VL) {
1684816858
if (isConstant(V) || !VisitedValue.insert(V).second)
1684916859
continue;
@@ -16926,6 +16936,11 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1692616936
// The node is reused - exit.
1692716937
if (CheckAndUseSameNode(TEPtr))
1692816938
break;
16939+
// The parent node is copyable with last inst used outside? And the last
16940+
// inst is the next inst for the last inst of TEPtr? Exit, if yes, to
16941+
// preserve def-use chain.
16942+
if (CheckNonSchedulableOrdering(UseEI.UserTE, InsertPt))
16943+
continue;
1692916944
VToTEs.insert(TEPtr);
1693016945
}
1693116946
if (ArrayRef<TreeEntry *> VTEs = getSplitTreeEntries(V); !VTEs.empty()) {
@@ -16961,7 +16976,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1696116976
if (none_of(TE->CombinedEntriesWithIndices,
1696216977
[&](const auto &P) { return P.first == VTE->Idx; })) {
1696316978
Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
16964-
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
16979+
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst) ||
16980+
CheckNonSchedulableOrdering(VTE, &LastBundleInst))
1696516981
continue;
1696616982
}
1696716983
// The node is reused - exit.
@@ -21003,6 +21019,22 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2100321019
return isUsedOutsideBlock(V);
2100421020
}))
2100521021
return std::nullopt;
21022+
// If any instruction is used outside block only and its operand is placed
21023+
// immediately before it, do not schedule, it may cause wrong def-use chain.
21024+
if (S.areInstructionsWithCopyableElements() && any_of(VL, [&](Value *V) {
21025+
if (isa<PoisonValue>(V) || S.isCopyableElement(V))
21026+
return false;
21027+
if (isUsedOutsideBlock(V)) {
21028+
for (Value *Op : cast<Instruction>(V)->operands()) {
21029+
auto *I = dyn_cast<Instruction>(Op);
21030+
if (!I)
21031+
continue;
21032+
return SLP->isVectorized(I) && I->getNextNode() == V;
21033+
}
21034+
}
21035+
return false;
21036+
}))
21037+
return std::nullopt;
2100621038
bool HasCopyables = S.areInstructionsWithCopyableElements();
2100721039
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
2100821040
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-9999 < %s | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: br label %[[BB1:.*]]
8+
; CHECK: [[BB1]]:
9+
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ], [ [[TMP6:%.*]], %[[BB14:.*]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB10:.*]] ]
10+
; CHECK-NEXT: br label %[[BB3:.*]]
11+
; CHECK: [[BB3]]:
12+
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ zeroinitializer, %[[BB1]] ]
13+
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ [[TMP0]], %[[BB1]] ]
14+
; CHECK-NEXT: br label %[[BB10]]
15+
; CHECK: [[BB10]]:
16+
; CHECK-NEXT: [[PHI12:%.*]] = phi float [ 0.000000e+00, %[[BB3]] ], [ 0.000000e+00, %[[BB14]] ]
17+
; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB3]] ], [ [[TMP7:%.*]], %[[BB14]] ]
18+
; CHECK-NEXT: switch i32 0, label %[[BB14]] [
19+
; CHECK-NEXT: i32 0, label %[[BB1]]
20+
; CHECK-NEXT: ]
21+
; CHECK: [[BB14]]:
22+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> [[TMP3]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 6>
23+
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP3]], [[TMP4]]
24+
; CHECK-NEXT: [[TMP6]] = or <4 x i32> [[TMP5]], <i32 poison, i32 poison, i32 0, i32 0>
25+
; CHECK-NEXT: [[TMP7]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 7>
26+
; CHECK-NEXT: br i1 false, label %[[BB1]], label %[[BB10]]
27+
;
28+
bb:
29+
br label %bb1
30+
31+
bb1:
32+
%phi = phi i32 [ 0, %bb ], [ %or16, %bb14 ], [ 0, %bb10 ]
33+
%phi2 = phi i32 [ 0, %bb ], [ %or15, %bb14 ], [ 0, %bb10 ]
34+
br label %bb3
35+
36+
bb3: ; preds = %bb1
37+
%phi4 = phi i32 [ poison, %bb1 ]
38+
%phi6 = phi i32 [ poison, %bb1 ]
39+
%phi7 = phi i32 [ %phi, %bb1 ]
40+
%phi9 = phi i32 [ %phi2, %bb1 ]
41+
%0 = phi <2 x float> [ zeroinitializer, %bb1 ]
42+
br label %bb10
43+
44+
bb10:
45+
%phi11 = phi i32 [ 0, %bb3 ], [ %phi11, %bb14 ]
46+
%phi12 = phi float [ 0.000000e+00, %bb3 ], [ 0.000000e+00, %bb14 ]
47+
%phi13 = phi i32 [ 0, %bb3 ], [ %or15, %bb14 ]
48+
switch i32 0, label %bb14 [
49+
i32 0, label %bb1
50+
]
51+
52+
bb14:
53+
%or = or i32 %phi13, %phi11
54+
%or15 = or i32 %or, 0
55+
%or16 = or i32 %phi11, 0
56+
br i1 false, label %bb1, label %bb10
57+
}

0 commit comments

Comments
 (0)