Skip to content

Commit 7d56590

Browse files
committed
[SLP]Do not create copyable node, if parent node is non-schedulable and has a use in binop.
If the parent node is non-schedulable (only externally used instructions), and at least one instruction has multiple uses and is used in a binop, such a copyable node should not be created. Otherwise, it may contain a wrong def-use chain model, which cannot be effectively detected. Fixes #166035
1 parent bf5332c commit 7d56590

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20975,6 +20975,27 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2097520975
if (isa<PHINode>(S.getMainOp()) ||
2097620976
isVectorLikeInstWithConstOps(S.getMainOp()))
2097720977
return nullptr;
20978+
// If the parent node is non-schedulable and the current node is copyable, and
20979+
// any of parent instructions are used outside several basic blocks or in
20980+
// bin-op node - cancel scheduling, it may cause wrong def-use deps in
20981+
// analysis, leading to a crash.
20982+
// Non-scheduled nodes may not have related ScheduleData model, which may lead
20983+
// to a skipped dep analysis.
20984+
if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
20985+
EI.UserTE->doesNotNeedToSchedule() &&
20986+
EI.UserTE->getOpcode() != Instruction::PHI &&
20987+
any_of(EI.UserTE->Scalars, [](Value *V) {
20988+
auto *I = dyn_cast<Instruction>(V);
20989+
if (!I || I->hasOneUser())
20990+
return false;
20991+
for (User *U : I->users()) {
20992+
auto *UI = cast<Instruction>(U);
20993+
if (isa<BinaryOperator>(UI))
20994+
return true;
20995+
}
20996+
return false;
20997+
}))
20998+
return std::nullopt;
2097820999
bool HasCopyables = S.areInstructionsWithCopyableElements();
2097921000
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
2098021001
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S --mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
@a = common global [100 x i64] zeroinitializer, align 64
5+
6+
define void @test() {
7+
; CHECK-LABEL: define void @test() {
8+
; CHECK-NEXT: [[ENTRY:.*]]:
9+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds nuw (i8, ptr @a, i64 48), align 8
10+
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[TMP0]], splat (i64 1)
11+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 3>
12+
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], splat (i64 1)
13+
; CHECK-NEXT: br i1 false, label %[[LOP_RHSCNT_I_PEEL:.*]], label %[[LAND_END_I_PEEL:.*]]
14+
; CHECK: [[LOP_RHSCNT_I_PEEL]]:
15+
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], <i64 1, i64 0>
16+
; CHECK-NEXT: br label %[[LAND_END_I_PEEL]]
17+
; CHECK: [[LAND_END_I_PEEL]]:
18+
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP4]], %[[LOP_RHSCNT_I_PEEL]] ]
19+
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 48), align 8
20+
; CHECK-NEXT: ret void
21+
;
22+
entry:
23+
%.promoted104.i = load i64, ptr getelementptr inbounds nuw (i8, ptr @a, i64 56), align 8
24+
%.promoted103.i = load i64, ptr getelementptr inbounds nuw (i8, ptr @a, i64 48), align 8
25+
%0 = add i64 %.promoted104.i, 1
26+
%1 = add i64 %.promoted103.i, 1
27+
%2 = add i64 %0, 1
28+
br i1 false, label %lop.rhscnt.i.peel, label %land.end.i.peel
29+
30+
lop.rhscnt.i.peel:
31+
%3 = or i64 %1, 1
32+
br label %land.end.i.peel
33+
34+
land.end.i.peel:
35+
%4 = phi i64 [ %2, %entry ], [ %0, %lop.rhscnt.i.peel ]
36+
%5 = phi i64 [ %1, %entry ], [ %3, %lop.rhscnt.i.peel ]
37+
store i64 %5, ptr getelementptr inbounds nuw (i8, ptr @a, i64 48), align 8
38+
store i64 %4, ptr getelementptr inbounds nuw (i8, ptr @a, i64 56), align 8
39+
ret void
40+
}

0 commit comments

Comments
 (0)