Skip to content

Commit 9a3aedb

Browse files
committed
[SLP]Do not try to schedule bundle with non-schedulable parent with commutable instructions
Commutable instructions can be reordered during tree building, and if the parent node is not scheduled, its ScheduleData elements are considered independent and the compiler does not look for reordered operands. Scheduling of copyables needs to be cancelled in this case.
1 parent 54c304c commit 9a3aedb

File tree

2 files changed

+59
-1
lines changed

2 files changed

+59
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20853,7 +20853,23 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2085320853
for (Value *V : VL) {
2085420854
if (S.isNonSchedulable(V))
2085520855
continue;
20856-
if (!extendSchedulingRegion(V, S)) {
20856+
// For copyables with parent nodes, which do not need to be scheduled, the
20857+
// parents should not be commutative, otherwise may incorrectly handle deps
20858+
// because of the potential reordering of commutative operations.
20859+
if ((S.isCopyableElement(V) && EI.UserTE && !EI.UserTE->isGather() &&
20860+
EI.UserTE->hasState() && EI.UserTE->doesNotNeedToSchedule() &&
20861+
any_of(EI.UserTE->Scalars,
20862+
[&](Value *V) {
20863+
if (isa<PoisonValue>(V))
20864+
return false;
20865+
auto *I = dyn_cast<Instruction>(V);
20866+
return isCommutative(
20867+
(I && EI.UserTE->isAltShuffle())
20868+
? EI.UserTE->getMatchingMainOpOrAltOp(I)
20869+
: EI.UserTE->getMainOp(),
20870+
V);
20871+
})) ||
20872+
!extendSchedulingRegion(V, S)) {
2085720873
// If the scheduling region got new instructions at the lower end (or it
2085820874
// is a new region for the first bundle). This makes it necessary to
2085920875
// recalculate all dependencies.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=i686-unknown-linux-android29 -mattr=+sse2 < %s | FileCheck %s
3+
4+
define i64 @test(ptr %a) {
5+
; CHECK-LABEL: define i64 @test(
6+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 0, 0
8+
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 4
9+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 0
10+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 1, [[TMP1]]
11+
; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 0, 1
12+
; CHECK-NEXT: [[TMP6:%.*]] = ashr i64 0, 0
13+
; CHECK-NEXT: br label %[[BB7:.*]]
14+
; CHECK: [[BB7]]:
15+
; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ [[TMP3]], [[TMP0:%.*]] ]
16+
; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ 0, [[TMP0]] ]
17+
; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ [[TMP6]], [[TMP0]] ]
18+
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP5]], [[TMP0]] ]
19+
; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ 0, [[TMP0]] ]
20+
; CHECK-NEXT: [[TMP13:%.*]] = phi i64 [ [[TMP4]], [[TMP0]] ]
21+
; CHECK-NEXT: ret i64 0
22+
;
23+
%1 = add i64 0, 0
24+
%2 = load i64, ptr %a, align 4
25+
%3 = add i64 0, 0
26+
%4 = add i64 %2, 0
27+
%5 = add i64 0, 0
28+
%6 = add i64 1, %1
29+
%7 = ashr i64 0, 1
30+
%8 = add i64 0, 0
31+
%9 = ashr i64 %8, 0
32+
br label %10
33+
34+
10:
35+
%11 = phi i64 [ %4, %0 ]
36+
%12 = phi i64 [ %3, %0 ]
37+
%13 = phi i64 [ %9, %0 ]
38+
%14 = phi i64 [ %7, %0 ]
39+
%15 = phi i64 [ %5, %0 ]
40+
%16 = phi i64 [ %6, %0 ]
41+
ret i64 0
42+
}

0 commit comments

Comments
 (0)