Skip to content

Commit e7c936e

Browse files
alexey-bataev, github-actions[bot]
authored and committed
Automerge: [SLP]Do not try to schedule bundle with non-schedulable parent with commutable instructions
Commutable instructions can be reordered during tree building, and if the parent node is not scheduled, its ScheduleData elements are considered independent and the compiler does not look for reordered operands. Scheduling of copyables needs to be cancelled in this case.
2 parents 60f28b6 + 9a3aedb commit e7c936e

File tree

2 files changed

+59
-1
lines changed

2 files changed

+59
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20853,7 +20853,23 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2085320853
for (Value *V : VL) {
2085420854
if (S.isNonSchedulable(V))
2085520855
continue;
20856-
if (!extendSchedulingRegion(V, S)) {
20856+
// For copyables with parent nodes that do not need to be scheduled, the
20857+
// parents must not be commutative; otherwise, dependencies may be handled
20858+
// incorrectly because of potential reordering of commutative operations.
20859+
if ((S.isCopyableElement(V) && EI.UserTE && !EI.UserTE->isGather() &&
20860+
EI.UserTE->hasState() && EI.UserTE->doesNotNeedToSchedule() &&
20861+
any_of(EI.UserTE->Scalars,
20862+
[&](Value *V) {
20863+
if (isa<PoisonValue>(V))
20864+
return false;
20865+
auto *I = dyn_cast<Instruction>(V);
20866+
return isCommutative(
20867+
(I && EI.UserTE->isAltShuffle())
20868+
? EI.UserTE->getMatchingMainOpOrAltOp(I)
20869+
: EI.UserTE->getMainOp(),
20870+
V);
20871+
})) ||
20872+
!extendSchedulingRegion(V, S)) {
2085720873
// If the scheduling region got new instructions at the lower end (or it
2085820874
// is a new region for the first bundle). This makes it necessary to
2085920875
// recalculate all dependencies.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=i686-unknown-linux-android29 -mattr=+sse2 < %s | FileCheck %s
3+
4+
define i64 @test(ptr %a) {
5+
; CHECK-LABEL: define i64 @test(
6+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 0, 0
8+
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 4
9+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 0
10+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 1, [[TMP1]]
11+
; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 0, 1
12+
; CHECK-NEXT: [[TMP6:%.*]] = ashr i64 0, 0
13+
; CHECK-NEXT: br label %[[BB7:.*]]
14+
; CHECK: [[BB7]]:
15+
; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ [[TMP3]], [[TMP0:%.*]] ]
16+
; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ 0, [[TMP0]] ]
17+
; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ [[TMP6]], [[TMP0]] ]
18+
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP5]], [[TMP0]] ]
19+
; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ 0, [[TMP0]] ]
20+
; CHECK-NEXT: [[TMP13:%.*]] = phi i64 [ [[TMP4]], [[TMP0]] ]
21+
; CHECK-NEXT: ret i64 0
22+
;
23+
%1 = add i64 0, 0
24+
%2 = load i64, ptr %a, align 4
25+
%3 = add i64 0, 0
26+
%4 = add i64 %2, 0
27+
%5 = add i64 0, 0
28+
%6 = add i64 1, %1
29+
%7 = ashr i64 0, 1
30+
%8 = add i64 0, 0
31+
%9 = ashr i64 %8, 0
32+
br label %10
33+
34+
10:
35+
%11 = phi i64 [ %4, %0 ]
36+
%12 = phi i64 [ %3, %0 ]
37+
%13 = phi i64 [ %9, %0 ]
38+
%14 = phi i64 [ %7, %0 ]
39+
%15 = phi i64 [ %5, %0 ]
40+
%16 = phi i64 [ %6, %0 ]
41+
ret i64 0
42+
}

0 commit comments

Comments
 (0)