Skip to content

Commit 6e0c6a1

Browse files
author
git apple-llvm automerger
committed
Merge commit '9a3aedb09300' from llvm.org/main into next
2 parents ad7f011 + 9a3aedb commit 6e0c6a1

File tree

2 files changed

+59
-1
lines changed

2 files changed

+59
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20853,7 +20853,23 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2085320853
for (Value *V : VL) {
2085420854
if (S.isNonSchedulable(V))
2085520855
continue;
20856-
if (!extendSchedulingRegion(V, S)) {
20856+
// For copyables with parent nodes, which do not need to be scheduled, the
20857+
// parents should not be commutative, otherwise deps may be mishandled
20858+
// because of the potential reordering of commutative operations.
20859+
if ((S.isCopyableElement(V) && EI.UserTE && !EI.UserTE->isGather() &&
20860+
EI.UserTE->hasState() && EI.UserTE->doesNotNeedToSchedule() &&
20861+
any_of(EI.UserTE->Scalars,
20862+
[&](Value *V) {
20863+
if (isa<PoisonValue>(V))
20864+
return false;
20865+
auto *I = dyn_cast<Instruction>(V);
20866+
return isCommutative(
20867+
(I && EI.UserTE->isAltShuffle())
20868+
? EI.UserTE->getMatchingMainOpOrAltOp(I)
20869+
: EI.UserTE->getMainOp(),
20870+
V);
20871+
})) ||
20872+
!extendSchedulingRegion(V, S)) {
2085720873
// If the scheduling region got new instructions at the lower end (or it
2085820874
// is a new region for the first bundle). This makes it necessary to
2085920875
// recalculate all dependencies.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=i686-unknown-linux-android29 -mattr=+sse2 < %s | FileCheck %s
3+
4+
define i64 @test(ptr %a) {
5+
; CHECK-LABEL: define i64 @test(
6+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 0, 0
8+
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 4
9+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 0
10+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 1, [[TMP1]]
11+
; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 0, 1
12+
; CHECK-NEXT: [[TMP6:%.*]] = ashr i64 0, 0
13+
; CHECK-NEXT: br label %[[BB7:.*]]
14+
; CHECK: [[BB7]]:
15+
; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ [[TMP3]], [[TMP0:%.*]] ]
16+
; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ 0, [[TMP0]] ]
17+
; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ [[TMP6]], [[TMP0]] ]
18+
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP5]], [[TMP0]] ]
19+
; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ 0, [[TMP0]] ]
20+
; CHECK-NEXT: [[TMP13:%.*]] = phi i64 [ [[TMP4]], [[TMP0]] ]
21+
; CHECK-NEXT: ret i64 0
22+
;
23+
%1 = add i64 0, 0
24+
%2 = load i64, ptr %a, align 4
25+
%3 = add i64 0, 0
26+
%4 = add i64 %2, 0
27+
%5 = add i64 0, 0
28+
%6 = add i64 1, %1
29+
%7 = ashr i64 0, 1
30+
%8 = add i64 0, 0
31+
%9 = ashr i64 %8, 0
32+
br label %10
33+
34+
10:
35+
%11 = phi i64 [ %4, %0 ]
36+
%12 = phi i64 [ %3, %0 ]
37+
%13 = phi i64 [ %9, %0 ]
38+
%14 = phi i64 [ %7, %0 ]
39+
%15 = phi i64 [ %5, %0 ]
40+
%16 = phi i64 [ %6, %0 ]
41+
ret i64 0
42+
}

0 commit comments

Comments
 (0)