Skip to content

Commit 8f16837

Browse files
committed
[SLP]Support non-ordered copyable argument in non-commutative instructions
If a non-commutative user uses the same value for several of its operands and at least one of those operands (but not the first) is copyable, we need to take this into account when calculating the number of dependencies. Otherwise, the schedule bundle might not be scheduled correctly, which can cause a compiler crash. Fixes llvm#162925
1 parent d72cd24 commit 8f16837

File tree

2 files changed

+65
-28
lines changed

2 files changed

+65
-28
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5291,19 +5291,19 @@ class BoUpSLP {
52915291
// data.
52925292
for (TreeEntry *TE : Entries) {
52935293
// Check if the user is commutative.
5294-
// The commutatives are handled later, as their oeprands can be
5294+
// The commutatives are handled later, as their operands can be
52955295
// reordered.
52965296
// Same applies even for non-commutative cmps, because we can invert
52975297
// their predicate potentially and, thus, reorder the operands.
52985298
bool IsCommutativeUser =
52995299
::isCommutative(User) ||
53005300
::isCommutative(TE->getMatchingMainOpOrAltOp(User), User);
5301-
EdgeInfo EI(TE, U.getOperandNo());
53025301
if (!IsCommutativeUser && !isa<CmpInst>(User)) {
53035302
unsigned &OpCnt =
53045303
OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
5304+
EdgeInfo EI(TE, U.getOperandNo());
53055305
if (!getScheduleCopyableData(EI, Op) && OpCnt < NumOps)
5306-
return false;
5306+
continue;
53075307
// Found copyable operand - continue.
53085308
++OpCnt;
53095309
continue;
@@ -5312,33 +5312,38 @@ class BoUpSLP {
53125312
.first->getSecond();
53135313
}
53145314
}
5315-
// Check the commutative/cmp entries.
5316-
if (!PotentiallyReorderedEntriesCount.empty()) {
5317-
for (auto &P : PotentiallyReorderedEntriesCount) {
5318-
auto *It = find(P.first->Scalars, User);
5319-
assert(It != P.first->Scalars.end() &&
5320-
"User is not in the tree entry");
5321-
int Lane = std::distance(P.first->Scalars.begin(), It);
5322-
assert(Lane >= 0 && "Lane is not found");
5323-
if (isa<StoreInst>(User) && !P.first->ReorderIndices.empty())
5324-
Lane = P.first->ReorderIndices[Lane];
5325-
assert(Lane < static_cast<int>(P.first->Scalars.size()) &&
5326-
"Couldn't find extract lane");
5327-
SmallVector<unsigned> OpIndices;
5328-
for (unsigned OpIdx :
5329-
seq<unsigned>(::getNumberOfPotentiallyCommutativeOps(
5330-
P.first->getMainOp()))) {
5331-
if (P.first->getOperand(OpIdx)[Lane] == Op &&
5332-
getScheduleCopyableData(EdgeInfo(P.first, OpIdx), Op))
5333-
--P.getSecond();
5334-
}
5335-
}
5336-
return all_of(PotentiallyReorderedEntriesCount,
5315+
if (PotentiallyReorderedEntriesCount.empty())
5316+
return all_of(OrderedEntriesCount,
53375317
[&](const std::pair<const TreeEntry *, unsigned> &P) {
5338-
return P.second == NumOps - 1;
5318+
return P.second == NumOps;
53395319
});
5340-
}
5341-
return true;
5320+
// Check the commutative/cmp entries.
5321+
for (auto &P : PotentiallyReorderedEntriesCount) {
5322+
auto *It = find(P.first->Scalars, User);
5323+
assert(It != P.first->Scalars.end() && "User is not in the tree entry");
5324+
int Lane = std::distance(P.first->Scalars.begin(), It);
5325+
assert(Lane >= 0 && "Lane is not found");
5326+
if (isa<StoreInst>(User) && !P.first->ReorderIndices.empty())
5327+
Lane = P.first->ReorderIndices[Lane];
5328+
assert(Lane < static_cast<int>(P.first->Scalars.size()) &&
5329+
"Couldn't find extract lane");
5330+
SmallVector<unsigned> OpIndices;
5331+
for (unsigned OpIdx :
5332+
seq<unsigned>(::getNumberOfPotentiallyCommutativeOps(
5333+
P.first->getMainOp()))) {
5334+
if (P.first->getOperand(OpIdx)[Lane] == Op &&
5335+
getScheduleCopyableData(EdgeInfo(P.first, OpIdx), Op))
5336+
--P.getSecond();
5337+
}
5338+
}
5339+
return all_of(PotentiallyReorderedEntriesCount,
5340+
[&](const std::pair<const TreeEntry *, unsigned> &P) {
5341+
return P.second == NumOps - 1;
5342+
}) &&
5343+
all_of(OrderedEntriesCount,
5344+
[&](const std::pair<const TreeEntry *, unsigned> &P) {
5345+
return P.second == NumOps;
5346+
});
53425347
}
53435348

53445349
SmallVector<ScheduleCopyableData *>
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
3+
4+
define i32 @main(ptr %q, ptr %a, i8 %.pre) {
5+
; CHECK-LABEL: define i32 @main(
6+
; CHECK-SAME: ptr [[Q:%.*]], ptr [[A:%.*]], i8 [[DOTPRE:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[DOTPRE1:%.*]] = load i8, ptr [[Q]], align 1
9+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[DOTPRE]], i32 0
10+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[DOTPRE1]], i32 1
11+
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
12+
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], <i32 0, i32 1>
13+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> <i32 poison, i32 1>, <2 x i32> <i32 0, i32 3>
14+
; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i32> [[TMP4]], [[TMP3]]
15+
; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16>
16+
; CHECK-NEXT: store <2 x i16> [[TMP6]], ptr [[A]], align 2
17+
; CHECK-NEXT: ret i32 0
18+
;
19+
entry:
20+
%.pre1 = load i8, ptr %q, align 1
21+
%conv11.i = sext i8 %.pre to i32
22+
%shl18.i = shl i32 %conv11.i, %conv11.i
23+
%conv19.i = trunc i32 %shl18.i to i16
24+
store i16 %conv19.i, ptr %a, align 2
25+
%0 = sext i8 %.pre1 to i32
26+
%1 = add i32 %0, 1
27+
%shl18.i.1 = shl i32 1, %1
28+
%conv19.i.1 = trunc i32 %shl18.i.1 to i16
29+
%arrayidx21.i.1 = getelementptr i8, ptr %a, i64 2
30+
store i16 %conv19.i.1, ptr %arrayidx21.i.1, align 2
31+
ret i32 0
32+
}

0 commit comments

Comments
 (0)