Skip to content

Commit f413772

Browse files
committed
[SLP]Fix last instruction selection for vectorized last instruction in SplitVectorize nodes
If the last instruction in a SplitVectorize node is itself vectorized and scheduled as part of some other bundle, the SplitVectorize node might be placed in the wrong order, leading to a compiler crash. We need to check whether the tree entries containing that instruction already have a vector value and, if so, place the SplitVectorize node after that vector instruction (otherwise, after the last instruction of that entry's bundle) to prevent the compiler crash. Fixes the issue reported in llvm#133091 (comment)
1 parent f7cc213 commit f413772

File tree

2 files changed

+109
-2
lines changed

2 files changed

+109
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15412,12 +15412,20 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1541215412

1541315413
if (E->State == TreeEntry::SplitVectorize) {
1541415414
Res = FindLastInst();
15415+
if (ArrayRef<TreeEntry *> Entries = getTreeEntries(Res); !Entries.empty()) {
15416+
for (auto *E : Entries) {
15417+
auto *I = dyn_cast_or_null<Instruction>(E->VectorizedValue);
15418+
if (!I)
15419+
I = &getLastInstructionInBundle(E);
15420+
if (Res->comesBefore(I))
15421+
Res = I;
15422+
}
15423+
}
1541515424
return *Res;
1541615425
}
1541715426

1541815427
// Set insertpoint for gathered loads to the very first load.
15419-
if (E->State != TreeEntry::SplitVectorize &&
15420-
GatheredLoadsEntriesFirst.has_value() &&
15428+
if (GatheredLoadsEntriesFirst.has_value() &&
1542115429
E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
1542215430
E->getOpcode() == Instruction::Load) {
1542315431
Res = FindFirstInst();
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test(ptr %0, <8 x i8> %1) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[TMP0:%.*]], <8 x i8> [[TMP1:%.*]]) {
7+
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP0]], align 2
8+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i64 13436
9+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 13536
10+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i64 13437
11+
; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, ptr [[TMP4]], align 4
12+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 5, i32 0, i32 7>
13+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP3]], i32 1
14+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i8> [[TMP9]], <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
15+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i8> [[TMP8]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
16+
; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP11]], <8 x i8> [[TMP10]], i64 8)
17+
; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i8>, ptr [[TMP6]], align 1
18+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[TMP13]], <8 x i8> poison, <8 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
19+
; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[TMP7]], i64 0)
20+
; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP15]], <8 x i8> [[TMP14]], i64 8)
21+
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i8> [[TMP16]], [[TMP12]]
22+
; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP5]], align 4
23+
; CHECK-NEXT: ret void
24+
;
25+
%3 = load i8, ptr %0, align 2
26+
%4 = getelementptr i8, ptr %0, i64 13442
27+
%5 = load i8, ptr %4, align 2
28+
%6 = or i8 %5, %3
29+
%7 = getelementptr i8, ptr %0, i64 13550
30+
store i8 %6, ptr %7, align 2
31+
%8 = extractelement <8 x i8> %1, i64 0
32+
%9 = or i8 %5, %8
33+
%10 = getelementptr i8, ptr %0, i64 13542
34+
store i8 %9, ptr %10, align 2
35+
%11 = getelementptr i8, ptr %0, i64 13438
36+
%12 = load i8, ptr %11, align 2
37+
%13 = or i8 %12, %3
38+
%14 = getelementptr i8, ptr %0, i64 13546
39+
store i8 %13, ptr %14, align 2
40+
%15 = extractelement <8 x i8> %1, i64 2
41+
%16 = or i8 %12, %15
42+
%17 = getelementptr i8, ptr %0, i64 13538
43+
store i8 %16, ptr %17, align 2
44+
%18 = getelementptr i8, ptr %0, i64 13440
45+
%19 = load i8, ptr %18, align 4
46+
%20 = or i8 %19, %3
47+
%21 = getelementptr i8, ptr %0, i64 13548
48+
store i8 %20, ptr %21, align 4
49+
%22 = extractelement <8 x i8> %1, i64 4
50+
%23 = or i8 %19, %22
51+
%24 = getelementptr i8, ptr %0, i64 13540
52+
store i8 %23, ptr %24, align 4
53+
%25 = getelementptr i8, ptr %0, i64 13436
54+
%26 = load i8, ptr %25, align 4
55+
%27 = getelementptr i8, ptr %0, i64 13444
56+
%28 = load i8, ptr %27, align 4
57+
%29 = or i8 %28, %26
58+
%30 = getelementptr i8, ptr %0, i64 13544
59+
store i8 %29, ptr %30, align 4
60+
%31 = or i8 %26, %8
61+
%32 = getelementptr i8, ptr %0, i64 13536
62+
store i8 %31, ptr %32, align 4
63+
%33 = getelementptr i8, ptr %0, i64 13443
64+
%34 = load i8, ptr %33, align 1
65+
%35 = or i8 %34, %3
66+
%36 = getelementptr i8, ptr %0, i64 13551
67+
store i8 %35, ptr %36, align 1
68+
%37 = extractelement <8 x i8> %1, i64 7
69+
%38 = or i8 %34, %37
70+
%39 = getelementptr i8, ptr %0, i64 13543
71+
store i8 %38, ptr %39, align 1
72+
%40 = getelementptr i8, ptr %0, i64 13439
73+
%41 = load i8, ptr %40, align 1
74+
%42 = or i8 %41, %3
75+
%43 = getelementptr i8, ptr %0, i64 13547
76+
store i8 %42, ptr %43, align 1
77+
%44 = extractelement <8 x i8> %1, i64 3
78+
%45 = or i8 %41, %44
79+
%46 = getelementptr i8, ptr %0, i64 13539
80+
store i8 %45, ptr %46, align 1
81+
%47 = getelementptr i8, ptr %0, i64 13441
82+
%48 = load i8, ptr %47, align 1
83+
%49 = or i8 %48, %3
84+
%50 = getelementptr i8, ptr %0, i64 13549
85+
store i8 %49, ptr %50, align 1
86+
%51 = extractelement <8 x i8> %1, i64 5
87+
%52 = or i8 %48, %51
88+
%53 = getelementptr i8, ptr %0, i64 13541
89+
store i8 %52, ptr %53, align 1
90+
%54 = getelementptr i8, ptr %0, i64 13437
91+
%55 = load i8, ptr %54, align 1
92+
%56 = or i8 %55, %3
93+
%57 = getelementptr i8, ptr %0, i64 13545
94+
store i8 %56, ptr %57, align 1
95+
%58 = or i8 %55, %8
96+
%59 = getelementptr i8, ptr %0, i64 13537
97+
store i8 %58, ptr %59, align 1
98+
ret void
99+
}

0 commit comments

Comments
 (0)