Skip to content

Commit db6ba82

Browse files
committed
[SLP] Do not match the gather node with copyable parent, containing insert instruction
If a gather/buildvector node has a match, and this matching node has a scheduled copyable parent, and the parent node of the original node has a last instruction that is non-schedulable and is part of the scheduled copyable parent, then the matching node should be excluded as non-matching, since it produces a wrong def-use chain. Fixes llvm#165435
1 parent 17c6c8d commit db6ba82

File tree

2 files changed

+53
-1
lines changed

2 files changed

+53
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16920,7 +16920,10 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1692016920
// otherwise TEPtr depends on TE.
1692116921
if ((TEInsertBlock != InsertPt->getParent() ||
1692216922
TEUseEI.EdgeIdx < UseEI.EdgeIdx || TEUseEI.UserTE != UseEI.UserTE) &&
16923-
!CheckOrdering(InsertPt))
16923+
(!CheckOrdering(InsertPt) ||
16924+
(UseEI.UserTE->hasCopyableElements() &&
16925+
isUsedOutsideBlock(const_cast<Instruction *>(TEInsertPt)) &&
16926+
is_contained(UseEI.UserTE->Scalars, TEInsertPt))))
1692416927
continue;
1692516928
// The node is reused - exit.
1692616929
if (CheckAndUseSameNode(TEPtr))
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define double @test() {
5+
; CHECK-LABEL: define double @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: br label %[[BB1:.*]]
8+
; CHECK: [[BB1]]:
9+
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP3:%.*]], %[[BB4:.*]] ]
10+
; CHECK-NEXT: br label %[[BB4]]
11+
; CHECK: [[BB4]]:
12+
; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1
13+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
14+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[MUL]], i32 0
15+
; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], [[TMP2]]
16+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 1>, i32 [[MUL]], i32 0
17+
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP0]], [[TMP4]]
18+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
19+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP6]], 0
20+
; CHECK-NEXT: br i1 false, label %[[BB7:.*]], label %[[BB1]]
21+
; CHECK: [[BB7]]:
22+
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ [[TMP5]], %[[BB4]] ]
23+
; CHECK-NEXT: ret double 0.000000e+00
24+
;
25+
bb:
26+
br label %bb1
27+
28+
bb1:
29+
%phi = phi i32 [ 0, %bb ], [ 0, %bb4 ]
30+
%phi2 = phi i32 [ 0, %bb ], [ 0, %bb4 ]
31+
%phi3 = phi i32 [ 0, %bb ], [ %or5, %bb4 ]
32+
br label %bb4
33+
34+
bb4:
35+
%or = or i32 %phi2, 0
36+
%mul = mul i32 0, 1
37+
%or5 = or i32 %phi3, %mul
38+
%and = and i32 %or, 0
39+
%or6 = or i32 %phi2, 1
40+
br i1 false, label %bb7, label %bb1
41+
42+
bb7:
43+
%phi8 = phi i32 [ %phi, %bb4 ]
44+
%phi9 = phi i32 [ %or, %bb4 ]
45+
%phi10 = phi i32 [ %or5, %bb4 ]
46+
%phi11 = phi i32 [ %or6, %bb4 ]
47+
ret double 0.000000e+00
48+
}
49+

0 commit comments

Comments
 (0)