Skip to content

Commit 57947ac

Browse files
committed
[SLP] Correctly set the insert point for insertelements with copyable arguments
Need to find the last insertelement instruction in the list for the copyable arguments; otherwise, a wrong def-use chain may be built. Fixes #160671.
1 parent d77d3a7 commit 57947ac

File tree

2 files changed

+75
-2
lines changed

2 files changed

+75
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17519,7 +17519,9 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1751917519
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
1752017520
})) ||
1752117521
all_of(E->Scalars, [&](Value *V) {
17522-
return isa<PoisonValue>(V) || E->isCopyableElement(V) ||
17522+
return isa<PoisonValue>(V) ||
17523+
(E->Idx == 0 && isa<InsertElementInst>(V)) ||
17524+
E->isCopyableElement(V) ||
1752317525
(!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
1752417526
}))
1752517527
Res = FindLastInst();
@@ -19119,7 +19121,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1911919121
}
1912019122
case Instruction::InsertElement: {
1912119123
assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique");
19122-
Builder.SetInsertPoint(cast<Instruction>(E->Scalars.back()));
19124+
if (const TreeEntry *OpE = getOperandEntry(E, 1);
19125+
OpE && !OpE->isGather() && OpE->hasState() &&
19126+
!OpE->hasCopyableElements())
19127+
Builder.SetInsertPoint(cast<Instruction>(E->Scalars.back()));
19128+
else
19129+
setInsertPointAfterBundle(E);
1912319130
Value *V = vectorizeOperand(E, 1);
1912419131
ArrayRef<Value *> Op = E->getOperand(1);
1912519132
Type *ScalarTy = Op.front()->getType();
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
3+
4+
define i64 @test(i32 %arg) {
5+
; CHECK-LABEL: define i64 @test(
6+
; CHECK-SAME: i32 [[ARG:%.*]]) {
7+
; CHECK-NEXT: [[BB:.*:]]
8+
; CHECK-NEXT: [[FREEZE:%.*]] = freeze i32 0
9+
; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB1]]
10+
; CHECK: [[BB1]]:
11+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) null, align 4
12+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[ARG]], i32 3
13+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[FREEZE]], 0
14+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[FREEZE]], i32 0
15+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
16+
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], zeroinitializer
17+
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], zeroinitializer
18+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[ARG]], i32 0
19+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[LOAD]], i32 1
20+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <2 x i32> [[TMP4]], [[TMP6]]
21+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
22+
; CHECK-NEXT: [[AND:%.*]] = and i1 [[TMP8]], false
23+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
24+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> <i32 0, i32 poison, i32 poison, i32 0>, <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
25+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[TMP10]], [[TMP0]]
26+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ult <4 x i32> [[TMP10]], [[TMP0]]
27+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i1> [[TMP11]], <4 x i1> [[TMP12]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
28+
; CHECK-NEXT: br i1 false, label %[[BB11:.*]], label %[[BB12:.*]]
29+
; CHECK: [[BB11]]:
30+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[ADD]] to i64
31+
; CHECK-NEXT: ret i64 0
32+
; CHECK: [[BB12]]:
33+
; CHECK-NEXT: [[ZEXT13:%.*]] = zext i32 [[ADD]] to i64
34+
; CHECK-NEXT: ret i64 0
35+
;
36+
bb:
37+
%freeze = freeze i32 0
38+
br i1 false, label %bb1, label %bb1
39+
40+
bb1:
41+
%load = load i32, ptr addrspace(1) null, align 4
42+
%0 = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 0>, i32 %freeze, i32 2
43+
%1 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 %arg, i32 3
44+
%add = add i32 %freeze, 0
45+
%2 = insertelement <2 x i32> poison, i32 %freeze, i32 0
46+
%3 = shufflevector <2 x i32> %2, <2 x i32> poison, <2 x i32> zeroinitializer
47+
%4 = add <2 x i32> %3, zeroinitializer
48+
%5 = insertelement <2 x i32> poison, i32 %arg, i32 0
49+
%6 = insertelement <2 x i32> %5, i32 %load, i32 1
50+
%7 = icmp ult <2 x i32> %4, %6
51+
%8 = extractelement <2 x i1> %7, i32 0
52+
%and = and i1 %8, false
53+
%9 = insertelement <4 x i32> %0, i32 %add, i32 1
54+
%10 = icmp eq <4 x i32> %9, %1
55+
%11 = icmp ult <4 x i32> %9, %1
56+
%12 = shufflevector <4 x i1> %10, <4 x i1> %11, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
57+
br i1 false, label %bb11, label %bb12
58+
59+
bb11:
60+
%zext = zext i32 %add to i64
61+
ret i64 0
62+
63+
bb12:
64+
%zext13 = zext i32 %add to i64
65+
ret i64 0
66+
}

0 commit comments

Comments
 (0)