Skip to content

Commit e5a4ea2

Browse files
committed
[SLP] Do not remove a reduced value if it is a copyable element
If a value is checked for the reduction and it is a copyable element in the root node, it must not be deleted, since it may still be used after vectorization. Fixes llvm#155512
1 parent d63dd5e commit e5a4ea2

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20529,7 +20529,9 @@ Value *BoUpSLP::vectorizeTree(
2052920529
!(GatheredLoadsEntriesFirst.has_value() &&
2053020530
IE->Idx >= *GatheredLoadsEntriesFirst &&
2053120531
VectorizableTree.front()->isGather() &&
20532-
is_contained(VectorizableTree.front()->Scalars, I)))
20532+
is_contained(VectorizableTree.front()->Scalars, I)) &&
20533+
!(!VectorizableTree.front()->isGather() &&
20534+
VectorizableTree.front()->isCopyableElement(I)))
2053320535
continue;
2053420536
SmallVector<SelectInst *> LogicalOpSelects;
2053520537
I->replaceUsesWithIf(PoisonValue::get(I->getType()), [&](Use &U) {
Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
3+
4+
@n = external global [0 x i64]
5+
6+
define i32 @main() {
7+
; CHECK-LABEL: define i32 @main(
8+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 8 @n, i64 32, <2 x i1> splat (i1 true), i32 2)
11+
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i32>
12+
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> zeroinitializer)
13+
; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
14+
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[TMP3]], <2 x i64> splat (i64 17179869184))
15+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i32>
16+
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP5]], <i32 0, i32 1>
17+
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP6]])
18+
; CHECK-NEXT: ret i32 [[TMP7]]
19+
;
20+
entry:
21+
%0 = load i64, ptr getelementptr (i8, ptr @n, i64 32), align 8
22+
%conv13.i.1 = trunc i64 %0 to i32
23+
%cond.i.1 = tail call i32 @llvm.smin.i32(i32 %conv13.i.1, i32 0)
24+
%conv40.i.1 = sext i32 %cond.i.1 to i64
25+
%cond47.i.1 = tail call i64 @llvm.umin.i64(i64 %conv40.i.1, i64 17179869184)
26+
%1 = trunc i64 %cond47.i.1 to i32
27+
%2 = add i32 %1, 1
28+
%3 = load i64, ptr @n, align 8
29+
%conv13.i.2 = trunc i64 %3 to i32
30+
%cond.i.2 = tail call i32 @llvm.smin.i32(i32 %conv13.i.2, i32 0)
31+
%conv40.i.2 = sext i32 %cond.i.2 to i64
32+
%cond47.i.2 = tail call i64 @llvm.umin.i64(i64 %conv40.i.2, i64 17179869184)
33+
%4 = trunc i64 %cond47.i.2 to i32
34+
%5 = or i32 %2, %4
35+
ret i32 %5
36+
}
37+
38+
declare i32 @llvm.smin.i32(i32, i32)
39+
declare i64 @llvm.umin.i64(i64, i64)

0 commit comments

Comments
 (0)