Skip to content

Commit 154138c

Browse files
committed
[SLP]Do not pack div-like copyable values
If the main instruction of a bundle with copyable elements is a div-like instruction, the compiler must not pack duplicates: packing extends the bundle with poison values, and vectorizing div-like instructions with poison operands results in undefined behavior. Fixes #164185
1 parent ee50839 commit 154138c

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10546,8 +10546,11 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
1054610546
PoisonValue::get(UniqueValues.front()->getType()));
1054710547
// Check that extended with poisons/copyable operations are still valid
1054810548
// for vectorization (div/rem are not allowed).
10549-
if (!S.areInstructionsWithCopyableElements() &&
10550-
!getSameOpcode(PaddedUniqueValues, TLI).valid()) {
10549+
if ((!S.areInstructionsWithCopyableElements() &&
10550+
!getSameOpcode(PaddedUniqueValues, TLI).valid()) ||
10551+
(S.areInstructionsWithCopyableElements() && S.isMulDivLikeOp() &&
10552+
(S.getMainOp()->isIntDivRem() || S.getMainOp()->isFPDivRem() ||
10553+
isa<CallInst>(S.getMainOp())))) {
1055110554
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
1055210555
ReuseShuffleIndices.clear();
1055310556
return false;
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define ptr @test(ptr %d) {
5+
; CHECK-LABEL: define ptr @test(
6+
; CHECK-SAME: ptr [[D:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1
9+
; CHECK-NEXT: [[CMP4_2:%.*]] = icmp eq i8 [[TMP0]], 0
10+
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 0
11+
; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, 0
12+
; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 0
13+
; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, 0
14+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i64> poison, i64 [[TMP1]], i32 0
15+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i64> [[TMP5]], i64 [[TMP3]], i32 1
16+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x i64> [[TMP6]], i64 [[TMP4]], i32 4
17+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <6 x i64> [[TMP7]], <6 x i64> poison, <6 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4>
18+
; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 1, i64 1, i64 1, i64 0>
19+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <6 x i64> [[TMP9]], i32 0
20+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP10]]
21+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <6 x i64> [[TMP9]], i32 1
22+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP12]]
23+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <6 x i64> [[TMP9]], i32 2
24+
; CHECK-NEXT: [[SCEVGEP42:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP14]]
25+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <6 x i64> [[TMP9]], i32 3
26+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP15]]
27+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <6 x i64> [[TMP9]], i32 4
28+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP17]]
29+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <6 x i64> [[TMP9]], i32 5
30+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP19]]
31+
; CHECK-NEXT: ret ptr [[TMP20]]
32+
;
33+
entry:
34+
%0 = load i8, ptr null, align 1
35+
%cmp4.2 = icmp eq i8 %0, 0
36+
%1 = select i1 %cmp4.2, i64 0, i64 0
37+
%2 = shl i64 %1, 1
38+
%3 = getelementptr i8, ptr %d, i64 %2
39+
%4 = xor i64 0, 0
40+
%5 = udiv i64 %4, 0
41+
%6 = mul i64 %5, 6
42+
%7 = getelementptr i8, ptr %d, i64 %6
43+
%8 = shl i64 %1, 0
44+
%scevgep42 = getelementptr i8, ptr %d, i64 %8
45+
%9 = mul i64 %5, 1
46+
%10 = getelementptr i8, ptr %d, i64 %9
47+
%11 = udiv i64 1, 0
48+
%12 = mul i64 %11, 1
49+
%13 = getelementptr i8, ptr %d, i64 %12
50+
%14 = mul i64 %11, 0
51+
%15 = getelementptr i8, ptr %d, i64 %14
52+
ret ptr %15
53+
}

0 commit comments

Comments
 (0)