Skip to content

Commit 6a37fdc

Browse files
committed
Addressed comments
1 parent 0145c57 commit 6a37fdc

File tree

2 files changed

+62
-9
lines changed

2 files changed

+62
-9
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3763,15 +3763,21 @@ static Constant *ConstantFoldFixedVectorCall(
37633763
unsigned NumElements = FVTy->getNumElements();
37643764
unsigned VecNumElements =
37653765
cast<FixedVectorType>(Vec->getType())->getNumElements();
3766+
unsigned StartingIndex = Idx->getZExtValue();
37663767
// Extracting the entire vector is a no-op.
3767-
if (NumElements == VecNumElements)
3768+
if (NumElements == VecNumElements && StartingIndex == 0)
37683769
return Vec;
37693770

3770-
unsigned StartingIndex = Idx->getZExtValue();
3771-
assert(StartingIndex + NumElements <= VecNumElements &&
3772-
"Cannot extract more elements than exist in the vector");
3773-
for (unsigned I = 0; I != NumElements; ++I)
3774-
Result[I] = Vec->getAggregateElement(StartingIndex + I);
3771+
const unsigned NonPoisonNumElements =
3772+
std::min(StartingIndex + NumElements, VecNumElements);
3773+
for (unsigned I = StartingIndex; I < NonPoisonNumElements; ++I)
3774+
Result[I - StartingIndex] = Vec->getAggregateElement(I);
3775+
3776+
// Remaining elements are poison since they are out of bounds.
3777+
for (unsigned I = NonPoisonNumElements, E = StartingIndex + NumElements;
3778+
I < E; ++I)
3779+
Result[I - StartingIndex] = PoisonValue::get(FVTy->getElementType());
3780+
37753781
return ConstantVector::get(Result);
37763782
}
37773783
case Intrinsic::vector_insert: {
@@ -3787,9 +3793,15 @@ static Constant *ConstantFoldFixedVectorCall(
37873793
cast<FixedVectorType>(Vec->getType())->getNumElements();
37883794
unsigned IdxN = Idx->getZExtValue();
37893795
// Replacing the entire vector with a subvector is a no-op.
3790-
if (SubVecNumElements == VecNumElements)
3796+
if (SubVecNumElements == VecNumElements && IdxN == 0)
37913797
return SubVec;
37923798

3799+
// Make sure indices are in the range [0, VecNumElements), otherwise the
3800+
// result is a poison value.
3801+
if (IdxN >= VecNumElements || IdxN + SubVecNumElements > VecNumElements ||
3802+
(IdxN && (SubVecNumElements % IdxN) != 0))
3803+
return PoisonValue::get(FVTy);
3804+
37933805
unsigned I = 0;
37943806
for (; I < IdxN; ++I)
37953807
Result[I] = Vec->getAggregateElement(I);
@@ -3980,7 +3992,8 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
39803992
return nullptr;
39813993

39823994
unsigned NumElements =
3983-
cast<FixedVectorType>(Vec->getType())->getNumElements() / 2;
3995+
cast<VectorType>(Vec->getType())->getElementCount().getKnownMinValue() /
3996+
2;
39843997
SmallVector<Constant *, 4> Res0(NumElements), Res1(NumElements);
39853998
for (unsigned I = 0; I < NumElements; ++I) {
39863999
Res0[I] = Vec->getAggregateElement(2 * I);

llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s
2+
; RUN: opt < %s -passes=instsimplify,verify -disable-verify -S | FileCheck %s
33

44
define <3 x i32> @fold_vector_extract() {
55
; CHECK-LABEL: define <3 x i32> @fold_vector_extract() {
@@ -9,6 +9,22 @@ define <3 x i32> @fold_vector_extract() {
99
ret <3 x i32> %1
1010
}
1111

12+
define <3 x i32> @fold_vector_extract_last_poison() {
13+
; CHECK-LABEL: define <3 x i32> @fold_vector_extract_last_poison() {
14+
; CHECK-NEXT: ret <3 x i32> <i32 6, i32 7, i32 poison>
15+
;
16+
%1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 6)
17+
ret <3 x i32> %1
18+
}
19+
20+
define <3 x i32> @fold_vector_extract_poison() {
21+
; CHECK-LABEL: define <3 x i32> @fold_vector_extract_poison() {
22+
; CHECK-NEXT: ret <3 x i32> poison
23+
;
24+
%1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 8)
25+
ret <3 x i32> %1
26+
}
27+
1228
define <8 x i32> @fold_vector_extract_nop() {
1329
; CHECK-LABEL: define <8 x i32> @fold_vector_extract_nop() {
1430
; CHECK-NEXT: ret <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -33,6 +49,22 @@ define <8 x i32> @fold_vector_insert_nop() {
3349
ret <8 x i32> %1
3450
}
3551

52+
define <8 x i32> @fold_vector_insert_poison_idx_range() {
53+
; CHECK-LABEL: define <8 x i32> @fold_vector_insert_poison_idx_range() {
54+
; CHECK-NEXT: ret <8 x i32> poison
55+
;
56+
%1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <6 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>, i64 6)
57+
ret <8 x i32> %1
58+
}
59+
60+
define <8 x i32> @fold_vector_insert_poison_large_idx() {
61+
; CHECK-LABEL: define <8 x i32> @fold_vector_insert_poison_large_idx() {
62+
; CHECK-NEXT: ret <8 x i32> poison
63+
;
64+
%1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <6 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>, i64 -2)
65+
ret <8 x i32> %1
66+
}
67+
3668
define <8 x i32> @fold_vector_interleave2() {
3769
; CHECK-LABEL: define <8 x i32> @fold_vector_interleave2() {
3870
; CHECK-NEXT: ret <8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>
@@ -48,3 +80,11 @@ define {<4 x i32>, <4 x i32>} @fold_vector_deinterleav2() {
4880
%1 = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>)
4981
ret {<4 x i32>, <4 x i32>} %1
5082
}
83+
84+
define {<vscale x 4 x i32>, <vscale x 4 x i32>} @fold_scalable_vector_deinterleav2() {
85+
; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleav2() {
86+
; CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
87+
;
88+
%1 = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<vscale x 8 x i32> zeroinitializer)
89+
ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %1
90+
}

0 commit comments

Comments
 (0)