Skip to content

Commit 0145c57

Browse files
committed
[ConstantFolding] Add folding for [de]interleave2, insert and extract
This change adds constant folding for four vector intrinsics: `vector_interleave2`, `vector_deinterleave2`, `vector_extract`, and `vector_insert`. For the last two intrinsics, the change does not use the `ShuffleVector` folding mechanism, because it is much simpler to construct the result vector explicitly.
1 parent 9f7f4ac commit 0145c57

File tree

2 files changed

+128
-0
lines changed

2 files changed

+128
-0
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1635,6 +1635,10 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
16351635
case Intrinsic::vector_reduce_smax:
16361636
case Intrinsic::vector_reduce_umin:
16371637
case Intrinsic::vector_reduce_umax:
1638+
case Intrinsic::vector_extract:
1639+
case Intrinsic::vector_insert:
1640+
case Intrinsic::vector_interleave2:
1641+
case Intrinsic::vector_deinterleave2:
16381642
// Target intrinsics
16391643
case Intrinsic::amdgcn_perm:
16401644
case Intrinsic::amdgcn_wave_reduce_umin:
@@ -3750,6 +3754,65 @@ static Constant *ConstantFoldFixedVectorCall(
37503754
}
37513755
return nullptr;
37523756
}
3757+
case Intrinsic::vector_extract: {
3758+
auto *Vec = dyn_cast<Constant>(Operands[0]);
3759+
auto *Idx = dyn_cast<ConstantInt>(Operands[1]);
3760+
if (!Vec || !Idx)
3761+
return nullptr;
3762+
3763+
unsigned NumElements = FVTy->getNumElements();
3764+
unsigned VecNumElements =
3765+
cast<FixedVectorType>(Vec->getType())->getNumElements();
3766+
// Extracting entire vector is nop
3767+
if (NumElements == VecNumElements)
3768+
return Vec;
3769+
3770+
unsigned StartingIndex = Idx->getZExtValue();
3771+
assert(StartingIndex + NumElements <= VecNumElements &&
3772+
"Cannot extract more elements than exist in the vector");
3773+
for (unsigned I = 0; I != NumElements; ++I)
3774+
Result[I] = Vec->getAggregateElement(StartingIndex + I);
3775+
return ConstantVector::get(Result);
3776+
}
3777+
case Intrinsic::vector_insert: {
3778+
auto *Vec = dyn_cast<Constant>(Operands[0]);
3779+
auto *SubVec = dyn_cast<Constant>(Operands[1]);
3780+
auto *Idx = dyn_cast<ConstantInt>(Operands[2]);
3781+
if (!Vec || !SubVec || !Idx)
3782+
return nullptr;
3783+
3784+
unsigned SubVecNumElements =
3785+
cast<FixedVectorType>(SubVec->getType())->getNumElements();
3786+
unsigned VecNumElements =
3787+
cast<FixedVectorType>(Vec->getType())->getNumElements();
3788+
unsigned IdxN = Idx->getZExtValue();
3789+
// Replacing entire vector with a subvec is nop
3790+
if (SubVecNumElements == VecNumElements)
3791+
return SubVec;
3792+
3793+
unsigned I = 0;
3794+
for (; I < IdxN; ++I)
3795+
Result[I] = Vec->getAggregateElement(I);
3796+
for (; I < IdxN + SubVecNumElements; ++I)
3797+
Result[I] = SubVec->getAggregateElement(I - IdxN);
3798+
for (; I < VecNumElements; ++I)
3799+
Result[I] = Vec->getAggregateElement(I);
3800+
return ConstantVector::get(Result);
3801+
}
3802+
case Intrinsic::vector_interleave2: {
3803+
auto *Vec0 = dyn_cast<Constant>(Operands[0]);
3804+
auto *Vec1 = dyn_cast<Constant>(Operands[1]);
3805+
if (!Vec0 || !Vec1)
3806+
return nullptr;
3807+
3808+
unsigned NumElements =
3809+
cast<FixedVectorType>(Vec0->getType())->getNumElements();
3810+
for (unsigned I = 0; I < NumElements; ++I) {
3811+
Result[2 * I] = Vec0->getAggregateElement(I);
3812+
Result[2 * I + 1] = Vec1->getAggregateElement(I);
3813+
}
3814+
return ConstantVector::get(Result);
3815+
}
37533816
default:
37543817
break;
37553818
}
@@ -3911,6 +3974,21 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
39113974
return nullptr;
39123975
return ConstantStruct::get(StTy, SinResult, CosResult);
39133976
}
3977+
case Intrinsic::vector_deinterleave2: {
3978+
auto *Vec = dyn_cast<Constant>(Operands[0]);
3979+
if (!Vec)
3980+
return nullptr;
3981+
3982+
unsigned NumElements =
3983+
cast<FixedVectorType>(Vec->getType())->getNumElements() / 2;
3984+
SmallVector<Constant *, 4> Res0(NumElements), Res1(NumElements);
3985+
for (unsigned I = 0; I < NumElements; ++I) {
3986+
Res0[I] = Vec->getAggregateElement(2 * I);
3987+
Res1[I] = Vec->getAggregateElement(2 * I + 1);
3988+
}
3989+
return ConstantStruct::get(StTy, ConstantVector::get(Res0),
3990+
ConstantVector::get(Res1));
3991+
}
39143992
default:
39153993
// TODO: Constant folding of vector intrinsics that fall through here does
39163994
// not work (e.g. overflow intrinsics)
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s
3+
4+
define <3 x i32> @fold_vector_extract() {
5+
; CHECK-LABEL: define <3 x i32> @fold_vector_extract() {
6+
; CHECK-NEXT: ret <3 x i32> <i32 3, i32 4, i32 5>
7+
;
8+
%1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 3)
9+
ret <3 x i32> %1
10+
}
11+
12+
define <8 x i32> @fold_vector_extract_nop() {
13+
; CHECK-LABEL: define <8 x i32> @fold_vector_extract_nop() {
14+
; CHECK-NEXT: ret <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15+
;
16+
%1 = call <8 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 0)
17+
ret <8 x i32> %1
18+
}
19+
20+
define <8 x i32> @fold_vector_insert() {
21+
; CHECK-LABEL: define <8 x i32> @fold_vector_insert() {
22+
; CHECK-NEXT: ret <8 x i32> <i32 9, i32 10, i32 11, i32 12, i32 5, i32 6, i32 7, i32 8>
23+
;
24+
%1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, i64 0)
25+
ret <8 x i32> %1
26+
}
27+
28+
define <8 x i32> @fold_vector_insert_nop() {
29+
; CHECK-LABEL: define <8 x i32> @fold_vector_insert_nop() {
30+
; CHECK-NEXT: ret <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
31+
;
32+
%1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>, i64 0)
33+
ret <8 x i32> %1
34+
}
35+
36+
define <8 x i32> @fold_vector_interleave2() {
37+
; CHECK-LABEL: define <8 x i32> @fold_vector_interleave2() {
38+
; CHECK-NEXT: ret <8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>
39+
;
40+
%1 = call<8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>)
41+
ret <8 x i32> %1
42+
}
43+
44+
define {<4 x i32>, <4 x i32>} @fold_vector_deinterleav2() {
45+
; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleav2() {
46+
; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } { <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8> }
47+
;
48+
%1 = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>)
49+
ret {<4 x i32>, <4 x i32>} %1
50+
}

0 commit comments

Comments
 (0)