Skip to content

Commit 5130f96

Browse files
committed
[VectorCombine] Fold vector.interleave2 with two constant splats
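Fold `llvm.vector.interleave2` of two constant integer splats into a single constant splat with double-width elements, bitcast back to the original result type, when the target cost model favors the bitcast.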
1 parent 7eb193b commit 5130f96

File tree

2 files changed

+55
-0
lines changed

2 files changed

+55
-0
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ class VectorCombine {
125125
bool foldShuffleFromReductions(Instruction &I);
126126
bool foldCastFromReductions(Instruction &I);
127127
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
128+
bool foldInterleaveIntrinsics(Instruction &I);
128129
bool shrinkType(Instruction &I);
129130

130131
void replaceValue(Value &Old, Value &New) {
@@ -3145,6 +3146,45 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
31453146
return true;
31463147
}
31473148

3149+
/// Fold `llvm.vector.interleave2` of two constant integer splats into a
/// bitcast of a single, double-width constant splat.
///
/// For instance, interleaving `<vscale x 8 x i32> <splat of 666>` with
/// `<vscale x 8 x i32> <splat of 777>` can be rewritten as the larger splat
/// `<vscale x 8 x i64> <splat of ((777 << 32) | 666)>` (little-endian layout)
/// that is then cast back into `<vscale x 16 x i32>`.
///
/// \param I the instruction to inspect; anything other than an interleave2
///          intrinsic call with two constant integer splat operands is left
///          untouched.
/// \returns true if \p I was replaced, false otherwise.
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  using namespace PatternMatch;
  const APInt *SplatVal0, *SplatVal1;
  if (!match(&I, m_Intrinsic<Intrinsic::vector_interleave2>(
                     m_APInt(SplatVal0), m_APInt(SplatVal1))))
    return false;

  LLVM_DEBUG(dbgs() << "VC: Folding interleave2 with two splats: " << I
                    << "\n");

  auto *VTy =
      cast<VectorType>(cast<IntrinsicInst>(I).getArgOperand(0)->getType());
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();

  // Use <= rather than < so that we also bail out when the costs tie. This
  // covers the case where both the interleave2 and the bitcast costs are
  // invalid, and even when both are valid and equal it is probably not a good
  // idea to emit a potentially expensive wide constant splat for no gain.
  if (TTI.getInstructionCost(&I, CostKind) <=
      TTI.getCastInstrCost(Instruction::BitCast, I.getType(), ExtVTy,
                           TTI::CastContextHint::None, CostKind)) {
    LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                      << *I.getType() << " is too high.\n");
    return false;
  }

  // A vector bitcast behaves as if the value were stored and reloaded, so
  // which half of each wide element lands in the even (first operand) lane
  // depends on endianness: the low half on little-endian targets, the high
  // half on big-endian ones.
  const bool IsBigEndian = I.getModule()->getDataLayout().isBigEndian();
  const APInt &LoHalf = IsBigEndian ? *SplatVal1 : *SplatVal0;
  const APInt &HiHalf = IsBigEndian ? *SplatVal0 : *SplatVal1;

  // Pack both narrow splat values into one element of twice the width.
  APInt NewSplatVal = HiHalf.zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= LoHalf.zext(Width * 2);
  auto *NewSplat = ConstantVector::getSplat(
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));

  IRBuilder<> Builder(&I);
  replaceValue(I, *Builder.CreateBitCast(NewSplat, I.getType()));
  return true;
}
3187+
31483188
/// This is the entry point for all transforms. Pass manager differences are
31493189
/// handled in the callers of this function.
31503190
bool VectorCombine::run() {
@@ -3189,6 +3229,7 @@ bool VectorCombine::run() {
31893229
MadeChange |= scalarizeBinopOrCmp(I);
31903230
MadeChange |= scalarizeLoadExtract(I);
31913231
MadeChange |= scalarizeVPIntrinsic(I);
3232+
MadeChange |= foldInterleaveIntrinsics(I);
31923233
}
31933234

31943235
if (Opcode == Instruction::Store)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=riscv64 -mattr=+v,+m,+zvfh %s -passes=vector-combine | FileCheck %s
3+
; RUN: opt -S -mtriple=riscv32 -mattr=+v,+m,+zvfh %s -passes=vector-combine | FileCheck %s
4+
5+
; Interleaving splat(i32 666) with splat(i32 777) is expected to become a
; bitcast of a single <vscale x 8 x i64> splat that feeds the vp.store.
define void @store_factor2_const_splat(ptr %dst) {
6+
; CHECK-LABEL: define void @store_factor2_const_splat(
7+
; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
8+
; CHECK-NEXT: call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> bitcast (<vscale x 8 x i64> splat (i64 3337189589658) to <vscale x 16 x i32>), ptr [[DST]], <vscale x 16 x i1> splat (i1 true), i32 88)
9+
; CHECK-NEXT: ret void
10+
;
11+
; The expected i64 splat value 3337189589658 equals (777 << 32) | 666.
%interleave2 = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> splat (i32 666), <vscale x 8 x i32> splat (i32 777))
12+
call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %interleave2, ptr %dst, <vscale x 16 x i1> splat (i1 true), i32 88)
13+
ret void
14+
}

0 commit comments

Comments
 (0)