Skip to content

Commit 238e8ad

Browse files
committed
[VPlan] Handle VPReplicateRecipe stores
1 parent d2c0451 commit 238e8ad

File tree

3 files changed

+31
-11
lines changed

3 files changed

+31
-11
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,12 @@ m_Broadcast(const Op0_t &Op0) {
364364
return m_VPInstruction<VPInstruction::Broadcast>(Op0);
365365
}
366366

367+
/// Returns a pattern matcher for VPInstruction::ExtractLastElement, forwarding
/// \p Op0 to m_VPInstruction as the sub-pattern for the operand.
template <typename Op0_t>
368+
inline UnaryVPInstruction_match<Op0_t, VPInstruction::ExtractLastElement>
369+
m_ExtractLastElement(const Op0_t &Op0) {
370+
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
371+
}
372+
367373
template <typename Op0_t, typename Op1_t>
368374
inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::ActiveLaneMask>
369375
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1) {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2654,14 +2654,6 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
26542654
return;
26552655
}
26562656

2657-
// A store of a loop varying value to a uniform address only needs the last
2658-
// copy of the store.
2659-
if (isa<StoreInst>(UI) && vputils::isSingleScalar(getOperand(1))) {
2660-
auto Lane = VPLane::getLastLaneForVF(State.VF);
2661-
scalarizeInstruction(UI, this, VPLane(Lane), State);
2662-
return;
2663-
}
2664-
26652657
// Generate scalar instances for all VF lanes.
26662658
const unsigned EndLane = State.VF.getFixedValue();
26672659
for (unsigned Lane = 0; Lane < EndLane; ++Lane)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -683,7 +683,8 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
683683
// scalars.
684684
auto *WideIV = cast<VPWidenIntOrFpInductionRecipe>(&Phi);
685685
if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
686-
return U->usesScalars(WideIV);
686+
return U->usesScalars(WideIV) ||
687+
match(U, m_ExtractLastElement(m_VPValue()));
687688
}))
688689
continue;
689690

@@ -694,12 +695,19 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
694695
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
695696
WideIV->getDebugLoc(), Builder);
696697

698+
bool HasWideOps = any_of(WideIV->users(), [WideIV](VPUser *U) {
699+
return !U->usesScalars(WideIV) &&
700+
!match(U, m_ExtractLastElement(m_VPValue()));
701+
});
702+
697703
// Update scalar users of IV to use Steps instead.
698704
if (!HasOnlyVectorVFs)
699705
WideIV->replaceAllUsesWith(Steps);
700706
else
701-
WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
702-
return U.usesScalars(WideIV);
707+
WideIV->replaceUsesWithIf(Steps, [WideIV, HasWideOps](VPUser &U,
708+
unsigned) {
709+
return U.usesScalars(WideIV) ||
710+
(!HasWideOps && match(&U, m_ExtractLastElement(m_VPValue())));
703711
});
704712
}
705713
}
@@ -1209,6 +1217,20 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
12091217
continue;
12101218

12111219
auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
1220+
1221+
if (RepR && isa<StoreInst>(RepR->getUnderlyingInstr()) &&
1222+
vputils::isSingleScalar(RepR->getOperand(1))) {
1223+
auto *Clone = new VPReplicateRecipe(
1224+
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1225+
true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
1226+
Clone->insertBefore(RepOrWidenR);
1227+
auto *Ext = new VPInstruction(VPInstruction::ExtractLastElement,
1228+
{Clone->getOperand(0)});
1229+
Ext->insertBefore(Clone);
1230+
Clone->setOperand(0, Ext);
1231+
RepR->eraseFromParent();
1232+
continue;
1233+
}
12121234
// Skip recipes that aren't single scalars or don't have only their
12131235
// scalar results used. In the latter case, we would introduce extra
12141236
// broadcasts.

0 commit comments

Comments
 (0)