@@ -668,6 +668,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                          unsigned BestUF,
                                          PredicatedScalarEvolution &PSE) {
+
   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
   assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
   VPBasicBlock *ExitingVPBB =
@@ -710,6 +711,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   // TODO: Further simplifications are possible
   // 1. Replace inductions with constants.
   // 2. Replace vector loop region with VPBasicBlock.
+  //
 }

 /// Sink users of \p FOR after the recipe defining the previous value \p
@@ -1657,3 +1659,129 @@ void VPlanTransforms::createInterleaveGroups(
     }
   }
 }
+
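+/// Returns true if \p V is a supported operand for narrowing: either an
+/// unmasked wide load used directly by \p R0, or member \p Idx of an
+/// interleave group whose factor equals its number of members.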
+static bool supportedLoad(VPWidenRecipe *R0, VPValue *V, unsigned Idx) {
+  if (auto *W = dyn_cast_or_null<VPWidenLoadRecipe>(V->getDefiningRecipe())) {
+    if (W->getMask())
+      return false;
+    return R0->getOperand(0) == V || R0->getOperand(1) == V;
+  }
+
+  if (auto *IR = dyn_cast_or_null<VPInterleaveRecipe>(V->getDefiningRecipe())) {
+    return IR->getInterleaveGroup()->getFactor() ==
+               IR->getInterleaveGroup()->getNumMembers() &&
+           IR->getVPValue(Idx) == V;
+  }
+  return false;
+}
+
+/// Returns true if \p IR is a consecutive interleave group with \p VF members.
+static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *IR,
+                                         ElementCount VF) {
+  if (!IR)
+    return false;
+  auto IG = IR->getInterleaveGroup();
+  return IG->getFactor() == IG->getNumMembers() &&
+         IG->getNumMembers() == VF.getKnownMinValue();
+}
+
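+/// Try to replace the interleave groups in \p Plan with wide loads and stores
+/// processing VF elements, provided each group is consecutive with VF members
+/// and all members perform the same widened operation on supported operands.
+/// On success the canonical induction is stepped by 1 instead of VF, so the
+/// narrowed plan processes one original iteration per vector step. Returns
+/// true if \p Plan was changed.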
+bool VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
+  using namespace llvm::VPlanPatternMatch;
+  if (VF.isScalable())
+    return false;
+
+  bool Changed = false;
+  SmallVector<VPInterleaveRecipe *> StoreGroups;
+  for (auto &R : make_early_inc_range(
+           *Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
+    if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
+        isa<VPCanonicalIVPHIRecipe>(&R))
+      continue;
+
+    // Bail out on recipes not supported at the moment:
+    // * phi recipes other than the canonical induction
+    // * recipes writing to memory except interleave groups
+    // Only support plans with a canonical induction phi.
+    if ((R.isPhi() && !isa<VPCanonicalIVPHIRecipe>(&R)) ||
+        (R.mayWriteToMemory() && !isa<VPInterleaveRecipe>(&R)))
+      return false;
+
+    auto *IR = dyn_cast<VPInterleaveRecipe>(&R);
+    if (!IR)
+      continue;
+
+    if (!isConsecutiveInterleaveGroup(IR, VF))
+      return false;
+    if (IR->getStoredValues().empty())
+      continue;
+
+    auto *Lane0 = dyn_cast_or_null<VPWidenRecipe>(
+        IR->getStoredValues()[0]->getDefiningRecipe());
+    if (!Lane0)
+      return false;
+    for (const auto &[I, V] : enumerate(IR->getStoredValues())) {
+      auto *R = dyn_cast<VPWidenRecipe>(V->getDefiningRecipe());
+      if (!R || R->getOpcode() != Lane0->getOpcode())
+        return false;
+      // Work around captured structured bindings being a C++20 extension.
+      auto Idx = I;
+      if (any_of(R->operands(), [Lane0, Idx](VPValue *V) {
+            return !supportedLoad(Lane0, V, Idx);
+          }))
+        return false;
+    }
+
+    StoreGroups.push_back(IR);
+  }
+
+  // Narrow operation tree rooted at store groups.
+  for (auto *StoreGroup : StoreGroups) {
+    auto *Lane0 = cast<VPWidenRecipe>(
+        StoreGroup->getStoredValues()[0]->getDefiningRecipe());
+
+    // Determine which of Lane0's operands is fed by the load interleave group
+    // and which by the wide load shared by all members.
+    unsigned LoadGroupIdx =
+        isa<VPInterleaveRecipe>(Lane0->getOperand(1)->getDefiningRecipe()) ? 1
+                                                                           : 0;
+    unsigned WideLoadIdx = 1 - LoadGroupIdx;
+    auto *LoadGroup = cast<VPInterleaveRecipe>(
+        Lane0->getOperand(LoadGroupIdx)->getDefiningRecipe());
+
+    auto *WideLoad = cast<VPWidenLoadRecipe>(
+        Lane0->getOperand(WideLoadIdx)->getDefiningRecipe());
+
+    // Narrow wide load to uniform scalar load, as transformed VPlan will only
+    // process one original iteration.
+    auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(),
+                                    WideLoad->operands(), /*IsUniform=*/true);
+    // Narrow interleave group to wide load, as transformed VPlan will only
+    // process one original iteration.
+    auto *L = new VPWidenLoadRecipe(
+        *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
+        LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
+        /*Reverse=*/false, LoadGroup->getDebugLoc());
+    L->insertBefore(LoadGroup);
+    N->insertBefore(LoadGroup);
+    Lane0->setOperand(LoadGroupIdx, L);
+    Lane0->setOperand(WideLoadIdx, N);
+
+    auto *S = new VPWidenStoreRecipe(
+        *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
+        StoreGroup->getAddr(), Lane0, nullptr, /*Consecutive=*/true,
+        /*Reverse=*/false, StoreGroup->getDebugLoc());
+    S->insertBefore(StoreGroup);
+    StoreGroup->eraseFromParent();
+    Changed = true;
+  }
+
+  if (!Changed)
+    return false;
+
+  // Adjust induction to reflect that the transformed plan only processes one
+  // original iteration.
+  auto *CanIV = Plan.getCanonicalIV();
+  VPInstruction *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
+  Inc->setOperand(
+      1, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+  removeDeadRecipes(Plan);
+  return true;
+}
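
For illustration, a loop shape this transform targets (a sketch based on the checks above; the struct layout and names are hypothetical, not from the patch):

    struct Data { double x, y; };
    // With VF = 2, the loads of data[i].x/data[i].y form a consecutive
    // interleave group of factor 2, factor[i] is an unmasked wide load
    // shared by both members, and both members feed the same widened
    // multiply. The narrowed plan then executes one original iteration per
    // vector step: one wide load of {data[i].x, data[i].y}, one uniform
    // scalar load of factor[i], one multiply, and one wide store, with the
    // canonical IV stepped by 1 instead of VF.
    void scale(struct Data *data, double *factor, long n) {
      for (long i = 0; i < n; ++i) {
        data[i].x = factor[i] * data[i].x;
        data[i].y = factor[i] * data[i].y;
      }
    }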