@@ -672,6 +672,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                          unsigned BestUF,
                                          PredicatedScalarEvolution &PSE) {
+
   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
   assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
   VPBasicBlock *ExitingVPBB =
@@ -713,6 +714,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   // TODO: Further simplifications are possible
   //       1. Replace inductions with constants.
   //       2. Replace vector loop region with VPBasicBlock.
+  //
 }

 /// Sink users of \p FOR after the recipe defining the previous value \p
@@ -1589,3 +1591,129 @@ void VPlanTransforms::createInterleaveGroups(
     }
   }
 }
+
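+/// Returns true if \p V is an operand of \p R0 that can be narrowed: either an
+/// unmasked wide load used directly as operand 0 or 1 of \p R0, or member
+/// \p Idx of an interleave group whose factor equals its number of members.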
+static bool supportedLoad(VPWidenRecipe *R0, VPValue *V, unsigned Idx) {
+  if (auto *W = dyn_cast_or_null<VPWidenLoadRecipe>(V->getDefiningRecipe())) {
+    if (W->getMask())
+      return false;
+    return R0->getOperand(0) == V || R0->getOperand(1) == V;
+  }
+
+  if (auto *IR = dyn_cast_or_null<VPInterleaveRecipe>(V->getDefiningRecipe())) {
+    return IR->getInterleaveGroup()->getFactor() ==
+               IR->getInterleaveGroup()->getNumMembers() &&
+           IR->getVPValue(Idx) == V;
+  }
+  return false;
+}
+
+/// Returns true if \p IR is a consecutive interleave group with \p VF members.
+static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *IR,
+                                         ElementCount VF) {
+  if (!IR)
+    return false;
+  auto IG = IR->getInterleaveGroup();
+  return IG->getFactor() == IG->getNumMembers() &&
+         IG->getNumMembers() == VF.getKnownMinValue();
+}
+
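+/// Try to narrow interleave groups with a factor of \p VF to memory operations
+/// processing a single original iteration: store groups whose stored values
+/// are computed by matching wide recipes fed only by supported loads are
+/// rewritten to a single wide store, and the canonical IV is adjusted
+/// accordingly. Returns true if the plan was changed.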
+bool VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
+  using namespace llvm::VPlanPatternMatch;
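+  // The checks below compare the interleave factor against the number of
+  // lanes, which is only known at compile time for fixed-width vectors.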
+  if (VF.isScalable())
+    return false;
+
+  bool Changed = false;
+  SmallVector<VPInterleaveRecipe *> StoreGroups;
+  for (auto &R : make_early_inc_range(
+           *Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
+    if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
+        isa<VPCanonicalIVPHIRecipe>(&R))
+      continue;
+
+    // Bail out on recipes not supported at the moment:
+    // * phi recipes other than the canonical induction
+    // * recipes writing to memory except interleave groups
+    // Only support plans with a canonical induction phi.
+    if ((R.isPhi() && !isa<VPCanonicalIVPHIRecipe>(&R)) ||
+        (R.mayWriteToMemory() && !isa<VPInterleaveRecipe>(&R)))
+      return false;
+
+    auto *IR = dyn_cast<VPInterleaveRecipe>(&R);
+    if (!IR)
+      continue;
+
+    if (!isConsecutiveInterleaveGroup(IR, VF))
+      return false;
+    if (IR->getStoredValues().empty())
+      continue;
+
+    auto *Lane0 = dyn_cast_or_null<VPWidenRecipe>(
+        IR->getStoredValues()[0]->getDefiningRecipe());
+    if (!Lane0)
+      return false;
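+    // All stored values must be defined by wide recipes with the same opcode
+    // as Lane0, and every operand of those recipes must be a supported load.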
+    for (const auto &[I, V] : enumerate(IR->getStoredValues())) {
+      auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe());
+      if (!R || R->getOpcode() != Lane0->getOpcode())
+        return false;
+      // Work around captured structured bindings being a C++20 extension.
+      auto Idx = I;
+      if (any_of(R->operands(), [Lane0, Idx](VPValue *V) {
+            return !supportedLoad(Lane0, V, Idx);
+          }))
+        return false;
+    }
+
+    StoreGroups.push_back(IR);
+  }
+
+  // Narrow operation tree rooted at store groups.
+  for (auto *StoreGroup : StoreGroups) {
+    auto *Lane0 = cast<VPWidenRecipe>(
+        StoreGroup->getStoredValues()[0]->getDefiningRecipe());
+
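+    // Lane0's two operands are, in either order, a load interleave group and
+    // a wide load; determine which operand index holds which.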
+    unsigned LoadGroupIdx =
+        isa<VPInterleaveRecipe>(Lane0->getOperand(1)->getDefiningRecipe()) ? 1
+                                                                           : 0;
+    unsigned WideLoadIdx = 1 - LoadGroupIdx;
+    auto *LoadGroup = cast<VPInterleaveRecipe>(
+        Lane0->getOperand(LoadGroupIdx)->getDefiningRecipe());
+
+    auto *WideLoad = cast<VPWidenLoadRecipe>(
+        Lane0->getOperand(WideLoadIdx)->getDefiningRecipe());
+
+    // Narrow wide load to uniform scalar load, as transformed VPlan will only
+    // process one original iteration.
+    auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(),
+                                    WideLoad->operands(), /*IsUniform=*/true);
+    // Narrow interleave group to wide load, as transformed VPlan will only
+    // process one original iteration.
+    auto *L = new VPWidenLoadRecipe(
+        *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
+        LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
+        /*Reverse=*/false, LoadGroup->getDebugLoc());
+    L->insertBefore(LoadGroup);
+    N->insertBefore(LoadGroup);
+    Lane0->setOperand(LoadGroupIdx, L);
+    Lane0->setOperand(WideLoadIdx, N);
+
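+    // Narrow the store interleave group to a single wide store of the value
+    // computed by Lane0.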
+    auto *S = new VPWidenStoreRecipe(
+        *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
+        StoreGroup->getAddr(), Lane0, /*Mask=*/nullptr, /*Consecutive=*/true,
+        /*Reverse=*/false, StoreGroup->getDebugLoc());
+    S->insertBefore(StoreGroup);
+    StoreGroup->eraseFromParent();
+    Changed = true;
+  }
+
+  if (!Changed)
+    return false;
+
+  // Adjust induction to reflect that the transformed plan only processes one
+  // original iteration.
+  auto *CanIV = Plan.getCanonicalIV();
+  VPInstruction *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
+  Inc->setOperand(
+      1, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+  removeDeadRecipes(Plan);
+  return true;
+}