@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149149 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150150 cl::init(2));
151151
152+ cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(
153+ "arm-max-base-updates-to-check", cl::Hidden,
154+ cl::desc("Maximum number of base-updates to check generating postindex."),
155+ cl::init(64));
156+
152157/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153158constexpr MVT FlagsVT = MVT::i32;
154159
@@ -15842,6 +15847,22 @@ struct BaseUpdateUser {
1584215847 unsigned ConstInc;
1584315848};
1584415849
15850+ static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
15851+ // Check that the add is independent of the load/store.
15852+ // Otherwise, folding it would create a cycle. Search through Addr
15853+ // as well, since the User may not be a direct user of Addr and
15854+ // only share a base pointer.
15855+ SmallPtrSet<const SDNode *, 32> Visited;
15856+ SmallVector<const SDNode *, 16> Worklist;
15857+ Worklist.push_back(N);
15858+ Worklist.push_back(User);
15859+ const unsigned MaxSteps = 1024;
15860+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
15861+ SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
15862+ return false;
15863+ return true;
15864+ }
15865+
1584515866static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1584615867 struct BaseUpdateUser &User,
1584715868 bool SimpleConstIncOnly,
@@ -16043,6 +16064,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1604316064 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
1604416065 return false;
1604516066
16067+ if (!isValidBaseUpdate(N, User.N))
16068+ return false;
16069+
1604616070 // OK, we found an ADD we can fold into the base update.
1604716071 // Now, create a _UPD node, taking care of not breaking alignment.
1604816072
@@ -16191,21 +16215,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
1619116215 }
1619216216}
1619316217
16194- static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16195- // Check that the add is independent of the load/store.
16196- // Otherwise, folding it would create a cycle. Search through Addr
16197- // as well, since the User may not be a direct user of Addr and
16198- // only share a base pointer.
16199- SmallPtrSet<const SDNode *, 32> Visited;
16200- SmallVector<const SDNode *, 16> Worklist;
16201- Worklist.push_back(N);
16202- Worklist.push_back(User);
16203- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16204- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16205- return false;
16206- return true;
16207- }
16208-
1620916218/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
1621016219/// NEON load/store intrinsics, and generic vector load/stores, to merge
1621116220/// base address updates.
@@ -16219,6 +16228,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
1621916228 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
1622016229 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
1622116230
16231+ // Limit the number of possible base-updates we look at to prevent degenerate
16232+ // cases.
16233+ unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
16234+
1622216235 SDValue Addr = N->getOperand(AddrOpIdx);
1622316236
1622416237 SmallVector<BaseUpdateUser, 8> BaseUpdates;
@@ -16233,8 +16246,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
1623316246 unsigned ConstInc =
1623416247 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
1623516248
16236- if (ConstInc || User->getOpcode() == ISD::ADD)
16249+ if (ConstInc || User->getOpcode() == ISD::ADD) {
1623716250 BaseUpdates.push_back({User, Inc, ConstInc});
16251+ if (BaseUpdates.size() >= MaxBaseUpdates)
16252+ break;
16253+ }
1623816254 }
1623916255
1624016256 // If the address is a constant pointer increment itself, find
@@ -16261,27 +16277,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
1626116277 unsigned NewConstInc = UserOffset - Offset;
1626216278 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
1626316279 BaseUpdates.push_back({User, NewInc, NewConstInc});
16280+ if (BaseUpdates.size() >= MaxBaseUpdates)
16281+ break;
1626416282 }
1626516283 }
1626616284
1626716285 // Try to fold the load/store with an update that matches memory
1626816286 // access size. This should work well for sequential loads.
16269- //
16270- // Filter out invalid updates as well.
1627116287 unsigned NumValidUpd = BaseUpdates.size();
16272- for (unsigned I = 0; I < NumValidUpd;) {
16288+ for (unsigned I = 0; I < NumValidUpd; I++ ) {
1627316289 BaseUpdateUser &User = BaseUpdates[I];
16274- if (!isValidBaseUpdate(N, User.N)) {
16275- --NumValidUpd;
16276- std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16277- continue;
16278- }
16279-
1628016290 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
1628116291 return SDValue();
16282- ++I;
1628316292 }
16284- BaseUpdates.resize(NumValidUpd);
1628516293
1628616294 // Try to fold with other users. Non-constant updates are considered
1628716295 // first, and constant updates are sorted to not break a sequence of
@@ -16337,8 +16345,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
1633716345 Visited.insert(Addr.getNode());
1633816346 Worklist.push_back(N);
1633916347 Worklist.push_back(User);
16340- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16341- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16348+ const unsigned MaxSteps = 1024;
16349+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
16350+ SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
1634216351 continue;
1634316352
1634416353 // Find the new opcode for the updating load/store.
0 commit comments