Skip to content

Commit ebebce1

Browse files
committed
Limit the DFS for pathologically connected graphs
Since the DFS of the dependency graph is brute force it is exponential in the number of children. If a graph is pathological in nature (i.e. many cross-dependencies) then this will not complete in "reasonable" time. To address this we limit the number of visits to any node in the graph, thus capping the complexity to be linear in nature.
1 parent ab2a3f1 commit ebebce1

File tree

1 file changed

+24
-10
lines changed

1 file changed

+24
-10
lines changed

src/pip/_internal/resolution/resolvelib/resolver.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,24 @@ def get_topological_weights(
248248
requirement_keys.
249249
"""
250250
path: set[str | None] = set()
251-
weights: dict[str | None, int] = {}
251+
weights: dict[str | None, list[int]] = {}
252252

253253
def visit(node: str | None) -> None:
254254
if node in path:
255255
# We hit a cycle, so we'll break it here.
256256
return
257257

258+
# The walk is exponential and for pathologically connected graphs (which
259+
# are the ones most likely to contain cycles in the first place) it can
260+
# take until the heat-death of the universe. To counter this we limit
261+
# the number of attempts to visit (i.e. traverse through) any given
262+
# node. We choose a value here which gives decent enough coverage for
263+
# fairly well behaved graphs, and still limits the walk complexity to be
264+
# linear in nature.
265+
cur_weights = weights.get(node, [])
266+
if len(cur_weights) >= 5:
267+
return
268+
258269
# Time to visit the children!
259270
path.add(node)
260271
for child in graph.iter_children(node):
@@ -264,14 +275,14 @@ def visit(node: str | None) -> None:
264275
if node not in requirement_keys:
265276
return
266277

267-
last_known_parent_count = weights.get(node, 0)
268-
weights[node] = max(last_known_parent_count, len(path))
278+
cur_weights.append(len(path))
279+
weights[node] = cur_weights
269280

270-
# Simplify the graph, pruning leaves that have no dependencies.
271-
# This is needed for large graphs (say over 200 packages) because the
272-
# `visit` function is exponentially slower then, taking minutes.
281+
# Simplify the graph, pruning leaves that have no dependencies. This is
282+
# needed for large graphs (say over 200 packages) because the `visit`
283+
# function is slower for large/densely connected graphs, taking minutes.
273284
# See https://github.com/pypa/pip/issues/10557
274-
# We will loop until we explicitly break the loop.
285+
# We repeat the pruning step until we have no more leaves to remove.
275286
while True:
276287
leaves = set()
277288
for key in graph:
@@ -291,12 +302,13 @@ def visit(node: str | None) -> None:
291302
for leaf in leaves:
292303
if leaf not in requirement_keys:
293304
continue
294-
weights[leaf] = weight
305+
weights[leaf] = [weight]
295306
# Remove the leaves from the graph, making it simpler.
296307
for leaf in leaves:
297308
graph.remove(leaf)
298309

299-
# Visit the remaining graph.
310+
# Visit the remaining graph, this will only have nodes to handle if the
311+
# graph had a cycle in it, which the pruning step above could not handle.
300312
# `None` is guaranteed to be the root node by resolvelib.
301313
visit(None)
302314

@@ -305,7 +317,9 @@ def visit(node: str | None) -> None:
305317
difference = set(weights.keys()).difference(requirement_keys)
306318
assert not difference, difference
307319

308-
return weights
320+
# Now give back all the weights, choosing the largest ones from what we
321+
# accumulated.
322+
return {node : max(wgts) for (node, wgts) in weights.items()}
309323

310324

311325
def _req_set_item_sorter(

0 commit comments

Comments
 (0)