Skip to content

Commit c3ad577

Browse files
authored
Merge pull request #13424 from iamsrp-deshaw/main
Guard against pathologically connected dependency graphs
2 parents 4bb2bcb + 1dd0ebe commit c3ad577

File tree

2 files changed

+25
-10
lines changed

2 files changed

+25
-10
lines changed

news/13424.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Minor performance improvement getting the order to install a very large number of interdependent packages.

src/pip/_internal/resolution/resolvelib/resolver.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,24 @@ def get_topological_weights(
248248
requirement_keys.
249249
"""
250250
path: set[str | None] = set()
251-
weights: dict[str | None, int] = {}
251+
weights: dict[str | None, list[int]] = {}
252252

253253
def visit(node: str | None) -> None:
254254
if node in path:
255255
# We hit a cycle, so we'll break it here.
256256
return
257257

258+
# The walk is exponential and for pathologically connected graphs (which
259+
# are the ones most likely to contain cycles in the first place) it can
260+
# take until the heat-death of the universe. To counter this we limit
261+
# the number of attempts to visit (i.e. traverse through) any given
262+
# node. We choose a value here which gives decent enough coverage for
263+
# fairly well behaved graphs, and still limits the walk complexity to be
264+
# linear in nature.
265+
cur_weights = weights.get(node, [])
266+
if len(cur_weights) >= 5:
267+
return
268+
258269
# Time to visit the children!
259270
path.add(node)
260271
for child in graph.iter_children(node):
@@ -264,14 +275,14 @@ def visit(node: str | None) -> None:
264275
if node not in requirement_keys:
265276
return
266277

267-
last_known_parent_count = weights.get(node, 0)
268-
weights[node] = max(last_known_parent_count, len(path))
278+
cur_weights.append(len(path))
279+
weights[node] = cur_weights
269280

270-
# Simplify the graph, pruning leaves that have no dependencies.
271-
# This is needed for large graphs (say over 200 packages) because the
272-
# `visit` function is exponentially slower then, taking minutes.
281+
# Simplify the graph, pruning leaves that have no dependencies. This is
282+
# needed for large graphs (say over 200 packages) because the `visit`
283+
# function is slower for large/densely connected graphs, taking minutes.
273284
# See https://github.com/pypa/pip/issues/10557
274-
# We will loop until we explicitly break the loop.
285+
# We repeat the pruning step until we have no more leaves to remove.
275286
while True:
276287
leaves = set()
277288
for key in graph:
@@ -291,12 +302,13 @@ def visit(node: str | None) -> None:
291302
for leaf in leaves:
292303
if leaf not in requirement_keys:
293304
continue
294-
weights[leaf] = weight
305+
weights[leaf] = [weight]
295306
# Remove the leaves from the graph, making it simpler.
296307
for leaf in leaves:
297308
graph.remove(leaf)
298309

299-
# Visit the remaining graph.
310+
# Visit the remaining graph, this will only have nodes to handle if the
311+
# graph had a cycle in it, which the pruning step above could not handle.
300312
# `None` is guaranteed to be the root node by resolvelib.
301313
visit(None)
302314

@@ -305,7 +317,9 @@ def visit(node: str | None) -> None:
305317
difference = set(weights.keys()).difference(requirement_keys)
306318
assert not difference, difference
307319

308-
return weights
320+
# Now give back all the weights, choosing the largest ones from what we
321+
# accumulated.
322+
return {node: max(wgts) for (node, wgts) in weights.items()}
309323

310324

311325
def _req_set_item_sorter(

0 commit comments

Comments
 (0)