Merge pull request #13253 from notatallshaw/Use-new-narrow_requirement_selection-resolvelib-API-to-speed-up-resolution

notatallshaw · web-flow · commit 0d4ed13f528f · 2025-03-07T08:40:01.000-05:00
Use new `narrow requirement selection` resolvelib api to reduce cost of resolution
diff --git a/docs/html/topics/more-dependency-resolution.md b/docs/html/topics/more-dependency-resolution.md
@@ -132,6 +132,8 @@ operations:
 * `get_preference` - this provides information to the resolver to help it choose
   which requirement to look at "next" when working through the resolution
   process.
+* `narrow_requirement_selection` - this provides a way to limit the number of
+  identifiers passed to `get_preference`.
 * `find_matches` - given a set of constraints, determine what candidates exist
   that satisfy them. This is essentially where the finder interacts with the
   resolver.
@@ -140,19 +142,26 @@ operations:
 * `get_dependencies` - get the dependency metadata for a candidate. This is
   the implementation of the process of getting and reading package metadata.
 
-Of these methods, the only non-trivial one is the `get_preference` method. This
-implements the heuristics used to guide the resolution, telling it which
-requirement to try to satisfy next. It's this method that is responsible for
-trying to guess which route through the dependency tree will be most productive.
-As noted above, it's doing this with limited information. See the following
-diagram
+Of these methods, the only non-trivial ones are the `get_preference` and
+`narrow_requirement_selection` methods. These implement heuristics used
+to guide the resolution, telling it which requirement to try to satisfy next.
+It's these methods that are responsible for trying to guess which route through
+the dependency tree will be most productive. As noted above, they do this
+with limited information. See the following diagram:
 
 ![](deps.png)
 
 When the provider is asked to choose between the red requirements (A->B and
 A->C) it doesn't know anything about the dependencies of B or C (i.e., the
 grey parts of the graph).
 
+Pip's current implementation of the provider implements
+`narrow_requirement_selection` as follows:
+
+* If Requires-Python is present, only consider that.
+* Otherwise, if there are causes of resolution conflict (backtrack causes),
+  only consider them until there are no longer any resolution conflicts.
+
 Pip's current implementation of the provider implements `get_preference` as
 follows:
 
diff --git a/news/13253.feature.rst b/news/13253.feature.rst
@@ -0,0 +1,2 @@
+Speed up resolution by first considering only the preferences of
+candidates that are required to complete the resolution.
diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py
@@ -103,6 +103,49 @@ def __init__(
     def identify(self, requirement_or_candidate: Union[Requirement, Candidate]) -> str:
         return requirement_or_candidate.name
 
+    def narrow_requirement_selection(
+        self,
+        identifiers: Iterable[str],
+        resolutions: Mapping[str, Candidate],
+        candidates: Mapping[str, Iterator[Candidate]],
+        information: Mapping[str, Iterator["PreferenceInformation"]],
+        backtrack_causes: Sequence["PreferenceInformation"],
+    ) -> Iterable[str]:
+        """Produce a subset of identifiers that should be considered before others.
+
+        Currently pip narrows the selection as follows:
+            * Requires-Python, if present, is always returned by itself.
+            * Otherwise, identifiers named by a backtrack cause (requirement or
+              parent) are returned if any exist. They can be found in linear
+              time here, whereas get_preference() is called per identifier, so
+              checking there would be quadratic. A resolution can't be found
+              while there is a conflict, so these likely need resolving first.
+            * Otherwise, ``identifiers`` itself is returned (assumes re-iterable).
+        """
+        backtrack_identifiers = set()
+        for info in backtrack_causes:
+            backtrack_identifiers.add(info.requirement.name)
+            if info.parent is not None:
+                backtrack_identifiers.add(info.parent.name)
+
+        current_backtrack_causes = []
+        for identifier in identifiers:
+            # Requires-Python has only one candidate and the check is basically
+            # free, so it is always handled first to avoid needless work if it
+            # fails. Returning it alone skips get_preference() for everything else.
+            if identifier == REQUIRES_PYTHON_IDENTIFIER:
+                return [identifier]
+
+            # Collect identifiers that are backtrack causes, preserving input order
+            if identifier in backtrack_identifiers:
+                current_backtrack_causes.append(identifier)
+                continue
+
+        if current_backtrack_causes:
+            return current_backtrack_causes
+
+        return identifiers
+
     def get_preference(
         self,
         identifier: str,
@@ -153,20 +196,9 @@ def get_preference(
         unfree = bool(operators)
         requested_order = self._user_requested.get(identifier, math.inf)
 
-        # Requires-Python has only one candidate and the check is basically
-        # free, so we always do it first to avoid needless work if it fails.
-        requires_python = identifier == REQUIRES_PYTHON_IDENTIFIER
-
-        # Prefer the causes of backtracking on the assumption that the problem
-        # resolving the dependency tree is related to the failures that caused
-        # the backtracking
-        backtrack_cause = self.is_backtrack_cause(identifier, backtrack_causes)
-
         return (
-            not requires_python,
             not direct,
             not pinned,
-            not backtrack_cause,
             requested_order,
             not unfree,
             identifier,
@@ -221,14 +253,3 @@ def is_satisfied_by(self, requirement: Requirement, candidate: Candidate) -> boo
     def get_dependencies(self, candidate: Candidate) -> Sequence[Requirement]:
         with_requires = not self._ignore_dependencies
         return [r for r in candidate.iter_dependencies(with_requires) if r is not None]
-
-    @staticmethod
-    def is_backtrack_cause(
-        identifier: str, backtrack_causes: Sequence["PreferenceInformation"]
-    ) -> bool:
-        for backtrack_cause in backtrack_causes:
-            if identifier == backtrack_cause.requirement.name:
-                return True
-            if backtrack_cause.parent and identifier == backtrack_cause.parent.name:
-                return True
-        return False
diff --git a/tests/unit/resolution_resolvelib/test_provider.py b/tests/unit/resolution_resolvelib/test_provider.py
@@ -1,5 +1,5 @@
 import math
-from typing import TYPE_CHECKING, Dict, Iterable, Optional, Sequence
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Sequence
 
 import pytest
 
@@ -36,61 +36,53 @@ def build_req_info(
 @pytest.mark.parametrize(
     "identifier, information, backtrack_causes, user_requested, expected",
     [
-        # Test case for REQUIRES_PYTHON_IDENTIFIER
-        (
-            REQUIRES_PYTHON_IDENTIFIER,
-            {REQUIRES_PYTHON_IDENTIFIER: [build_req_info("python")]},
-            [],
-            {},
-            (False, False, True, True, math.inf, True, REQUIRES_PYTHON_IDENTIFIER),
-        ),
         # Pinned package with "=="
         (
             "pinned-package",
             {"pinned-package": [build_req_info("pinned-package==1.0")]},
             [],
             {},
-            (True, False, False, True, math.inf, False, "pinned-package"),
+            (False, False, math.inf, False, "pinned-package"),
         ),
         # Star-specified package, i.e. with "*"
         (
             "star-specified-package",
             {"star-specified-package": [build_req_info("star-specified-package==1.*")]},
             [],
             {},
-            (True, False, True, True, math.inf, False, "star-specified-package"),
+            (False, True, math.inf, False, "star-specified-package"),
         ),
         # Package that caused backtracking
         (
             "backtrack-package",
             {"backtrack-package": [build_req_info("backtrack-package")]},
             [build_req_info("backtrack-package")],
             {},
-            (True, False, True, False, math.inf, True, "backtrack-package"),
+            (False, True, math.inf, True, "backtrack-package"),
         ),
         # Root package requested by user
         (
             "root-package",
             {"root-package": [build_req_info("root-package")]},
             [],
             {"root-package": 1},
-            (True, False, True, True, 1, True, "root-package"),
+            (False, True, 1, True, "root-package"),
         ),
         # Unfree package (with specifier operator)
         (
             "unfree-package",
             {"unfree-package": [build_req_info("unfree-package<1")]},
             [],
             {},
-            (True, False, True, True, math.inf, False, "unfree-package"),
+            (False, True, math.inf, False, "unfree-package"),
         ),
         # Free package (no operator)
         (
             "free-package",
             {"free-package": [build_req_info("free-package")]},
             [],
             {},
-            (True, False, True, True, math.inf, True, "free-package"),
+            (False, True, math.inf, True, "free-package"),
         ),
     ],
 )
@@ -115,3 +107,70 @@ def test_get_preference(
     )
 
     assert preference == expected, f"Expected {expected}, got {preference}"
+
+
+@pytest.mark.parametrize(
+    "identifiers, backtrack_causes, expected",
+    [
+        # REQUIRES_PYTHON_IDENTIFIER first: returned alone despite a backtrack cause
+        (
+            [REQUIRES_PYTHON_IDENTIFIER, "package1", "package2", "backtrack-package"],
+            [build_req_info("backtrack-package")],
+            [REQUIRES_PYTHON_IDENTIFIER],
+        ),
+        # REQUIRES_PYTHON_IDENTIFIER last: still returned alone
+        (
+            ["package1", "package2", "backtrack-package", REQUIRES_PYTHON_IDENTIFIER],
+            [build_req_info("backtrack-package")],
+            [REQUIRES_PYTHON_IDENTIFIER],
+        ),
+        # One backtrack cause (direct requirement): only that identifier is returned
+        (
+            ["package1", "package2", "backtrack-package"],
+            [build_req_info("backtrack-package")],
+            ["backtrack-package"],
+        ),
+        # Multiple backtrack causes: both returned, other packages dropped
+        (
+            ["package1", "backtrack1", "backtrack2", "package2"],
+            [build_req_info("backtrack1"), build_req_info("backtrack2")],
+            ["backtrack1", "backtrack2"],
+        ),
+        # No Requires-Python and no backtrack causes: all identifiers returned
+        (
+            ["package1", "package2"],
+            [],
+            ["package1", "package2"],
+        ),
+        # Empty list of identifiers: nothing to narrow
+        (
+            [],
+            [],
+            [],
+        ),
+    ],
+)
+def test_narrow_requirement_selection(
+    identifiers: List[str],
+    backtrack_causes: Sequence["PreferenceInformation"],
+    expected: List[str],
+    factory: Factory,
+) -> None:
+    """Test that narrow_requirement_selection correctly prioritizes identifiers:
+    1. REQUIRES_PYTHON_IDENTIFIER alone, if present (wherever it is listed)
+    2. Otherwise only the backtrack causes, if any are present
+    3. Otherwise all identifiers, unchanged
+    """
+    provider = PipProvider(
+        factory=factory,
+        constraints={},
+        ignore_dependencies=False,
+        upgrade_strategy="to-satisfy-only",
+        user_requested={},
+    )
+
+    result = provider.narrow_requirement_selection(
+        identifiers, {}, {}, {}, backtrack_causes
+    )
+
+    assert list(result) == expected, f"Expected {expected}, got {list(result)}"

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+Speed up resolution by first considering only the preferences of`
	`2`	`+candidates that are required to complete the resolution.`