Skip to content

Commit 6d0d514

Browse files
authored
refactor(pip_install): make the API surface of deps parsing smaller (#1657)
With this change we also fix a minor bug where the self edge dependencies within the wheels are correctly handled as per unit tests. Also refactor the code to use the fact that the selects are chosen based on the specificity of the condition. This can make the addition of deps incrementally correct without the need for extra processing in the `build` method. This should hopefully make the implementation more future-proof as it makes use of these generic concepts and is likely to help to extend this code to work with selecting on multiple Python versions. I have added a test to ensure coverage of the new code.
1 parent 3d68f6c commit 6d0d514

File tree

2 files changed

+168
-183
lines changed

2 files changed

+168
-183
lines changed

python/pip_install/tools/wheel_installer/wheel.py

Lines changed: 88 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from dataclasses import dataclass
2323
from enum import Enum
2424
from pathlib import Path
25-
from typing import Any, Dict, List, Optional, Set, Tuple, Union
25+
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union
2626

2727
import installer
2828
from packaging.requirements import Requirement
@@ -86,6 +86,17 @@ def host(cls) -> List["Platform"]:
8686
)
8787
]
8888

89+
def all_specializations(self) -> Iterator["Platform"]:
90+
"""Return the platform itself and all its unambiguous specializations.
91+
92+
For more info about specializations see
93+
https://bazel.build/docs/configurable-attributes
94+
"""
95+
yield self
96+
if self.arch is None:
97+
for arch in Arch:
98+
yield Platform(os=self.os, arch=arch)
99+
89100
def __lt__(self, other: Any) -> bool:
90101
"""Add a comparison method, so that `sorted` returns the most specialized platforms first."""
91102
if not isinstance(other, Platform) or other is None:
@@ -228,57 +239,94 @@ class Deps:
228239
def __init__(
229240
self,
230241
name: str,
242+
*,
243+
requires_dist: Optional[List[str]],
231244
extras: Optional[Set[str]] = None,
232245
platforms: Optional[Set[Platform]] = None,
233246
):
234247
self.name: str = Deps._normalize(name)
248+
self._platforms: Set[Platform] = platforms or set()
249+
250+
# Sort so that the dictionary order in the FrozenDeps is deterministic
251+
# without the final sort because Python retains insertion order. That way
252+
# the sorting by platform is limited within the Platform class itself and
253+
# the unit-tests for the Deps can be simpler.
254+
reqs = sorted(
255+
(Requirement(wheel_req) for wheel_req in requires_dist),
256+
key=lambda x: f"{x.name}:{sorted(x.extras)}",
257+
)
258+
259+
want_extras = self._resolve_extras(reqs, extras)
260+
261+
# Then add all of the requirements in order
235262
self._deps: Set[str] = set()
236263
self._select: Dict[Platform, Set[str]] = defaultdict(set)
237-
self._want_extras: Set[str] = extras or {""} # empty strings means no extras
238-
self._platforms: Set[Platform] = platforms or set()
264+
for req in reqs:
265+
self._add_req(req, want_extras)
239266

240267
def _add(self, dep: str, platform: Optional[Platform]):
241268
dep = Deps._normalize(dep)
242269

243-
# Packages may create dependency cycles when specifying optional-dependencies / 'extras'.
244-
# Example: github.com/google/etils/blob/a0b71032095db14acf6b33516bca6d885fe09e35/pyproject.toml#L32.
270+
# Self-edges are processed in _resolve_extras
245271
if dep == self.name:
246272
return
247273

248-
if platform:
249-
self._select[platform].add(dep)
250-
else:
274+
if not platform:
251275
self._deps.add(dep)
276+
return
252277

253-
@staticmethod
254-
def _normalize(name: str) -> str:
255-
return re.sub(r"[-_.]+", "_", name).lower()
278+
# Add the platform-specific dep
279+
self._select[platform].add(dep)
256280

257-
def add(self, *wheel_reqs: str) -> None:
258-
reqs = [Requirement(wheel_req) for wheel_req in wheel_reqs]
281+
# Add the dep to specializations of the given platform if they
282+
# exist in the select statement.
283+
for p in platform.all_specializations():
284+
if p not in self._select:
285+
continue
259286

260-
# Resolve any extra extras due to self-edges
261-
self._want_extras = self._resolve_extras(reqs)
287+
self._select[p].add(dep)
262288

263-
# process self-edges first to resolve the extras used
264-
for req in reqs:
265-
self._add_req(req)
289+
if len(self._select[platform]) != 1:
290+
return
291+
292+
# We are adding a new item to the select and we need to ensure that
293+
# existing dependencies from less specialized platforms are propagated
294+
# to the newly added dependency set.
295+
for p, deps in self._select.items():
296+
# Check if the existing platform overlaps with the given platform
297+
if p == platform or platform not in p.all_specializations():
298+
continue
266299

267-
def _resolve_extras(self, reqs: List[Requirement]) -> Set[str]:
300+
self._select[platform].update(self._select[p])
301+
302+
@staticmethod
303+
def _normalize(name: str) -> str:
304+
return re.sub(r"[-_.]+", "_", name).lower()
305+
306+
def _resolve_extras(
307+
self, reqs: List[Requirement], extras: Optional[Set[str]]
308+
) -> Set[str]:
268309
"""Resolve extras which are due to depending on self[some_other_extra].
269310
270311
Some packages may have cyclic dependencies resulting from extras being used, one example is
271-
`elint`, where we have one set of extras as aliases for other extras
312+
`etils`, where we have one set of extras as aliases for other extras
272313
and we have an extra called 'all' that includes all other extras.
273314
315+
Example: github.com/google/etils/blob/a0b71032095db14acf6b33516bca6d885fe09e35/pyproject.toml#L32.
316+
274317
When the `requirements.txt` is generated by `pip-tools`, then it is likely that
275318
this step is not needed, but for other `requirements.txt` files this may be useful.
276319
277-
NOTE @aignas 2023-12-08: the extra resolution is not platform dependent, but
278-
in order for it to become platform dependent we would have to have separate targets for each extra in
279-
self._want_extras.
320+
NOTE @aignas 2023-12-08: the extra resolution is not platform dependent,
321+
but in order for it to become platform dependent we would have to have
322+
separate targets for each extra in extras.
280323
"""
281-
extras = self._want_extras
324+
325+
# Resolve any extra extras due to self-edges, empty string means no
326+
# extras The empty string in the set is just a way to make the handling
327+
# of no extras and a single extra easier and having a set of {"", "foo"}
328+
# is equivalent to having {"foo"}.
329+
extras = extras or {""}
282330

283331
self_reqs = []
284332
for req in reqs:
@@ -311,29 +359,34 @@ def _resolve_extras(self, reqs: List[Requirement]) -> Set[str]:
311359

312360
return extras
313361

314-
def _add_req(self, req: Requirement) -> None:
315-
extras = self._want_extras
316-
362+
def _add_req(self, req: Requirement, extras: Set[str]) -> None:
317363
if req.marker is None:
318364
self._add(req.name, None)
319365
return
320366

321367
marker_str = str(req.marker)
322368

369+
if not self._platforms:
370+
if any(req.marker.evaluate({"extra": extra}) for extra in extras):
371+
self._add(req.name, None)
372+
return
373+
323374
# NOTE @aignas 2023-12-08: in order to have reasonable select statements
324375
# we do have to have some parsing of the markers, so it begs the question
325376
# if packaging should be reimplemented in Starlark to have the best solution
326377
# for now we will implement it in Python and see what the best parsing result
327378
# can be before making this decision.
328-
if not self._platforms or not any(
379+
match_os = any(
329380
tag in marker_str
330381
for tag in [
331382
"os_name",
332383
"sys_platform",
333-
"platform_machine",
334384
"platform_system",
335385
]
336-
):
386+
)
387+
match_arch = "platform_machine" in marker_str
388+
389+
if not (match_os or match_arch):
337390
if any(req.marker.evaluate({"extra": extra}) for extra in extras):
338391
self._add(req.name, None)
339392
return
@@ -344,34 +397,15 @@ def _add_req(self, req: Requirement) -> None:
344397
):
345398
continue
346399

347-
if "platform_machine" in marker_str:
400+
if match_arch:
348401
self._add(req.name, plat)
349402
else:
350403
self._add(req.name, Platform(plat.os))
351404

352405
def build(self) -> FrozenDeps:
353-
if not self._select:
354-
return FrozenDeps(
355-
deps=sorted(self._deps),
356-
deps_select={},
357-
)
358-
359-
# Get all of the OS-specific dependencies applicable to all architectures
360-
select = {
361-
p: deps for p, deps in self._select.items() if deps and p.arch is None
362-
}
363-
# Now add them to all arch specific dependencies
364-
select.update(
365-
{
366-
p: deps | select.get(Platform(p.os), set())
367-
for p, deps in self._select.items()
368-
if deps and p.arch is not None
369-
}
370-
)
371-
372406
return FrozenDeps(
373407
deps=sorted(self._deps),
374-
deps_select={str(p): sorted(deps) for p, deps in sorted(select.items())},
408+
deps_select={str(p): sorted(deps) for p, deps in self._select.items()},
375409
)
376410

377411

@@ -429,15 +463,12 @@ def dependencies(
429463
extras_requested: Set[str] = None,
430464
platforms: Optional[Set[Platform]] = None,
431465
) -> FrozenDeps:
432-
dependency_set = Deps(
466+
return Deps(
433467
self.name,
434468
extras=extras_requested,
435469
platforms=platforms,
436-
)
437-
for wheel_req in self.metadata.get_all("Requires-Dist", []):
438-
dependency_set.add(wheel_req)
439-
440-
return dependency_set.build()
470+
requires_dist=self.metadata.get_all("Requires-Dist", []),
471+
).build()
441472

442473
def unzip(self, directory: str) -> None:
443474
installation_schemes = {

0 commit comments

Comments
 (0)