Skip to content

Commit ceeccee

Browse files
ichard26 and uranusjr authored
importlib: Read distribution name/version from metadata directory names, if possible (#12656)
importlib does not cache metadata in-memory, so querying even simple attributes like distribution names and versions can quickly become expensive (as each access requires reading METADATA). Fortunately, `Distribution.canonical_name` is optimized to parse the metadata directory name to query the name if possible. This commit extends this optimization to the finder implementation and version attribute.

.egg-info directory names tend not to include the version, so they are not considered for optimizing version lookup.

simplewheel-2.0-1-py2.py3-none-any.whl had to be modified to rename the .dist-info directory, which mistakenly included the wheel build tag (in violation of the wheel specification).

    simplewheel/__init__.py
    simplewheel-2.0-1.dist-info/DESCRIPTION.rst
    simplewheel-2.0-1.dist-info/metadata.json
    simplewheel-2.0-1.dist-info/top_level.txt
    simplewheel-2.0-1.dist-info/WHEEL
    simplewheel-2.0-1.dist-info/METADATA
    simplewheel-2.0-1.dist-info/RECORD

Otherwise, it was mistaken for part of the version and led pip to think the wheel was a post-release, breaking tests...

Co-authored-by: Tzu-ping Chung <[email protected]>
1 parent 601bcf8 commit ceeccee

File tree

8 files changed

+51
-27
lines changed

8 files changed

+51
-27
lines changed

news/12656.feature.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Improve discovery performance of installed packages when the
2+
``importlib.metadata`` backend is used to load distribution metadata
3+
(used by default under Python 3.11+).

news/aa82171b-1578-4128-8db3-9aa72b3a6a84.trivial.rst

Whitespace-only changes.

src/pip/_internal/metadata/importlib/_compat.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import importlib.metadata
2-
from typing import Any, Optional, Protocol, cast
2+
import os
3+
from typing import Any, Optional, Protocol, Tuple, cast
4+
5+
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
36

47

58
class BadMetadata(ValueError):
@@ -43,13 +46,40 @@ def get_info_location(d: importlib.metadata.Distribution) -> Optional[BasePath]:
4346
return getattr(d, "_path", None)
4447

4548

46-
def get_dist_name(dist: importlib.metadata.Distribution) -> str:
47-
"""Get the distribution's project name.
49+
def parse_name_and_version_from_info_directory(
50+
dist: importlib.metadata.Distribution,
51+
) -> Tuple[Optional[str], Optional[str]]:
52+
"""Get a name and version from the metadata directory name.
53+
54+
This is much faster than reading distribution metadata.
55+
"""
56+
info_location = get_info_location(dist)
57+
if info_location is None:
58+
return None, None
59+
60+
stem, suffix = os.path.splitext(info_location.name)
61+
if suffix == ".dist-info":
62+
name, sep, version = stem.partition("-")
63+
if sep:
64+
return name, version
65+
66+
if suffix == ".egg-info":
67+
name = stem.split("-", 1)[0]
68+
return name, None
69+
70+
return None, None
71+
72+
73+
def get_dist_canonical_name(dist: importlib.metadata.Distribution) -> NormalizedName:
74+
"""Get the distribution's normalized name.
4875
4976
The ``name`` attribute is only available in Python 3.10 or later. We are
5077
targeting exactly that, but Mypy does not know this.
5178
"""
79+
if name := parse_name_and_version_from_info_directory(dist)[0]:
80+
return canonicalize_name(name)
81+
5282
name = cast(Any, dist).name
5383
if not isinstance(name, str):
5484
raise BadMetadata(dist, reason="invalid metadata entry 'name'")
55-
return name
85+
return canonicalize_name(name)

src/pip/_internal/metadata/importlib/_dists.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import email.message
22
import importlib.metadata
3-
import os
43
import pathlib
54
import zipfile
65
from typing import (
@@ -31,7 +30,11 @@
3130
from pip._internal.utils.temp_dir import TempDirectory
3231
from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file
3332

34-
from ._compat import BasePath, get_dist_name
33+
from ._compat import (
34+
BasePath,
35+
get_dist_canonical_name,
36+
parse_name_and_version_from_info_directory,
37+
)
3538

3639

3740
class WheelDistribution(importlib.metadata.Distribution):
@@ -154,25 +157,14 @@ def installed_location(self) -> Optional[str]:
154157
return None
155158
return normalize_path(str(self._installed_location))
156159

157-
def _get_dist_name_from_location(self) -> Optional[str]:
158-
"""Try to get the name from the metadata directory name.
159-
160-
This is much faster than reading metadata.
161-
"""
162-
if self._info_location is None:
163-
return None
164-
stem, suffix = os.path.splitext(self._info_location.name)
165-
if suffix not in (".dist-info", ".egg-info"):
166-
return None
167-
return stem.split("-", 1)[0]
168-
169160
@property
170161
def canonical_name(self) -> NormalizedName:
171-
name = self._get_dist_name_from_location() or get_dist_name(self._dist)
172-
return canonicalize_name(name)
162+
return get_dist_canonical_name(self._dist)
173163

174164
@property
175165
def version(self) -> Version:
166+
if version := parse_name_and_version_from_info_directory(self._dist)[1]:
167+
return parse_version(version)
176168
return parse_version(self._dist.version)
177169

178170
@property

src/pip/_internal/metadata/importlib/_envs.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from pip._internal.utils.deprecation import deprecated
1616
from pip._internal.utils.filetypes import WHEEL_EXTENSION
1717

18-
from ._compat import BadMetadata, BasePath, get_dist_name, get_info_location
18+
from ._compat import BadMetadata, BasePath, get_dist_canonical_name, get_info_location
1919
from ._dists import Distribution
2020

2121
logger = logging.getLogger(__name__)
@@ -61,14 +61,13 @@ def _find_impl(self, location: str) -> Iterator[FoundResult]:
6161
for dist in importlib.metadata.distributions(path=[location]):
6262
info_location = get_info_location(dist)
6363
try:
64-
raw_name = get_dist_name(dist)
64+
name = get_dist_canonical_name(dist)
6565
except BadMetadata as e:
6666
logger.warning("Skipping %s due to %s", info_location, e.reason)
6767
continue
68-
normalized_name = canonicalize_name(raw_name)
69-
if normalized_name in self._found_names:
68+
if name in self._found_names:
7069
continue
71-
self._found_names.add(normalized_name)
70+
self._found_names.add(name)
7271
yield dist, info_location
7372

7473
def find(self, location: str) -> Iterator[BaseDistribution]:
Binary file not shown.

tests/functional/test_install.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1195,7 +1195,7 @@ def test_install_nonlocal_compatible_wheel(
11951195
)
11961196
assert result.returncode == SUCCESS
11971197

1198-
distinfo = Path("scratch") / "target" / "simplewheel-2.0-1.dist-info"
1198+
distinfo = Path("scratch") / "target" / "simplewheel-2.0.dist-info"
11991199
result.did_create(distinfo)
12001200

12011201
# Test install without --target

tests/functional/test_install_report.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def test_install_report_basic(
3939
assert url.endswith("/packages/simplewheel-2.0-1-py2.py3-none-any.whl")
4040
assert (
4141
simplewheel_report["download_info"]["archive_info"]["hash"]
42-
== "sha256=191d6520d0570b13580bf7642c97ddfbb46dd04da5dd2cf7bef9f32391dfe716"
42+
== "sha256=71e1ca6b16ae3382a698c284013f66504f2581099b2ce4801f60e9536236ceee"
4343
)
4444

4545

0 commit comments

Comments
 (0)