Skip to content

Commit b22b8a7

Browse files
committed
Merge branch 'bugfix/95-perf' into 'master'
Performance optimization of distribution discovery Closes #95 See merge request python-devs/importlib_metadata!106
2 parents e098c1f + e24ba43 commit b22b8a7

File tree

3 files changed

+79
-40
lines changed

3 files changed

+79
-40
lines changed

importlib_metadata/__init__.py

Lines changed: 72 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import operator
1111
import functools
1212
import itertools
13+
import posixpath
1314
import collections
1415

1516
from ._compat import (
@@ -23,7 +24,6 @@
2324
NotADirectoryError,
2425
PermissionError,
2526
pathlib,
26-
PYPY_OPEN_BUG,
2727
ModuleNotFoundError,
2828
MetaPathFinder,
2929
email_message_from_string,
@@ -389,10 +389,6 @@ def path(self):
389389
"""
390390
return vars(self).get('path', sys.path)
391391

392-
@property
393-
def pattern(self):
394-
return '.*' if self.name is None else re.escape(self.name)
395-
396392
@abc.abstractmethod
397393
def find_distributions(self, context=Context()):
398394
"""
@@ -404,6 +400,73 @@ def find_distributions(self, context=Context()):
404400
"""
405401

406402

403+
class FastPath:
    """
    Micro-optimized class for searching a path for
    children.

    ``root`` is one entry of the search path: a directory, a zip
    archive, or the empty string (the current directory).
    """

    def __init__(self, root):
        self.root = root

    def joinpath(self, child):
        """
        Return ``child`` joined onto ``root`` as a path object.

        ``zip_children`` rebinds this attribute on the instance so that
        children of an archive resolve to paths inside that archive.
        """
        return pathlib.Path(self.root, child)

    def children(self):
        """
        Return the names of the top-level entries of ``root``.

        ``root`` is first tried as a directory and then as a zip
        archive. Any failure is suppressed; if neither attempt
        succeeds the result is an empty list.
        """
        root = self.root
        if root == '':
            # An empty search-path entry stands for the current
            # directory, but ``os.listdir('')`` raises, so list '.'.
            root = '.'
        with suppress(Exception):
            return os.listdir(root or '')
        with suppress(Exception):
            return self.zip_children()
        return []

    def zip_children(self):
        """
        Return the unique top-level names found in the archive at
        ``root``, in order of first appearance.

        As a side effect, ``joinpath`` is rebound on this instance to
        the archive's own ``joinpath``.
        """
        zip_path = zipp.Path(self.root)
        names = zip_path.root.namelist()
        self.joinpath = zip_path.joinpath

        # Keep only the first path component of every member so the
        # result mirrors ``os.listdir`` (top-level names only), and
        # drop repeats so a directory is not reported once per file
        # it contains.
        return list(collections.OrderedDict.fromkeys(
            member.split(posixpath.sep, 1)[0]
            for member in names
        ))

    def is_egg(self, search):
        """
        Return whether the base name of ``root`` looks like an egg for
        the prepared ``search``: either ``<normalized>.egg`` or
        ``<prefix>....egg``, compared case-insensitively.
        """
        root_n_low = os.path.split(self.root)[1].lower()

        return (
            root_n_low == search.normalized + '.egg'
            or (
                root_n_low.startswith(search.prefix)
                and root_n_low.endswith('.egg')
            )
        )

    def search(self, name):
        """
        Yield a path for each child of ``root`` that holds metadata
        matching ``name``.

        ``name`` is a prepared search providing ``exact_matches``,
        ``prefix``, ``suffixes`` and ``normalized``. Child names are
        compared in lower case.
        """
        for child in self.children():
            n_low = child.lower()
            if (
                n_low in name.exact_matches
                or (
                    n_low.startswith(name.prefix)
                    and n_low.endswith(name.suffixes)
                )
                # legacy case: an ``EGG-INFO`` entry inside an egg
                or (self.is_egg(name) and n_low == 'egg-info')
            ):
                yield self.joinpath(child)
449+
450+
451+
class Prepared:
    """
    Precomputed lookup data for a metadata search on a package that
    may or may not be named.

    When ``name`` is ``None`` the class-level defaults stay in effect:
    an empty ``normalized`` name, an empty ``prefix`` and no
    ``exact_matches``.
    """
    normalized = ''
    prefix = ''
    suffixes = ('.dist-info', '.egg-info')
    exact_matches = []

    def __init__(self, name):
        self.name = name
        if name is not None:
            # Lower-case the name and turn dashes into underscores.
            canonical = name.lower().replace('-', '_')
            self.normalized = canonical
            self.prefix = canonical + '-'
            self.exact_matches = [
                canonical + ending for ending in self.suffixes
            ]
468+
469+
407470
@install
408471
class MetadataPathFinder(NullFinder, DistributionFinder):
409472
"""A degenerate finder for distribution packages on the file system.
@@ -421,45 +484,17 @@ def find_distributions(self, context=DistributionFinder.Context()):
421484
(or all names if ``None`` indicated) along the paths in the list
422485
of directories ``context.path``.
423486
"""
424-
found = self._search_paths(context.pattern, context.path)
487+
found = self._search_paths(context.name, context.path)
425488
return map(PathDistribution, found)
426489

427490
@classmethod
428-
def _search_paths(cls, pattern, paths):
491+
def _search_paths(cls, name, paths):
429492
"""Find metadata directories in paths heuristically."""
430493
return itertools.chain.from_iterable(
431-
cls._search_path(path, pattern)
432-
for path in map(cls._switch_path, paths)
494+
path.search(Prepared(name))
495+
for path in map(FastPath, paths)
433496
)
434497

435-
@staticmethod
436-
def _switch_path(path):
437-
if not PYPY_OPEN_BUG or os.path.isfile(path): # pragma: no branch
438-
with suppress(Exception):
439-
return zipp.Path(path)
440-
return pathlib.Path(path)
441-
442-
@classmethod
443-
def _matches_info(cls, normalized, item):
444-
template = r'{pattern}(-.*)?\.(dist|egg)-info'
445-
manifest = template.format(pattern=normalized)
446-
return re.match(manifest, item.name, flags=re.IGNORECASE)
447-
448-
@classmethod
449-
def _matches_legacy(cls, normalized, item):
450-
template = r'{pattern}-.*\.egg[\\/]EGG-INFO'
451-
manifest = template.format(pattern=normalized)
452-
return re.search(manifest, str(item), flags=re.IGNORECASE)
453-
454-
@classmethod
455-
def _search_path(cls, root, pattern):
456-
if not root.is_dir():
457-
return ()
458-
normalized = pattern.replace('-', '_')
459-
return (item for item in root.iterdir()
460-
if cls._matches_info(normalized, item)
461-
or cls._matches_legacy(normalized, item))
462-
463498

464499
class PathDistribution(Distribution):
465500
def __init__(self, path):

importlib_metadata/_compat.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,6 @@ def py2_message_from_string(text): # nocoverpy3
111111
email.message_from_string
112112
)
113113

114-
# https://bitbucket.org/pypy/pypy/issues/3021/ioopen-directory-leaks-a-file-descriptor
115-
PYPY_OPEN_BUG = getattr(sys, 'pypy_version_info', (9, 9, 9))[:3] <= (7, 1, 1)
116-
117114

118115
class PyPy_repr:
119116
"""

importlib_metadata/docs/changelog.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
importlib_metadata NEWS
33
=========================
44

5+
v1.4.0
6+
======
7+
8+
* Through careful optimization, ``distribution()`` is
9+
3-4x faster. Thanks to Antony Lee for the
10+
contribution. Closes #95.
11+
512
v1.3.0
613
======
714

0 commit comments

Comments
 (0)