Skip to content

Commit c05d316

Browse files
committed
Microoptimize MetadataPathFinder.
This speeds up `importlib_metadata.distribution()` by roughly 4-6x, both on near-empty venvs and on "well-populated" ones. Avoiding both regexes and Path objects is necessary to achieve the speedup.
1 parent d9a5f95 commit c05d316

File tree

1 file changed

+31
-22
lines changed

1 file changed

+31
-22
lines changed

importlib_metadata/__init__.py

Lines changed: 31 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -376,7 +376,7 @@ def path(self):
376376
return vars(self).get('path', sys.path)
377377

378378
@property
379-
def pattern(self):
379+
def pattern(self): # Now unused, could be deprecated?
380380
return '.*' if self.name is None else re.escape(self.name)
381381

382382
@abc.abstractmethod
@@ -407,14 +407,14 @@ def find_distributions(self, context=DistributionFinder.Context()):
407407
(or all names if ``None`` indicated) along the paths in the list
408408
of directories ``context.path``.
409409
"""
410-
found = self._search_paths(context.pattern, context.path)
410+
found = self._search_paths(context.name, context.path)
411411
return map(PathDistribution, found)
412412

413413
@classmethod
414-
def _search_paths(cls, pattern, paths):
414+
def _search_paths(cls, name, paths):
415415
"""Find metadata directories in paths heuristically."""
416416
return itertools.chain.from_iterable(
417-
cls._search_path(path, pattern)
417+
cls._search_path(path, name)
418418
for path in map(cls._switch_path, paths)
419419
)
420420

@@ -426,25 +426,34 @@ def _switch_path(path):
426426
return pathlib.Path(path)
427427

428428
@classmethod
429-
def _matches_info(cls, normalized, item):
430-
template = r'{pattern}(-.*)?\.(dist|egg)-info'
431-
manifest = template.format(pattern=normalized)
432-
return re.match(manifest, item.name, flags=re.IGNORECASE)
433-
434-
@classmethod
435-
def _matches_legacy(cls, normalized, item):
436-
template = r'{pattern}-.*\.egg[\\/]EGG-INFO'
437-
manifest = template.format(pattern=normalized)
438-
return re.search(manifest, str(item), flags=re.IGNORECASE)
439-
440-
@classmethod
441-
def _search_path(cls, root, pattern):
429+
def _search_path(cls, root, name):
442430
if not root.is_dir():
443-
return ()
444-
normalized = pattern.replace('-', '_')
445-
return (item for item in root.iterdir()
446-
if cls._matches_info(normalized, item)
447-
or cls._matches_legacy(normalized, item))
431+
return
432+
# This function is microoptimized by avoiding the use of regexes and
433+
# using strs rather than Path objects.
434+
if name is not None:
435+
normalized = name.lower().replace('-', '_')
436+
prefix = normalized + '-'
437+
else:
438+
normalized = prefix = ''
439+
suffixes = ('.dist-info', '.egg-info')
440+
exact_matches = [normalized + suffix for suffix in suffixes]
441+
if isinstance(root, zipp.Path):
442+
root_n_low = os.path.split(root.root.filename.lower())[1].lower()
443+
children = [path.name for path in root.iterdir()]
444+
else: # Normal Path.
445+
root_n_low = root.name.lower()
446+
children = os.listdir(str(root))
447+
root_is_egg = (
448+
root_n_low == normalized + '.egg'
449+
or root_n_low.startswith(prefix) and root_n_low.endswith('.egg'))
450+
for child in children:
451+
n_low = child.lower()
452+
if (n_low in exact_matches
453+
or n_low.startswith(prefix) and n_low.endswith(suffixes)
454+
# legacy case:
455+
or root_is_egg and n_low == 'egg-info'):
456+
yield root / child
448457

449458

450459
class PathDistribution(Distribution):

0 commit comments

Comments
 (0)