Skip to content

Commit 0da7579

Browse files
committed
More speedup via mtime-based caching.
Caching based on the directory's mtime is similar to what importlib's FileFinder does. Locally, on a large-ish environment, this speeds up repeated calls to `distribution("pip")` by roughly 10x.
1 parent 61a265c commit 0da7579

File tree

2 files changed

+58
-39
lines changed

2 files changed

+58
-39
lines changed

importlib_metadata/__init__.py

Lines changed: 51 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -464,9 +464,15 @@ class FastPath:
464464
children.
465465
"""
466466

467-
def __init__(self, root):
467+
@functools.lru_cache() # type: ignore
468+
def __new__(cls, root):
469+
self = object().__new__(cls)
468470
self.root = str(root)
469471
self.base = os.path.basename(self.root).lower()
472+
self.last_mtime = -1
473+
self.infos = {}
474+
self.eggs = {}
475+
return self
470476

471477
def joinpath(self, child):
472478
return pathlib.Path(self.root, child)
@@ -482,15 +488,47 @@ def zip_children(self):
482488
zip_path = zipp.Path(self.root)
483489
names = zip_path.root.namelist()
484490
self.joinpath = zip_path.joinpath
485-
486491
return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names)
487492

488-
def search(self, name):
489-
return (
490-
self.joinpath(child)
491-
for child in self.children()
492-
if name.matches(child, self.base)
493-
)
493+
def update_cache(self):
494+
root = self.root or "."
495+
try:
496+
mtime = os.stat(root).st_mtime
497+
except OSError:
498+
self.infos.clear()
499+
self.eggs.clear()
500+
self.last_mtime = -1
501+
return
502+
if mtime == self.last_mtime:
503+
return
504+
self.infos.clear()
505+
self.eggs.clear()
506+
base_is_egg = self.base.endswith(".egg")
507+
for child in self.children():
508+
low = child.lower()
509+
if low.endswith((".dist-info", ".egg-info")):
510+
# rpartition is faster than splitext and suitable for this purpose.
511+
name = low.rpartition(".")[0].partition("-")[0]
512+
normalized = Prepared.normalize(name)
513+
self.infos.setdefault(normalized, []).append(child)
514+
elif base_is_egg and low == "egg-info":
515+
name = self.base.rpartition(".")[0].partition("-")[0]
516+
legacy_normalized = Prepared.legacy_normalize(name)
517+
self.eggs.setdefault(legacy_normalized, []).append(child)
518+
self.last_mtime = mtime
519+
520+
def search(self, prepared):
521+
self.update_cache()
522+
if prepared.name:
523+
infos = self.infos.get(prepared.normalized, [])
524+
yield from map(self.joinpath, infos)
525+
eggs = self.eggs.get(prepared.legacy_normalized, [])
526+
yield from map(self.joinpath, eggs)
527+
else:
528+
for infos in self.infos.values():
529+
yield from map(self.joinpath, infos)
530+
for eggs in self.eggs.values():
531+
yield from map(self.joinpath, eggs)
494532

495533

496534
class Prepared:
@@ -499,22 +537,14 @@ class Prepared:
499537
"""
500538

501539
normalized = None
502-
suffixes = 'dist-info', 'egg-info'
503-
exact_matches = [''][:0]
504-
egg_prefix = ''
505-
versionless_egg_name = ''
540+
legacy_normalized = None
506541

507542
def __init__(self, name):
508543
self.name = name
509544
if name is None:
510545
return
511546
self.normalized = self.normalize(name)
512-
self.exact_matches = [
513-
self.normalized + '.' + suffix for suffix in self.suffixes
514-
]
515-
legacy_normalized = self.legacy_normalize(self.name)
516-
self.egg_prefix = legacy_normalized + '-'
517-
self.versionless_egg_name = legacy_normalized + '.egg'
547+
self.legacy_normalized = self.legacy_normalize(name)
518548

519549
@staticmethod
520550
def normalize(name):
@@ -531,27 +561,6 @@ def legacy_normalize(name):
531561
"""
532562
return name.lower().replace('-', '_')
533563

534-
def matches(self, cand, base):
535-
low = cand.lower()
536-
# rpartition is faster than splitext and suitable for this purpose.
537-
pre, _, ext = low.rpartition('.')
538-
name, _, rest = pre.partition('-')
539-
return (
540-
low in self.exact_matches
541-
or ext in self.suffixes
542-
and (not self.normalized or name.replace('.', '_') == self.normalized)
543-
# legacy case:
544-
or self.is_egg(base)
545-
and low == 'egg-info'
546-
)
547-
548-
def is_egg(self, base):
549-
return (
550-
base == self.versionless_egg_name
551-
or base.startswith(self.egg_prefix)
552-
and base.endswith('.egg')
553-
)
554-
555564

556565
@install
557566
class MetadataPathFinder(NullFinder, DistributionFinder):
@@ -581,6 +590,9 @@ def _search_paths(cls, name, paths):
581590
path.search(prepared) for path in map(FastPath, paths)
582591
)
583592

593+
def invalidate_caches(cls):
594+
FastPath.__new__.cache_clear()
595+
584596

585597
class PathDistribution(Distribution):
586598
def __init__(self, path):

tests/test_api.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import re
22
import textwrap
33
import unittest
4+
import importlib
45

56
from . import fixtures
67
from importlib_metadata import (
@@ -224,3 +225,9 @@ def test_distribution_at_str(self):
224225
dist_info_path = self.site_dir / 'distinfo_pkg-1.0.0.dist-info'
225226
dist = Distribution.at(str(dist_info_path))
226227
assert dist.version == '1.0.0'
228+
229+
230+
class InvalidateCache(unittest.TestCase):
231+
def test_invalidate_cache(self):
232+
# No externally observable behavior, but ensures test coverage...
233+
importlib.invalidate_caches()

0 commit comments

Comments
 (0)