Skip to content

Commit 13966ba

Browse files
authored
Merge pull request #563 from Carreau/os-wlk
2 parents 27f484e + 5712be8 commit 13966ba

File tree

3 files changed

+51
-9
lines changed

3 files changed

+51
-9
lines changed

docs/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ Release notes
55
Next release
66
------------
77

8+
* `DirectoryStore` now uses `os.scandir`, which should make listing large stores
9+
faster, :issue:`563`.
810
* Fix minor bug in `N5Store`.
911
By :user:`gsakkis`, :issue:`550`.
1012

zarr/storage.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -845,15 +845,27 @@ def __eq__(self, other):
845845

846846
def keys(self):
847847
if os.path.exists(self.path):
848-
directories = [(self.path, '')]
849-
while directories:
850-
dir_name, prefix = directories.pop()
851-
for name in os.listdir(dir_name):
852-
path = os.path.join(dir_name, name)
853-
if os.path.isfile(path):
854-
yield prefix + name
855-
elif os.path.isdir(path):
856-
directories.append((path, prefix + name + '/'))
848+
yield from self._keys_fast(self.path)
849+
850+
@staticmethod
def _keys_fast(path, walker=os.walk):
    """Yield every file below `path` as a '/'-separated key relative to it.

    The tree is traversed with `walker` (`os.walk` by default), which
    hands back the file names of each directory directly, so no separate
    per-entry `isfile`/`isdir` check is made here.

    Parameters
    ----------
    path : str
        Root directory to list.
    walker : callable, optional
        Callable taking `path` and yielding `(dirpath, dirnames,
        filenames)` tuples like `os.walk`, root directory first.
        Replaceable so the key logic can be tested without a filesystem.

    Yields
    ------
    str
        Files directly in the root are yielded by bare name; files in
        sub-directories are yielded as ``'sub/dir/name'``. Backslashes in
        the directory part are converted to ``'/'``.
    """
    it = iter(walker(path))
    try:
        root, _, filenames = next(it)
    except StopIteration:
        # The walker produced nothing (e.g. `os.walk` on a missing or
        # non-directory path). Letting StopIteration escape a generator
        # would surface as RuntimeError (PEP 479), so end cleanly instead.
        return

    # Number of leading characters to strip from each `dirpath` to make it
    # relative: the root itself plus one separator, unless the root already
    # ends with a separator ('/' everywhere, `os.sep` covers '\\' on Windows).
    if root.endswith(('/', os.sep)):
        root_len = len(root)
    else:
        root_len = len(root) + 1

    # Files in the root directory have no prefix.
    yield from filenames

    for dirpath, _, filenames in it:
        # Computed once per directory rather than once per file.
        prefix = dirpath[root_len:].replace('\\', '/') + '/'
        for f in filenames:
            yield prefix + f
857869

858870
def __iter__(self):
859871
return self.keys()

zarr/tests/test_storage.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,34 @@ def test_normalize_keys(self):
827827
assert 'FOO' in store
828828
assert 'foo' in store
829829

830+
def test_listing_keys_slash(self):
# Keys from `_keys_fast` must be relative to the root when the walker
# reports the root directory with a trailing slash.
831+
832+
def mock_walker_slash(_path):
833+
yield from [
834+
# root directory path (first entry) ends with a trailing slash
835+
('root_with_slash/', ['d1', 'g1'], ['.zgroup']),
836+
('root_with_slash/d1', [], ['.zarray']),
837+
('root_with_slash/g1', [], ['.zgroup'])
838+
]
839+
840+
res = set(DirectoryStore._keys_fast('root_with_slash/', walker=mock_walker_slash))
841+
assert res == {'.zgroup', 'g1/.zgroup', 'd1/.zarray'}
842+
843+
def test_listing_keys_no_slash(self):
# Keys from `_keys_fast` must be relative to the root when the walker
# reports the root directory without a trailing slash.
844+
845+
def mock_walker_no_slash(_path):
846+
yield from [
847+
# root directory path (first entry) has no trailing slash
848+
('root_with_no_slash', ['d1', 'g1'], ['.zgroup']),
849+
('root_with_no_slash/d1', [], ['.zarray']),
850+
('root_with_no_slash/g1', [], ['.zgroup'])
851+
]
852+
853+
res = set(
854+
DirectoryStore._keys_fast('root_with_no_slash', mock_walker_no_slash)
855+
)
856+
assert res == {'.zgroup', 'g1/.zgroup', 'd1/.zarray'}
857+
830858

831859
@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
832860
class TestFSStore(StoreTests, unittest.TestCase):

0 commit comments

Comments
 (0)