Skip to content

Commit b3cda2b

Browse files
rabernatjoshmoore
andauthored
Fix double counting V3 groups bug (#1268)
* fix double counting groups bug * add release notes Co-authored-by: Josh Moore <[email protected]>
1 parent 596d9c0 commit b3cda2b

File tree

3 files changed

+35
-18
lines changed

3 files changed

+35
-18
lines changed

docs/release.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@ Unreleased
1414
# .. warning::
1515
# Pre-release! Use :command:`pip install --pre zarr` to evaluate this release.
1616
17+
* Fix bug that caused double counting of groups in ``groups()`` and ``group_keys()``
18+
methods with V3 stores.
19+
By :user:`Ryan Abernathey <rabernat>` :issue:`1228`.
20+
21+
.. _release_2.13.2:
22+
1723
Maintenance
1824
~~~~~~~~~~~
1925

zarr/hierarchy.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -511,9 +511,15 @@ def group_keys(self):
511511
else:
512512
dir_name = meta_root + self._path
513513
group_sfx = '.group' + self._metadata_key_suffix
514-
for key in sorted(listdir(self._store, dir_name)):
514+
# The fact that we call sorted means this can't be a streaming generator.
515+
# The keys are already in memory.
516+
all_keys = sorted(listdir(self._store, dir_name))
517+
for key in all_keys:
515518
if key.endswith(group_sfx):
516519
key = key[:-len(group_sfx)]
520+
if key in all_keys:
521+
# otherwise we will double count this group
522+
continue
517523
path = self._key_prefix + key
518524
if path.endswith(".array" + self._metadata_key_suffix):
519525
# skip array keys
@@ -552,24 +558,16 @@ def groups(self):
552558
zarr_version=self._version)
553559

554560
else:
555-
dir_name = meta_root + self._path
556-
group_sfx = '.group' + self._metadata_key_suffix
557-
for key in sorted(listdir(self._store, dir_name)):
558-
if key.endswith(group_sfx):
559-
key = key[:-len(group_sfx)]
561+
for key in self.group_keys():
560562
path = self._key_prefix + key
561-
if path.endswith(".array" + self._metadata_key_suffix):
562-
# skip array keys
563-
continue
564-
if contains_group(self._store, path, explicit_only=False):
565-
yield key, Group(
566-
self._store,
567-
path=path,
568-
read_only=self._read_only,
569-
chunk_store=self._chunk_store,
570-
cache_attrs=self.attrs.cache,
571-
synchronizer=self._synchronizer,
572-
zarr_version=self._version)
563+
yield key, Group(
564+
self._store,
565+
path=path,
566+
read_only=self._read_only,
567+
chunk_store=self._chunk_store,
568+
cache_attrs=self.attrs.cache,
569+
synchronizer=self._synchronizer,
570+
zarr_version=self._version)
573571

574572
def array_keys(self, recurse=False):
575573
"""Return an iterator over member names for arrays only.

zarr/tests/test_hierarchy.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,19 @@ def visitor1(val, *args):
770770

771771
g1.store.close()
772772

773+
# regression test for https://github.com/zarr-developers/zarr-python/issues/1228
774+
def test_double_counting_group_v3(self):
775+
root_group = self.create_group()
776+
group_names = ["foo", "foo-", "foo_"]
777+
for name in group_names:
778+
sub_group = root_group.create_group(name)
779+
sub_group.create("bar", shape=10, dtype="i4")
780+
assert list(root_group.group_keys()) == sorted(group_names)
781+
assert list(root_group.groups()) == [
782+
(name, root_group[name])
783+
for name in sorted(group_names)
784+
]
785+
773786
def test_empty_getitem_contains_iterators(self):
774787
# setup
775788
g = self.create_group()

0 commit comments

Comments
 (0)