Skip to content

Commit b900a0e

Browse files
committed
use Store as input rather than StoreLike
1 parent b702060 commit b900a0e

File tree

3 files changed

+136
-98
lines changed

3 files changed

+136
-98
lines changed

src/zarr/core/metadata/converter/cli.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import zarr.core.metadata.converter.migrate_to_v3 as migrate_metadata
88
from zarr.core.sync import sync
9+
from zarr.storage._common import make_store
910

1011
app = typer.Typer()
1112

@@ -100,13 +101,19 @@ def migrate(
100101
"Dry run enabled - no new files will be created or changed. Log of files that would be created on a real run:"
101102
)
102103

103-
write_store = input_store if output_store is None else output_store
104+
input_zarr_store = sync(make_store(input_store, mode="r+"))
105+
106+
if output_store is not None:
107+
output_zarr_store = sync(make_store(output_store, mode="w-"))
108+
write_store = output_zarr_store
109+
else:
110+
write_store = input_zarr_store
104111

105112
if overwrite:
106113
sync(migrate_metadata.remove_metadata(write_store, 3, force=force, dry_run=dry_run))
107114

108115
migrate_metadata.migrate_v2_to_v3(
109-
input_store=input_store, output_store=output_store, dry_run=dry_run
116+
input_store=input_zarr_store, output_store=output_zarr_store, dry_run=dry_run
110117
)
111118

112119
if remove_v2_metadata:
@@ -150,10 +157,11 @@ def remove_metadata(
150157
logger.info(
151158
"Dry run enabled - no files will be deleted or changed. Log of files that would be deleted on a real run:"
152159
)
160+
input_zarr_store = sync(make_store(store, mode="r+"))
153161

154162
sync(
155163
migrate_metadata.remove_metadata(
156-
store=store,
164+
store=input_zarr_store,
157165
zarr_format=cast(Literal[2, 3], int(zarr_format[1:])),
158166
force=force,
159167
dry_run=dry_run,

src/zarr/core/metadata/converter/migrate_to_v3.py

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import asyncio
22
import logging
3-
from typing import Any, Literal, cast
3+
from typing import Literal, cast
44

55
import numcodecs.abc
66

77
import zarr
88
from zarr.abc.codec import ArrayArrayCodec, BytesBytesCodec, Codec
9+
from zarr.abc.store import Store
910
from zarr.codecs.blosc import BloscCodec, BloscShuffle
1011
from zarr.codecs.bytes import BytesCodec
1112
from zarr.codecs.gzip import GzipCodec
@@ -29,17 +30,15 @@
2930
from zarr.core.metadata.v3 import ArrayV3Metadata
3031
from zarr.core.sync import sync
3132
from zarr.registry import get_codec_class
32-
from zarr.storage import StoreLike
33-
from zarr.storage._common import StorePath, make_store_path
33+
from zarr.storage import StorePath
3434

3535
logger = logging.getLogger(__name__)
3636

3737

3838
def migrate_v2_to_v3(
3939
*,
40-
input_store: StoreLike,
41-
output_store: StoreLike | None = None,
42-
storage_options: dict[str, Any] | None = None,
40+
input_store: Store,
41+
output_store: Store | None = None,
4342
dry_run: bool = False,
4443
) -> None:
4544
"""Migrate all v2 metadata in a zarr hierarchy to v3.
@@ -49,26 +48,20 @@ def migrate_v2_to_v3(
4948
5049
Parameters
5150
----------
52-
input_store : StoreLike
53-
Input Zarr to migrate - should be a store, path to directory in file system or name of zip file.
54-
output_store : StoreLike
51+
input_store : Store
52+
Input Zarr to migrate.
53+
output_store : Store, optional
5554
Output location to write v3 metadata (no array data will be copied). If not provided, v3 metadata will be
56-
written to input_store. Should be a store, path to directory in file system or name of zip file.
57-
storage_options : dict | None, optional
58-
If the store is backed by an fsspec-based implementation, then this dict will be passed to
59-
the Store constructor for that implementation. Ignored otherwise. Note - the same storage_options will
60-
be passed to both input_store and output_store (if provided).
55+
written to input_store.
6156
dry_run : bool, optional
6257
Enable a 'dry run' - files that would be created are logged, but no files are created or changed.
6358
"""
6459

65-
zarr_v2 = zarr.open(store=input_store, mode="r+", storage_options=storage_options)
60+
zarr_v2 = zarr.open(store=input_store, mode="r+")
6661

6762
if output_store is not None:
6863
# w- access to not allow overwrite of existing data
69-
output_path = sync(
70-
make_store_path(output_store, mode="w-", storage_options=storage_options)
71-
)
64+
output_path = sync(StorePath.open(output_store, path="", mode="w-"))
7265
else:
7366
output_path = zarr_v2.store_path
7467

@@ -101,9 +94,8 @@ def migrate_to_v3(zarr_v2: Array | Group, output_path: StorePath, dry_run: bool
10194

10295

10396
async def remove_metadata(
104-
store: StoreLike,
97+
store: Store,
10598
zarr_format: ZarrFormat,
106-
storage_options: dict[str, Any] | None = None,
10799
force: bool = False,
108100
dry_run: bool = False,
109101
) -> None:
@@ -113,23 +105,21 @@ async def remove_metadata(
113105
114106
Parameters
115107
----------
116-
store : StoreLike
117-
Store or path to directory in file system or name of zip file.
108+
store : Store
109+
Zarr to remove metadata from.
118110
zarr_format : ZarrFormat
119111
Which format's metadata to remove - 2 or 3.
120-
storage_options : dict | None, optional
121-
If the store is backed by an fsspec-based implementation, then this dict will be passed to
122-
the Store constructor for that implementation. Ignored otherwise.
123112
force : bool, optional
124113
When False, metadata can only be removed if a valid alternative exists e.g. deletion of v2 metadata will
125114
only be allowed when v3 metadata is also present. When True, metadata can be removed when there is no
126115
alternative.
127116
dry_run : bool, optional
128117
Enable a 'dry run' - files that would be deleted are logged, but no files are removed or changed.
129118
"""
130-
store_path = await make_store_path(store, mode="r+", storage_options=storage_options)
131-
if not store_path.store.supports_deletes:
119+
120+
if not store.supports_deletes:
132121
raise ValueError("Store must support deletes to remove metadata")
122+
store_path = await StorePath.open(store, path="", mode="r+")
133123

134124
metadata_files_all = {
135125
2: [ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZMETADATA_V2_JSON],
@@ -142,7 +132,7 @@ async def remove_metadata(
142132
alternative_metadata = 2
143133

144134
awaitables = []
145-
async for file_path in store_path.store.list():
135+
async for file_path in store.list():
146136
parent_path, _, file_name = file_path.rpartition("/")
147137

148138
if file_name not in metadata_files_all[zarr_format]:

src/zarr/storage/_common.py

Lines changed: 107 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -267,35 +267,105 @@ def __eq__(self, other: object) -> bool:
267267
StoreLike: TypeAlias = Store | StorePath | FSMap | Path | str | dict[str, Buffer]
268268

269269

270-
async def make_store_path(
270+
async def make_store(
271271
store_like: StoreLike | None,
272272
*,
273-
path: str | None = "",
274273
mode: AccessModeLiteral | None = None,
275274
storage_options: dict[str, Any] | None = None,
276-
) -> StorePath:
275+
) -> Store:
277276
"""
278-
Convert a `StoreLike` object into a StorePath object.
277+
Convert a `StoreLike` object into a Store object.
278+
279+
`StoreLike` objects are converted to `Store` as follows:
280+
281+
- `Store` or `StorePath` = `Store` object.
282+
- `Path` or `str` = `LocalStore` object.
283+
- `str` that starts with a protocol = `FsspecStore` object.
284+
- `dict[str, Buffer]` = `MemoryStore` object.
285+
- `None` = `MemoryStore` object.
286+
- `FSMap` = `FsspecStore` object.
287+
288+
Parameters
289+
----------
290+
store_like : StoreLike | None
291+
The object to convert to a `Store` object.
292+
mode : AccessModeLiteral | None, optional
293+
The mode to use when creating the `Store` object. Newly created stores
293+
are opened read-only only when mode is 'r'.
295+
storage_options : dict[str, Any] | None, optional
296+
The storage options to use when creating the `FsspecStore` object. If
297+
None, the default storage options are used.
298+
299+
Returns
300+
-------
301+
Store
302+
The converted Store object.
303+
304+
Raises
305+
------
306+
TypeError
307+
If the StoreLike object is not one of the supported types, or if storage_options is provided but not used.
308+
ValueError
309+
If storage_options is provided for a store that does not support it.
310+
"""
311+
from zarr.storage._fsspec import FsspecStore # circular import
279312

280-
This function takes a `StoreLike` object and returns a `StorePath` object. The
281-
`StoreLike` object can be a `Store`, `StorePath`, `Path`, `str`, or `dict[str, Buffer]`.
282-
If the `StoreLike` object is a Store or `StorePath`, it is converted to a
283-
`StorePath` object. If the `StoreLike` object is a Path or str, it is converted
284-
to a LocalStore object and then to a `StorePath` object. If the `StoreLike`
285-
object is a dict[str, Buffer], it is converted to a `MemoryStore` object and
286-
then to a `StorePath` object.
313+
used_storage_options = False
314+
assert mode in (None, "r", "r+", "a", "w", "w-")
315+
316+
# if mode 'r' was provided, we'll open any new stores as read-only
317+
_read_only = mode == "r"
287318

288-
If the `StoreLike` object is None, a `MemoryStore` object is created and
289-
converted to a `StorePath` object.
319+
if isinstance(store_like, StorePath):
320+
store = store_like.store
321+
elif isinstance(store_like, Store):
322+
store = store_like
323+
elif store_like is None:
324+
store = await MemoryStore.open(read_only=_read_only)
325+
elif isinstance(store_like, Path):
326+
store = await LocalStore.open(root=store_like, read_only=_read_only)
327+
elif isinstance(store_like, str):
328+
storage_options = storage_options or {}
329+
330+
if _is_fsspec_uri(store_like):
331+
used_storage_options = True
332+
store = FsspecStore.from_url(
333+
store_like, storage_options=storage_options, read_only=_read_only
334+
)
335+
else:
336+
store = await LocalStore.open(root=Path(store_like), read_only=_read_only)
337+
elif isinstance(store_like, dict):
338+
# We deliberately only consider dict[str, Buffer] here, and not arbitrary mutable mappings.
339+
# By only allowing dictionaries, which are in-memory, we know that MemoryStore is appropriate.
340+
store = await MemoryStore.open(store_dict=store_like, read_only=_read_only)
341+
elif _has_fsspec and isinstance(store_like, FSMap):
342+
if storage_options:
343+
raise ValueError(
344+
"'storage_options was provided but is not used for FSMap store_like objects. Specify the storage options when creating the FSMap instance instead."
345+
)
346+
store = FsspecStore.from_mapper(store_like, read_only=_read_only)
347+
else:
348+
raise TypeError(f"Unsupported type for store_like: '{type(store_like).__name__}'")
290349

291-
If the `StoreLike` object is a str and starts with a protocol, it is
292-
converted to a RemoteStore object and then to a `StorePath` object.
350+
if storage_options and not used_storage_options:
351+
msg = "'storage_options' was provided but unused. 'storage_options' is only used for fsspec filesystem stores."
352+
raise TypeError(msg)
293353

294-
If the `StoreLike` object is a dict[str, Buffer] and the mode is not None,
295-
the `MemoryStore` object is created with the given mode.
354+
return store
296355

297-
If the `StoreLike` object is a str and starts with a protocol, the
298-
RemoteStore object is created with the given mode and storage options.
356+
357+
async def make_store_path(
358+
store_like: StoreLike | None,
359+
*,
360+
path: str | None = "",
361+
mode: AccessModeLiteral | None = None,
362+
storage_options: dict[str, Any] | None = None,
363+
) -> StorePath:
364+
"""
365+
Convert a `StoreLike` object into a StorePath object.
366+
367+
This function takes a `StoreLike` object and returns a `StorePath` object. See `make_store` for details
368+
of which `Store` is used for each type of `store_like` object.
299369
300370
Parameters
301371
----------
@@ -319,58 +389,28 @@ async def make_store_path(
319389
Raises
320390
------
321391
TypeError
322-
If the StoreLike object is not one of the supported types.
323-
"""
324-
from zarr.storage._fsspec import FsspecStore # circular import
392+
If the StoreLike object is not one of the supported types, or if storage_options is provided but not used.
393+
ValueError
394+
If storage_options is provided for a store that does not support it.
325395
326-
used_storage_options = False
396+
See Also
397+
--------
398+
make_store
399+
"""
327400
path_normalized = normalize_path(path)
401+
328402
if isinstance(store_like, StorePath):
329-
result = store_like / path_normalized
403+
if storage_options:
404+
msg = "'storage_options' was provided but unused. 'storage_options' is only used for fsspec filesystem stores."
405+
raise TypeError(msg)
406+
return store_like / path_normalized
407+
elif _has_fsspec and isinstance(store_like, FSMap) and path:
408+
raise ValueError(
409+
"'path' was provided but is not used for FSMap store_like objects. Specify the path when creating the FSMap instance instead."
410+
)
330411
else:
331-
assert mode in (None, "r", "r+", "a", "w", "w-")
332-
# if mode 'r' was provided, we'll open any new stores as read-only
333-
_read_only = mode == "r"
334-
if isinstance(store_like, Store):
335-
store = store_like
336-
elif store_like is None:
337-
store = await MemoryStore.open(read_only=_read_only)
338-
elif isinstance(store_like, Path):
339-
store = await LocalStore.open(root=store_like, read_only=_read_only)
340-
elif isinstance(store_like, str):
341-
storage_options = storage_options or {}
342-
343-
if _is_fsspec_uri(store_like):
344-
used_storage_options = True
345-
store = FsspecStore.from_url(
346-
store_like, storage_options=storage_options, read_only=_read_only
347-
)
348-
else:
349-
store = await LocalStore.open(root=Path(store_like), read_only=_read_only)
350-
elif isinstance(store_like, dict):
351-
# We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings.
352-
# By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate.
353-
store = await MemoryStore.open(store_dict=store_like, read_only=_read_only)
354-
elif _has_fsspec and isinstance(store_like, FSMap):
355-
if path:
356-
raise ValueError(
357-
"'path' was provided but is not used for FSMap store_like objects. Specify the path when creating the FSMap instance instead."
358-
)
359-
if storage_options:
360-
raise ValueError(
361-
"'storage_options was provided but is not used for FSMap store_like objects. Specify the storage options when creating the FSMap instance instead."
362-
)
363-
store = FsspecStore.from_mapper(store_like, read_only=_read_only)
364-
else:
365-
raise TypeError(f"Unsupported type for store_like: '{type(store_like).__name__}'")
366-
367-
result = await StorePath.open(store, path=path_normalized, mode=mode)
368-
369-
if storage_options and not used_storage_options:
370-
msg = "'storage_options' was provided but unused. 'storage_options' is only used for fsspec filesystem stores."
371-
raise TypeError(msg)
372-
373-
return result
412+
store = await make_store(store_like, mode=mode, storage_options=storage_options)
413+
return await StorePath.open(store, path=path_normalized, mode=mode)
374414

375415

376416
def _is_fsspec_uri(uri: str) -> bool:

0 commit comments

Comments
 (0)