Skip to content

Commit f538662

Browse files
Default to RemoteStore for fsspec URIs (#2198)
* Default to RemoteStore for fsspec URIs * fixup * fixup * wip * fixup * Added check for invalid. * fixup * fixup --------- Co-authored-by: Joe Hamman <[email protected]>
1 parent f894335 commit f538662

File tree

6 files changed

+260
-114
lines changed

6 files changed

+260
-114
lines changed

src/zarr/api/asynchronous.py

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ async def open(
194194
zarr_version: ZarrFormat | None = None, # deprecated
195195
zarr_format: ZarrFormat | None = None,
196196
path: str | None = None,
197+
storage_options: dict[str, Any] | None = None,
197198
**kwargs: Any, # TODO: type kwargs as valid args to open_array
198199
) -> AsyncArray | AsyncGroup:
199200
"""Convenience function to open a group or array using file-mode-like semantics.
@@ -211,6 +212,9 @@ async def open(
211212
The zarr format to use when saving.
212213
path : str or None, optional
213214
The path within the store to open.
215+
storage_options : dict
216+
If using an fsspec URL to create the store, these will be passed to
217+
the backend implementation. Ignored otherwise.
214218
**kwargs
215219
Additional parameters are passed through to :func:`zarr.creation.open_array` or
216220
:func:`zarr.hierarchy.open_group`.
@@ -221,7 +225,7 @@ async def open(
221225
Return type depends on what exists in the given store.
222226
"""
223227
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
224-
store_path = await make_store_path(store, mode=mode)
228+
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
225229

226230
if path is not None:
227231
store_path = store_path / path
@@ -276,6 +280,7 @@ async def save_array(
276280
zarr_version: ZarrFormat | None = None, # deprecated
277281
zarr_format: ZarrFormat | None = None,
278282
path: str | None = None,
283+
storage_options: dict[str, Any] | None = None,
279284
**kwargs: Any, # TODO: type kwargs as valid args to create
280285
) -> None:
281286
"""Convenience function to save a NumPy array to the local file system, following a
@@ -291,6 +296,9 @@ async def save_array(
291296
The zarr format to use when saving.
292297
path : str or None, optional
293298
The path within the store where the array will be saved.
299+
storage_options : dict
300+
If using an fsspec URL to create the store, these will be passed to
301+
the backend implementation. Ignored otherwise.
294302
kwargs
295303
Passed through to :func:`create`, e.g., compressor.
296304
"""
@@ -299,7 +307,7 @@ async def save_array(
299307
or _default_zarr_version()
300308
)
301309

302-
store_path = await make_store_path(store, mode="w")
310+
store_path = await make_store_path(store, mode="w", storage_options=storage_options)
303311
if path is not None:
304312
store_path = store_path / path
305313
new = await AsyncArray.create(
@@ -319,6 +327,7 @@ async def save_group(
319327
zarr_version: ZarrFormat | None = None, # deprecated
320328
zarr_format: ZarrFormat | None = None,
321329
path: str | None = None,
330+
storage_options: dict[str, Any] | None = None,
322331
**kwargs: NDArrayLike,
323332
) -> None:
324333
"""Convenience function to save several NumPy arrays to the local file system, following a
@@ -334,22 +343,40 @@ async def save_group(
334343
The zarr format to use when saving.
335344
path : str or None, optional
336345
Path within the store where the group will be saved.
346+
storage_options : dict
347+
If using an fsspec URL to create the store, these will be passed to
348+
the backend implementation. Ignored otherwise.
337349
kwargs
338350
NumPy arrays with data to save.
339351
"""
340352
zarr_format = (
341-
_handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
353+
_handle_zarr_version_or_format(
354+
zarr_version=zarr_version,
355+
zarr_format=zarr_format,
356+
)
342357
or _default_zarr_version()
343358
)
344359

345360
if len(args) == 0 and len(kwargs) == 0:
346361
raise ValueError("at least one array must be provided")
347362
aws = []
348363
for i, arr in enumerate(args):
349-
aws.append(save_array(store, arr, zarr_format=zarr_format, path=f"{path}/arr_{i}"))
364+
aws.append(
365+
save_array(
366+
store,
367+
arr,
368+
zarr_format=zarr_format,
369+
path=f"{path}/arr_{i}",
370+
storage_options=storage_options,
371+
)
372+
)
350373
for k, arr in kwargs.items():
351374
_path = f"{path}/{k}" if path is not None else k
352-
aws.append(save_array(store, arr, zarr_format=zarr_format, path=_path))
375+
aws.append(
376+
save_array(
377+
store, arr, zarr_format=zarr_format, path=_path, storage_options=storage_options
378+
)
379+
)
353380
await asyncio.gather(*aws)
354381

355382

@@ -418,6 +445,7 @@ async def group(
418445
zarr_format: ZarrFormat | None = None,
419446
meta_array: Any | None = None, # not used
420447
attributes: dict[str, JSON] | None = None,
448+
storage_options: dict[str, Any] | None = None,
421449
) -> AsyncGroup:
422450
"""Create a group.
423451
@@ -444,6 +472,9 @@ async def group(
444472
to users. Use `numpy.empty(())` by default.
445473
zarr_format : {2, 3, None}, optional
446474
The zarr format to use when saving.
475+
storage_options : dict
476+
If using an fsspec URL to create the store, these will be passed to
477+
the backend implementation. Ignored otherwise.
447478
448479
Returns
449480
-------
@@ -453,7 +484,7 @@ async def group(
453484

454485
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
455486

456-
store_path = await make_store_path(store)
487+
store_path = await make_store_path(store, storage_options=storage_options)
457488
if path is not None:
458489
store_path = store_path / path
459490

@@ -488,7 +519,7 @@ async def open_group(
488519
synchronizer: Any = None, # not used
489520
path: str | None = None,
490521
chunk_store: StoreLike | None = None, # not used
491-
storage_options: dict[str, Any] | None = None, # not used
522+
storage_options: dict[str, Any] | None = None,
492523
zarr_version: ZarrFormat | None = None, # deprecated
493524
zarr_format: ZarrFormat | None = None,
494525
meta_array: Any | None = None, # not used
@@ -548,10 +579,8 @@ async def open_group(
548579
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)
549580
if chunk_store is not None:
550581
warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2)
551-
if storage_options is not None:
552-
warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2)
553582

554-
store_path = await make_store_path(store, mode=mode)
583+
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
555584
if path is not None:
556585
store_path = store_path / path
557586

@@ -603,6 +632,7 @@ async def create(
603632
) = None,
604633
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
605634
dimension_names: Iterable[str] | None = None,
635+
storage_options: dict[str, Any] | None = None,
606636
**kwargs: Any,
607637
) -> AsyncArray:
608638
"""Create an array.
@@ -674,6 +704,9 @@ async def create(
674704
to users. Use `numpy.empty(())` by default.
675705
676706
.. versionadded:: 2.13
707+
storage_options : dict
708+
If using an fsspec URL to create the store, these will be passed to
709+
the backend implementation. Ignored otherwise.
677710
678711
Returns
679712
-------
@@ -725,7 +758,7 @@ async def create(
725758
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)
726759

727760
mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "w"))
728-
store_path = await make_store_path(store, mode=mode)
761+
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
729762
if path is not None:
730763
store_path = store_path / path
731764

@@ -875,6 +908,7 @@ async def open_array(
875908
zarr_version: ZarrFormat | None = None, # deprecated
876909
zarr_format: ZarrFormat | None = None,
877910
path: PathLike | None = None,
911+
storage_options: dict[str, Any] | None = None,
878912
**kwargs: Any, # TODO: type kwargs as valid args to save
879913
) -> AsyncArray:
880914
"""Open an array using file-mode-like semantics.
@@ -887,6 +921,9 @@ async def open_array(
887921
The zarr format to use when saving.
888922
path : string, optional
889923
Path in store to array.
924+
storage_options : dict
925+
If using an fsspec URL to create the store, these will be passed to
926+
the backend implementation. Ignored otherwise.
890927
**kwargs
891928
Any keyword arguments to pass to the array constructor.
892929
@@ -896,7 +933,7 @@ async def open_array(
896933
The opened array.
897934
"""
898935

899-
store_path = await make_store_path(store)
936+
store_path = await make_store_path(store, storage_options=storage_options)
900937
if path is not None:
901938
store_path = store_path / path
902939

src/zarr/api/synchronous.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def save_group(
134134
zarr_version: ZarrFormat | None = None, # deprecated
135135
zarr_format: ZarrFormat | None = None,
136136
path: str | None = None,
137+
storage_options: dict[str, Any] | None = None,
137138
**kwargs: NDArrayLike,
138139
) -> None:
139140
return sync(
@@ -143,6 +144,7 @@ def save_group(
143144
zarr_version=zarr_version,
144145
zarr_format=zarr_format,
145146
path=path,
147+
storage_options=storage_options,
146148
**kwargs,
147149
)
148150
)

src/zarr/store/common.py

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from zarr.store.local import LocalStore
1212
from zarr.store.memory import MemoryStore
1313

14+
# from zarr.store.remote import RemoteStore
15+
1416
if TYPE_CHECKING:
1517
from zarr.core.buffer import BufferPrototype
1618
from zarr.core.common import AccessModeLiteral
@@ -75,30 +77,69 @@ def __eq__(self, other: Any) -> bool:
7577

7678

7779
async def make_store_path(
78-
store_like: StoreLike | None, *, mode: AccessModeLiteral | None = None
80+
store_like: StoreLike | None,
81+
*,
82+
mode: AccessModeLiteral | None = None,
83+
storage_options: dict[str, Any] | None = None,
7984
) -> StorePath:
85+
from zarr.store.remote import RemoteStore # circular import
86+
87+
used_storage_options = False
88+
8089
if isinstance(store_like, StorePath):
8190
if mode is not None:
8291
assert AccessMode.from_literal(mode) == store_like.store.mode
83-
return store_like
92+
result = store_like
8493
elif isinstance(store_like, Store):
8594
if mode is not None:
8695
assert AccessMode.from_literal(mode) == store_like.mode
8796
await store_like._ensure_open()
88-
return StorePath(store_like)
97+
result = StorePath(store_like)
8998
elif store_like is None:
9099
if mode is None:
91100
mode = "w" # exception to the default mode = 'r'
92-
return StorePath(await MemoryStore.open(mode=mode))
101+
result = StorePath(await MemoryStore.open(mode=mode))
93102
elif isinstance(store_like, Path):
94-
return StorePath(await LocalStore.open(root=store_like, mode=mode or "r"))
103+
result = StorePath(await LocalStore.open(root=store_like, mode=mode or "r"))
95104
elif isinstance(store_like, str):
96-
return StorePath(await LocalStore.open(root=Path(store_like), mode=mode or "r"))
105+
storage_options = storage_options or {}
106+
107+
if _is_fsspec_uri(store_like):
108+
used_storage_options = True
109+
result = StorePath(
110+
RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r")
111+
)
112+
else:
113+
result = StorePath(await LocalStore.open(root=Path(store_like), mode=mode or "r"))
97114
elif isinstance(store_like, dict):
98115
# We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings.
99116
# By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate.
100-
return StorePath(await MemoryStore.open(store_dict=store_like, mode=mode))
101-
raise TypeError
117+
result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode))
118+
else:
119+
msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable]
120+
raise TypeError(msg)
121+
122+
if storage_options and not used_storage_options:
123+
msg = "'storage_options' was provided but unused. 'storage_options' is only used for fsspec filesystem stores."
124+
raise TypeError(msg)
125+
126+
return result
127+
128+
129+
def _is_fsspec_uri(uri: str) -> bool:
130+
"""
131+
Check if a URI looks like a non-local fsspec URI.
132+
133+
Examples
134+
--------
135+
>>> _is_fsspec_uri("s3://bucket")
136+
True
137+
>>> _is_fsspec_uri("my-directory")
138+
False
139+
>>> _is_fsspec_uri("local://my-directory")
140+
False
141+
"""
142+
return "://" in uri or "::" in uri and "local://" not in uri
102143

103144

104145
async def ensure_no_existing_node(store_path: StorePath, zarr_format: ZarrFormat) -> None:

0 commit comments

Comments
 (0)