Skip to content

Commit 43484a1

Browse files
committed
Merge remote-tracking branch 'upstream/v3' into tom/fix/open-fallback
2 parents 98ba6ca + 81a87d6 commit 43484a1

File tree

21 files changed

+170
-94
lines changed

21 files changed

+170
-94
lines changed

.pre-commit-config.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,7 @@ repos:
4949
hooks:
5050
- id: rst-directive-colons
5151
- id: rst-inline-touching-normal
52+
- repo: https://github.com/numpy/numpydoc
53+
rev: v1.8.0
54+
hooks:
55+
- id: numpydoc-validation

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,7 @@ ignore = [
319319
"PC111", # fix Python code in documentation - enable later
320320
"PC180", # for JavaScript - not interested
321321
]
322+
323+
[tool.numpydoc_validation]
324+
# See https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks for the list of checks
325+
checks = ["GL06", "GL07", "GL10", "PR03", "PR05", "PR06"]

src/zarr/abc/codec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020
from zarr.core.indexing import SelectorTuple
2121

2222
__all__ = [
23-
"BaseCodec",
2423
"ArrayArrayCodec",
2524
"ArrayBytesCodec",
2625
"ArrayBytesCodecPartialDecodeMixin",
2726
"ArrayBytesCodecPartialEncodeMixin",
27+
"BaseCodec",
2828
"BytesBytesCodec",
2929
"CodecInput",
3030
"CodecOutput",

src/zarr/abc/store.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class Store(ABC):
4343
_mode: AccessMode
4444
_is_open: bool
4545

46-
def __init__(self, mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any) -> None:
46+
def __init__(self, *args: Any, mode: AccessModeLiteral = "r", **kwargs: Any) -> None:
4747
self._is_open = False
4848
self._mode = AccessMode.from_literal(mode)
4949

@@ -69,13 +69,10 @@ def __exit__(
6969
async def _open(self) -> None:
7070
if self._is_open:
7171
raise ValueError("store is already open")
72-
if not await self.empty():
73-
if self.mode.update or self.mode.readonly:
74-
pass
75-
elif self.mode.overwrite:
76-
await self.clear()
77-
else:
78-
raise FileExistsError("Store already exists")
72+
if self.mode.str == "w":
73+
await self.clear()
74+
elif self.mode.str == "w-" and not await self.empty():
75+
raise FileExistsError("Store already exists")
7976
self._is_open = True
8077

8178
async def _ensure_open(self) -> None:

src/zarr/api/asynchronous.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ async def load(
160160
161161
Parameters
162162
----------
163-
store : Store or string
163+
store : Store or str
164164
Store or path to directory in file system or name of zip file.
165165
path : str or None, optional
166166
The path within the store from which to load.
@@ -204,7 +204,7 @@ async def open(
204204
205205
Parameters
206206
----------
207-
store : Store or string, optional
207+
store : Store or str, optional
208208
Store or path to directory in file system or name of zip file.
209209
mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
210210
Persistence mode: 'r' means read only (must exist); 'r+' means
@@ -271,7 +271,7 @@ async def save(
271271
272272
Parameters
273273
----------
274-
store : Store or string
274+
store : Store or str
275275
Store or path to directory in file system or name of zip file.
276276
args : ndarray
277277
NumPy arrays with data to save.
@@ -307,7 +307,7 @@ async def save_array(
307307
308308
Parameters
309309
----------
310-
store : Store or string
310+
store : Store or str
311311
Store or path to directory in file system or name of zip file.
312312
arr : ndarray
313313
NumPy array with data to save.
@@ -355,7 +355,7 @@ async def save_group(
355355
356356
Parameters
357357
----------
358-
store : Store or string
358+
store : Store or str
359359
Store or path to directory in file system or name of zip file.
360360
args : ndarray
361361
NumPy arrays with data to save.
@@ -471,7 +471,7 @@ async def group(
471471
472472
Parameters
473473
----------
474-
store : Store or string, optional
474+
store : Store or str, optional
475475
Store or path to directory in file system.
476476
overwrite : bool, optional
477477
If True, delete any pre-existing data in `store` at `path` before
@@ -485,7 +485,7 @@ async def group(
485485
to all attribute read operations.
486486
synchronizer : object, optional
487487
Array synchronizer.
488-
path : string, optional
488+
path : str, optional
489489
Group path within store.
490490
meta_array : array-like, optional
491491
An array instance to use for determining arrays to create and return
@@ -551,7 +551,7 @@ async def open_group(
551551
552552
Parameters
553553
----------
554-
store : Store, string, or mapping, optional
554+
store : Store, str, or mapping, optional
555555
Store or path to directory in file system or name of zip file.
556556
557557
Strings are interpreted as paths on the local file system
@@ -574,9 +574,9 @@ async def open_group(
574574
to all attribute read operations.
575575
synchronizer : object, optional
576576
Array synchronizer.
577-
path : string, optional
577+
path : str, optional
578578
Group path within store.
579-
chunk_store : Store or string, optional
579+
chunk_store : Store or str, optional
580580
Store or path to directory in file system or name of zip file.
581581
storage_options : dict
582582
If using an fsspec URL to create the store, these will be passed to
@@ -670,22 +670,22 @@ async def create(
670670
False, will be set to `shape`, i.e., single chunk for the whole array.
671671
If an int, the chunk size in each dimension will be given by the value
672672
of `chunks`. Default is True.
673-
dtype : string or dtype, optional
673+
dtype : str or dtype, optional
674674
NumPy dtype.
675675
compressor : Codec, optional
676676
Primary compressor.
677677
fill_value : object
678678
Default value to use for uninitialized portions of the array.
679679
order : {'C', 'F'}, optional
680680
Memory layout to be used within each chunk.
681-
store : Store or string
681+
store : Store or str
682682
Store or path to directory in file system or name of zip file.
683683
synchronizer : object, optional
684684
Array synchronizer.
685685
overwrite : bool, optional
686686
If True, delete all pre-existing data in `store` at `path` before
687687
creating the array.
688-
path : string, optional
688+
path : str, optional
689689
Path under which array is stored.
690690
chunk_store : MutableMapping, optional
691691
Separate storage for chunks. If not provided, `store` will be used
@@ -943,11 +943,11 @@ async def open_array(
943943
944944
Parameters
945945
----------
946-
store : Store or string
946+
store : Store or str
947947
Store or path to directory in file system or name of zip file.
948948
zarr_format : {2, 3, None}, optional
949949
The zarr format to use when saving.
950-
path : string, optional
950+
path : str, optional
951951
Path in store to array.
952952
storage_options : dict
953953
If using an fsspec URL to create the store, these will be passed to

src/zarr/codecs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
"ShardingCodec",
2929
"ShardingCodecIndexLocation",
3030
"TransposeCodec",
31-
"VLenUTF8Codec",
3231
"VLenBytesCodec",
32+
"VLenUTF8Codec",
3333
"ZstdCodec",
3434
]
3535

src/zarr/codecs/pipeline.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from zarr.core.common import ChunkCoords, concurrent_map
1818
from zarr.core.config import config
1919
from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice
20+
from zarr.core.metadata.v2 import _default_fill_value
2021
from zarr.registry import register_pipeline
2122

2223
if TYPE_CHECKING:
@@ -247,7 +248,17 @@ async def read_batch(
247248
if chunk_array is not None:
248249
out[out_selection] = chunk_array
249250
else:
250-
out[out_selection] = chunk_spec.fill_value
251+
fill_value = chunk_spec.fill_value
252+
253+
if fill_value is None:
254+
# Zarr V2 allowed `fill_value` to be null in the metadata.
255+
# Zarr V3 requires it to be set. This has already been
256+
# validated when decoding the metadata, but we support reading
257+
# Zarr V2 data and need to support the case where fill_value
258+
# is None.
259+
fill_value = _default_fill_value(dtype=chunk_spec.dtype)
260+
261+
out[out_selection] = fill_value
251262
else:
252263
chunk_bytes_batch = await concurrent_map(
253264
[
@@ -274,7 +285,10 @@ async def read_batch(
274285
tmp = tmp.squeeze(axis=drop_axes)
275286
out[out_selection] = tmp
276287
else:
277-
out[out_selection] = chunk_spec.fill_value
288+
fill_value = chunk_spec.fill_value
289+
if fill_value is None:
290+
fill_value = _default_fill_value(dtype=chunk_spec.dtype)
291+
out[out_selection] = fill_value
278292

279293
def _merge_chunk_array(
280294
self,

src/zarr/core/array.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ async def _create_v2(
380380
chunks=chunks,
381381
order=order,
382382
dimension_separator=dimension_separator,
383-
fill_value=0 if fill_value is None else fill_value,
383+
fill_value=fill_value,
384384
compressor=compressor,
385385
filters=filters,
386386
attributes=attributes,
@@ -1288,11 +1288,11 @@ def get_basic_selection(
12881288
array. May be any combination of int and/or slice or ellipsis for multidimensional arrays.
12891289
out : NDBuffer, optional
12901290
If given, load the selected data directly into this buffer.
1291+
prototype : BufferPrototype, optional
1292+
The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used.
12911293
fields : str or sequence of str, optional
12921294
For arrays with a structured dtype, one or more fields can be specified to
12931295
extract data for.
1294-
prototype : BufferPrototype, optional
1295-
The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used.
12961296
12971297
Returns
12981298
-------
@@ -2284,6 +2284,17 @@ def resize(self, new_shape: ChunkCoords) -> Array:
22842284
This method does not modify the original Array object. Instead, it returns a new Array
22852285
with the specified shape.
22862286
2287+
Notes
2288+
-----
2289+
When resizing an array, the data are not rearranged in any way.
2290+
2291+
If one or more dimensions are shrunk, any chunks falling outside the
2292+
new array shape will be deleted from the underlying store.
2293+
However, chunks that fall only partially inside the new array
2294+
(i.e. boundary chunks) remain intact. As a result,
2295+
data lying outside the new array but inside a boundary chunk
2296+
is restored by a subsequent resize operation that grows the array.
2297+
22872298
Examples
22882299
--------
22892300
>>> import zarr
@@ -2301,17 +2312,6 @@ def resize(self, new_shape: ChunkCoords) -> Array:
23012312
(20000, 1000)
23022313
>>> z2.shape
23032314
(50, 50)
2304-
2305-
Notes
2306-
-----
2307-
When resizing an array, the data are not rearranged in any way.
2308-
2309-
If one or more dimensions are shrunk, any chunks falling outside the
2310-
new array shape will be deleted from the underlying store.
2311-
However, it is noteworthy that the chunks partially falling inside the new array
2312-
(i.e. boundary chunks) will remain intact, and therefore,
2313-
the data falling outside the new array but inside the boundary chunks
2314-
would be restored by a subsequent resize operation that grows the array size.
23152315
"""
23162316
return type(self)(
23172317
sync(

src/zarr/core/group.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ async def get(
299299
300300
Parameters
301301
----------
302-
key : string
302+
key : str
303303
Group member name.
304304
default : object
305305
Default value to return if key is not found (default: None).
@@ -396,7 +396,7 @@ async def require_group(self, name: str, overwrite: bool = False) -> AsyncGroup:
396396
397397
Parameters
398398
----------
399-
name : string
399+
name : str
400400
Group name.
401401
overwrite : bool, optional
402402
Overwrite any existing group with given `name` if present.
@@ -525,7 +525,7 @@ async def create_dataset(self, name: str, **kwargs: Any) -> AsyncArray:
525525
526526
Parameters
527527
----------
528-
name : string
528+
name : str
529529
Array name.
530530
kwargs : dict
531531
Additional arguments passed to :func:`zarr.AsyncGroup.create_array`.
@@ -558,11 +558,11 @@ async def require_dataset(
558558
559559
Parameters
560560
----------
561-
name : string
561+
name : str
562562
Array name.
563563
shape : int or tuple of ints
564564
Array shape.
565-
dtype : string or dtype, optional
565+
dtype : str or dtype, optional
566566
NumPy dtype.
567567
exact : bool, optional
568568
If True, require `dtype` to match exactly. If false, require
@@ -592,11 +592,11 @@ async def require_array(
592592
593593
Parameters
594594
----------
595-
name : string
595+
name : str
596596
Array name.
597597
shape : int or tuple of ints
598598
Array shape.
599-
dtype : string or dtype, optional
599+
dtype : str or dtype, optional
600600
NumPy dtype.
601601
exact : bool, optional
602602
If True, require `dtype` to match exactly. If false, require
@@ -857,7 +857,7 @@ def get(self, path: str, default: DefaultT | None = None) -> Array | Group | Def
857857
858858
Parameters
859859
----------
860-
key : string
860+
path : str
861861
Group member name.
862862
default : object
863863
Default value to return if key is not found (default: None).
@@ -1003,7 +1003,7 @@ def require_group(self, name: str, **kwargs: Any) -> Group:
10031003
10041004
Parameters
10051005
----------
1006-
name : string
1006+
name : str
10071007
Group name.
10081008
overwrite : bool, optional
10091009
Overwrite any existing group with given `name` if present.
@@ -1125,7 +1125,7 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array:
11251125
11261126
Parameters
11271127
----------
1128-
name : string
1128+
name : str
11291129
Array name.
11301130
kwargs : dict
11311131
Additional arguments passed to :func:`zarr.Group.create_array`
@@ -1150,11 +1150,11 @@ def require_dataset(self, name: str, **kwargs: Any) -> Array:
11501150
11511151
Parameters
11521152
----------
1153-
name : string
1153+
name : str
11541154
Array name.
11551155
shape : int or tuple of ints
11561156
Array shape.
1157-
dtype : string or dtype, optional
1157+
dtype : str or dtype, optional
11581158
NumPy dtype.
11591159
exact : bool, optional
11601160
If True, require `dtype` to match exactly. If false, require
@@ -1177,11 +1177,11 @@ def require_array(self, name: str, **kwargs: Any) -> Array:
11771177
11781178
Parameters
11791179
----------
1180-
name : string
1180+
name : str
11811181
Array name.
11821182
shape : int or tuple of ints
11831183
Array shape.
1184-
dtype : string or dtype, optional
1184+
dtype : str or dtype, optional
11851185
NumPy dtype.
11861186
exact : bool, optional
11871187
If True, require `dtype` to match exactly. If false, require

0 commit comments

Comments
 (0)