diff --git a/changes/3403.misc.rst b/changes/3403.misc.rst new file mode 100644 index 0000000000..2ad4821d73 --- /dev/null +++ b/changes/3403.misc.rst @@ -0,0 +1,6 @@ +Moves some indexing-specific exceptions to ``zarr.errors``, and ensures that all Zarr-specific +exception classes accept a pre-formatted string as a single argument. This is a breaking change to +the following exception classes: :class:`zarr.errors.BoundsCheckError`, :class:`zarr.errors.NegativeStepError`, and +:class:`zarr.errors.VindexInvalidSelectionError`. These classes previously generated internally +formatted error messages when given a single argument. After this change, formatting of the error +message is up to the routine raising the error. \ No newline at end of file diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index fdbd3b34bd..f206d48377 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -862,7 +862,8 @@ async def open_group( overwrite=overwrite, attributes=attributes, ) - raise GroupNotFoundError(store, store_path.path) + msg = f"No group found in store {store!r} at path {store_path.path!r}" + raise GroupNotFoundError(msg) async def create( @@ -1268,7 +1269,8 @@ async def open_array( overwrite=overwrite, **kwargs, ) - raise ArrayNotFoundError(store_path.store, store_path.path) from err + msg = f"No array found in store {store_path.store} at path {store_path.path}" + raise ArrayNotFoundError(msg) from err async def open_like( diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 47d9dc1047..ce19f99ba0 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -257,7 +257,8 @@ async def get_array_metadata( else: zarr_format = 2 else: - raise MetadataValidationError("zarr_format", "2, 3, or None", zarr_format) + msg = f"Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '{zarr_format}'." 
# type: ignore[unreachable] + raise MetadataValidationError(msg) metadata_dict: dict[str, JSON] if zarr_format == 2: diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 4c14fb357c..7b2e506a14 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -96,7 +96,8 @@ def parse_node_type(data: Any) -> NodeType: """Parse the node_type field from metadata.""" if data in ("array", "group"): return cast("Literal['array', 'group']", data) - raise MetadataValidationError("node_type", "array or group", data) + msg = f"Invalid value for 'node_type'. Expected 'array' or 'group'. Got '{data}'." + raise MetadataValidationError(msg) # todo: convert None to empty dict @@ -574,7 +575,8 @@ async def open( else: zarr_format = 2 else: - raise MetadataValidationError("zarr_format", "2, 3, or None", zarr_format) + msg = f"Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '{zarr_format}'." # type: ignore[unreachable] + raise MetadataValidationError(msg) if zarr_format == 2: # this is checked above, asserting here for mypy @@ -3129,10 +3131,12 @@ async def create_hierarchy( else: # we have proposed an explicit group, which is an error, given that a # group already exists. - raise ContainsGroupError(store, key) + msg = f"A group exists in store {store!r} at path {key!r}." + raise ContainsGroupError(msg) elif isinstance(extant_node, ArrayV2Metadata | ArrayV3Metadata): # we are trying to overwrite an existing array. this is an error. - raise ContainsArrayError(store, key) + msg = f"An array exists in store {store!r} at path {key!r}." + raise ContainsArrayError(msg) nodes_explicit: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata] = {} @@ -3549,7 +3553,8 @@ def _build_metadata_v3(zarr_json: dict[str, JSON]) -> ArrayV3Metadata | GroupMet Convert a dict representation of Zarr V3 metadata into the corresponding metadata class. 
""" if "node_type" not in zarr_json: - raise MetadataValidationError("node_type", "array or group", "nothing (the key is missing)") + msg = "Required key 'node_type' is missing from the provided metadata document." + raise MetadataValidationError(msg) match zarr_json: case {"node_type": "array"}: return ArrayV3Metadata.from_dict(zarr_json) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 2736915f46..be60f4208f 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -28,6 +28,12 @@ from zarr.core.common import ceildiv, product from zarr.core.metadata import T_ArrayMetadata +from zarr.errors import ( + ArrayIndexError, + BoundsCheckError, + NegativeStepError, + VindexInvalidSelectionError, +) if TYPE_CHECKING: from zarr.core.array import Array, AsyncArray @@ -51,29 +57,6 @@ Fields = str | list[str] | tuple[str, ...] -class ArrayIndexError(IndexError): - pass - - -class BoundsCheckError(IndexError): - _msg = "" - - def __init__(self, dim_len: int) -> None: - self._msg = f"index out of bounds for dimension with length {dim_len}" - - -class NegativeStepError(IndexError): - _msg = "only slices with step >= 1 are supported" - - -class VindexInvalidSelectionError(IndexError): - _msg = ( - "unsupported selection type for vectorized indexing; only " - "coordinate selection (tuple of integer arrays) and mask selection " - "(single Boolean array) are supported; got {!r}" - ) - - def err_too_many_indices(selection: Any, shape: tuple[int, ...]) -> None: raise IndexError(f"too many indices for array; expected {len(shape)}, got {len(selection)}") @@ -361,7 +344,8 @@ def normalize_integer_selection(dim_sel: int, dim_len: int) -> int: # handle out of bounds if dim_sel >= dim_len or dim_sel < 0: - raise BoundsCheckError(dim_len) + msg = f"index out of bounds for dimension with length {dim_len}" + raise BoundsCheckError(msg) return dim_sel @@ -421,7 +405,7 @@ def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int) -> None: # 
normalize start, stop, step = dim_sel.indices(dim_len) if step < 1: - raise NegativeStepError + raise NegativeStepError("only slices with step >= 1 are supported.") object.__setattr__(self, "start", start) object.__setattr__(self, "stop", stop) @@ -744,7 +728,8 @@ def wraparound_indices(x: npt.NDArray[Any], dim_len: int) -> None: def boundscheck_indices(x: npt.NDArray[Any], dim_len: int) -> None: if np.any(x < 0) or np.any(x >= dim_len): - raise BoundsCheckError(dim_len) + msg = f"index out of bounds for dimension with length {dim_len}" + raise BoundsCheckError(msg) @dataclass(frozen=True) @@ -1098,7 +1083,8 @@ def __init__( dim_indexers.append(dim_indexer) if start >= dim_len or start < 0: - raise BoundsCheckError(dim_len) + msg = f"index out of bounds for dimension with length {dim_len}" + raise BoundsCheckError(msg) shape = tuple(s.nitems for s in dim_indexers) @@ -1329,7 +1315,12 @@ def __getitem__( elif is_mask_selection(new_selection, self.array.shape): return self.array.get_mask_selection(new_selection, fields=fields) else: - raise VindexInvalidSelectionError(new_selection) + msg = ( + "unsupported selection type for vectorized indexing; only " + "coordinate selection (tuple of integer arrays) and mask selection " + f"(single Boolean array) are supported; got {new_selection!r}" + ) + raise VindexInvalidSelectionError(msg) def __setitem__( self, selection: CoordinateSelection | MaskSelection, value: npt.ArrayLike @@ -1342,7 +1333,12 @@ def __setitem__( elif is_mask_selection(new_selection, self.array.shape): self.array.set_mask_selection(new_selection, value, fields=fields) else: - raise VindexInvalidSelectionError(new_selection) + msg = ( + "unsupported selection type for vectorized indexing; only " + "coordinate selection (tuple of integer arrays) and mask selection " + f"(single Boolean array) are supported; got {new_selection!r}" + ) + raise VindexInvalidSelectionError(msg) @dataclass(frozen=True) @@ -1368,7 +1364,12 @@ async def getitem( elif 
is_mask_selection(new_selection, self.array.shape): return await self.array.get_mask_selection(new_selection, fields=fields) else: - raise VindexInvalidSelectionError(new_selection) + msg = ( + "unsupported selection type for vectorized indexing; only " + "coordinate selection (tuple of integer arrays) and mask selection " + f"(single Boolean array) are supported; got {new_selection!r}" + ) + raise VindexInvalidSelectionError(msg) def check_fields(fields: Fields | None, dtype: np.dtype[Any]) -> np.dtype[Any]: @@ -1487,7 +1488,12 @@ def get_indexer( elif is_mask_selection(new_selection, shape): return MaskIndexer(cast("MaskSelection", selection), shape, chunk_grid) else: - raise VindexInvalidSelectionError(new_selection) + msg = ( + "unsupported selection type for vectorized indexing; only " + "coordinate selection (tuple of integer arrays) and mask selection " + f"(single Boolean array) are supported; got {new_selection!r}" + ) + raise VindexInvalidSelectionError(msg) elif is_pure_orthogonal_indexing(pure_selection, len(shape)): return OrthogonalIndexer(cast("OrthogonalSelection", selection), shape, chunk_grid) else: diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index d1420b1ddd..649a490409 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -41,13 +41,15 @@ def parse_zarr_format(data: object) -> Literal[3]: if data == 3: return 3 - raise MetadataValidationError("zarr_format", 3, data) + msg = f"Invalid value for 'zarr_format'. Expected '3'. Got '{data}'." + raise MetadataValidationError(msg) def parse_node_type_array(data: object) -> Literal["array"]: if data == "array": return "array" - raise NodeTypeValidationError("node_type", "array", data) + msg = f"Invalid value for 'node_type'. Expected 'array'. Got '{data}'." 
+ raise NodeTypeValidationError(msg) def parse_codecs(data: object) -> tuple[Codec, ...]: diff --git a/src/zarr/errors.py b/src/zarr/errors.py index 472199ff1b..331ae52f9e 100644 --- a/src/zarr/errors.py +++ b/src/zarr/errors.py @@ -1,15 +1,17 @@ -from typing import Any - __all__ = [ + "ArrayIndexError", "ArrayNotFoundError", "BaseZarrError", + "BoundsCheckError", "ContainsArrayAndGroupError", "ContainsArrayError", "ContainsGroupError", "GroupNotFoundError", "MetadataValidationError", + "NegativeStepError", "NodeTypeValidationError", "UnstableSpecificationWarning", + "VindexInvalidSelectionError", "ZarrDeprecationWarning", "ZarrFutureWarning", "ZarrRuntimeWarning", @@ -21,10 +23,19 @@ class BaseZarrError(ValueError): Base error which all zarr errors are sub-classed from. """ - _msg = "" + _msg: str = "{}" - def __init__(self, *args: Any) -> None: - super().__init__(self._msg.format(*args)) + def __init__(self, *args: object) -> None: + """ + If a single argument is passed, treat it as a pre-formatted message. + + If multiple arguments are passed, they are used as arguments for a template string class + variable. This behavior is deprecated. + """ + if len(args) == 1: + super().__init__(args[0]) + else: + super().__init__(self._msg.format(*args)) class NodeNotFoundError(BaseZarrError, FileNotFoundError): @@ -32,29 +43,13 @@ class NodeNotFoundError(BaseZarrError, FileNotFoundError): Raised when a node (array or group) is not found at a certain path. """ - def __init__(self, *args: Any) -> None: - if len(args) == 1: - # Pre-formatted message - super(BaseZarrError, self).__init__(args[0]) - else: - # Store and path arguments - format them - _msg = "No node found in store {!r} at path {!r}" - super(BaseZarrError, self).__init__(_msg.format(*args)) - class ArrayNotFoundError(NodeNotFoundError): """ Raised when an array isn't found at a certain path. 
""" - def __init__(self, *args: Any) -> None: - if len(args) == 1: - # Pre-formatted message - super(BaseZarrError, self).__init__(args[0]) - else: - # Store and path arguments - format them - _msg = "No array found in store {!r} at path {!r}" - super(BaseZarrError, self).__init__(_msg.format(*args)) + _msg = "No array found in store {!r} at path {!r}" class GroupNotFoundError(NodeNotFoundError): @@ -62,14 +57,7 @@ class GroupNotFoundError(NodeNotFoundError): Raised when a group isn't found at a certain path. """ - def __init__(self, *args: Any) -> None: - if len(args) == 1: - # Pre-formatted message - super(BaseZarrError, self).__init__(args[0]) - else: - # Store and path arguments - format them - _msg = "No group found in store {!r} at path {!r}" - super(BaseZarrError, self).__init__(_msg.format(*args)) + _msg = "No group found in store {!r} at path {!r}" class ContainsGroupError(BaseZarrError): @@ -106,8 +94,6 @@ class UnknownCodecError(BaseZarrError): Raised when a unknown codec was used. """ - _msg = "{}" - class NodeTypeValidationError(MetadataValidationError): """ @@ -146,3 +132,15 @@ class ZarrRuntimeWarning(RuntimeWarning): """ A warning for dubious runtime behavior. """ + + +class VindexInvalidSelectionError(IndexError): ... + + +class NegativeStepError(IndexError): ... + + +class BoundsCheckError(IndexError): ... + + +class ArrayIndexError(IndexError): ... diff --git a/src/zarr/storage/_common.py b/src/zarr/storage/_common.py index ff757f9a99..817bda7892 100644 --- a/src/zarr/storage/_common.py +++ b/src/zarr/storage/_common.py @@ -413,9 +413,11 @@ async def ensure_no_existing_node(store_path: StorePath, zarr_format: ZarrFormat extant_node = await _contains_node_v3(store_path) if extant_node == "array": - raise ContainsArrayError(store_path.store, store_path.path) + msg = f"An array exists in store {store_path.store!r} at path {store_path.path!r}." 
+ raise ContainsArrayError(msg) elif extant_node == "group": - raise ContainsGroupError(store_path.store, store_path.path) + msg = f"A group exists in store {store_path.store!r} at path {store_path.path!r}." + raise ContainsGroupError(msg) elif extant_node == "nothing": return msg = f"Invalid value for extant_node: {extant_node}" # type: ignore[unreachable] @@ -476,7 +478,13 @@ async def _contains_node_v2(store_path: StorePath) -> Literal["array", "group", _group = await contains_group(store_path=store_path, zarr_format=2) if _array and _group: - raise ContainsArrayAndGroupError(store_path.store, store_path.path) + msg = ( + "Array and group metadata documents (.zarray and .zgroup) were both found in store " + f"{store_path.store!r} at path {store_path.path!r}. " + "Only one of these files may be present in a given directory / prefix. " + "Remove the .zarray file, or the .zgroup file, or both." + ) + raise ContainsArrayAndGroupError(msg) elif _array: return "array" elif _group: diff --git a/tests/test_api.py b/tests/test_api.py index 9950d385a3..ff969d406f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1231,13 +1231,13 @@ def test_open_modes_creates_group(tmp_path: Path, mode: str) -> None: async def test_metadata_validation_error() -> None: with pytest.raises( MetadataValidationError, - match="Invalid value for 'zarr_format'. Expected '2, 3, or None'. Got '3.0'.", + match="Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '3.0'.", ): await zarr.api.asynchronous.open_group(zarr_format="3.0") # type: ignore[arg-type] with pytest.raises( MetadataValidationError, - match="Invalid value for 'zarr_format'. Expected '2, 3, or None'. Got '3.0'.", + match="Invalid value for 'zarr_format'. Expected 2, 3, or None. 
Got '3.0'.", ): await zarr.api.asynchronous.open_array(shape=(1,), zarr_format="3.0") # type: ignore[arg-type] diff --git a/tests/test_errors.py b/tests/test_errors.py new file mode 100644 index 0000000000..ccc9e597bb --- /dev/null +++ b/tests/test_errors.py @@ -0,0 +1,78 @@ +"""Test errors""" + +from zarr.errors import ( + ArrayNotFoundError, + ContainsArrayAndGroupError, + ContainsArrayError, + ContainsGroupError, + GroupNotFoundError, + MetadataValidationError, + NodeTypeValidationError, +) + + +def test_group_not_found_error() -> None: + """ + Test that calling GroupNotFoundError with multiple arguments returns a formatted string. + This is deprecated behavior. + """ + err = GroupNotFoundError("store", "path") + assert str(err) == "No group found in store 'store' at path 'path'" + + +def test_array_not_found_error() -> None: + """ + Test that calling ArrayNotFoundError with multiple arguments returns a formatted string. + This is deprecated behavior. + """ + err = ArrayNotFoundError("store", "path") + assert str(err) == "No array found in store 'store' at path 'path'" + + +def test_metadata_validation_error() -> None: + """ + Test that calling MetadataValidationError with multiple arguments returns a formatted string. + This is deprecated behavior. + """ + err = MetadataValidationError("a", "b", "c") + assert str(err) == "Invalid value for 'a'. Expected 'b'. Got 'c'." + + +def test_contains_group_error() -> None: + """ + Test that calling ContainsGroupError with multiple arguments returns a formatted string. + This is deprecated behavior. + """ + err = ContainsGroupError("store", "path") + assert str(err) == "A group exists in store 'store' at path 'path'." + + +def test_contains_array_error() -> None: + """ + Test that calling ContainsArrayError with multiple arguments returns a formatted string. + This is deprecated behavior. + """ + err = ContainsArrayError("store", "path") + assert str(err) == "An array exists in store 'store' at path 'path'." 
+ + +def test_contains_array_and_group_error() -> None: + """ + Test that calling ContainsArrayAndGroupError with multiple arguments returns a formatted string. + This is deprecated behavior. + """ + err = ContainsArrayAndGroupError("store", "path") + assert str(err) == ( + "Array and group metadata documents (.zarray and .zgroup) were both found in store 'store' " + "at path 'path'. Only one of these files may be present in a given directory / prefix. " + "Remove the .zarray file, or the .zgroup file, or both." + ) + + +def test_node_type_validation_error() -> None: + """ + Test that calling NodeTypeValidationError with multiple arguments returns a formatted string. + This is deprecated behavior. + """ + err = NodeTypeValidationError("a", "b", "c") + assert str(err) == "Invalid value for 'a'. Expected 'b'. Got 'c'." diff --git a/tests/test_group.py b/tests/test_group.py index e5cfe82daa..2d9070bd67 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -2220,8 +2220,8 @@ def test_build_metadata_v3(option: Literal["array", "group", "invalid"]) -> None metadata_dict = GroupMetadata(zarr_format=3).to_dict() metadata_dict.pop("node_type") # TODO: fix the error message - msg = "Invalid value for 'node_type'. Expected 'array or group'. Got 'nothing (the key is missing)'." - with pytest.raises(MetadataValidationError, match=re.escape(msg)): + msg = "Required key 'node_type' is missing from the provided metadata document." + with pytest.raises(MetadataValidationError, match=msg): _build_metadata_v3(metadata_dict) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 4fdcce7b6b..1405bf533b 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -93,7 +93,7 @@ def test_parse_node_type_valid() -> None: def test_parse_node_type_invalid(node_type: Any) -> None: with pytest.raises( MetadataValidationError, - match=f"Invalid value for 'node_type'. Expected 'array or group'. 
Got '{node_type}'.", + match=f"Invalid value for 'node_type'. Expected 'array' or 'group'. Got '{node_type}'.", ): parse_node_type(node_type)