diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 98979ce05d7..e61e920ce32 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -147,6 +147,8 @@ def _infer_coords_and_dims( "data" ) + original_dims = dims # Keep reference for error messages + if isinstance(dims, str): dims = (dims,) elif dims is None: @@ -161,12 +163,27 @@ def _infer_coords_and_dims( coord, name=dim, auto_convert=False ).to_index_variable() dims[n] = coord.name - dims_tuple = tuple(dims) + elif isinstance(dims, Iterable): + dims = tuple(dims) + else: + # Single non-string, non-iterable hashable (int, UUID, etc.) + dims = (dims,) + dims_tuple = dims if len(dims_tuple) != len(shape): - raise ValueError( - "different number of dimensions on data " - f"and dims: {len(shape)} vs {len(dims_tuple)}" - ) + # Provide helpful error message for tuple ambiguity case + if isinstance(original_dims, tuple) and len(dims_tuple) > 1 and len(shape) == 1: + raise ValueError( + f"You passed dims={original_dims} for 1-dimensional data. " + f"This is ambiguous: did you mean {len(dims_tuple)} separate dimensions, " + f"or a single dimension with tuple name {original_dims}? " + f"For a single tuple-named dimension, use dims=[{original_dims}]. " + f"For multiple dimensions, use {len(dims_tuple)}-dimensional data." + ) + else: + raise ValueError( + "different number of dimensions on data " + f"and dims: {len(shape)} vs {len(dims_tuple)}" + ) for d in dims_tuple: if not hashable(d): raise TypeError(f"Dimension {d} is not hashable") @@ -321,6 +338,10 @@ class DataArray( to the number of dimensions. If this argument is omitted, dimension names are taken from ``coords`` (if possible) and otherwise default to ``['dim_0', ... 'dim_n']``. + + Note: Tuples are treated as sequences, so ('a', 'b') means two + dimensions named 'a' and 'b'. To use a tuple as a single dimension + name, wrap it in a list: [('a', 'b')]. name : str or None, optional Name of this array. 
attrs : dict_like or None, optional diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index d22fc37aa4f..ba926cc9913 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1707,8 +1707,8 @@ def __init__( ) self._index_type = index_type - self._indexes = dict(**indexes) - self._variables = dict(**variables) + self._indexes = dict(indexes) + self._variables = dict(variables) self._dims: Mapping[Hashable, int] | None = None self.__coord_name_id: dict[Any, int] | None = None diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 06d7218fe7c..e134b75a1fb 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -47,6 +47,7 @@ is_duck_dask_array, maybe_coerce_to_str, ) +from xarray.namedarray._typing import _DimsLike from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import ( @@ -369,7 +370,7 @@ class Variable(NamedArray, AbstractArray, VariableArithmetic): def __init__( self, - dims, + dims: _DimsLike, data: T_DuckArray | ArrayLike, attrs=None, encoding=None, @@ -378,10 +379,14 @@ def __init__( """ Parameters ---------- - dims : str or sequence of str - Name(s) of the the data dimension(s). Must be either a string (only - for 1D data) or a sequence of strings with length equal to the + dims : Hashable or sequence of Hashable + Name(s) of the data dimension(s). Must be either a Hashable + (only for 1D data) or a sequence of Hashables with length equal to the number of dimensions. + + Note: Tuples are treated as sequences, so ('a', 'b') means two + dimensions named 'a' and 'b'. To use a tuple as a single dimension + name, wrap it in a list: [('a', 'b')]. data : array_like Data array which supports numpy-like data access. 
attrs : dict_like or None, optional diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 9610b96d4f9..0244070dd60 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -81,7 +81,7 @@ def dtype(self) -> _DType_co: ... _Dim = Hashable _Dims = tuple[_Dim, ...] -_DimsLike = Union[str, Iterable[_Dim]] +_DimsLike = Union[_Dim, Iterable[_Dim]] # https://data-apis.org/array-api/latest/API_specification/indexing.html # TODO: np.array_api was bugged and didn't allow (None,), but should! diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index dac8162ca45..7c5ed14ad27 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -500,12 +500,31 @@ def dims(self, value: _DimsLike) -> None: self._dims = self._parse_dimensions(value) def _parse_dimensions(self, dims: _DimsLike) -> _Dims: - dims = (dims,) if isinstance(dims, str) else tuple(dims) + original_dims = dims # Keep reference to original input for error messages + + if isinstance(dims, str): + dims = (dims,) + elif isinstance(dims, Iterable): + dims = tuple(dims) + else: + # Single non-string, non-iterable hashable (int, UUID, etc.) + dims = (dims,) + if len(dims) != self.ndim: - raise ValueError( - f"dimensions {dims} must have the same length as the " - f"number of data dimensions, ndim={self.ndim}" - ) + # Provide a more helpful error message that explains the tuple ambiguity + if isinstance(original_dims, tuple) and len(dims) > 1 and self.ndim == 1: + raise ValueError( + f"You passed dims={original_dims} for 1-dimensional data. " + f"This is ambiguous: did you mean {len(dims)} separate dimensions, " + f"or a single dimension with tuple name {original_dims}? " + f"For a single tuple-named dimension, use dims=[{original_dims}]. " + f"For multiple dimensions, use {len(dims)}-dimensional data." 
+ ) + else: + raise ValueError( + f"dimensions {dims} must have the same length as the " + f"number of data dimensions, ndim={self.ndim}" + ) if len(set(dims)) < len(dims): repeated_dims = {d for d in dims if dims.count(d) > 1} warnings.warn( diff --git a/xarray/tests/test_hashable.py b/xarray/tests/test_hashable.py index 9f92c604dc3..c64b093b999 100644 --- a/xarray/tests/test_hashable.py +++ b/xarray/tests/test_hashable.py @@ -1,5 +1,6 @@ from __future__ import annotations +import uuid from enum import Enum from typing import TYPE_CHECKING, Union @@ -10,7 +11,7 @@ if TYPE_CHECKING: from xarray.core.types import TypeAlias - DimT: TypeAlias = Union[int, tuple, "DEnum", "CustomHashable"] + DimT: TypeAlias = Union[int, tuple, "DEnum", "CustomHashable", uuid.UUID] class DEnum(Enum): @@ -32,15 +33,38 @@ def __hash__(self) -> int: pytest.param(("a", "b"), id="tuple"), pytest.param(DEnum.dim, id="enum"), pytest.param(CustomHashable(3), id="HashableObject"), + pytest.param(uuid.UUID("12345678-1234-5678-1234-567812345678"), id="uuid"), + ], +) + +parametrize_wrapped = pytest.mark.parametrize( + "wrapped", + [ + pytest.param(True, id="wrapped"), + pytest.param(False, id="bare"), ], ) @parametrize_dim -def test_hashable_dims(dim: DimT) -> None: - v = Variable([dim], [1, 2, 3]) - da = DataArray([1, 2, 3], dims=[dim]) - Dataset({"a": ([dim], [1, 2, 3])}) +@parametrize_wrapped +def test_hashable_dims(dim: DimT, wrapped: bool) -> None: + # Pass dims either wrapped in a list or bare + dims_arg = [dim] if wrapped else dim + + # Bare tuple case should error with helpful message for 1D data + if not wrapped and isinstance(dim, tuple): + with pytest.raises(ValueError, match="This is ambiguous"): + Variable(dims_arg, [1, 2, 3]) + with pytest.raises(ValueError, match="This is ambiguous"): + DataArray([1, 2, 3], dims=dims_arg) + with pytest.raises(ValueError): + Dataset({"a": (dims_arg, [1, 2, 3])}) + return # Don't run the other tests for this case + + v = Variable(dims_arg, [1, 2, 3]) 
+ da = DataArray([1, 2, 3], dims=dims_arg) + Dataset({"a": (dims_arg, [1, 2, 3])}) # alternative constructors DataArray(v)