initial commit

TomNicholas · TomNicholas · commit 50445b9582d5 · 2023-03-24T12:45:32.000-04:00
diff --git a/cubed_xarray/__init__.py b/cubed_xarray/__init__.py
@@ -0,0 +1,9 @@
+from importlib.metadata import version
+
+
+try:
+    __version__ = version("cubed-xarray")
+except Exception:
+    # Local copy or not installed with setuptools.
+    # Disable minimum version checks on downstream libraries.
+    __version__ = "999"
diff --git a/cubed_xarray/cubedmanager.py b/cubed_xarray/cubedmanager.py
@@ -0,0 +1,217 @@
+import functools
+import sys
+from abc import ABC, abstractmethod
+from collections.abc import Sequence
+from importlib.metadata import entry_points
+from typing import TYPE_CHECKING, Any, Callable, Generic, Optional, TypeVar, Union, Tuple, List
+
+import numpy as np
+
+from xarray.core import utils
+from xarray.core.parallelcompat import ChunkManagerEntrypoint
+from xarray.core.pycompat import is_chunked_array, is_duck_dask_array
+
+T_ChunkedArray = TypeVar("T_ChunkedArray")
+
+# TODO importing TypeAlias is a pain on python 3.9 without typing_extensions in the CI
+# T_Chunks: TypeAlias = tuple[tuple[int, ...], ...]
+T_Chunks = Any
+
+CHUNK_MANAGERS: dict[str, type["ChunkManagerEntrypoint"]] = {}
+
+if TYPE_CHECKING:
+    from xarray.core.types import CubedArray, ZarrArray
+
+
+class CubedManager(ChunkManagerEntrypoint["CubedArray"]):
+    array_cls: type["CubedArray"]
+
+    def __init__(self):
+        from cubed import Array
+
+        self.array_cls = Array
+
+    def chunks(self, data: "CubedArray") -> T_Chunks:
+        return data.chunks
+
+    def from_array(self, data: np.ndarray, chunks, **kwargs) -> "CubedArray":
+        from cubed import Array, from_array
+
+        from xarray.core import indexing
+
+        # cubed-specific kwargs
+        spec = kwargs.pop("spec", None)
+
+        if isinstance(data, Array):
+            data = data.rechunk(chunks)
+        elif is_duck_dask_array(data):
+            raise TypeError("Trying to rechunk a dask array using cubed")
+        else:
+            if isinstance(data, indexing.ExplicitlyIndexed):
+                # Unambiguously handle array storage backends (like NetCDF4 and h5py)
+                # that can't handle general array indexing. For example, in netCDF4 you
+                # can do "outer" indexing along two dimensions independently, which
+                # works differently from how NumPy handles it.
+                # dask.array.from_array works by lazy indexing with a tuple of slices.
+                # Using OuterIndexer is a pragmatic choice: dask does not yet handle
+                # different indexing types in an explicit way:
+                # https://github.com/dask/dask/issues/2883
+                data = indexing.ImplicitToExplicitIndexingAdapter(
+                    data, indexing.OuterIndexer
+                )
+
+            if utils.is_dict_like(chunks):
+                chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape))
+
+            data = from_array(
+                data,
+                chunks,
+                spec=spec,
+            )
+
+        return data
+
+    def rechunk(self, data: "CubedArray", chunks, **kwargs) -> "CubedArray":
+        return data.rechunk(chunks, **kwargs)
+
+    def compute(self, *data: "CubedArray", **kwargs) -> np.ndarray:
+        from cubed import compute
+
+        return compute(*data, **kwargs)
+
+    @property
+    def array_api(self) -> Any:
+        from cubed import array_api
+
+        return array_api
+
+    def reduction(
+        self,
+        arr: T_ChunkedArray,
+        func: Callable,
+        combine_func: Optional[Callable] = None,
+        aggregate_func: Optional[Callable] = None,
+        axis: Optional[Union[int, Sequence[int]]] = None,
+        dtype: Optional[np.dtype] = None,
+        keepdims: bool = False,
+    ) -> T_ChunkedArray:
+        from cubed.core.ops import reduction
+
+        return reduction(
+            arr,
+            func=func,
+            combine_func=combine_func,
+            aggegrate_func=aggregate_func,  # TODO fix the typo in argument name in cubed
+            axis=axis,
+            dtype=dtype,
+            keepdims=keepdims,
+        )
+
+    def map_blocks(
+        self,
+        func,
+        *args,
+        dtype=None,
+        chunks=None,
+        drop_axis=[],
+        new_axis=None,
+        **kwargs,
+    ):
+        from cubed.core.ops import map_blocks
+
+        return map_blocks(
+            func,
+            *args,
+            dtype=dtype,
+            chunks=chunks,
+            drop_axis=drop_axis,
+            new_axis=new_axis,
+            **kwargs,
+        )
+
+    def blockwise(
+        self,
+        func,
+        out_ind,
+        *args: Any,
+        # can't type this as mypy assumes args are all same type, but blockwise args alternate types
+        dtype=None,
+        adjust_chunks=None,
+        new_axes=None,
+        align_arrays=True,
+        target_store=None,
+        **kwargs,
+    ):
+        from cubed.core.ops import blockwise
+
+        # TODO where to get the target_store kwarg from? Filter down from a blockwise call? Set as attribute on CubedManager?
+
+        return blockwise(
+            func,
+            out_ind,
+            *args,
+            dtype=dtype,
+            adjust_chunks=adjust_chunks,
+            new_axes=new_axes,
+            align_arrays=align_arrays,
+            target_store=target_store,
+            **kwargs,
+        )
+
+    def apply_gufunc(
+        self,
+        func,
+        signature,
+        *args,
+        axes=None,
+        axis=None,
+        keepdims=False,
+        output_dtypes=None,
+        output_sizes=None,
+        vectorize=None,
+        allow_rechunk=False,
+        meta=None,
+        **kwargs,
+    ):
+        if allow_rechunk:
+            raise NotImplementedError(
+                "cubed.apply_gufunc doesn't support allow_rechunk"
+            )
+        if keepdims:
+            raise NotImplementedError("cubed.apply_gufunc doesn't support keepdims")
+
+        from cubed import apply_gufunc
+
+        return apply_gufunc(
+            func,
+            signature,
+            *args,
+            axes=axes,
+            axis=axis,
+            output_dtypes=output_dtypes,
+            output_sizes=output_sizes,
+            vectorize=vectorize,
+            **kwargs,
+        )
+
+    def unify_chunks(
+        self, *args, **kwargs
+    ) -> tuple[dict[str, T_Chunks], list["CubedArray"]]:
+        from cubed.core import unify_chunks
+
+        return unify_chunks(*args, **kwargs)
+
+    def store(
+        self,
+        sources: Union["CubedArray", Sequence["CubedArray"]],
+        targets: Union["ZarrArray", Sequence["ZarrArray"]],
+        **kwargs: dict[str, Any],
+    ):
+        """Used when writing to any backend."""
+        from cubed.core.ops import store
+
+        return store(
+            sources,
+            targets,
+            **kwargs,
+        )
diff --git a/cubed_xarray/tests/__init__.py b/cubed_xarray/tests/__init__.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,54 @@
+[project]
+name = "cubed-xarray"
+authors = [
+    {name = "Tom Nicholas", email = "tomnicholas1@googlemail.com"}
+]
+description = "Interface for using cubed with xarray for parallel computation."
+license = {text = "Apache-2"}
+readme = "README.md"
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Environment :: Console",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Topic :: Scientific/Engineering",
+]
+requires-python = ">=3.9"
+dependencies = [
+    "numpy >= 1.17",
+    "xarray >= 0.16.1",
+    "cubed >= 0.6.0",
+]
+dynamic = ["version"]
+
+[project.urls]
+Home = "https://github.com/xarray-contrib/cubed-xarray"
+Documentation = "https://github.com/xarray-contrib/cubed-xarray#readme"
+
+[tool.setuptools.packages.find]
+include = [
+    "cubed_xarray",
+    "cubed_xarray.tests",
+]
+
+[build-system]
+requires = ["setuptools >= 64", "setuptools_scm >= 7.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools_scm]
+fallback_version = "999"
+
+[tool.pytest.ini_options]
+junit_family = "xunit2"
+
+[tool.isort]
+profile = "black"
+skip_gitignore = "true"
+force_to_top = "true"
+default_section = "THIRDPARTY"
+known_first_party = "cubed_xarray"
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+cubed>=0.6.0
+numpy>=1.17.1
+xarray>=2023.03.0
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,3 @@
+[options.entry_points]
+xarray.chunkmanagers =
+    cubed = cubed_xarray.cubedmanager:CubedManager
diff --git a/setup.py b/setup.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+from setuptools import setup
+
+if __name__ == "__main__":
+    setup()

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+cubed>=0.6.0`
	`2`	`+numpy>=1.17.1`
	`3`	`+xarray>=2023.03.0`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[options.entry_points]`
	`2`	`+xarray.chunkmanagers =`
	`3`	`+ cubed = cubed_xarray.cubedmanager:CubedManager`