
Commit ef721d1

formatting
1 parent d75ec94 commit ef721d1

File tree

7 files changed: +215 -35 lines changed


.pre-commit-config.yaml

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+ci:
+  autoupdate_commit_msg: "chore: update pre-commit hooks"
+  autofix_commit_msg: "style: pre-commit fixes"
+default_stages: [commit, push]
+default_language_version:
+  python: python3
+repos:
+  - repo: https://github.com/psf/black
+    rev: 22.12.0
+    hooks:
+      - id: black
+        language_version: python3.9
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: check-yaml
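Once this config is in place, the hooks can be enabled locally with ``pre-commit install`` and run over the whole repository with ``pre-commit run --all-files``. The ``ci`` block applies only to the hosted pre-commit.ci service, setting the commit messages it uses for its autoupdate and autofix commits.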

README.rst

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@ which returns this (a collection of DataArrays, each with decreasing size):
     Coordinates:
       * dim_0    (dim_0) float64 0.5 2.5]

+By default, the values of the downsampled arrays are cast to the same data type as the input. This behavior can be changed with the ``preserve_dtype`` keyword argument to ``multiscale``.

 Generate a multiscale representation of an ``xarray.DataArray``:
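A minimal sketch of the documented ``preserve_dtype`` behavior (array values are illustrative; imports follow this repository's test module):

import numpy as np

from xarray_multiscale.multiscale import multiscale
from xarray_multiscale.reducers import windowed_mean

data = np.arange(16, dtype="uint8").reshape(4, 4)

# Default: each downsampled level is cast back to the input dtype (uint8 here).
levels = multiscale(data, windowed_mean, 2)
print(levels[-1].dtype)  # uint8

# preserve_dtype=False keeps whatever dtype the reducer produces.
levels = multiscale(data, windowed_mean, 2, preserve_dtype=False)
print(levels[-1].dtype)  # float64, since the windowed mean promotes integer inputs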

poetry.lock

Lines changed: 128 additions & 1 deletion
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@ sphinx-issues = "^3.0.1"
 pytest-cov = "^3.0.0"
 pytest = "^7.1.2"
 mypy = "^0.971"
+pre-commit = "^3.0.0"

 [build-system]
 requires = ["poetry>=0.12"]

src/xarray_multiscale/chunks.py

Lines changed: 7 additions & 1 deletion
@@ -38,7 +38,13 @@ def normalize_chunks(
         chunk_size = _chunk_size

     new_chunks = map(
-        tz.first, da.core.normalize_chunks(chunk_size, array.shape, dtype=array.dtype)
+        tz.first,
+        da.core.normalize_chunks(
+            chunk_size,
+            array.shape,
+            dtype=array.dtype,
+            previous_chunks=array.data.chunksize,
+        ),
     )

     result = tuple(new_chunks)
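The substantive change here is the new ``previous_chunks`` argument, which gives dask's chunk normalization the input's chunk grid as a template when resolving "auto" chunk requests. A standalone sketch of the underlying dask call (shape and chunk sizes are illustrative):

import dask.array as da

shape = (100_000, 100_000)

# Without a template, "auto" picks a layout from the dtype and a byte-size target.
free = da.core.normalize_chunks("auto", shape, dtype="uint8")

# With previous_chunks (a tuple of per-axis chunk sizes, as the commit passes
# via array.data.chunksize), the chooser aligns new chunk boundaries with the
# existing grid where it can.
aligned = da.core.normalize_chunks(
    "auto", shape, dtype="uint8", previous_chunks=(1000, 1000)
)

print(free[0][0], aligned[0][0])  # the chosen chunk widths can differ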

src/xarray_multiscale/multiscale.py

Lines changed: 9 additions & 6 deletions
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Hashable, List, Sequence, Union
+from typing import Any, Dict, Hashable, List, Literal, Sequence, Union

 import numpy as np
 import numpy.typing as npt
@@ -13,13 +13,15 @@
 from xarray_multiscale.reducers import WindowedReducer
 from xarray_multiscale.util import adjust_shape, broadcast_to_rank, logn

+ChunkOption = Literal["preserve", "auto"]
+

 def multiscale(
     array: npt.NDArray[Any],
     reduction: WindowedReducer,
     scale_factors: Union[Sequence[int], int],
     preserve_dtype: bool = True,
-    chunks: Union[str, Sequence[int], Dict[Hashable, int]] = "auto",
+    chunks: Union[str, Sequence[int], Dict[Hashable, int]] = "preserve",
     chained: bool = True,
 ) -> List[DataArray]:
     """
@@ -44,10 +46,11 @@ def multiscale(
         input array. If False, output arrays will have data type determined
         by the output of the reduction function.

-    chunks : sequence or dict of ints, or the string "auto" (default)
+    chunks : sequence or dict of ints, or the string "preserve" (default)
         Set the chunking of the output arrays. Applies only to dask arrays.
-        If `chunks` is set to "auto" (the default), then chunk sizes will
-        decrease with each level of downsampling.
+        If `chunks` is set to "preserve" (the default), then chunk sizes will
+        decrease with each level of downsampling. Otherwise, this argument is
+        passed to `xarray_multiscale.chunks.normalize_chunks`.

         Otherwise, this keyword argument will be passed to the
         `xarray.DataArray.chunk` method for each output array,
@@ -108,7 +111,7 @@ def multiscale(
         source = result[0]
         result.append(downscale(source, reduction, scale, preserve_dtype))

-    if darray.chunks is not None:
+    if darray.chunks is not None and chunks != "preserve":
         new_chunks = [normalize_chunks(r, chunks) for r in result]
         result = [r.chunk(ch) for r, ch in zip(result, new_chunks)]
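The behavioral change: with the new default ``chunks="preserve"``, the final rechunking pass is skipped, so output chunk sizes simply shrink with each downsampling level; any other value is normalized and applied per level. A hedged sketch of the two modes, mirroring the shapes used in this commit's tests:

import dask.array as da

from xarray_multiscale.multiscale import multiscale
from xarray_multiscale.reducers import windowed_mean

arr = da.zeros((16, 16, 16), chunks=(4, 4, 4))

# Default ("preserve"): no rechunking, so chunks halve along with the shape.
preserved = multiscale(arr, windowed_mean, 2)
print([m.data.chunksize for m in preserved])  # (4, 4, 4), (2, 2, 2), ...

# Explicit chunks: each level is rechunked toward the requested layout,
# clipped by that level's shape.
explicit = multiscale(arr, windowed_mean, 2, chunks=(4, 4, 4))
print([m.data.chunksize for m in explicit])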

tests/test_multiscale.py

Lines changed: 53 additions & 27 deletions
@@ -4,9 +4,14 @@
 from xarray import DataArray
 from xarray.testing import assert_equal

-from xarray_multiscale.multiscale import (adjust_shape, downsampling_depth,
-                                          downscale, downscale_coords,
-                                          downscale_dask, multiscale)
+from xarray_multiscale.multiscale import (
+    adjust_shape,
+    downsampling_depth,
+    downscale,
+    downscale_coords,
+    downscale_dask,
+    multiscale,
+)
 from xarray_multiscale.reducers import windowed_mean


@@ -130,33 +135,54 @@ def test_multiscale(ndim: int, chained: bool):

 def test_chunking():
     ndim = 3
-    shape = (9,) * ndim
-    base_array = da.zeros(shape, chunks=(1,) * ndim)
-    chunks = (1,) * ndim
+    shape = (16,) * ndim
+    chunks = (4,) * ndim
+    base_array = da.zeros(shape, chunks=chunks)
     reducer = windowed_mean
-    multi = multiscale(base_array, reducer, 2, chunks=chunks)
-    assert all([m.data.chunksize == chunks for m in multi])
-
-    chunks = (3,) * ndim
-    multi = multiscale(base_array, reducer, 2, chunks=chunks)
-    for m in multi:
-        assert m.data.chunksize == chunks or m.data.chunksize == m.data.shape
-
-    chunks = (3,) * ndim
-    multi = multiscale(base_array, reducer, 2, chunks=chunks)
-    for m in multi:
-        assert (
-            np.greater_equal(m.data.chunksize, chunks).all()
-            or m.data.chunksize == m.data.shape
-        )
+    scale_factors = (2,) * ndim
+
+    multi = multiscale(base_array, reducer, scale_factors)
+    expected_chunks = [
+        np.floor_divide(chunks, [s**idx for s in scale_factors])
+        for idx, m in enumerate(multi)
+    ]
+    expected_chunks = [
+        x
+        if np.all(x)
+        else [
+            1,
+        ]
+        * ndim
+        for x in expected_chunks
+    ]
+    assert all(
+        [np.array_equal(m.data.chunksize, e) for m, e in zip(multi, expected_chunks)]
+    )
+
+    multi = multiscale(base_array, reducer, scale_factors, chunks=chunks)
+    expected_chunks = [
+        chunks if np.greater(m.shape, chunks).all() else m.shape
+        for idx, m in enumerate(multi)
+    ]
+    assert all(
+        [np.array_equal(m.data.chunksize, e) for m, e in zip(multi, expected_chunks)]
+    )
+
+    chunks = (3, -1, -1)
+    multi = multiscale(base_array, reducer, scale_factors, chunks=chunks)
+    expected_chunks = [
+        (min(chunks[0], m.shape[0]), m.shape[1], m.shape[2]) for m in multi
+    ]
+    assert all(
+        [np.array_equal(m.data.chunksize, e) for m, e in zip(multi, expected_chunks)]
+    )

     chunks = 3
-    multi = multiscale(base_array, reducer, 2, chunks=chunks)
-    for m in multi:
-        assert (
-            np.greater_equal(m.data.chunksize, (chunks,) * ndim).all()
-            or m.data.chunksize == m.data.shape
-        )
+    multi = multiscale(base_array, reducer, scale_factors, chunks=chunks)
+    expected_chunks = [tuple(min(chunks, s) for s in m.shape) for m in multi]
+    assert all(
+        [np.array_equal(m.data.chunksize, e) for m, e in zip(multi, expected_chunks)]
+    )


 def test_coords():
