
Commit fb12bd1

update docstrings and tests; change 'recursive' to 'chained' in multiscale signature.
1 parent 63ddaa7 commit fb12bd1

File tree

2 files changed (+92, -65 lines)

src/xarray_multiscale/multiscale.py

Lines changed: 90 additions & 63 deletions
@@ -4,43 +4,60 @@
 from xarray import DataArray
 from typing import Any, List, Optional, Tuple, Union, Sequence, Callable, Dict
 from scipy.interpolate import interp1d
-from dask.array.core import slices_from_chunks, normalize_chunks
 from dask.array import coarsen
 
 
-
 def multiscale(
     array: Any,
     reduction: Callable[[Any], Any],
     scale_factors: Union[Sequence[int], int],
     pad_mode: Optional[str] = None,
     preserve_dtype: bool = True,
     chunks: Optional[Union[Sequence[int], Dict[str, int]]] = None,
-    recursive: bool = False,
+    chained: bool = True,
 ) -> List[DataArray]:
     """
-    Lazily generate a multiscale representation of an array
+    Generate a lazy, coordinate-aware multiscale representation of an array.
 
     Parameters
     ----------
-    array: ndarray to be downscaled.
+    array : numpy array, dask array, or xarray DataArray
+        The array to be downscaled.
+
+    reduction : callable
+        A function that aggregates chunks of data over windows. See the documentation of `dask.array.coarsen` for the expected
+        signature of this callable.
 
-    reduction: a function that aggregates data over windows.
+    scale_factors : iterable of ints
+        The desired downscaling factors, one for each axis.
 
-    scale_factors: an iterable of integers that specifies how much to downscale each axis of the array.
+    pad_mode : string or None, default=None
+        How arrays should be padded prior to downscaling in order to ensure that each array dimension
+        is evenly divisible by the respective scale factor. When set to `None` (default), the input will be sliced before downscaling
+        if its dimensions are not divisible by `scale_factors`.
 
-    pad_mode: How (or if) the input should be padded. When set to `None` the input will be trimmed as needed.
+    preserve_dtype : bool, default=True
+        Determines whether the multiresolution arrays are all cast to the same dtype as the input.
 
-    preserve_dtype: boolean, defaults to True, determines whether lower levels of the pyramid are coerced to the same dtype as the input. This assumes that
-    the reduction function accepts a "dtype" kwarg, e.g. numpy.mean(x, dtype='int').
+    chunks : sequence or dict of ints, or None, default=None
+        If `chunks` is supplied, all output arrays are returned with this chunking. If not None, this
+        argument is passed directly to the `xarray.DataArray.chunk` method of each output array.
 
-    chunks: Sequence or Dict of ints, defaults to None. If `chunks` is supplied, all DataArrays are rechunked with these chunks before being returned.
+    chained : bool, default=True
+        If True (default), the nth downscaled array is generated by applying the reduction function to the (n-1)th
+        downscaled array with the user-supplied `scale_factors`. This means that the nth downscaled array directly depends on the (n-1)th
+        downscaled array. Note that nonlinear reductions like the windowed mode may give inaccurate results with `chained` set to True.
 
-    recursive: boolean, defaults to False. ToDo
+        If False, the nth downscaled array is generated by applying the reduction function to the 0th downscaled array
+        (i.e., the input array) with the `scale_factors` raised to the nth power. This means that the nth downscaled array directly
+        depends on the input array.
 
-    Returns a list of DataArrays, one per level of downscaling. These DataArrays have `coords` properties that track the changing offset (if any)
-    induced by the downsampling operation. Additionally, the scale factors are stored each DataArray's attrs propery under the key `scale_factors`
+    Returns
     -------
+    result : list of DataArrays
+        The `coords` attributes of these DataArrays track the changing offset (if any)
+        induced by the downsampling operation. Additionally, the scale factors are stored in each DataArray's `attrs` property under the key `scale_factors`.
+
 
     """
     needs_padding = pad_mode != None
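To make the new `chained` flag concrete, here is a minimal usage sketch, not part of the commit, assuming the package exposes `multiscale` at the module path shown above and using `np.mean` as the reduction, as the updated tests below do:

import numpy as np
from xarray_multiscale.multiscale import multiscale

data = np.arange(64, dtype="float64").reshape(8, 8)

# chained=True (default): level n is reduced from level n - 1
chained_pyr = multiscale(data, np.mean, (2, 2))

# chained=False: level n is reduced directly from the input,
# using scale_factors raised to the nth power
unchained_pyr = multiscale(data, np.mean, (2, 2), chained=False)

# expected shapes: [(8, 8), (4, 4), (2, 2), (1, 1)]
print([level.shape for level in chained_pyr])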
@@ -56,19 +73,16 @@ def multiscale(
     else:
         padded_shape = prepad(array, scale_factors, pad_mode=pad_mode).shape
 
-    # figure out the maximum depth
     levels = range(
         0, 1 + get_downscale_depth(padded_shape, scale_factors, pad=needs_padding)
     )
-    scales = tuple(
-        tuple(s ** l for s in scale_factors) for l in levels
-    )
+    scales = tuple(tuple(s ** l for s in scale_factors) for l in levels)
     result = [_ingest_array(array, scales[0])]
 
     for level in levels[1:]:
-        if recursive:
+        if chained:
             scale = scale_factors
-            downscaled = downscale(result[-1], reduction, scale, pad_mode=pad_mode)
+            downscaled = downscale(result[-1], reduction, scale, pad_mode=pad_mode)
         else:
             scale = scales[level]
             downscaled = downscale(result[0], reduction, scale, pad_mode=pad_mode)
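The collapsed `scales` one-liner computes, for each level, the cumulative scale factor relative to the input; these are the factors the unchained branch passes to `downscale`. A quick standalone check of what it produces:

scale_factors = (2, 2, 2)
levels = range(4)
scales = tuple(tuple(s ** l for s in scale_factors) for l in levels)
print(scales)  # ((1, 1, 1), (2, 2, 2), (4, 4, 4), (8, 8, 8))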
@@ -80,14 +94,21 @@ def multiscale(
     if chunks is not None:
         if isinstance(chunks, Sequence):
             _chunks = {k: v for k, v in zip(result[0].dims, chunks)}
-        else:
+        elif isinstance(chunks, dict):
             _chunks = chunks
+        else:
+            raise ValueError(
+                f"Chunks must be a Sequence or a dict, not {type(chunks)}"
+            )
         result = [r.chunk(_chunks) for r in result]
 
     return result
 
 
 def _ingest_array(array: Any, scales: Sequence[int]):
+    """
+    Ingest an array in preparation for downscaling
+    """
     if hasattr(array, "coords"):
         # if the input is a xarray.DataArray, assign a new variable to the DataArray and use the variable
         # `array` to refer to the data property of that array
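The new `elif`/`else` makes the accepted `chunks` types explicit: a sequence is matched to the output dims by position, a dict passes through unchanged, and anything else now raises immediately instead of failing later. A standalone sketch of that normalization (hypothetical helper name, for illustration only):

from collections.abc import Sequence

def normalize_chunks_arg(dims, chunks):
    # mirrors the branch in `multiscale` above
    if isinstance(chunks, Sequence):
        return {k: v for k, v in zip(dims, chunks)}
    elif isinstance(chunks, dict):
        return chunks
    else:
        raise ValueError(f"Chunks must be a Sequence or a dict, not {type(chunks)}")

print(normalize_chunks_arg(("dim_0", "dim_1"), (64, 32)))  # {'dim_0': 64, 'dim_1': 32}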
@@ -101,7 +122,7 @@ def _ingest_array(array: Any, scales: Sequence[int]):
         data = da.asarray(array)
         dims = tuple(f"dim_{d}" for d in range(data.ndim))
         coords = {
-            dim: DataArray(offset + np.arange(s, dtype="float32"), dims=dim)
+            dim: DataArray(offset + np.arange(s, dtype="float"), dims=dim)
             for dim, s, offset in zip(dims, array.shape, get_downsampled_offset(scales))
         }
         name = None
@@ -118,7 +139,13 @@ def even_padding(length: int, window: int) -> int:
     Parameters
     ----------
     length : int
-    window: int
+
+    window : int
+
+    Returns
+    -------
+    int
+        Value that, when added to `length`, results in a sum that is evenly divisible by `window`
     """
     return (window - (length % window)) % window
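The function body is the standard modular-arithmetic idiom for rounding a length up to a multiple of `window`; a few worked values, restated outside the diff:

def even_padding(length: int, window: int) -> int:
    return (window - (length % window)) % window

assert even_padding(10, 2) == 0  # 10 is already divisible by 2
assert even_padding(9, 2) == 1   # 9 + 1 == 10
assert even_padding(10, 3) == 2  # 10 + 2 == 12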

@@ -132,8 +159,10 @@ def logn(x: float, n: float) -> float:
     x : float or int.
     n: float or int.
 
-    Returns np.log(x) / np.log(n)
+    Returns
     -------
+    float
+        np.log(x) / np.log(n)
 
     """
     result: float = np.log(x) / np.log(n)
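`logn` is just the change-of-base identity, log base n of x equals log(x) / log(n), which bounds how many times an axis of a given length can be downscaled. For instance:

import numpy as np

def logn(x: float, n: float) -> float:
    return np.log(x) / np.log(n)

print(logn(8, 2))   # ~3.0: a length-8 axis can be halved 3 times
print(logn(81, 3))  # ~4.0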
@@ -147,20 +176,25 @@ def prepad(
     rechunk: bool = True,
 ) -> da.array:
     """
-    Pad an array such that its new dimensions are evenly divisible by some integer.
+    Lazily pad an array such that its new dimensions are evenly divisible by some integer.
 
     Parameters
     ----------
-    array: An ndarray that will be padded.
+    array : ndarray
+        Array that will be padded.
 
-    scale_factors: An iterable of integers. The output array is guaranteed to have dimensions that are each evenly divisible
-    by the corresponding scale factor, and chunks that are smaller than or equal to the scale factor (if the array has chunks)
+    scale_factors : Sequence of ints
+        The output array is guaranteed to have dimensions that are each evenly divisible
+        by the corresponding scale factor, and chunks that are smaller than or equal
+        to the scale factor (if the array has chunks)
 
-    mode: String. The edge mode used by the padding routine. See `dask.array.pad` for more documentation.
+    pad_mode : str
+        The edge mode used by the padding routine. This parameter will be passed to
+        `dask.array.pad` as the `mode` keyword.
 
-    Returns a dask array with padded dimensions.
+    Returns
     -------
-
+    dask array
     """
 
     if pad_mode == None:
@@ -198,7 +232,8 @@ def prepad(
                 extended_coords, dims=k, attrs=old_coord.attrs
             )
         result = DataArray(
-            result, coords=new_coords, dims=array.dims, attrs=array.attrs)
+            result, coords=new_coords, dims=array.dims, attrs=array.attrs
+        )
     return result
 
 
@@ -214,16 +249,22 @@ def downscale(
 
     Parameters
     ----------
-    array: The narray to be downscaled.
+    array : numpy array, dask array, xarray DataArray
+        The array to be downscaled.
 
-    reduction: The function to apply to each window of the array.
+    reduction : callable
+        A function that aggregates chunks of data over windows. See the documentation of `dask.array.coarsen` for the expected
+        signature of this callable.
 
-    scale_factors: A list if ints specifying how much to downscale the array per dimension.
+    scale_factors : iterable of ints
+        The desired downscaling factors, one for each axis.
 
-    trim_excess: A boolean that determines whether the size of the input array should be increased or decreased such that
-    each scale factor tiles its respective array axis. Defaults to False, which will result in the input being padded.
+    trim_excess : bool, default=False
+        Whether the size of the input array should be increased or decreased such that
+        each scale factor tiles its respective array axis. Defaults to False, which will result in the input being padded.
 
-    **kwargs: extra kwargs passed to dask.array.coarsen
+    **kwargs
+        extra kwargs passed to dask.array.coarsen
 
     Returns the downscaled version of the input as a dask array.
     -------
@@ -243,7 +284,6 @@ def downscale(
         **kwargs,
     )
 
-
     if isinstance(array, xarray.DataArray):
         base_coords = array.coords
         new_coords = base_coords
@@ -256,10 +296,19 @@ def downscale(
                 attrs=base_coords[bc].attrs,
             )
             for s, bc, offset, sc in zip(
-                coarsened.shape, base_coords, get_downsampled_offset(scale_factors), scale_factors
+                coarsened.shape,
+                base_coords,
+                get_downsampled_offset(scale_factors),
+                scale_factors,
             )
         )
-        coarsened = DataArray(coarsened, dims=array.dims, coords=new_coords, attrs=array.attrs, name=array.name)
+        coarsened = DataArray(
+            coarsened,
+            dims=array.dims,
+            coords=new_coords,
+            attrs=array.attrs,
+            name=array.name,
+        )
 
     return coarsened
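For reference, `downscale` delegates the windowed aggregation to `dask.array.coarsen`, whose contract the docstring now points to: the reduction is applied to non-overlapping windows described by an axis-to-factor mapping. A self-contained sketch, not part of this commit:

import numpy as np
import dask.array as da
from dask.array import coarsen

darr = da.from_array(np.arange(16, dtype="float64").reshape(4, 4), chunks=2)

# reduce non-overlapping 2x2 windows on both axes with np.mean
coarse = coarsen(np.mean, darr, {0: 2, 1: 2})
print(coarse.shape)            # (2, 2)
print(coarse.compute()[0, 0])  # 2.5, the mean of the top-left 2x2 window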

@@ -324,25 +373,3 @@ def slice_span(sl: slice) -> int:
     Measure the length of a slice
     """
     return sl.stop - sl.start
-
-
-def blocked_pyramid(
-    arr, block_size: Sequence, scale_factors: Sequence[int] = (2, 2, 2), **kwargs
-):
-    full_pyr = multiscale(arr, scale_factors=scale_factors, **kwargs)
-    slices = slices_from_chunks(normalize_chunks(block_size, arr.shape))
-    absolute_block_size = tuple(map(slice_span, slices[0]))
-
-    results = []
-    for idx, sl in enumerate(slices):
-        regions = [
-            tuple(map(downscale_slice, sl, tuple(np.power(scale_factors, exp))))
-            for exp in range(len(full_pyr))
-        ]
-        if tuple(map(slice_span, sl)) == absolute_block_size:
-            pyr = multiscale(arr[sl], scale_factors=scale_factors, **kwargs)
-        else:
-            pyr = [full_pyr[l][r] for l, r in enumerate(regions)]
-        assert len(pyr) == len(regions)
-        results.append((regions, pyr))
-    return results

tests/test_multiscale.py

Lines changed: 2 additions & 2 deletions
@@ -81,7 +81,7 @@ def test_multiscale():
 
     pyr_trimmed = multiscale(array, np.mean, 2, pad_mode=None)
     pyr_padded = multiscale(array, np.mean, 2, pad_mode="reflect")
-    pyr_trimmed_recursive = multiscale(array, np.mean, 2, pad_mode=None, recursive=True)
+    pyr_trimmed_unchained = multiscale(array, np.mean, 2, pad_mode=None, chained=False)
     assert [p.shape for p in pyr_padded] == [
         shape,
         (5, 5, 5),
@@ -99,6 +99,6 @@ def test_multiscale():
         pyr_trimmed[-2].data.mean().compute(), pyr_trimmed[-1].data.compute().mean()
     )
     assert np.array_equal(
-        pyr_trimmed_recursive[-2].data.mean().compute(), pyr_trimmed_recursive[-1].data.compute().mean()
+        pyr_trimmed_unchained[-2].data.mean().compute(), pyr_trimmed_unchained[-1].data.compute().mean()
     )
     assert np.allclose(pyr_padded[0].data.mean().compute(), 0.17146776406035666)
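These assertions on window means hold because the mean composes: averaging small windows of an already-averaged array equals averaging the corresponding larger windows of the original, so chained and unchained pyramids agree for `np.mean`. A 2D sketch of that identity, outside the test suite:

import numpy as np

x = np.arange(16, dtype="float64").reshape(4, 4)

# two successive rounds of 2x2 window means (the chained scheme)...
once = x.reshape(2, 2, 2, 2).mean(axis=(1, 3))
twice = once.reshape(1, 2, 1, 2).mean(axis=(1, 3))

# ...equal a single mean over the full 4x4 window (the unchained scheme)
assert np.allclose(twice, x.mean(keepdims=True))

A windowed mode, by contrast, does not compose this way, which is why the updated docstring cautions against `chained=True` for nonlinear reductions.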
