Commit f9e97aa

Various final fixes
1 parent 09d8f29 commit f9e97aa

File tree: 5 files changed (+110 lines, -57 lines)

src/blosc2/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -350,6 +350,7 @@ def __array_namespace_info__() -> Info:
     frombuffer,
     fromiter,
     get_slice_nchunks,
+    meshgrid,
     nans,
     uninit,
     zeros,
@@ -578,6 +579,7 @@ def __array_namespace_info__() -> Info:
     "matrix_transpose",
     "max",
     "mean",
+    "meshgrid",
     "min",
     "nans",
     "ndarray_from_cframe",

src/blosc2/blosc2_ext.pyx

Lines changed: 2 additions & 2 deletions
@@ -2612,8 +2612,8 @@ cdef class NDArray:
             mask_[i] = mask[i]
         _check_rc(b2nd_squeeze_index(self.array, mask_), "Error while squeezing array")
 
-        if self.array.shape[0] == 1 and self.ndim == 1:
-            self.array.ndim = 0
+        #if self.array.shape[0] == 1 and self.ndim == 1:
+        #    self.array.ndim = 0
 
     def as_ffi_ptr(self):
         return PyCapsule_New(self.array, <char *> "b2nd_array_t*", NULL)

src/blosc2/lazyexpr.py

Lines changed: 8 additions & 3 deletions
@@ -1671,11 +1671,16 @@ def slices_eval_getitem(
 
 
 def infer_reduction_dtype(dtype, operation):
-    # It may change in the future, but for now, this mimics NumPy's (2.1) behavior pretty well
+    # It may change in the future, but mostly array-api compliant
+    my_float = np.result_type(
+        dtype, np.float32 if dtype == np.float32 or dtype == np.complex64 else blosc2.DEFAULT_FLOAT
+    )
     if operation in {ReduceOp.SUM, ReduceOp.PROD}:
-        return np.result_type(dtype, np.int64 if np.issubdtype(dtype, np.integer) else np.float64)
+        if np.issubdtype(dtype, np.unsignedinteger):
+            return np.result_type(dtype, np.uint64)
+        return np.result_type(dtype, np.int64 if np.issubdtype(dtype, np.integer) else my_float)
     elif operation in {ReduceOp.MEAN, ReduceOp.STD, ReduceOp.VAR}:
-        return np.float64
+        return my_float
     elif operation in {ReduceOp.MIN, ReduceOp.MAX}:
         return dtype
     elif operation in {ReduceOp.ANY, ReduceOp.ALL}:
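
Note on the new reduction dtypes: a minimal stand-alone sketch (plain NumPy, not part of the commit) of the promotion rules the patched infer_reduction_dtype() applies. The names DEFAULT_FLOAT and reduction_dtype_sketch are stand-ins introduced here for illustration; DEFAULT_FLOAT is assumed to play the role of blosc2.DEFAULT_FLOAT (presumably float64).

    import numpy as np

    DEFAULT_FLOAT = np.float64  # assumption: stand-in for blosc2.DEFAULT_FLOAT

    def reduction_dtype_sketch(dtype, op):
        # Mirror the promotion rules of the patched infer_reduction_dtype()
        dtype = np.dtype(dtype)
        my_float = np.result_type(
            dtype, np.float32 if dtype in (np.float32, np.complex64) else DEFAULT_FLOAT
        )
        if op in {"sum", "prod"}:
            if np.issubdtype(dtype, np.unsignedinteger):
                return np.result_type(dtype, np.uint64)
            return np.result_type(dtype, np.int64 if np.issubdtype(dtype, np.integer) else my_float)
        if op in {"mean", "std", "var"}:
            return my_float
        return dtype  # min/max keep the input dtype

    print(reduction_dtype_sketch(np.uint8, "sum"))       # uint64 (was int64 before)
    print(reduction_dtype_sketch(np.float32, "mean"))    # float32 (was always float64 before)
    print(reduction_dtype_sketch(np.complex64, "mean"))  # complex64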

src/blosc2/ndarray.py

Lines changed: 86 additions & 43 deletions
@@ -986,6 +986,11 @@ def __index__(self) -> bool:
         )
         return self.__int__()
 
+    def __complex__(self) -> complex:
+        if math.prod(self.shape) != 1:
+            raise ValueError(f"Cannot convert array of shape {self.shape} to complex float.")
+        return complex(self[()])
+
     @is_documented_by(sum)
     def sum(self, axis=None, dtype=None, keepdims=False, **kwargs):
         expr = blosc2.LazyExpr(new_op=(self, None, None))
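
A hedged usage sketch of the new __complex__ hook (not part of the commit): it assumes blosc2.asarray accepts a complex NumPy input, as the asarray docstring examples suggest, and that single-element extraction via self[()] converts cleanly.

    import numpy as np
    import blosc2

    a = blosc2.asarray(np.array([3.0 + 4.0j]))  # single-element array
    print(complex(a))                           # (3+4j)

    b = blosc2.asarray(np.zeros((2, 2)))
    # complex(b) raises ValueError: only arrays with exactly one element convert
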
@@ -3701,7 +3706,7 @@ def arange_fill(inputs, output, offset):
     start, _, step = inputs
     start += offset[0] * step
     stop = start + lout * step
-    if (stop - start) // step == lout:  # USE ARANGE IF POSSIBLE (2X FASTER)
+    if math.ceil((stop - start) / step) == lout:  # USE ARANGE IF POSSIBLE (2X FASTER)
         output[:] = np.arange(start, stop, step, dtype=output.dtype)
     else:  # use linspace to have finer control over exclusion of endpoint for float types
         output[:] = np.linspace(start, stop, lout, endpoint=False, dtype=output.dtype)
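
A quick arithmetic check (not part of the commit) of why the guard uses math.ceil: np.arange(start, stop, step) yields ceil((stop - start) / step) elements, which floor division can undercount for non-integer steps.

    import math
    import numpy as np

    start, stop, step = 0.0, 1.0, 0.3
    print(len(np.arange(start, stop, step)))   # 4 -> [0.0, 0.3, 0.6, 0.9]
    print(math.ceil((stop - start) / step))    # 4, matches, so the arange fast path applies
    print(int((stop - start) // step))         # 3, the old check would skip the fast path
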
@@ -4202,23 +4207,29 @@ def save(array: NDArray, urlpath: str, contiguous=True, **kwargs: Any) -> None:
     array.save(urlpath, contiguous, **kwargs)
 
 
-def asarray(array: Sequence | np.ndarray | blosc2.C2Array | NDArray, copy=True, **kwargs: Any) -> NDArray:
+def asarray(  # noqa : C901
+    array: Sequence | np.ndarray | blosc2.C2Array | NDArray, copy: bool | None = None, **kwargs: Any
+) -> NDArray:
     """Convert the `array` to an `NDArray`.
 
     Parameters
     ----------
     array: array_like
         An array supporting numpy array interface.
 
-    Other Parameters
-    ----------------
+    copy: bool | None, optional
+        Whether or not to copy the input. If True, the function copies.
+        If False, raise a ValueError when a copy is necessary. If None and the
+        input is an NDArray, avoid the copy by returning a lazy expression.
+        Default: None.
+
     kwargs: dict, optional
         Keyword arguments that are supported by the :func:`empty` constructor.
 
     Returns
     -------
-    out: :ref:`NDArray`
-        A new NDArray made of :paramref:`array`.
+    out: :ref:`NDArray` or :ref:`LazyExpr`
+        A new NDArray or LazyExpr made of :paramref:`array`.
 
     Notes
     -----
@@ -4237,8 +4248,7 @@ def asarray(array: Sequence | np.ndarray | blosc2.C2Array | NDArray, copy=True,
     >>> # Create a NDArray from a NumPy array
     >>> nda = blosc2.asarray(a)
     """
-    if not copy:
-        raise ValueError("asarray which avoids copy not implemented yet.")
+
     # Convert scalars to numpy array
     casting = kwargs.pop("casting", "unsafe")
     if casting != "unsafe":
@@ -4256,41 +4266,49 @@ def asarray(array: Sequence | np.ndarray | blosc2.C2Array | NDArray, copy=True,
         blocks = array.blocks
     chunks, blocks = compute_chunks_blocks(array.shape, chunks, blocks, array.dtype, **kwargs)
 
-    # Fast path for small arrays. This is not too expensive in terms of memory consumption.
-    shape = array.shape
-    small_size = 2**24  # 16 MB
-    array_nbytes = math.prod(shape) * array.dtype.itemsize
-    if array_nbytes < small_size:
-        if not isinstance(array, np.ndarray) and hasattr(array, "chunks"):
-            # A getitem operation should be enough to get a numpy array
-            array = array[()]
-
-        array = np.require(array, dtype=dtype, requirements="C")  # require contiguous array
-
-        return blosc2_ext.asarray(array, chunks, blocks, **kwargs)
-
-    # Create the empty array
-    ndarr = empty(shape, array.dtype, chunks=chunks, blocks=blocks, **kwargs)
-    behaved = are_partitions_behaved(shape, chunks, blocks)
-
-    # Get the coordinates of the chunks
-    chunks_idx, nchunks = get_chunks_idx(shape, chunks)
-
-    # Iterate over the chunks and update the empty array
-    for nchunk in range(nchunks):
-        # Compute current slice coordinates
-        coords = tuple(np.unravel_index(nchunk, chunks_idx))
-        slice_ = tuple(
-            slice(c * s, builtins.min((c + 1) * s, shape[i]))
-            for i, (c, s) in enumerate(zip(coords, chunks, strict=True))
-        )
-        # Ensure the array slice is contiguous and of correct dtype
-        array_slice = np.require(array[slice_], dtype=dtype, requirements="C")
-        if behaved:
-            # The whole chunk is to be updated, so this fastpath is safe
-            ndarr.schunk.update_data(nchunk, array_slice, copy=False)
-        else:
-            ndarr[slice_] = array_slice
+    copy = True if copy is None and not isinstance(array, NDArray) else copy
+    if copy:
+        # Fast path for small arrays. This is not too expensive in terms of memory consumption.
+        shape = array.shape
+        small_size = 2**24  # 16 MB
+        array_nbytes = math.prod(shape) * array.dtype.itemsize
+        if array_nbytes < small_size:
+            if not isinstance(array, np.ndarray) and hasattr(array, "chunks"):
+                # A getitem operation should be enough to get a numpy array
+                array = array[()]
+
+            array = np.require(array, dtype=dtype, requirements="C")  # require contiguous array
+
+            return blosc2_ext.asarray(array, chunks, blocks, **kwargs)
+
+        # Create the empty array
+        ndarr = empty(shape, array.dtype, chunks=chunks, blocks=blocks, **kwargs)
+        behaved = are_partitions_behaved(shape, chunks, blocks)
+
+        # Get the coordinates of the chunks
+        chunks_idx, nchunks = get_chunks_idx(shape, chunks)
+
+        # Iterate over the chunks and update the empty array
+        for nchunk in range(nchunks):
+            # Compute current slice coordinates
+            coords = tuple(np.unravel_index(nchunk, chunks_idx))
+            slice_ = tuple(
+                slice(c * s, builtins.min((c + 1) * s, shape[i]))
+                for i, (c, s) in enumerate(zip(coords, chunks, strict=True))
+            )
+            # Ensure the array slice is contiguous and of correct dtype
+            array_slice = np.require(array[slice_], dtype=dtype, requirements="C")
+            if behaved:
+                # The whole chunk is to be updated, so this fastpath is safe
+                ndarr.schunk.update_data(nchunk, array_slice, copy=False)
+            else:
+                ndarr[slice_] = array_slice
+    else:
+        if not isinstance(array, NDArray):
+            raise ValueError("Must always do a copy for asarray unless NDArray provided.")
+        mask = [True] + [False for i in range(array.ndim)]
+        # TODO: make a direct view possible
+        return blosc2.expand_dims(array, axis=0).squeeze(mask)  # way to get a view
 
     return ndarr
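
A short usage sketch (not part of the commit) of the copy semantics introduced above, following the updated docstring; the local names a_np, b, c, d are only illustrative.

    import numpy as np
    import blosc2

    a_np = np.arange(10, dtype=np.int64)

    b = blosc2.asarray(a_np)            # copy=None on non-NDArray input -> behaves like copy=True
    c = blosc2.asarray(b, copy=False)   # NDArray input -> no copy, expand_dims/squeeze "view"
    d = blosc2.asarray(b)               # copy=None on an NDArray -> also takes the no-copy path
    # blosc2.asarray(a_np, copy=False)  # ValueError: a copy is required for non-NDArray input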

@@ -5168,3 +5186,28 @@ def broadcast_to(arr, shape):
         A new array with the given shape.
     """
     return (arr + blosc2.zeros(shape, dtype=arr.dtype)).compute()  # return lazyexpr quickly
+
+
+def meshgrid(arrays: NDArray, indexing: str = "xy") -> Sequence[NDArray]:
+    """
+    Returns coordinate matrices from coordinate vectors.
+
+    Parameters
+    ----------
+    arrays: NDArray
+        An arbitrary number of one-dimensional arrays representing grid coordinates. Each array should have the same numeric data type.
+
+    indexing: str
+        Cartesian 'xy' or matrix 'ij' indexing of output. If zero or one one-dimensional vector(s) are provided, the indexing keyword is ignored.
+        Default: 'xy'.
+
+    Returns
+    -------
+    out: list[NDArray]
+        List of N arrays, where N is the number of provided one-dimensional input arrays, all with the same dtype.
+        For N one-dimensional arrays having lengths Ni = len(xi),
+
+        * if matrix indexing 'ij', then each returned array has shape (N1, N2, N3, ..., Nn).
+        * if Cartesian indexing 'xy', then each returned array has shape (N2, N1, N3, ..., Nn).
+    """
+    raise NotImplementedError("Working on meshgrid")
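
Since blosc2.meshgrid() is still a stub (it raises NotImplementedError), here is a small NumPy sketch, outside the commit, of the shapes the docstring promises:

    import numpy as np

    x = np.arange(3)   # N1 = 3
    y = np.arange(4)   # N2 = 4

    gx, gy = np.meshgrid(x, y, indexing="xy")
    print(gx.shape, gy.shape)   # (4, 3) (4, 3) -> (N2, N1) for Cartesian 'xy'

    gi, gj = np.meshgrid(x, y, indexing="ij")
    print(gi.shape, gj.shape)   # (3, 4) (3, 4) -> (N1, N2) for matrix 'ij'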

tests/ndarray/test_squeeze.py

Lines changed: 12 additions & 9 deletions
@@ -6,24 +6,27 @@
 # LICENSE file in the root directory of this source tree)
 #######################################################################
 
+import numpy as np
 import pytest
 
 import blosc2
 
 
 @pytest.mark.parametrize(
-    ("shape", "chunks", "blocks", "fill_value"),
+    ("shape", "chunks", "blocks", "fill_value", "mask"),
     [
-        ((1, 1230), (1, 100), (1, 3), b"0123"),
-        ((23, 1, 1, 34), (20, 1, 1, 20), None, 1234),
-        ((80, 1, 51, 60, 1), None, (6, 1, 6, 26, 1), 3.333),
-        ((1, 1, 1), None, None, True),
+        ((1, 1230), (1, 100), (1, 3), b"0123", [True, False]),
+        ((23, 1, 1, 34), (20, 1, 1, 20), None, 1234, [False, False, True, False]),
+        ((80, 1, 51, 60, 1), None, (6, 1, 6, 26, 1), 3.333, [False] * 4 + [True]),
+        ((1, 1, 1), None, None, True, [False, True, True]),
     ],
 )
-def test_squeeze(shape, chunks, blocks, fill_value):
+def test_squeeze(shape, chunks, blocks, fill_value, mask):
     a = blosc2.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks)
 
-    a.squeeze()
-    b = a[...]
+    b = np.squeeze(a[...], tuple(i for i, m in enumerate(mask) if m))
+    a_ = a.squeeze(mask)
 
-    assert a.shape == b.shape
+    assert a_.shape == b.shape
+    # TODO: this would work if squeeze returns a view
+    # assert a_.shape != a.shape
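
For reference, a hedged reading (not part of the commit) of the mask semantics this test now exercises: axes flagged True (all of length 1) are removed, mirroring np.squeeze with an explicit axis tuple.

    import numpy as np
    import blosc2

    a = blosc2.full((23, 1, 1, 34), fill_value=1234)
    mask = [False, False, True, False]         # drop only axis 2

    a_ = a.squeeze(mask)
    b = np.squeeze(a[...], tuple(i for i, m in enumerate(mask) if m))
    assert a_.shape == b.shape == (23, 1, 34)  # whether a view is returned is still a TODO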
