Blosc · lshaw8317 · Oct 8, 2025 · Oct 4, 2025 · Oct 4, 2025 · Oct 5, 2025
diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
diff --git a/src/blosc2/linalg.py b/src/blosc2/linalg.py
@@ -9,7 +9,7 @@
 import numpy as np
 
 import blosc2
-from blosc2.ndarray import get_intersecting_chunks, slice_to_chunktuple
+from blosc2.ndarray import get_intersecting_chunks, npvecdot, slice_to_chunktuple
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -340,7 +340,7 @@ def vecdot(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1, **kwargs) ->
     fast_path = kwargs.pop("fast_path", None)  # for testing purposes
     # Added this to pass array-api tests (which use internal getitem to check results)
     if isinstance(x1, np.ndarray) and isinstance(x2, np.ndarray):
-        return np.vecdot(x1, x2, axis=axis)
+        return npvecdot(x1, x2, axis=axis)
 
     x1, x2 = blosc2.asarray(x1), blosc2.asarray(x2)
 
@@ -353,10 +353,6 @@ def vecdot(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1, **kwargs) ->
     a_keep[a_axes] = False
     b_keep = [True] * x2.ndim
     b_keep[b_axes] = False
-    x1shape = np.array(x1.shape)
-    x2shape = np.array(x2.shape)
-    result_shape = np.broadcast_shapes(x1shape[a_keep], x2shape[b_keep])
-    result = blosc2.zeros(result_shape, dtype=np.result_type(x1, x2), **kwargs)
 
     x1shape = np.array(x1.shape)
     x2shape = np.array(x2.shape)
@@ -403,15 +399,15 @@ def vecdot(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1, **kwargs) ->
         if fast_path:  # just load everything, also handles case of 0 in shapes
             bx1 = x1[a_selection]
             bx2 = x2[b_selection]
-            result[res_chunk] += np.vecdot(bx1, bx2, axis=axis)  # handles conjugation of bx1
+            result[res_chunk] += npvecdot(bx1, bx2, axis=axis)  # handles conjugation of bx1
         else:  # operands too big, have to go chunk-by-chunk
             for ochunk in range(0, a_shape_red, a_chunks_red):
                 op_chunk = (slice(ochunk, builtins.min(ochunk + a_chunks_red, x1.shape[a_axes]), 1),)
                 a_selection = a_selection[:a_axes] + op_chunk + a_selection[a_axes + 1 :]
                 b_selection = b_selection[:b_axes] + op_chunk + b_selection[b_axes + 1 :]
                 bx1 = x1[a_selection]
                 bx2 = x2[b_selection]
-                res = np.vecdot(bx1, bx2, axis=axis)  # handles conjugation of bx1
+                res = npvecdot(bx1, bx2, axis=axis)  # handles conjugation of bx1
                 result[res_chunk] += res
     return result
 

diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py
@@ -41,11 +41,16 @@
     nplshift = np.bitwise_left_shift
     nprshift = np.bitwise_right_shift
     npbinvert = np.bitwise_invert
+    npvecdot = np.vecdot
 else:  # not array-api compliant
     nplshift = np.left_shift
     nprshift = np.right_shift
     npbinvert = np.bitwise_not
 
+    def npvecdot(a, b, axis=-1):
+        return np.einsum("...i,...i->...", np.moveaxis(np.conj(a), axis, -1), np.moveaxis(b, axis, -1))
+
+
 # These functions in ufunc_map in ufunc_map_1param are implemented in numexpr and so we call
 # those instead (since numexpr uses multithreading it is faster)
 ufunc_map = {
@@ -2932,6 +2937,7 @@ def clip(
     x: blosc2.Array,
     min: int | float | blosc2.Array | None = None,
     max: int | float | blosc2.Array | None = None,
+    **kwargs: Any,
 ) -> NDArray:
     """
     Clamps each element x_i of the input array x to the range [min, max].
@@ -2949,6 +2955,9 @@ def clip(
         Upper-bound of the range to which to clamp. If None, no upper bound must be applied.
         Default: None.
 
+    kwargs: Any
+        kwargs accepted by the :func:`empty` constructor
+
     Returns
     -------
     out: NDArray
@@ -2960,10 +2969,10 @@ def chunkwise_clip(inputs, output, offset):
         x, min, max = inputs
         output[:] = np.clip(x, min, max)
 
-    return blosc2.lazyudf(chunkwise_clip, (x, min, max), dtype=x.dtype, shape=x.shape)
+    return blosc2.lazyudf(chunkwise_clip, (x, min, max), dtype=x.dtype, shape=x.shape, **kwargs)
 
 
-def logaddexp(x1: int | float | blosc2.Array, x2: int | float | blosc2.Array) -> NDArray:
+def logaddexp(x1: int | float | blosc2.Array, x2: int | float | blosc2.Array, **kwargs: Any) -> NDArray:
     """
     Calculates the logarithm of the sum of exponentiations log(exp(x1) + exp(x2)) for
     each element x1_i of the input array x1 with the respective element x2_i of the
@@ -2974,10 +2983,13 @@ def logaddexp(x1: int | float | blosc2.Array, x2: int | float | blosc2.Array) ->
     x1: blosc2.Array
         First input array. May have any real-valued floating-point data type.
 
-    x2:blosc2.Array
+    x2: blosc2.Array
         Second input array. Must be compatible with x1. May have any
         real-valued floating-point data type.
 
+    kwargs: Any
+        kwargs accepted by the :func:`empty` constructor
+
     Returns
     -------
     out: NDArray
@@ -2995,7 +3007,7 @@ def chunkwise_logaddexp(inputs, output, offset):
 
     if np.issubdtype(dtype, np.integer):
         dtype = blosc2.float32
-    return blosc2.lazyudf(chunkwise_logaddexp, (x1, x2), dtype=dtype, shape=x1.shape)
+    return blosc2.lazyudf(chunkwise_logaddexp, (x1, x2), dtype=dtype, shape=x1.shape, **kwargs)
 
 
 # implemented in python-blosc2
@@ -3064,7 +3076,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         if ufunc in ufunc_map:
             value = inputs[0] if inputs[1] is self else inputs[1]
             _check_allowed_dtypes(value)
-            return blosc2.LazyExpr(new_op=(value, ufunc_map[ufunc], self))
+            return blosc2.LazyExpr(new_op=(inputs[0], ufunc_map[ufunc], inputs[1]))
 
         if ufunc in ufunc_map_1param:
             value = inputs[0]