More tests on reductions on a single column

FrancescAlted · FrancescAlted · commit f547296603b2 · 2024-12-02T06:39:14.000+01:00
diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
@@ -1768,7 +1768,7 @@ def update_expr(self, new_op):  # noqa: C901
         new_operands = {}
         # where() handling requires evaluating the expression prior to merge.
         # This is different from reductions, where the expression is evaluated
-        # and returned an NumPy array (for usability convenience).
+        # and returned as a NumPy array (for usability convenience).
         # We do things like this to enable the fusion of operations like
         # `a.where(0, 1).sum()`.
         # Another possibility would have been to always evaluate where() and produce
@@ -1783,6 +1783,7 @@ def update_expr(self, new_op):  # noqa: C901
             # We converted some of the operands to NDArray (where() handling above)
             new_operands = {"o0": value1, "o1": value2}
             expression = f"(o0 {op} o1)"
+            return self._new_expr(expression, new_operands, guess=False, out=None, where=None)
         elif isinstance(value1, LazyExpr) and isinstance(value2, LazyExpr):
             # Expression fusion
             # Fuse operands in expressions and detect duplicates
@@ -2113,6 +2114,14 @@ def prod(self, axis=None, dtype=None, keepdims=False, **kwargs):
         return self.compute(_reduce_args=reduce_args, **kwargs)
 
     def get_num_elements(self, axis, item):
+        if hasattr(self, "_where_args") and len(self._where_args) == 1:
+            # We have a where condition, so we need to count the number of elements
+            # fulfilling the condition
+            orig_where_args = self._where_args
+            self._where_args = {"_where_x": blosc2.ones(self.shape, dtype=np.int8)}
+            num_elements = self.sum(axis=axis, dtype=np.int64, item=item)
+            self._where_args = orig_where_args
+            return num_elements
         if np.isscalar(axis):
             axis = (axis,)
         # Compute the number of elements in the array
diff --git a/tests/ndarray/test_lazyexpr_fields.py b/tests/ndarray/test_lazyexpr_fields.py
@@ -502,22 +502,23 @@ def test_iter(shape, chunks, blocks):
     assert _i == shape[0] - 1
 
 
-def test_col_reduction():
+@pytest.mark.parametrize("reduce_op", ["sum", "mean", "min", "max", "std", "var"])
+def test_col_reduction(reduce_op):
     N = 1000
     rng = np.random.default_rng()
     it = ((-x + 1, x - 2, rng.normal()) for x in range(N))
-    sa = blosc2.fromiter(
-        it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), urlpath="sa-1M.b2nd", mode="w"
-    )
+    sa = blosc2.fromiter(it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), chunks=(N // 2,))
 
     # The operations
+    reduc = getattr(blosc2, reduce_op)
     C = sa.fields["C"]
-    s = blosc2.sum(C[C > 0])
-    s2 = blosc2.sum(C["C > 0"])
+    s = reduc(C[C > 0])
+    s2 = reduc(C["C > 0"])  # string version
 
     # Check
+    nreduc = getattr(np, reduce_op)
     nsa = sa[:]
     nC = nsa["C"]
-    ns = np.sum(nC[nC > 0])
+    ns = nreduc(nC[nC > 0])
     np.testing.assert_allclose(s, ns)
     np.testing.assert_allclose(s2, ns)