-
Notifications
You must be signed in to change notification settings - Fork 21
More stable algorithm for variance, standard deviation #456
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
0f29529
1fbf5f8
322f511
adab8e6
93cd9b3
2be4f74
edb655d
dd2e4b6
936ed1d
1968870
d036ebc
12bcb0f
6f5bece
b1f7b5d
cd9a8b8
27448e4
10214cc
a81b1a3
004fddc
4491ce9
c3a6d88
4dcd7c2
c101a2b
98e1b4e
d0d09df
1139a9c
569629c
50ad095
f88e231
77526fd
0f5d587
31f30c9
3b3369f
24fb532
177b8de
7deb84a
120fbf3
4541c46
aa4b9b3
d5c59e3
b721433
4f26ed8
d77c132
3cbe54c
d7d772c
9a51095
1373318
4f15495
591997c
bbc0be2
63d7e96
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,6 @@ | |
|
||
import numpy as np | ||
import pandas as pd | ||
import toolz as tlz | ||
from numpy.typing import ArrayLike, DTypeLike | ||
|
||
from . import aggregate_flox, aggregate_npg, xrutils | ||
|
@@ -355,7 +354,7 @@ def var_chunk(group_idx, array, *, engine: str, axis=-1, size=None, fill_value=N | |
engine=engine, | ||
axis=axis, | ||
size=size, | ||
fill_value=fill_value[2], # Unpack fill value bc it's currently defined for multiarray | ||
fill_value=0, # Unpack fill value bc it's currently defined for multiarray | ||
dtype=dtype, | ||
) | ||
|
||
|
@@ -366,7 +365,7 @@ def var_chunk(group_idx, array, *, engine: str, axis=-1, size=None, fill_value=N | |
engine=engine, | ||
axis=axis, | ||
size=size, | ||
fill_value=fill_value[1], # Unpack fill value bc it's currently defined for multiarray | ||
fill_value=0, # Unpack fill value bc it's currently defined for multiarray | ||
dtype=dtype, | ||
) | ||
|
||
|
@@ -380,7 +379,7 @@ def var_chunk(group_idx, array, *, engine: str, axis=-1, size=None, fill_value=N | |
engine=engine, | ||
axis=axis, | ||
size=size, | ||
fill_value=fill_value[0], # Unpack fill value bc it's currently defined for multiarray | ||
fill_value=0, # Unpack fill value bc it's currently defined for multiarray | ||
dtype=dtype, | ||
) | ||
|
||
|
@@ -450,7 +449,10 @@ def clip_first(array, n=1): | |
|
||
|
||
def _var_finalize(multiarray, ddof=0): | ||
return multiarray.arrays[0] / (multiarray.arrays[2] - ddof) | ||
den = multiarray.arrays[2] - ddof | ||
# preserve nans for groups with 0 obs; so these values are -ddof | ||
den[den < 0] = 0 | ||
return multiarray.arrays[0] / den | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Am I correct that this will throw a divide by zero warning for groups with zero obs? Is that the intended behaviour? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I was relying on it to set NaNs; but you're right; it's probably better to use a mask |
||
|
||
|
||
def _std_finalize(sumsq, sum_, count, ddof=0): | ||
|
@@ -478,10 +480,16 @@ def _std_finalize(sumsq, sum_, count, ddof=0): | |
# dtypes=(None, None, np.intp), | ||
# final_dtype=np.floating, | ||
# ) | ||
|
||
|
||
def blockwise_or_numpy_var(*args, ddof=0, **kwargs): | ||
return _var_finalize(var_chunk(*args, **kwargs), ddof) | ||
|
||
|
||
nanvar = Aggregation( | ||
"nanvar", | ||
chunk=var_chunk, | ||
numpy=tlz.compose(_var_finalize, var_chunk), | ||
numpy=blockwise_or_numpy_var, | ||
combine=(_var_combine,), | ||
finalize=_var_finalize, | ||
fill_value=((0, 0, 0),), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,6 +146,9 @@ def is_scalar(value: Any, include_0d: bool = True) -> bool: | |
|
||
|
||
def notnull(data): | ||
if isinstance(data, tuple) and len(data) == 3 and data == (0, 0, 0): | ||
# boo: another special case for Var | ||
return True | ||
if not is_duck_array(data): | ||
data = np.asarray(data) | ||
|
||
|
@@ -163,6 +166,9 @@ def notnull(data): | |
|
||
|
||
def isnull(data: Any): | ||
if isinstance(data, tuple) and len(data) == 3 and data == (0, 0, 0): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Out of curiosity, what are these lines (and associated lines above) doing? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It didn't like the tuple of 0s, so it's a hack |
||
# boo: another special case for Var | ||
return False | ||
if data is None: | ||
return False | ||
if not is_duck_array(data): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Am I understanding correctly that this would overwrite whatever is passed through in fill_value when the aggregation is defined? And we're assuming that in no instance would a different value of fill_value be wanted?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the concern is None[2] isn't a thing wouldn't it make more sense to have (None, None, None) be the default and keep the unpacking?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I think the hardcoding is fine here. It's probably fine to just set
fill_value=(np.nan,)