Skip to content

Commit 6fb807d

Browse files
committed
Fix binning by datetime.
1 parent 36a3769 commit 6fb807d

File tree

2 files changed

+30
-11
lines changed

2 files changed

+30
-11
lines changed

flox/core.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,7 +1566,7 @@ def groupby_reduce(
15661566
if kwargs["fill_value"] is None:
15671567
kwargs["fill_value"] = agg.fill_value[agg.name]
15681568

1569-
partial_agg = partial(dask_groupby_agg, agg=agg, split_out=split_out, **kwargs)
1569+
partial_agg = partial(dask_groupby_agg, split_out=split_out, **kwargs)
15701570

15711571
if method in ["split-reduce", "cohorts"]:
15721572
cohorts = find_group_cohorts(
@@ -1585,15 +1585,14 @@ def groupby_reduce(
15851585
array_subset = np.take(array_subset, idxr, axis=ax)
15861586
numblocks = np.prod([len(array_subset.chunks[ax]) for ax in axis])
15871587

1588-
# First deep copy becasue we might be doping blockwise,
1589-
# which sets agg.finalize=None, then map-reduce (GH102)
1590-
agg = copy.deepcopy(agg)
1591-
15921588
# get final result for these groups
15931589
r, *g = partial_agg(
15941590
array_subset,
15951591
by[np.ix_(*indexer)],
15961592
expected_groups=pd.Index(cohort),
1593+
# First deep copy because we might be doing blockwise,
1594+
# which sets agg.finalize=None, then map-reduce (GH102)
1595+
agg=copy.deepcopy(agg),
15971596
# reindex to expected_groups at the blockwise step.
15981597
# this approach avoids replacing non-cohort members with
15991598
# np.nan or some other sentinel value, and preserves dtypes
@@ -1619,6 +1618,7 @@ def groupby_reduce(
16191618
array,
16201619
by,
16211620
expected_groups=None if method == "blockwise" else expected_groups,
1621+
agg=agg,
16221622
reindex=reindex,
16231623
method=method,
16241624
sort=sort,

tests/test_core.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -856,16 +856,35 @@ def test_map_reduce_blockwise_mixed():
856856

857857

858858
@requires_dask
859-
@pytest.mark.parametrize("method", ["blockwise", "split-reduce", "map-reduce", "cohorts"])
859+
@pytest.mark.parametrize("method", ["split-reduce", "blockwise", "map-reduce", "cohorts"])
860860
def test_group_by_datetime(engine, method):
861-
t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series()
862-
data = t.dt.dayofyear
863-
actual, _ = groupby_reduce(
864-
dask.array.from_array(data.values, chunks=365),
865-
t,
861+
kwargs = dict(
866862
func="mean",
867863
method=method,
868864
engine=engine,
869865
)
866+
t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series()
867+
data = t.dt.dayofyear
868+
daskarray = dask.array.from_array(data.values, chunks=30)
869+
870+
actual, _ = groupby_reduce(daskarray, t, **kwargs)
870871
expected = data.to_numpy().astype(float)
871872
assert_equal(expected, actual)
873+
874+
if method == "blockwise":
875+
return None
876+
877+
edges = pd.date_range("1999-12-31", "2000-12-31", freq="M").to_series().to_numpy()
878+
actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs)
879+
expected = data.resample("M").mean().to_numpy()
880+
assert_equal(expected, actual)
881+
882+
actual, _ = groupby_reduce(
883+
np.broadcast_to(daskarray, (2, 3, daskarray.shape[-1])),
884+
t.to_numpy(),
885+
isbin=True,
886+
expected_groups=edges,
887+
**kwargs,
888+
)
889+
expected = np.broadcast_to(expected, (2, 3, expected.shape[-1]))
890+
assert_equal(expected, actual)

0 commit comments

Comments
 (0)