Skip to content

Commit 1803f77

Browse files
Fix mixing of blockwise and map-reduce strategies. (#103)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 1954380 commit 1803f77

File tree

3 files changed

+20
-1
lines changed

3 files changed

+20
-1
lines changed

docs/source/user-stories/climatology-hourly.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1701,7 +1701,8 @@
17011701
"id": "494766c2-305a-4518-b2b7-a85bcc7fd5b2",
17021702
"metadata": {},
17031703
"source": [
1704-
"View the performance report [here](https://rawcdn.githack.com/dcherian/flox/592c46ba0bb859f732968b68426b6332caebc213/docs/source/user-stories/hourly-climatology.html)"
1704+
"View the performance report\n",
1705+
"[here](https://rawcdn.githack.com/dcherian/flox/592c46ba0bb859f732968b68426b6332caebc213/docs/source/user-stories/hourly-climatology.html)\n"
17051706
]
17061707
}
17071708
],

flox/core.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,6 +1583,10 @@ def groupby_reduce(
15831583
array_subset = np.take(array_subset, idxr, axis=ax)
15841584
numblocks = np.prod([len(array_subset.chunks[ax]) for ax in axis])
15851585

1586+
# First deep copy because we might be doing blockwise,
1587+
# which sets agg.finalize=None, then map-reduce (GH102)
1588+
agg = copy.deepcopy(agg)
1589+
15861590
# get final result for these groups
15871591
r, *g = partial_agg(
15881592
array_subset,

tests/test_core.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,3 +828,17 @@ def test_datetime_binning():
828828
expected = pd.cut(by, time_bins).codes.copy()
829829
expected[0] = 14 # factorize doesn't return -1 for nans
830830
assert_equal(group_idx, expected)
831+
832+
833+
@requires_dask
834+
def test_map_reduce_blockwise_mixed():
835+
t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series()
836+
data = t.dt.dayofyear
837+
actual = groupby_reduce(
838+
dask.array.from_array(data.values, chunks=365),
839+
t.dt.month,
840+
func="mean",
841+
method="split-reduce",
842+
)
843+
expected = groupby_reduce(data, t.dt.month, func="mean")
844+
assert_equal(expected, actual)

0 commit comments

Comments
 (0)