1 file changed: +9 -11 lines changed
Original file line number | Diff line number | Diff line change
@@ -2164,11 +2164,10 @@ def _factorize_multiple(
2164
2164
for by_ , expect in zip (by , expected_groups ):
2165
2165
if expect is None :
2166
2166
if is_duck_dask_array (by_ ):
2167
- raise ValueError (
2168
- "Please provide expected_groups when grouping by a dask array."
2169
- )
2170
-
2171
- found_group = pd .unique (by_ .reshape (- 1 ))
2167
+ # could be remote dataset, execute remotely in that case
2168
+ found_group = np .unique (by_ .reshape (- 1 )).compute ()
2169
+ else :
2170
+ found_group = pd .unique (by_ .reshape (- 1 ))
2172
2171
else :
2173
2172
found_group = expect .to_numpy ()
2174
2173
@@ -2475,15 +2474,14 @@ def groupby_reduce(
2475
2474
2476
2475
# Don't factorize early only when
2477
2476
# grouping by dask arrays, and not having expected_groups
2477
+ # except for cohorts
2478
2478
factorize_early = not (
2479
2479
# can't do it if we are grouping by dask array but don't have expected_groups
2480
- any (is_dask and ex_ is None for is_dask , ex_ in zip (by_is_dask , expected_groups ))
2481
- )
2482
-
2483
- if method == "cohorts" and not factorize_early :
2484
- raise ValueError (
2485
- "method='cohorts' can only be used when grouping by dask arrays if `expected_groups` is provided."
2480
+ any (
2481
+ is_dask and ex_ is None and method != "cohorts"
2482
+ for is_dask , ex_ in zip (by_is_dask , expected_groups )
2486
2483
)
2484
+ )
2487
2485
2488
2486
expected_ : pd .RangeIndex | None
2489
2487
if factorize_early :
You can’t perform that action at this time.
0 commit comments