Skip to content

Commit 5cb4bcf

Browse files
committed
WIP
1 parent 2b90845 commit 5cb4bcf

File tree

1 file changed

+9
-11
lines changed

1 file changed

+9
-11
lines changed

flox/core.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2164,11 +2164,10 @@ def _factorize_multiple(
21642164
for by_, expect in zip(by, expected_groups):
21652165
if expect is None:
21662166
if is_duck_dask_array(by_):
2167-
raise ValueError(
2168-
"Please provide expected_groups when grouping by a dask array."
2169-
)
2170-
2171-
found_group = pd.unique(by_.reshape(-1))
2167+
# by_ is a dask array, possibly backed by a remote dataset: compute the unique values through dask so the work runs where the data lives
2168+
found_group = np.unique(by_.reshape(-1)).compute()
2169+
else:
2170+
found_group = pd.unique(by_.reshape(-1))
21722171
else:
21732172
found_group = expect.to_numpy()
21742173

@@ -2475,15 +2474,14 @@ def groupby_reduce(
24752474

24762475
# Factorize early in every case except one:
24772476
# grouping by dask arrays without expected_groups
2477+
# (and even then, method="cohorts" still factorizes early)
24782478
factorize_early = not (
24792479
# can't do it if we are grouping by dask array but don't have expected_groups
2480-
any(is_dask and ex_ is None for is_dask, ex_ in zip(by_is_dask, expected_groups))
2481-
)
2482-
2483-
if method == "cohorts" and not factorize_early:
2484-
raise ValueError(
2485-
"method='cohorts' can only be used when grouping by dask arrays if `expected_groups` is provided."
2480+
any(
2481+
is_dask and ex_ is None and method != "cohorts"
2482+
for is_dask, ex_ in zip(by_is_dask, expected_groups)
24862483
)
2484+
)
24872485

24882486
expected_: pd.RangeIndex | None
24892487
if factorize_early:

0 commit comments

Comments
 (0)