Skip to content

Commit f789e50

Browse files
trivialfis and rjzamora authored
[backport][dask] Workaround the tokenizer. (dmlc#10419) (dmlc#10423)
Co-authored-by: Richard (Rick) Zamora <[email protected]>
1 parent b994f2a commit f789e50

File tree

4 files changed, with 12 additions and 10 deletions.

python-package/xgboost/dask/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1237,10 +1237,12 @@ def _infer_predict_output(
12371237
async def _get_model_future(
12381238
client: "distributed.Client", model: Union[Booster, Dict, "distributed.Future"]
12391239
) -> "distributed.Future":
1240+
# See https://github.com/dask/dask/issues/11179#issuecomment-2168094529 for
1241+
# the use of hash.
12401242
if isinstance(model, Booster):
1241-
booster = await client.scatter(model, broadcast=True)
1243+
booster = await client.scatter(model, broadcast=True, hash=False)
12421244
elif isinstance(model, dict):
1243-
booster = await client.scatter(model["booster"], broadcast=True)
1245+
booster = await client.scatter(model["booster"], broadcast=True, hash=False)
12441246
elif isinstance(model, distributed.Future):
12451247
booster = model
12461248
t = booster.type

tests/ci_build/Dockerfile.gpu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ RUN \
2525
mamba create -y -n gpu_test -c rapidsai -c nvidia -c conda-forge \
2626
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
2727
"nccl>=${NCCL_SHORT_VER}" \
28-
dask=2024.1.1 \
28+
dask \
2929
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
3030
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
3131
"pyspark>=3.4.0" cloudpickle cuda-python && \

tests/ci_build/conda_env/linux_cpu_test.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,8 @@ dependencies:
1717
- scikit-learn
1818
- pandas
1919
- matplotlib
20-
- dask>=2022.6
21-
- distributed>=2022.6
20+
- dask
21+
- distributed
2222
- python-graphviz
2323
- hypothesis>=6.46
2424
- astroid

tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -248,10 +248,10 @@ def test_categorical(self, local_cuda_client: Client) -> None:
248248
import dask_cudf
249249

250250
X, y = make_categorical(local_cuda_client, 10000, 30, 13)
251-
X = dask_cudf.from_dask_dataframe(X)
251+
X = X.to_backend("cudf")
252252

253253
X_onehot, _ = make_categorical(local_cuda_client, 10000, 30, 13, True)
254-
X_onehot = dask_cudf.from_dask_dataframe(X_onehot)
254+
X_onehot = X_onehot.to_backend("cudf")
255255
run_categorical(local_cuda_client, "hist", "cuda", X, X_onehot, y)
256256

257257
@given(
@@ -383,9 +383,9 @@ def test_dask_classifier(self, model: str, local_cuda_client: Client) -> None:
383383

384384
X_, y_, w_ = generate_array(with_weights=True)
385385
y_ = (y_ * 10).astype(np.int32)
386-
X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_))
387-
y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_))
388-
w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_))
386+
X = dd.from_dask_array(X_).to_backend("cudf")
387+
y = dd.from_dask_array(y_).to_backend("cudf")
388+
w = dd.from_dask_array(w_).to_backend("cudf")
389389
run_dask_classifier(X, y, w, model, "gpu_hist", local_cuda_client, 10)
390390

391391
def test_empty_dmatrix(self, local_cuda_client: Client) -> None:

0 commit comments

Comments
 (0)