Skip to content

Commit e5f29dd

Browse files
authored
Check for invalid cuDF input. (dmlc#11248)
1 parent ce4fc7f commit e5f29dd

File tree

3 files changed

+40
-7
lines changed

3 files changed

+40
-7
lines changed

python-package/xgboost/_data_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ class _ArrayLikeArg(Protocol):
1616
def __array_interface__(self) -> "ArrayInf": ...
1717

1818

19+
class _CudaArrayLikeArg(Protocol):
    """Protocol class for objects exposing ``__cuda_array_interface__``."""

    @property
    def __cuda_array_interface__(self) -> "ArrayInf":
        """The interface value of the object, typed as ``ArrayInf``."""
        ...
22+
23+
1924
class TransformedDf(Protocol):
2025
"""Protocol class for storing transformed dataframe."""
2126

@@ -151,3 +156,12 @@ def array_interface(data: np.ndarray) -> bytes:
151156
interface = array_interface_dict(data)
152157
interface_str = bytes(json.dumps(interface), "utf-8")
153158
return interface_str
159+
160+
161+
def check_cudf_meta(data: _CudaArrayLikeArg, field: str) -> None:
    """Make sure no missing value in meta data.

    Parameters
    ----------
    data :
        Object exposing ``__cuda_array_interface__``.
    field :
        Name of the meta info field, used only to build the error message.

    Raises
    ------
    ValueError
        If the interface has a ``mask`` entry that is not ``None``.

    """
    interface = data.__cuda_array_interface__
    # An absent "mask" key and an explicit ``None`` are both accepted.
    if interface.get("mask") is None:
        return
    raise ValueError(f"Missing value is not allowed for: {field}")

python-package/xgboost/data.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
array_hasobject,
3030
array_interface,
3131
array_interface_dict,
32+
check_cudf_meta,
3233
cuda_array_interface,
3334
make_array_interface,
3435
)
@@ -1555,14 +1556,15 @@ def _meta_from_cudf_df(data: DataType, field: str, handle: ctypes.c_void_p) -> N
15551556

15561557

15571558
def _meta_from_cudf_series(data: DataType, field: str, handle: ctypes.c_void_p) -> None:
1558-
interface = bytes(json.dumps([data.__cuda_array_interface__], indent=2), "utf-8")
1559-
_check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface))
1559+
check_cudf_meta(data, field)
1560+
inf = cuda_array_interface(data)
1561+
_check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), inf))
15601562

15611563

15621564
def _meta_from_cupy_array(data: DataType, field: str, handle: ctypes.c_void_p) -> None:
15631565
data = _transform_cupy_array(data)
1564-
interface = bytes(json.dumps([data.__cuda_array_interface__], indent=2), "utf-8")
1565-
_check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface))
1566+
inf = cuda_array_interface(data)
1567+
_check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), inf))
15661568

15671569

15681570
def dispatch_meta_backend(
@@ -1604,15 +1606,15 @@ def dispatch_meta_backend(
16041606
data = _transform_dlpack(data)
16051607
_meta_from_cupy_array(data, name, handle)
16061608
return
1607-
if _is_cupy_alike(data):
1608-
_meta_from_cupy_array(data, name, handle)
1609-
return
16101609
if _is_cudf_ser(data):
16111610
_meta_from_cudf_series(data, name, handle)
16121611
return
16131612
if _is_cudf_df(data):
16141613
_meta_from_cudf_df(data, name, handle)
16151614
return
1615+
if _is_cupy_alike(data):
1616+
_meta_from_cupy_array(data, name, handle)
1617+
return
16161618
if _is_modin_df(data):
16171619
_meta_from_pandas_df(data, name, dtype=dtype, handle=handle)
16181620
return

tests/python-gpu/test_from_cudf.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,3 +382,20 @@ def test_from_cudf_iter(enable_categorical):
382382
predict = reg.predict(m)
383383
predict_with_it = reg_with_it.predict(m_it)
384384
np.testing.assert_allclose(predict_with_it, predict)
385+
386+
387+
def test_invalid_meta() -> None:
    """Null entries in meta info must raise for both DMatrix flavours."""
    frame = cudf.DataFrame({"f0": [0, 1, 2], "f1": [2, 3, 4], "y": [None, 1, 2]})
    y = frame["y"]
    X = frame.drop(["y"], axis=1)

    # Series label containing a null.
    for make_matrix in (xgb.DMatrix, xgb.QuantileDMatrix):
        with pytest.raises(ValueError, match="Missing value"):
            make_matrix(X, y)

    # DataFrame label containing a null.
    y = X.copy()
    y.iloc[0, 0] = None
    # check by the cuDF->cupy converter.
    for make_matrix in (xgb.DMatrix, xgb.QuantileDMatrix):
        with pytest.raises(ValueError, match="no nulls"):
            make_matrix(X, y)

0 commit comments

Comments
 (0)