Skip to content

Commit ce4fc7f

Browse files
authored
Add checks for invalid inputs for cv. (dmlc#11255)
1 parent a8db6e0 commit ce4fc7f

File tree

4 files changed

+34
-8
lines changed

4 files changed

+34
-8
lines changed

python-package/xgboost/callback.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,9 @@ class EarlyStopping(TrainingCallback):
325325
maximize :
326326
Whether to maximize evaluation metric. None means auto (discouraged).
327327
save_best :
328-
Whether training should return the best model or the last model.
328+
Whether training should return the best model or the last model. This is only
329+
supported with tree methods. It is also not applicable to the `cv` function,
330+
which doesn't return a model.
329331
min_delta :
330332
331333
.. versionadded:: 1.5.0
@@ -380,6 +382,11 @@ def __init__(
380382

381383
def before_training(self, model: _Model) -> _Model:
382384
self.starting_round = model.num_boosted_rounds()
385+
if not isinstance(model, Booster) and self.save_best:
386+
raise ValueError(
387+
"`save_best` is not applicable to the `cv` function as it doesn't return"
388+
" a model."
389+
)
383390
return model
384391

385392
def _update_rounds(
@@ -428,7 +435,7 @@ def minimize(new: _Score, best: _Score) -> bool:
428435
self.stopping_history[name][metric] = cast(_ScoreList, [score])
429436
self.best_scores[name] = {}
430437
self.best_scores[name][metric] = [score]
431-
model.set_attr(best_score=str(score), best_iteration=str(epoch))
438+
model.set_attr(best_score=str(get_s(score)), best_iteration=str(epoch))
432439
elif not improve_op(score, self.best_scores[name][metric][-1]):
433440
# Not improved
434441
self.stopping_history[name][metric].append(score) # type: ignore
@@ -437,7 +444,7 @@ def minimize(new: _Score, best: _Score) -> bool:
437444
self.stopping_history[name][metric].append(score) # type: ignore
438445
self.best_scores[name][metric].append(score)
439446
record = self.stopping_history[name][metric][-1]
440-
model.set_attr(best_score=str(record), best_iteration=str(epoch))
447+
model.set_attr(best_score=str(get_s(record)), best_iteration=str(epoch))
441448
self.current_rounds = 0 # reset
442449

443450
if self.current_rounds >= self.rounds:

python-package/xgboost/training.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
Objective,
2424
XGBoostError,
2525
_deprecate_positional_args,
26+
_RefMixIn,
2627
)
2728

2829
_CVFolds = Sequence["CVPack"]
@@ -153,7 +154,7 @@ def train(
153154
raise TypeError("Invalid type for the `evals`.")
154155

155156
if (
156-
hasattr(va, "ref")
157+
isinstance(va, _RefMixIn)
157158
and va.ref is not weakref.ref(dtrain)
158159
and va is not dtrain
159160
):
@@ -442,9 +443,10 @@ def cv(
442443
----------
443444
params : dict
444445
Booster params.
445-
dtrain : DMatrix
446-
Data to be trained.
447-
num_boost_round : int
446+
dtrain :
447+
Training data. Only a :py:class:`DMatrix` without external memory is
448+
supported; `QuantileDMatrix` is not yet supported.
449+
num_boost_round :
448450
Number of boosting iterations.
449451
nfold : int
450452
Number of folds in CV.
@@ -525,9 +527,10 @@ def cv(
525527
raise XGBoostError(
526528
"sklearn needs to be installed in order to use stratified cv"
527529
)
528-
529530
if isinstance(metrics, str):
530531
metrics = [metrics]
532+
if isinstance(dtrain, _RefMixIn):
533+
raise ValueError("`QuantileDMatrix` is not yet supported.")
531534

532535
params = params.copy()
533536
if isinstance(params, list):

tests/python/test_early_stopping.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,16 @@ def evalerror(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
107107
)
108108
self.assert_metrics_length(cv, 1)
109109

110+
with pytest.raises(ValueError, match="`save_best`"):
111+
cv = xgb.cv(
112+
params,
113+
dm,
114+
num_boost_round=10,
115+
nfold=10,
116+
early_stopping_rounds=1,
117+
callbacks=[xgb.callback.EarlyStopping(3, save_best=True)],
118+
)
119+
110120
@pytest.mark.skipif(**tm.no_sklearn())
111121
@pytest.mark.skipif(**tm.no_pandas())
112122
def test_cv_early_stopping_with_multiple_eval_sets_and_metrics(self):

tests/python/test_quantile_dmatrix.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,9 @@ def test_sparse_predict(self) -> None:
395395
Xy = xgb.DMatrix(X, y, enable_categorical=True)
396396
p1 = booster.predict(Xy)
397397
np.testing.assert_allclose(p0, p1)
398+
399+
def test_cv_error(self) -> None:
400+
X, y = make_sparse_regression(8, 2, sparsity=0.2, as_dense=False)
401+
Xy = xgb.QuantileDMatrix(X, y)
402+
with pytest.raises(ValueError, match=""):
403+
cv = xgb.cv({}, Xy, 10, nfold=10, early_stopping_rounds=10)

0 commit comments

Comments
 (0)