
Commit 73e0df6

Move feature weight to skl parameters. (dmlc#9506)
1 parent 82bba31 commit 73e0df6

File tree (6 files changed: +63 -16 lines)

python-package/xgboost/dask/__init__.py
python-package/xgboost/sklearn.py
python-package/xgboost/spark/core.py
python-package/xgboost/spark/data.py
python-package/xgboost/testing/shared.py
tests/python/test_with_sklearn.py
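
In effect, the commit moves `feature_weights` from a `fit()` argument to a scikit-learn estimator parameter, so it participates in `get_params()`/`set_params()` and survives cloning like any other hyperparameter. A minimal before/after sketch, assuming an XGBoost build that includes this change (the data and weight values are made up for illustration):

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.normal(size=(256, 4))
    y = X[:, 0] + rng.normal(scale=0.1, size=256)
    # Per-feature selection weights; only meaningful when colsample_* < 1.
    fw = np.array([0.7, 0.1, 0.1, 0.1])

    # Old style, now deprecated: weights passed to fit().
    # xgb.XGBRegressor(colsample_bynode=0.5).fit(X, y, feature_weights=fw)

    # New style: weights are a constructor parameter.
    reg = xgb.XGBRegressor(colsample_bynode=0.5, feature_weights=fw)
    reg.fit(X, y)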

python-package/xgboost/dask/__init__.py

Lines changed: 11 additions & 3 deletions
@@ -1639,6 +1639,10 @@ async def _fit_async(
         feature_weights: Optional[_DaskCollection],
     ) -> _DaskCollection:
         params = self.get_xgb_params()
+        model, metric, params, feature_weights = self._configure_fit(
+            xgb_model, params, feature_weights
+        )
+
         dtrain, evals = await _async_wrap_evaluation_matrices(
             client=self.client,
             device=self.device,
@@ -1665,7 +1669,6 @@ async def _fit_async(
             obj: Optional[Callable] = _objective_decorator(self.objective)
         else:
             obj = None
-        model, metric, params = self._configure_fit(xgb_model, params)
         results = await self.client.sync(
             _train_async,
             asynchronous=True,
@@ -1729,6 +1732,10 @@ async def _fit_async(
         feature_weights: Optional[_DaskCollection],
     ) -> "DaskXGBClassifier":
         params = self.get_xgb_params()
+        model, metric, params, feature_weights = self._configure_fit(
+            xgb_model, params, feature_weights
+        )
+
         dtrain, evals = await _async_wrap_evaluation_matrices(
             self.client,
             device=self.device,
@@ -1773,7 +1780,6 @@ async def _fit_async(
             obj: Optional[Callable] = _objective_decorator(self.objective)
         else:
             obj = None
-        model, metric, params = self._configure_fit(xgb_model, params)
         results = await self.client.sync(
             _train_async,
             asynchronous=True,
@@ -1953,6 +1959,9 @@ async def _fit_async(
         feature_weights: Optional[_DaskCollection],
     ) -> "DaskXGBRanker":
         params = self.get_xgb_params()
+        model, metric, params, feature_weights = self._configure_fit(
+            xgb_model, params, feature_weights
+        )
         dtrain, evals = await _async_wrap_evaluation_matrices(
             self.client,
             device=self.device,
@@ -1974,7 +1983,6 @@ async def _fit_async(
             enable_categorical=self.enable_categorical,
             feature_types=self.feature_types,
         )
-        model, metric, params = self._configure_fit(xgb_model, params)
         results = await self.client.sync(
             _train_async,
             asynchronous=True,
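
Because the Dask estimators share `_configure_fit` with the single-node wrappers, the hunks above move the call before the evaluation matrices are built so the resolved `feature_weights` can be forwarded to them. A usage sketch, assuming a local cluster and a build with this change:

    from dask import array as da
    from distributed import Client, LocalCluster
    import xgboost as xgb

    if __name__ == "__main__":
        with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
            X = da.random.random((1000, 4), chunks=(250, 4))
            y = (X[:, 0] > 0.5).astype(int)
            clf = xgb.dask.DaskXGBClassifier(
                colsample_bynode=0.5,
                # Resolved by _configure_fit rather than by fit() itself.
                feature_weights=[0.4, 0.3, 0.2, 0.1],
            )
            clf.client = client
            clf.fit(X, y)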

python-package/xgboost/sklearn.py

Lines changed: 40 additions & 11 deletions
@@ -389,7 +389,13 @@ def task(i: int) -> float:
         Used for specifying feature types without constructing a dataframe. See
         :py:class:`DMatrix` for details.

-    max_cat_to_onehot : {Optional[int]}
+    feature_weights : Optional[ArrayLike]
+
+        Weight for each feature, defines the probability of each feature being selected
+        when colsample is being used. All values must be greater than 0, otherwise a
+        `ValueError` is thrown.
+
+    max_cat_to_onehot : Optional[int]

         .. versionadded:: 1.6.0

@@ -607,7 +613,7 @@ def _wrap_evaluation_matrices(
     qid: Optional[Any],
     sample_weight: Optional[Any],
     base_margin: Optional[Any],
-    feature_weights: Optional[Any],
+    feature_weights: Optional[ArrayLike],
     eval_set: Optional[Sequence[Tuple[Any, Any]]],
     sample_weight_eval_set: Optional[Sequence[Any]],
     base_margin_eval_set: Optional[Sequence[Any]],
@@ -753,6 +759,7 @@ def __init__(
         validate_parameters: Optional[bool] = None,
         enable_categorical: bool = False,
         feature_types: Optional[FeatureTypes] = None,
+        feature_weights: Optional[ArrayLike] = None,
         max_cat_to_onehot: Optional[int] = None,
         max_cat_threshold: Optional[int] = None,
         multi_strategy: Optional[str] = None,
@@ -799,6 +806,7 @@ def __init__(
         self.validate_parameters = validate_parameters
         self.enable_categorical = enable_categorical
         self.feature_types = feature_types
+        self.feature_weights = feature_weights
         self.max_cat_to_onehot = max_cat_to_onehot
         self.max_cat_threshold = max_cat_threshold
         self.multi_strategy = multi_strategy
@@ -895,6 +903,7 @@ def _wrapper_params(self) -> Set[str]:
             "early_stopping_rounds",
             "callbacks",
             "feature_types",
+            "feature_weights",
         }
         return wrapper_specific

@@ -1065,10 +1074,12 @@ def _configure_fit(
         self,
         booster: Optional[Union[Booster, "XGBModel", str]],
         params: Dict[str, Any],
+        feature_weights: Optional[ArrayLike],
     ) -> Tuple[
         Optional[Union[Booster, str, "XGBModel"]],
         Optional[Metric],
         Dict[str, Any],
+        Optional[ArrayLike],
     ]:
         """Configure parameters for :py:meth:`fit`."""
         if isinstance(booster, XGBModel):
@@ -1101,13 +1112,23 @@ def _duplicated(parameter: str) -> None:
             else:
                 params.update({"eval_metric": self.eval_metric})

+        if feature_weights is not None:
+            _deprecated("feature_weights")
+        if feature_weights is not None and self.feature_weights is not None:
+            _duplicated("feature_weights")
+        feature_weights = (
+            self.feature_weights
+            if self.feature_weights is not None
+            else feature_weights
+        )
+
         tree_method = params.get("tree_method", None)
         if self.enable_categorical and tree_method == "exact":
             raise ValueError(
                 "Experimental support for categorical data is not implemented for"
                 " current tree method yet."
             )
-        return model, metric, params
+        return model, metric, params, feature_weights

     def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:
         # Use `QuantileDMatrix` to save memory.
@@ -1184,12 +1205,19 @@ def fit(
             A list of the form [M_1, M_2, ..., M_n], where each M_i is an array like
             object storing base margin for the i-th validation set.
         feature_weights :
-            Weight for each feature, defines the probability of each feature being
-            selected when colsample is being used. All values must be greater than 0,
-            otherwise a `ValueError` is thrown.
+
+            .. deprecated:: 3.0.0
+
+            Use `feature_weights` in :py:meth:`__init__` or :py:meth:`set_params`
+            instead.

         """
         with config_context(verbosity=self.verbosity):
+            params = self.get_xgb_params()
+            model, metric, params, feature_weights = self._configure_fit(
+                xgb_model, params, feature_weights
+            )
+
             evals_result: TrainingCallback.EvalsLog = {}
             train_dmatrix, evals = _wrap_evaluation_matrices(
                 missing=self.missing,
@@ -1209,15 +1237,13 @@ def fit(
                 enable_categorical=self.enable_categorical,
                 feature_types=self.feature_types,
             )
-            params = self.get_xgb_params()

             if callable(self.objective):
                 obj: Optional[Objective] = _objective_decorator(self.objective)
                 params["objective"] = "reg:squarederror"
             else:
                 obj = None

-            model, metric, params = self._configure_fit(xgb_model, params)
             self._Booster = train(
                 params,
                 train_dmatrix,
@@ -1631,7 +1657,9 @@ def fit(
             params["objective"] = "multi:softprob"
             params["num_class"] = self.n_classes_

-            model, metric, params = self._configure_fit(xgb_model, params)
+            model, metric, params, feature_weights = self._configure_fit(
+                xgb_model, params, feature_weights
+            )
             train_dmatrix, evals = _wrap_evaluation_matrices(
                 missing=self.missing,
                 X=X,
@@ -2148,8 +2176,9 @@ def fit(
             evals_result: TrainingCallback.EvalsLog = {}
             params = self.get_xgb_params()

-            model, metric, params = self._configure_fit(xgb_model, params)
-
+            model, metric, params, feature_weights = self._configure_fit(
+                xgb_model, params, feature_weights
+            )
             self._Booster = train(
                 params,
                 train_dmatrix,
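
The resolution logic added to `_configure_fit` is: warn via `_deprecated` whenever the `fit()` argument is used, raise via `_duplicated` when both places are set, and otherwise prefer the constructor value. Roughly, the observable behavior looks like this sketch (the exact messages live in those helpers; the error path is pinned down by the test at the end of this diff):

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X, y = rng.normal(size=(64, 4)), rng.normal(size=64)
    fw = np.full(4, 0.25)

    # Preferred: set once in the constructor.
    xgb.XGBRegressor(feature_weights=fw).fit(X, y)

    # Still honored, but emits a deprecation warning.
    xgb.XGBRegressor().fit(X, y, feature_weights=fw)

    # Setting both is ambiguous and raises a ValueError.
    # xgb.XGBRegressor(feature_weights=fw).fit(X, y, feature_weights=fw)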

python-package/xgboost/spark/core.py

Lines changed: 1 addition & 0 deletions
@@ -641,6 +641,7 @@ def __init__(self) -> None:
             repartition_random_shuffle=False,
             feature_names=None,
             feature_types=None,
+            feature_weights=None,
             arbitrary_params_dict={},
             launch_tracker_on_driver=True,
         )
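
On the PySpark side the commit only registers a `feature_weights=None` default next to `feature_names` and `feature_types`. Assuming it is forwarded like those neighbors, estimator usage would look like this sketch (requires an active SparkSession; `train_df` is a placeholder DataFrame):

    from xgboost.spark import SparkXGBRegressor

    reg = SparkXGBRegressor(
        features_col="features",
        label_col="label",
        # Assumption: handled like feature_types and passed to the booster.
        feature_weights=[0.4, 0.3, 0.2, 0.1],
    )
    # model = reg.fit(train_df)  # train_df: Spark DataFrame with those columns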

python-package/xgboost/spark/data.py

Lines changed: 1 addition & 0 deletions
@@ -352,6 +352,7 @@ def pred_contribs(
         missing=model.missing,
         nthread=model.n_jobs,
         feature_types=model.feature_types,
+        feature_weights=model.feature_weights,
         enable_categorical=model.enable_categorical,
     )
     return model.get_booster().predict(
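
The hunk above simply forwards the estimator attribute into the `DMatrix` built for `pred_contribs`; the core `DMatrix` has accepted `feature_weights` directly for some time. For reference, the equivalent low-level call (a sketch):

    import numpy as np
    import xgboost as xgb

    X = np.random.rand(32, 4)
    dm = xgb.DMatrix(X, feature_weights=np.array([0.4, 0.3, 0.2, 0.1]))
    # Equivalent after construction: dm.set_info(feature_weights=...)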

python-package/xgboost/testing/shared.py

Lines changed: 6 additions & 2 deletions
@@ -63,9 +63,13 @@ def get_feature_weights(
     """Get feature weights using the demo parser."""
     with tempfile.TemporaryDirectory() as tmpdir:
         colsample_bynode = 0.5
-        reg = model(tree_method=tree_method, colsample_bynode=colsample_bynode)
+        reg = model(
+            tree_method=tree_method,
+            colsample_bynode=colsample_bynode,
+            feature_weights=fw,
+        )

-        reg.fit(X, y, feature_weights=fw)
+        reg.fit(X, y)
         model_path = os.path.join(tmpdir, "model.json")
         reg.save_model(model_path)
         with open(model_path, "r", encoding="utf-8") as fd:

tests/python/test_with_sklearn.py

Lines changed: 4 additions & 0 deletions
@@ -1212,6 +1212,10 @@ def test_feature_weights(tree_method):
     assert poly_increasing[0] > 0.08
     assert poly_decreasing[0] < -0.08

+    reg = xgb.XGBRegressor(feature_weights=np.ones((kCols, )))
+    with pytest.raises(ValueError, match="Use the one in"):
+        reg.fit(X, y, feature_weights=np.ones((kCols, )))
+

 def run_boost_from_prediction_binary(tree_method, X, y, as_frame: Optional[Callable]):
     """
