Skip to content

Commit 0ffc52e

Browse files
authored
[backport] Fix using categorical data with the ranker. (dmlc#9753) (dmlc#9778)
1 parent a408254 commit 0ffc52e

File tree

4 files changed: 48 additions and 3 deletions.

python-package/xgboost/sklearn.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2093,7 +2093,17 @@ def score(self, X: ArrayLike, y: ArrayLike) -> float:
20932093
20942094
"""
20952095
X, qid = _get_qid(X, None)
2096-
Xyq = DMatrix(X, y, qid=qid)
2096+
# fixme(jiamingy): base margin and group weight are not yet supported. We might
2097+
# need to make extra special fields in the dataframe.
2098+
Xyq = DMatrix(
2099+
X,
2100+
y,
2101+
qid=qid,
2102+
missing=self.missing,
2103+
enable_categorical=self.enable_categorical,
2104+
nthread=self.n_jobs,
2105+
feature_types=self.feature_types,
2106+
)
20972107
if callable(self.eval_metric):
20982108
metric = ltr_metric_decorator(self.eval_metric, self.n_jobs)
20992109
result_str = self.get_booster().eval_set([(Xyq, "eval")], feval=metric)

python-package/xgboost/testing/ranking.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,3 +75,28 @@ def neg_mse(*args: Any, **kwargs: Any) -> float:
7575

7676
with pytest.raises(ValueError, match="Either `group` or `qid`."):
7777
ranker.fit(df, y, eval_set=[(X, y)])
78+
79+
80+
def run_ranking_categorical(device: str) -> None:
    """Exercise learning-to-rank on a dataset with categorical features.

    Fits an ``XGBRanker`` with ``enable_categorical=True`` on generated
    categorical data whose query ids are stored in a ``qid`` column, checks the
    score on the training data, and then checks that scikit-learn's
    ``cross_val_score`` can drive a fresh estimator through its ``score`` method.

    Parameters
    ----------
    device :
        Forwarded unchanged as the ``device`` argument of ``XGBRanker``.
    """
    from sklearn.model_selection import cross_val_score

    features, labels = tm.make_categorical(
        n_samples=512, n_features=10, n_categories=3, onehot=False
    )

    # One of three query ids per row, drawn from a fixed seed and then sorted so
    # that rows with the same id are contiguous (presumably what the ranker
    # expects -- confirm against the XGBRanker documentation).
    generator = np.random.default_rng(1994)
    query_ids = generator.choice(3, size=labels.shape[0])
    features["qid"] = np.sort(query_ids)

    ranker = xgb.XGBRanker(enable_categorical=True, device=device)
    ranker.fit(features, labels)
    assert ranker.score(features, labels) > 0.9

    # A second, unfitted estimator: scikit-learn calls the estimator's own score
    # function while cross-validating.
    fresh_ranker = xgb.XGBRanker(enable_categorical=True, device=device)
    for fold_score in cross_val_score(fresh_ranker, features, labels):
        assert fold_score > 0.7

tests/python-gpu/test_gpu_with_sklearn.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010
import xgboost as xgb
1111
from xgboost import testing as tm
12-
from xgboost.testing.ranking import run_ranking_qid_df
12+
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
1313

1414
sys.path.append("tests/python")
1515
import test_with_sklearn as twskl # noqa
@@ -165,6 +165,11 @@ def test_ranking_qid_df():
165165
run_ranking_qid_df(cudf, "gpu_hist")
166166

167167

168+
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
    """Run the shared categorical learning-to-rank check with ``device="cuda"``.

    Skipped under the condition supplied by ``tm.no_pandas()`` (presumably when
    pandas is not installed).
    """
    run_ranking_categorical(device="cuda")
171+
172+
168173
@pytest.mark.skipif(**tm.no_cupy())
169174
@pytest.mark.mgpu
170175
def test_device_ordinal() -> None:

tests/python/test_with_sklearn.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
import xgboost as xgb
1414
from xgboost import testing as tm
15-
from xgboost.testing.ranking import run_ranking_qid_df
15+
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
1616
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
1717
from xgboost.testing.updater import get_basescore
1818

@@ -173,6 +173,11 @@ def test_ranking():
173173
np.testing.assert_almost_equal(pred, pred_orig)
174174

175175

176+
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
    """Run the shared categorical learning-to-rank check with ``device="cpu"``.

    Skipped under the condition supplied by ``tm.no_pandas()`` (presumably when
    pandas is not installed).
    """
    run_ranking_categorical(device="cpu")
179+
180+
176181
def test_ranking_metric() -> None:
177182
from sklearn.metrics import roc_auc_score
178183

0 commit comments

Comments
 (0)