
Commit 0b280dd

Merge branch 'dmlc:release_3.0.0' into release_3.0.0
2 parents: 8919c08 + 205d2e6

File tree

13 files changed: +155 −63 lines

doc/parameter.rst

Lines changed: 4 additions & 0 deletions

```diff
@@ -540,6 +540,10 @@ These are parameters specific to learning to rank task. See :doc:`Learning to Ra

   Whether to normalize the leaf value by lambda gradient. This can sometimes stagnate the training progress.

+  .. versionchanged:: 3.0.0
+
+     When the ``mean`` method is used, it's normalized by the ``lambdarank_num_pair_per_sample`` instead of gradient.
+
 * ``lambdarank_score_normalization`` [default = ``true``]

   .. versionadded:: 3.0.0
```
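As context for the changed note, a minimal sketch of selecting this behavior from the Python API. The parameter names come from this commit; the commented-out ``fit`` call uses placeholder data:

```python
import xgboost as xgb

# Under the "mean" pair method, 3.0 normalizes gradients by
# lambdarank_num_pair_per_sample rather than by the lambda gradient;
# the top-k method keeps the gradient-based normalization.
ranker = xgb.XGBRanker(
    objective="rank:pairwise",
    lambdarank_pair_method="mean",
    lambdarank_num_pair_per_sample=4,
    lambdarank_normalization=True,
)
# ranker.fit(X, y, qid=qid)  # X, y, qid: placeholder ranking inputs
```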

doc/tutorials/learning_to_rank.rst

Lines changed: 0 additions & 2 deletions

```diff
@@ -198,8 +198,6 @@ The learning to rank implementation has been significantly updated in 2.0 with a
         # 1.7 only supports sampling, while 2.0 and later use top-k as the default.
         # See above sections for the trade-off.
         "lambdarank_pair_method": "mean",
-        # Normalization was added in 2.0
-        "lambdarank_normalization": False,
         # 1.7 uses the ranknet loss while later versions use the NDCG weighted loss
         "objective": "rank:pairwise",
         # 1.7 doesn't have this normalization.
```

include/xgboost/base.h

Lines changed: 6 additions & 2 deletions

```diff
@@ -105,9 +105,13 @@ using bst_bin_t = std::int32_t;  // NOLINT
  * @brief Type for data row index (sample).
  */
 using bst_idx_t = std::uint64_t;  // NOLINT
-/*! \brief Type for tree node index. */
+/**
+ * \brief Type for tree node index.
+ */
 using bst_node_t = std::int32_t;  // NOLINT
-/*! \brief Type for ranking group index. */
+/**
+ * @brief Type for ranking group index.
+ */
 using bst_group_t = std::uint32_t;  // NOLINT
 /**
  * @brief Type for indexing into output targets.
```

python-package/xgboost/testing/ranking.py

Lines changed: 48 additions & 0 deletions

```diff
@@ -105,6 +105,7 @@ def run_ranking_categorical(device: str) -> None:
 def run_normalization(device: str) -> None:
     """Test normalization."""
     X, y, qid, _ = tm.make_ltr(2048, 4, 64, 3)
+    # top-k
     ltr = xgb.XGBRanker(objective="rank:pairwise", n_estimators=4, device=device)
     ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
     e0 = ltr.evals_result()
@@ -119,6 +120,53 @@ def run_normalization(device: str) -> None:
     e1 = ltr.evals_result()
     assert e1["validation_0"]["ndcg@32"][-1] > e0["validation_0"]["ndcg@32"][-1]

+    # mean
+    ltr = xgb.XGBRanker(
+        objective="rank:pairwise",
+        n_estimators=4,
+        device=device,
+        lambdarank_pair_method="mean",
+        lambdarank_normalization=True,
+    )
+    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
+    e0 = ltr.evals_result()
+
+    ltr = xgb.XGBRanker(
+        objective="rank:pairwise",
+        n_estimators=4,
+        device=device,
+        lambdarank_pair_method="mean",
+        lambdarank_normalization=False,
+    )
+    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
+    e1 = ltr.evals_result()
+    # no normalization since the number of pairs is 1.
+    assert e1["validation_0"]["ndcg"][-1] == e0["validation_0"]["ndcg"][-1]
+
+    # mean
+    ltr = xgb.XGBRanker(
+        objective="rank:pairwise",
+        n_estimators=4,
+        device=device,
+        lambdarank_pair_method="mean",
+        lambdarank_normalization=True,
+        lambdarank_num_pair_per_sample=4,
+    )
+    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
+    e0 = ltr.evals_result()
+
+    ltr = xgb.XGBRanker(
+        objective="rank:pairwise",
+        n_estimators=4,
+        device=device,
+        lambdarank_pair_method="mean",
+        lambdarank_normalization=False,
+        lambdarank_num_pair_per_sample=4,
+    )
+    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
+    e1 = ltr.evals_result()
+    assert e1["validation_0"]["ndcg"][-1] != e0["validation_0"]["ndcg"][-1]
+

 def run_score_normalization(device: str, objective: str) -> None:
     """Test normalization by score differences."""
```

src/common/ranking_utils.cuh

Lines changed: 2 additions & 0 deletions

```diff
@@ -30,6 +30,8 @@ XGBOOST_DEVICE __forceinline__ std::size_t ThreadsForMean(std::size_t group_size
                                                           std::size_t n_pairs) {
   return group_size * n_pairs;
 }
+// Number of threads in a group divided by the number of samples in this group, returns
+// the number of pairs for pair-wise ltr with sampling.
 XGBOOST_DEVICE __forceinline__ std::size_t PairsForGroup(std::size_t n_threads,
                                                          std::size_t group_size) {
   return n_threads / group_size;
```
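A sketch of the relationship the new comment documents, with hypothetical Python mirrors of the two helpers:

```python
def threads_for_mean(group_size: int, n_pairs: int) -> int:
    # One CUDA thread per (sample, sampled pair) under the "mean" method.
    return group_size * n_pairs

def pairs_for_group(n_threads: int, group_size: int) -> int:
    # Inverse: recover pairs-per-sample from a group's thread budget.
    return n_threads // group_size

assert pairs_for_group(threads_for_mean(64, 4), 64) == 4
```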

src/common/ranking_utils.h

Lines changed: 9 additions & 4 deletions

```diff
@@ -115,6 +115,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
   }

   [[nodiscard]] bool HasTruncation() const { return lambdarank_pair_method == PairMethod::kTopK; }
+  [[nodiscard]] bool IsMean() const { return lambdarank_pair_method == PairMethod::kMean; }

   // Used for evaluation metric and cache initialization, iterate through top-k or the whole list
   [[nodiscard]] auto TopK() const {
@@ -180,7 +181,8 @@ class RankingCache {
   HostDeviceVector<std::size_t> y_sorted_idx_cache_;
   // Cached labels sorted by the model
   HostDeviceVector<float> y_ranked_by_model_;
-  // store rounding factor for objective for each group
+  // Rounding factor for CUDA deterministic floating point summation. One rounding factor
+  // for each ranking group.
   linalg::Vector<GradientPair> roundings_;
   // rounding factor for cost
   HostDeviceVector<double> cost_rounding_;
@@ -215,6 +217,9 @@ class RankingCache {
     if (!info.weights_.Empty()) {
       CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight();
     }
+    if (param_.HasTruncation()) {
+      CHECK_GE(param_.NumPair(), 1);
+    }
   }
   [[nodiscard]] std::size_t MaxPositionSize() const {
     // Use truncation level as bound.
@@ -267,21 +272,21 @@ class RankingCache {
   }

   // CUDA cache getters, the cache is shared between metric and objective, some of these
-  // fields are lazy initialized to avoid unnecessary allocation.
+  // fields are initialized lazily to avoid unnecessary allocation.
   [[nodiscard]] common::Span<std::size_t const> CUDAThreadsGroupPtr() const {
     CHECK(!threads_group_ptr_.Empty());
     return threads_group_ptr_.ConstDeviceSpan();
   }
   [[nodiscard]] std::size_t CUDAThreads() const { return n_cuda_threads_; }

-  linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) {
+  [[nodiscard]] linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) {
     if (roundings_.Size() == 0) {
       roundings_.SetDevice(ctx->Device());
       roundings_.Reshape(Groups());
     }
     return roundings_.View(ctx->Device());
   }
-  common::Span<double> CUDACostRounding(Context const* ctx) {
+  [[nodiscard]] common::Span<double> CUDACostRounding(Context const* ctx) {
     if (cost_rounding_.Size() == 0) {
       cost_rounding_.SetDevice(ctx->Device());
       cost_rounding_.Resize(1);
```
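The reworded comment points at rounding-based deterministic summation; a rough Python sketch of that general technique (the helper names and the bound are illustrative, not XGBoost's exact implementation):

```python
import math

def rounding_factor(max_abs: float, n: int) -> float:
    # Pick a power of two no smaller than an upper bound on the sum's
    # magnitude (n addends, each at most max_abs in absolute value).
    return 2.0 ** math.ceil(math.log2(n * max_abs)) if n and max_abs else 1.0

def pre_round(x: float, factor: float) -> float:
    # (x + factor) - factor snaps x to a fixed absolute grid, so
    # subsequent floating-point addition becomes order-independent,
    # which is what a parallel (CUDA) reduction needs for determinism.
    return (x + factor) - factor
```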

src/objective/lambdarank_obj.cc

Lines changed: 17 additions & 4 deletions

```diff
@@ -225,10 +225,23 @@ class LambdaRankObj : public FitIntercept {
     };

     MakePairs(ctx_, iter, p_cache_, g, g_label, g_rank, loop);
-    if (sum_lambda > 0.0 && param_.lambdarank_normalization) {
-      double norm = std::log2(1.0 + sum_lambda) / sum_lambda;
-      std::transform(g_gpair.Values().data(), g_gpair.Values().data() + g_gpair.Size(),
-                     g_gpair.Values().data(), [norm](GradientPair const& g) { return g * norm; });
+    if (param_.lambdarank_normalization) {
+      double norm = 1.0;
+      if (param_.IsMean()) {
+        // Normalize using the number of pairs for mean.
+        auto n_pairs = this->p_cache_->Param().NumPair();
+        auto scale = 1.0 / static_cast<double>(n_pairs);
+        norm = scale;
+      } else {
+        // Normalize using gradient for top-k.
+        if (sum_lambda > 0.0) {
+          norm = std::log2(1.0 + sum_lambda) / sum_lambda;
+        }
+      }
+      if (norm != 1.0) {
+        std::transform(linalg::begin(g_gpair), linalg::end(g_gpair), linalg::begin(g_gpair),
+                       [norm](GradientPair const& g) { return g * norm; });
+      }
     }

     auto w_norm = p_cache_->WeightNorm();
```
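A hedged Python mirror of the new branch, to make the two normalization modes explicit (the function and argument names are illustrative):

```python
import math

def lambdarank_norm(sum_lambda: float, n_pairs: int,
                    is_mean: bool, enabled: bool) -> float:
    """Scale factor applied to every gradient pair in a query group."""
    if not enabled:
        return 1.0
    if is_mean:
        # Mean method: normalize by the number of sampled pairs.
        return 1.0 / n_pairs
    # Top-k method: keep the gradient-based normalization.
    if sum_lambda > 0.0:
        return math.log2(1.0 + sum_lambda) / sum_lambda
    return 1.0
```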

src/objective/lambdarank_obj.cu

Lines changed: 39 additions & 22 deletions

```diff
@@ -3,18 +3,19 @@
  *
  * \brief CUDA implementation of lambdarank.
  */
+#include <dmlc/registry.h>                      // for DMLC_REGISTRY_FILE_TAG
 #include <thrust/fill.h>                        // for fill_n
 #include <thrust/for_each.h>                    // for for_each_n
 #include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator
 #include <thrust/iterator/zip_iterator.h>       // for make_zip_iterator
 #include <thrust/tuple.h>                       // for make_tuple, tuple, tie, get

-#include <algorithm>  // for min
-#include <cassert>    // for assert
-#include <cmath>      // for abs, log2, isinf
-#include <cstddef>    // for size_t
-#include <cstdint>    // for int32_t
-#include <memory>     // for shared_ptr
+#include <algorithm>                            // for min
+#include <cassert>                              // for assert
+#include <cmath>                                // for abs, log2, isinf
+#include <cstddef>                              // for size_t
+#include <cstdint>                              // for int32_t
+#include <memory>                               // for shared_ptr
 #include <utility>

 #include "../common/algorithm.cuh"  // for SegmentedArgSort
@@ -31,7 +32,7 @@
 #include "xgboost/host_device_vector.h"  // for HostDeviceVector
 #include "xgboost/linalg.h"              // for VectorView, Range, Vector
 #include "xgboost/logging.h"
-#include "xgboost/span.h"  // for Span
+#include "xgboost/span.h"                // for Span

 namespace xgboost::obj {
 DMLC_REGISTRY_FILE_TAG(lambdarank_obj_cu);
@@ -82,7 +83,7 @@ struct GetGradOp {
   MakePairsOp<has_truncation> make_pair;
   Delta delta;

-  bool need_update;
+  bool const need_update;

   auto __device__ operator()(std::size_t idx) -> GradCostNorm {
     auto const& args = make_pair.args;
@@ -95,6 +96,7 @@ struct GetGradOp {
     auto g_predt = args.predts.subspan(data_group_begin, n_data);
     auto g_gpair = args.gpairs.Slice(linalg::Range(data_group_begin, data_group_begin + n_data));
     auto g_rank = args.d_sorted_idx.subspan(data_group_begin, n_data);
+    auto n_pairs = args.n_pairs;

     auto [i, j] = make_pair(idx, g);

@@ -108,7 +110,9 @@ struct GetGradOp {

     double cost{0};

-    auto delta_op = [&](auto const&... args) { return delta(args..., g); };
+    auto delta_op = [&](auto const&... args) {
+      return delta(args..., g);
+    };
     GradientPair pg =
         LambdaGrad<unbiased, norm_by_diff>(g_label, g_predt, g_rank, rank_high, rank_low, delta_op,
                                            args.ti_plus, args.tj_minus, &cost);
@@ -118,7 +122,6 @@ struct GetGradOp {

     if (need_update) {
       // second run, update the gradient
-
       auto ng = Repulse(pg);

       auto gr = args.d_roundings(g);
@@ -153,6 +156,7 @@ struct GetGradOp {
         }
       }
     }
+
     return thrust::make_tuple(GradientPair{std::abs(pg.GetGrad()), std::abs(pg.GetHess())},
                               std::abs(cost), -2.0 * static_cast<double>(pg.GetGrad()));
   }
@@ -215,12 +219,12 @@ void CalcGrad(Context const* ctx, MetaInfo const& info, std::shared_ptr<ltr::Ran
         auto hess = std::max(lg.GetHess(), rg.GetHess());
         auto cost = std::max(thrust::get<1>(l), thrust::get<1>(r));
         double sum_lambda = thrust::get<2>(l) + thrust::get<2>(r);
-        return thrust::make_tuple(GradientPair{std::abs(grad), std::abs(hess)}, cost, sum_lambda);
+        return thrust::make_tuple(GradientPair{grad, hess}, cost, sum_lambda);
       };
   auto init = thrust::make_tuple(GradientPair{0.0f, 0.0f}, 0.0, 0.0);
   common::Span<GradCostNorm> d_max_lambdas = p_cache->MaxLambdas<GradCostNorm>(ctx, n_groups);
   CHECK_EQ(n_groups * sizeof(GradCostNorm), d_max_lambdas.size_bytes());
-
+  // Reduce by group.
   std::size_t bytes;
   cub::DeviceSegmentedReduce::Reduce(nullptr, bytes, val_it, d_max_lambdas.data(), n_groups,
                                      d_threads_group_ptr.data(), d_threads_group_ptr.data() + 1,
@@ -267,22 +271,35 @@ void CalcGrad(Context const* ctx, MetaInfo const& info, std::shared_ptr<ltr::Ran
    */
  auto d_weights = common::MakeOptionalWeights(ctx, info.weights_);
   auto w_norm = p_cache->WeightNorm();
-  auto norm = p_cache->Param().lambdarank_normalization;
+  auto need_norm = p_cache->Param().lambdarank_normalization;
+  auto n_pairs = p_cache->Param().NumPair();
+  bool is_mean = p_cache->Param().IsMean();
+  CHECK_EQ(is_mean, !has_truncation);
   thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_gpair.Size(),
                      [=] XGBOOST_DEVICE(std::size_t i) mutable {
                        auto g = dh::SegmentId(d_gptr, i);
-                       auto sum_lambda = thrust::get<2>(d_max_lambdas[g]);
-                       // Normalization
-                       if (sum_lambda > 0.0 && norm) {
-                         double norm = std::log2(1.0 + sum_lambda) / sum_lambda;
+                       if (need_norm) {
+                         double norm = 1.0;
+                         if (has_truncation) {
+                           // Normalize using gradient for top-k.
+                           auto sum_lambda = thrust::get<2>(d_max_lambdas[g]);
+                           if (sum_lambda > 0.0) {
+                             norm = std::log2(1.0 + sum_lambda) / sum_lambda;
+                           }
+                         } else {
+                           // Normalize using the number of pairs for mean.
+                           double scale = 1.0 / static_cast<double>(n_pairs);
+                           norm = scale;
+                         }
                          d_gpair(i, 0) *= norm;
                        }
+
                        d_gpair(i, 0) *= (d_weights[g] * w_norm);
                      });
 }

 /**
- * \brief Handles boilerplate code like getting device span.
+ * @brief Handles boilerplate code like getting device spans.
  */
 template <bool norm_by_diff, typename Delta>
 void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector<float> const& preds,
@@ -302,7 +319,6 @@ void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector<float> const
   out_gpair->Reshape(preds.Size(), 1);

   CHECK(p_cache);
-
   auto d_rounding = p_cache->CUDARounding(ctx);
   auto d_cost_rounding = p_cache->CUDACostRounding(ctx);

@@ -325,9 +341,10 @@ void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector<float> const
     d_y_sorted_idx = SortY(ctx, info, rank_idx, p_cache);
   }

-  KernelInputs args{ti_plus, tj_minus, li, lj, d_gptr, d_threads_group_ptr,
-                    rank_idx, label, predts, gpairs, d_rounding, d_cost_rounding.data(),
-                    d_y_sorted_idx, iter};
+  auto n_pairs = p_cache->Param().NumPair();
+  KernelInputs args{ti_plus, tj_minus, li, lj, d_gptr, d_threads_group_ptr,
+                    rank_idx, label, predts, gpairs, d_rounding, d_cost_rounding.data(),
+                    n_pairs, d_y_sorted_idx, iter};

   // dispatch based on unbiased and truncation
   if (p_cache->Param().HasTruncation()) {
```
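The CUDA path applies the same rule per group in the updated ``for_each_n`` kernel; a NumPy sketch of that second pass (array names are illustrative, and ``sum_lambda`` stands for the per-group value produced by the segmented reduction):

```python
import numpy as np

def apply_group_norm(grad, group_ptr, sum_lambda, n_pairs, has_truncation):
    out = grad.astype(float).copy()
    for g in range(len(group_ptr) - 1):
        lo, hi = group_ptr[g], group_ptr[g + 1]
        if has_truncation:  # top-k: gradient-based normalization
            s = sum_lambda[g]
            norm = np.log2(1.0 + s) / s if s > 0.0 else 1.0
        else:  # mean: divide by the number of sampled pairs
            norm = 1.0 / n_pairs
        out[lo:hi] *= norm
    return out
```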

src/objective/lambdarank_obj.cuh

Lines changed: 4 additions & 2 deletions

```diff
@@ -66,6 +66,7 @@ struct KernelInputs {
   linalg::VectorView<GradientPair const> d_roundings;
   double const *d_cost_rounding;

+  ltr::position_t const n_pairs;
   common::Span<std::size_t const> d_y_sorted_idx;

   std::int32_t iter;
@@ -136,9 +137,10 @@ struct MakePairsOp {
     // The index pointing to the first element of the next bucket
     std::size_t right_bound = n_data - n_rights;

-    thrust::minstd_rand rng(args.iter);
+    std::uint32_t seed = args.iter * (static_cast<std::uint32_t>(args.d_group_ptr.size()) - 1) + g;
+    thrust::minstd_rand rng(seed);
     auto pair_idx = i;
-    rng.discard(sample_pair_idx * n_data + g + pair_idx);  // fixme
+    rng.discard(idx - args.d_threads_group_ptr[g]);  // idx within group
     thrust::uniform_int_distribution<std::size_t> dist(0, n_lefts + n_rights - 1);
     auto ridx = dist(rng);
     SPAN_CHECK(ridx < n_lefts + n_rights);
```
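A sketch of what the new seeding buys: every (iteration, group) pair now gets its own stream, and each thread advances it by its offset within the group, replacing the shared-seed arithmetic marked ``fixme``. This is a hypothetical Python model assuming ``thrust::minstd_rand``'s standard parameters (multiplier 48271, modulus 2^31 − 1):

```python
M, A = 2147483647, 48271  # minstd parameters

def draw(iter_: int, n_groups: int, g: int, idx_in_group: int) -> int:
    seed = (iter_ * n_groups + g) % M or 1  # engine state must be non-zero
    x = seed
    for _ in range(idx_in_group + 1):  # discard(idx_in_group), then one draw
        x = (A * x) % M
    return x
```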

src/objective/lambdarank_obj.h

Lines changed: 4 additions & 4 deletions

```diff
@@ -227,15 +227,16 @@ void MakePairs(Context const* ctx, std::int32_t iter,
     ltr::position_t cnt = group_ptr[g + 1] - group_ptr[g];

     if (cache->Param().HasTruncation()) {
-      for (std::size_t i = 0; i < std::min(cnt, cache->Param().NumPair()); ++i) {
+      for (std::size_t i = 0, n = std::min(cnt, cache->Param().NumPair()); i < n; ++i) {
         for (std::size_t j = i + 1; j < cnt; ++j) {
           op(i, j);
         }
       }
     } else {
       CHECK_EQ(g_rank.size(), g_label.Size());
-      std::minstd_rand rnd(iter);
-      rnd.discard(g);  // fixme(jiamingy): honor the global seed
+
+      std::uint32_t seed = iter * (static_cast<std::uint32_t>(group_ptr.size()) - 1) + g;
+      std::minstd_rand rnd(seed);
       // sort label according to the rank list
       auto it = common::MakeIndexTransformIter(
           [&g_rank, &g_label](std::size_t idx) { return g_label(g_rank[idx]); });
@@ -244,7 +245,6 @@ void MakePairs(Context const* ctx, std::int32_t iter,
       // permutation iterator to get the original label
       auto rev_it = common::MakeIndexTransformIter(
           [&](std::size_t idx) { return g_label(g_rank[y_sorted_idx[idx]]); });
-
       for (std::size_t i = 0; i < cnt;) {
         std::size_t j = i + 1;
         // find the bucket boundary
```
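For the truncation branch, the tightened loop bound enumerates the same pairs as before, just without re-evaluating ``std::min`` each iteration; a small Python sketch of that enumeration:

```python
def topk_pairs(cnt: int, k: int):
    # Each of the top min(cnt, k) ranked documents is paired with every
    # document ranked below it.
    for i in range(min(cnt, k)):
        for j in range(i + 1, cnt):
            yield i, j

assert sum(1 for _ in topk_pairs(5, 2)) == 4 + 3  # i=0: 4 pairs, i=1: 3
```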
