
Commit 031d66e

Configuration for init estimation. (dmlc#8343)
* Configuration for init estimation.
* Check whether the model needs configuration based on const attribute `ModelFitted` instead of a mutable state.
* Add parameter `boost_from_average` to tell whether the user has specified base score.
* Add tests.
1 parent 2176e51 commit 031d66e

File tree

10 files changed: +242 −106 lines


doc/parameter.rst

Lines changed: 3 additions & 1 deletion
@@ -370,9 +370,11 @@ Specify the learning task and the corresponding learning objective. The objectiv
   - ``reg:gamma``: gamma regression with log-link. Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be `gamma-distributed <https://en.wikipedia.org/wiki/Gamma_distribution#Occurrence_and_applications>`_.
   - ``reg:tweedie``: Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be `Tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications>`_.
 
-* ``base_score`` [default=0.5]
+* ``base_score``
 
   - The initial prediction score of all instances, global bias
+  - The parameter is automatically estimated for selected objectives before training. To
+    disable the estimation, specify a real number argument.
   - For sufficient number of iterations, changing this value will not have too much effect.
 
 * ``eval_metric`` [default according to objective]
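From the user's side, leaving ``base_score`` unset now lets supported objectives estimate it from the training data, while passing any real number pins the value. A minimal sketch against the XGBoost C API (XGBoosterCreate, XGBoosterSetParam, and XGBoosterFree are existing entry points; data loading and training are omitted for brevity):

#include <xgboost/c_api.h>

int main() {
  BoosterHandle booster;
  // Create a booster with no cached DMatrix; enough for parameter handling.
  XGBoosterCreate(nullptr, 0, &booster);

  // Left unset, base_score is estimated from the training data for the
  // supported objectives before the first boosting round.

  // Specifying a real number disables the automatic estimation:
  XGBoosterSetParam(booster, "base_score", "0.5");

  XGBoosterFree(booster);
  return 0;
}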

include/xgboost/gbm.h

Lines changed: 5 additions & 0 deletions
@@ -75,6 +75,11 @@ class GradientBooster : public Model, public Configurable {
   /*! \brief Return number of boosted rounds.
    */
   virtual int32_t BoostedRounds() const = 0;
+  /**
+   * \brief Whether the model has already been trained. When tree booster is chosen, then
+   *        returns true when there are existing trees.
+   */
+  virtual bool ModelFitted() const = 0;
   /*!
    * \brief perform update to the model(boosting)
    * \param p_fmat feature matrix that provide access to features

include/xgboost/learner.h

Lines changed: 1 addition & 1 deletion
@@ -328,7 +328,7 @@ struct LearnerModelParam {
   void Copy(LearnerModelParam const& that);
 
   /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
-  bool Initialized() const { return num_feature != 0; }
+  bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
 };
 
 }  // namespace xgboost
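The stricter check guards against a partially filled parameter: a sketch of the failure mode (a simplified stand-in struct, not the real LearnerModelParam):

#include <cassert>
#include <cstdint>

// Stand-in with only the two fields the Initialized() query inspects.
struct ParamSketch {
  std::uint32_t num_feature{0};
  std::uint32_t num_output_group{0};
  // Checking num_feature alone would report a half-configured parameter
  // (features known, output groups not yet set) as initialized.
  bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
};

int main() {
  ParamSketch p;
  p.num_feature = 10;        // known as soon as the data is inspected
  assert(!p.Initialized());  // output groups still unset
  p.num_output_group = 1;    // set once the objective is configured
  assert(p.Initialized());
  return 0;
}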

src/common/host_device_vector.cu

Lines changed: 4 additions & 0 deletions
@@ -162,6 +162,10 @@ class HostDeviceVectorImpl {
     if (device_ >= 0) {
       LazySyncHost(GPUAccess::kNone);
     }
+
+    if (device_ >= 0 && device >= 0) {
+      CHECK_EQ(device_, device) << "New device ordinal is different from previous one.";
+    }
     device_ = device;
     if (device_ >= 0) {
       LazyResizeDevice(data_h_.size());
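The new CHECK_EQ turns a silent device migration into an explicit invariant: once the vector is bound to a GPU ordinal, rebinding it to a different one is a logic error. A self-contained sketch of the same guard (simplified class, assert standing in for CHECK_EQ):

#include <cassert>

// -1 denotes the host; a non-negative value is a GPU ordinal.
class DeviceBoundBuffer {
  int device_{-1};

 public:
  void SetDevice(int device) {
    // Only a GPU-to-GPU ordinal change is rejected; host round-trips stay legal.
    if (device_ >= 0 && device >= 0) {
      assert(device_ == device && "New device ordinal differs from previous one.");
    }
    device_ = device;
  }
  int Device() const { return device_; }
};

int main() {
  DeviceBoundBuffer buf;
  buf.SetDevice(0);   // first placement
  buf.SetDevice(0);   // same ordinal: allowed
  buf.SetDevice(-1);  // back to host: allowed
  return 0;
}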

src/common/linalg_op.h

Lines changed: 3 additions & 3 deletions
@@ -3,8 +3,8 @@
  */
 #ifndef XGBOOST_COMMON_LINALG_OP_H_
 #define XGBOOST_COMMON_LINALG_OP_H_
-#include <type_traits>
 #include <cstdint>  // std::int32_t
+#include <type_traits>
 
 #include "common.h"
 #include "threading_utils.h"
@@ -43,12 +43,12 @@ void ElementWiseKernelHost(linalg::TensorView<T, D> t, int32_t n_threads, Fn&& f
 
 #if !defined(XGBOOST_USE_CUDA)
 template <typename T, int32_t D, typename Fn>
-void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, void* s = nullptr) {
+void ElementWiseKernelDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr) {
  common::AssertGPUSupport();
 }
 
 template <typename T, int32_t D, typename Fn>
-void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, void* s = nullptr) {
+void ElementWiseTransformDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr) {
  common::AssertGPUSupport();
 }
 
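Leaving the parameters unnamed in the CPU-only stubs is the idiomatic way to keep the signature for overload resolution while silencing unused-parameter warnings. A minimal illustration of the idiom (hypothetical function name):

#include <iostream>

// The stub must match the GPU build's signature, but never touches its
// arguments; unnamed parameters document that and avoid -Wunused-parameter.
void ElementWiseDeviceStub(float* /*data*/, int /*size*/, void* /*stream*/ = nullptr) {
  std::cerr << "Built without GPU support.\n";
}

int main() {
  float xs[3]{1.f, 2.f, 3.f};
  ElementWiseDeviceStub(xs, 3);
  return 0;
}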

src/gbm/gblinear.cc

Lines changed: 2 additions & 0 deletions
@@ -95,6 +95,8 @@ class GBLinear : public GradientBooster {
     return model_.num_boosted_rounds;
   }
 
+  bool ModelFitted() const override { return BoostedRounds() != 0; }
+
   void Load(dmlc::Stream* fi) override {
     model_.Load(fi);
   }

src/gbm/gbtree.h

Lines changed: 4 additions & 0 deletions
@@ -252,6 +252,10 @@ class GBTree : public GradientBooster {
     return model_.trees.size() / this->LayerTrees();
   }
 
+  bool ModelFitted() const override {
+    return !model_.trees.empty() || !model_.trees_to_update.empty();
+  }
+
   void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *out_preds,
                     bool training, unsigned layer_begin, unsigned layer_end) override;
 
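Taken together with the new virtual in include/xgboost/gbm.h and the override in src/gbm/gblinear.cc, the design point is that "fitted" is derived from existing model state through a const query instead of being tracked in a separate mutable flag. A compilable sketch of that shape (simplified types, not the real class hierarchy):

#include <cassert>
#include <vector>

struct BoosterSketch {
  virtual ~BoosterSketch() = default;
  // Const query: the answer is computed from model state on demand.
  virtual bool ModelFitted() const = 0;
};

struct LinearSketch : BoosterSketch {
  int num_boosted_rounds{0};
  bool ModelFitted() const override { return num_boosted_rounds != 0; }
};

struct TreeSketch : BoosterSketch {
  std::vector<int> trees;            // trained trees
  std::vector<int> trees_to_update;  // trees loaded for refresh/update
  bool ModelFitted() const override {
    return !trees.empty() || !trees_to_update.empty();
  }
};

int main() {
  TreeSketch booster;
  assert(!booster.ModelFitted());
  booster.trees_to_update.push_back(0);  // a loaded model also counts as fitted
  assert(booster.ModelFitted());
  return 0;
}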

src/learner.cc

Lines changed: 81 additions & 56 deletions
@@ -12,6 +12,7 @@
 #include <dmlc/thread_local.h>
 
 #include <algorithm>
+#include <array>
 #include <atomic>
 #include <iomanip>
 #include <limits>  // std::numeric_limits
@@ -27,7 +28,6 @@
 #include "common/charconv.h"
 #include "common/common.h"
 #include "common/io.h"
-#include "common/linalg_op.h"
 #include "common/observer.h"
 #include "common/random.h"
 #include "common/threading_utils.h"
@@ -64,6 +64,15 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
 
 namespace xgboost {
 Learner::~Learner() = default;
+namespace {
+StringView ModelNotFitted() { return "Model is not yet initialized (not fitted)."; }
+
+template <typename T>
+T& UsePtr(T& ptr) {  // NOLINT
+  CHECK(ptr);
+  return ptr;
+}
+}  // anonymous namespace
 
 /*! \brief training parameter for regression
  *
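UsePtr is a checked-access helper: CHECK that the component exists, then hand back the smart pointer so the call site reads like plain member access. A standalone sketch of the idiom (assert standing in for CHECK, hypothetical types):

#include <cassert>
#include <memory>
#include <string>

template <typename T>
T& UsePtr(T& ptr) {
  assert(ptr && "component has not been configured yet");
  return ptr;
}

struct ObjectiveSketch {
  std::string Task() const { return "regression"; }
};

int main() {
  std::unique_ptr<ObjectiveSketch> obj = std::make_unique<ObjectiveSketch>();
  // Fails loudly here, rather than dereferencing null somewhere downstream.
  assert(UsePtr(obj)->Task() == "regression");
  return 0;
}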
@@ -75,20 +84,28 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   /* \brief global bias */
   bst_float base_score;
   /* \brief number of features */
-  uint32_t num_feature;
+  bst_feature_t num_feature;
   /* \brief number of classes, if it is multi-class classification */
-  int32_t num_class;
+  std::int32_t num_class;
   /*! \brief Model contain additional properties */
   int32_t contain_extra_attrs;
   /*! \brief Model contain eval metrics */
   int32_t contain_eval_metrics;
   /*! \brief the version of XGBoost. */
-  uint32_t major_version;
-  uint32_t minor_version;
+  std::uint32_t major_version;
+  std::uint32_t minor_version;
 
   uint32_t num_target{1};
-
-  int32_t base_score_estimated{0};
+  /**
+   * \brief Whether we should calculate the base score from training data.
+   *
+   * This is a private parameter as we can't expose it as boolean due to binary model
+   * format. Exposing it as integer creates inconsistency with other parameters.
+   *
+   * Automatically disabled when base_score is specifed by user. int32 is used instead
+   * of bool for the ease of serialization.
+   */
+  std::int32_t boost_from_average{true};
   /*! \brief reserved field */
   int reserved[25];
   /*! \brief constructor */
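The comment above boost_from_average hints at the constraint: this struct is written byte-for-byte by the legacy binary model format, so the new flag replaces the old same-width base_score_estimated field rather than growing the struct, and reserved[25] keeps room for the future. A sketch of the pattern under simplified, hypothetical fields:

#include <cstdint>
#include <fstream>

// A POD header serialized as raw bytes: new flags must reuse a same-width
// slot or the reserved space, so files written by old versions stay readable.
struct ModelHeaderSketch {
  float base_score{0.5f};
  std::int32_t num_feature{0};
  std::int32_t boost_from_average{1};  // replaced a same-width int32 flag
  std::int32_t reserved[5]{};          // room for future fields
};
// Any accidental layout change becomes a compile error, as in learner.cc.
static_assert(sizeof(ModelHeaderSketch) == 32,
              "Do not change the size of this struct; it breaks binary IO.");

int main() {
  ModelHeaderSketch header;
  std::ofstream out{"model.bin", std::ios::binary};
  out.write(reinterpret_cast<char const*>(&header), sizeof(header));
  return 0;
}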
@@ -98,14 +115,14 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     num_target = 1;
     major_version = std::get<0>(Version::Self());
     minor_version = std::get<1>(Version::Self());
-    base_score_estimated = 0;
+    boost_from_average = true;
     static_assert(sizeof(LearnerModelParamLegacy) == 136,
                   "Do not change the size of this struct, as it will break binary IO.");
   }
 
   // Skip other legacy fields.
   Json ToJson() const {
-    Object obj;
+    Json obj{Object{}};
     char floats[NumericLimits<float>::kToCharsSize];
     auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
     CHECK(ret.ec == std::errc{});
@@ -120,15 +137,19 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,
                    static_cast<int64_t>(num_class));
     CHECK(ret.ec == std::errc());
-    obj["num_class"] =
-        std::string{integers, static_cast<size_t>(std::distance(integers, ret.ptr))};
+    obj["num_class"] = std::string{integers, static_cast<size_t>(std::distance(integers, ret.ptr))};
 
     ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,
                    static_cast<int64_t>(num_target));
     obj["num_target"] =
         std::string{integers, static_cast<size_t>(std::distance(integers, ret.ptr))};
 
-    return Json(std::move(obj));
+    ret = to_chars(integers, integers + NumericLimits<std::int64_t>::kToCharsSize,
+                   static_cast<std::int64_t>(boost_from_average));
+    obj["boost_from_average"] =
+        std::string{integers, static_cast<std::size_t>(std::distance(integers, ret.ptr))};
+
+    return obj;
   }
   void FromJson(Json const& obj) {
     auto const& j_param = get<Object const>(obj);
@@ -139,13 +160,15 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     if (n_targets_it != j_param.cend()) {
       m["num_target"] = get<String const>(n_targets_it->second);
     }
+    auto bse_it = j_param.find("boost_from_average");
+    if (bse_it != j_param.cend()) {
+      m["boost_from_average"] = get<String const>(bse_it->second);
+    }
 
     this->Init(m);
 
     std::string str = get<String const>(j_param.at("base_score"));
     from_chars(str.c_str(), str.c_str() + str.size(), base_score);
-    // It can only be estimated during the first training, we consider it estimated afterward
-    base_score_estimated = 1;
   }
 
   LearnerModelParamLegacy ByteSwap() const {
@@ -158,22 +181,21 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1);
     dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1);
     dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1);
-    dmlc::ByteSwap(&x.base_score_estimated, sizeof(x.base_score_estimated), 1);
+    dmlc::ByteSwap(&x.boost_from_average, sizeof(x.boost_from_average), 1);
     dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
     return x;
   }
 
   template <typename Container>
   Args UpdateAllowUnknown(Container const& kwargs) {
     // Detect whether user has made their own base score.
-    if (std::find_if(kwargs.cbegin(), kwargs.cend(),
-                     [](auto const& kv) { return kv.first == "base_score"; }) != kwargs.cend()) {
-      base_score_estimated = true;
-    }
-    if (std::find_if(kwargs.cbegin(), kwargs.cend(), [](auto const& kv) {
-          return kv.first == "base_score_estimated";
-        }) != kwargs.cend()) {
-      LOG(FATAL) << "`base_score_estimated` cannot be specified as hyper-parameter.";
+    auto find_key = [&kwargs](char const* key) {
+      return std::find_if(kwargs.cbegin(), kwargs.cend(),
+                          [key](auto const& kv) { return kv.first == key; });
+    };
+    auto it = find_key("base_score");
+    if (it != kwargs.cend()) {
+      boost_from_average = false;
     }
     return dmlc::Parameter<LearnerModelParamLegacy>::UpdateAllowUnknown(kwargs);
   }
@@ -195,7 +217,9 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
         .set_default(1)
         .set_lower_bound(1)
         .describe("Number of target for multi-target regression.");
-    DMLC_DECLARE_FIELD(base_score_estimated).set_default(0);
+    DMLC_DECLARE_FIELD(boost_from_average)
+        .set_default(true)
+        .describe("Whether we should calculate the base score from training data.");
   }
 };
 
@@ -224,7 +248,7 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy
 
 linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(int32_t device) const {
   // multi-class is not yet supported.
-  CHECK_EQ(base_score_.Size(), 1);
+  CHECK_EQ(base_score_.Size(), 1) << ModelNotFitted();
   if (device == Context::kCpuId) {
     // Make sure that we won't run into race condition.
     CHECK(base_score_.Data()->HostCanRead());
@@ -385,6 +409,21 @@ class LearnerConfiguration : public Learner {
   // Initial prediction.
   std::vector<std::string> metric_names_;
 
+  void ConfigureModelParamWithoutBaseScore() {
+    // Convert mparam to learner_model_param
+    this->ConfigureTargets();
+
+    auto task = UsePtr(obj_)->Task();
+    linalg::Tensor<float, 1> base_score({1}, Ctx()->gpu_id);
+    auto h_base_score = base_score.HostView();
+
+    // transform to margin
+    h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
+    // move it to model param, which is shared with all other components.
+    learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task);
+    CHECK(learner_model_param_.Initialized());
+    CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0);
+  }
   /**
    * \brief Calculate the `base_score` based on input data.
    *
@@ -403,38 +442,24 @@ class LearnerConfiguration : public Learner {
     // - model loaded from new binary or JSON.
     // - model is created from scratch.
     // - model is configured second time due to change of parameter
-    CHECK(obj_);
-    if (!mparam_.base_score_estimated) {
+    if (!learner_model_param_.Initialized()) {
+      this->ConfigureModelParamWithoutBaseScore();
+    }
+    if (mparam_.boost_from_average && !UsePtr(gbm_)->ModelFitted()) {
       if (p_fmat) {
+        auto const& info = p_fmat->Info();
+        info.Validate(Ctx()->gpu_id);
         // We estimate it from input data.
         linalg::Tensor<float, 1> base_score;
-        obj_->InitEstimation(p_fmat->Info(), &base_score);
+        UsePtr(obj_)->InitEstimation(info, &base_score);
         mparam_.base_score = base_score(0);
         CHECK(!std::isnan(mparam_.base_score));
-      } else {
-        mparam_.base_score = ObjFunction::DefaultBaseScore();
       }
-      mparam_.base_score_estimated = true;
       // Update the shared model parameter
-      this->ConfigureModelParam();
+      this->ConfigureModelParamWithoutBaseScore();
     }
-  }
-
-  // Convert mparam to learner_model_param
-  void ConfigureModelParam() {
-    this->ConfigureTargets();
-
-    CHECK(obj_);
-    auto task = obj_->Task();
-    linalg::Tensor<float, 1> base_score({1}, Ctx()->gpu_id);
-    auto h_base_score = base_score.HostView();
-
-    // transform to margin
-    h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
-    // move it to model param, which is shared with all other components.
-    learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task);
-    CHECK(learner_model_param_.Initialized());
-    CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0);
+    CHECK(!std::isnan(mparam_.base_score));
+    CHECK(!std::isinf(mparam_.base_score));
   }
 
  public:
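The rewritten InitBaseScore boils down to one gate: estimate only when the user kept boost_from_average on and the booster reports no fitted state, and skip the data-dependent part when no DMatrix is at hand. A compilable sketch of that gate (simplified types and hypothetical names; the real path calls obj_->InitEstimation and ConfigureModelParamWithoutBaseScore):

#include <cassert>
#include <cmath>
#include <vector>

struct LearnerSketch {
  bool boost_from_average{true};  // false once the user pins base_score
  bool model_fitted{false};       // GradientBooster::ModelFitted() stand-in
  double base_score{0.5};

  void InitBaseScore(std::vector<double> const* labels) {
    if (!boost_from_average || model_fitted) {
      return;  // a user-specified value or an already-trained model wins
    }
    if (labels != nullptr && !labels->empty()) {
      double sum = 0.0;
      for (double y : *labels) sum += y;
      base_score = sum / static_cast<double>(labels->size());
    }
    // Without data (prediction before any training) the default is kept.
    assert(!std::isnan(base_score) && !std::isinf(base_score));
  }
};

int main() {
  LearnerSketch learner;
  std::vector<double> y{2.0, 4.0};
  learner.InitBaseScore(&y);
  assert(learner.base_score == 3.0);
  return 0;
}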
@@ -496,7 +521,8 @@ class LearnerConfiguration : public Learner {
     learner_model_param_.task = obj_->Task();  // required by gbm configuration.
     this->ConfigureGBM(old_tparam, args);
     ctx_.ConfigureGpuId(this->gbm_->UseGPU());
-    this->ConfigureModelParam();
+
+    this->ConfigureModelParamWithoutBaseScore();
 
     this->ConfigureMetrics(args);
 
@@ -510,8 +536,8 @@ class LearnerConfiguration : public Learner {
   }
 
   void CheckModelInitialized() const {
-    CHECK(learner_model_param_.Initialized()) << "Model not yet initialized.";
-    CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0);
+    CHECK(learner_model_param_.Initialized()) << ModelNotFitted();
+    CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted();
   }
 
   virtual PredictionContainer* GetPredictionCache() const {
@@ -1318,8 +1344,6 @@ class LearnerImpl : public LearnerIO {
                     HostDeviceVector<GradientPair>* in_gpair) override {
     monitor_.Start("BoostOneIter");
     this->Configure();
-    // Should have been set to default in the first prediction.
-    CHECK(mparam_.base_score_estimated);
 
     if (ctx_.seed_per_iteration) {
      common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
@@ -1380,7 +1404,9 @@ class LearnerImpl : public LearnerIO {
                          static_cast<int>(pred_interactions) +
                          static_cast<int>(pred_contribs);
     this->Configure();
-    this->InitBaseScore(nullptr);
+    if (training) {
+      this->InitBaseScore(nullptr);
+    }
     this->CheckModelInitialized();
 
     CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
@@ -1425,7 +1451,6 @@ class LearnerImpl : public LearnerIO {
                          HostDeviceVector<bst_float>** out_preds, uint32_t iteration_begin,
                          uint32_t iteration_end) override {
     this->Configure();
-    this->InitBaseScore(nullptr);
     this->CheckModelInitialized();
 
     auto& out_predictions = this->GetThreadLocal().prediction_entry;
