12
12
#include < dmlc/thread_local.h>
13
13
14
14
#include < algorithm>
15
+ #include < array>
15
16
#include < atomic>
16
17
#include < iomanip>
17
18
#include < limits> // std::numeric_limits
27
28
#include " common/charconv.h"
28
29
#include " common/common.h"
29
30
#include " common/io.h"
30
- #include " common/linalg_op.h"
31
31
#include " common/observer.h"
32
32
#include " common/random.h"
33
33
#include " common/threading_utils.h"
@@ -64,6 +64,15 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
64
64
65
65
namespace xgboost {
66
66
Learner::~Learner () = default ;
67
+ namespace {
68
+ StringView ModelNotFitted () { return " Model is not yet initialized (not fitted)." ; }
69
+
70
+ template <typename T>
71
+ T& UsePtr (T& ptr) { // NOLINT
72
+ CHECK (ptr);
73
+ return ptr;
74
+ }
75
+ } // anonymous namespace
67
76
68
77
/* ! \brief training parameter for regression
69
78
*
@@ -75,20 +84,28 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
75
84
/* \brief global bias */
76
85
bst_float base_score;
77
86
/* \brief number of features */
78
- uint32_t num_feature;
87
+ bst_feature_t num_feature;
79
88
/* \brief number of classes, if it is multi-class classification */
80
- int32_t num_class;
89
+ std:: int32_t num_class;
81
90
/* ! \brief Model contain additional properties */
82
91
int32_t contain_extra_attrs;
83
92
/* ! \brief Model contain eval metrics */
84
93
int32_t contain_eval_metrics;
85
94
/* ! \brief the version of XGBoost. */
86
- uint32_t major_version;
87
- uint32_t minor_version;
95
+ std:: uint32_t major_version;
96
+ std:: uint32_t minor_version;
88
97
89
98
uint32_t num_target{1 };
90
-
91
- int32_t base_score_estimated{0 };
99
+ /* *
100
+ * \brief Whether we should calculate the base score from training data.
101
+ *
102
+ * This is a private parameter as we can't expose it as boolean due to binary model
103
+ * format. Exposing it as integer creates inconsistency with other parameters.
104
+ *
105
+ * Automatically disabled when base_score is specifed by user. int32 is used instead
106
+ * of bool for the ease of serialization.
107
+ */
108
+ std::int32_t boost_from_average{true };
92
109
/* ! \brief reserved field */
93
110
int reserved[25 ];
94
111
/* ! \brief constructor */
@@ -98,14 +115,14 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
98
115
num_target = 1 ;
99
116
major_version = std::get<0 >(Version::Self ());
100
117
minor_version = std::get<1 >(Version::Self ());
101
- base_score_estimated = 0 ;
118
+ boost_from_average = true ;
102
119
static_assert (sizeof (LearnerModelParamLegacy) == 136 ,
103
120
" Do not change the size of this struct, as it will break binary IO." );
104
121
}
105
122
106
123
// Skip other legacy fields.
107
124
Json ToJson () const {
108
- Object obj;
125
+ Json obj{Object{}} ;
109
126
char floats[NumericLimits<float >::kToCharsSize ];
110
127
auto ret = to_chars (floats, floats + NumericLimits<float >::kToCharsSize , base_score);
111
128
CHECK (ret.ec == std::errc{});
@@ -120,15 +137,19 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
120
137
ret = to_chars (integers, integers + NumericLimits<int64_t >::kToCharsSize ,
121
138
static_cast <int64_t >(num_class));
122
139
CHECK (ret.ec == std::errc ());
123
- obj[" num_class" ] =
124
- std::string{integers, static_cast <size_t >(std::distance (integers, ret.ptr ))};
140
+ obj[" num_class" ] = std::string{integers, static_cast <size_t >(std::distance (integers, ret.ptr ))};
125
141
126
142
ret = to_chars (integers, integers + NumericLimits<int64_t >::kToCharsSize ,
127
143
static_cast <int64_t >(num_target));
128
144
obj[" num_target" ] =
129
145
std::string{integers, static_cast <size_t >(std::distance (integers, ret.ptr ))};
130
146
131
- return Json (std::move (obj));
147
+ ret = to_chars (integers, integers + NumericLimits<std::int64_t >::kToCharsSize ,
148
+ static_cast <std::int64_t >(boost_from_average));
149
+ obj[" boost_from_average" ] =
150
+ std::string{integers, static_cast <std::size_t >(std::distance (integers, ret.ptr ))};
151
+
152
+ return obj;
132
153
}
133
154
void FromJson (Json const & obj) {
134
155
auto const & j_param = get<Object const >(obj);
@@ -139,13 +160,15 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
139
160
if (n_targets_it != j_param.cend ()) {
140
161
m[" num_target" ] = get<String const >(n_targets_it->second );
141
162
}
163
+ auto bse_it = j_param.find (" boost_from_average" );
164
+ if (bse_it != j_param.cend ()) {
165
+ m[" boost_from_average" ] = get<String const >(bse_it->second );
166
+ }
142
167
143
168
this ->Init (m);
144
169
145
170
std::string str = get<String const >(j_param.at (" base_score" ));
146
171
from_chars (str.c_str (), str.c_str () + str.size (), base_score);
147
- // It can only be estimated during the first training, we consider it estimated afterward
148
- base_score_estimated = 1 ;
149
172
}
150
173
151
174
LearnerModelParamLegacy ByteSwap () const {
@@ -158,22 +181,21 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
158
181
dmlc::ByteSwap (&x.major_version , sizeof (x.major_version ), 1 );
159
182
dmlc::ByteSwap (&x.minor_version , sizeof (x.minor_version ), 1 );
160
183
dmlc::ByteSwap (&x.num_target , sizeof (x.num_target ), 1 );
161
- dmlc::ByteSwap (&x.base_score_estimated , sizeof (x.base_score_estimated ), 1 );
184
+ dmlc::ByteSwap (&x.boost_from_average , sizeof (x.boost_from_average ), 1 );
162
185
dmlc::ByteSwap (x.reserved , sizeof (x.reserved [0 ]), sizeof (x.reserved ) / sizeof (x.reserved [0 ]));
163
186
return x;
164
187
}
165
188
166
189
template <typename Container>
167
190
Args UpdateAllowUnknown (Container const & kwargs) {
168
191
// Detect whether user has made their own base score.
169
- if (std::find_if (kwargs.cbegin (), kwargs.cend (),
170
- [](auto const & kv) { return kv.first == " base_score" ; }) != kwargs.cend ()) {
171
- base_score_estimated = true ;
172
- }
173
- if (std::find_if (kwargs.cbegin (), kwargs.cend (), [](auto const & kv) {
174
- return kv.first == " base_score_estimated" ;
175
- }) != kwargs.cend ()) {
176
- LOG (FATAL) << " `base_score_estimated` cannot be specified as hyper-parameter." ;
192
+ auto find_key = [&kwargs](char const * key) {
193
+ return std::find_if (kwargs.cbegin (), kwargs.cend (),
194
+ [key](auto const & kv) { return kv.first == key; });
195
+ };
196
+ auto it = find_key (" base_score" );
197
+ if (it != kwargs.cend ()) {
198
+ boost_from_average = false ;
177
199
}
178
200
return dmlc::Parameter<LearnerModelParamLegacy>::UpdateAllowUnknown (kwargs);
179
201
}
@@ -195,7 +217,9 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
195
217
.set_default (1 )
196
218
.set_lower_bound (1 )
197
219
.describe (" Number of target for multi-target regression." );
198
- DMLC_DECLARE_FIELD (base_score_estimated).set_default (0 );
220
+ DMLC_DECLARE_FIELD (boost_from_average)
221
+ .set_default (true )
222
+ .describe (" Whether we should calculate the base score from training data." );
199
223
}
200
224
};
201
225
@@ -224,7 +248,7 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy
224
248
225
249
linalg::TensorView<float const , 1 > LearnerModelParam::BaseScore (int32_t device) const {
226
250
// multi-class is not yet supported.
227
- CHECK_EQ (base_score_.Size (), 1 );
251
+ CHECK_EQ (base_score_.Size (), 1 ) << ModelNotFitted () ;
228
252
if (device == Context::kCpuId ) {
229
253
// Make sure that we won't run into race condition.
230
254
CHECK (base_score_.Data ()->HostCanRead ());
@@ -385,6 +409,21 @@ class LearnerConfiguration : public Learner {
385
409
// Initial prediction.
386
410
std::vector<std::string> metric_names_;
387
411
412
+ void ConfigureModelParamWithoutBaseScore () {
413
+ // Convert mparam to learner_model_param
414
+ this ->ConfigureTargets ();
415
+
416
+ auto task = UsePtr (obj_)->Task ();
417
+ linalg::Tensor<float , 1 > base_score ({1 }, Ctx ()->gpu_id );
418
+ auto h_base_score = base_score.HostView ();
419
+
420
+ // transform to margin
421
+ h_base_score (0 ) = obj_->ProbToMargin (mparam_.base_score );
422
+ // move it to model param, which is shared with all other components.
423
+ learner_model_param_ = LearnerModelParam (Ctx (), mparam_, std::move (base_score), task);
424
+ CHECK (learner_model_param_.Initialized ());
425
+ CHECK_NE (learner_model_param_.BaseScore (Ctx ()).Size (), 0 );
426
+ }
388
427
/* *
389
428
* \brief Calculate the `base_score` based on input data.
390
429
*
@@ -403,38 +442,24 @@ class LearnerConfiguration : public Learner {
403
442
// - model loaded from new binary or JSON.
404
443
// - model is created from scratch.
405
444
// - model is configured second time due to change of parameter
406
- CHECK (obj_);
407
- if (!mparam_.base_score_estimated ) {
445
+ if (!learner_model_param_.Initialized ()) {
446
+ this ->ConfigureModelParamWithoutBaseScore ();
447
+ }
448
+ if (mparam_.boost_from_average && !UsePtr (gbm_)->ModelFitted ()) {
408
449
if (p_fmat) {
450
+ auto const & info = p_fmat->Info ();
451
+ info.Validate (Ctx ()->gpu_id );
409
452
// We estimate it from input data.
410
453
linalg::Tensor<float , 1 > base_score;
411
- obj_->InitEstimation (p_fmat-> Info () , &base_score);
454
+ UsePtr ( obj_) ->InitEstimation (info , &base_score);
412
455
mparam_.base_score = base_score (0 );
413
456
CHECK (!std::isnan (mparam_.base_score ));
414
- } else {
415
- mparam_.base_score = ObjFunction::DefaultBaseScore ();
416
457
}
417
- mparam_.base_score_estimated = true ;
418
458
// Update the shared model parameter
419
- this ->ConfigureModelParam ();
459
+ this ->ConfigureModelParamWithoutBaseScore ();
420
460
}
421
- }
422
-
423
- // Convert mparam to learner_model_param
424
- void ConfigureModelParam () {
425
- this ->ConfigureTargets ();
426
-
427
- CHECK (obj_);
428
- auto task = obj_->Task ();
429
- linalg::Tensor<float , 1 > base_score ({1 }, Ctx ()->gpu_id );
430
- auto h_base_score = base_score.HostView ();
431
-
432
- // transform to margin
433
- h_base_score (0 ) = obj_->ProbToMargin (mparam_.base_score );
434
- // move it to model param, which is shared with all other components.
435
- learner_model_param_ = LearnerModelParam (Ctx (), mparam_, std::move (base_score), task);
436
- CHECK (learner_model_param_.Initialized ());
437
- CHECK_NE (learner_model_param_.BaseScore (Ctx ()).Size (), 0 );
461
+ CHECK (!std::isnan (mparam_.base_score ));
462
+ CHECK (!std::isinf (mparam_.base_score ));
438
463
}
439
464
440
465
public:
@@ -496,7 +521,8 @@ class LearnerConfiguration : public Learner {
496
521
learner_model_param_.task = obj_->Task (); // required by gbm configuration.
497
522
this ->ConfigureGBM (old_tparam, args);
498
523
ctx_.ConfigureGpuId (this ->gbm_ ->UseGPU ());
499
- this ->ConfigureModelParam ();
524
+
525
+ this ->ConfigureModelParamWithoutBaseScore ();
500
526
501
527
this ->ConfigureMetrics (args);
502
528
@@ -510,8 +536,8 @@ class LearnerConfiguration : public Learner {
510
536
}
511
537
512
538
// Verify the model has been fitted (or loaded) before it is used for
// prediction/serialization; both the flag and the base-score tensor are
// required, and both failures report the same user-facing message.
void CheckModelInitialized() const {
  CHECK(learner_model_param_.Initialized()) << ModelNotFitted();
  CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted();
}
516
542
517
543
virtual PredictionContainer* GetPredictionCache () const {
@@ -1318,8 +1344,6 @@ class LearnerImpl : public LearnerIO {
1318
1344
HostDeviceVector<GradientPair>* in_gpair) override {
1319
1345
monitor_.Start (" BoostOneIter" );
1320
1346
this ->Configure ();
1321
- // Should have been set to default in the first prediction.
1322
- CHECK (mparam_.base_score_estimated );
1323
1347
1324
1348
if (ctx_.seed_per_iteration ) {
1325
1349
common::GlobalRandom ().seed (ctx_.seed * kRandSeedMagic + iter);
@@ -1380,7 +1404,9 @@ class LearnerImpl : public LearnerIO {
1380
1404
static_cast <int >(pred_interactions) +
1381
1405
static_cast <int >(pred_contribs);
1382
1406
this ->Configure ();
1383
- this ->InitBaseScore (nullptr );
1407
+ if (training) {
1408
+ this ->InitBaseScore (nullptr );
1409
+ }
1384
1410
this ->CheckModelInitialized ();
1385
1411
1386
1412
CHECK_LE (multiple_predictions, 1 ) << " Perform one kind of prediction at a time." ;
@@ -1425,7 +1451,6 @@ class LearnerImpl : public LearnerIO {
1425
1451
HostDeviceVector<bst_float>** out_preds, uint32_t iteration_begin,
1426
1452
uint32_t iteration_end) override {
1427
1453
this ->Configure ();
1428
- this ->InitBaseScore (nullptr );
1429
1454
this ->CheckModelInitialized ();
1430
1455
1431
1456
auto & out_predictions = this ->GetThreadLocal ().prediction_entry ;
0 commit comments