7
7
8
8
#include < algorithm>
9
9
#include < cmath>
10
- #include < limits >
11
- #include < memory>
12
- #include < utility>
10
+ #include < cstddef > // for size_t
11
+ #include < memory> // for unique_ptr, make_unique
12
+ #include < utility> // for move
13
13
#include < vector>
14
14
15
15
#include " ../collective/communicator-inl.cuh"
@@ -216,9 +216,9 @@ struct GPUHistMakerDevice {
216
216
void InitFeatureGroupsOnce () {
217
217
if (!feature_groups) {
218
218
CHECK (page);
219
- feature_groups. reset ( new FeatureGroups (page->Cuts (), page->is_dense ,
220
- dh::MaxSharedMemoryOptin (ctx_->gpu_id ),
221
- sizeof (GradientPairPrecise) ));
219
+ feature_groups = std::make_unique< FeatureGroups> (page->Cuts (), page->is_dense ,
220
+ dh::MaxSharedMemoryOptin (ctx_->gpu_id ),
221
+ sizeof (GradientPairPrecise));
222
222
}
223
223
}
224
224
@@ -244,10 +244,10 @@ struct GPUHistMakerDevice {
244
244
245
245
this ->evaluator_ .Reset (page->Cuts (), feature_types, dmat->Info ().num_col_ , param, ctx_->gpu_id );
246
246
247
- quantiser. reset ( new GradientQuantiser (this ->gpair ) );
247
+ quantiser = std::make_unique< GradientQuantiser> (this ->gpair );
248
248
249
249
row_partitioner.reset (); // Release the device memory first before reallocating
250
- row_partitioner. reset ( new RowPartitioner (ctx_->gpu_id , sample.sample_rows ) );
250
+ row_partitioner = std::make_unique< RowPartitioner> (ctx_->gpu_id , sample.sample_rows );
251
251
252
252
// Init histogram
253
253
hist.Init (ctx_->gpu_id , page->Cuts ().TotalBins ());
@@ -294,7 +294,7 @@ struct GPUHistMakerDevice {
294
294
dh::TemporaryArray<GPUExpandEntry> entries (2 * candidates.size ());
295
295
// Store the feature set ptrs so they dont go out of scope before the kernel is called
296
296
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t >>> feature_sets;
297
- for (size_t i = 0 ; i < candidates.size (); i++) {
297
+ for (std:: size_t i = 0 ; i < candidates.size (); i++) {
298
298
auto candidate = candidates.at (i);
299
299
int left_nidx = tree[candidate.nid ].LeftChild ();
300
300
int right_nidx = tree[candidate.nid ].RightChild ();
@@ -327,14 +327,13 @@ struct GPUHistMakerDevice {
327
327
d_node_inputs.data ().get (), h_node_inputs.data (),
328
328
h_node_inputs.size () * sizeof (EvaluateSplitInputs), cudaMemcpyDefault));
329
329
330
- this ->evaluator_ .EvaluateSplits (nidx, max_active_features,
331
- dh::ToSpan (d_node_inputs), shared_inputs,
332
- dh::ToSpan (entries));
330
+ this ->evaluator_ .EvaluateSplits (nidx, max_active_features, dh::ToSpan (d_node_inputs),
331
+ shared_inputs, dh::ToSpan (entries));
333
332
dh::safe_cuda (cudaMemcpyAsync (pinned_candidates_out.data (),
334
333
entries.data ().get (), sizeof (GPUExpandEntry) * entries.size (),
335
334
cudaMemcpyDeviceToHost));
336
335
dh::DefaultStream ().Sync ();
337
- }
336
+ }
338
337
339
338
void BuildHist (int nidx) {
340
339
auto d_node_hist = hist.GetNodeHistogram (nidx);
@@ -366,31 +365,37 @@ struct GPUHistMakerDevice {
366
365
struct NodeSplitData {
367
366
RegTree::Node split_node;
368
367
FeatureType split_type;
369
- common::CatBitField node_cats;
368
+ common::KCatBitField node_cats;
370
369
};
371
370
372
- void UpdatePosition (const std::vector<GPUExpandEntry>& candidates, RegTree* p_tree) {
373
- if (candidates.empty ()) return ;
374
- std::vector<int > nidx (candidates.size ());
375
- std::vector<int > left_nidx (candidates.size ());
376
- std::vector<int > right_nidx (candidates.size ());
371
+ void UpdatePosition (std::vector<GPUExpandEntry> const & candidates, RegTree* p_tree) {
372
+ if (candidates.empty ()) {
373
+ return ;
374
+ }
375
+
376
+ std::vector<bst_node_t > nidx (candidates.size ());
377
+ std::vector<bst_node_t > left_nidx (candidates.size ());
378
+ std::vector<bst_node_t > right_nidx (candidates.size ());
377
379
std::vector<NodeSplitData> split_data (candidates.size ());
380
+
378
381
for (size_t i = 0 ; i < candidates.size (); i++) {
379
- auto & e = candidates[i];
382
+ auto const & e = candidates[i];
380
383
RegTree::Node split_node = (*p_tree)[e.nid ];
381
384
auto split_type = p_tree->NodeSplitType (e.nid );
382
385
nidx.at (i) = e.nid ;
383
386
left_nidx.at (i) = split_node.LeftChild ();
384
387
right_nidx.at (i) = split_node.RightChild ();
385
- split_data.at (i) = NodeSplitData{split_node, split_type, e.split .split_cats };
388
+ split_data.at (i) = NodeSplitData{split_node, split_type, evaluator_.GetDeviceNodeCats (e.nid )};
389
+
390
+ CHECK_EQ (split_type == FeatureType::kCategorical , e.split .is_cat );
386
391
}
387
392
388
393
auto d_matrix = page->GetDeviceAccessor (ctx_->gpu_id );
389
394
row_partitioner->UpdatePositionBatch (
390
395
nidx, left_nidx, right_nidx, split_data,
391
396
[=] __device__ (bst_uint ridx, const NodeSplitData& data) {
392
397
// given a row index, returns the node id it belongs to
393
- bst_float cut_value = d_matrix.GetFvalue (ridx, data.split_node .SplitIndex ());
398
+ float cut_value = d_matrix.GetFvalue (ridx, data.split_node .SplitIndex ());
394
399
// Missing value
395
400
bool go_left = true ;
396
401
if (isnan (cut_value)) {
@@ -620,7 +625,6 @@ struct GPUHistMakerDevice {
620
625
CHECK (common::CheckNAN (candidate.split .fvalue ));
621
626
std::vector<common::CatBitField::value_type> split_cats;
622
627
623
- CHECK_GT (candidate.split .split_cats .Bits ().size (), 0 );
624
628
auto h_cats = this ->evaluator_ .GetHostNodeCats (candidate.nid );
625
629
auto n_bins_feature = page->Cuts ().FeatureBins (candidate.split .findex );
626
630
split_cats.resize (common::CatBitField::ComputeStorageSize (n_bins_feature), 0 );
0 commit comments