Skip to content

Commit f588252

Browse files
authored
[sycl] add loss guided hist building (dmlc#10251)
Co-authored-by: Dmitry Razdoburdin <>
1 parent 9b46505 commit f588252

File tree

7 files changed

+459
-30
lines changed

7 files changed

+459
-30
lines changed

plugin/sycl/common/hist_util.h

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,93 @@ ::sycl::event SubtractionHist(::sycl::queue qu,
4646
const GHistRow<GradientSumT, MemoryType::on_device>& src2,
4747
size_t size, ::sycl::event event_priv);
4848

49+
/*!
50+
* \brief Histograms of gradient statistics for multiple nodes
51+
*/
52+
template<typename GradientSumT, MemoryType memory_type = MemoryType::shared>
53+
class HistCollection {
54+
public:
55+
using GHistRowT = GHistRow<GradientSumT, memory_type>;
56+
57+
// Access histogram for i-th node
58+
GHistRowT& operator[](bst_uint nid) {
59+
return *(data_.at(nid));
60+
}
61+
62+
const GHistRowT& operator[](bst_uint nid) const {
63+
return *(data_.at(nid));
64+
}
65+
66+
// Initialize histogram collection
67+
void Init(::sycl::queue qu, uint32_t nbins) {
68+
qu_ = qu;
69+
if (nbins_ != nbins) {
70+
nbins_ = nbins;
71+
data_.clear();
72+
}
73+
}
74+
75+
// Create an empty histogram for i-th node
76+
::sycl::event AddHistRow(bst_uint nid) {
77+
::sycl::event event;
78+
if (data_.count(nid) == 0) {
79+
data_[nid] =
80+
std::make_shared<GHistRowT>(&qu_, nbins_,
81+
xgboost::detail::GradientPairInternal<GradientSumT>(0, 0),
82+
&event);
83+
} else {
84+
data_[nid]->Resize(&qu_, nbins_,
85+
xgboost::detail::GradientPairInternal<GradientSumT>(0, 0),
86+
&event);
87+
}
88+
return event;
89+
}
90+
91+
private:
92+
/*! \brief Number of all bins over all features */
93+
uint32_t nbins_ = 0;
94+
95+
std::unordered_map<uint32_t, std::shared_ptr<GHistRowT>> data_;
96+
97+
::sycl::queue qu_;
98+
};
99+
100+
/*!
101+
* \brief Stores temporary histograms to compute them in parallel
102+
*/
103+
template<typename GradientSumT>
104+
class ParallelGHistBuilder {
105+
public:
106+
using GHistRowT = GHistRow<GradientSumT, MemoryType::on_device>;
107+
108+
void Init(::sycl::queue qu, size_t nbins) {
109+
qu_ = qu;
110+
if (nbins != nbins_) {
111+
hist_buffer_.Init(qu_, nbins);
112+
nbins_ = nbins;
113+
}
114+
}
115+
116+
void Reset(size_t nblocks) {
117+
hist_device_buffer_.Resize(&qu_, nblocks * nbins_ * 2);
118+
}
119+
120+
GHistRowT& GetDeviceBuffer() {
121+
return hist_device_buffer_;
122+
}
123+
124+
protected:
125+
/*! \brief Number of bins in each histogram */
126+
size_t nbins_ = 0;
127+
/*! \brief Buffers for histograms for all nodes processed */
128+
HistCollection<GradientSumT> hist_buffer_;
129+
130+
/*! \brief Buffer for additional histograms for Parallel processing */
131+
GHistRowT hist_device_buffer_;
132+
133+
::sycl::queue qu_;
134+
};
135+
49136
/*!
50137
* \brief Builder for histograms of gradient statistics
51138
*/

plugin/sycl/data.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ class USMVector {
8080
qu->fill(data_.get(), v, size_).wait();
8181
}
8282

83+
USMVector(::sycl::queue* qu, size_t size, T v,
84+
::sycl::event* event) : size_(size), capacity_(size) {
85+
data_ = allocate_memory_(qu, size_);
86+
*event = qu->fill(data_.get(), v, size_, *event);
87+
}
88+
8389
USMVector(::sycl::queue* qu, const std::vector<T> &vec) {
8490
size_ = vec.size();
8591
capacity_ = size_;

plugin/sycl/tree/hist_row_adder.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*!
2+
* Copyright 2017-2024 by Contributors
3+
* \file hist_row_adder.h
4+
*/
5+
#ifndef PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_
6+
#define PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_
7+
8+
#include <vector>
9+
#include <algorithm>
10+
11+
namespace xgboost {
12+
namespace sycl {
13+
namespace tree {
14+
15+
template <typename GradientSumT>
16+
class HistRowsAdder {
17+
public:
18+
virtual void AddHistRows(HistUpdater<GradientSumT>* builder,
19+
std::vector<int>* sync_ids, RegTree *p_tree) = 0;
20+
virtual ~HistRowsAdder() = default;
21+
};
22+
23+
template <typename GradientSumT>
24+
class BatchHistRowsAdder: public HistRowsAdder<GradientSumT> {
25+
public:
26+
void AddHistRows(HistUpdater<GradientSumT>* builder,
27+
std::vector<int>* sync_ids, RegTree *p_tree) override {
28+
builder->builder_monitor_.Start("AddHistRows");
29+
30+
for (auto const& entry : builder->nodes_for_explicit_hist_build_) {
31+
int nid = entry.nid;
32+
auto event = builder->hist_.AddHistRow(nid);
33+
}
34+
for (auto const& node : builder->nodes_for_subtraction_trick_) {
35+
auto event = builder->hist_.AddHistRow(node.nid);
36+
}
37+
38+
builder->builder_monitor_.Stop("AddHistRows");
39+
}
40+
};
41+
42+
} // namespace tree
43+
} // namespace sycl
44+
} // namespace xgboost
45+
46+
#endif // PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_

plugin/sycl/tree/hist_synchronizer.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*!
2+
* Copyright 2017-2024 by Contributors
3+
* \file hist_synchronizer.h
4+
*/
5+
#ifndef PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_
6+
#define PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_
7+
8+
#include <vector>
9+
10+
#include "../common/hist_util.h"
11+
#include "expand_entry.h"
12+
13+
namespace xgboost {
14+
namespace sycl {
15+
namespace tree {
16+
17+
template <typename GradientSumT>
18+
class HistUpdater;
19+
20+
template <typename GradientSumT>
21+
class HistSynchronizer {
22+
public:
23+
virtual void SyncHistograms(HistUpdater<GradientSumT>* builder,
24+
const std::vector<int>& sync_ids,
25+
RegTree *p_tree) = 0;
26+
virtual ~HistSynchronizer() = default;
27+
};
28+
29+
template <typename GradientSumT>
30+
class BatchHistSynchronizer: public HistSynchronizer<GradientSumT> {
31+
public:
32+
void SyncHistograms(HistUpdater<GradientSumT>* builder,
33+
const std::vector<int>& sync_ids,
34+
RegTree *p_tree) override {
35+
builder->builder_monitor_.Start("SyncHistograms");
36+
const size_t nbins = builder->hist_builder_.GetNumBins();
37+
38+
hist_sync_events_.resize(builder->nodes_for_explicit_hist_build_.size());
39+
for (int i = 0; i < builder->nodes_for_explicit_hist_build_.size(); i++) {
40+
const auto entry = builder->nodes_for_explicit_hist_build_[i];
41+
auto& this_hist = builder->hist_[entry.nid];
42+
43+
if (!(*p_tree)[entry.nid].IsRoot()) {
44+
const size_t parent_id = (*p_tree)[entry.nid].Parent();
45+
auto& parent_hist = builder->hist_[parent_id];
46+
auto& sibling_hist = builder->hist_[entry.GetSiblingId(p_tree, parent_id)];
47+
hist_sync_events_[i] = common::SubtractionHist(builder->qu_, &sibling_hist, parent_hist,
48+
this_hist, nbins, ::sycl::event());
49+
}
50+
}
51+
builder->qu_.wait_and_throw();
52+
53+
builder->builder_monitor_.Stop("SyncHistograms");
54+
}
55+
56+
std::vector<::sycl::event> GetEvents() const {
57+
return hist_sync_events_;
58+
}
59+
60+
private:
61+
std::vector<::sycl::event> hist_sync_events_;
62+
};
63+
64+
} // namespace tree
65+
} // namespace sycl
66+
} // namespace xgboost
67+
68+
#endif // PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_

plugin/sycl/tree/hist_updater.cc

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,69 @@
77

88
#include <oneapi/dpl/random>
99

10+
#include "../common/hist_util.h"
11+
1012
namespace xgboost {
1113
namespace sycl {
1214
namespace tree {
1315

16+
template <typename GradientSumT>
17+
void HistUpdater<GradientSumT>::SetHistSynchronizer(
18+
HistSynchronizer<GradientSumT> *sync) {
19+
hist_synchronizer_.reset(sync);
20+
}
21+
22+
template <typename GradientSumT>
23+
void HistUpdater<GradientSumT>::SetHistRowsAdder(
24+
HistRowsAdder<GradientSumT> *adder) {
25+
hist_rows_adder_.reset(adder);
26+
}
27+
28+
template <typename GradientSumT>
29+
void HistUpdater<GradientSumT>::BuildHistogramsLossGuide(
30+
ExpandEntry entry,
31+
const common::GHistIndexMatrix &gmat,
32+
RegTree *p_tree,
33+
const USMVector<GradientPair, MemoryType::on_device> &gpair_device) {
34+
nodes_for_explicit_hist_build_.clear();
35+
nodes_for_subtraction_trick_.clear();
36+
nodes_for_explicit_hist_build_.push_back(entry);
37+
38+
if (!(*p_tree)[entry.nid].IsRoot()) {
39+
auto sibling_id = entry.GetSiblingId(p_tree);
40+
nodes_for_subtraction_trick_.emplace_back(sibling_id, p_tree->GetDepth(sibling_id));
41+
}
42+
43+
std::vector<int> sync_ids;
44+
hist_rows_adder_->AddHistRows(this, &sync_ids, p_tree);
45+
qu_.wait_and_throw();
46+
BuildLocalHistograms(gmat, p_tree, gpair_device);
47+
hist_synchronizer_->SyncHistograms(this, sync_ids, p_tree);
48+
}
49+
50+
template<typename GradientSumT>
51+
void HistUpdater<GradientSumT>::BuildLocalHistograms(
52+
const common::GHistIndexMatrix &gmat,
53+
RegTree *p_tree,
54+
const USMVector<GradientPair, MemoryType::on_device> &gpair_device) {
55+
builder_monitor_.Start("BuildLocalHistograms");
56+
const size_t n_nodes = nodes_for_explicit_hist_build_.size();
57+
::sycl::event event;
58+
59+
for (size_t i = 0; i < n_nodes; i++) {
60+
const int32_t nid = nodes_for_explicit_hist_build_[i].nid;
61+
62+
if (row_set_collection_[nid].Size() > 0) {
63+
event = BuildHist(gpair_device, row_set_collection_[nid], gmat, &(hist_[nid]),
64+
&(hist_buffer_.GetDeviceBuffer()), event);
65+
} else {
66+
common::InitHist(qu_, &(hist_[nid]), hist_[nid].Size(), &event);
67+
}
68+
}
69+
qu_.wait_and_throw();
70+
builder_monitor_.Stop("BuildLocalHistograms");
71+
}
72+
1473
template<typename GradientSumT>
1574
void HistUpdater<GradientSumT>::InitSampling(
1675
const USMVector<GradientPair, MemoryType::on_device> &gpair,
@@ -70,6 +129,21 @@ void HistUpdater<GradientSumT>::InitData(
70129
// initialize the row set
71130
{
72131
row_set_collection_.Clear();
132+
133+
// initialize histogram collection
134+
uint32_t nbins = gmat.cut.Ptrs().back();
135+
hist_.Init(qu_, nbins);
136+
137+
hist_buffer_.Init(qu_, nbins);
138+
size_t buffer_size = kBufferSize;
139+
if (buffer_size > info.num_row_ / kMinBlockSize + 1) {
140+
buffer_size = info.num_row_ / kMinBlockSize + 1;
141+
}
142+
hist_buffer_.Reset(buffer_size);
143+
144+
// initialize histogram builder
145+
hist_builder_ = common::GHistBuilder<GradientSumT>(qu_, nbins);
146+
73147
USMVector<size_t, MemoryType::on_device>* row_indices = &(row_set_collection_.Data());
74148
row_indices->Resize(&qu_, info.num_row_);
75149
size_t* p_row_indices = row_indices->Data();
@@ -122,6 +196,25 @@ void HistUpdater<GradientSumT>::InitData(
122196
}
123197
}
124198
row_set_collection_.Init();
199+
200+
{
201+
/* determine layout of data */
202+
const size_t nrow = info.num_row_;
203+
const size_t ncol = info.num_col_;
204+
const size_t nnz = info.num_nonzero_;
205+
// number of discrete bins for feature 0
206+
const uint32_t nbins_f0 = gmat.cut.Ptrs()[1] - gmat.cut.Ptrs()[0];
207+
if (nrow * ncol == nnz) {
208+
// dense data with zero-based indexing
209+
data_layout_ = kDenseDataZeroBased;
210+
} else if (nbins_f0 == 0 && nrow * (ncol - 1) == nnz) {
211+
// dense data with one-based indexing
212+
data_layout_ = kDenseDataOneBased;
213+
} else {
214+
// sparse data
215+
data_layout_ = kSparseData;
216+
}
217+
}
125218
}
126219

127220
template class HistUpdater<float>;

0 commit comments

Comments
 (0)