17
17
18
18
#include " categorical.h"
19
19
#include " column_matrix.h"
20
+ #include " ../tree/hist/expand_entry.h"
20
21
#include " xgboost/generic_parameters.h"
21
22
#include " xgboost/tree_model.h"
22
23
@@ -107,34 +108,42 @@ class PartitionBuilder {
107
108
}
108
109
109
110
template <typename BinIdxType, bool any_missing, bool any_cat>
110
- void Partition (const size_t node_in_set, const size_t nid, const common::Range1d range,
111
+ void Partition (const size_t node_in_set, std::vector<xgboost::tree::CPUExpandEntry> const &nodes,
112
+ const common::Range1d range,
111
113
const bst_bin_t split_cond, GHistIndexMatrix const & gmat,
112
- const ColumnMatrix& column_matrix, const RegTree& tree, const size_t * rid) {
114
+ const common::ColumnMatrix& column_matrix,
115
+ const RegTree& tree, const size_t * rid) {
113
116
common::Span<const size_t > rid_span (rid + range.begin (), rid + range.end ());
114
117
common::Span<size_t > left = GetLeftBuffer (node_in_set, range.begin (), range.end ());
115
118
common::Span<size_t > right = GetRightBuffer (node_in_set, range.begin (), range.end ());
116
- const bst_uint fid = tree[nid].SplitIndex ();
117
- const bool default_left = tree[nid].DefaultLeft ();
119
+ std::size_t nid = nodes[node_in_set].nid ;
120
+ bst_feature_t fid = tree[nid].SplitIndex ();
121
+ bool default_left = tree[nid].DefaultLeft ();
118
122
bool is_cat = tree.GetSplitTypes ()[nid] == FeatureType::kCategorical ;
119
123
auto node_cats = tree.NodeCats (nid);
120
124
121
125
auto const & index = gmat.index ;
122
126
auto const & cut_values = gmat.cut .Values ();
123
127
auto const & cut_ptrs = gmat.cut .Ptrs ();
124
128
125
- auto pred = [&](auto ridx, auto bin_id) {
129
+ auto gidx_calc = [&](auto ridx) {
130
+ auto begin = gmat.RowIdx (ridx);
131
+ if (gmat.IsDense ()) {
132
+ return static_cast <bst_bin_t >(index[begin + fid]);
133
+ }
134
+ auto end = gmat.RowIdx (ridx + 1 );
135
+ auto f_begin = cut_ptrs[fid];
136
+ auto f_end = cut_ptrs[fid + 1 ];
137
+ // bypassing the column matrix as we need the cut value instead of bin idx for categorical
138
+ // features.
139
+ return BinarySearchBin (begin, end, index, f_begin, f_end);
140
+ };
141
+
142
+ auto pred_hist = [&](auto ridx, auto bin_id) {
126
143
if (any_cat && is_cat) {
127
- auto begin = gmat.RowIdx (ridx);
128
- auto end = gmat.RowIdx (ridx + 1 );
129
- auto f_begin = cut_ptrs[fid];
130
- auto f_end = cut_ptrs[fid + 1 ];
131
- // bypassing the column matrix as we need the cut value instead of bin idx for categorical
132
- // features.
133
- auto gidx = BinarySearchBin (begin, end, index, f_begin, f_end);
134
- bool go_left;
135
- if (gidx == -1 ) {
136
- go_left = default_left;
137
- } else {
144
+ auto gidx = gidx_calc (ridx);
145
+ bool go_left = default_left;
146
+ if (gidx > -1 ) {
138
147
go_left = Decision (node_cats, cut_values[gidx], default_left);
139
148
}
140
149
return go_left;
@@ -143,25 +152,43 @@ class PartitionBuilder {
143
152
}
144
153
};
145
154
146
- std::pair< size_t , size_t > child_nodes_sizes;
147
- if (column_matrix. GetColumnType (fid) == xgboost::common:: kDenseColumn ) {
148
- auto column = column_matrix. DenseColumn <BinIdxType, any_missing>(fid) ;
149
- if (default_left ) {
150
- child_nodes_sizes = PartitionKernel< true , any_missing>(&column, rid_span, left, right,
151
- gmat. base_rowid , pred );
152
- } else {
153
- child_nodes_sizes = PartitionKernel< false , any_missing>(&column, rid_span, left, right,
154
- gmat. base_rowid , pred);
155
+ auto pred_approx = [&]( auto ridx) {
156
+ auto gidx = gidx_calc (ridx);
157
+ bool go_left = default_left ;
158
+ if (gidx > - 1 ) {
159
+ if (is_cat) {
160
+ go_left = Decision (node_cats, cut_values[gidx], default_left );
161
+ } else {
162
+ go_left = cut_values[gidx] <= nodes[node_in_set]. split . split_value ;
163
+ }
155
164
}
165
+ return go_left;
166
+ };
167
+
168
+ std::pair<size_t , size_t > child_nodes_sizes;
169
+ if (!column_matrix.IsInitialized ()) {
170
+ child_nodes_sizes = PartitionRangeKernel (rid_span, left, right, pred_approx);
156
171
} else {
157
- CHECK_EQ (any_missing, true );
158
- auto column = column_matrix.SparseColumn <BinIdxType>(fid, rid_span.front () - gmat.base_rowid );
159
- if (default_left) {
160
- child_nodes_sizes = PartitionKernel<true , any_missing>(&column, rid_span, left, right,
161
- gmat.base_rowid , pred);
172
+ if (column_matrix.GetColumnType (fid) == xgboost::common::kDenseColumn ) {
173
+ auto column = column_matrix.DenseColumn <BinIdxType, any_missing>(fid);
174
+ if (default_left) {
175
+ child_nodes_sizes = PartitionKernel<true , any_missing>(&column, rid_span, left, right,
176
+ gmat.base_rowid , pred_hist);
177
+ } else {
178
+ child_nodes_sizes = PartitionKernel<false , any_missing>(&column, rid_span, left, right,
179
+ gmat.base_rowid , pred_hist);
180
+ }
162
181
} else {
163
- child_nodes_sizes = PartitionKernel<false , any_missing>(&column, rid_span, left, right,
164
- gmat.base_rowid , pred);
182
+ CHECK_EQ (any_missing, true );
183
+ auto column =
184
+ column_matrix.SparseColumn <BinIdxType>(fid, rid_span.front () - gmat.base_rowid );
185
+ if (default_left) {
186
+ child_nodes_sizes = PartitionKernel<true , any_missing>(&column, rid_span, left, right,
187
+ gmat.base_rowid , pred_hist);
188
+ } else {
189
+ child_nodes_sizes = PartitionKernel<false , any_missing>(&column, rid_span, left, right,
190
+ gmat.base_rowid , pred_hist);
191
+ }
165
192
}
166
193
}
167
194
@@ -172,37 +199,6 @@ class PartitionBuilder {
172
199
SetNRightElems (node_in_set, range.begin (), n_right);
173
200
}
174
201
175
- /* *
176
- * \brief Partition tree nodes with specific range of row indices.
177
- *
178
- * \tparam Pred Predicate for whether a row should be partitioned to the left node.
179
- *
180
- * \param node_in_set The index of node in current batch of nodes.
181
- * \param nid The canonical node index (node index in the tree).
182
- * \param range The range of input row index.
183
- * \param fidx Feature index.
184
- * \param p_row_set_collection Pointer to rows that are being partitioned.
185
- * \param pred A callback function that returns whether current row should be
186
- * partitioned to the left node, it should accept the row index as
187
- * input and returns a boolean value.
188
- */
189
- template <typename Pred>
190
- void PartitionRange (const size_t node_in_set, const size_t nid, common::Range1d range,
191
- common::RowSetCollection* p_row_set_collection, Pred pred) {
192
- auto & row_set_collection = *p_row_set_collection;
193
- const size_t * p_ridx = row_set_collection[nid].begin ;
194
- common::Span<const size_t > ridx (p_ridx + range.begin (), p_ridx + range.end ());
195
- common::Span<size_t > left = this ->GetLeftBuffer (node_in_set, range.begin (), range.end ());
196
- common::Span<size_t > right = this ->GetRightBuffer (node_in_set, range.begin (), range.end ());
197
- std::pair<size_t , size_t > child_nodes_sizes = PartitionRangeKernel (ridx, left, right, pred);
198
-
199
- const size_t n_left = child_nodes_sizes.first ;
200
- const size_t n_right = child_nodes_sizes.second ;
201
-
202
- this ->SetNLeftElems (node_in_set, range.begin (), n_left);
203
- this ->SetNRightElems (node_in_set, range.begin (), n_right);
204
- }
205
-
206
202
// allocate thread local memory, should be called for each specific task
207
203
void AllocateForTask (size_t id) {
208
204
if (mem_blocks_[id].get () == nullptr ) {
0 commit comments