Skip to content

Commit 4fe67f1

Browse files
authored
[EM] Have one partitioner for each batch. (dmlc#10760)
- Initialize one partitioner for each batch. - Collect partition size during initialization. - Support base ridx in the finalization.
1 parent 3043827 commit 4fe67f1

File tree

10 files changed

+211
-181
lines changed

10 files changed

+211
-181
lines changed

src/common/device_helpers.cuh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -387,11 +387,6 @@ void CopyTo(Src const &src, Dst *dst) {
387387
src.size() * sizeof(SVT), cudaMemcpyDefault));
388388
}
389389

390-
template <class HContainer, class DContainer>
391-
void CopyToD(HContainer const &h, DContainer *d) {
392-
CopyTo(h, d);
393-
}
394-
395390
// Keep track of pinned memory allocation
396391
struct PinnedMemory {
397392
void *temp_storage{nullptr};

src/common/threading_utils.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,15 +124,15 @@ void NameThread(std::thread* t, StringView name) {
124124
char old[16];
125125
auto ret = pthread_getname_np(handle, old, 16);
126126
if (ret != 0) {
127-
LOG(WARNING) << "Failed to get the name from thread";
127+
LOG(DEBUG) << "Failed to get the name from thread";
128128
}
129129
auto new_name = std::string{old} + ">" + name.c_str(); // NOLINT
130130
if (new_name.size() > 15) {
131131
new_name = new_name.substr(new_name.size() - 15);
132132
}
133133
ret = pthread_setname_np(handle, new_name.c_str());
134134
if (ret != 0) {
135-
LOG(WARNING) << "Failed to name thread:" << ret << " :" << new_name;
135+
LOG(DEBUG) << "Failed to name thread:" << ret << " :" << new_name;
136136
}
137137
#else
138138
(void)name;

src/tree/gpu_hist/gradient_based_sampler.cu

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ NoSampling::NoSampling(BatchParam batch_param) : batch_param_(std::move(batch_pa
152152

153153
GradientBasedSample NoSampling::Sample(Context const*, common::Span<GradientPair> gpair,
154154
DMatrix* dmat) {
155-
return {dmat->Info().num_row_, dmat, gpair};
155+
return {dmat, gpair};
156156
}
157157

158158
ExternalMemoryNoSampling::ExternalMemoryNoSampling(BatchParam batch_param)
@@ -179,7 +179,7 @@ GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx,
179179
this->p_fmat_new_ =
180180
std::make_unique<data::IterativeDMatrix>(new_page, p_fmat->Info(), batch_param_);
181181
}
182-
return {p_fmat->Info().num_row_, this->p_fmat_new_.get(), gpair};
182+
return {this->p_fmat_new_.get(), gpair};
183183
}
184184

185185
UniformSampling::UniformSampling(BatchParam batch_param, float subsample)
@@ -192,7 +192,7 @@ GradientBasedSample UniformSampling::Sample(Context const* ctx, common::Span<Gra
192192
thrust::replace_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
193193
thrust::counting_iterator<std::size_t>(0),
194194
BernoulliTrial(common::GlobalRandom()(), subsample_), GradientPair());
195-
return {p_fmat->Info().num_row_, p_fmat, gpair};
195+
return {p_fmat, gpair};
196196
}
197197

198198
ExternalMemoryUniformSampling::ExternalMemoryUniformSampling(size_t n_rows,
@@ -252,7 +252,8 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx,
252252
// Create the new DMatrix
253253
this->p_fmat_new_ = std::make_unique<data::IterativeDMatrix>(
254254
new_page, dmat->Info().Slice(ctx, dh::ToSpan(compact_row_index_), nnz), batch_param_);
255-
return {sample_rows, this->p_fmat_new_.get(), dh::ToSpan(gpair_)};
255+
CHECK_EQ(sample_rows, this->p_fmat_new_->Info().num_row_);
256+
return {this->p_fmat_new_.get(), dh::ToSpan(gpair_)};
256257
}
257258

258259
GradientBasedSampling::GradientBasedSampling(std::size_t n_rows, BatchParam batch_param,
@@ -274,7 +275,7 @@ GradientBasedSample GradientBasedSampling::Sample(Context const* ctx,
274275
thrust::counting_iterator<size_t>(0), dh::tbegin(gpair),
275276
PoissonSampling(dh::ToSpan(threshold_), threshold_index,
276277
RandomWeight(common::GlobalRandom()())));
277-
return {n_rows, dmat, gpair};
278+
return {dmat, gpair};
278279
}
279280

280281
ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling(size_t n_rows,
@@ -334,7 +335,8 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c
334335
// Create the new DMatrix
335336
this->p_fmat_new_ = std::make_unique<data::IterativeDMatrix>(
336337
new_page, dmat->Info().Slice(ctx, dh::ToSpan(compact_row_index_), nnz), batch_param_);
337-
return {sample_rows, this->p_fmat_new_.get(), dh::ToSpan(gpair_)};
338+
CHECK_EQ(sample_rows, this->p_fmat_new_->Info().num_row_);
339+
return {this->p_fmat_new_.get(), dh::ToSpan(gpair_)};
338340
}
339341

340342
GradientBasedSampler::GradientBasedSampler(Context const* /*ctx*/, size_t n_rows,

src/tree/gpu_hist/gradient_based_sampler.cuh

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,9 @@
1212

1313
namespace xgboost::tree {
1414
struct GradientBasedSample {
15-
/*!\brief Number of sampled rows. */
16-
bst_idx_t sample_rows;
17-
/*!\brief Sampled rows in ELLPACK format. */
15+
/** @brief Sampled rows in ELLPACK format. */
1816
DMatrix* p_fmat;
19-
/*!\brief Gradient pairs for the sampled rows. */
17+
/** @brief Gradient pairs for the sampled rows. */
2018
common::Span<GradientPair const> gpair;
2119
};
2220

src/tree/gpu_hist/row_partitioner.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ common::Span<const RowPartitioner::RowIndexT> RowPartitioner::GetRows(bst_node_t
3131
return dh::ToSpan(ridx_).subspan(segment.begin, segment.Size());
3232
}
3333

34-
common::Span<const RowPartitioner::RowIndexT> RowPartitioner::GetRows() {
34+
common::Span<const RowPartitioner::RowIndexT> RowPartitioner::GetRows() const {
3535
return dh::ToSpan(ridx_);
3636
}
3737

src/tree/gpu_hist/row_partitioner.cuh

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -200,11 +200,11 @@ XGBOOST_DEV_INLINE int GetPositionFromSegments(std::size_t idx,
200200

201201
template <int kBlockSize, typename RowIndexT, typename OpT>
202202
__global__ __launch_bounds__(kBlockSize) void FinalisePositionKernel(
203-
const common::Span<const NodePositionInfo> d_node_info,
203+
const common::Span<const NodePositionInfo> d_node_info, bst_idx_t base_ridx,
204204
const common::Span<const RowIndexT> d_ridx, common::Span<bst_node_t> d_out_position, OpT op) {
205205
for (auto idx : dh::GridStrideRange<std::size_t>(0, d_ridx.size())) {
206206
auto position = GetPositionFromSegments(idx, d_node_info.data());
207-
RowIndexT ridx = d_ridx[idx];
207+
RowIndexT ridx = d_ridx[idx] - base_ridx;
208208
bst_node_t new_position = op(ridx, position);
209209
d_out_position[ridx] = new_position;
210210
}
@@ -264,7 +264,12 @@ class RowPartitioner {
264264
/**
265265
* \brief Gets all training rows in the set.
266266
*/
267-
common::Span<const RowIndexT> GetRows();
267+
common::Span<const RowIndexT> GetRows() const;
268+
/**
269+
* @brief Get the number of rows in this partitioner.
270+
*/
271+
std::size_t Size() const { return this->GetRows().size(); }
272+
268273
[[nodiscard]] bst_node_t GetNumNodes() const { return n_nodes_; }
269274

270275
/**
@@ -351,7 +356,8 @@ class RowPartitioner {
351356
* argument and return the new position for this training instance.
352357
*/
353358
template <typename FinalisePositionOpT>
354-
void FinalisePosition(common::Span<bst_node_t> d_out_position, FinalisePositionOpT op) const {
359+
void FinalisePosition(common::Span<bst_node_t> d_out_position, bst_idx_t base_ridx,
360+
FinalisePositionOpT op) const {
355361
dh::TemporaryArray<NodePositionInfo> d_node_info_storage(ridx_segments_.size());
356362
dh::safe_cuda(cudaMemcpyAsync(d_node_info_storage.data().get(), ridx_segments_.data(),
357363
sizeof(NodePositionInfo) * ridx_segments_.size(),
@@ -361,8 +367,8 @@ class RowPartitioner {
361367
const int kItemsThread = 8;
362368
const int grid_size = xgboost::common::DivRoundUp(ridx_.size(), kBlockSize * kItemsThread);
363369
common::Span<RowIndexT const> d_ridx{ridx_.data(), ridx_.size()};
364-
FinalisePositionKernel<kBlockSize>
365-
<<<grid_size, kBlockSize, 0>>>(dh::ToSpan(d_node_info_storage), d_ridx, d_out_position, op);
370+
FinalisePositionKernel<kBlockSize><<<grid_size, kBlockSize, 0>>>(
371+
dh::ToSpan(d_node_info_storage), base_ridx, d_ridx, d_out_position, op);
366372
}
367373
};
368374
}; // namespace xgboost::tree

0 commit comments

Comments
 (0)