Skip to content

Commit ea7bd91

Browse files
authored
[backport] Fix column split race condition. (dmlc#10572) (dmlc#10580)
1 parent 086ca69 commit ea7bd91

File tree

5 files changed

+167
-133
lines changed

5 files changed

+167
-133
lines changed

src/tree/common_row_partitioner.h

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,11 @@ class ColumnSplitHelper {
3636
common::PartitionBuilder<kPartitionBlockSize>* partition_builder,
3737
common::RowSetCollection* row_set_collection)
3838
: partition_builder_{partition_builder}, row_set_collection_{row_set_collection} {
39-
decision_storage_.resize(num_row);
40-
decision_bits_ = BitVector(common::Span<BitVector::value_type>(decision_storage_));
41-
missing_storage_.resize(num_row);
42-
missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_));
39+
auto n_bytes = BitVector::ComputeStorageSize(num_row);
40+
decision_storage_.resize(n_bytes);
41+
decision_bits_ = BitVector{common::Span<BitVector::value_type>{decision_storage_}};
42+
missing_storage_.resize(n_bytes);
43+
missing_bits_ = BitVector{common::Span<BitVector::value_type>{missing_storage_}};
4344
}
4445

4546
template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry>
@@ -51,14 +52,43 @@ class ColumnSplitHelper {
5152
// we first collect all the decisions and whether the feature is missing into bit vectors.
5253
std::fill(decision_storage_.begin(), decision_storage_.end(), 0);
5354
std::fill(missing_storage_.begin(), missing_storage_.end(), 0);
54-
common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) {
55-
const int32_t nid = nodes[node_in_set].nid;
55+
56+
this->tloc_decision_.resize(decision_storage_.size() * n_threads);
57+
this->tloc_missing_.resize(decision_storage_.size() * n_threads);
58+
std::fill_n(this->tloc_decision_.data(), this->tloc_decision_.size(), 0);
59+
std::fill_n(this->tloc_missing_.data(), this->tloc_missing_.size(), 0);
60+
61+
// Make thread-local storage.
62+
using T = decltype(decision_storage_)::value_type;
63+
auto make_tloc = [&](std::vector<T>& storage, std::int32_t tidx) {
64+
auto span = common::Span<T>{storage};
65+
auto n = decision_storage_.size();
66+
auto bitvec = BitVector{span.subspan(n * tidx, n)};
67+
return bitvec;
68+
};
69+
70+
common::ParallelFor2d(space, n_threads, [&](std::size_t node_in_set, common::Range1d r) {
71+
bst_node_t const nid = nodes[node_in_set].nid;
72+
auto tidx = omp_get_thread_num();
73+
auto decision = make_tloc(this->tloc_decision_, tidx);
74+
auto missing = make_tloc(this->tloc_missing_, tidx);
5675
bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0;
5776
partition_builder_->MaskRows<BinIdxType, any_missing, any_cat>(
5877
node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree,
59-
(*row_set_collection_)[nid].begin, &decision_bits_, &missing_bits_);
78+
(*row_set_collection_)[nid].begin, &decision, &missing);
6079
});
6180

81+
// Reduce thread local
82+
auto decision = make_tloc(this->tloc_decision_, 0);
83+
auto missing = make_tloc(this->tloc_missing_, 0);
84+
for (std::int32_t tidx = 1; tidx < n_threads; ++tidx) {
85+
decision |= make_tloc(this->tloc_decision_, tidx);
86+
missing |= make_tloc(this->tloc_missing_, tidx);
87+
}
88+
CHECK_EQ(decision_storage_.size(), decision.NumValues());
89+
std::copy_n(decision.Data(), decision_storage_.size(), decision_storage_.data());
90+
std::copy_n(missing.Data(), missing_storage_.size(), missing_storage_.data());
91+
6292
// Then aggregate the bit vectors across all the workers.
6393
auto rc = collective::Success() << [&] {
6494
return collective::Allreduce(ctx, &decision_storage_, collective::Op::kBitwiseOR);
@@ -85,6 +115,10 @@ class ColumnSplitHelper {
85115
BitVector decision_bits_{};
86116
std::vector<BitVector::value_type> missing_storage_{};
87117
BitVector missing_bits_{};
118+
119+
std::vector<BitVector::value_type> tloc_decision_;
120+
std::vector<BitVector::value_type> tloc_missing_;
121+
88122
common::PartitionBuilder<kPartitionBlockSize>* partition_builder_;
89123
common::RowSetCollection* row_set_collection_;
90124
};

tests/cpp/tree/test_approx.cc

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "../../../src/tree/common_row_partitioner.h"
88
#include "../collective/test_worker.h" // for TestDistributedGlobal
99
#include "../helpers.h"
10+
#include "test_column_split.h" // for TestColumnSplit
1011
#include "test_partitioner.h"
1112

1213
namespace xgboost::tree {
@@ -150,4 +151,26 @@ TEST(Approx, PartitionerColSplit) {
150151
mid_partitioner);
151152
});
152153
}
154+
155+
namespace {
156+
class TestApproxColSplit : public ::testing::TestWithParam<std::tuple<bool, float>> {
157+
public:
158+
void Run() {
159+
auto [categorical, sparsity] = GetParam();
160+
TestColumnSplit(1u, categorical, "grow_histmaker", sparsity);
161+
}
162+
};
163+
} // namespace
164+
165+
TEST_P(TestApproxColSplit, Basic) { this->Run(); }
166+
167+
INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestApproxColSplit, ::testing::ValuesIn([]() {
168+
std::vector<std::tuple<bool, float>> params;
169+
for (auto categorical : {true, false}) {
170+
for (auto sparsity : {0.0f, 0.6f}) {
171+
params.emplace_back(categorical, sparsity);
172+
}
173+
}
174+
return params;
175+
}()));
153176
} // namespace xgboost::tree

tests/cpp/tree/test_column_split.h

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/**
2+
* Copyright 2023-2024, XGBoost Contributors
3+
*/
4+
#pragma once
5+
6+
#include <xgboost/data.h> // for FeatureType, DMatrix
7+
#include <xgboost/tree_model.h> // for RegTree
8+
#include <xgboost/tree_updater.h> // for TreeUpdater
9+
10+
#include <cstddef> // for size_t
11+
#include <memory> // for shared_ptr
12+
#include <vector> // for vector
13+
14+
#include "../../../src/tree/param.h" // for TrainParam
15+
#include "../collective/test_worker.h" // for TestDistributedGlobal
16+
#include "../helpers.h" // for RandomDataGenerator
17+
18+
namespace xgboost::tree {
19+
inline std::shared_ptr<DMatrix> GenerateCatDMatrix(std::size_t rows, std::size_t cols,
20+
float sparsity, bool categorical) {
21+
if (categorical) {
22+
std::vector<FeatureType> ft(cols);
23+
for (size_t i = 0; i < ft.size(); ++i) {
24+
ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
25+
}
26+
return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix();
27+
} else {
28+
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
29+
}
30+
}
31+
32+
inline void TestColumnSplit(bst_target_t n_targets, bool categorical, std::string name,
33+
float sparsity) {
34+
auto constexpr kRows = 32;
35+
auto constexpr kCols = 16;
36+
37+
RegTree expected_tree{n_targets, static_cast<bst_feature_t>(kCols)};
38+
ObjInfo task{ObjInfo::kRegression};
39+
Context ctx;
40+
{
41+
auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);
42+
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);
43+
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};
44+
std::vector<HostDeviceVector<bst_node_t>> position(1);
45+
TrainParam param;
46+
param.Init(Args{});
47+
updater->Configure(Args{});
48+
updater->Update(&param, &gpair, p_dmat.get(), position, {&expected_tree});
49+
}
50+
51+
auto verify = [&] {
52+
Context ctx;
53+
auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);
54+
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);
55+
56+
ObjInfo task{ObjInfo::kRegression};
57+
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};
58+
std::vector<HostDeviceVector<bst_node_t>> position(1);
59+
60+
std::unique_ptr<DMatrix> sliced{
61+
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
62+
63+
RegTree tree{n_targets, static_cast<bst_feature_t>(kCols)};
64+
TrainParam param;
65+
param.Init(Args{});
66+
updater->Configure(Args{});
67+
updater->Update(&param, &gpair, sliced.get(), position, {&tree});
68+
69+
Json json{Object{}};
70+
tree.SaveModel(&json);
71+
Json expected_json{Object{}};
72+
expected_tree.SaveModel(&expected_json);
73+
ASSERT_EQ(json, expected_json);
74+
};
75+
76+
auto constexpr kWorldSize = 2;
77+
collective::TestDistributedGlobal(kWorldSize, [&] { verify(); });
78+
}
79+
} // namespace xgboost::tree

tests/cpp/tree/test_histmaker.cc

Lines changed: 4 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,19 @@
11
/**
2-
* Copyright 2019-2023 by XGBoost Contributors
2+
* Copyright 2019-2024, XGBoost Contributors
33
*/
44
#include <gtest/gtest.h>
55
#include <xgboost/tree_model.h>
66
#include <xgboost/tree_updater.h>
77

8-
#include "../../../src/tree/param.h" // for TrainParam
9-
#include "../collective/test_worker.h" // for TestDistributedGlobal
8+
#include "../../../src/tree/param.h" // for TrainParam
109
#include "../helpers.h"
10+
#include "test_column_split.h" // for GenerateCatDMatrix
1111

1212
namespace xgboost::tree {
13-
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols,
14-
bool categorical = false) {
15-
if (categorical) {
16-
std::vector<FeatureType> ft(cols);
17-
for (size_t i = 0; i < ft.size(); ++i) {
18-
ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
19-
}
20-
return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix();
21-
} else {
22-
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
23-
}
24-
}
25-
2613
TEST(GrowHistMaker, InteractionConstraint) {
2714
auto constexpr kRows = 32;
2815
auto constexpr kCols = 16;
29-
auto p_dmat = GenerateDMatrix(kRows, kCols);
16+
auto p_dmat = GenerateCatDMatrix(kRows, kCols, 0.0, false);
3017
Context ctx;
3118

3219
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
@@ -69,62 +56,4 @@ TEST(GrowHistMaker, InteractionConstraint) {
6956
ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
7057
}
7158
}
72-
73-
namespace {
74-
void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
75-
RegTree const& expected_tree) {
76-
Context ctx;
77-
auto p_dmat = GenerateDMatrix(rows, cols, categorical);
78-
linalg::Matrix<GradientPair> gpair({rows}, ctx.Device());
79-
gpair.Data()->Copy(GenerateRandomGradients(rows));
80-
81-
82-
ObjInfo task{ObjInfo::kRegression};
83-
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
84-
std::vector<HostDeviceVector<bst_node_t>> position(1);
85-
86-
std::unique_ptr<DMatrix> sliced{
87-
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
88-
89-
RegTree tree{1u, cols};
90-
TrainParam param;
91-
param.Init(Args{});
92-
updater->Configure(Args{});
93-
updater->Update(&param, &gpair, sliced.get(), position, {&tree});
94-
95-
Json json{Object{}};
96-
tree.SaveModel(&json);
97-
Json expected_json{Object{}};
98-
expected_tree.SaveModel(&expected_json);
99-
ASSERT_EQ(json, expected_json);
100-
}
101-
102-
void TestColumnSplit(bool categorical) {
103-
auto constexpr kRows = 32;
104-
auto constexpr kCols = 16;
105-
106-
RegTree expected_tree{1u, kCols};
107-
ObjInfo task{ObjInfo::kRegression};
108-
{
109-
Context ctx;
110-
auto p_dmat = GenerateDMatrix(kRows, kCols, categorical);
111-
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
112-
gpair.Data()->Copy(GenerateRandomGradients(kRows));
113-
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
114-
std::vector<HostDeviceVector<bst_node_t>> position(1);
115-
TrainParam param;
116-
param.Init(Args{});
117-
updater->Configure(Args{});
118-
updater->Update(&param, &gpair, p_dmat.get(), position, {&expected_tree});
119-
}
120-
121-
auto constexpr kWorldSize = 2;
122-
collective::TestDistributedGlobal(
123-
kWorldSize, [&] { VerifyColumnSplit(kRows, kCols, categorical, expected_tree); });
124-
}
125-
} // anonymous namespace
126-
127-
TEST(GrowHistMaker, ColumnSplitNumerical) { TestColumnSplit(false); }
128-
129-
TEST(GrowHistMaker, ColumnSplitCategorical) { TestColumnSplit(true); }
13059
} // namespace xgboost::tree

tests/cpp/tree/test_quantile_hist.cc

Lines changed: 20 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212

1313
#include "../../../src/tree/common_row_partitioner.h"
1414
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
15-
#include "../../../src/tree/param.h"
1615
#include "../collective/test_worker.h" // for TestDistributedGlobal
1716
#include "../helpers.h"
17+
#include "test_column_split.h" // for TestColumnSplit
1818
#include "test_partitioner.h"
1919
#include "xgboost/data.h"
2020

@@ -203,57 +203,26 @@ TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner<CPUExpandEn
203203
TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner<MultiExpandEntry>(3); }
204204

205205
namespace {
206-
void VerifyColumnSplit(Context const* ctx, bst_idx_t rows, bst_feature_t cols, bst_target_t n_targets,
207-
RegTree const& expected_tree) {
208-
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
209-
linalg::Matrix<GradientPair> gpair = GenerateRandomGradients(ctx, rows, n_targets);
210-
211-
ObjInfo task{ObjInfo::kRegression};
212-
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_quantile_histmaker", ctx, &task)};
213-
std::vector<HostDeviceVector<bst_node_t>> position(1);
214-
215-
std::unique_ptr<DMatrix> sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
216-
217-
RegTree tree{n_targets, cols};
218-
TrainParam param;
219-
param.Init(Args{});
220-
updater->Configure(Args{});
221-
updater->Update(&param, &gpair, sliced.get(), position, {&tree});
222-
223-
Json json{Object{}};
224-
tree.SaveModel(&json);
225-
Json expected_json{Object{}};
226-
expected_tree.SaveModel(&expected_json);
227-
ASSERT_EQ(json, expected_json);
228-
}
229-
230-
void TestColumnSplit(bst_target_t n_targets) {
231-
auto constexpr kRows = 32;
232-
auto constexpr kCols = 16;
233-
234-
RegTree expected_tree{n_targets, kCols};
235-
ObjInfo task{ObjInfo::kRegression};
236-
Context ctx;
237-
{
238-
auto Xy = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
239-
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);
240-
std::unique_ptr<TreeUpdater> updater{
241-
TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)};
242-
std::vector<HostDeviceVector<bst_node_t>> position(1);
243-
TrainParam param;
244-
param.Init(Args{});
245-
updater->Configure(Args{});
246-
updater->Update(&param, &gpair, Xy.get(), position, {&expected_tree});
206+
class TestHistColSplit : public ::testing::TestWithParam<std::tuple<bst_target_t, bool, float>> {
207+
public:
208+
void Run() {
209+
auto [n_targets, categorical, sparsity] = GetParam();
210+
TestColumnSplit(n_targets, categorical, "grow_quantile_histmaker", sparsity);
247211
}
248-
249-
auto constexpr kWorldSize = 2;
250-
collective::TestDistributedGlobal(kWorldSize, [&] {
251-
VerifyColumnSplit(&ctx, kRows, kCols, n_targets, std::cref(expected_tree));
252-
});
253-
}
212+
};
254213
} // anonymous namespace
255214

256-
TEST(QuantileHist, ColumnSplit) { TestColumnSplit(1); }
257-
258-
TEST(QuantileHist, ColumnSplitMultiTarget) { TestColumnSplit(3); }
215+
TEST_P(TestHistColSplit, Basic) { this->Run(); }
216+
217+
INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColSplit, ::testing::ValuesIn([]() {
218+
std::vector<std::tuple<bst_target_t, bool, float>> params;
219+
for (auto categorical : {true, false}) {
220+
for (auto sparsity : {0.0f, 0.6f}) {
221+
for (bst_target_t n_targets : {1u, 3u}) {
222+
params.emplace_back(n_targets, categorical, sparsity);
223+
}
224+
}
225+
}
226+
return params;
227+
}()));
259228
} // namespace xgboost::tree

0 commit comments

Comments
 (0)