Skip to content

Commit fd0138c

Browse files
authored
[coll] Improve column split tests with named threads. (dmlc#10735)
1 parent 55aef8f commit fd0138c

File tree

10 files changed

+72
-37
lines changed

10 files changed

+72
-37
lines changed

include/xgboost/windefs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@
3030

3131
#endif // xgboost_IS_MINGW
3232

33-
#endif // defined(xgboost_IS_WIN)
33+
#endif // !defined(xgboost_IS_WIN)

src/collective/loop.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <thread> // for thread
1515
#include <utility> // for move
1616

17+
#include "../common/threading_utils.h" // for NameThread
1718
#include "xgboost/collective/poll_utils.h" // for PollHelper
1819
#include "xgboost/collective/result.h" // for Fail, Success
1920
#include "xgboost/collective/socket.h" // for FailWithCode
@@ -271,5 +272,6 @@ Loop::Loop(std::chrono::seconds timeout) : timeout_{timeout} {
271272
worker_ = std::thread{[this] {
272273
this->Process();
273274
}};
275+
common::NameThread(&worker_, "lw");
274276
}
275277
} // namespace xgboost::collective

src/collective/tracker.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <utility> // for move, forward
2424

2525
#include "../common/json_utils.h"
26+
#include "../common/threading_utils.h" // for NameThread
2627
#include "comm.h"
2728
#include "protocol.h" // for kMagic, PeerInfo
2829
#include "tracker.h"
@@ -143,6 +144,8 @@ Result RabitTracker::Bootstrap(std::vector<WorkerProxy>* p_workers) {
143144
Json::Dump(jnext, &str);
144145
worker.Send(StringView{str});
145146
});
147+
std::string name = "tkbs_t-" + std::to_string(r);
148+
common::NameThread(&bootstrap_threads.back(), name.c_str());
146149
}
147150

148151
for (auto& t : bootstrap_threads) {

src/common/threading_utils.cc

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2022-2023 by XGBoost Contributors
2+
* Copyright 2022-2024, XGBoost Contributors
33
*/
44
#include "threading_utils.h"
55

@@ -9,7 +9,11 @@
99
#include <fstream> // for ifstream
1010
#include <string> // for string
1111

12-
#include "common.h" // for DivRoundUp
12+
#include "common.h" // for DivRoundUp
13+
14+
#if defined(__linux__)
15+
#include <pthread.h>
16+
#endif
1317

1418
namespace xgboost::common {
1519
/**
@@ -113,4 +117,26 @@ std::int32_t OmpGetNumThreads(std::int32_t n_threads) {
113117
n_threads = std::max(n_threads, 1);
114118
return n_threads;
115119
}
120+
121+
void NameThread(std::thread* t, StringView name) {
122+
#if defined(__linux__)
123+
auto handle = t->native_handle();
124+
char old[16];
125+
auto ret = pthread_getname_np(handle, old, 16);
126+
if (ret != 0) {
127+
LOG(WARNING) << "Failed to get the name from thread";
128+
}
129+
auto new_name = std::string{old} + ">" + name.c_str(); // NOLINT
130+
if (new_name.size() > 15) {
131+
new_name = new_name.substr(new_name.size() - 15);
132+
}
133+
ret = pthread_setname_np(handle, new_name.c_str());
134+
if (ret != 0) {
135+
LOG(WARNING) << "Failed to name thread:" << ret << " :" << new_name;
136+
}
137+
#else
138+
(void)name;
139+
(void)t;
140+
#endif
141+
}
116142
} // namespace xgboost::common

src/common/threading_utils.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2019-2023 by XGBoost Contributors
2+
* Copyright 2019-2024, XGBoost Contributors
33
*/
44
#ifndef XGBOOST_COMMON_THREADING_UTILS_H_
55
#define XGBOOST_COMMON_THREADING_UTILS_H_
@@ -11,12 +11,13 @@
1111
#include <cstddef> // for size_t
1212
#include <cstdint> // for int32_t
1313
#include <cstdlib> // for malloc, free
14-
#include <functional> // for function
1514
#include <new> // for bad_alloc
15+
#include <thread> // for thread
1616
#include <type_traits> // for is_signed, conditional_t, is_integral_v, invoke_result_t
1717
#include <vector> // for vector
1818

1919
#include "xgboost/logging.h"
20+
#include "xgboost/string_view.h" // for StringView
2021

2122
#if !defined(_OPENMP)
2223
extern "C" {
@@ -308,6 +309,11 @@ class MemStackAllocator {
308309
* \brief Constant that can be used for initializing static thread local memory.
309310
*/
310311
constexpr std::int32_t DefaultMaxThreads() {
  return 128;
}
312+
313+
/**
314+
* @brief Give the thread a name. Supports only pthread on linux.
315+
*/
316+
void NameThread(std::thread* t, StringView name);
311317
} // namespace xgboost::common
312318

313319
#endif // XGBOOST_COMMON_THREADING_UTILS_H_

src/common/threadpool.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,15 @@
99
#include <memory> // for make_shared
1010
#include <mutex> // for mutex, unique_lock
1111
#include <queue> // for queue
12+
#include <string> // for string
1213
#include <thread> // for thread
1314
#include <type_traits> // for invoke_result_t
1415
#include <utility> // for move
1516
#include <vector> // for vector
1617

18+
#include "threading_utils.h" // for NameThread
19+
#include "xgboost/string_view.h" // for StringView
20+
1721
namespace xgboost::common {
1822
/**
1923
* @brief Simple implementation of a thread pool.
@@ -27,11 +31,12 @@ class ThreadPool {
2731

2832
public:
2933
/**
34+
* @param name Name prefix for threads.
3035
* @param n_threads The number of threads this pool should hold.
3136
* @param init_fn Function called once during thread creation.
3237
*/
3338
template <typename InitFn>
34-
explicit ThreadPool(std::int32_t n_threads, InitFn&& init_fn) {
39+
explicit ThreadPool(StringView name, std::int32_t n_threads, InitFn&& init_fn) {
3540
for (std::int32_t i = 0; i < n_threads; ++i) {
3641
pool_.emplace_back([&, init_fn = std::forward<InitFn>(init_fn)] {
3742
init_fn();
@@ -55,6 +60,8 @@ class ThreadPool {
5560
fn();
5661
}
5762
});
63+
std::string name_i = name.c_str() + std::string{"-"} + std::to_string(i); // NOLINT
64+
NameThread(&pool_.back(), name_i);
5865
}
5966
}
6067

src/data/sparse_page_source.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S>, public FormatStreamPol
336336
public:
337337
SparsePageSourceImpl(float missing, int nthreads, bst_feature_t n_features, bst_idx_t n_batches,
338338
std::shared_ptr<Cache> cache)
339-
: workers_{std::max(2, std::min(nthreads, 16)), InitNewThread{}},
339+
: workers_{StringView{"ext-mem"}, std::max(2, std::min(nthreads, 16)), InitNewThread{}},
340340
missing_{missing},
341341
nthreads_{nthreads},
342342
n_features_{n_features},

tests/cpp/collective/test_worker.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212
#include <utility> // for move
1313
#include <vector> // for vector
1414

15-
#include "../../../src/collective/comm.h"
15+
#include "../../../src/collective/comm.h" // for RabitComm
1616
#include "../../../src/collective/communicator-inl.h" // for Init, Finalize
1717
#include "../../../src/collective/tracker.h" // for GetHostAddress
1818
#include "../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
19+
#include "../../../src/common/threading_utils.h" // for NameThread
1920
#include "../helpers.h" // for FileExists
2021

2122
#if defined(XGBOOST_USE_FEDERATED)
@@ -176,6 +177,9 @@ void TestDistributedGlobal(std::int32_t n_workers, WorkerFn worker_fn, bool need
176177
CHECK(status == std::future_status::ready) << "Test timeout";
177178
fut.get();
178179
});
180+
181+
std::string name = "tw-" + std::to_string(i);
182+
common::NameThread(&workers.back(), name.c_str());
179183
}
180184

181185
for (auto& t : workers) {
@@ -199,7 +203,7 @@ class BaseMGPUTest : public ::testing::Test {
199203
* available.
200204
*/
201205
template <typename Fn>
202-
auto DoTest(Fn&& fn, bool is_federated, bool emulate_if_single = false) const {
206+
auto DoTest(Fn&& fn, bool is_federated, [[maybe_unused]] bool emulate_if_single = false) const {
203207
auto n_gpus = common::AllVisibleGPUs();
204208
if (is_federated) {
205209
#if defined(XGBOOST_USE_FEDERATED)

tests/cpp/common/test_threadpool.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ TEST(ThreadPool, Basic) {
2121
// 4 is an invalid value, it's only possible to set it by bypassing the parameter
2222
// validation.
2323
ASSERT_NE(orig, GlobalConfigThreadLocalStore::Get()->verbosity);
24-
ThreadPool pool{n_threads, [config = *GlobalConfigThreadLocalStore::Get()] {
24+
ThreadPool pool{StringView{"test"}, n_threads, [config = *GlobalConfigThreadLocalStore::Get()] {
2525
*GlobalConfigThreadLocalStore::Get() = config;
2626
}};
2727
GlobalConfigThreadLocalStore::Get()->verbosity = orig; // restore

tests/cpp/test_learner.cc

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -745,8 +745,7 @@ void VerifyColumnSplitWithArgs(std::string const& tree_method, bool use_gpu, Arg
745745
std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
746746
std::string device = "cpu";
747747
if (use_gpu) {
748-
auto gpu_id = common::AllVisibleGPUs() == 1 ? 0 : rank;
749-
device = "cuda:" + std::to_string(gpu_id);
748+
device = MakeCUDACtx(DistGpuIdx()).DeviceName();
750749
}
751750
auto model = GetModelWithArgs(sliced, tree_method, device, args);
752751
ASSERT_EQ(model, expected_model);
@@ -807,44 +806,32 @@ class ColumnSplitTrainingTest
807806
}
808807
};
809808

810-
auto MakeParamsForTest() {
811-
std::vector<std::tuple<std::string, bool, bool>> configs;
812-
for (auto tm : {"hist", "approx"}) {
813-
#if defined(XGBOOST_USE_CUDA)
814-
std::array<bool, 2> use_gpu{true, false};
815-
#else
816-
std::array<bool, 1> use_gpu{false};
817-
#endif
818-
for (auto i : use_gpu) {
809+
auto WithFed() {
819810
#if defined(XGBOOST_USE_FEDERATED)
820-
std::array<bool, 2> fed{true, false};
811+
return ::testing::Bool();
821812
#else
822-
std::array<bool, 1> fed{false};
813+
return ::testing::Values(false);
823814
#endif
824-
for (auto j : fed) {
825-
configs.emplace_back(tm, i, j);
826-
}
827-
}
828-
}
829-
return configs;
830815
}
831816
} // anonymous namespace
832817

833818
TEST_P(ColumnSplitTrainingTest, ColumnSampler) {
834-
auto param = GetParam();
835-
std::apply(TestColumnSplitColumnSampler, param);
819+
std::apply(TestColumnSplitColumnSampler, GetParam());
836820
}
837821

838822
TEST_P(ColumnSplitTrainingTest, InteractionConstraints) {
839-
auto param = GetParam();
840-
std::apply(TestColumnSplitInteractionConstraints, param);
823+
std::apply(TestColumnSplitInteractionConstraints, GetParam());
841824
}
842825

843826
TEST_P(ColumnSplitTrainingTest, MonotoneConstraints) {
844-
auto param = GetParam();
845-
std::apply(TestColumnSplitMonotoneConstraints, param);
827+
std::apply(TestColumnSplitMonotoneConstraints, GetParam());
846828
}
847829

848-
INSTANTIATE_TEST_SUITE_P(ColumnSplit, ColumnSplitTrainingTest,
849-
::testing::ValuesIn(MakeParamsForTest()));
830+
INSTANTIATE_TEST_SUITE_P(Cpu, ColumnSplitTrainingTest,
831+
::testing::Combine(::testing::Values("hist", "approx"),
832+
::testing::Values(false), WithFed()));
833+
834+
INSTANTIATE_TEST_SUITE_P(MGPU, ColumnSplitTrainingTest,
835+
::testing::Combine(::testing::Values("hist", "approx"),
836+
::testing::Values(true), WithFed()));
850837
} // namespace xgboost

0 commit comments

Comments
 (0)