Skip to content

Commit 00711ab

Browse files
dnmokhov and isaevil authored
Implement multiple core type constraints using selectors (#1987)
Co-authored-by: Ilya Isaev <ilya.isaev@intel.com>
1 parent 5991385 commit 00711ab

File tree

10 files changed

+347
-17
lines changed

10 files changed

+347
-17
lines changed

include/oneapi/tbb/detail/_config.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,11 +553,19 @@
553553
#define __TBB_PREVIEW_PARALLEL_PHASE 1
554554
#endif
555555

556+
// Turn on the internal switch for the task_arena core type selector preview feature when
// either TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR or __TBB_BUILD evaluates to non-zero.
// NOTE(review): __TBB_BUILD is presumably set while compiling the library itself - confirm.
#if TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR || __TBB_BUILD
557+
#define __TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR 1
558+
#endif
559+
556560
#if !__TBB_DISABLE_SPEC_EXTENSIONS
557561
#define TBB_EXT_CUSTOM_ASSERTION_HANDLER 202510
558562
#endif
559563

560564
// Feature-test macros
565+
// Defined only when __TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR is non-zero, so code can test
// for the core type selector API with #ifdef / #if.
// NOTE(review): 202603 looks like a YYYYMM stamp, matching the neighbouring feature-test macros - confirm.
#if __TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR
566+
#define TBB_HAS_TASK_ARENA_CORE_TYPE_SELECTOR 202603
567+
#endif
568+
561569
#if __TBB_PREVIEW_TASK_GROUP_EXTENSIONS
562570
#define TBB_HAS_TASK_GROUP_WAIT_FOR_SINGLE_TASK 202603
563571
#endif

include/oneapi/tbb/detail/_utils.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include <cstdint>
2323
#include <atomic>
2424
#include <functional>
25+
#include <vector>
26+
#include <climits>
2527

2628
#include "_config.h"
2729
#include "_assert.h"
@@ -156,6 +158,59 @@ T reverse_n_bits(T src, std::size_t n) {
156158
return reverse_bits(src) >> (number_of_bits<T>() - n);
157159
}
158160

161+
//! Encodes/decodes multiple core type IDs into/from a single integer value using bitmask
162+
struct multi_core_type_codec {
163+
using core_type_id = int;
164+
static constexpr core_type_id automatic = -1;
165+
166+
static core_type_id encode(const std::vector<core_type_id>& ids) {
167+
if (ids.empty()) {
168+
return automatic;
169+
}
170+
if (ids.size() == 1) {
171+
return ids[0];
172+
}
173+
174+
core_type_id result = (encoding_format << bitmask_width);
175+
176+
for (core_type_id id : ids) {
177+
__TBB_ASSERT((0 <= id) && (id < static_cast<core_type_id>(bitmask_width)), "Wrong core type id");
178+
result |= (1 << id);
179+
}
180+
181+
return result;
182+
}
183+
static std::vector<core_type_id> decode(core_type_id core_type) {
184+
if (!is_encoded(core_type)) {
185+
return {core_type};
186+
}
187+
188+
std::vector<core_type_id> core_type_ids;
189+
for (size_t bit_pos = 0; bit_pos < bitmask_width; ++bit_pos) {
190+
if (core_type & (1 << bit_pos)) {
191+
core_type_ids.push_back(static_cast<core_type_id>(bit_pos));
192+
}
193+
}
194+
return core_type_ids;
195+
}
196+
static bool is_single(core_type_id id) {
197+
return (id >> bitmask_width) == 0;
198+
}
199+
static bool is_encoded(core_type_id id) {
200+
return (static_cast<std::make_unsigned<core_type_id>::type>(id) >> bitmask_width) == encoding_format;
201+
}
202+
static bool is_core_type(core_type_id id) {
203+
return is_single(id) || is_encoded(id);
204+
}
205+
206+
// Lower bitmask_width bits encode IDs
207+
static constexpr size_t bitmask_width = sizeof(core_type_id) * CHAR_BIT - 4;
208+
209+
// Upper 4 bits: MSb=1 (makes result negative; real core type IDs are non-negative) + 3-bit format version
210+
// (current: 0, max: 6; 1111 is excluded to avoid collision with plain negatives: -1, -2, ..., -268435456)
211+
static constexpr size_t encoding_format = 0x8;
212+
};
213+
159214
// A function to check if passed integer is a power of two
160215
template <typename IntegerType>
161216
constexpr bool is_power_of_two( IntegerType arg ) {

include/oneapi/tbb/info.h

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
Copyright (c) 2019-2022 Intel Corporation
3+
Copyright (c) 2026 UXL Foundation Contributors
34
45
Licensed under the Apache License, Version 2.0 (the "License");
56
you may not use this file except in compliance with the License.
@@ -19,8 +20,11 @@
1920

2021
#include "detail/_config.h"
2122
#include "detail/_namespace_injection.h"
23+
#include "detail/_utils.h"
24+
#include "version.h"
2225

2326
#if __TBB_ARENA_BINDING
27+
#include <tuple>
2428
#include <vector>
2529
#include <cstdint>
2630

@@ -103,6 +107,59 @@ inline int default_concurrency(constraints c) {
103107
return r1::constraints_default_concurrency(c);
104108
}
105109

110+
#if __TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR
111+
// Call a custom selector on the available core type(s) and encode those selected
112+
template <typename Selector>
113+
inline core_type_id apply_core_type_selector(Selector selector) {
114+
constexpr core_type_id automatic = -1;
115+
116+
auto ids = core_types();
117+
size_t total = ids.size();
118+
if (total < 2) {
119+
// Not enough core types to select from, so use the default
120+
return automatic;
121+
}
122+
123+
int max_score = 0, max_score_id = -1, num_zero_scores = 0;
124+
std::vector<core_type_id> selected_core_types;
125+
for (size_t index = 0; index < total; ++index) {
126+
int score = selector(std::make_tuple(ids[index], index, total));
127+
if (score > 0) {
128+
selected_core_types.push_back(ids[index]);
129+
}
130+
else if (score == 0) {
131+
++num_zero_scores;
132+
}
133+
134+
if (TBB_runtime_interface_version() < 12180) {
135+
if (score > max_score) {
136+
max_score = score;
137+
max_score_id = ids[index];
138+
}
139+
}
140+
}
141+
if (TBB_runtime_interface_version() < 12180) {
142+
// No runtime multi core type support, so select all or one
143+
if (selected_core_types.size() + num_zero_scores == total) {
144+
selected_core_types.clear(); // all
145+
}
146+
else if (!selected_core_types.empty()) {
147+
selected_core_types = { max_score_id }; // the one with the highest score
148+
}
149+
}
150+
return multi_core_type_codec::encode(selected_core_types);
151+
}
152+
153+
template <typename Selector>
154+
inline int default_concurrency(constraints c, Selector selector) {
155+
constexpr core_type_id selectable = -2;
156+
if (c.core_type == selectable) {
157+
c.core_type = apply_core_type_selector(selector);
158+
}
159+
return default_concurrency(c);
160+
}
161+
#endif
162+
106163
} // namespace d1
107164
} // namespace detail
108165

include/oneapi/tbb/task_arena.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,12 @@ class task_arena_base {
253253
public:
254254
//! Typedef for number of threads that is automatic.
255255
static const int automatic = -1;
256+
//! Typedef for current thread index in an uninitialized arena.
256257
static const int not_initialized = -2;
258+
#if __TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR
259+
//! Typedef for core type(s) to be specified by the provided selector.
260+
static const int selectable = -2;
261+
#endif
257262
};
258263

259264
template<typename R, typename F>
@@ -364,6 +369,28 @@ class task_arena : public task_arena_base {
364369
)
365370
{}
366371

372+
#if __TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR
373+
//! Creates task arena with a custom selector for core types
374+
template <typename Selector,
375+
typename = decltype(static_cast<int>(std::declval<Selector>()(std::declval<std::tuple<int, size_t, size_t>>())))>
376+
task_arena(const constraints& constraints_, Selector selector_,
377+
unsigned reserved_for_masters = 1, priority a_priority = priority::normal
378+
#if __TBB_PREVIEW_PARALLEL_PHASE
379+
, leave_policy lp = leave_policy::automatic
380+
#endif
381+
)
382+
: task_arena_base(constraints_, reserved_for_masters, a_priority
383+
#if __TBB_PREVIEW_PARALLEL_PHASE
384+
, lp
385+
#endif
386+
)
387+
{
388+
if (my_core_type == selectable) {
389+
my_core_type = apply_core_type_selector(selector_);
390+
}
391+
}
392+
#endif
393+
367394
//! Copies settings from another task_arena
368395
task_arena(const task_arena &a) // copy settings but not the reference or instance
369396
: task_arena_base(
@@ -440,6 +467,7 @@ class task_arena : public task_arena_base {
440467
}
441468

442469
#if __TBB_ARENA_BINDING
470+
//! Overrides constraints and forces initialization of internal representation
443471
void initialize(constraints constraints_, unsigned reserved_slots = 1,
444472
priority a_priority = priority::normal
445473
#if __TBB_PREVIEW_PARALLEL_PHASE
@@ -462,6 +490,37 @@ class task_arena : public task_arena_base {
462490
mark_initialized();
463491
}
464492
}
493+
494+
#if __TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR
495+
//! Overrides constraints with a custom selector for core types and forces initialization of internal representation
496+
template<typename Selector,
497+
typename = decltype(static_cast<int>(std::declval<Selector>()(std::declval<std::tuple<int, size_t, size_t>>())))>
498+
void initialize(constraints constraints_, Selector selector_,
499+
unsigned reserved_for_masters = 1, priority a_priority = priority::normal
500+
#if __TBB_PREVIEW_PARALLEL_PHASE
501+
, leave_policy lp = leave_policy::automatic
502+
#endif
503+
)
504+
{
505+
__TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena");
506+
if( !is_active() ) {
507+
my_numa_id = constraints_.numa_id;
508+
my_max_concurrency = constraints_.max_concurrency;
509+
my_core_type = constraints_.core_type;
510+
my_max_threads_per_core = constraints_.max_threads_per_core;
511+
my_num_reserved_slots = reserved_for_masters;
512+
my_priority = a_priority;
513+
#if __TBB_PREVIEW_PARALLEL_PHASE
514+
set_leave_policy(lp);
515+
#endif
516+
if (my_core_type == selectable) {
517+
my_core_type = apply_core_type_selector(selector_);
518+
}
519+
r1::initialize(*this);
520+
mark_initialized();
521+
}
522+
}
523+
#endif /*__TBB_PREVIEW_TASK_ARENA_CORE_TYPE_SELECTOR*/
465524
#endif /*__TBB_ARENA_BINDING*/
466525

467526
//! Attaches this instance to the current arena of the thread

src/tbb/arena.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ class numa_binding_observer : public tbb::task_scheduler_observer {
5858

5959
numa_binding_observer* construct_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core ) {
6060
numa_binding_observer* binding_observer = nullptr;
61-
if ((core_type >= 0 && core_type_count() > 1) || (numa_id >= 0 && numa_node_count() > 1) || max_threads_per_core > 0) {
61+
if ((multi_core_type_codec::is_core_type(core_type) && core_type_count() > 1) || (numa_id >= 0 && numa_node_count() > 1) || max_threads_per_core > 0) {
6262
binding_observer = new(allocate_memory(sizeof(numa_binding_observer))) numa_binding_observer(ta, num_slots, numa_id, core_type, max_threads_per_core);
6363
__TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction");
6464
}

src/tbb/governor.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
Copyright (c) 2005-2025 Intel Corporation
3-
Copyright (c) 2025 UXL Foundation Contributors
3+
Copyright (c) 2025-2026 UXL Foundation Contributors
44
55
Licensed under the Apache License, Version 2.0 (the "License");
66
you may not use this file except in compliance with the License.
@@ -591,7 +591,7 @@ void constraints_assertion(d1::constraints c) {
591591
int* core_types_begin = system_topology::core_types_indexes;
592592
int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count;
593593
__TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic ||
594-
(is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end),
594+
(is_topology_initialized && (multi_core_type_codec::is_encoded(c.core_type) || std::find(core_types_begin, core_types_end, c.core_type) != core_types_end)),
595595
"The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values.");
596596
}
597597

@@ -600,7 +600,7 @@ int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c
600600

601601
const int default_num_threads = int(governor::default_num_threads());
602602

603-
if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) {
603+
if (c.numa_id >= 0 || multi_core_type_codec::is_core_type(c.core_type) || c.max_threads_per_core > 0) {
604604
system_topology::initialize();
605605
const int constrained_default_concurrency =
606606
get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core);

src/tbbbind/tbb_bind.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
Copyright (c) 2019-2025 Intel Corporation
3-
Copyright (c) 2025 UXL Foundation Contributors
3+
Copyright (c) 2025-2026 UXL Foundation Contributors
44
55
Licensed under the Apache License, Version 2.0 (the "License");
66
you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
2121
#include "../tbb/assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
2222
#include "oneapi/tbb/detail/_assert.h"
2323
#include "oneapi/tbb/detail/_config.h"
24+
#include "oneapi/tbb/detail/_utils.h"
2425

2526
#if _MSC_VER && !__INTEL_COMPILER && !__clang__
2627
#pragma warning( push )
@@ -355,7 +356,13 @@ class system_topology {
355356
void fill_constraints_affinity_mask(affinity_mask input_mask, int numa_node_index, int core_type_index, int max_threads_per_core) {
356357
__TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized system_topology");
357358
__TBB_ASSERT(numa_node_index < (int)numa_affinity_masks_list.size(), "Wrong NUMA node id");
358-
__TBB_ASSERT(core_type_index < (int)core_types_affinity_masks_list.size(), "Wrong core type id");
359+
__TBB_ASSERT(core_type_index == -1 ||
360+
// In the multiple core type format, the MSb of the first bitmask_width bits represents the highest core type id
361+
(multi_core_type_codec::is_single(core_type_index)
362+
? (size_t)core_type_index
363+
: log2(core_type_index & ((1 << multi_core_type_codec::bitmask_width) - 1))) <
364+
core_types_affinity_masks_list.size(),
365+
"Wrong core type id");
359366
__TBB_ASSERT(max_threads_per_core == -1 || max_threads_per_core > 0, "Wrong max_threads_per_core");
360367

361368
hwloc_cpuset_t constraints_mask = hwloc_bitmap_alloc();
@@ -365,8 +372,19 @@ class system_topology {
365372
if (numa_node_index >= 0) {
366373
hwloc_bitmap_and(constraints_mask, constraints_mask, numa_affinity_masks_list[numa_node_index]);
367374
}
368-
if (core_type_index >= 0) {
369-
hwloc_bitmap_and(constraints_mask, constraints_mask, core_types_affinity_masks_list[core_type_index]);
375+
if (multi_core_type_codec::is_core_type(core_type_index)) {
376+
auto core_types = multi_core_type_codec::decode(core_type_index);
377+
__TBB_ASSERT(!core_types.empty(), "Core types list must not be empty");
378+
379+
hwloc_cpuset_t core_types_mask = hwloc_bitmap_alloc();
380+
381+
// Combine affinity masks for specified core types
382+
for (int c : core_types) {
383+
hwloc_bitmap_or(core_types_mask, core_types_mask, core_types_affinity_masks_list[c]);
384+
}
385+
386+
hwloc_bitmap_and(constraints_mask, constraints_mask, core_types_mask);
387+
hwloc_bitmap_free(core_types_mask);
370388
}
371389
if (max_threads_per_core > 0) {
372390
// clear input mask

0 commit comments

Comments
 (0)