Skip to content

Commit f4cce0f

Browse files
author
Jonathan Peyton
committed
[OpenMP][libomp] Add topology information to thread structure
Each time a thread gets a new affinity assigned, it will not only assign its mask, but also topology information including which socket, core, thread and core-attributes (if available) it is now assigned. This occurs for all non-disabled KMP_AFFINITY values as well as OMP_PLACES/OMP_PROC_BIND. The information regarding which socket, core, etc. can take on three values: 1) The actual ID of the unit (0 - (N-1)), given N units 2) UNKNOWN_ID (-1) which indicates it does not know which ID 3) MULTIPLE_ID (-2) which indicates the thread is spread across multiple of this unit (e.g., affinity mask is spread across multiple hardware threads) This new information is stored in th_topology_ids[] array. An example how to get the socket Id, one would read th_topology_ids[KMP_HW_SOCKET]. This could be expanded in the future to something more descriptive for the "multiple" case, like a range of values. For now, the single value suffices. The information regarding the core attributes can take on two values: 1) The actual core-type or core-eff 2) KMP_HW_CORE_TYPE_UNKNOWN if the core type is unknown, and UNKNOWN_CORE_EFF (-1) if the core eff is unknown. This new information is stored in th_topology_attrs. An example how to get the core type, one would read th_topology_attrs.core_type. Differential Revision: https://reviews.llvm.org/D139854
1 parent 4e27097 commit f4cce0f

File tree

3 files changed

+171
-2
lines changed

3 files changed

+171
-2
lines changed

openmp/runtime/src/kmp.h

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,15 @@ class KMPAffinity {
753753
// Only 1 DWORD in the mask should have any procs set.
754754
// Return the appropriate index, or -1 for an invalid mask.
755755
virtual int get_proc_group() const { return -1; }
756+
// Scan this mask and return the highest OS proc index that is set,
// or -1 if the mask is empty.
int get_max_cpu() const {
  int highest = -1;
  int proc;
  KMP_CPU_SET_ITERATE(proc, this) {
    if (proc > highest)
      highest = proc;
  }
  return highest;
}
756765
};
757766
void *operator new(size_t n);
758767
void operator delete(void *p);
@@ -836,6 +845,26 @@ typedef struct kmp_affinity_flags_t {
836845
} kmp_affinity_flags_t;
837846
KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
838847

848+
typedef struct kmp_affinity_ids_t {
849+
int ids[KMP_HW_LAST];
850+
int operator[](size_t idx) const { return ids[idx]; }
851+
int &operator[](size_t idx) { return ids[idx]; }
852+
kmp_affinity_ids_t &operator=(const kmp_affinity_ids_t &rhs) {
853+
for (int i = 0; i < KMP_HW_LAST; ++i)
854+
ids[i] = rhs[i];
855+
return *this;
856+
}
857+
} kmp_affinity_ids_t;
858+
859+
// Core attributes associated with an affinity mask / place.
// NOTE: layout must stay in sync with kmp_hw_attr_t — a KMP_BUILD_ASSERT in
// kmp_affinity.h checks sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t).
typedef struct kmp_affinity_attrs_t {
  int core_type : 8; // kmp_hw_core_type_t value, or KMP_HW_CORE_TYPE_UNKNOWN
  int core_eff : 8; // core efficiency, or kmp_hw_attr_t::UNKNOWN_CORE_EFF (-1)
  unsigned valid : 1; // set once core_type/core_eff have been filled in
  unsigned reserved : 15; // unused padding bits
} kmp_affinity_attrs_t;
// All-unknown initializer: core type/eff unknown, valid bit clear
#define KMP_AFFINITY_ATTRS_UNKNOWN \
  { KMP_HW_CORE_TYPE_UNKNOWN, kmp_hw_attr_t::UNKNOWN_CORE_EFF, 0, 0 }
867+
839868
typedef struct kmp_affinity_t {
840869
char *proclist;
841870
enum affinity_type type;
@@ -846,6 +875,8 @@ typedef struct kmp_affinity_t {
846875
kmp_affinity_flags_t flags;
847876
unsigned num_masks;
848877
kmp_affin_mask_t *masks;
878+
kmp_affinity_ids_t *ids;
879+
kmp_affinity_attrs_t *attrs;
849880
unsigned num_os_id_masks;
850881
kmp_affin_mask_t *os_id_masks;
851882
const char *env_var;
@@ -855,7 +886,7 @@ typedef struct kmp_affinity_t {
855886
{ \
856887
nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
857888
{TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
858-
nullptr, 0, nullptr, env \
889+
nullptr, nullptr, nullptr, 0, nullptr, env \
859890
}
860891

861892
extern enum affinity_top_method __kmp_affinity_top_method;
@@ -2711,6 +2742,8 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
27112742

27122743
#if KMP_AFFINITY_SUPPORTED
27132744
kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2745+
kmp_affinity_ids_t th_topology_ids; /* thread's current topology ids */
2746+
kmp_affinity_attrs_t th_topology_attrs; /* thread's current topology attrs */
27142747
#endif
27152748
omp_allocator_handle_t th_def_allocator; /* default allocator */
27162749
/* The data set by the primary thread at reinit, then R/W by the worker */

openmp/runtime/src/kmp_affinity.cpp

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4053,6 +4053,107 @@ static int __kmp_affinity_max_proc_per_core(int nprocs, int bottom_level,
40534053

40544054
// NOTE(review): procarr/__kmp_aff_depth look tied to balanced affinity
// (freed in __kmp_affinity_uninitialize) — confirm against full file.
static int *procarr = NULL;
static int __kmp_aff_depth = 0;
// Lazily built reverse map from OS proc id to hardware-thread index in
// __kmp_topology; sized to __kmp_affin_fullMask->get_max_cpu() + 1 and
// freed in __kmp_affinity_uninitialize.
static int *__kmp_osid_to_hwthread_map = NULL;
4057+
4058+
static void __kmp_affinity_get_mask_topology_info(const kmp_affin_mask_t *mask,
4059+
kmp_affinity_ids_t &ids,
4060+
kmp_affinity_attrs_t &attrs) {
4061+
if (!KMP_AFFINITY_CAPABLE())
4062+
return;
4063+
4064+
// Initiailze ids and attrs thread data
4065+
for (int i = 0; i < KMP_HW_LAST; ++i)
4066+
ids[i] = kmp_hw_thread_t::UNKNOWN_ID;
4067+
attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
4068+
4069+
// Iterate through each os id within the mask and determine
4070+
// the topology id and attribute information
4071+
int cpu;
4072+
int depth = __kmp_topology->get_depth();
4073+
KMP_CPU_SET_ITERATE(cpu, mask) {
4074+
int osid_idx = __kmp_osid_to_hwthread_map[cpu];
4075+
const kmp_hw_thread_t &hw_thread = __kmp_topology->at(osid_idx);
4076+
for (int level = 0; level < depth; ++level) {
4077+
kmp_hw_t type = __kmp_topology->get_type(level);
4078+
int id = hw_thread.sub_ids[level];
4079+
if (ids[type] == kmp_hw_thread_t::UNKNOWN_ID || ids[type] == id) {
4080+
ids[type] = id;
4081+
} else {
4082+
// This mask spans across multiple topology units, set it as such
4083+
// and mark every level below as such as well.
4084+
ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
4085+
for (; level < depth; ++level) {
4086+
kmp_hw_t type = __kmp_topology->get_type(level);
4087+
ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
4088+
}
4089+
}
4090+
}
4091+
if (!attrs.valid) {
4092+
attrs.core_type = hw_thread.attrs.get_core_type();
4093+
attrs.core_eff = hw_thread.attrs.get_core_eff();
4094+
attrs.valid = 1;
4095+
} else {
4096+
// This mask spans across multiple attributes, set it as such
4097+
if (attrs.core_type != hw_thread.attrs.get_core_type())
4098+
attrs.core_type = KMP_HW_CORE_TYPE_UNKNOWN;
4099+
if (attrs.core_eff != hw_thread.attrs.get_core_eff())
4100+
attrs.core_eff = kmp_hw_attr_t::UNKNOWN_CORE_EFF;
4101+
}
4102+
}
4103+
}
4104+
4105+
// Refresh a thread's cached topology ids/attrs from its current affinity mask.
static void __kmp_affinity_get_thread_topology_info(kmp_info_t *th) {
  if (!KMP_AFFINITY_CAPABLE())
    return;
  __kmp_affinity_get_mask_topology_info(
      th->th.th_affin_mask, th->th.th_topology_ids, th->th.th_topology_attrs);
}
4113+
4114+
// Assign the topology information to each place in the place list
4115+
// A thread can then grab not only its affinity mask, but the topology
4116+
// information associated with that mask. e.g., Which socket is a thread on
4117+
static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
4118+
if (!KMP_AFFINITY_CAPABLE())
4119+
return;
4120+
if (affinity.type != affinity_none) {
4121+
KMP_ASSERT(affinity.num_os_id_masks);
4122+
KMP_ASSERT(affinity.os_id_masks);
4123+
}
4124+
KMP_ASSERT(affinity.num_masks);
4125+
KMP_ASSERT(affinity.masks);
4126+
KMP_ASSERT(__kmp_affin_fullMask);
4127+
4128+
int max_cpu = __kmp_affin_fullMask->get_max_cpu();
4129+
int num_hw_threads = __kmp_topology->get_num_hw_threads();
4130+
4131+
// Allocate thread topology information
4132+
if (!affinity.ids) {
4133+
affinity.ids = (kmp_affinity_ids_t *)__kmp_allocate(
4134+
sizeof(kmp_affinity_ids_t) * affinity.num_masks);
4135+
}
4136+
if (!affinity.attrs) {
4137+
affinity.attrs = (kmp_affinity_attrs_t *)__kmp_allocate(
4138+
sizeof(kmp_affinity_attrs_t) * affinity.num_masks);
4139+
}
4140+
if (!__kmp_osid_to_hwthread_map) {
4141+
// Want the +1 because max_cpu should be valid index into map
4142+
__kmp_osid_to_hwthread_map =
4143+
(int *)__kmp_allocate(sizeof(int) * (max_cpu + 1));
4144+
}
4145+
4146+
// Create the OS proc to hardware thread map
4147+
for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread)
4148+
__kmp_osid_to_hwthread_map[__kmp_topology->at(hw_thread).os_id] = hw_thread;
4149+
4150+
for (unsigned i = 0; i < affinity.num_masks; ++i) {
4151+
kmp_affinity_ids_t &ids = affinity.ids[i];
4152+
kmp_affinity_attrs_t &attrs = affinity.attrs[i];
4153+
kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.masks, i);
4154+
__kmp_affinity_get_mask_topology_info(mask, ids, attrs);
4155+
}
4156+
}
40564157

40574158
// Create a one element mask array (set of places) which only contains the
40584159
// initial process's affinity mask
@@ -4063,6 +4164,7 @@ static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
40634164
KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
40644165
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
40654166
KMP_CPU_COPY(dest, __kmp_affin_fullMask);
4167+
__kmp_affinity_get_topology_info(affinity);
40664168
}
40674169

40684170
static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
@@ -4432,6 +4534,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
44324534
if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
44334535
KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
44344536
affinity.type = affinity_none;
4537+
__kmp_create_affinity_none_places(affinity);
44354538
affinity.flags.initialized = TRUE;
44364539
return;
44374540
}
@@ -4508,6 +4611,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
45084611
default:
45094612
KMP_ASSERT2(0, "Unexpected affinity setting");
45104613
}
4614+
__kmp_affinity_get_topology_info(affinity);
45114615
affinity.flags.initialized = TRUE;
45124616
}
45134617

@@ -4538,6 +4642,10 @@ void __kmp_affinity_uninitialize(void) {
45384642
KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks);
45394643
if (affinity->proclist != NULL)
45404644
__kmp_free(affinity->proclist);
4645+
if (affinity->ids != NULL)
4646+
__kmp_free(affinity->ids);
4647+
if (affinity->attrs != NULL)
4648+
__kmp_free(affinity->attrs);
45414649
*affinity = KMP_AFFINITY_INIT(affinity->env_var);
45424650
}
45434651
if (__kmp_affin_origMask != NULL) {
@@ -4552,6 +4660,10 @@ void __kmp_affinity_uninitialize(void) {
45524660
__kmp_free(procarr);
45534661
procarr = NULL;
45544662
}
4663+
if (__kmp_osid_to_hwthread_map) {
4664+
__kmp_free(__kmp_osid_to_hwthread_map);
4665+
__kmp_osid_to_hwthread_map = NULL;
4666+
}
45554667
#if KMP_USE_HWLOC
45564668
if (__kmp_hwloc_topology != NULL) {
45574669
hwloc_topology_destroy(__kmp_hwloc_topology);
@@ -4584,12 +4696,21 @@ static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
45844696
*mask = KMP_CPU_INDEX(affinity->masks, *place);
45854697
}
45864698

4699+
// This function initializes the per-thread data concerning affinity including
4700+
// the mask and topology information
45874701
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
4702+
4703+
kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4704+
4705+
// Set the thread topology information to default of unknown
4706+
for (int id = 0; id < KMP_HW_LAST; ++id)
4707+
th->th.th_topology_ids[id] = kmp_hw_thread_t::UNKNOWN_ID;
4708+
th->th.th_topology_attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
4709+
45884710
if (!KMP_AFFINITY_CAPABLE()) {
45894711
return;
45904712
}
45914713

4592-
kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
45934714
if (th->th.th_affin_mask == NULL) {
45944715
KMP_CPU_ALLOC(th->th.th_affin_mask);
45954716
} else {
@@ -4654,6 +4775,11 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
46544775
th->th.th_first_place = 0;
46554776
th->th.th_last_place = affinity->num_masks - 1;
46564777
}
4778+
// Copy topology information associated with the place
4779+
if (i >= 0) {
4780+
th->th.th_topology_ids = __kmp_affinity.ids[i];
4781+
th->th.th_topology_attrs = __kmp_affinity.attrs[i];
4782+
}
46574783

46584784
if (i == KMP_PLACE_ALL) {
46594785
KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
@@ -4718,6 +4844,9 @@ void __kmp_affinity_set_place(int gtid) {
47184844
KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
47194845
KMP_CPU_COPY(th->th.th_affin_mask, mask);
47204846
th->th.th_current_place = th->th.th_new_place;
4847+
// Copy topology information associated with the place
4848+
th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4849+
th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
47214850

47224851
if (__kmp_affinity.flags.verbose) {
47234852
char buf[KMP_AFFIN_MASK_PRINT_LEN];
@@ -5037,6 +5166,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
50375166
KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
50385167
tid, buf);
50395168
}
5169+
__kmp_affinity_get_thread_topology_info(th);
50405170
__kmp_set_system_affinity(mask, TRUE);
50415171
} else { // Non-uniform topology
50425172

@@ -5203,6 +5333,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
52035333
KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
52045334
tid, buf);
52055335
}
5336+
__kmp_affinity_get_thread_topology_info(th);
52065337
__kmp_set_system_affinity(mask, TRUE);
52075338
}
52085339
}

openmp/runtime/src/kmp_affinity.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,9 +681,14 @@ struct kmp_hw_attr_t {
681681
bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
682682
};
683683

684+
#if KMP_AFFINITY_SUPPORTED
685+
KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
686+
#endif
687+
684688
class kmp_hw_thread_t {
685689
public:
686690
static const int UNKNOWN_ID = -1;
691+
static const int MULTIPLE_ID = -2;
687692
static int compare_ids(const void *a, const void *b);
688693
static int compare_compact(const void *a, const void *b);
689694
int ids[KMP_HW_LAST];

0 commit comments

Comments
 (0)