@@ -4053,6 +4053,107 @@ static int __kmp_affinity_max_proc_per_core(int nprocs, int bottom_level,
40534053
40544054static int *procarr = NULL ;
40554055static int __kmp_aff_depth = 0 ;
4056+ static int *__kmp_osid_to_hwthread_map = NULL ;
4057+
4058+ static void __kmp_affinity_get_mask_topology_info (const kmp_affin_mask_t *mask,
4059+ kmp_affinity_ids_t &ids,
4060+ kmp_affinity_attrs_t &attrs) {
4061+ if (!KMP_AFFINITY_CAPABLE ())
4062+ return ;
4063+
4064+ // Initiailze ids and attrs thread data
4065+ for (int i = 0 ; i < KMP_HW_LAST; ++i)
4066+ ids[i] = kmp_hw_thread_t ::UNKNOWN_ID;
4067+ attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
4068+
4069+ // Iterate through each os id within the mask and determine
4070+ // the topology id and attribute information
4071+ int cpu;
4072+ int depth = __kmp_topology->get_depth ();
4073+ KMP_CPU_SET_ITERATE (cpu, mask) {
4074+ int osid_idx = __kmp_osid_to_hwthread_map[cpu];
4075+ const kmp_hw_thread_t &hw_thread = __kmp_topology->at (osid_idx);
4076+ for (int level = 0 ; level < depth; ++level) {
4077+ kmp_hw_t type = __kmp_topology->get_type (level);
4078+ int id = hw_thread.sub_ids [level];
4079+ if (ids[type] == kmp_hw_thread_t ::UNKNOWN_ID || ids[type] == id) {
4080+ ids[type] = id;
4081+ } else {
4082+ // This mask spans across multiple topology units, set it as such
4083+ // and mark every level below as such as well.
4084+ ids[type] = kmp_hw_thread_t ::MULTIPLE_ID;
4085+ for (; level < depth; ++level) {
4086+ kmp_hw_t type = __kmp_topology->get_type (level);
4087+ ids[type] = kmp_hw_thread_t ::MULTIPLE_ID;
4088+ }
4089+ }
4090+ }
4091+ if (!attrs.valid ) {
4092+ attrs.core_type = hw_thread.attrs .get_core_type ();
4093+ attrs.core_eff = hw_thread.attrs .get_core_eff ();
4094+ attrs.valid = 1 ;
4095+ } else {
4096+ // This mask spans across multiple attributes, set it as such
4097+ if (attrs.core_type != hw_thread.attrs .get_core_type ())
4098+ attrs.core_type = KMP_HW_CORE_TYPE_UNKNOWN;
4099+ if (attrs.core_eff != hw_thread.attrs .get_core_eff ())
4100+ attrs.core_eff = kmp_hw_attr_t ::UNKNOWN_CORE_EFF;
4101+ }
4102+ }
4103+ }
4104+
4105+ static void __kmp_affinity_get_thread_topology_info (kmp_info_t *th) {
4106+ if (!KMP_AFFINITY_CAPABLE ())
4107+ return ;
4108+ const kmp_affin_mask_t *mask = th->th .th_affin_mask ;
4109+ kmp_affinity_ids_t &ids = th->th .th_topology_ids ;
4110+ kmp_affinity_attrs_t &attrs = th->th .th_topology_attrs ;
4111+ __kmp_affinity_get_mask_topology_info (mask, ids, attrs);
4112+ }
4113+
4114+ // Assign the topology information to each place in the place list
4115+ // A thread can then grab not only its affinity mask, but the topology
4116+ // information associated with that mask. e.g., Which socket is a thread on
4117+ static void __kmp_affinity_get_topology_info (kmp_affinity_t &affinity) {
4118+ if (!KMP_AFFINITY_CAPABLE ())
4119+ return ;
4120+ if (affinity.type != affinity_none) {
4121+ KMP_ASSERT (affinity.num_os_id_masks );
4122+ KMP_ASSERT (affinity.os_id_masks );
4123+ }
4124+ KMP_ASSERT (affinity.num_masks );
4125+ KMP_ASSERT (affinity.masks );
4126+ KMP_ASSERT (__kmp_affin_fullMask);
4127+
4128+ int max_cpu = __kmp_affin_fullMask->get_max_cpu ();
4129+ int num_hw_threads = __kmp_topology->get_num_hw_threads ();
4130+
4131+ // Allocate thread topology information
4132+ if (!affinity.ids ) {
4133+ affinity.ids = (kmp_affinity_ids_t *)__kmp_allocate (
4134+ sizeof (kmp_affinity_ids_t ) * affinity.num_masks );
4135+ }
4136+ if (!affinity.attrs ) {
4137+ affinity.attrs = (kmp_affinity_attrs_t *)__kmp_allocate (
4138+ sizeof (kmp_affinity_attrs_t ) * affinity.num_masks );
4139+ }
4140+ if (!__kmp_osid_to_hwthread_map) {
4141+ // Want the +1 because max_cpu should be valid index into map
4142+ __kmp_osid_to_hwthread_map =
4143+ (int *)__kmp_allocate (sizeof (int ) * (max_cpu + 1 ));
4144+ }
4145+
4146+ // Create the OS proc to hardware thread map
4147+ for (int hw_thread = 0 ; hw_thread < num_hw_threads; ++hw_thread)
4148+ __kmp_osid_to_hwthread_map[__kmp_topology->at (hw_thread).os_id ] = hw_thread;
4149+
4150+ for (unsigned i = 0 ; i < affinity.num_masks ; ++i) {
4151+ kmp_affinity_ids_t &ids = affinity.ids [i];
4152+ kmp_affinity_attrs_t &attrs = affinity.attrs [i];
4153+ kmp_affin_mask_t *mask = KMP_CPU_INDEX (affinity.masks , i);
4154+ __kmp_affinity_get_mask_topology_info (mask, ids, attrs);
4155+ }
4156+ }
40564157
40574158// Create a one element mask array (set of places) which only contains the
40584159// initial process's affinity mask
@@ -4063,6 +4164,7 @@ static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
40634164 KMP_CPU_ALLOC_ARRAY (affinity.masks , affinity.num_masks );
40644165 kmp_affin_mask_t *dest = KMP_CPU_INDEX (affinity.masks , 0 );
40654166 KMP_CPU_COPY (dest, __kmp_affin_fullMask);
4167+ __kmp_affinity_get_topology_info (affinity);
40664168}
40674169
40684170static void __kmp_aux_affinity_initialize_masks (kmp_affinity_t &affinity) {
@@ -4432,6 +4534,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
44324534 if ((nproc < 2 ) || (nproc < __kmp_avail_proc)) {
44334535 KMP_AFF_WARNING (affinity, AffBalancedNotAvail, env_var);
44344536 affinity.type = affinity_none;
4537+ __kmp_create_affinity_none_places (affinity);
44354538 affinity.flags .initialized = TRUE ;
44364539 return ;
44374540 }
@@ -4508,6 +4611,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
45084611 default :
45094612 KMP_ASSERT2 (0 , " Unexpected affinity setting" );
45104613 }
4614+ __kmp_affinity_get_topology_info (affinity);
45114615 affinity.flags .initialized = TRUE ;
45124616}
45134617
@@ -4538,6 +4642,10 @@ void __kmp_affinity_uninitialize(void) {
45384642 KMP_CPU_FREE_ARRAY (affinity->os_id_masks , affinity->num_os_id_masks );
45394643 if (affinity->proclist != NULL )
45404644 __kmp_free (affinity->proclist );
4645+ if (affinity->ids != NULL )
4646+ __kmp_free (affinity->ids );
4647+ if (affinity->attrs != NULL )
4648+ __kmp_free (affinity->attrs );
45414649 *affinity = KMP_AFFINITY_INIT (affinity->env_var );
45424650 }
45434651 if (__kmp_affin_origMask != NULL ) {
@@ -4552,6 +4660,10 @@ void __kmp_affinity_uninitialize(void) {
45524660 __kmp_free (procarr);
45534661 procarr = NULL ;
45544662 }
4663+ if (__kmp_osid_to_hwthread_map) {
4664+ __kmp_free (__kmp_osid_to_hwthread_map);
4665+ __kmp_osid_to_hwthread_map = NULL ;
4666+ }
45554667#if KMP_USE_HWLOC
45564668 if (__kmp_hwloc_topology != NULL ) {
45574669 hwloc_topology_destroy (__kmp_hwloc_topology);
@@ -4584,12 +4696,21 @@ static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
45844696 *mask = KMP_CPU_INDEX (affinity->masks , *place);
45854697}
45864698
4699+ // This function initializes the per-thread data concerning affinity including
4700+ // the mask and topology information
45874701void __kmp_affinity_set_init_mask (int gtid, int isa_root) {
4702+
4703+ kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR (__kmp_threads[gtid]);
4704+
4705+ // Set the thread topology information to default of unknown
4706+ for (int id = 0 ; id < KMP_HW_LAST; ++id)
4707+ th->th .th_topology_ids [id] = kmp_hw_thread_t ::UNKNOWN_ID;
4708+ th->th .th_topology_attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
4709+
45884710 if (!KMP_AFFINITY_CAPABLE ()) {
45894711 return ;
45904712 }
45914713
4592- kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR (__kmp_threads[gtid]);
45934714 if (th->th .th_affin_mask == NULL ) {
45944715 KMP_CPU_ALLOC (th->th .th_affin_mask );
45954716 } else {
@@ -4654,6 +4775,11 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
46544775 th->th .th_first_place = 0 ;
46554776 th->th .th_last_place = affinity->num_masks - 1 ;
46564777 }
4778+ // Copy topology information associated with the place
4779+ if (i >= 0 ) {
4780+ th->th .th_topology_ids = __kmp_affinity.ids [i];
4781+ th->th .th_topology_attrs = __kmp_affinity.attrs [i];
4782+ }
46574783
46584784 if (i == KMP_PLACE_ALL) {
46594785 KA_TRACE (100 , (" __kmp_affinity_set_init_mask: binding T#%d to all places\n " ,
@@ -4718,6 +4844,9 @@ void __kmp_affinity_set_place(int gtid) {
47184844 KMP_CPU_INDEX (__kmp_affinity.masks , th->th .th_new_place );
47194845 KMP_CPU_COPY (th->th .th_affin_mask , mask);
47204846 th->th .th_current_place = th->th .th_new_place ;
4847+ // Copy topology information associated with the place
4848+ th->th .th_topology_ids = __kmp_affinity.ids [th->th .th_new_place ];
4849+ th->th .th_topology_attrs = __kmp_affinity.attrs [th->th .th_new_place ];
47214850
47224851 if (__kmp_affinity.flags .verbose ) {
47234852 char buf[KMP_AFFIN_MASK_PRINT_LEN];
@@ -5037,6 +5166,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
50375166 KMP_INFORM (BoundToOSProcSet, env_var, (kmp_int32)getpid (), __kmp_gettid (),
50385167 tid, buf);
50395168 }
5169+ __kmp_affinity_get_thread_topology_info (th);
50405170 __kmp_set_system_affinity (mask, TRUE );
50415171 } else { // Non-uniform topology
50425172
@@ -5203,6 +5333,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
52035333 KMP_INFORM (BoundToOSProcSet, env_var, (kmp_int32)getpid (), __kmp_gettid (),
52045334 tid, buf);
52055335 }
5336+ __kmp_affinity_get_thread_topology_info (th);
52065337 __kmp_set_system_affinity (mask, TRUE );
52075338 }
52085339}
0 commit comments