Skip to content

Commit 6fe5b36

Browse files
author
Ralph Castain
authored
Merge pull request #3963 from rhc54/topic/hwfix
Restore binding support
2 parents 9211b5d + 96f07ae commit 6fe5b36

File tree

1 file changed

+117
-1
lines changed

1 file changed

+117
-1
lines changed

opal/mca/hwloc/base/hwloc_base_util.c

Lines changed: 117 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,102 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo,
110110
return obj;
111111
}
112112

113+
#if HWLOC_API_VERSION < 0x20000
114+
/* determine the node-level available cpuset based on
115+
* online vs allowed vs user-specified cpus
116+
*/
117+
int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
118+
{
119+
hwloc_obj_t root, pu;
120+
hwloc_cpuset_t avail = NULL, pucpus, res;
121+
opal_hwloc_topo_data_t *sum;
122+
opal_hwloc_obj_data_t *data;
123+
char **ranges=NULL, **range=NULL;
124+
int idx, cpu, start, end;
125+
126+
root = hwloc_get_root_obj(topo);
127+
128+
if (NULL == root->userdata) {
129+
root->userdata = (void*)OBJ_NEW(opal_hwloc_topo_data_t);
130+
}
131+
sum = (opal_hwloc_topo_data_t*)root->userdata;
132+
133+
/* should only ever enter here once, but check anyway */
134+
if (NULL != sum->available) {
135+
return OPAL_SUCCESS;
136+
}
137+
138+
/* process any specified default cpu set against this topology */
139+
if (NULL == opal_hwloc_base_cpu_list) {
140+
/* get the root available cpuset */
141+
avail = hwloc_bitmap_alloc();
142+
hwloc_bitmap_and(avail, root->online_cpuset, root->allowed_cpuset);
143+
OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
144+
"hwloc:base: no cpus specified - using root available cpuset"));
145+
} else {
146+
OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
147+
"hwloc:base: filtering cpuset"));
148+
/* find the specified logical cpus */
149+
ranges = opal_argv_split(opal_hwloc_base_cpu_list, ',');
150+
avail = hwloc_bitmap_alloc();
151+
hwloc_bitmap_zero(avail);
152+
res = hwloc_bitmap_alloc();
153+
pucpus = hwloc_bitmap_alloc();
154+
for (idx=0; idx < opal_argv_count(ranges); idx++) {
155+
range = opal_argv_split(ranges[idx], '-');
156+
switch (opal_argv_count(range)) {
157+
case 1:
158+
/* only one cpu given - get that object */
159+
cpu = strtoul(range[0], NULL, 10);
160+
if (NULL != (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) {
161+
hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
162+
hwloc_bitmap_or(res, avail, pucpus);
163+
hwloc_bitmap_copy(avail, res);
164+
data = (opal_hwloc_obj_data_t*)pu->userdata;
165+
if (NULL == data) {
166+
pu->userdata = (void*)OBJ_NEW(opal_hwloc_obj_data_t);
167+
data = (opal_hwloc_obj_data_t*)pu->userdata;
168+
}
169+
data->npus++;
170+
}
171+
break;
172+
case 2:
173+
/* range given */
174+
start = strtoul(range[0], NULL, 10);
175+
end = strtoul(range[1], NULL, 10);
176+
for (cpu=start; cpu <= end; cpu++) {
177+
if (NULL != (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) {
178+
hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
179+
hwloc_bitmap_or(res, avail, pucpus);
180+
hwloc_bitmap_copy(avail, res);
181+
data = (opal_hwloc_obj_data_t*)pu->userdata;
182+
if (NULL == data) {
183+
pu->userdata = (void*)OBJ_NEW(opal_hwloc_obj_data_t);
184+
data = (opal_hwloc_obj_data_t*)pu->userdata;
185+
}
186+
data->npus++;
187+
}
188+
}
189+
break;
190+
default:
191+
break;
192+
}
193+
opal_argv_free(range);
194+
}
195+
if (NULL != ranges) {
196+
opal_argv_free(ranges);
197+
}
198+
hwloc_bitmap_free(res);
199+
hwloc_bitmap_free(pucpus);
200+
}
201+
202+
/* cache this info */
203+
sum->available = avail;
204+
205+
return OPAL_SUCCESS;
206+
}
207+
#endif
208+
113209
static void fill_cache_line_size(void)
114210
{
115211
int i = 0, cache_level = 2;
@@ -204,12 +300,26 @@ int opal_hwloc_base_get_topology(void)
204300
return OPAL_ERROR;
205301
}
206302
free(val);
303+
#if HWLOC_API_VERSION < 0x20000
304+
/* filter the cpus thru any default cpu set */
305+
if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) {
306+
hwloc_topology_destroy(opal_hwloc_topology);
307+
return rc;
308+
}
309+
#endif
207310
} else if (NULL == opal_hwloc_base_topo_file) {
208311
if (0 != hwloc_topology_init(&opal_hwloc_topology) ||
209312
0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, 0, true) ||
210313
0 != hwloc_topology_load(opal_hwloc_topology)) {
211314
return OPAL_ERR_NOT_SUPPORTED;
212315
}
316+
#if HWLOC_API_VERSION < 0x20000
317+
/* filter the cpus thru any default cpu set */
318+
if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) {
319+
hwloc_topology_destroy(opal_hwloc_topology);
320+
return rc;
321+
}
322+
#endif
213323
} else {
214324
if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) {
215325
return rc;
@@ -1766,6 +1876,7 @@ int opal_hwloc_base_cset2mapstr(char *str, int len,
17661876

17671877
/* if the cpuset includes all available cpus, then we are unbound */
17681878
root = hwloc_get_root_obj(topo);
1879+
#if HWLOC_API_VERSION < 0x20000
17691880
if (NULL != root->userdata) {
17701881
sum = (opal_hwloc_topo_data_t*)root->userdata;
17711882
if (NULL == sum->available) {
@@ -1775,6 +1886,11 @@ int opal_hwloc_base_cset2mapstr(char *str, int len,
17751886
return OPAL_ERR_NOT_BOUND;
17761887
}
17771888
}
1889+
#else
1890+
if (0 != hwloc_bitmap_isincluded(root->cpuset, cpuset)) {
1891+
return OPAL_ERR_NOT_BOUND;
1892+
}
1893+
#endif
17781894

17791895
/* Iterate over all existing sockets */
17801896
for (socket = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0);
@@ -1903,7 +2019,7 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t *
19032019
opal_list_append(sorted_list, &numa_node->super);
19042020
}
19052021
#else
1906-
if (0 != hwloc_distances_get_by_type(topo, HWLOC_OBJ_NODE, &distances_nr, &distances, 0, 0) || 0 == distances_nr) {
2022+
if (0 != hwloc_distances_get_by_type(topo, HWLOC_OBJ_NODE, &distances_nr, &distances, 0, 0) || 0 == distances_nr) {
19072023
opal_output_verbose(5, opal_hwloc_base_framework.framework_output,
19082024
"hwloc:base:get_sorted_numa_list: There is no information about distances on the node.");
19092025
return;

0 commit comments

Comments
 (0)