Skip to content

Commit d4327fd

Browse files
author
Ralph Castain
committed
The node index isn't normally passed with the packed node object, so we need to set it on the remote end as the orted needs to pass it down to the procs. Refactor the registration code to better package proc-level info - we will separate out the node and app levels in a subsequent change.
1 parent 163999b commit d4327fd

File tree

2 files changed

+179
-143
lines changed

2 files changed

+179
-143
lines changed

orte/orted/pmix/pmix_server_register_fns.c

Lines changed: 177 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
5454
{
5555
int rc;
5656
orte_proc_t *pptr;
57-
int i, k;
57+
int i, k, n, nlocalprocs;
5858
opal_list_t *info, *pmap;
5959
opal_value_t *kv;
6060
orte_node_t *node, *n2;
6161
opal_vpid_t vpid;
62-
char **list, **procs, **micro, *tmp, *regex;
62+
char **list, **procs, **micro, *tmp, *regex, *cpulist, *peerlist;
6363
orte_job_t *dmns;
6464
orte_job_map_t *map;
6565
orte_app_context_t *app;
@@ -178,6 +178,48 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
178178
kv->data.uint32 = node->index;
179179
opal_list_append(info, &kv->super);
180180

181+
/* pass our node size */
182+
kv = OBJ_NEW(opal_value_t);
183+
kv->key = strdup(OPAL_PMIX_NODE_SIZE);
184+
kv->type = OPAL_UINT32;
185+
kv->data.uint32 = node->num_procs;
186+
opal_list_append(info, &kv->super);
187+
188+
/* univ size */
189+
kv = OBJ_NEW(opal_value_t);
190+
kv->key = strdup(OPAL_PMIX_UNIV_SIZE);
191+
kv->type = OPAL_UINT32;
192+
kv->data.uint32 = jdata->total_slots_alloc;
193+
opal_list_append(info, &kv->super);
194+
195+
/* job size */
196+
kv = OBJ_NEW(opal_value_t);
197+
kv->key = strdup(OPAL_PMIX_JOB_SIZE);
198+
kv->type = OPAL_UINT32;
199+
kv->data.uint32 = jdata->num_procs;
200+
opal_list_append(info, &kv->super);
201+
202+
/* number of apps in this job */
203+
kv = OBJ_NEW(opal_value_t);
204+
kv->key = strdup(OPAL_PMIX_JOB_NUM_APPS);
205+
kv->type = OPAL_UINT32;
206+
kv->data.uint32 = jdata->num_apps;
207+
opal_list_append(info, &kv->super);
208+
209+
/* local size */
210+
kv = OBJ_NEW(opal_value_t);
211+
kv->key = strdup(OPAL_PMIX_LOCAL_SIZE);
212+
kv->type = OPAL_UINT32;
213+
kv->data.uint32 = jdata->num_local_procs;
214+
opal_list_append(info, &kv->super);
215+
216+
/* max procs */
217+
kv = OBJ_NEW(opal_value_t);
218+
kv->key = strdup(OPAL_PMIX_MAX_PROCS);
219+
kv->type = OPAL_UINT32;
220+
kv->data.uint32 = jdata->total_slots_alloc;
221+
opal_list_append(info, &kv->super);
222+
181223
/* identify our local node object within the map,
182224
* if we were included */
183225
node = NULL;
@@ -192,22 +234,55 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
192234
}
193235
}
194236
if (NULL != node) {
195-
/* node size */
237+
vpid = ORTE_VPID_MAX;
238+
for (i=0; i < node->procs->size; i++) {
239+
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
240+
continue;
241+
}
242+
if (pptr->name.jobid == jdata->jobid) {
243+
if (pptr->name.vpid < vpid) {
244+
vpid = pptr->name.vpid;
245+
}
246+
/* go ahead and register this client */
247+
if (OPAL_SUCCESS != (rc = opal_pmix.server_register_client(&pptr->name, uid, gid,
248+
(void*)pptr, NULL, NULL))) {
249+
ORTE_ERROR_LOG(rc);
250+
}
251+
}
252+
}
253+
/* pass the local ldr */
196254
kv = OBJ_NEW(opal_value_t);
197-
kv->key = strdup(OPAL_PMIX_NODE_SIZE);
198-
kv->type = OPAL_UINT32;
199-
kv->data.uint32 = node->num_procs;
255+
kv->key = strdup(OPAL_PMIX_LOCALLDR);
256+
kv->type = OPAL_VPID;
257+
kv->data.name.vpid = vpid;
200258
opal_list_append(info, &kv->super);
259+
}
260+
261+
/* for each proc in this job, create an object that
262+
* includes the info describing the proc so the recipient has a complete
263+
* picture. This allows procs to connect to each other without
264+
* any further info exchange, assuming the underlying transports
265+
* support it. We also pass all the proc-specific data here so
266+
* that each proc can lookup info about every other proc in the job */
267+
268+
for (n=0; n < map->nodes->size; n++) {
269+
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n))) {
270+
continue;
271+
}
201272
/* construct the list of local peers, while adding
202273
* each proc's locality info */
203274
list = NULL;
204275
procs = NULL;
276+
cpulist = NULL;
277+
peerlist = NULL;
205278
vpid = ORTE_VPID_MAX;
279+
nlocalprocs = 0;
206280
for (i=0; i < node->procs->size; i++) {
207281
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
208282
continue;
209283
}
210284
if (pptr->name.jobid == jdata->jobid) {
285+
++nlocalprocs;
211286
opal_argv_append_nosize(&list, ORTE_VPID_PRINT(pptr->name.vpid));
212287
if (pptr->name.vpid < vpid) {
213288
vpid = pptr->name.vpid;
@@ -225,168 +300,127 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
225300
} else {
226301
opal_argv_append_nosize(&procs, "UNBOUND");
227302
}
228-
/* go ahead and register this client */
229-
if (OPAL_SUCCESS != (rc = opal_pmix.server_register_client(&pptr->name, uid, gid,
230-
(void*)pptr, NULL, NULL))) {
231-
ORTE_ERROR_LOG(rc);
232-
}
233303
}
234304
}
235-
236305
/* construct the list of peers for transmission */
237306
if (NULL != list) {
238-
tmp = opal_argv_join(list, ',');
307+
peerlist = opal_argv_join(list, ',');
239308
opal_argv_free(list);
240309
list = NULL;
241-
/* pass the list of peers */
242-
kv = OBJ_NEW(opal_value_t);
243-
kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
244-
kv->type = OPAL_STRING;
245-
kv->data.string = tmp;
246-
opal_list_append(info, &kv->super);
247310
}
248311
/* construct the list of cpusets for transmission */
249312
if (NULL != procs) {
250-
tmp = opal_argv_join(procs, ':');
313+
cpulist = opal_argv_join(procs, ':');
251314
opal_argv_free(procs);
252315
procs = NULL;
253-
/* pass the list of cpusets */
254-
kv = OBJ_NEW(opal_value_t);
255-
kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS);
256-
kv->type = OPAL_STRING;
257-
kv->data.string = tmp;
258-
opal_list_append(info, &kv->super);
259316
}
260-
/* pass the local ldr */
261-
kv = OBJ_NEW(opal_value_t);
262-
kv->key = strdup(OPAL_PMIX_LOCALLDR);
263-
kv->type = OPAL_VPID;
264-
kv->data.name.vpid = vpid;
265-
opal_list_append(info, &kv->super);
266-
}
267-
268-
/* univ size */
269-
kv = OBJ_NEW(opal_value_t);
270-
kv->key = strdup(OPAL_PMIX_UNIV_SIZE);
271-
kv->type = OPAL_UINT32;
272-
kv->data.uint32 = jdata->total_slots_alloc;
273-
opal_list_append(info, &kv->super);
274-
275-
/* job size */
276-
kv = OBJ_NEW(opal_value_t);
277-
kv->key = strdup(OPAL_PMIX_JOB_SIZE);
278-
kv->type = OPAL_UINT32;
279-
kv->data.uint32 = jdata->num_procs;
280-
opal_list_append(info, &kv->super);
281-
282-
/* number of apps in this job */
283-
kv = OBJ_NEW(opal_value_t);
284-
kv->key = strdup(OPAL_PMIX_JOB_NUM_APPS);
285-
kv->type = OPAL_UINT32;
286-
kv->data.uint32 = jdata->num_apps;
287-
opal_list_append(info, &kv->super);
288317

289-
/* local size */
290-
kv = OBJ_NEW(opal_value_t);
291-
kv->key = strdup(OPAL_PMIX_LOCAL_SIZE);
292-
kv->type = OPAL_UINT32;
293-
kv->data.uint32 = jdata->num_local_procs;
294-
opal_list_append(info, &kv->super);
295-
296-
/* max procs */
297-
kv = OBJ_NEW(opal_value_t);
298-
kv->key = strdup(OPAL_PMIX_MAX_PROCS);
299-
kv->type = OPAL_UINT32;
300-
kv->data.uint32 = jdata->total_slots_alloc;
301-
opal_list_append(info, &kv->super);
318+
/* now cycle across each proc on this node, passing all data that
319+
* varies by proc */
320+
for (i=0; i < node->procs->size; i++) {
321+
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
322+
continue;
323+
}
324+
/* setup the proc map object */
325+
kv = OBJ_NEW(opal_value_t);
326+
kv->key = strdup(OPAL_PMIX_PROC_DATA);
327+
kv->type = OPAL_PTR;
328+
kv->data.ptr = OBJ_NEW(opal_list_t);
329+
opal_list_append(info, &kv->super);
330+
pmap = kv->data.ptr;
302331

303-
/* for each proc in this job, create an object that
304-
* includes the info describing the proc so the recipient has a complete
305-
* picture. This allows procs to connect to each other without
306-
* an further info exchange, assuming the underlying transports
307-
* support it */
332+
/* must start with rank */
333+
kv = OBJ_NEW(opal_value_t);
334+
kv->key = strdup(OPAL_PMIX_RANK);
335+
kv->type = OPAL_VPID;
336+
kv->data.name.vpid = pptr->name.vpid;
337+
opal_list_append(pmap, &kv->super);
308338

309-
for (i=0; i < jdata->procs->size; i++) {
310-
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
311-
continue;
312-
}
313-
kv = OBJ_NEW(opal_value_t);
314-
kv->key = strdup(OPAL_PMIX_PROC_DATA);
315-
kv->type = OPAL_PTR;
316-
kv->data.ptr = OBJ_NEW(opal_list_t);
317-
opal_list_append(info, &kv->super);
318-
pmap = kv->data.ptr;
339+
/* pass the list of peers */
340+
kv = OBJ_NEW(opal_value_t);
341+
kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
342+
kv->type = OPAL_STRING;
343+
kv->data.string = peerlist;
344+
opal_list_append(pmap, &kv->super);
319345

320-
/* rank */
321-
kv = OBJ_NEW(opal_value_t);
322-
kv->key = strdup(OPAL_PMIX_RANK);
323-
kv->type = OPAL_VPID;
324-
kv->data.name.vpid = pptr->name.vpid;
325-
opal_list_append(pmap, &kv->super);
346+
/* pass the list of cpusets */
347+
kv = OBJ_NEW(opal_value_t);
348+
kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS);
349+
kv->type = OPAL_STRING;
350+
kv->data.string = cpulist;
351+
opal_list_append(pmap, &kv->super);
326352

327-
/* appnum */
328-
kv = OBJ_NEW(opal_value_t);
329-
kv->key = strdup(OPAL_PMIX_APPNUM);
330-
kv->type = OPAL_UINT32;
331-
kv->data.uint32 = pptr->app_idx;
332-
opal_list_append(pmap, &kv->super);
353+
/* appnum */
354+
kv = OBJ_NEW(opal_value_t);
355+
kv->key = strdup(OPAL_PMIX_APPNUM);
356+
kv->type = OPAL_UINT32;
357+
kv->data.uint32 = pptr->app_idx;
358+
opal_list_append(pmap, &kv->super);
333359

334-
/* app ldr */
335-
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
336-
kv = OBJ_NEW(opal_value_t);
337-
kv->key = strdup(OPAL_PMIX_APPLDR);
338-
kv->type = OPAL_VPID;
339-
kv->data.name.vpid = app->first_rank;
340-
opal_list_append(pmap, &kv->super);
360+
/* app ldr */
361+
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
362+
kv = OBJ_NEW(opal_value_t);
363+
kv->key = strdup(OPAL_PMIX_APPLDR);
364+
kv->type = OPAL_VPID;
365+
kv->data.name.vpid = app->first_rank;
366+
opal_list_append(pmap, &kv->super);
341367

342-
/* global/univ rank */
343-
kv = OBJ_NEW(opal_value_t);
344-
kv->key = strdup(OPAL_PMIX_GLOBAL_RANK);
345-
kv->type = OPAL_VPID;
346-
kv->data.name.vpid = pptr->name.vpid + jdata->offset;
347-
opal_list_append(pmap, &kv->super);
368+
/* global/univ rank */
369+
kv = OBJ_NEW(opal_value_t);
370+
kv->key = strdup(OPAL_PMIX_GLOBAL_RANK);
371+
kv->type = OPAL_VPID;
372+
kv->data.name.vpid = pptr->name.vpid + jdata->offset;
373+
opal_list_append(pmap, &kv->super);
348374

349-
/* app rank */
350-
kv = OBJ_NEW(opal_value_t);
351-
kv->key = strdup(OPAL_PMIX_APP_RANK);
352-
kv->type = OPAL_VPID;
353-
kv->data.name.vpid = pptr->app_rank;
354-
opal_list_append(pmap, &kv->super);
375+
/* app rank */
376+
kv = OBJ_NEW(opal_value_t);
377+
kv->key = strdup(OPAL_PMIX_APP_RANK);
378+
kv->type = OPAL_VPID;
379+
kv->data.name.vpid = pptr->app_rank;
380+
opal_list_append(pmap, &kv->super);
355381

356-
/* app size */
357-
kv = OBJ_NEW(opal_value_t);
358-
kv->key = strdup(OPAL_PMIX_APP_SIZE);
359-
kv->type = OPAL_UINT32;
360-
kv->data.uint32 = app->num_procs;
361-
opal_list_append(info, &kv->super);
382+
/* app size */
383+
kv = OBJ_NEW(opal_value_t);
384+
kv->key = strdup(OPAL_PMIX_APP_SIZE);
385+
kv->type = OPAL_UINT32;
386+
kv->data.uint32 = app->num_procs;
387+
opal_list_append(info, &kv->super);
362388

363-
/* local rank */
364-
kv = OBJ_NEW(opal_value_t);
365-
kv->key = strdup(OPAL_PMIX_LOCAL_RANK);
366-
kv->type = OPAL_UINT16;
367-
kv->data.uint16 = pptr->local_rank;
368-
opal_list_append(pmap, &kv->super);
389+
/* local rank */
390+
kv = OBJ_NEW(opal_value_t);
391+
kv->key = strdup(OPAL_PMIX_LOCAL_RANK);
392+
kv->type = OPAL_UINT16;
393+
kv->data.uint16 = pptr->local_rank;
394+
opal_list_append(pmap, &kv->super);
369395

370-
/* node rank */
371-
kv = OBJ_NEW(opal_value_t);
372-
kv->key = strdup(OPAL_PMIX_NODE_RANK);
373-
kv->type = OPAL_UINT16;
374-
kv->data.uint32 = pptr->node_rank;
375-
opal_list_append(pmap, &kv->super);
396+
/* node rank */
397+
kv = OBJ_NEW(opal_value_t);
398+
kv->key = strdup(OPAL_PMIX_NODE_RANK);
399+
kv->type = OPAL_UINT16;
400+
kv->data.uint32 = pptr->node_rank;
401+
opal_list_append(pmap, &kv->super);
376402

377-
/* hostname */
378-
kv = OBJ_NEW(opal_value_t);
379-
kv->key = strdup(OPAL_PMIX_HOSTNAME);
380-
kv->type = OPAL_STRING;
381-
kv->data.string = strdup(pptr->node->name);
382-
opal_list_append(pmap, &kv->super);
403+
/* hostname */
404+
kv = OBJ_NEW(opal_value_t);
405+
kv->key = strdup(OPAL_PMIX_HOSTNAME);
406+
kv->type = OPAL_STRING;
407+
kv->data.string = strdup(pptr->node->name);
408+
opal_list_append(pmap, &kv->super);
383409

384-
/* node ID */
385-
kv = OBJ_NEW(opal_value_t);
386-
kv->key = strdup(OPAL_PMIX_NODEID);
387-
kv->type = OPAL_UINT32;
388-
kv->data.uint32 = pptr->node->index;
389-
opal_list_append(pmap, &kv->super);
410+
/* node ID */
411+
kv = OBJ_NEW(opal_value_t);
412+
kv->key = strdup(OPAL_PMIX_NODEID);
413+
kv->type = OPAL_UINT32;
414+
kv->data.uint32 = pptr->node->index;
415+
opal_list_append(pmap, &kv->super);
416+
}
417+
/* cleanup */
418+
if (NULL != cpulist) {
419+
free(cpulist);
420+
}
421+
if (NULL != peerlist) {
422+
free(peerlist);
423+
}
390424
}
391425

392426
/* mark the job as registered */

orte/util/nidmap.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,8 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo)
280280
ORTE_ERROR_LOG(rc);
281281
return rc;
282282
}
283+
/* set the nodeid */
284+
node->index = vpid;
283285
/* do we already have this node? */
284286
nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, vpid);
285287
/* set the new node object into the array */

0 commit comments

Comments
 (0)