|
70 | 70 | #include "orte/runtime/orte_quit.h" |
71 | 71 | #include "orte/util/name_fns.h" |
72 | 72 | #include "orte/util/nidmap.h" |
| 73 | +#include "orte/util/pre_condition_transports.h" |
73 | 74 | #include "orte/util/proc_info.h" |
74 | 75 | #include "orte/util/regex.h" |
75 | 76 | #include "orte/mca/state/state.h" |
@@ -272,6 +273,9 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata) |
272 | 273 | int i; |
273 | 274 | orte_app_context_t *app; |
274 | 275 | orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; |
| 276 | + char *key; |
| 277 | + orte_job_t *parent; |
| 278 | + orte_process_name_t name, *nptr; |
275 | 279 |
|
276 | 280 | OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, |
277 | 281 | "%s plm:base:setup_job", |
@@ -308,6 +312,50 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata) |
308 | 312 | ORTE_FLAG_SET(caddy->jdata, ORTE_JOB_FLAG_RECOVERABLE); |
309 | 313 | } |
310 | 314 |
|
| 315 | + /* set up transport keys in case the MPI layer needs them. If |
| 316 | + * this is a dynamic spawn, then use the same key as the |
| 317 | + * parent job had so the new/old procs can communicate. |
| 318 | + * Otherwise we can use the jobfam and stepid as unique keys |
| 319 | + * because they are unique values assigned by the RM |
| 320 | + */ |
| 321 | + nptr = &name; |
| 322 | + if (orte_get_attribute(&caddy->jdata->attributes, ORTE_JOB_LAUNCH_PROXY, (void**)&nptr, OPAL_NAME)) { |
| 323 | + /* get the parent jdata */ |
| 324 | + if (NULL == (parent = orte_get_job_data_object(name.jobid))) { |
| 325 | + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); |
| 326 | + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); |
| 327 | + OBJ_RELEASE(caddy); |
| 328 | + return; |
| 329 | + } |
| 330 | + key = NULL; |
| 331 | + if (!orte_get_attribute(&parent->attributes, ORTE_JOB_TRANSPORT_KEY, (void**)&key, OPAL_STRING) || |
| 332 | + NULL == key) { |
| 333 | + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); |
| 334 | + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); |
| 335 | + OBJ_RELEASE(caddy); |
| 336 | + return; |
| 337 | + } |
| 338 | + /* record it */ |
| 339 | + orte_set_attribute(&caddy->jdata->attributes, ORTE_JOB_TRANSPORT_KEY, ORTE_ATTR_LOCAL, key, OPAL_STRING); |
| 340 | + /* add the transport key envar to each app */ |
| 341 | + for (i=0; i < caddy->jdata->apps->size; i++) { |
| 342 | + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(caddy->jdata->apps, i))) { |
| 343 | + continue; |
| 344 | + } |
| 345 | + opal_setenv(OPAL_MCA_PREFIX"orte_precondition_transports", key, true, &app->env); |
| 346 | + } |
| 347 | + free(key); |
| 348 | + } else { |
| 349 | + /* this also records the transport key attribute in the job object and |
| 350 | + * adds the key envar to each app */ |
| 351 | + if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata))) { |
| 352 | + ORTE_ERROR_LOG(rc); |
| 353 | + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); |
| 354 | + OBJ_RELEASE(caddy); |
| 355 | + return; |
| 356 | + } |
| 357 | + } |
| 358 | + |
311 | 359 | /* if app recovery is not defined, set apps to defaults */ |
312 | 360 | for (i=0; i < caddy->jdata->apps->size; i++) { |
313 | 361 | if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(caddy->jdata->apps, i))) { |
|
0 commit comments