Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit fb71b10

Browse files
authored
Merge pull request #1349 from rhc54/cmr201/state
Ensure that the "running" state is correctly updated.
2 parents e83938e + 18dae97 commit fb71b10

File tree

1 file changed

+46
-3
lines changed

1 file changed

+46
-3
lines changed

orte/mca/state/orted/state_orted.c

Lines changed: 46 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,10 @@ static void track_jobs(int fd, short argc, void *cbdata)
156156
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
157157
opal_buffer_t *alert;
158158
orte_plm_cmd_flag_t cmd;
159-
int rc;
159+
int rc, i;
160+
orte_proc_state_t running = ORTE_PROC_STATE_RUNNING;
161+
orte_proc_t *child;
162+
orte_vpid_t null=ORTE_VPID_INVALID;
160163

161164
if (ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE == caddy->job_state) {
162165
OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
@@ -172,12 +175,52 @@ static void track_jobs(int fd, short argc, void *cbdata)
172175
OBJ_RELEASE(alert);
173176
goto cleanup;
174177
}
175-
/* pack the job info */
176-
if (ORTE_SUCCESS != (rc = pack_state_update(alert, caddy->jdata))) {
178+
/* pack the jobid */
179+
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &caddy->jdata->jobid, 1, ORTE_JOBID))) {
177180
ORTE_ERROR_LOG(rc);
178181
OBJ_RELEASE(alert);
179182
goto cleanup;
180183
}
184+
for (i=0; i < orte_local_children->size; i++) {
185+
if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
186+
continue;
187+
}
188+
/* if this child is part of the job... */
189+
if (child->name.jobid == caddy->jdata->jobid) {
190+
/* pack the child's vpid */
191+
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &(child->name.vpid), 1, ORTE_VPID))) {
192+
ORTE_ERROR_LOG(rc);
193+
OBJ_RELEASE(alert);
194+
goto cleanup;
195+
}
196+
/* pack the pid */
197+
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &child->pid, 1, OPAL_PID))) {
198+
ORTE_ERROR_LOG(rc);
199+
OBJ_RELEASE(alert);
200+
goto cleanup;
201+
}
202+
/* pack the RUNNING state */
203+
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &running, 1, ORTE_PROC_STATE))) {
204+
ORTE_ERROR_LOG(rc);
205+
OBJ_RELEASE(alert);
206+
goto cleanup;
207+
}
208+
/* pack its exit code */
209+
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &child->exit_code, 1, ORTE_EXIT_CODE))) {
210+
ORTE_ERROR_LOG(rc);
211+
OBJ_RELEASE(alert);
212+
goto cleanup;
213+
}
214+
}
215+
}
216+
217+
/* flag that this job is complete so the receiver can know */
218+
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &null, 1, ORTE_VPID))) {
219+
ORTE_ERROR_LOG(rc);
220+
OBJ_RELEASE(alert);
221+
goto cleanup;
222+
}
223+
181224
/* send it */
182225
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
183226
ORTE_RML_TAG_PLM,

0 commit comments

Comments
 (0)