@@ -252,7 +252,7 @@ static void track_procs(int fd, short argc, void *cbdata)
252
252
orte_job_t * jdata ;
253
253
orte_proc_t * pdata , * pptr ;
254
254
opal_buffer_t * alert ;
255
- int rc , i ;
255
+ int rc , i , j ;
256
256
orte_plm_cmd_flag_t cmd ;
257
257
char * rtmod ;
258
258
@@ -416,6 +416,31 @@ static void track_procs(int fd, short argc, void *cbdata)
416
416
}
417
417
/* mark that we sent it so we ensure we don't do it again */
418
418
orte_set_attribute (& jdata -> attributes , ORTE_JOB_TERM_NOTIFIED , ORTE_ATTR_LOCAL , NULL , OPAL_BOOL );
419
+ /* cleanup the procs as these are gone */
420
+ for (i = 0 ; i < orte_local_children -> size ; i ++ ) {
421
+ if (NULL == (pptr = (orte_proc_t * )opal_pointer_array_get_item (orte_local_children , i ))) {
422
+ continue ;
423
+ }
424
+ /* if this child is part of the job... */
425
+ if (pptr -> name .jobid == jdata -> jobid ) {
426
+ /* clear the entry in the local children */
427
+ opal_pointer_array_set_item (orte_local_children , i , NULL );
428
+ /* find it in the node->procs array */
429
+ for (j = 0 ; j < pptr -> node -> procs -> size ; j ++ ) {
430
+ if (NULL == (pdata = (orte_proc_t * )opal_pointer_array_get_item (pptr -> node -> procs , j ))) {
431
+ continue ;
432
+ }
433
+ if (pdata == pptr ) {
434
+ /* remove it */
435
+ opal_pointer_array_set_item (pptr -> node -> procs , j , NULL );
436
+ OBJ_RELEASE (pdata ); // maintain accounting
437
+ break ;
438
+ }
439
+ }
440
+ OBJ_RELEASE (pptr ); // maintain accounting
441
+ }
442
+ }
443
+
419
444
}
420
445
}
421
446
0 commit comments