@@ -460,6 +460,7 @@ void orte_state_base_report_progress(int fd, short argc, void *cbdata)
460
460
}
461
461
462
462
static void _send_notification (int status ,
463
+ orte_proc_state_t state ,
463
464
orte_process_name_t * proc ,
464
465
orte_process_name_t * target )
465
466
{
@@ -485,19 +486,43 @@ static void _send_notification(int status,
485
486
return ;
486
487
}
487
488
488
- /* the source is me */
489
- if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , ORTE_PROC_MY_NAME , 1 , ORTE_NAME ))) {
489
+ /* the source is the proc */
490
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , proc , 1 , ORTE_NAME ))) {
490
491
ORTE_ERROR_LOG (rc );
491
492
OBJ_RELEASE (buf );
492
493
return ;
493
494
}
494
495
495
- /* we are going to pass three opal_value_t's */
496
- rc = 3 ;
497
- if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , & rc , 1 , OPAL_INT ))) {
498
- ORTE_ERROR_LOG (rc );
499
- OBJ_RELEASE (buf );
500
- return ;
496
+ if (OPAL_ERR_PROC_ABORTED == status ) {
497
+ /* we will pass four opal_value_t's */
498
+ rc = 4 ;
499
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , & rc , 1 , OPAL_INT ))) {
500
+ ORTE_ERROR_LOG (rc );
501
+ OBJ_RELEASE (buf );
502
+ return ;
503
+ }
504
+ /* pass along the affected proc(s) */
505
+ OBJ_CONSTRUCT (& kv , opal_value_t );
506
+ kv .key = strdup (OPAL_PMIX_EVENT_AFFECTED_PROC );
507
+ kv .type = OPAL_NAME ;
508
+ kv .data .name .jobid = proc -> jobid ;
509
+ kv .data .name .vpid = proc -> vpid ;
510
+ kvptr = & kv ;
511
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , & kvptr , 1 , OPAL_VALUE ))) {
512
+ ORTE_ERROR_LOG (rc );
513
+ OBJ_DESTRUCT (& kv );
514
+ OBJ_RELEASE (buf );
515
+ return ;
516
+ }
517
+ OBJ_DESTRUCT (& kv );
518
+ } else {
519
+ /* we are going to pass three opal_value_t's */
520
+ rc = 3 ;
521
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , & rc , 1 , OPAL_INT ))) {
522
+ ORTE_ERROR_LOG (rc );
523
+ OBJ_RELEASE (buf );
524
+ return ;
525
+ }
501
526
}
502
527
503
528
/* pass along the affected proc(s) */
@@ -699,19 +724,19 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata)
699
724
/* notify everyone who asked for it */
700
725
target .jobid = jdata -> jobid ;
701
726
target .vpid = ORTE_VPID_WILDCARD ;
702
- _send_notification (OPAL_ERR_JOB_TERMINATED , & target , ORTE_NAME_WILDCARD );
727
+ _send_notification (OPAL_ERR_JOB_TERMINATED , pdata -> state , & target , ORTE_NAME_WILDCARD );
703
728
} else {
704
729
target .jobid = jdata -> jobid ;
705
730
target .vpid = ORTE_VPID_WILDCARD ;
706
- _send_notification (OPAL_ERR_JOB_TERMINATED , & target , & parent );
731
+ _send_notification (OPAL_ERR_JOB_TERMINATED , pdata -> state , & target , & parent );
707
732
}
708
733
}
709
734
} else if (ORTE_PROC_STATE_TERMINATED < pdata -> state &&
710
735
!orte_job_term_ordered ) {
711
736
/* if this was an abnormal term, notify the other procs of the termination */
712
737
parent .jobid = jdata -> jobid ;
713
738
parent .vpid = ORTE_VPID_WILDCARD ;
714
- _send_notification (OPAL_ERR_PROC_ABORTED , & pdata -> name , & parent );
739
+ _send_notification (OPAL_ERR_PROC_ABORTED , pdata -> state , & pdata -> name , & parent );
715
740
}
716
741
}
717
742
0 commit comments