@@ -107,8 +107,8 @@ char err_buffer[MPI_MAX_ERROR_STRING];
107
107
MPI_Comm CAF_COMM_WORLD ;
108
108
109
109
/* Failed Images */
110
- MPI_Comm lock_comm ;
111
- MPI_Request lock_req ;
110
+ MPI_Comm lock_comm , stopped_comm ;
111
+ MPI_Request lock_req , stopped_req ;
112
112
int used_comm = -1 , n_failed_imgs = 0 , error_called = 0 ;
113
113
int * ranks_gc ,* ranks_gf ; //to be returned by failed images
114
114
MPI_Errhandler errh ,errh_w ;
@@ -493,6 +493,10 @@ PREFIX (init) (int *argc, char ***argv)
493
493
MPI_Comm_set_errhandler (lock_comm , errh );
494
494
MPI_Irecv (& tmp_lock ,1 ,MPI_INT ,MPI_ANY_SOURCE ,MPI_ANY_TAG ,lock_comm ,& lock_req );
495
495
496
+ MPI_Comm_dup (CAF_COMM_WORLD , & stopped_comm );
497
+ MPI_Comm_set_errhandler (stopped_comm , errh );
498
+ MPI_Irecv (& tmp_lock ,1 ,MPI_INT ,MPI_ANY_SOURCE ,MPI_ANY_TAG ,lock_comm ,& stopped_req );
499
+
496
500
MPI_Win_create_errhandler (verbose_win_errhandler , & errh_w );
497
501
498
502
ranks_gf = (int * )malloc (caf_num_images * sizeof (int ));
@@ -525,15 +529,14 @@ _gfortran_caf_finalize (void)
525
529
PREFIX (finalize ) (void )
526
530
#endif
527
531
{
532
+ int flag = 0 ;
528
533
* img_status = STAT_STOPPED_IMAGE ; /* GFC_STAT_STOPPED_IMAGE = 6000 */
529
534
MPI_Win_sync (* stat_tok );
530
535
531
536
completed = 1 ;
537
+ MPIX_Comm_revoke (CAF_COMM_WORLD );
538
+ MPI_Barrier (stopped_comm );
532
539
533
- MPI_Cancel (& lock_req );
534
- MPI_Request_free (& lock_req );
535
- MPI_Barrier (CAF_COMM_WORLD );
536
-
537
540
while (caf_static_list != NULL )
538
541
{
539
542
caf_static_t * tmp = caf_static_list -> prev ;
@@ -600,7 +603,7 @@ int communicator_shrink(MPI_Comm *comm)
600
603
MPIX_Comm_shrink (* comm , & shrunk );
601
604
MPI_Comm_set_errhandler ( shrunk , errh );
602
605
MPI_Comm_size (shrunk , & ns ); MPI_Comm_rank (shrunk , & srank );
603
-
606
+
604
607
MPI_Comm_rank (* comm , & crank );
605
608
606
609
/* Split does the magic: removing spare processes and reordering ranks
@@ -611,6 +614,7 @@ int communicator_shrink(MPI_Comm *comm)
611
614
* new failures have disrupted the process: we need to
612
615
* make sure we succeeded at all ranks, or retry until it works. */
613
616
flag = MPIX_Comm_agree (shrunk , & flag );
617
+
614
618
MPI_Comm_free (& shrunk );
615
619
if ( MPI_SUCCESS != flag ) {
616
620
if ( MPI_SUCCESS == rc ) MPI_Comm_free (* newcomm );
@@ -633,7 +637,7 @@ void *
633
637
/* int ierr; */
634
638
void * mem ;
635
639
size_t actual_size ;
636
- int l_var = 0 , * init_array = NULL ,ierr = 0 ;
640
+ int l_var = 0 , * init_array = NULL ,ierr = 0 , flag = 0 ;
637
641
638
642
if (unlikely (caf_is_finalized ))
639
643
goto error ;
@@ -662,6 +666,8 @@ void *
662
666
else
663
667
actual_size = size ;
664
668
669
+ MPI_Test (& lock_req ,& flag ,MPI_STATUS_IGNORE );
670
+
665
671
if (error_called == 1 )
666
672
{
667
673
communicator_shrink (& CAF_COMM_WORLD );
0 commit comments