Skip to content

Commit d66b185

Browse files
author
Alessandro Fanfarillo
committed
Draft stopped images
1 parent 4fb6da1 commit d66b185

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

src/mpi/mpi_caf.c

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ int used_comm = -1, n_failed_imgs=0, error_called=0;
113113
int *ranks_gc,*ranks_gf; //to be returned by failed images
114114
MPI_Errhandler errh,errh_w;
115115
int completed = 0,tmp_lock;
116+
int *stopped_images;
116117

117118
static int cmpfunc (const void *a, const void *b)
118119
{
@@ -501,7 +502,8 @@ PREFIX (init) (int *argc, char ***argv)
501502

502503
ranks_gf = (int*)malloc(caf_num_images * sizeof(int));
503504
ranks_gc = (int*)malloc(caf_num_images * sizeof(int));
504-
505+
stopped_images = (int*)calloc(caf_num_images, sizeof(int));
506+
505507
#if MPI_VERSION >= 3
506508
MPI_Info_create (&mpi_info_same_size);
507509
MPI_Info_set (mpi_info_same_size, "same_size", "true");
@@ -533,7 +535,10 @@ PREFIX (finalize) (void)
533535
*img_status = STAT_STOPPED_IMAGE; /* GFC_STAT_STOPPED_IMAGE = 6000 */
534536
MPI_Win_sync(*stat_tok);
535537

536-
completed = 1;
538+
MPIX_Comm_revoke(CAF_COMM_WORLD);
539+
communicator_shrink(&CAF_COMM_WORLD);
540+
541+
MPI_Barrier(stopped_comm);
537542

538543
while (caf_static_list != NULL)
539544
{
@@ -561,24 +566,16 @@ PREFIX (finalize) (void)
561566
MPI_Info_free (&mpi_info_same_size);
562567
#endif // MPI_VERSION
563568

564-
//MPI_Comm_free(&CAF_COMM_WORLD);
565-
566-
printf("Before revoke\n");
569+
/* MPI_Comm_free(&CAF_COMM_WORLD); */
567570

568-
MPIX_Comm_revoke(CAF_COMM_WORLD);
569-
printf("After revoke\n");
570-
MPI_Test(&stopped_req,&flag,MPI_STATUS_IGNORE);
571-
communicator_shrink(&stopped_comm);
572-
MPI_Barrier(stopped_comm);
573-
printf("After barrier\n");
574571
/* Only call Finalize if CAF runtime Initialized MPI. */
575572
if (caf_owns_mpi) {
576573
MPI_Finalize();
577574
}
578575
pthread_mutex_lock(&lock_am);
579576
caf_is_finalized = 1;
580577
pthread_mutex_unlock(&lock_am);
581-
printf("finalizing\n");
578+
/* printf("finalizing\n"); */
582579
exit(0);
583580
}
584581

@@ -614,6 +611,8 @@ int communicator_shrink(MPI_Comm *comm)
614611

615612
/* Split does the magic: removing spare processes and reordering ranks
616613
* so that all surviving processes remain at their former place */
614+
if (*img_status == STAT_STOPPED_IMAGE)
615+
crank = -1;
617616
rc = MPI_Comm_split(shrunk, crank<0?MPI_UNDEFINED:1, crank, newcomm);
618617

619618
/* Split or some of the communications above may have failed if
@@ -708,10 +707,11 @@ void *
708707
MPI_Win_flush(caf_this_image-1, *p);
709708
# endif // CAF_MPI_LOCK_UNLOCK
710709
free(init_array);
711-
MPI_Barrier(CAF_COMM_WORLD);
712710
/* PREFIX(sync_all) (NULL,NULL,0); */
713711
}
714712

713+
MPI_Barrier(CAF_COMM_WORLD);
714+
715715
if(error_called == 1)
716716
{
717717
communicator_shrink(&CAF_COMM_WORLD);

0 commit comments

Comments
 (0)