Skip to content

Commit 17699e5

Browse files
author
Alessandro Fanfarillo
committed
Partial fault tolerant support for locks
1 parent d6640d9 commit 17699e5

File tree

1 file changed

+38
-21
lines changed

1 file changed

+38
-21
lines changed

src/mpi/mpi_caf.c

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ void mutex_lock(MPI_Win win, int image_index, int index, int *stat,
268268
{
269269
const char msg[] = "Already locked";
270270
#if MPI_VERSION >= 3
271-
int value=1, compare = 0, newval = caf_this_image, i = 1;
271+
int value=1, compare = 0, newval = caf_this_image, i = 1,zero=0;
272272

273273
if(stat != NULL)
274274
*stat = 0;
@@ -290,8 +290,26 @@ void mutex_lock(MPI_Win win, int image_index, int index, int *stat,
290290
while(value != 0)
291291
{
292292
locking_atomic_op(win, &value, newval, compare, image_index, index);
293-
usleep(caf_this_image*i);
294-
i++;
293+
printf("n_failed_images: %d\n",n_failed_imgs);
294+
for(i=0;i<n_failed_imgs;i++)
295+
{
296+
printf("value: %d\n",value);
297+
if(ranks_gc[i] == value)
298+
{
299+
# ifdef CAF_MPI_LOCK_UNLOCK
300+
MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index-1, 0, win);
301+
# endif // CAF_MPI_LOCK_UNLOCK
302+
MPI_Fetch_and_op(&zero, &value, MPI_INT, image_index-1, index*sizeof(int), MPI_REPLACE, win);
303+
# ifdef CAF_MPI_LOCK_UNLOCK
304+
MPI_Win_unlock (image_index-1, win);
305+
# else // CAF_MPI_LOCK_UNLOCK
306+
MPI_Win_flush (image_index-1, win);
307+
# endif // CAF_MPI_LOCK_UNLOCK
308+
break;
309+
}
310+
}
311+
// usleep(caf_this_image*i);
312+
//i++;
295313
}
296314

297315
return;
@@ -457,7 +475,6 @@ PREFIX (init) (int *argc, char ***argv)
457475
/* MPI_Barrier(CAF_COMM_WORLD); */
458476
}
459477

460-
461478
/* Finalize coarray program. */
462479

463480
void
@@ -467,12 +484,13 @@ _gfortran_caf_finalize (void)
467484
PREFIX (finalize) (void)
468485
#endif
469486
{
470-
completed = 1;
471487
*img_status = STAT_STOPPED_IMAGE; /* GFC_STAT_STOPPED_IMAGE = 6000 */
472488
MPI_Win_sync(*stat_tok);
473489

474-
MPI_Barrier(CAF_COMM_WORLD);
490+
completed = 1;
475491

492+
MPI_Barrier(CAF_COMM_WORLD);
493+
476494
while (caf_static_list != NULL)
477495
{
478496
caf_static_t *tmp = caf_static_list->prev;
@@ -552,21 +570,21 @@ static int communicator_shrink()
552570

553571
/* /\* the rank 0 in the shrinked comm is going to determine the */
554572
/* * ranks at which the spares need to be inserted. *\/ */
555-
/* if(0 == srank) { */
556-
/* /\* getting the group of dead processes: */
557-
/* * those in comm, but not in shrinked are the deads *\/ */
558-
/* MPI_Comm_group(CAF_COMM_WORLD, &cgrp); MPI_Comm_group(shrunk, &sgrp); */
559-
/* MPI_Group_difference(cgrp, sgrp, &dgrp); MPI_Group_size(dgrp, &nd); */
560-
/* /\* Computing the rank assignment for the newly inserted spares *\/ */
561-
/* for(i=0; i<ns-(nc-nd); i++) { */
562-
/* if( i < nd ) MPI_Group_translate_ranks(dgrp, 1, &i, cgrp, &drank); */
563-
/* else drank=-1; /\* still a spare *\/ */
564-
/* /\* sending their new assignment to all spares *\/ */
565-
/* MPI_Send(&drank, 1, MPI_INT, i+nc-nd, 1, shrunk); */
566-
/* } */
567-
/* MPI_Group_free(&cgrp); MPI_Group_free(&sgrp); MPI_Group_free(&dgrp); */
573+
/* if(0 == srank) { */
574+
/* /\* getting the group of dead processes: */
575+
/* * those in comm, but not in shrinked are the deads *\/ */
576+
/* MPI_Comm_group(CAF_COMM_WORLD, &cgrp); MPI_Comm_group(shrunk, &sgrp); */
577+
/* MPI_Group_difference(cgrp, sgrp, &dgrp); MPI_Group_size(dgrp, &nd); */
578+
/* /\* Computing the rank assignment for the newly inserted spares *\/ */
579+
/* for(i=0; i<ns-(nc-nd); i++) { */
580+
/* if( i < nd ) MPI_Group_translate_ranks(dgrp, 1, &i, cgrp, &drank); */
581+
/* else drank=-1; /\* still a spare *\/ */
582+
/* /\* sending their new assignment to all spares *\/ */
583+
/* MPI_Send(&drank, 1, MPI_INT, i+nc-nd, 1, shrunk); */
568584
/* } */
569-
/* } else { /\* I was a spare, waiting for my new assignment *\/ */
585+
/* MPI_Group_free(&cgrp); MPI_Group_free(&sgrp); MPI_Group_free(&dgrp); */
586+
/* } */
587+
/* else { /\* I was a spare, waiting for my new assignment *\/ */
570588
/* MPI_Recv(&crank, 1, MPI_INT, 0, 1, shrunk, MPI_STATUS_IGNORE); */
571589
/* } */
572590

@@ -587,7 +605,6 @@ static int communicator_shrink()
587605
return MPI_SUCCESS;
588606
}
589607

590-
591608
#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
592609
void *
593610
_gfortran_caf_register (size_t size, caf_register_t type, caf_token_t *token,

0 commit comments

Comments
 (0)