Skip to content

Commit f4d3ce2

Browse files
author
Alessandro Fanfarillo
committed
Fixed bug in locking
1 parent dc64a97 commit f4d3ce2

File tree

1 file changed

+16
-6
lines changed

1 file changed

+16
-6
lines changed

src/mpi/mpi_caf.c

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -109,9 +109,10 @@ MPI_Comm CAF_COMM_WORLD;
109109
/* Failed Images */
110110
MPI_Comm lock_comm,stopped_comm;
111111
MPI_Request lock_req,stopped_req;
112-
int used_comm = -1, n_failed_imgs=0, error_called=0;
112+
int used_comm = -1, n_failed_imgs=0;
113+
int error_called = 0, fake_error_called = 0;
113114
int *ranks_gc,*ranks_gf; //to be returned by failed images
114-
MPI_Errhandler errh,errh_w;
115+
MPI_Errhandler errh,errh_w,errh_fake;
115116
int completed = 0,tmp_lock;
116117
int *stopped_images;
117118

@@ -280,6 +281,7 @@ void mutex_lock(MPI_Win win, int image_index, int index, int *stat,
280281
{
281282
MPIX_Comm_revoke(CAF_COMM_WORLD);
282283
communicator_shrink(&CAF_COMM_WORLD);
284+
communicator_shrink(&lock_comm);
283285
error_called = 0;
284286
}
285287

@@ -311,6 +313,7 @@ void mutex_lock(MPI_Win win, int image_index, int index, int *stat,
311313
{
312314
MPIX_Comm_revoke(CAF_COMM_WORLD);
313315
communicator_shrink(&CAF_COMM_WORLD);
316+
communicator_shrink(&lock_comm);
314317
error_called = 0;
315318
ierr = STAT_FAILED_IMAGE;
316319
}
@@ -324,7 +327,8 @@ void mutex_lock(MPI_Win win, int image_index, int index, int *stat,
324327
# ifdef CAF_MPI_LOCK_UNLOCK
325328
MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index-1, 0, win);
326329
# endif // CAF_MPI_LOCK_UNLOCK
327-
MPI_Fetch_and_op(&zero, &value, MPI_INT, image_index-1, index*sizeof(int), MPI_REPLACE, win);
330+
/* MPI_Fetch_and_op(&zero, &newval, MPI_INT, image_index-1, index*sizeof(int), MPI_REPLACE, win); */
331+
MPI_Compare_and_swap(&zero,&value,&newval,MPI_INT,image_index-1,index*sizeof(int), win);
328332
# ifdef CAF_MPI_LOCK_UNLOCK
329333
MPI_Win_unlock (image_index-1, win);
330334
# else // CAF_MPI_LOCK_UNLOCK
@@ -373,6 +377,7 @@ void mutex_unlock(MPI_Win win, int image_index, int index, int *stat,
373377
{
374378
MPIX_Comm_revoke(CAF_COMM_WORLD);
375379
communicator_shrink(&CAF_COMM_WORLD);
380+
communicator_shrink(&lock_comm);
376381
error_called = 0;
377382
ierr = STAT_FAILED_IMAGE;
378383
}
@@ -387,8 +392,9 @@ void mutex_unlock(MPI_Win win, int image_index, int index, int *stat,
387392
MPI_Win_flush (image_index-1, win);
388393
# endif // CAF_MPI_LOCK_UNLOCK
389394

390-
if(value == 0)
391-
goto stat_error;
395+
/* Temporarily commented */
396+
/* if(value == 0) */
397+
/* goto stat_error; */
392398

393399
if(stat)
394400
*stat = ierr;
@@ -483,15 +489,17 @@ PREFIX (init) (int *argc, char ***argv)
483489
stat_tok = malloc (sizeof(MPI_Win));
484490

485491
MPI_Comm_create_errhandler(verbose_comm_errhandler, &errh);
492+
/* MPI_Comm_create_errhandler(fake_comm_errhandler, &errh_fake); */
486493
MPI_Comm_set_errhandler(CAF_COMM_WORLD, errh);
487494

488495
MPI_Comm_dup(CAF_COMM_WORLD, &lock_comm);
496+
/* MPI_Comm_set_errhandler(lock_comm, errh_fake); */
489497
MPI_Comm_set_errhandler(lock_comm, errh);
490498
MPI_Irecv(&tmp_lock,1,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,lock_comm,&lock_req);
491499

492500
MPI_Comm_dup(CAF_COMM_WORLD, &stopped_comm);
493501
MPI_Comm_set_errhandler(stopped_comm, errh);
494-
MPI_Irecv(&tmp_lock,1,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,lock_comm,&stopped_req);
502+
MPI_Irecv(&tmp_lock,1,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,stopped_comm,&stopped_req);
495503

496504
MPI_Win_create_errhandler(verbose_win_errhandler, &errh_w);
497505

@@ -1218,7 +1226,9 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
12181226
if(error_called == 1)
12191227
{
12201228
communicator_shrink(&CAF_COMM_WORLD);
1229+
communicator_shrink(&lock_comm);
12211230
error_called = 0;
1231+
fake_error_called = 0;
12221232
ierr = STAT_FAILED_IMAGE;
12231233
}
12241234

0 commit comments

Comments
 (0)