@@ -109,9 +109,10 @@ MPI_Comm CAF_COMM_WORLD;
109
109
/* Failed Images */
110
110
MPI_Comm lock_comm ,stopped_comm ;
111
111
MPI_Request lock_req ,stopped_req ;
112
- int used_comm = -1 , n_failed_imgs = 0 , error_called = 0 ;
112
+ int used_comm = -1 , n_failed_imgs = 0 ;
113
+ int error_called = 0 , fake_error_called = 0 ;
113
114
int * ranks_gc ,* ranks_gf ; //to be returned by failed images
114
- MPI_Errhandler errh ,errh_w ;
115
+ MPI_Errhandler errh ,errh_w , errh_fake ;
115
116
int completed = 0 ,tmp_lock ;
116
117
int * stopped_images ;
117
118
@@ -280,6 +281,7 @@ void mutex_lock(MPI_Win win, int image_index, int index, int *stat,
280
281
{
281
282
MPIX_Comm_revoke (CAF_COMM_WORLD );
282
283
communicator_shrink (& CAF_COMM_WORLD );
284
+ communicator_shrink (& lock_comm );
283
285
error_called = 0 ;
284
286
}
285
287
@@ -311,6 +313,7 @@ void mutex_lock(MPI_Win win, int image_index, int index, int *stat,
311
313
{
312
314
MPIX_Comm_revoke (CAF_COMM_WORLD );
313
315
communicator_shrink (& CAF_COMM_WORLD );
316
+ communicator_shrink (& lock_comm );
314
317
error_called = 0 ;
315
318
ierr = STAT_FAILED_IMAGE ;
316
319
}
@@ -324,7 +327,8 @@ void mutex_lock(MPI_Win win, int image_index, int index, int *stat,
324
327
# ifdef CAF_MPI_LOCK_UNLOCK
325
328
MPI_Win_lock (MPI_LOCK_EXCLUSIVE , image_index - 1 , 0 , win );
326
329
# endif // CAF_MPI_LOCK_UNLOCK
327
- MPI_Fetch_and_op (& zero , & value , MPI_INT , image_index - 1 , index * sizeof (int ), MPI_REPLACE , win );
330
+ /* MPI_Fetch_and_op(&zero, &newval, MPI_INT, image_index-1, index*sizeof(int), MPI_REPLACE, win); */
331
+ MPI_Compare_and_swap (& zero ,& value ,& newval ,MPI_INT ,image_index - 1 ,index * sizeof (int ), win );
328
332
# ifdef CAF_MPI_LOCK_UNLOCK
329
333
MPI_Win_unlock (image_index - 1 , win );
330
334
# else // CAF_MPI_LOCK_UNLOCK
@@ -373,6 +377,7 @@ void mutex_unlock(MPI_Win win, int image_index, int index, int *stat,
373
377
{
374
378
MPIX_Comm_revoke (CAF_COMM_WORLD );
375
379
communicator_shrink (& CAF_COMM_WORLD );
380
+ communicator_shrink (& lock_comm );
376
381
error_called = 0 ;
377
382
ierr = STAT_FAILED_IMAGE ;
378
383
}
@@ -387,8 +392,9 @@ void mutex_unlock(MPI_Win win, int image_index, int index, int *stat,
387
392
MPI_Win_flush (image_index - 1 , win );
388
393
# endif // CAF_MPI_LOCK_UNLOCK
389
394
390
- if (value == 0 )
391
- goto stat_error ;
395
+ /* Temporarily commented out */
396
+ /* if(value == 0) */
397
+ /* goto stat_error; */
392
398
393
399
if (stat )
394
400
* stat = ierr ;
@@ -483,15 +489,17 @@ PREFIX (init) (int *argc, char ***argv)
483
489
stat_tok = malloc (sizeof (MPI_Win ));
484
490
485
491
MPI_Comm_create_errhandler (verbose_comm_errhandler , & errh );
492
+ /* MPI_Comm_create_errhandler(fake_comm_errhandler, &errh_fake); */
486
493
MPI_Comm_set_errhandler (CAF_COMM_WORLD , errh );
487
494
488
495
MPI_Comm_dup (CAF_COMM_WORLD , & lock_comm );
496
+ /* MPI_Comm_set_errhandler(lock_comm, errh_fake); */
489
497
MPI_Comm_set_errhandler (lock_comm , errh );
490
498
MPI_Irecv (& tmp_lock ,1 ,MPI_INT ,MPI_ANY_SOURCE ,MPI_ANY_TAG ,lock_comm ,& lock_req );
491
499
492
500
MPI_Comm_dup (CAF_COMM_WORLD , & stopped_comm );
493
501
MPI_Comm_set_errhandler (stopped_comm , errh );
494
- MPI_Irecv (& tmp_lock ,1 ,MPI_INT ,MPI_ANY_SOURCE ,MPI_ANY_TAG ,lock_comm ,& stopped_req );
502
+ MPI_Irecv (& tmp_lock ,1 ,MPI_INT ,MPI_ANY_SOURCE ,MPI_ANY_TAG ,stopped_comm ,& stopped_req );
495
503
496
504
MPI_Win_create_errhandler (verbose_win_errhandler , & errh_w );
497
505
@@ -1218,7 +1226,9 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
1218
1226
if (error_called == 1 )
1219
1227
{
1220
1228
communicator_shrink (& CAF_COMM_WORLD );
1229
+ communicator_shrink (& lock_comm );
1221
1230
error_called = 0 ;
1231
+ fake_error_called = 0 ;
1222
1232
ierr = STAT_FAILED_IMAGE ;
1223
1233
}
1224
1234
0 commit comments