@@ -87,7 +87,7 @@ caf_static_t *caf_tot = NULL;
87
87
88
88
/* Image status variable */
89
89
90
- static int * img_status = NULL ;
90
+ static int img_status = 0 ;
91
91
MPI_Win * stat_tok ;
92
92
93
93
/* Active messages variables */
@@ -114,7 +114,7 @@ int error_called = 0, fake_error_called = 0;
114
114
int * ranks_gc ,* ranks_gf , * failed_images_array ; //to be returned by failed images
115
115
MPI_Errhandler errh ,errh_w ,errh_fake ;
116
116
int completed = 0 ,tmp_lock ;
117
- int * stopped_images , n_stopped_imgs ;
117
+ int * stopped_imgs , n_stopped_imgs ;
118
118
119
119
static int cmpfunc (const void * a , const void * b )
120
120
{
@@ -511,27 +511,30 @@ PREFIX (init) (int *argc, char ***argv)
511
511
MPI_Comm_dup (CAF_COMM_WORLD , & stopped_comm );
512
512
MPI_Comm_set_errhandler (stopped_comm , errh );
513
513
MPI_Irecv (& tmp_lock ,1 ,MPI_INT ,MPI_ANY_SOURCE ,MPI_ANY_TAG ,stopped_comm ,& stopped_req );
514
-
514
+
515
515
MPI_Win_create_errhandler (verbose_win_errhandler , & errh_w );
516
516
517
517
ranks_gf = (int * )calloc (caf_num_images ,sizeof (int ));
518
518
ranks_gc = (int * )calloc (caf_num_images ,sizeof (int ));
519
519
failed_images_array = (int * )calloc (caf_num_images ,sizeof (int ));
520
- stopped_images = (int * )calloc (caf_num_images , sizeof (int ));
521
-
520
+ /* stopped_imgs = (int*)calloc(caf_num_images, sizeof(int)); */
521
+
522
522
#if MPI_VERSION >= 3
523
- MPI_Info_create (& mpi_info_same_size );
524
- MPI_Info_set (mpi_info_same_size , "same_size" , "true" );
523
+ /* MPI_Info_create (&mpi_info_same_size); */
524
+ /* MPI_Info_set (mpi_info_same_size, "same_size", "true"); */
525
525
/* Setting img_status */
526
- MPI_Win_allocate (sizeof (int ), 1 , mpi_info_same_size , stopped_comm , & img_status , stat_tok );
526
+ /* MPI_Win_allocate(sizeof(int), 1, mpi_info_same_size, stopped_comm, &img_status, stat_tok); */
527
+ MPI_Win_allocate (sizeof (int )* caf_num_images , 1 , MPI_INFO_NULL , stopped_comm , & stopped_imgs , stat_tok );
527
528
# ifndef CAF_MPI_LOCK_UNLOCK
528
529
MPI_Win_lock_all (MPI_MODE_NOCHECK , * stat_tok );
529
530
# endif // CAF_MPI_LOCK_UNLOCK
530
531
#else
531
- MPI_Alloc_mem (sizeof (int ), MPI_INFO_NULL , & img_status , stat_tok );
532
- MPI_Win_create (img_status , sizeof (int ), 1 , MPI_INFO_NULL , stopped_comm , stat_tok );
532
+ MPI_Alloc_mem (sizeof (int )* caf_num_images , MPI_INFO_NULL , & stopped_imgs , stat_tok );
533
+ MPI_Win_create (stopped_imgs , sizeof (int )* caf_num_images , 1 , MPI_INFO_NULL , stopped_comm , stat_tok );
533
534
#endif // MPI_VERSION
534
- * img_status = 0 ;
535
+ for (i = 0 ;i < caf_num_images ;i ++ )
536
+ stopped_imgs [i ] = 0 ;
537
+ /* *img_status = 0; */
535
538
MPI_Win_set_errhandler (* stat_tok ,errh_w );
536
539
}
537
540
MPI_Barrier (CAF_COMM_WORLD );
@@ -546,9 +549,27 @@ _gfortran_caf_finalize (void)
546
549
PREFIX (finalize ) (void )
547
550
#endif
548
551
{
549
- int flag = 0 ;
550
- * img_status = STAT_STOPPED_IMAGE ; /* GFC_STAT_STOPPED_IMAGE = 6000 */
551
- MPI_Win_sync (* stat_tok );
552
+ int flag = 0 ,i = 0 ,j = 0 ,failed = 0 ,one = 1 ;
553
+ img_status = STAT_STOPPED_IMAGE ; /* GFC_STAT_STOPPED_IMAGE = 6000 */
554
+ stopped_imgs [caf_this_image - 1 ] = 1 ;
555
+ /* MPI_Win_sync(*stat_tok); */
556
+
557
+ for (i = 0 ;i < caf_num_images ;i ++ )
558
+ {
559
+ for (j = 0 ; j < n_failed_imgs ; j ++ )
560
+ if (i == (failed_images_array [j ] - 1 ))
561
+ {
562
+ failed = 1 ;
563
+ break ;
564
+ }
565
+ if (!failed )
566
+ {
567
+ MPI_Accumulate (& one , 1 , MPI_INT , i , (caf_this_image - 1 )* sizeof (int ), 1 , MPI_INT , MPI_REPLACE , * stat_tok );
568
+ }
569
+ failed = 0 ;
570
+ }
571
+
572
+ MPI_Win_flush_all (* stat_tok );
552
573
553
574
MPIX_Comm_revoke (CAF_COMM_WORLD );
554
575
communicator_shrink (& CAF_COMM_WORLD );
@@ -578,7 +599,7 @@ PREFIX (finalize) (void)
578
599
tmp_tot = prev ;
579
600
}
580
601
#if MPI_VERSION >= 3
581
- MPI_Info_free (& mpi_info_same_size );
602
+ /* MPI_Info_free (&mpi_info_same_size); */
582
603
#endif // MPI_VERSION
583
604
584
605
/* MPI_Comm_free(&CAF_COMM_WORLD); */
@@ -625,7 +646,7 @@ int communicator_shrink(MPI_Comm *comm)
625
646
MPI_Comm_rank (MPI_COMM_WORLD , & crank );
626
647
/* Split does the magic: removing spare processes and reordering ranks
627
648
* so that all surviving processes remain at their former place */
628
- if (* img_status == STAT_STOPPED_IMAGE )
649
+ if (img_status == STAT_STOPPED_IMAGE )
629
650
crank = -1 ;
630
651
rc = MPI_Comm_split (shrunk , crank < 0 ?MPI_UNDEFINED :1 , crank , newcomm );
631
652
flag = (rc == MPI_SUCCESS );
@@ -2819,7 +2840,7 @@ PREFIX (fail_image) (void)
2819
2840
int
2820
2841
PREFIX (image_status ) (int image )
2821
2842
{
2822
- int i ,res = 0 , remote_stat = 0 , ierr ;
2843
+ int i ,res = 0 , ierr ;
2823
2844
2824
2845
for (i = 0 ;i < n_failed_imgs ;i ++ )
2825
2846
if (image == failed_images_array [i ])
@@ -2828,16 +2849,16 @@ PREFIX (image_status) (int image)
2828
2849
if (res == STAT_FAILED_IMAGE )
2829
2850
return res ;
2830
2851
2831
- # ifdef CAF_MPI_LOCK_UNLOCK
2832
- MPI_Win_lock (MPI_LOCK_SHARED , image - 1 , 0 , * stat_tok );
2833
- # endif // CAF_MPI_LOCK_UNLOCK
2834
- ierr = MPI_Get (& remote_stat , 1 , MPI_INT , image - 1 , 0 , 1 , MPI_INT , * stat_tok );
2835
- # ifdef CAF_MPI_LOCK_UNLOCK
2836
- MPI_Win_unlock (image - 1 , * stat_tok );
2837
- # else // CAF_MPI_LOCK_UNLOCK
2838
- MPI_Win_flush (image - 1 , * stat_tok );
2839
- # endif // CAF_MPI_LOCK_UNLOCK
2840
- if (remote_stat != 0 )
2852
+ /* # ifdef CAF_MPI_LOCK_UNLOCK */
2853
+ /* MPI_Win_lock (MPI_LOCK_SHARED, image-1, 0, *stat_tok); */
2854
+ /* # endif // CAF_MPI_LOCK_UNLOCK */
2855
+ /* ierr = MPI_Get (&remote_stat, 1, MPI_INT, image-1, 0, 1, MPI_INT, *stat_tok); */
2856
+ /* # ifdef CAF_MPI_LOCK_UNLOCK */
2857
+ /* MPI_Win_unlock (image-1, *stat_tok); */
2858
+ /* # else // CAF_MPI_LOCK_UNLOCK */
2859
+ /* MPI_Win_flush (image-1, *stat_tok); */
2860
+ /* # endif // CAF_MPI_LOCK_UNLOCK */
2861
+ if (stopped_imgs [ image - 1 ] != 0 )
2841
2862
res = STAT_STOPPED_IMAGE ;
2842
2863
2843
2864
if (ierr != MPI_SUCCESS )
@@ -2865,10 +2886,18 @@ void
2865
2886
PREFIX (stopped_images ) (gfc_descriptor_t * array , int team __attribute__ ((unused )),
2866
2887
int kind __attribute__ ((unused )))
2867
2888
{
2868
- int * mem = (int * )calloc (n_stopped_imgs ,sizeof (int ));
2889
+ int i , * mem ,j = 0 ;
2890
+ n_stopped_imgs = 0 ;
2891
+ mem = (int * )calloc (caf_num_images ,sizeof (int ));
2892
+ for (i = 0 ;i < caf_num_images ;i ++ )
2893
+ if (stopped_imgs [i ] == 1 )
2894
+ {
2895
+ mem [j ] = i + 1 ;
2896
+ j ++ ;
2897
+ }
2898
+ n_stopped_imgs = j ;
2899
+ mem = realloc (mem ,sizeof (int )* n_stopped_imgs );
2869
2900
array -> base_addr = mem ;
2870
- memcpy (mem ,stopped_images ,n_stopped_imgs * sizeof (int ));
2871
- qsort (mem ,n_stopped_imgs ,sizeof (int ),cmpfunc );
2872
2901
array -> dtype = 265 ;
2873
2902
array -> dim [0 ].lower_bound = 1 ;
2874
2903
array -> dim [0 ]._ubound = n_stopped_imgs - 1 ;
0 commit comments