@@ -141,10 +141,17 @@ MPI_Comm CAF_COMM_WORLD;
141141 (and thus finalization) of MPI. */
142142bool caf_owns_mpi = false;
143143
144- /* Foo function pointers for coreduce */
145- int (* foo_int32_t )(void * , void * );
146- float (* foo_float )(void * , void * );
147- double (* foo_double )(void * , void * );
144+ /* Foo function pointers for coreduce.
145+ The handles when arguments are passed by reference. */
146+ int (* int32_t_by_reference )(void * , void * );
147+ float (* float_by_reference )(void * , void * );
148+ double (* double_by_reference )(void * , void * );
149+ /* Strings are always passed by reference. */
150+ void (* char_by_reference )(void * , int , void * , void * , int , int );
151+ /* The handles when arguments are passed by value. */
152+ int (* int32_t_by_value )(int32_t , int32_t );
153+ float (* float_by_value )(float , float );
154+ double (* double_by_value )(double , double );
148155
149156/* Define shortcuts for Win_lock and _unlock depending on whether the primitives
150157 are available in the MPI implementation. When they are not available the
@@ -3125,17 +3132,29 @@ name (datatype *invec, datatype *inoutvec, int *len, \
31253132 operator; \
31263133}
31273134
3128- #define FOOFUNC (TYPE ) foo_ ## TYPE
3135+ #define REFERENCE_FUNC (TYPE ) TYPE ## _by_reference
3136+ #define VALUE_FUNC (TYPE ) TYPE ## _by_value
31293137
31303138#define GEN_COREDUCE (name , dt ) \
31313139static void \
3132- name (void *invec, void *inoutvec, int *len, \
3140+ name##_by_reference_adapter (void *invec, void *inoutvec, int *len, \
31333141 MPI_Datatype *datatype) \
31343142{ \
31353143 int i; \
31363144 for(i=0;i<*len;i++) \
31373145 { \
3138- *((dt*)inoutvec) = (dt)(FOOFUNC(dt)((dt *)invec,(dt *)inoutvec)); \
3146+ *((dt*)inoutvec) = (dt)(REFERENCE_FUNC(dt)((dt *)invec, (dt *)inoutvec)); \
3147+ invec+=sizeof(dt); inoutvec+=sizeof(dt); \
3148+ } \
3149+ } \
3150+ static void \
3151+ name##_by_value_adapter (void *invec, void *inoutvec, int *len, \
3152+ MPI_Datatype *datatype) \
3153+ { \
3154+ int i; \
3155+ for(i=0;i<*len;i++) \
3156+ { \
3157+ *((dt*)inoutvec) = (dt)(VALUE_FUNC(dt)(*(dt *)invec, *(dt *)inoutvec)); \
31393158 invec+=sizeof(dt); inoutvec+=sizeof(dt); \
31403159 } \
31413160}
@@ -3144,6 +3163,22 @@ GEN_COREDUCE (redux_int32, int32_t)
31443163GEN_COREDUCE (redux_real32 , float )
31453164GEN_COREDUCE (redux_real64 , double )
31463165
3166+ static void \
3167+ redux_char_by_reference_adapter (void * invec , void * inoutvec , int * len ,
3168+ MPI_Datatype * datatype )
3169+ {
3170+ long int string_len ;
3171+ MPI_Type_extent (* datatype , & string_len );
3172+ for (int i = 0 ; i < * len ; i ++ )
3173+ {
3174+ /* The length of the result is fixed, i.e., no deferred string length is
3175+ * allowed there. */
3176+ REFERENCE_FUNC (char )((char * )inoutvec , string_len , (char * )invec , (char * )inoutvec , string_len , string_len );
3177+ invec += sizeof (char ) * string_len ;
3178+ inoutvec += sizeof (char ) * string_len ;
3179+ }
3180+ }
3181+
31473182#ifndef MPI_INTEGER1
31483183GEN_REDUCTION (do_sum_int1 , int8_t , inoutvec [i ] += invec [i ])
31493184GEN_REDUCTION (do_min_int1 , int8_t ,
@@ -3198,7 +3233,7 @@ GEN_REDUCTION (do_max_complex10, _Complex __float128,
31983233
31993234
32003235static MPI_Datatype
3201- get_MPI_datatype (gfc_descriptor_t * desc )
3236+ get_MPI_datatype (gfc_descriptor_t * desc , int char_len )
32023237{
32033238 /* FIXME: Better check whether the sizes are okay and supported;
32043239 MPI3 adds more types, e.g. MPI_INTEGER1. */
@@ -3255,22 +3290,29 @@ get_MPI_datatype (gfc_descriptor_t *desc)
32553290 GFC_DTYPE_TYPE_SIZE == GFC_TYPE_CHARACTER + 64*strlen
32563291*/
32573292 if ( (GFC_DTYPE_TYPE_SIZE (desc )- GFC_DTYPE_CHARACTER )%64 == 0 )
3258- return MPI_CHARACTER ;
3293+ {
3294+ MPI_Datatype string ;
3295+ if (char_len == 0 )
3296+ char_len = GFC_DESCRIPTOR_SIZE (desc );
3297+ MPI_Type_contiguous (char_len , MPI_CHARACTER , & string );
3298+ MPI_Type_commit (& string );
3299+ return string ;
3300+ }
32593301
32603302 caf_runtime_error ("Unsupported data type in collective: %ld\n" ,GFC_DTYPE_TYPE_SIZE (desc ));
32613303 return 0 ;
32623304}
32633305
32643306
32653307static void
3266- co_reduce_1 (MPI_Op op , gfc_descriptor_t * source , int result_image , int * stat ,
3267- char * errmsg , int src_len __attribute__ (( unused )) , int errmsg_len )
3308+ internal_co_reduce (MPI_Op op , gfc_descriptor_t * source , int result_image , int * stat ,
3309+ char * errmsg , int src_len , int errmsg_len )
32683310{
32693311 size_t i , size ;
32703312 int j , ierr ;
32713313 int rank = GFC_DESCRIPTOR_RANK (source );
32723314
3273- MPI_Datatype datatype = get_MPI_datatype (source );
3315+ MPI_Datatype datatype = get_MPI_datatype (source , src_len );
32743316
32753317 size = 1 ;
32763318 for (j = 0 ; j < rank ; j ++ )
@@ -3285,17 +3327,17 @@ co_reduce_1 (MPI_Op op, gfc_descriptor_t *source, int result_image, int *stat,
32853327 if (rank == 0 || PREFIX (is_contiguous ) (source ))
32863328 {
32873329 if (result_image == 0 )
3288- ierr = MPI_Allreduce (MPI_IN_PLACE , source -> base_addr , size , datatype ,
3330+ ierr = MPI_Allreduce (MPI_IN_PLACE , source -> base_addr , size , datatype ,
32893331 op , CAF_COMM_WORLD );
32903332 else if (result_image == caf_this_image )
3291- ierr = MPI_Reduce (MPI_IN_PLACE , source -> base_addr , size , datatype , op ,
3333+ ierr = MPI_Reduce (MPI_IN_PLACE , source -> base_addr , size , datatype , op ,
32923334 result_image - 1 , CAF_COMM_WORLD );
32933335 else
3294- ierr = MPI_Reduce (source -> base_addr , NULL , size , datatype , op ,
3336+ ierr = MPI_Reduce (source -> base_addr , NULL , size , datatype , op ,
32953337 result_image - 1 , CAF_COMM_WORLD );
32963338 if (ierr )
32973339 goto error ;
3298- return ;
3340+ goto co_reduce_cleanup ;
32993341 }
33003342
33013343 for (i = 0 ; i < size ; i ++ )
@@ -3316,18 +3358,21 @@ co_reduce_1 (MPI_Op op, gfc_descriptor_t *source, int result_image, int *stat,
33163358 void * sr = (void * )((char * ) source -> base_addr
33173359 + array_offset_sr * GFC_DESCRIPTOR_SIZE (source ));
33183360 if (result_image == 0 )
3319- ierr = MPI_Allreduce (MPI_IN_PLACE , sr , 1 , datatype , op ,
3361+ ierr = MPI_Allreduce (MPI_IN_PLACE , sr , 1 , datatype , op ,
33203362 CAF_COMM_WORLD );
33213363 else if (result_image == caf_this_image )
3322- ierr = MPI_Reduce (MPI_IN_PLACE , sr , 1 , datatype , op ,
3364+ ierr = MPI_Reduce (MPI_IN_PLACE , sr , 1 , datatype , op ,
33233365 result_image - 1 , CAF_COMM_WORLD );
33243366 else
3325- ierr = MPI_Reduce (sr , NULL , 1 , datatype , op , result_image - 1 ,
3367+ ierr = MPI_Reduce (sr , NULL , 1 , datatype , op , result_image - 1 ,
33263368 CAF_COMM_WORLD );
33273369 if (ierr )
33283370 goto error ;
33293371 }
33303372
3373+ co_reduce_cleanup :
3374+ if (GFC_DESCRIPTOR_TYPE (source ) == BT_CHARACTER )
3375+ MPI_Type_free (& datatype );
33313376 if (stat )
33323377 * stat = 0 ;
33333378 return ;
@@ -3360,7 +3405,7 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
33603405 int j , ierr ;
33613406 int rank = GFC_DESCRIPTOR_RANK (a );
33623407
3363- MPI_Datatype datatype = get_MPI_datatype (a );
3408+ MPI_Datatype datatype = get_MPI_datatype (a , 0 );
33643409
33653410 size = 1 ;
33663411 for (j = 0 ; j < rank ; j ++ )
@@ -3375,7 +3420,7 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
33753420 if (rank == 0 )
33763421 {
33773422 if (datatype != MPI_CHARACTER )
3378- ierr = MPI_Bcast (a -> base_addr , size , datatype , source_image - 1 , CAF_COMM_WORLD );
3423+ ierr = MPI_Bcast (a -> base_addr , size , datatype , source_image - 1 , CAF_COMM_WORLD );
33793424 else
33803425 {
33813426 int a_length ;
@@ -3386,12 +3431,12 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
33863431 if (ierr )
33873432 goto error ;
33883433 /* Broadcast the string itself */
3389- ierr = MPI_Bcast (a -> base_addr , a_length , datatype , source_image - 1 , CAF_COMM_WORLD );
3434+ ierr = MPI_Bcast (a -> base_addr , a_length , datatype , source_image - 1 , CAF_COMM_WORLD );
33903435 }
33913436
33923437 if (ierr )
33933438 goto error ;
3394- return ;
3439+ goto co_broadcast_exit ;
33953440 }
33963441 else if (datatype == MPI_CHARACTER ) /* rank !=0 */
33973442 {
@@ -3422,6 +3467,11 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
34223467 goto error ;
34233468 }
34243469
3470+ co_broadcast_exit :
3471+ if (stat )
3472+ * stat = 0 ;
3473+ if (GFC_DESCRIPTOR_TYPE (a ) == BT_CHARACTER )
3474+ MPI_Type_free (& datatype );
34253475 return ;
34263476
34273477error :
@@ -3445,63 +3495,99 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
34453495 memset (& errmsg [len ], '\0' , errmsg_len - len );
34463496}
34473497
3498+ /** The front-end function for co_reduce functionality. It sets up the MPI_Op
3499+ * for use in MPI_*Reduce functions. */
34483500void
34493501PREFIX (co_reduce ) (gfc_descriptor_t * a , void * (* opr ) (void * , void * ), int opr_flags ,
34503502 int result_image , int * stat , char * errmsg , int a_len , int errmsg_len )
34513503{
34523504 MPI_Op op ;
3453- if (GFC_DESCRIPTOR_TYPE (a ) == BT_INTEGER )
3505+ /* Integers and logicals can be treated the same. */
3506+ if (GFC_DESCRIPTOR_TYPE (a ) == BT_INTEGER
3507+ || GFC_DESCRIPTOR_TYPE (a ) == BT_LOGICAL )
34543508 {
3455- foo_int32_t = (typeof (foo_int32_t ))opr ;
3456- MPI_Op_create (redux_int32 , 1 , & op );
3509+ /* When the ARG_VALUE opr_flag is set, then the user-function expects its
3510+ * arguments to be passed by value. */
3511+ if ((opr_flags & GFC_CAF_ARG_VALUE ) > 0 )
3512+ {
3513+ int32_t_by_value = (typeof (VALUE_FUNC (int32_t )))opr ;
3514+ MPI_Op_create (redux_int32_by_value_adapter , 1 , & op );
3515+ }
3516+ else
3517+ {
3518+ int32_t_by_reference = (typeof (REFERENCE_FUNC (int32_t )))opr ;
3519+ MPI_Op_create (redux_int32_by_reference_adapter , 1 , & op );
3520+ }
34573521 }
3522+ /* Treat reals/doubles. */
34583523 else if (GFC_DESCRIPTOR_TYPE (a ) == BT_REAL )
34593524 {
3525+ /* When the ARG_VALUE opr_flag is set, then the user-function expects its
3526+ * arguments to be passed by value. */
34603527 if (GFC_DESCRIPTOR_SIZE (a ) == sizeof (float ))
34613528 {
3462- foo_float = (typeof (foo_float ))opr ;
3463- MPI_Op_create (redux_real32 , 1 , & op );
3529+ if ((opr_flags & GFC_CAF_ARG_VALUE ) > 0 )
3530+ {
3531+ float_by_value = (typeof (VALUE_FUNC (float )))opr ;
3532+ MPI_Op_create (redux_real32_by_value_adapter , 1 , & op );
3533+ }
3534+ else
3535+ {
3536+ float_by_reference = (typeof (REFERENCE_FUNC (float )))opr ;
3537+ MPI_Op_create (redux_real32_by_reference_adapter , 1 , & op );
3538+ }
34643539 }
34653540 else
3466- {
3467- foo_double = (typeof (foo_double ))opr ;
3468- MPI_Op_create (redux_real64 , 1 , & op );
3541+ {
3542+ /* When the ARG_VALUE opr_flag is set, then the user-function expects its
3543+ * arguments to be passed by value. */
3544+ if ((opr_flags & GFC_CAF_ARG_VALUE ) > 0 )
3545+ {
3546+ double_by_value = (typeof (VALUE_FUNC (double )))opr ;
3547+ MPI_Op_create (redux_real64_by_value_adapter , 1 , & op );
3548+ }
3549+ else
3550+ {
3551+ double_by_reference = (typeof (REFERENCE_FUNC (double )))opr ;
3552+ MPI_Op_create (redux_real64_by_reference_adapter , 1 , & op );
3553+ }
34693554 }
34703555 }
3471- else if (GFC_DESCRIPTOR_TYPE (a ) == BT_LOGICAL )
3556+ else if (GFC_DESCRIPTOR_TYPE (a ) == BT_CHARACTER )
34723557 {
3473- foo_int32_t = (typeof (foo_int32_t ))opr ;
3474- MPI_Op_create (redux_int32 , 1 , & op );
3558+ /* Char array functions always pass by reference. */
3559+ char_by_reference = (typeof (REFERENCE_FUNC (char )))opr ;
3560+ MPI_Op_create (redux_char_by_reference_adapter , 1 , & op );
34753561 }
34763562 else
34773563 {
34783564 caf_runtime_error ("Data type not yet supported for co_reduce\n" );
34793565 }
34803566
3481- co_reduce_1 (op , a , result_image , stat , errmsg , 0 , errmsg_len );
3567+ internal_co_reduce (op , a , result_image , stat , errmsg , a_len , errmsg_len );
34823568}
34833569
34843570void
34853571PREFIX (co_sum ) (gfc_descriptor_t * a , int result_image , int * stat , char * errmsg ,
34863572 int errmsg_len )
34873573{
3488- co_reduce_1 (MPI_SUM , a , result_image , stat , errmsg , 0 , errmsg_len );
3574+ internal_co_reduce (MPI_SUM , a , result_image , stat , errmsg , 0 , errmsg_len );
34893575}
34903576
34913577
34923578void
34933579PREFIX (co_min ) (gfc_descriptor_t * a , int result_image , int * stat , char * errmsg ,
34943580 int src_len , int errmsg_len )
34953581{
3496- co_reduce_1 (MPI_MIN , a , result_image , stat , errmsg , src_len , errmsg_len );
3582+ internal_co_reduce (MPI_MIN , a , result_image , stat , errmsg , src_len , errmsg_len );
34973583}
34983584
34993585
35003586void
35013587PREFIX (co_max ) (gfc_descriptor_t * a , int result_image , int * stat ,
35023588 char * errmsg , int src_len , int errmsg_len )
35033589{
3504- co_reduce_1 (MPI_MAX , a , result_image , stat , errmsg , src_len , errmsg_len );
3590+ internal_co_reduce (MPI_MAX , a , result_image , stat , errmsg , src_len , errmsg_len );
35053591}
35063592
35073593
0 commit comments