@@ -141,10 +141,17 @@ MPI_Comm CAF_COMM_WORLD;
141
141
(and thus finalization) of MPI. */
142
142
bool caf_owns_mpi = false;
143
143
144
- /* Foo function pointers for coreduce */
145
- int (* foo_int32_t )(void * , void * );
146
- float (* foo_float )(void * , void * );
147
- double (* foo_double )(void * , void * );
144
+ /* Foo function pointers for coreduce.
145
+ The handles when arguments are passed by reference. */
146
+ int (* int32_t_by_reference )(void * , void * );
147
+ float (* float_by_reference )(void * , void * );
148
+ double (* double_by_reference )(void * , void * );
149
+ /* Strings are always passed by reference. */
150
+ void (* char_by_reference )(void * , int , void * , void * , int , int );
151
+ /* The handles when arguments are passed by value. */
152
+ int (* int32_t_by_value )(int32_t , int32_t );
153
+ float (* float_by_value )(float , float );
154
+ double (* double_by_value )(double , double );
148
155
149
156
/* Define shortcuts for Win_lock and _unlock depending on whether the primitives
150
157
are available in the MPI implementation. When they are not available the
@@ -3125,17 +3132,29 @@ name (datatype *invec, datatype *inoutvec, int *len, \
3125
3132
operator; \
3126
3133
}
3127
3134
3128
- #define FOOFUNC (TYPE ) foo_ ## TYPE
3135
+ #define REFERENCE_FUNC (TYPE ) TYPE ## _by_reference
3136
+ #define VALUE_FUNC (TYPE ) TYPE ## _by_value
3129
3137
3130
3138
#define GEN_COREDUCE (name , dt ) \
3131
3139
static void \
3132
- name (void *invec, void *inoutvec, int *len, \
3140
+ name##_by_reference_adapter (void *invec, void *inoutvec, int *len, \
3133
3141
MPI_Datatype *datatype) \
3134
3142
{ \
3135
3143
int i; \
3136
3144
for(i=0;i<*len;i++) \
3137
3145
{ \
3138
- *((dt*)inoutvec) = (dt)(FOOFUNC(dt)((dt *)invec,(dt *)inoutvec)); \
3146
+ *((dt*)inoutvec) = (dt)(REFERENCE_FUNC(dt)((dt *)invec, (dt *)inoutvec)); \
3147
+ invec+=sizeof(dt); inoutvec+=sizeof(dt); \
3148
+ } \
3149
+ } \
3150
+ static void \
3151
+ name##_by_value_adapter (void *invec, void *inoutvec, int *len, \
3152
+ MPI_Datatype *datatype) \
3153
+ { \
3154
+ int i; \
3155
+ for(i=0;i<*len;i++) \
3156
+ { \
3157
+ *((dt*)inoutvec) = (dt)(VALUE_FUNC(dt)(*(dt *)invec, *(dt *)inoutvec)); \
3139
3158
invec+=sizeof(dt); inoutvec+=sizeof(dt); \
3140
3159
} \
3141
3160
}
@@ -3144,6 +3163,22 @@ GEN_COREDUCE (redux_int32, int32_t)
3144
3163
GEN_COREDUCE (redux_real32 , float )
3145
3164
GEN_COREDUCE (redux_real64 , double )
3146
3165
3166
+ static void \
3167
+ redux_char_by_reference_adapter (void * invec , void * inoutvec , int * len ,
3168
+ MPI_Datatype * datatype )
3169
+ {
3170
+ long int string_len ;
3171
+ MPI_Type_extent (* datatype , & string_len );
3172
+ for (int i = 0 ; i < * len ; i ++ )
3173
+ {
3174
+ /* The length of the result is fixed, i.e., no deferred string length is
3175
+ * allowed there. */
3176
+ REFERENCE_FUNC (char )((char * )inoutvec , string_len , (char * )invec , (char * )inoutvec , string_len , string_len );
3177
+ invec += sizeof (char ) * string_len ;
3178
+ inoutvec += sizeof (char ) * string_len ;
3179
+ }
3180
+ }
3181
+
3147
3182
#ifndef MPI_INTEGER1
3148
3183
GEN_REDUCTION (do_sum_int1 , int8_t , inoutvec [i ] += invec [i ])
3149
3184
GEN_REDUCTION (do_min_int1 , int8_t ,
@@ -3198,7 +3233,7 @@ GEN_REDUCTION (do_max_complex10, _Complex __float128,
3198
3233
3199
3234
3200
3235
static MPI_Datatype
3201
- get_MPI_datatype (gfc_descriptor_t * desc )
3236
+ get_MPI_datatype (gfc_descriptor_t * desc , int char_len )
3202
3237
{
3203
3238
/* FIXME: Better check whether the sizes are okay and supported;
3204
3239
MPI3 adds more types, e.g. MPI_INTEGER1. */
@@ -3255,22 +3290,29 @@ get_MPI_datatype (gfc_descriptor_t *desc)
3255
3290
GFC_DTYPE_TYPE_SIZE == GFC_TYPE_CHARACTER + 64*strlen
3256
3291
*/
3257
3292
if ( (GFC_DTYPE_TYPE_SIZE (desc )- GFC_DTYPE_CHARACTER )%64 == 0 )
3258
- return MPI_CHARACTER ;
3293
+ {
3294
+ MPI_Datatype string ;
3295
+ if (char_len == 0 )
3296
+ char_len = GFC_DESCRIPTOR_SIZE (desc );
3297
+ MPI_Type_contiguous (char_len , MPI_CHARACTER , & string );
3298
+ MPI_Type_commit (& string );
3299
+ return string ;
3300
+ }
3259
3301
3260
3302
caf_runtime_error ("Unsupported data type in collective: %ld\n" ,GFC_DTYPE_TYPE_SIZE (desc ));
3261
3303
return 0 ;
3262
3304
}
3263
3305
3264
3306
3265
3307
static void
3266
- co_reduce_1 (MPI_Op op , gfc_descriptor_t * source , int result_image , int * stat ,
3267
- char * errmsg , int src_len __attribute__ (( unused )) , int errmsg_len )
3308
+ internal_co_reduce (MPI_Op op , gfc_descriptor_t * source , int result_image , int * stat ,
3309
+ char * errmsg , int src_len , int errmsg_len )
3268
3310
{
3269
3311
size_t i , size ;
3270
3312
int j , ierr ;
3271
3313
int rank = GFC_DESCRIPTOR_RANK (source );
3272
3314
3273
- MPI_Datatype datatype = get_MPI_datatype (source );
3315
+ MPI_Datatype datatype = get_MPI_datatype (source , src_len );
3274
3316
3275
3317
size = 1 ;
3276
3318
for (j = 0 ; j < rank ; j ++ )
@@ -3285,17 +3327,17 @@ co_reduce_1 (MPI_Op op, gfc_descriptor_t *source, int result_image, int *stat,
3285
3327
if (rank == 0 || PREFIX (is_contiguous ) (source ))
3286
3328
{
3287
3329
if (result_image == 0 )
3288
- ierr = MPI_Allreduce (MPI_IN_PLACE , source -> base_addr , size , datatype ,
3330
+ ierr = MPI_Allreduce (MPI_IN_PLACE , source -> base_addr , size , datatype ,
3289
3331
op , CAF_COMM_WORLD );
3290
3332
else if (result_image == caf_this_image )
3291
- ierr = MPI_Reduce (MPI_IN_PLACE , source -> base_addr , size , datatype , op ,
3333
+ ierr = MPI_Reduce (MPI_IN_PLACE , source -> base_addr , size , datatype , op ,
3292
3334
result_image - 1 , CAF_COMM_WORLD );
3293
3335
else
3294
- ierr = MPI_Reduce (source -> base_addr , NULL , size , datatype , op ,
3336
+ ierr = MPI_Reduce (source -> base_addr , NULL , size , datatype , op ,
3295
3337
result_image - 1 , CAF_COMM_WORLD );
3296
3338
if (ierr )
3297
3339
goto error ;
3298
- return ;
3340
+ goto co_reduce_cleanup ;
3299
3341
}
3300
3342
3301
3343
for (i = 0 ; i < size ; i ++ )
@@ -3316,18 +3358,21 @@ co_reduce_1 (MPI_Op op, gfc_descriptor_t *source, int result_image, int *stat,
3316
3358
void * sr = (void * )((char * ) source -> base_addr
3317
3359
+ array_offset_sr * GFC_DESCRIPTOR_SIZE (source ));
3318
3360
if (result_image == 0 )
3319
- ierr = MPI_Allreduce (MPI_IN_PLACE , sr , 1 , datatype , op ,
3361
+ ierr = MPI_Allreduce (MPI_IN_PLACE , sr , 1 , datatype , op ,
3320
3362
CAF_COMM_WORLD );
3321
3363
else if (result_image == caf_this_image )
3322
- ierr = MPI_Reduce (MPI_IN_PLACE , sr , 1 , datatype , op ,
3364
+ ierr = MPI_Reduce (MPI_IN_PLACE , sr , 1 , datatype , op ,
3323
3365
result_image - 1 , CAF_COMM_WORLD );
3324
3366
else
3325
- ierr = MPI_Reduce (sr , NULL , 1 , datatype , op , result_image - 1 ,
3367
+ ierr = MPI_Reduce (sr , NULL , 1 , datatype , op , result_image - 1 ,
3326
3368
CAF_COMM_WORLD );
3327
3369
if (ierr )
3328
3370
goto error ;
3329
3371
}
3330
3372
3373
+ co_reduce_cleanup :
3374
+ if (GFC_DESCRIPTOR_TYPE (source ) == BT_CHARACTER )
3375
+ MPI_Type_free (& datatype );
3331
3376
if (stat )
3332
3377
* stat = 0 ;
3333
3378
return ;
@@ -3360,7 +3405,7 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
3360
3405
int j , ierr ;
3361
3406
int rank = GFC_DESCRIPTOR_RANK (a );
3362
3407
3363
- MPI_Datatype datatype = get_MPI_datatype (a );
3408
+ MPI_Datatype datatype = get_MPI_datatype (a , 0 );
3364
3409
3365
3410
size = 1 ;
3366
3411
for (j = 0 ; j < rank ; j ++ )
@@ -3375,7 +3420,7 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
3375
3420
if (rank == 0 )
3376
3421
{
3377
3422
if (datatype != MPI_CHARACTER )
3378
- ierr = MPI_Bcast (a -> base_addr , size , datatype , source_image - 1 , CAF_COMM_WORLD );
3423
+ ierr = MPI_Bcast (a -> base_addr , size , datatype , source_image - 1 , CAF_COMM_WORLD );
3379
3424
else
3380
3425
{
3381
3426
int a_length ;
@@ -3386,12 +3431,12 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
3386
3431
if (ierr )
3387
3432
goto error ;
3388
3433
/* Broadcast the string itself */
3389
- ierr = MPI_Bcast (a -> base_addr , a_length , datatype , source_image - 1 , CAF_COMM_WORLD );
3434
+ ierr = MPI_Bcast (a -> base_addr , a_length , datatype , source_image - 1 , CAF_COMM_WORLD );
3390
3435
}
3391
3436
3392
3437
if (ierr )
3393
3438
goto error ;
3394
- return ;
3439
+ goto co_broadcast_exit ;
3395
3440
}
3396
3441
else if (datatype == MPI_CHARACTER ) /* rank !=0 */
3397
3442
{
@@ -3422,6 +3467,11 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
3422
3467
goto error ;
3423
3468
}
3424
3469
3470
+ co_broadcast_exit :
3471
+ if (stat )
3472
+ * stat = 0 ;
3473
+ if (GFC_DESCRIPTOR_TYPE (a ) == BT_CHARACTER )
3474
+ MPI_Type_free (& datatype );
3425
3475
return ;
3426
3476
3427
3477
error :
@@ -3445,63 +3495,99 @@ PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *e
3445
3495
memset (& errmsg [len ], '\0' , errmsg_len - len );
3446
3496
}
3447
3497
3498
+ /** The front-end function for co_reduce functionality. It sets up the MPI_Op
3499
+ * for use in MPI_*Reduce functions. */
3448
3500
void
3449
3501
PREFIX (co_reduce ) (gfc_descriptor_t * a , void * (* opr ) (void * , void * ), int opr_flags ,
3450
3502
int result_image , int * stat , char * errmsg , int a_len , int errmsg_len )
3451
3503
{
3452
3504
MPI_Op op ;
3453
- if (GFC_DESCRIPTOR_TYPE (a ) == BT_INTEGER )
3505
+ /* Integers and logicals can be treated the same. */
3506
+ if (GFC_DESCRIPTOR_TYPE (a ) == BT_INTEGER
3507
+ || GFC_DESCRIPTOR_TYPE (a ) == BT_LOGICAL )
3454
3508
{
3455
- foo_int32_t = (typeof (foo_int32_t ))opr ;
3456
- MPI_Op_create (redux_int32 , 1 , & op );
3509
+ /* When the ARG_VALUE opr_flag is set, then the user-function expects its
3510
+ * arguments to be passed by value. */
3511
+ if ((opr_flags & GFC_CAF_ARG_VALUE ) > 0 )
3512
+ {
3513
+ int32_t_by_value = (typeof (VALUE_FUNC (int32_t )))opr ;
3514
+ MPI_Op_create (redux_int32_by_value_adapter , 1 , & op );
3515
+ }
3516
+ else
3517
+ {
3518
+ int32_t_by_reference = (typeof (REFERENCE_FUNC (int32_t )))opr ;
3519
+ MPI_Op_create (redux_int32_by_reference_adapter , 1 , & op );
3520
+ }
3457
3521
}
3522
+ /* Treat reals/doubles. */
3458
3523
else if (GFC_DESCRIPTOR_TYPE (a ) == BT_REAL )
3459
3524
{
3525
+ /* When the ARG_VALUE opr_flag is set, then the user-function expects its
3526
+ * arguments to be passed by value. */
3460
3527
if (GFC_DESCRIPTOR_SIZE (a ) == sizeof (float ))
3461
3528
{
3462
- foo_float = (typeof (foo_float ))opr ;
3463
- MPI_Op_create (redux_real32 , 1 , & op );
3529
+ if ((opr_flags & GFC_CAF_ARG_VALUE ) > 0 )
3530
+ {
3531
+ float_by_value = (typeof (VALUE_FUNC (float )))opr ;
3532
+ MPI_Op_create (redux_real32_by_value_adapter , 1 , & op );
3533
+ }
3534
+ else
3535
+ {
3536
+ float_by_reference = (typeof (REFERENCE_FUNC (float )))opr ;
3537
+ MPI_Op_create (redux_real32_by_reference_adapter , 1 , & op );
3538
+ }
3464
3539
}
3465
3540
else
3466
- {
3467
- foo_double = (typeof (foo_double ))opr ;
3468
- MPI_Op_create (redux_real64 , 1 , & op );
3541
+ {
3542
+ /* When the ARG_VALUE opr_flag is set, then the user-function expects its
3543
+ * arguments to be passed by value. */
3544
+ if ((opr_flags & GFC_CAF_ARG_VALUE ) > 0 )
3545
+ {
3546
+ double_by_value = (typeof (VALUE_FUNC (double )))opr ;
3547
+ MPI_Op_create (redux_real64_by_value_adapter , 1 , & op );
3548
+ }
3549
+ else
3550
+ {
3551
+ double_by_reference = (typeof (REFERENCE_FUNC (double )))opr ;
3552
+ MPI_Op_create (redux_real64_by_reference_adapter , 1 , & op );
3553
+ }
3469
3554
}
3470
3555
}
3471
- else if (GFC_DESCRIPTOR_TYPE (a ) == BT_LOGICAL )
3556
+ else if (GFC_DESCRIPTOR_TYPE (a ) == BT_CHARACTER )
3472
3557
{
3473
- foo_int32_t = (typeof (foo_int32_t ))opr ;
3474
- MPI_Op_create (redux_int32 , 1 , & op );
3558
+ /* Char array functions always pass by reference. */
3559
+ char_by_reference = (typeof (REFERENCE_FUNC (char )))opr ;
3560
+ MPI_Op_create (redux_char_by_reference_adapter , 1 , & op );
3475
3561
}
3476
3562
else
3477
3563
{
3478
3564
caf_runtime_error ("Data type not yet supported for co_reduce\n" );
3479
3565
}
3480
3566
3481
- co_reduce_1 (op , a , result_image , stat , errmsg , 0 , errmsg_len );
3567
+ internal_co_reduce (op , a , result_image , stat , errmsg , a_len , errmsg_len );
3482
3568
}
3483
3569
3484
3570
void
3485
3571
PREFIX (co_sum ) (gfc_descriptor_t * a , int result_image , int * stat , char * errmsg ,
3486
3572
int errmsg_len )
3487
3573
{
3488
- co_reduce_1 (MPI_SUM , a , result_image , stat , errmsg , 0 , errmsg_len );
3574
+ internal_co_reduce (MPI_SUM , a , result_image , stat , errmsg , 0 , errmsg_len );
3489
3575
}
3490
3576
3491
3577
3492
3578
void
3493
3579
PREFIX (co_min ) (gfc_descriptor_t * a , int result_image , int * stat , char * errmsg ,
3494
3580
int src_len , int errmsg_len )
3495
3581
{
3496
- co_reduce_1 (MPI_MIN , a , result_image , stat , errmsg , src_len , errmsg_len );
3582
+ internal_co_reduce (MPI_MIN , a , result_image , stat , errmsg , src_len , errmsg_len );
3497
3583
}
3498
3584
3499
3585
3500
3586
void
3501
3587
PREFIX (co_max ) (gfc_descriptor_t * a , int result_image , int * stat ,
3502
3588
char * errmsg , int src_len , int errmsg_len )
3503
3589
{
3504
- co_reduce_1 (MPI_MAX , a , result_image , stat , errmsg , src_len , errmsg_len );
3590
+ internal_co_reduce (MPI_MAX , a , result_image , stat , errmsg , src_len , errmsg_len );
3505
3591
}
3506
3592
3507
3593
0 commit comments