@@ -1804,7 +1804,8 @@ convert_with_strides (void *dst, int dst_type, int dst_kind, ptrdiff_t byte_dst_
1804
1804
1805
1805
static void
1806
1806
copy_char_to_self (void * src , int src_type , int src_size , int src_kind ,
1807
- void * dst , int dst_type , int dst_size , int dst_kind )
1807
+ void * dst , int dst_type , int dst_size , int dst_kind ,
1808
+ size_t size , bool src_is_scalar )
1808
1809
{
1809
1810
#ifdef GFC_CAF_CHECK
1810
1811
if (dst_type != BT_CHARACTER || src_type != BT_CHARACTER )
@@ -1817,31 +1818,49 @@ copy_char_to_self (void *src, int src_type, int src_size, int src_kind,
1817
1818
* memory location. No offset summation is needed. */
1818
1819
if (dst_kind == src_kind )
1819
1820
{
1820
- memmove (dst , src , min_len * dst_kind );
1821
- /* Fill dest when source is too short. */
1822
- if (dst_len > src_len )
1821
+ for (size_t c = 0 ; c < size ; ++ c )
1823
1822
{
1824
- int32_t * dest_addr = (int32_t * )(dst + dst_kind * src_len );
1825
- const size_t pad_num = dst_len - src_len ;
1826
- if (dst_kind == 1 )
1827
- memset (dest_addr , ' ' , pad_num );
1828
- else if (dst_kind == 4 )
1823
+ memmove (dst , src , min_len * dst_kind );
1824
+ /* Fill dest when source is too short. */
1825
+ if (dst_len > src_len )
1829
1826
{
1830
- const void * end_addr = & (dest_addr [pad_num ]);
1831
- while (dest_addr != end_addr )
1832
- * (dest_addr ++ ) = (int32_t )' ' ;
1827
+ int32_t * dest_addr = (int32_t * )(dst + dst_kind * src_len );
1828
+ const size_t pad_num = dst_len - src_len ;
1829
+ if (dst_kind == 1 )
1830
+ memset (dest_addr , ' ' , pad_num );
1831
+ else if (dst_kind == 4 )
1832
+ {
1833
+ const void * end_addr = & (dest_addr [pad_num ]);
1834
+ while (dest_addr != end_addr )
1835
+ * (dest_addr ++ ) = (int32_t )' ' ;
1836
+ }
1837
+ else
1838
+ caf_runtime_error (unreachable );
1833
1839
}
1834
- else
1835
- caf_runtime_error (unreachable );
1840
+ dst = (void * )((ptrdiff_t )(dst ) + dst_size );
1841
+ if (!src_is_scalar )
1842
+ src = (void * )((ptrdiff_t )(src ) + src_size );
1836
1843
}
1837
1844
}
1838
1845
else
1839
1846
{
1840
1847
/* Assign using kind-conversion. */
1841
1848
if (dst_kind == 1 && src_kind == 4 )
1842
- assign_char1_from_char4 (dst_len , src_len , dst , src );
1849
+ for (size_t c = 0 ; c < size ; ++ c )
1850
+ {
1851
+ assign_char1_from_char4 (dst_len , src_len , dst , src );
1852
+ dst = (void * )((ptrdiff_t )(dst ) + dst_size );
1853
+ if (!src_is_scalar )
1854
+ src = (void * )((ptrdiff_t )(src ) + src_size );
1855
+ }
1843
1856
else if (dst_kind == 4 && src_kind == 1 )
1844
- assign_char4_from_char1 (dst_len , src_len , dst , src );
1857
+ for (size_t c = 0 ; c < size ; ++ c )
1858
+ {
1859
+ assign_char4_from_char1 (dst_len , src_len , dst , src );
1860
+ dst = (void * )((ptrdiff_t )(dst ) + dst_size );
1861
+ if (!src_is_scalar )
1862
+ src = (void * )((ptrdiff_t )(src ) + src_size );
1863
+ }
1845
1864
else
1846
1865
caf_runtime_error ("_caf_send(): Unsupported char kinds in same image assignment (kind(lhs)= %d, kind(rhs) = %d)" ,
1847
1866
dst_kind , src_kind );
@@ -2139,12 +2158,13 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
2139
2158
{
2140
2159
if (same_image )
2141
2160
{
2142
- dprint ("%d/%d: %s() in caf_this == image_index\n" ,
2143
- caf_this_image , caf_num_images , __FUNCTION__ );
2161
+ dprint ("%d/%d: %s() in caf_this == image_index, size = %d, dst_kind = %d, src_kind = %d \n" ,
2162
+ caf_this_image , caf_num_images , __FUNCTION__ , size , dst_kind , src_kind );
2144
2163
if (dst_type == BT_CHARACTER )
2145
2164
/* The size is encoded in the descriptor's type for char arrays. */
2146
2165
copy_char_to_self (src -> base_addr , src_type , src_size , src_kind ,
2147
- dest -> base_addr , dst_type , dst_size , dst_kind );
2166
+ dest -> base_addr , dst_type , dst_size , dst_kind ,
2167
+ size , src_rank == 0 );
2148
2168
else
2149
2169
copy_to_self (src , src_kind , dest , dst_kind , size , stat );
2150
2170
return ;
@@ -2168,7 +2188,8 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
2168
2188
|| (dst_kind != src_kind && dst_type == BT_CHARACTER ))
2169
2189
{
2170
2190
copy_char_to_self (src -> base_addr , src_type , src_size , src_kind ,
2171
- t_buff , dst_type , dst_size , dst_kind );
2191
+ t_buff , dst_type , dst_size , dst_kind ,
2192
+ size , src_rank == 0 );
2172
2193
ierr = MPI_Put (t_buff , dst_size , MPI_BYTE , remote_image ,
2173
2194
offset , dst_size , MPI_BYTE , * p );
2174
2195
}
@@ -2353,18 +2374,23 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
2353
2374
ptrdiff_t array_offset_dst = 0 ;
2354
2375
ptrdiff_t extent = 1 ;
2355
2376
ptrdiff_t tot_ext = 1 ;
2356
- for ( j = 0 ; j < dst_rank - 1 ; ++ j )
2377
+ if (! same_image || ! mrt )
2357
2378
{
2358
- array_offset_dst += ((i / tot_ext )
2359
- % (dest -> dim [j ]._ubound
2360
- - dest -> dim [j ].lower_bound + 1 ))
2361
- * dest -> dim [j ]._stride ;
2362
- extent = (dest -> dim [j ]._ubound - dest -> dim [j ].lower_bound + 1 );
2363
- tot_ext *= extent ;
2364
- }
2379
+ /* For same image and may require temp, the dst_offset is compute
2380
+ on storage. */
2381
+ for (j = 0 ; j < dst_rank - 1 ; ++ j )
2382
+ {
2383
+ array_offset_dst += ((i / tot_ext )
2384
+ % (dest -> dim [j ]._ubound
2385
+ - dest -> dim [j ].lower_bound + 1 ))
2386
+ * dest -> dim [j ]._stride ;
2387
+ extent = (dest -> dim [j ]._ubound - dest -> dim [j ].lower_bound + 1 );
2388
+ tot_ext *= extent ;
2389
+ }
2365
2390
2366
- array_offset_dst += (i / tot_ext ) * dest -> dim [dst_rank - 1 ]._stride ;
2367
- dst_offset = offset + array_offset_dst * dst_size ;
2391
+ array_offset_dst += (i / tot_ext ) * dest -> dim [dst_rank - 1 ]._stride ;
2392
+ dst_offset = array_offset_dst * dst_size ;
2393
+ }
2368
2394
2369
2395
void * sr ;
2370
2396
if (src_rank != 0 )
@@ -2397,17 +2423,18 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
2397
2423
dst_size , src_kind , src_size );
2398
2424
if (same_type_and_kind )
2399
2425
ierr = MPI_Put (sr , dst_size , MPI_BYTE , remote_image ,
2400
- dst_offset , dst_size , MPI_BYTE , * p );
2426
+ offset + dst_offset , dst_size , MPI_BYTE , * p );
2401
2427
else
2402
2428
{
2403
2429
convert_type (t_buff , dst_type , dst_kind ,
2404
2430
sr , src_type , src_kind , stat );
2405
2431
ierr = MPI_Put (t_buff , dst_size , MPI_BYTE , remote_image ,
2406
- dst_offset , dst_size , MPI_BYTE , * p );
2432
+ offset + dst_offset , dst_size , MPI_BYTE , * p );
2407
2433
}
2408
2434
if (pad_str )
2409
- ierr = MPI_Put (pad_str , dst_size - src_size , MPI_BYTE , remote_image ,
2410
- dst_offset , dst_size - src_size , MPI_BYTE , * p );
2435
+ ierr = MPI_Put (pad_str , dst_size - src_size , MPI_BYTE ,
2436
+ remote_image , offset + dst_offset ,
2437
+ dst_size - src_size , MPI_BYTE , * p );
2411
2438
}
2412
2439
else
2413
2440
{
@@ -2464,7 +2491,7 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
2464
2491
}
2465
2492
2466
2493
array_offset_dst += (i / tot_ext ) * dest -> dim [dst_rank - 1 ]._stride ;
2467
- dst_offset = offset + array_offset_dst * dst_size ;
2494
+ dst_offset = array_offset_dst * dst_size ;
2468
2495
memmove (dest -> base_addr + dst_offset , t_buff +
2469
2496
i * dst_size , dst_size );
2470
2497
}
@@ -2490,7 +2517,14 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
2490
2517
if (stat )
2491
2518
* stat = mpi_error ;
2492
2519
else
2493
- PREFIX (error_stop ) (mpi_error );
2520
+ {
2521
+ int error_len = 2048 ;
2522
+ char error_str [error_len ];
2523
+ strcpy (error_str , "MPI-error: " );
2524
+ MPI_Error_string (mpi_error , & error_str [11 ], & error_len );
2525
+ PREFIX (error_stop_str ) (error_str , error_len + 11 );
2526
+ }
2527
+ // PREFIX(error_stop) (mpi_error);
2494
2528
}
2495
2529
}
2496
2530
@@ -2886,9 +2920,7 @@ copy_data (void *ds, mpi_caf_token_t *token, MPI_Aint offset, int dst_type,
2886
2920
This typedef is made to allow storing a copy of a remote descriptor on the
2887
2921
stack without having to care about the rank. */
2888
2922
typedef struct gfc_max_dim_descriptor_t {
2889
- void * base_addr ;
2890
- size_t offset ;
2891
- ptrdiff_t dtype ;
2923
+ gfc_descriptor_t base ;
2892
2924
descriptor_dimension dim [GFC_MAX_DIMENSIONS ];
2893
2925
} gfc_max_dim_descriptor_t ;
2894
2926
@@ -4021,11 +4053,14 @@ PREFIX(is_present) (caf_token_t token, int image_index, caf_reference_t *refs)
4021
4053
MPI_BYTE , global_dynamic_win );
4022
4054
}
4023
4055
#ifdef EXTRA_DEBUG_OUTPUT
4024
- fprintf (stderr , "%d/%d: %s() remote desc rank: %d (ref_rank: %d)\n" , caf_this_image , caf_num_images ,
4025
- __FUNCTION__ , GFC_DESCRIPTOR_RANK (& src_desc ), ref_rank );
4026
- for (i = 0 ; i < GFC_DESCRIPTOR_RANK (& src_desc ); ++ i )
4027
- fprintf (stderr , "%d/%d: %s() remote desc dim[%d] = (lb = %d, ub = %d, stride = %d)\n" , caf_this_image , caf_num_images ,
4028
- __FUNCTION__ , i , src_desc .dim [i ].lower_bound , src_desc .dim [i ]._ubound , src_desc .dim [i ]._stride );
4056
+ {
4057
+ gfc_descriptor_t * src = (gfc_descriptor_t * )(& src_desc );
4058
+ fprintf (stderr , "%d/%d: %s() remote desc rank: %d (ref_rank: %d)\n" , caf_this_image , caf_num_images ,
4059
+ __FUNCTION__ , GFC_DESCRIPTOR_RANK (src ), ref_rank );
4060
+ for (i = 0 ; i < GFC_DESCRIPTOR_RANK (src ); ++ i )
4061
+ fprintf (stderr , "%d/%d: %s() remote desc dim[%d] = (lb = %d, ub = %d, stride = %d)\n" , caf_this_image , caf_num_images ,
4062
+ __FUNCTION__ , i , src_desc .dim [i ].lower_bound , src_desc .dim [i ]._ubound , src_desc .dim [i ]._stride );
4063
+ }
4029
4064
#endif
4030
4065
4031
4066
for (i = 0 ; riter -> u .a .mode [i ] != CAF_ARR_REF_NONE ; ++ i )
0 commit comments