Skip to content

Commit d012267

Browse files
committed
Merge branch 'vehre/issue-292-type-conversion-during-communication' of github.com:sourceryinstitute/opencoarrays into vehre/issue-292-type-conversion-during-communication
2 parents fd83172 + e08c244 commit d012267

File tree

2 files changed

+270
-75
lines changed

2 files changed

+270
-75
lines changed

src/mpi/mpi_caf.c

Lines changed: 114 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -1787,20 +1787,18 @@ convert_type (void *dst, int dst_type, int dst_kind, void *src, int src_type,
17871787
}
17881788

17891789
/* Convert NUM items one at a time by calling convert_type () on each
 * destination/source pair and then advancing both positions.
 *
 * dst, dst_type, dst_kind: Address of the first destination item and the
 *   type/kind values forwarded unchanged to convert_type ().
 * byte_dst_stride: Distance in BYTES from one destination item to the next.
 * src, src_type, src_kind: Address of the first source item and the
 *   type/kind values forwarded unchanged to convert_type ().
 * byte_src_stride: Distance in BYTES from one source item to the next.  A
 *   stride of 0 keeps reading the same source item, which is how a scalar
 *   source is spread over all destination items.
 * num: Number of items to convert; nothing is done when it is 0.
 * stat: Forwarded unchanged to convert_type () for every item.  */
static void
convert_with_strides (void *dst, int dst_type, int dst_kind,
		      ptrdiff_t byte_dst_stride,
		      void *src, int src_type, int src_kind,
		      ptrdiff_t byte_src_stride,
		      size_t num, int *stat)
{
  /* The strides are already given in bytes, so no scaling by the kind is
   * needed here.  Step through the buffers with char pointers: arithmetic
   * on void * is a GNU extension and not valid ISO C.  */
  char *dst_item = dst;
  char *src_item = src;

  for (size_t i = 0; i < num; ++i)
    {
      convert_type (dst_item, dst_type, dst_kind,
		    src_item, src_type, src_kind, stat);
      dst_item += byte_dst_stride;
      src_item += byte_src_stride;
    }
}
18061804

@@ -1867,8 +1865,10 @@ copy_to_self (gfc_descriptor_t *src, int src_kind,
18671865
/* When the rank is 0 then a scalar is copied to a vector and the stride
18681866
* is zero. */
18691867
convert_with_strides (dest->base_addr, GFC_DESCRIPTOR_TYPE (dest), dst_kind,
1870-
1, src->base_addr, GFC_DESCRIPTOR_TYPE (src), src_kind,
1871-
GFC_DESCRIPTOR_RANK (src) > 0, size, stat);
1868+
GFC_DTYPE_TYPE_SIZE (dest), src->base_addr,
1869+
GFC_DESCRIPTOR_TYPE (src), src_kind,
1870+
GFC_DESCRIPTOR_RANK (src) > 0 ? GFC_DTYPE_TYPE_SIZE (src)
1871+
: 0, size, stat);
18721872
}
18731873

18741874
/* token: The token of the array to be written to. */
@@ -2079,17 +2079,17 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
20792079
dst_type = GFC_DESCRIPTOR_TYPE(dest);
20802080
const bool src_contiguous = PREFIX (is_contiguous) (src),
20812081
dst_contiguous = PREFIX (is_contiguous) (dest);
2082+
const bool same_image = caf_this_image == image_index,
2083+
same_type_and_kind = dst_type == src_type && dst_kind == src_kind;
20822084

20832085
MPI_Win *p = TOKEN(token);
20842086
ptrdiff_t dst_offset = 0;
20852087
void *pad_str = NULL;
20862088
bool free_pad_str = false;
20872089
void *t_buff = NULL;
20882090
bool free_t_buff = false;
2089-
bool *buff_map = NULL;
20902091
const bool dest_char_array_is_longer
2091-
= dst_type == BT_CHARACTER && dst_size > src_size
2092-
&& caf_this_image != image_index;
2092+
= dst_type == BT_CHARACTER && dst_size > src_size && !same_image;
20932093
const int remote_image = image_index - 1;
20942094

20952095
/* Ensure stat is always set. */
@@ -2108,6 +2108,8 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
21082108
if (size == 0)
21092109
return;
21102110

2111+
dprint ("%d/%d: %s() dst_vector = %p, image_index = %d.\n", caf_this_image, caf_num_images,
2112+
__FUNCTION__, dst_vector, image_index);
21112113
check_image_health(image_index, stat);
21122114

21132115
/* For char arrays: create the padding array, when dst is longer than src. */
@@ -2118,7 +2120,11 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
21182120
/* For big arrays alloca() may not be able to get the memory on the stack.
21192121
* Use a regular malloc then. */
21202122
if ((free_pad_str = ((pad_str = alloca (pad_sz)) == NULL)))
2121-
pad_str = malloc (pad_sz);
2123+
{
2124+
pad_str = malloc (pad_sz);
2125+
if (t_buff == NULL)
2126+
caf_runtime_error ("Unable to allocate memory for internal buffer in send().");
2127+
}
21222128
if (dst_kind == 1)
21232129
memset (pad_str, ' ', pad_num);
21242130
else /* dst_kind == 4. */
@@ -2131,7 +2137,7 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
21312137
if (src_contiguous && dst_contiguous
21322138
&& dst_vector == NULL)
21332139
{
2134-
if(caf_this_image == image_index)
2140+
if(same_image)
21352141
{
21362142
dprint ("%d/%d: %s() in caf_this == image_index\n",
21372143
caf_this_image, caf_num_images, __FUNCTION__);
@@ -2149,9 +2155,13 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
21492155
if (dst_kind != src_kind || dest_char_array_is_longer
21502156
|| src_rank == 0)
21512157
if ((free_t_buff = ((t_buff = alloca (dst_size * size)) == NULL)))
2152-
t_buff = malloc (dst_size * size);
2158+
{
2159+
t_buff = malloc (dst_size * size);
2160+
if (t_buff == NULL)
2161+
caf_runtime_error ("Unable to allocate memory for internal buffer in send().");
2162+
}
21532163

2154-
if ((dst_type == src_type && dst_kind == src_kind && dst_rank == src_rank)
2164+
if ((same_type_and_kind && dst_rank == src_rank)
21552165
|| dst_type == BT_CHARACTER)
21562166
{
21572167
if (dest_char_array_is_longer
@@ -2173,9 +2183,9 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
21732183
}
21742184
else
21752185
{
2176-
convert_with_strides (t_buff, dst_type, dst_kind, 1,
2186+
convert_with_strides (t_buff, dst_type, dst_kind, dst_size,
21772187
src->base_addr, src_type, src_kind,
2178-
src_rank > 0,
2188+
src_rank > 0 ? src_size: 0,
21792189
size, stat);
21802190
ierr = MPI_Put (t_buff, dst_size * size, MPI_BYTE, remote_image,
21812191
offset, dst_size * size, MPI_BYTE, *p);
@@ -2204,8 +2214,6 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
22042214
#else
22052215
MPI_Win_flush (remote_image, *p);
22062216
#endif // CAF_MPI_LOCK_UNLOCK
2207-
if (free_t_buff)
2208-
free (t_buff);
22092217
}
22102218
}
22112219
else
@@ -2319,77 +2327,111 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
23192327
MPI_Type_free (&dt_d);
23202328

23212329
#else
2322-
if(caf_this_image == image_index && mrt)
2330+
if(same_image && mrt)
23232331
{
2324-
t_buff = calloc(size,GFC_DESCRIPTOR_SIZE (dest));
2325-
buff_map = calloc(size,sizeof(bool));
2332+
if ((free_t_buff = (((t_buff = alloca (dst_size * size))) == NULL)))
2333+
{
2334+
t_buff = malloc (dst_size * size);
2335+
if (t_buff == NULL)
2336+
caf_runtime_error ("Unable to allocate memory for internal buffer in send().");
2337+
}
2338+
}
2339+
else if (!same_type_and_kind && !same_image)
2340+
{
2341+
if ((free_t_buff = (((t_buff = alloca (dst_size))) == NULL)))
2342+
{
2343+
t_buff = malloc (dst_size);
2344+
if (t_buff == NULL)
2345+
caf_runtime_error ("Unable to allocate memory for internal buffer in send().");
2346+
}
23262347
}
23272348

2328-
CAF_Win_lock (MPI_LOCK_EXCLUSIVE, image_index - 1, *p);
2329-
for (i = 0; i < size; i++)
2349+
if (!same_image)
2350+
CAF_Win_lock (MPI_LOCK_EXCLUSIVE, remote_image, *p);
2351+
for (i = 0; i < size; ++i)
23302352
{
23312353
ptrdiff_t array_offset_dst = 0;
2332-
ptrdiff_t stride = 1;
23332354
ptrdiff_t extent = 1;
2334-
ptrdiff_t tot_ext = 1;
2335-
for (j = 0; j < dst_rank-1; j++)
2355+
ptrdiff_t tot_ext = 1;
2356+
for (j = 0; j < dst_rank - 1; ++j)
23362357
{
23372358
array_offset_dst += ((i / tot_ext)
23382359
% (dest->dim[j]._ubound
23392360
- dest->dim[j].lower_bound + 1))
23402361
* dest->dim[j]._stride;
23412362
extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
2342-
stride = dest->dim[j]._stride;
2343-
tot_ext *= extent;
2363+
tot_ext *= extent;
23442364
}
23452365

2346-
array_offset_dst += (i / tot_ext) * dest->dim[dst_rank-1]._stride;
2347-
dst_offset = offset + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest);
2366+
array_offset_dst += (i / tot_ext) * dest->dim[dst_rank - 1]._stride;
2367+
dst_offset = offset + array_offset_dst * dst_size;
23482368

23492369
void *sr;
2350-
if (GFC_DESCRIPTOR_RANK (src) != 0)
2370+
if (src_rank != 0)
23512371
{
23522372
ptrdiff_t array_offset_sr = 0;
2353-
stride = 1;
23542373
extent = 1;
2355-
tot_ext = 1;
2356-
for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
2374+
tot_ext = 1;
2375+
for (j = 0; j < src_rank - 1; ++j)
23572376
{
23582377
array_offset_sr += ((i / tot_ext)
23592378
% (src->dim[j]._ubound
23602379
- src->dim[j].lower_bound + 1))
23612380
* src->dim[j]._stride;
23622381
extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
2363-
stride = src->dim[j]._stride;
2364-
tot_ext *= extent;
2382+
tot_ext *= extent;
23652383
}
23662384

23672385
array_offset_sr += (i / tot_ext) * src->dim[dst_rank-1]._stride;
23682386
sr = (void *)((char *) src->base_addr
2369-
+ array_offset_sr*GFC_DESCRIPTOR_SIZE (src));
2387+
+ array_offset_sr * src_size);
23702388
}
23712389
else
23722390
sr = src->base_addr;
23732391

2374-
if(caf_this_image == image_index)
2392+
if(!same_image)
23752393
{
2376-
if(!mrt)
2377-
memmove(dest->base_addr+dst_offset,sr,GFC_DESCRIPTOR_SIZE (src));
2394+
// Do the more likely first.
2395+
dprint ("%d/%d: %s() kind(dst) = %d, el_sz(dst) = %d, kind(src) = %d, el_sz(src) = %d.\n",
2396+
caf_this_image, caf_num_images, __FUNCTION__, dst_kind,
2397+
dst_size, src_kind, src_size);
2398+
if (same_type_and_kind)
2399+
ierr = MPI_Put (sr, dst_size, MPI_BYTE, remote_image,
2400+
dst_offset, dst_size, MPI_BYTE, *p);
23782401
else
23792402
{
2380-
memmove(t_buff+i*GFC_DESCRIPTOR_SIZE (src),sr,GFC_DESCRIPTOR_SIZE (src));
2381-
buff_map[i] = true;
2403+
convert_type (t_buff, dst_type, dst_kind,
2404+
sr, src_type, src_kind, stat);
2405+
ierr = MPI_Put (t_buff, dst_size, MPI_BYTE, remote_image,
2406+
dst_offset, dst_size, MPI_BYTE, *p);
23822407
}
2408+
if (pad_str)
2409+
ierr = MPI_Put (pad_str, dst_size - src_size, MPI_BYTE, remote_image,
2410+
dst_offset, dst_size - src_size, MPI_BYTE, *p);
23832411
}
23842412
else
23852413
{
2386-
CAF_Win_lock (MPI_LOCK_EXCLUSIVE, image_index - 1, *p);
2387-
ierr = MPI_Put (sr, GFC_DESCRIPTOR_SIZE (dest), MPI_BYTE, image_index-1,
2388-
dst_offset, GFC_DESCRIPTOR_SIZE (dest), MPI_BYTE, *p);
2389-
if (pad_str)
2390-
ierr = MPI_Put (pad_str, dst_size - src_size, MPI_BYTE, image_index-1,
2391-
dst_offset, dst_size - src_size, MPI_BYTE, *p);
2392-
CAF_Win_unlock (image_index - 1, *p);
2414+
if(!mrt)
2415+
{
2416+
dprint ("%d/%d: %s() strided same_image, no temp, for i = %d, dst_offset = %d.\n",
2417+
caf_this_image, caf_num_images, __FUNCTION__, i,
2418+
dst_offset);
2419+
if (same_type_and_kind)
2420+
memmove(dest->base_addr + dst_offset, sr, src_size);
2421+
else
2422+
convert_type (dest->base_addr + dst_offset, dst_type,
2423+
dst_kind, sr, src_type, src_kind, stat);
2424+
}
2425+
else
2426+
{
2427+
dprint ("%d/%d: %s() strided same_image, *WITH* temp, for i = %d.\n",
2428+
caf_this_image, caf_num_images, __FUNCTION__, i);
2429+
if (same_type_and_kind)
2430+
memmove(t_buff + i * dst_size, sr, src_size);
2431+
else
2432+
convert_type (t_buff + i * dst_size, dst_type, dst_kind,
2433+
sr, src_type, src_kind, stat);
2434+
}
23932435
}
23942436

23952437
#ifndef WITH_FAILED_IMAGES
@@ -2400,42 +2442,39 @@ PREFIX (send) (caf_token_t token, size_t offset, int image_index,
24002442
}
24012443
#endif
24022444
}
2445+
if (!same_image)
2446+
CAF_Win_unlock (remote_image, *p);
24032447

2404-
if(caf_this_image == image_index && mrt)
2448+
2449+
if(same_image && mrt)
24052450
{
2406-
for(i=0;i<size;i++)
2451+
for(i = 0; i < size; ++i)
24072452
{
2408-
if(buff_map[i])
2453+
ptrdiff_t array_offset_dst = 0;
2454+
ptrdiff_t extent = 1;
2455+
ptrdiff_t tot_ext = 1;
2456+
for (j = 0; j < dst_rank - 1; j++)
24092457
{
2410-
ptrdiff_t array_offset_dst = 0;
2411-
ptrdiff_t stride = 1;
2412-
ptrdiff_t extent = 1;
2413-
ptrdiff_t tot_ext = 1;
2414-
for (j = 0; j < dst_rank-1; j++)
2415-
{
2416-
array_offset_dst += ((i / tot_ext)
2417-
% (dest->dim[j]._ubound
2418-
- dest->dim[j].lower_bound + 1))
2419-
* dest->dim[j]._stride;
2420-
extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
2421-
stride = dest->dim[j]._stride;
2422-
tot_ext *= extent;
2423-
}
2424-
2425-
//extent = (dest->dim[rank-1]._ubound - dest->dim[rank-1].lower_bound + 1);
2426-
array_offset_dst += (i / tot_ext) * dest->dim[dst_rank-1]._stride;
2427-
dst_offset = offset + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest);
2428-
memmove(src->base_addr+dst_offset,t_buff+i*GFC_DESCRIPTOR_SIZE (src),GFC_DESCRIPTOR_SIZE (src));
2458+
array_offset_dst += ((i / tot_ext)
2459+
% (dest->dim[j]._ubound
2460+
- dest->dim[j].lower_bound + 1))
2461+
* dest->dim[j]._stride;
2462+
extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
2463+
tot_ext *= extent;
24292464
}
2465+
2466+
array_offset_dst += (i / tot_ext) * dest->dim[dst_rank - 1]._stride;
2467+
dst_offset = offset + array_offset_dst * dst_size;
2468+
memmove (dest->base_addr + dst_offset, t_buff +
2469+
i * dst_size, dst_size);
24302470
}
2431-
free(t_buff);
2432-
free(buff_map);
24332471
}
2434-
CAF_Win_unlock (image_index - 1, *p);
24352472
#endif
24362473
}
24372474

24382475
/* Free memory, when not allocated on stack. */
2476+
if (free_t_buff)
2477+
free(t_buff);
24392478
if (free_pad_str)
24402479
free (pad_str);
24412480

0 commit comments

Comments
 (0)