Skip to content

Commit eb43922

Browse files
committed
ompi/datatype: fix bug in darray that causes MPI/IO failures
This commit fixes errors in the lb and extent of darray datatypes. For these datatypes, the lb should be the start offset of the rank's data in the array and the extent should be the size of the entire datatype. In master, the lb was always 0 and the extent was always too small. This commit updates the call to opal_datatype_resize to set the correct lb and fixes the extent calculation. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent f9e8a55 commit eb43922

File tree

1 file changed

+26
-24
lines changed

1 file changed

+26
-24
lines changed

ompi/datatype/ompi_datatype_create_darray.c

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* -*- Mode: C; c-basic-offset:4 ; -*- */
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
33
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
44
* University Research and Technology
@@ -13,6 +13,8 @@
1313
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
1414
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
1515
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
16+
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
17+
* reserved.
1618
* $COPYRIGHT$
1719
*
1820
* Additional copyrights may follow
@@ -169,6 +171,7 @@ int32_t ompi_datatype_create_darray(int size,
169171
ptrdiff_t orig_extent, *st_offsets = NULL;
170172
int i, start_loop, end_loop, step;
171173
int *coords = NULL, rc = OMPI_SUCCESS;
174+
ptrdiff_t displs[2], tmp_size = 1;
172175

173176
/* speedy corner case */
174177
if (ndims < 1) {
@@ -187,10 +190,13 @@ int32_t ompi_datatype_create_darray(int size,
187190
int tmp_rank = rank, procs = size;
188191

189192
coords = (int *) malloc(ndims * sizeof(int));
193+
displs[1] = orig_extent;
190194
for (i = 0 ; i < ndims ; i++) {
191195
procs = procs / psize_array[i];
192196
coords[i] = tmp_rank / procs;
193197
tmp_rank = tmp_rank % procs;
198+
/* compute the upper bound of the datatype, including all dimensions */
199+
displs[1] *= gsize_array[i];
194200
}
195201
}
196202

@@ -246,7 +252,6 @@ int32_t ompi_datatype_create_darray(int size,
246252
lastType = *newtype;
247253
}
248254

249-
250255
/**
251256
* We need to shift the content (useful data) of the datatype, so
252257
* we need to force the displacement to be moved. Therefore, we
@@ -255,29 +260,26 @@ int32_t ompi_datatype_create_darray(int size,
255260
* new data, and insert the last_Type with the correct
256261
* displacement.
257262
*/
258-
{
259-
ptrdiff_t displs[2], tmp_size = 1;
260-
261-
displs[0] = st_offsets[start_loop];
262-
displs[1] = orig_extent;
263-
for (i = start_loop + step; i != end_loop; i += step) {
264-
tmp_size *= gsize_array[i - step];
265-
displs[0] += tmp_size * st_offsets[i];
266-
displs[1] *= gsize_array[i];
267-
}
268-
displs[0] *= orig_extent;
269-
270-
*newtype = ompi_datatype_create(lastType->super.desc.used);
271-
rc = ompi_datatype_add(*newtype, lastType, 1, displs[0], displs[1]);
272-
ompi_datatype_destroy(&lastType);
273-
opal_datatype_resize( &(*newtype)->super, 0, displs[1] );
274-
/* need to destroy the old type even in error condition, so
275-
don't check return code from above until after cleanup. */
276-
if (MPI_SUCCESS != rc) newtype = NULL;
263+
displs[0] = st_offsets[start_loop];
264+
for (i = start_loop + step; i != end_loop; i += step) {
265+
tmp_size *= gsize_array[i - step];
266+
displs[0] += tmp_size * st_offsets[i];
267+
}
268+
displs[0] *= orig_extent;
269+
270+
*newtype = ompi_datatype_create(lastType->super.desc.used);
271+
rc = ompi_datatype_add(*newtype, lastType, 1, displs[0], displs[1]);
272+
ompi_datatype_destroy(&lastType);
273+
/* need to destroy the old type even in error condition, so
274+
don't check return code from above until after cleanup. */
275+
if (MPI_SUCCESS != rc) {
276+
ompi_datatype_destroy (newtype);
277+
} else {
278+
(void) opal_datatype_resize( &(*newtype)->super, 0, displs[1]);
277279
}
278280

279281
cleanup:
280-
if (NULL != st_offsets) free(st_offsets);
281-
if (NULL != coords) free(coords);
282-
return OMPI_SUCCESS;
282+
free(st_offsets);
283+
free(coords);
284+
return rc;
283285
}

0 commit comments

Comments
 (0)