diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index 86f7396abe9..dbecfcfe229 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -40,6 +40,7 @@ #include "ompi/constants.h" #include "opal/datatype/opal_convertor.h" #include "opal/util/output.h" +#include "ompi/util/count_disp_array.h" #include "mpi.h" BEGIN_C_DECLS @@ -124,7 +125,7 @@ OMPI_DECLSPEC int32_t ompi_datatype_default_convertors_init( void ); OMPI_DECLSPEC int32_t ompi_datatype_default_convertors_fini( void ); OMPI_DECLSPEC void ompi_datatype_dump (const ompi_datatype_t* pData); -OMPI_DECLSPEC ompi_datatype_t* ompi_datatype_create( int32_t expectedSize ); +OMPI_DECLSPEC ompi_datatype_t* ompi_datatype_create( size_t expectedSize ); static inline int32_t ompi_datatype_is_committed( const ompi_datatype_t* type ) @@ -151,7 +152,7 @@ ompi_datatype_is_predefined( const ompi_datatype_t* type ) } static inline int32_t -ompi_datatype_is_contiguous_memory_layout( const ompi_datatype_t* type, int32_t count ) +ompi_datatype_is_contiguous_memory_layout( const ompi_datatype_t* type, size_t count ) { return opal_datatype_is_contiguous_memory_layout(&type->super, count); } @@ -189,27 +190,27 @@ ompi_datatype_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd, size OMPI_DECLSPEC int32_t ompi_datatype_duplicate( const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_contiguous( int count, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_vector( int count, int bLength, int stride, +OMPI_DECLSPEC int32_t ompi_datatype_create_contiguous( size_t count, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); +OMPI_DECLSPEC int32_t ompi_datatype_create_vector( size_t count, size_t bLength, ptrdiff_t stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_hvector( int count, int bLength, ptrdiff_t 
stride, +OMPI_DECLSPEC int32_t ompi_datatype_create_hvector( size_t count, size_t bLength, ptrdiff_t stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const int* pDisp, +OMPI_DECLSPEC int32_t ompi_datatype_create_indexed( size_t count, const ompi_count_array_t pBlockLength, const ompi_disp_array_t pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const ptrdiff_t* pDisp, +OMPI_DECLSPEC int32_t ompi_datatype_create_hindexed( size_t count, const ompi_count_array_t pBlockLength, const ompi_disp_array_t pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* pDisp, +OMPI_DECLSPEC int32_t ompi_datatype_create_indexed_block( size_t count, size_t bLength, const ompi_disp_array_t pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdiff_t* pDisp, +OMPI_DECLSPEC int32_t ompi_datatype_create_hindexed_block( size_t count, size_t bLength, const ompi_disp_array_t pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const ptrdiff_t* pDisp, +OMPI_DECLSPEC int32_t ompi_datatype_create_struct( size_t count, const ompi_count_array_t pBlockLength, const ompi_disp_array_t pDisp, ompi_datatype_t* const* pTypes, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_darray( int size, int rank, int ndims, int const* gsize_array, - int const* distrib_array, int const* darg_array, - int const* psize_array, int order, const ompi_datatype_t* oldtype, +OMPI_DECLSPEC int32_t ompi_datatype_create_darray( int size, int rank, int ndims, const 
ompi_count_array_t gsize_array, + const int* distrib_array, const int* darg_array, + const int* psize_array, int order, const ompi_datatype_t* oldtype, ompi_datatype_t** newtype); -OMPI_DECLSPEC int32_t ompi_datatype_create_subarray(int ndims, int const* size_array, int const* subsize_array, - int const* start_array, int order, +OMPI_DECLSPEC int32_t ompi_datatype_create_subarray(int ndims, const ompi_count_array_t size_array, const ompi_count_array_t subsize_array, + const ompi_count_array_t start_array, int order, const ompi_datatype_t* oldtype, ompi_datatype_t** newtype); static inline int32_t ompi_datatype_create_resized( const ompi_datatype_t* oldType, @@ -296,25 +297,26 @@ ompi_datatype_copy_content_same_ddt( const ompi_datatype_t* type, size_t count, return 0; } -OMPI_DECLSPEC const ompi_datatype_t* ompi_datatype_match_size( int size, uint16_t datakind, uint16_t datalang ); +OMPI_DECLSPEC const ompi_datatype_t* ompi_datatype_match_size( size_t size, uint16_t datakind, uint16_t datalang ); /* * */ -OMPI_DECLSPEC int32_t ompi_datatype_sndrcv( const void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, - void *rbuf, int32_t rcount, const ompi_datatype_t* rdtype); +OMPI_DECLSPEC int32_t ompi_datatype_sndrcv( const void *sbuf, size_t scount, const ompi_datatype_t* sdtype, + void *rbuf, size_t rcount, const ompi_datatype_t* rdtype); /* * */ OMPI_DECLSPEC int32_t ompi_datatype_get_args( const ompi_datatype_t* pData, int32_t which, - int32_t * ci, int32_t * i, - int32_t * ca, ptrdiff_t* a, - int32_t * cd, ompi_datatype_t** d, int32_t * type); + size_t * ci, int* i, + size_t * cl, MPI_Count* l, + size_t * ca, ptrdiff_t* a, + size_t * cd, ompi_datatype_t** d, int32_t * type); OMPI_DECLSPEC int32_t ompi_datatype_set_args( ompi_datatype_t* pData, - int32_t ci, const int32_t ** i, - int32_t ca, const ptrdiff_t* a, - int32_t cd, ompi_datatype_t* const * d,int32_t type); + size_t ci, size_t cl, const ompi_count_array_t *counts, + size_t ca, const ompi_disp_array_t a, 
+ size_t cd, ompi_datatype_t* const * d,int32_t type); OMPI_DECLSPEC int32_t ompi_datatype_copy_args( const ompi_datatype_t* source_data, ompi_datatype_t* dest_data ); OMPI_DECLSPEC int32_t ompi_datatype_release_args( ompi_datatype_t* pData ); diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index 22e3c3f51f2..45c2b682377 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -16,6 +16,7 @@ * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2025 Stony Brook University. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,19 +42,22 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, void** packed_buffer, int* next_index ); static ompi_datatype_t* -__ompi_datatype_create_from_args( int32_t* i, ptrdiff_t * a, +__ompi_datatype_create_from_args( const int* i, const size_t *l, const ptrdiff_t * a, ompi_datatype_t** d, int32_t type ); typedef struct __dt_args { opal_atomic_int32_t ref_count; int32_t create_type; size_t total_pack_size; - int32_t ci; - int32_t ca; - int32_t cd; - int* i; - ptrdiff_t* a; - ompi_datatype_t** d; + /* TODO: should they be size_t? */ + size_t ci; + size_t ca; + size_t cd; + size_t cl; + ptrdiff_t* a; + ompi_datatype_t** d; + size_t* l; // array of size_t counts + int* i; // array of integer counts } ompi_datatype_args_t; /** @@ -71,30 +75,44 @@ typedef struct __dt_args { #define OMPI_DATATYPE_ALIGN_PTR(PTR, TYPE) #endif /* OPAL_ALIGN_WORD_SIZE_INTEGERS */ +/** + * Copies count elements from the given count array into either + * the integer or size_t destination depending on whether the + * count array is 32 or 64 bit. Advances the destination pointer. 
+ */ +static inline void copy_count_array(size_t count, int**__restrict__ desti, size_t**__restrict__ destc, ompi_count_array_t array) { + size_t elem_size = opal_count_array_is_64bit(array) ? sizeof(size_t) : sizeof(int); + void *dest = opal_count_array_is_64bit(array) ? (void*)*destc : (void*)*desti; + memcpy(dest, opal_count_array_ptr(array), count * elem_size); + if (opal_count_array_is_64bit(array)) { + *destc += count; + } else { + *desti += count; + } +} + int32_t ompi_datatype_set_args( ompi_datatype_t* pData, - int32_t ci, const int32_t** i, - int32_t ca, const ptrdiff_t* a, - int32_t cd, ompi_datatype_t* const * d, int32_t type) + size_t ci, size_t cl, const ompi_count_array_t *counts, + size_t ca, const opal_disp_array_t a, + size_t cd, ompi_datatype_t* const * d, int32_t type) { - int pos; + size_t pos; assert( NULL == pData->args ); - int length = sizeof(ompi_datatype_args_t) + ci * sizeof(int) + - ca * sizeof(ptrdiff_t) + cd * sizeof(MPI_Datatype); + + size_t length = sizeof(ompi_datatype_args_t) + ci * sizeof(int) + + cl * sizeof(size_t) + ca * sizeof(ptrdiff_t) + + cd * sizeof(MPI_Datatype); char* buf = (char*)malloc( length ); ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)buf; + size_t *pl = NULL; + int *pi = NULL; pArgs->ci = ci; pArgs->i = NULL; + pArgs->cl = cl; pArgs->l = NULL; pArgs->ca = ca; pArgs->a = NULL; pArgs->cd = cd; pArgs->d = NULL; pArgs->create_type = type; - /** - * Some architectures require 64 bits pointers (to pointers) to - * be 64 bits aligned. As in the ompi_datatype_args_t structure we have - * 2 such array of pointers and one to an array of ints, if we start by - * setting the 64 bits aligned one we will not have any trouble. Problem - * originally reported on SPARC 64. 
- */ buf += sizeof(ompi_datatype_args_t); if( 0 != pArgs->ca ) { pArgs->a = (ptrdiff_t*)buf; @@ -104,10 +122,18 @@ int32_t ompi_datatype_set_args( ompi_datatype_t* pData, pArgs->d = (ompi_datatype_t**)buf; buf += pArgs->cd * sizeof(MPI_Datatype); } - if( 0 != pArgs->ci ) pArgs->i = (int*)buf; + if (0 != pArgs->cl ) { + pArgs->l = pl = (size_t*)buf; + buf += pArgs->cl * sizeof(size_t); + } + if( 0 != pArgs->ci ) { + pArgs->i = pi = (int*)buf; + buf += pArgs->ci * sizeof(int); + } pArgs->ref_count = 1; - pArgs->total_pack_size = (4 + ci) * sizeof(int) + + pArgs->total_pack_size = 5 * sizeof(size_t) + ci * sizeof(int) + + cl * sizeof(size_t) + cd * sizeof(MPI_Datatype) + ca * sizeof(ptrdiff_t); switch(type) { @@ -117,92 +143,92 @@ int32_t ompi_datatype_set_args( ompi_datatype_t* pData, break; case MPI_COMBINER_CONTIGUOUS: - pArgs->i[0] = i[0][0]; + copy_count_array(1, &pi, &pl, counts[0]); break; case MPI_COMBINER_VECTOR: - pArgs->i[0] = i[0][0]; - pArgs->i[1] = i[1][0]; - pArgs->i[2] = i[2][0]; + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(1, &pi, &pl, counts[1]); + copy_count_array(1, &pi, &pl, counts[2]); break; case MPI_COMBINER_HVECTOR_INTEGER: case MPI_COMBINER_HVECTOR: - pArgs->i[0] = i[0][0]; - pArgs->i[1] = i[1][0]; + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(1, &pi, &pl, counts[1]); break; - case MPI_COMBINER_INDEXED: - pos = 1; - pArgs->i[0] = i[0][0]; - memcpy( pArgs->i + pos, i[1], i[0][0] * sizeof(int) ); - pos += i[0][0]; - memcpy( pArgs->i + pos, i[2], i[0][0] * sizeof(int) ); + case MPI_COMBINER_INDEXED: { + size_t count = opal_count_array_get(counts[0], 0); + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(count, &pi, &pl, counts[1]); + copy_count_array(count, &pi, &pl, counts[2]); break; + } case MPI_COMBINER_HINDEXED_INTEGER: - case MPI_COMBINER_HINDEXED: - pArgs->i[0] = i[0][0]; - memcpy( pArgs->i + 1, i[1], i[0][0] * sizeof(int) ); + case MPI_COMBINER_HINDEXED: { + size_t count = 
opal_count_array_get(counts[0], 0); + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(count, &pi, &pl, counts[1]); break; + } - case MPI_COMBINER_INDEXED_BLOCK: - pArgs->i[0] = i[0][0]; - pArgs->i[1] = i[1][0]; - memcpy( pArgs->i + 2, i[2], i[0][0] * sizeof(int) ); + case MPI_COMBINER_INDEXED_BLOCK: { + size_t count = opal_count_array_get(counts[0], 0); + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(1, &pi, &pl, counts[1]); + copy_count_array(count, &pi, &pl, counts[2]); break; + } case MPI_COMBINER_STRUCT_INTEGER: - case MPI_COMBINER_STRUCT: - pArgs->i[0] = i[0][0]; - memcpy( pArgs->i + 1, i[1], i[0][0] * sizeof(int) ); + case MPI_COMBINER_STRUCT: { + size_t count = opal_count_array_get(counts[0], 0); + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(count, &pi, &pl, counts[1]); break; + } - case MPI_COMBINER_SUBARRAY: - pos = 1; - pArgs->i[0] = i[0][0]; - memcpy( pArgs->i + pos, i[1], pArgs->i[0] * sizeof(int) ); - pos += pArgs->i[0]; - memcpy( pArgs->i + pos, i[2], pArgs->i[0] * sizeof(int) ); - pos += pArgs->i[0]; - memcpy( pArgs->i + pos, i[3], pArgs->i[0] * sizeof(int) ); - pos += pArgs->i[0]; - pArgs->i[pos] = i[4][0]; + case MPI_COMBINER_SUBARRAY: { + size_t count = opal_count_array_get(counts[0], 0); + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(count, &pi, &pl, counts[1]); + copy_count_array(count, &pi, &pl, counts[2]); + copy_count_array(count, &pi, &pl, counts[3]); + copy_count_array(1, &pi, &pl, counts[4]); break; + } - case MPI_COMBINER_DARRAY: - pos = 3; - pArgs->i[0] = i[0][0]; - pArgs->i[1] = i[1][0]; - pArgs->i[2] = i[2][0]; - - memcpy( pArgs->i + pos, i[3], i[2][0] * sizeof(int) ); - pos += i[2][0]; - memcpy( pArgs->i + pos, i[4], i[2][0] * sizeof(int) ); - pos += i[2][0]; - memcpy( pArgs->i + pos, i[5], i[2][0] * sizeof(int) ); - pos += i[2][0]; - memcpy( pArgs->i + pos, i[6], i[2][0] * sizeof(int) ); - pos += i[2][0]; - pArgs->i[pos] = i[7][0]; + case MPI_COMBINER_DARRAY: { + size_t 
ndim = opal_count_array_get(counts[2], 0); + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(1, &pi, &pl, counts[1]); + copy_count_array(1, &pi, &pl, counts[2]); + copy_count_array(ndim, &pi, &pl, counts[3]); + copy_count_array(ndim, &pi, &pl, counts[4]); + copy_count_array(ndim, &pi, &pl, counts[5]); + copy_count_array(ndim, &pi, &pl, counts[6]); + copy_count_array(1, &pi, &pl, counts[7]); break; + } case MPI_COMBINER_F90_REAL: case MPI_COMBINER_F90_COMPLEX: - pArgs->i[0] = i[0][0]; - pArgs->i[1] = i[1][0]; + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(1, &pi, &pl, counts[1]); break; case MPI_COMBINER_F90_INTEGER: - pArgs->i[0] = i[0][0]; + copy_count_array(1, &pi, &pl, counts[0]); break; case MPI_COMBINER_RESIZED: break; case MPI_COMBINER_HINDEXED_BLOCK: - pArgs->i[0] = i[0][0]; - pArgs->i[1] = i[1][0]; + copy_count_array(1, &pi, &pl, counts[0]); + copy_count_array(1, &pi, &pl, counts[1]); break; default: @@ -211,7 +237,7 @@ int32_t ompi_datatype_set_args( ompi_datatype_t* pData, /* copy the array of MPI_Aint, aka ptrdiff_t */ if( pArgs->a != NULL ) - memcpy( pArgs->a, a, ca * sizeof(ptrdiff_t) ); + memcpy( pArgs->a, ompi_disp_array_ptr(a), ca * sizeof(ptrdiff_t) ); for( pos = 0; pos < cd; pos++ ) { pArgs->d[pos] = d[pos]; @@ -239,7 +265,7 @@ int32_t ompi_datatype_set_args( ompi_datatype_t* pData, int32_t ompi_datatype_print_args( const ompi_datatype_t* pData ) { - int32_t i; + size_t i; ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args; if( ompi_datatype_is_predefined(pData) ) { @@ -249,15 +275,22 @@ int32_t ompi_datatype_print_args( const ompi_datatype_t* pData ) if( pArgs == NULL ) return MPI_ERR_INTERN; - printf( "type %d count ints %d count disp %d count datatype %d\n", - pArgs->create_type, pArgs->ci, pArgs->ca, pArgs->cd ); + printf( "type %d count ints %zu count counts %zu count disp %zu count datatype %zu\n", + pArgs->create_type, pArgs->ci, pArgs->cl, pArgs->ca, pArgs->cd ); if( pArgs->i != NULL ) { - printf( 
"ints: " ); + printf( "ints: "); for( i = 0; i < pArgs->ci; i++ ) { printf( "%d ", pArgs->i[i] ); } printf( "\n" ); } + if( pArgs->l != NULL ) { + printf( "counts: "); + for( i = 0; i < pArgs->cl; i++ ) { + printf( "%zu ", pArgs->l[i] ); + } + printf( "\n" ); + } if( pArgs->a != NULL ) { printf( "MPI_Aint: " ); for( i = 0; i < pArgs->ca; i++ ) { @@ -309,9 +342,10 @@ int32_t ompi_datatype_print_args( const ompi_datatype_t* pData ) int32_t ompi_datatype_get_args( const ompi_datatype_t* pData, int32_t which, - int32_t* ci, int32_t* i, - int32_t* ca, ptrdiff_t* a, - int32_t* cd, ompi_datatype_t** d, int32_t* type) + size_t* ci, int* i, + size_t* cl, MPI_Count* l, + size_t* ca, ptrdiff_t* a, + size_t* cd, ompi_datatype_t** d, int32_t* type) { ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args; @@ -320,6 +354,7 @@ int32_t ompi_datatype_get_args( const ompi_datatype_t* pData, int32_t which, switch(which){ case 0: *ci = 0; + *cl = 0; *ca = 0; *cd = 0; *type = MPI_COMBINER_NAMED; @@ -335,17 +370,18 @@ int32_t ompi_datatype_get_args( const ompi_datatype_t* pData, int32_t which, switch(which){ case 0: /* GET THE LENGTHS */ *ci = pArgs->ci; + *cl = pArgs->cl; *ca = pArgs->ca; *cd = pArgs->cd; *type = pArgs->create_type; break; case 1: /* GET THE ARGUMENTS */ - if(*ci < pArgs->ci || *ca < pArgs->ca || *cd < pArgs->cd) { - return MPI_ERR_ARG; - } if( (NULL != i) && (NULL != pArgs->i) ) { memcpy( i, pArgs->i, pArgs->ci * sizeof(int) ); } + if( (NULL != l) && (NULL != pArgs->l) ) { + memcpy( l, pArgs->l, pArgs->cl * sizeof(size_t) ); + } if( (NULL != a) && (NULL != pArgs->a) ) { memcpy( a, pArgs->a, pArgs->ca * sizeof(ptrdiff_t) ); } @@ -384,7 +420,7 @@ int32_t ompi_datatype_copy_args( const ompi_datatype_t* source_data, */ int32_t ompi_datatype_release_args( ompi_datatype_t* pData ) { - int i; + size_t i; ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args; assert( 0 < pArgs->ref_count ); @@ -409,13 +445,16 @@ int32_t ompi_datatype_release_args( 
ompi_datatype_t* pData ) static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, void** packed_buffer, int* next_index ) { - int i, *position = (int*)*packed_buffer; + size_t i; + int *iposition = NULL; ompi_datatype_args_t* args = (ompi_datatype_args_t*)datatype->args; char* next_packed = (char*)*packed_buffer; + iposition = (int*)next_packed; + if( ompi_datatype_is_predefined(datatype) ) { - position[0] = MPI_COMBINER_NAMED; - position[1] = datatype->id; /* On the OMPI - layer, copy the ompi_datatype.id */ + iposition[0] = MPI_COMBINER_NAMED; + iposition[1] = datatype->id; /* On the OMPI - layer, copy the ompi_datatype.id */ next_packed += (2 * sizeof(int)); *packed_buffer = next_packed; return OMPI_SUCCESS; @@ -427,28 +466,31 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, packed_buffer, next_index ); } - position[0] = args->create_type; - position[1] = args->ci; - position[2] = args->ca; - position[3] = args->cd; - next_packed += (4 * sizeof(int)); - /* Spoiler: We will access the data in this storage structure, and thus we - * need to align it to the expected boundaries (special thanks to Sparc64). - * The simplest way is to ensure that prior to each type that must be 64 - * bits aligned, we have a pointer that is 64 bits aligned. That will minimize - * the memory requirements in all cases where no displacements are stored. 
- */ + iposition[0] = args->create_type; + next_packed += sizeof(int); + /* align pointer to 64 bits */ + OMPI_DATATYPE_ALIGN_PTR(next_packed, char*); + size_t *cposition = ((size_t*)next_packed); + cposition[0] = args->ci; + cposition[1] = args->cl; + cposition[2] = args->ca; + cposition[3] = args->cd; + next_packed += (4 * sizeof(size_t)); if( 0 < args->ca ) { - /* description of the displacements must be 64 bits aligned */ - OMPI_DATATYPE_ALIGN_PTR(next_packed, char*); - memcpy( next_packed, args->a, sizeof(ptrdiff_t) * args->ca ); next_packed += sizeof(ptrdiff_t) * args->ca; } - position = (int*)next_packed; + if ( 0 < args->cl ) { + memcpy( next_packed, args->l, sizeof(size_t) * args->cl ); + next_packed += sizeof(size_t) * args->cl; + } + /* advance int pointer */ + iposition = (int*)next_packed; + + /* skip the datatypes */ next_packed += sizeof(int) * args->cd; - /* copy the array of counts (32 bits aligned) */ + /* copy the array of 32bit counts at the end */ memcpy( next_packed, args->i, sizeof(int) * args->ci ); next_packed += args->ci * sizeof(int); @@ -456,9 +498,9 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, for( i = 0; i < args->cd; i++ ) { ompi_datatype_t* temp_data = args->d[i]; if( ompi_datatype_is_predefined(temp_data) ) { - position[i] = temp_data->id; /* On the OMPI - layer, copy the ompi_datatype.id */ + iposition[i] = temp_data->id; /* On the OMPI - layer, copy the ompi_datatype.id */ } else { - position[i] = *next_index; + iposition[i] = *next_index; (*next_index)++; __ompi_datatype_pack_description( temp_data, (void**)&next_packed, @@ -548,13 +590,16 @@ size_t ompi_datatype_pack_description_length( ompi_datatype_t* datatype ) static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** packed_buffer, const struct ompi_proc_t* remote_processor ) { - int* position; + int* iposition; + size_t *cposition; ompi_datatype_t* datatype = NULL; ompi_datatype_t** array_of_datatype; ptrdiff_t* 
array_of_disp; - int* array_of_length; - int number_of_length, number_of_disp, number_of_datatype, data_id; - int create_type, i; + int* array_of_ints = NULL; /* stays NULL when the packed description carries no ints */ + size_t *array_of_counts = NULL; + size_t number_of_ints, number_of_counts, number_of_disp, number_of_datatype, data_id; + int create_type; + size_t i; char* next_buffer; #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT @@ -567,9 +612,14 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p #endif next_buffer = (char*)*packed_buffer; - position = (int*)next_buffer; - - create_type = position[0]; + cposition = (size_t*)next_buffer; + iposition = (int*)next_buffer; + + create_type = (int)iposition[0]; + next_buffer += sizeof(int); + /* align pointer to 64 bits */ + OMPI_DATATYPE_ALIGN_PTR(next_buffer, char*); + cposition = (size_t*)next_buffer; #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) { create_type = opal_swap_bytes4(create_type); @@ -577,48 +627,50 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p #endif if( MPI_COMBINER_NAMED == create_type ) { /* there we have a simple predefined datatype */ - data_id = position[1]; + data_id = iposition[1]; #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) { data_id = opal_swap_bytes4(data_id); } #endif assert( data_id < OMPI_DATATYPE_MAX_PREDEFINED ); - *packed_buffer = position + 2; + *packed_buffer = iposition + 2; return (ompi_datatype_t*)ompi_datatype_basicDatatypes[data_id]; } - number_of_length = position[1]; - number_of_disp = position[2]; - number_of_datatype = position[3]; + number_of_ints = cposition[0]; + number_of_counts = cposition[1]; + number_of_disp = cposition[2]; + number_of_datatype = cposition[3]; #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) { - number_of_length = opal_swap_bytes4(number_of_length); - number_of_disp = opal_swap_bytes4(number_of_disp); - number_of_datatype = opal_swap_bytes4(number_of_datatype); + number_of_ints = opal_swap_bytes8(number_of_ints); + number_of_counts = 
opal_swap_bytes8(number_of_counts); + number_of_disp = opal_swap_bytes8(number_of_disp); + number_of_datatype = opal_swap_bytes8(number_of_datatype); } #endif array_of_datatype = (ompi_datatype_t**)malloc( sizeof(ompi_datatype_t*) * number_of_datatype ); - next_buffer += (4 * sizeof(int)); /* move after the header */ - - /* description of the displacements (if ANY !) should always be aligned - on MPI_Aint, aka ptrdiff_t */ - if (number_of_disp > 0) { - OMPI_DATATYPE_ALIGN_PTR(next_buffer, char*); - } - + next_buffer += (4 * sizeof(size_t)); /* move after the header */ + /* the array of displacements */ array_of_disp = (ptrdiff_t*)next_buffer; next_buffer += number_of_disp * sizeof(ptrdiff_t); + if (number_of_counts > 0) { + array_of_counts = (size_t*)next_buffer; + next_buffer += number_of_counts * sizeof(size_t); + } /* the other datatypes */ - position = (int*)next_buffer; + iposition = (int*)next_buffer; next_buffer += number_of_datatype * sizeof(int); /* the array of lengths (32 bits aligned) */ - array_of_length = (int*)next_buffer; - next_buffer += (number_of_length * sizeof(int)); + if (number_of_ints > 0) { + array_of_ints = (int*)next_buffer; + next_buffer += number_of_ints * sizeof(int); + } for( i = 0; i < number_of_datatype; i++ ) { - data_id = position[i]; + data_id = iposition[i]; #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) { data_id = opal_swap_bytes4(data_id); @@ -644,8 +696,16 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) { - for (i = 0 ; i < number_of_length ; ++i) { - array_of_length[i] = opal_swap_bytes4(array_of_length[i]); + /* size_t counts are swapped as 64-bit words, plain ints as 32-bit words */ + if (NULL != array_of_counts) { + for (i = 0 ; i < number_of_counts ; ++i) { + array_of_counts[i] = opal_swap_bytes8(array_of_counts[i]); + } + } + if (number_of_ints > 0) { + for (i = 0 ; i < number_of_ints ; ++i) { + array_of_ints[i] = opal_swap_bytes4(array_of_ints[i]); + } } 
for (i = 0 ; i < number_of_disp ; ++i) { #if SIZEOF_PTRDIFF_T == 4 @@ -658,7 +718,7 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p } } #endif - datatype = __ompi_datatype_create_from_args( array_of_length, array_of_disp, + datatype = __ompi_datatype_create_from_args( array_of_ints, array_of_counts, array_of_disp, array_of_datatype, create_type ); *packed_buffer = next_buffer; cleanup_and_exit: @@ -671,11 +731,14 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p return datatype; } -static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint* a, +static ompi_datatype_t* __ompi_datatype_create_from_args( const int* i, const size_t *l, const ptrdiff_t* a, ompi_datatype_t** d, int32_t type ) { + size_t count, ci = 0, cl = 0; ompi_datatype_t* datatype = NULL; + ompi_disp_array_t disp_array = OMPI_DISP_ARRAY_CREATE(a); + switch(type){ /******************************************************************/ case MPI_COMBINER_DUP: @@ -684,81 +747,219 @@ static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint* assert(0); /* shouldn't happen */ break; /******************************************************************/ - case MPI_COMBINER_CONTIGUOUS: - ompi_datatype_create_contiguous( i[0], d[0], &datatype ); - ompi_datatype_set_args( datatype, 1, (const int **) &i, 0, NULL, 1, d, MPI_COMBINER_CONTIGUOUS ); + case MPI_COMBINER_CONTIGUOUS: { + ompi_count_array_t a_i[1]; + if (l == NULL) { + count = i[0]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(i); + ci = 1; + } else { // large count variant + count = l[0]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(l); + cl = 1; + } + ompi_datatype_create_contiguous( count, d[0], &datatype ); + ompi_datatype_set_args( datatype, ci, cl, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, d, MPI_COMBINER_CONTIGUOUS ); break; + } /******************************************************************/ - case MPI_COMBINER_VECTOR: - ompi_datatype_create_vector( i[0], i[1], 
i[2], d[0], &datatype ); - { - const int* a_i[3] = {&i[0], &i[1], &i[2]}; - ompi_datatype_set_args( datatype, 3, a_i, 0, NULL, 1, d, MPI_COMBINER_VECTOR ); + case MPI_COMBINER_VECTOR: { + size_t blocklength, stride; + opal_count_array_t a_i[3]; + if (l == NULL) { + count = i[0]; + blocklength= i[1]; + stride = i[2]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(i); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(i + 1); + a_i[2] = OMPI_COUNT_ARRAY_CREATE(i + 2); + ci = 3; + } else { // large count variant + count = l[0]; + blocklength= l[1]; + stride = l[2]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(l); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(l + 1); + a_i[2] = OMPI_COUNT_ARRAY_CREATE(l + 2); + cl = 3; } + ompi_datatype_create_vector( count, blocklength, stride, d[0], &datatype ); + ompi_datatype_set_args( datatype, ci, cl, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, d, MPI_COMBINER_VECTOR ); break; + } /******************************************************************/ case MPI_COMBINER_HVECTOR_INTEGER: case MPI_COMBINER_HVECTOR: - ompi_datatype_create_hvector( i[0], i[1], a[0], d[0], &datatype ); { - const int* a_i[2] = {&i[0], &i[1]}; - ompi_datatype_set_args( datatype, 2, a_i, 1, a, 1, d, MPI_COMBINER_HVECTOR ); + size_t blocklength; + opal_count_array_t a_i[2]; + if (l == NULL) { + count = i[0]; + blocklength = i[1]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(i); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(i + 1); + ci = 2; + } else { // large count variant + count = l[0]; + blocklength = l[1]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(l); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(l + 1); + cl = 2; + } + ompi_datatype_create_hvector( count, blocklength, a[0], d[0], &datatype ); + ompi_datatype_set_args( datatype, ci, cl, a_i, 1, disp_array, 1, d, MPI_COMBINER_HVECTOR ); } break; /******************************************************************/ case MPI_COMBINER_INDEXED: /* TO CHECK */ - ompi_datatype_create_indexed( i[0], &(i[1]), &(i[1+i[0]]), d[0], &datatype ); { - const int* a_i[3] = {&i[0], &i[1], &(i[1+i[0]])}; - 
ompi_datatype_set_args( datatype, 2 * i[0] + 1, a_i, 0, NULL, 1, d, MPI_COMBINER_INDEXED ); + opal_count_array_t a_i[3]; + if (l == NULL) { + count = i[0]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(i); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(i + 1); + a_i[2] = OMPI_COUNT_ARRAY_CREATE(i + 1 + count); + ci = 2 * count + 1; + } else { + count = l[0]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(l); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(l + 1); + a_i[2] = OMPI_COUNT_ARRAY_CREATE(l + 1 + count); + cl = 2 * count + 1; + } + ompi_datatype_create_indexed( count, a_i[1], a_i[2], d[0], &datatype ); + ompi_datatype_set_args( datatype, ci, cl, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, d, MPI_COMBINER_INDEXED ); } break; /******************************************************************/ case MPI_COMBINER_HINDEXED_INTEGER: case MPI_COMBINER_HINDEXED: - ompi_datatype_create_hindexed( i[0], &(i[1]), a, d[0], &datatype ); { - const int* a_i[2] = {&i[0], &i[1]}; - ompi_datatype_set_args( datatype, i[0] + 1, a_i, i[0], a, 1, d, MPI_COMBINER_HINDEXED ); + opal_count_array_t a_i[2]; + if (l == NULL) { + count = i[0]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(i); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(i + 1); + ci = count+1; + } else { + count = l[0]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(l); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(l + 1); + cl = count+1; + } + ompi_datatype_create_hindexed( count, a_i[1], disp_array, d[0], &datatype ); + ompi_datatype_set_args( datatype, ci, cl, a_i, count, disp_array, 1, d, MPI_COMBINER_HINDEXED ); } break; /******************************************************************/ case MPI_COMBINER_INDEXED_BLOCK: - ompi_datatype_create_indexed_block( i[0], i[1], &(i[2]), d[0], &datatype ); { - const int* a_i[3] = {&i[0], &i[1], &i[2]}; - ompi_datatype_set_args( datatype, i[0] + 2, a_i, 0, NULL, 1, d, MPI_COMBINER_INDEXED_BLOCK ); + opal_count_array_t a_i[3]; + size_t blocklength; + if (l == NULL) { + count = i[0]; + blocklength = i[1]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(i); + a_i[1] = 
OMPI_COUNT_ARRAY_CREATE(i + 1); + a_i[2] = OMPI_COUNT_ARRAY_CREATE(i + 2); + ci = 2 + count; + } else { + count = l[0]; + blocklength = l[1]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(l); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(l + 1); + a_i[2] = OMPI_COUNT_ARRAY_CREATE(l + 2); + cl = 2 + count; + } + ompi_datatype_create_indexed_block( count, blocklength, a_i[2], d[0], &datatype ); + ompi_datatype_set_args( datatype, ci, cl, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, d, MPI_COMBINER_INDEXED_BLOCK ); } break; /******************************************************************/ case MPI_COMBINER_STRUCT_INTEGER: case MPI_COMBINER_STRUCT: - ompi_datatype_create_struct( i[0], &(i[1]), a, d, &datatype ); { - const int* a_i[2] = {&i[0], &i[1]}; - ompi_datatype_set_args( datatype, i[0] + 1, a_i, i[0], a, i[0], d, MPI_COMBINER_STRUCT ); + opal_count_array_t a_i[2]; + if (l == NULL) { + count = i[0]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(i); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(i + 1); + ci = count + 1; /* the count itself plus one block length per entry, same as the large-count branch */ + } else { + count = l[0]; + a_i[0] = OMPI_COUNT_ARRAY_CREATE(l); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(l + 1); + cl = count + 1; + } + ompi_datatype_create_struct( count, a_i[1], disp_array, d, &datatype ); + ompi_datatype_set_args( datatype, ci, cl, a_i, count, disp_array, count, d, MPI_COMBINER_STRUCT ); } break; /******************************************************************/ case MPI_COMBINER_SUBARRAY: - ompi_datatype_create_subarray( i[0], &i[1 + 0 * i[0]], &i[1 + 1 * i[0]], - &i[1 + 2 * i[0]], i[1 + 3 * i[0]], - d[0], &datatype ); { - const int* a_i[5] = {&i[0], &i[1 + 0 * i[0]], &i[1 + 1 * i[0]], &i[1 + 2 * i[0]], &i[1 + 3 * i[0]]}; - ompi_datatype_set_args( datatype, 3 * i[0] + 2, a_i, 0, NULL, 1, d, MPI_COMBINER_SUBARRAY); + count = i[0]; // first element in int array + int order; + opal_count_array_t a_i[5]; + if (l == NULL) { + a_i[0] = OMPI_COUNT_ARRAY_CREATE(i); + a_i[1] = OMPI_COUNT_ARRAY_CREATE(i + 1); + a_i[2] = OMPI_COUNT_ARRAY_CREATE(i + 1 + count); + a_i[3] = 
OMPI_COUNT_ARRAY_CREATE(i + 1 + 2*count); + a_i[4] = OMPI_COUNT_ARRAY_CREATE(i + 1 + 3*count); + order = i[3*count+1]; // last element in int array + ci = 3 * count + 2; + } else { + a_i[0] = OMPI_COUNT_ARRAY_CREATE(i); // ndim + a_i[1] = OMPI_COUNT_ARRAY_CREATE(l); // sizes + a_i[2] = OMPI_COUNT_ARRAY_CREATE(l + count); // subsizes + a_i[3] = OMPI_COUNT_ARRAY_CREATE(l + 2*count); // starts + a_i[4] = OMPI_COUNT_ARRAY_CREATE(i+1); // order + order = i[1]; // second (and last) element in int array + cl = 3 * count; + ci = 2; + } + ompi_datatype_create_subarray( count, a_i[1], a_i[2], a_i[3], order, d[0], &datatype ); + ompi_datatype_set_args( datatype, ci, cl, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, d, MPI_COMBINER_SUBARRAY ); } break; /******************************************************************/ case MPI_COMBINER_DARRAY: - ompi_datatype_create_darray( i[0] /* size */, i[1] /* rank */, i[2] /* ndims */, - &i[3 + 0 * i[2]], &i[3 + 1 * i[2]], - &i[3 + 2 * i[2]], &i[3 + 3 * i[2]], - i[3 + 4 * i[2]], d[0], &datatype ); { - const int* a_i[8] = {&i[0], &i[1], &i[2], &i[3 + 0 * i[2]], &i[3 + 1 * i[2]], &i[3 + 2 * i[2]], - &i[3 + 3 * i[2]], &i[3 + 4 * i[2]]}; - ompi_datatype_set_args( datatype, 4 * i[2] + 4, a_i, 0, NULL, 1, d, MPI_COMBINER_DARRAY); + int size = i[0]; + int rank = i[1]; + int ndims = i[2]; + ompi_count_array_t gsize_array; + const int *distrib_array; + const int *darg_array; + const int *psize_array; + int order; + if (l == NULL) { + gsize_array = OMPI_COUNT_ARRAY_CREATE(i + 3); + distrib_array = &i[3 + 1*ndims]; + darg_array = &i[3 + 2*ndims]; + psize_array = &i[3 + 3*ndims]; + order = i[3 + 4*ndims]; + ci = 4 + 4 * ndims; + } else { + gsize_array = OMPI_COUNT_ARRAY_CREATE(l); + distrib_array = &i[3 + 0*ndims]; + darg_array = &i[3 + 1*ndims]; + psize_array = &i[3 + 2*ndims]; + order = i[3 + 3*ndims]; + ci = 4 + 3 * ndims; + cl = ndims; + } + opal_count_array_t a_i[8] = {OMPI_COUNT_ARRAY_CREATE(&size), + OMPI_COUNT_ARRAY_CREATE(&rank), + 
OMPI_COUNT_ARRAY_CREATE(&ndims), + gsize_array, + OMPI_COUNT_ARRAY_CREATE(distrib_array), + OMPI_COUNT_ARRAY_CREATE(darg_array), + OMPI_COUNT_ARRAY_CREATE(psize_array), + OMPI_COUNT_ARRAY_CREATE(&order)}; + ompi_datatype_create_darray( size, rank, ndims, gsize_array, distrib_array, darg_array, psize_array, order, d[0], &datatype ); + ompi_datatype_set_args( datatype, ci, cl, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, d, MPI_COMBINER_DARRAY); } break; /******************************************************************/ @@ -775,14 +976,24 @@ static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint* /******************************************************************/ case MPI_COMBINER_RESIZED: ompi_datatype_create_resized(d[0], a[0], a[1], &datatype); - ompi_datatype_set_args( datatype, 0, NULL, 2, a, 1, d, MPI_COMBINER_RESIZED ); + ompi_datatype_set_args( datatype, 0, 0, NULL, 2, disp_array, 1, d, MPI_COMBINER_RESIZED ); break; /******************************************************************/ case MPI_COMBINER_HINDEXED_BLOCK: - ompi_datatype_create_hindexed_block( i[0], i[1], a, d[0], &datatype ); { - const int* a_i[2] = {&i[0], &i[1]}; - ompi_datatype_set_args( datatype, 2, a_i, i[0], a, 1, d, MPI_COMBINER_HINDEXED_BLOCK ); + size_t bLength = 0; + if (l == NULL) { + count = i[0]; + bLength = i[1]; + ci = 2; + } else { + count = l[0]; + bLength = l[1]; + cl = 2; + } + ompi_datatype_create_hindexed_block( count, bLength, disp_array, d[0], &datatype ); + opal_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&count), OMPI_COUNT_ARRAY_CREATE(&bLength)}; + ompi_datatype_set_args( datatype, ci, cl, a_i, count, disp_array, 1, d, MPI_COMBINER_HINDEXED_BLOCK ); } break; /******************************************************************/ @@ -816,7 +1027,7 @@ ompi_datatype_t* ompi_datatype_get_single_predefined_type_from_args( ompi_dataty { ompi_datatype_t *predef = NULL, *current_type, *current_predef; ompi_datatype_args_t* args = 
(ompi_datatype_args_t*)type->args; - int i; + size_t i; if( ompi_datatype_is_predefined(type) ) return type; diff --git a/ompi/datatype/ompi_datatype_create.c b/ompi/datatype/ompi_datatype_create.c index 4b01f2dfed6..dc0110c70e9 100644 --- a/ompi/datatype/ompi_datatype_create.c +++ b/ompi/datatype/ompi_datatype_create.c @@ -71,7 +71,7 @@ static void __ompi_datatype_release(ompi_datatype_t * datatype) OBJ_CLASS_INSTANCE(ompi_datatype_t, opal_datatype_t, __ompi_datatype_allocate, __ompi_datatype_release); -ompi_datatype_t * ompi_datatype_create( int32_t expectedSize ) +ompi_datatype_t * ompi_datatype_create( size_t expectedSize ) { int ret; ompi_datatype_t * datatype = (ompi_datatype_t*)OBJ_NEW(ompi_datatype_t); diff --git a/ompi/datatype/ompi_datatype_create_contiguous.c b/ompi/datatype/ompi_datatype_create_contiguous.c index 6a287caa41c..92b238ccb3f 100644 --- a/ompi/datatype/ompi_datatype_create_contiguous.c +++ b/ompi/datatype/ompi_datatype_create_contiguous.c @@ -24,7 +24,7 @@ #include "ompi/datatype/ompi_datatype_internal.h" #include "mpi.h" -int32_t ompi_datatype_create_contiguous( int count, const ompi_datatype_t* oldType, +int32_t ompi_datatype_create_contiguous( size_t count, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ompi_datatype_t* pdt; diff --git a/ompi/datatype/ompi_datatype_create_darray.c b/ompi/datatype/ompi_datatype_create_darray.c index e0292755c4b..e13e99d09de 100644 --- a/ompi/datatype/ompi_datatype_create_darray.c +++ b/ompi/datatype/ompi_datatype_create_darray.c @@ -30,7 +30,7 @@ #include "ompi/datatype/ompi_datatype.h" static int -block(const int *gsize_array, int dim, int ndims, int nprocs, +block(ompi_count_array_t gsize_array, int dim, int ndims, int nprocs, int rank, int darg, int order, ptrdiff_t orig_extent, ompi_datatype_t *type_old, ompi_datatype_t **type_new, ptrdiff_t *st_offset) @@ -38,7 +38,7 @@ block(const int *gsize_array, int dim, int ndims, int nprocs, int blksize, global_size, mysize, i, j, rc, start_loop, 
step; ptrdiff_t stride, disps[2]; - global_size = gsize_array[dim]; + global_size = ompi_count_array_get(gsize_array, dim); if (darg == MPI_DISTRIBUTE_DFLT_DARG) blksize = (global_size + nprocs - 1) / nprocs; @@ -62,7 +62,7 @@ block(const int *gsize_array, int dim, int ndims, int nprocs, if (OMPI_SUCCESS != rc) return rc; } else { for (i = start_loop ; i != dim ; i += step) { - stride *= gsize_array[i]; + stride *= ompi_count_array_get(gsize_array, i); } rc = ompi_datatype_create_hvector(mysize, 1, stride, type_old, type_new); if (OMPI_SUCCESS != rc) return rc; @@ -76,11 +76,11 @@ block(const int *gsize_array, int dim, int ndims, int nprocs, disps[0] = 0; disps[1] = orig_extent; if (order == MPI_ORDER_FORTRAN) { for(i=0; i<=dim; i++) { - disps[1] *= gsize_array[i]; + disps[1] *= ompi_count_array_get(gsize_array, i); } } else { for(i=ndims-1; i>=dim; i--) { - disps[1] *= gsize_array[i]; + disps[1] *= ompi_count_array_get(gsize_array, i); } } rc = opal_datatype_resize( &(*type_new)->super, disps[0], disps[1] ); @@ -91,7 +91,7 @@ block(const int *gsize_array, int dim, int ndims, int nprocs, static int -cyclic(const int *gsize_array, int dim, int ndims, int nprocs, +cyclic(ompi_count_array_t gsize_array, int dim, int ndims, int nprocs, int rank, int darg, int order, ptrdiff_t orig_extent, ompi_datatype_t* type_old, ompi_datatype_t **type_new, ptrdiff_t *st_offset) @@ -107,7 +107,7 @@ cyclic(const int *gsize_array, int dim, int ndims, int nprocs, } st_index = rank * blksize; - end_index = gsize_array[dim] - 1; + end_index = ompi_count_array_get(gsize_array, dim) - 1; if (end_index < st_index) { local_size = 0; @@ -123,11 +123,11 @@ cyclic(const int *gsize_array, int dim, int ndims, int nprocs, stride = nprocs*blksize*orig_extent; if (order == MPI_ORDER_FORTRAN) { for (i=0; i<dim; i++) { - stride *= gsize_array[i]; + stride *= ompi_count_array_get(gsize_array, i); } } else { for (i=ndims-1; i>dim; i--) { - stride *= gsize_array[i]; + stride *= ompi_count_array_get(gsize_array, i); } } @@ -142,7 +142,7 @@ cyclic(const int *gsize_array, int dim, int ndims, int nprocs, disps [0] = 0; disps 
[1] = count*stride; blklens[0] = 1; blklens[1] = rem; - rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp); + rc = ompi_datatype_create_struct(2, OMPI_COUNT_ARRAY_CREATE(blklens), OMPI_DISP_ARRAY_CREATE(disps), types, &type_tmp); ompi_datatype_destroy(type_new); /* even in error condition, need to destroy type_new, so check for error after destroy. */ @@ -154,11 +154,11 @@ cyclic(const int *gsize_array, int dim, int ndims, int nprocs, disps[0] = 0; disps[1] = orig_extent; if (order == MPI_ORDER_FORTRAN) { for(i=0; i<=dim; i++) { - disps[1] *= gsize_array[i]; + disps[1] *= ompi_count_array_get(gsize_array, i); } } else { for(i=ndims-1; i>=dim; i--) { - disps[1] *= gsize_array[i]; + disps[1] *= ompi_count_array_get(gsize_array, i); } } rc = opal_datatype_resize( &(*type_new)->super, disps[0], disps[1] ); @@ -174,10 +174,10 @@ cyclic(const int *gsize_array, int dim, int ndims, int nprocs, int32_t ompi_datatype_create_darray(int size, int rank, int ndims, - int const* gsize_array, - int const* distrib_array, - int const* darg_array, - int const* psize_array, + ompi_count_array_t gsize_array, + const int* distrib_array, + const int* darg_array, + const int* psize_array, int order, const ompi_datatype_t* oldtype, ompi_datatype_t** newtype) @@ -209,7 +209,7 @@ int32_t ompi_datatype_create_darray(int size, coords[i] = tmp_rank / procs; tmp_rank = tmp_rank % procs; /* compute the upper bound of the datatype, including all dimensions */ - displs[1] *= gsize_array[i]; + displs[1] *= ompi_count_array_get(gsize_array, i); } } @@ -275,7 +275,7 @@ int32_t ompi_datatype_create_darray(int size, */ displs[0] = st_offsets[start_loop]; for (i = start_loop + step; i != end_loop; i += step) { - tmp_size *= gsize_array[i - step]; + tmp_size *= ompi_count_array_get(gsize_array, i - step); displs[0] += tmp_size * st_offsets[i]; } displs[0] *= orig_extent; diff --git a/ompi/datatype/ompi_datatype_create_indexed.c b/ompi/datatype/ompi_datatype_create_indexed.c index 
2684d9d7df0..e2cdd86aa35 100644 --- a/ompi/datatype/ompi_datatype_create_indexed.c +++ b/ompi/datatype/ompi_datatype_create_indexed.c @@ -31,38 +31,38 @@ /* We try to merge together data that are contiguous */ -int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const int* pDisp, +int32_t ompi_datatype_create_indexed( size_t count, const ompi_count_array_t pBlockLength, const ompi_disp_array_t pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ptrdiff_t extent, disp, endat; ompi_datatype_t* pdt; size_t dLength; - int i; + size_t i; /* ignore all cases that lead to an empty type */ ompi_datatype_type_size(oldType, &dLength); - for( i = 0; (i < count) && (0 == pBlockLength[i]); i++ ); /* find first non zero */ + for( i = 0; (i < count) && (0 == ompi_count_array_get(pBlockLength, i)); i++ ); /* find first non zero */ if( (i == count) || (0 == dLength) ) { return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } - disp = pDisp[i]; - dLength = pBlockLength[i]; + disp = ompi_disp_array_get(pDisp, i); + dLength = ompi_count_array_get(pBlockLength, i); endat = disp + dLength; ompi_datatype_type_extent( oldType, &extent ); pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) ); for( i += 1; i < count; i++ ) { - if( 0 == pBlockLength[i] ) /* ignore empty length */ + if( 0 == ompi_count_array_get(pBlockLength, i) ) /* ignore empty length */ continue; - if( endat == pDisp[i] ) { /* contiguous with the previsious */ - dLength += pBlockLength[i]; - endat += pBlockLength[i]; + if( endat == ompi_disp_array_get(pDisp, i) ) { /* contiguous with the previsious */ + dLength += ompi_count_array_get(pBlockLength, i); + endat += ompi_count_array_get(pBlockLength, i); } else { ompi_datatype_add( pdt, oldType, dLength, disp * extent, extent ); - disp = pDisp[i]; - dLength = pBlockLength[i]; - endat = disp + pBlockLength[i]; + disp = ompi_disp_array_get(pDisp, i); + dLength = ompi_count_array_get(pBlockLength, i); + 
endat = disp + dLength; } } ompi_datatype_add( pdt, oldType, dLength, disp * extent, extent ); @@ -72,38 +72,38 @@ int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const } -int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const ptrdiff_t* pDisp, +int32_t ompi_datatype_create_hindexed( size_t count, const ompi_count_array_t pBlockLength, const ompi_disp_array_t pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ptrdiff_t extent, disp, endat; ompi_datatype_t* pdt; size_t dLength; - int i; + size_t i; /* ignore all cases that lead to an empty type */ ompi_datatype_type_size(oldType, &dLength); - for( i = 0; (i < count) && (0 == pBlockLength[i]); i++ ); /* find first non zero */ + for( i = 0; (i < count) && (0 == ompi_count_array_get(pBlockLength, i)); i++ ); /* find first non zero */ if( (i == count) || (0 == dLength) ) { return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } ompi_datatype_type_extent( oldType, &extent ); - disp = pDisp[i]; - dLength = pBlockLength[i]; + disp = ompi_disp_array_get(pDisp, i); + dLength = ompi_count_array_get(pBlockLength, i); endat = disp + dLength * extent; pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) ); for( i += 1; i < count; i++ ) { - if( 0 == pBlockLength[i] ) /* ignore empty length */ + if( 0 == ompi_count_array_get(pBlockLength, i) ) /* ignore empty length */ continue; - if( endat == pDisp[i] ) { /* contiguous with the previsious */ - dLength += pBlockLength[i]; - endat += pBlockLength[i] * extent; + if( endat == ompi_disp_array_get(pDisp, i) ) { /* contiguous with the previsious */ + dLength += ompi_count_array_get(pBlockLength, i); + endat += ompi_count_array_get(pBlockLength, i) * extent; } else { ompi_datatype_add( pdt, oldType, dLength, disp, extent ); - disp = pDisp[i]; - dLength = pBlockLength[i]; - endat = disp + pBlockLength[i] * extent; + disp = ompi_disp_array_get(pDisp, i); + dLength = 
ompi_count_array_get(pBlockLength, i); + endat = disp + dLength * extent; } } ompi_datatype_add( pdt, oldType, dLength, disp, extent ); @@ -113,30 +113,30 @@ int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const } -int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* pDisp, +int32_t ompi_datatype_create_indexed_block( size_t count, size_t bLength, const ompi_disp_array_t pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ptrdiff_t extent, disp, endat; ompi_datatype_t* pdt; size_t dLength; - int i; + size_t i; if( (count == 0) || (bLength == 0) ) { return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); } ompi_datatype_type_extent( oldType, &extent ); pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); - disp = pDisp[0]; + disp = ompi_disp_array_get(pDisp, 0); dLength = bLength; endat = disp + dLength; for( i = 1; i < count; i++ ) { - if( endat == pDisp[i] ) { + if( endat == ompi_disp_array_get(pDisp, i) ) { /* contiguous with the previsious */ dLength += bLength; endat += bLength; } else { ompi_datatype_add( pdt, oldType, dLength, disp * extent, extent ); - disp = pDisp[i]; + disp = ompi_disp_array_get(pDisp, i); dLength = bLength; endat = disp + bLength; } @@ -147,30 +147,30 @@ int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* p return OMPI_SUCCESS; } -int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdiff_t* pDisp, +int32_t ompi_datatype_create_hindexed_block( size_t count, size_t bLength, const ompi_disp_array_t pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ptrdiff_t extent, disp, endat; ompi_datatype_t* pdt; size_t dLength; - int i; + size_t i; if( (count == 0) || (bLength == 0) ) { return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); } ompi_datatype_type_extent( oldType, &extent ); pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); - disp = 
pDisp[0]; + disp = ompi_disp_array_get(pDisp, 0); dLength = bLength; endat = disp + dLength * extent; for( i = 1; i < count; i++ ) { - if( endat == pDisp[i] ) { + if( endat == ompi_disp_array_get(pDisp, i) ) { /* contiguous with the previsious */ dLength += bLength; endat += bLength * extent; } else { ompi_datatype_add( pdt, oldType, dLength, disp, extent ); - disp = pDisp[i]; + disp = ompi_disp_array_get(pDisp, i); dLength = bLength; endat = disp + bLength * extent; } diff --git a/ompi/datatype/ompi_datatype_create_struct.c b/ompi/datatype/ompi_datatype_create_struct.c index 72d3251b936..78997a3bfd8 100644 --- a/ompi/datatype/ompi_datatype_create_struct.c +++ b/ompi/datatype/ompi_datatype_create_struct.c @@ -28,16 +28,16 @@ #include "ompi/datatype/ompi_datatype.h" -int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const ptrdiff_t* pDisp, +int32_t ompi_datatype_create_struct( size_t count, const ompi_count_array_t pBlockLength, const ompi_disp_array_t pDisp, ompi_datatype_t* const * pTypes, ompi_datatype_t** newType ) { ptrdiff_t disp = 0, endto, lastExtent, lastDisp; ompi_datatype_t *pdt, *lastType; - int i, start_from; + size_t i, start_from; size_t lastBlock; /* Find first non-zero length element */ - for( i = 0; (i < count) && (0 == pBlockLength[i]); i++ ); + for( i = 0; (i < count) && (0 == ompi_count_array_get(pBlockLength, i)); i++ ); if( i == count ) { /* either nothing or nothing relevant */ return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } @@ -46,22 +46,22 @@ int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const p */ start_from = i; lastType = (ompi_datatype_t*)pTypes[start_from]; - lastBlock = pBlockLength[start_from]; + lastBlock = ompi_count_array_get(pBlockLength, start_from); lastExtent = lastType->super.ub - lastType->super.lb; - lastDisp = pDisp[start_from]; - endto = pDisp[start_from] + lastExtent * lastBlock; + lastDisp = ompi_disp_array_get(pDisp, start_from); + endto = 
lastDisp + lastExtent * lastBlock; for( i = (start_from + 1); i < count; i++ ) { - if( (pTypes[i] == lastType) && (pDisp[i] == endto) ) { - lastBlock += pBlockLength[i]; + if( (pTypes[i] == lastType) && (ompi_disp_array_get(pDisp, i) == endto) ) { + lastBlock += ompi_count_array_get(pBlockLength, i); endto = lastDisp + lastBlock * lastExtent; } else { disp += lastType->super.desc.used; if( lastBlock > 1 ) disp += 2; lastType = (ompi_datatype_t*)pTypes[i]; lastExtent = lastType->super.ub - lastType->super.lb; - lastBlock = pBlockLength[i]; - lastDisp = pDisp[i]; + lastBlock = ompi_count_array_get(pBlockLength, i); + lastDisp = ompi_disp_array_get(pDisp, i); endto = lastDisp + lastExtent * lastBlock; } } @@ -69,24 +69,24 @@ int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const p if( lastBlock != 1 ) disp += 2; lastType = (ompi_datatype_t*)pTypes[start_from]; - lastBlock = pBlockLength[start_from]; + lastBlock = ompi_count_array_get(pBlockLength, start_from); lastExtent = lastType->super.ub - lastType->super.lb; - lastDisp = pDisp[start_from]; - endto = pDisp[start_from] + lastExtent * lastBlock; + lastDisp = ompi_disp_array_get(pDisp, start_from); + endto = lastDisp + lastExtent * lastBlock; pdt = ompi_datatype_create( (int32_t)disp ); /* Do again the same loop but now add the elements */ for( i = (start_from + 1); i < count; i++ ) { - if( (pTypes[i] == lastType) && (pDisp[i] == endto) ) { - lastBlock += pBlockLength[i]; + if( (pTypes[i] == lastType) && (ompi_disp_array_get(pDisp, i) == endto) ) { + lastBlock += ompi_count_array_get(pBlockLength, i); endto = lastDisp + lastBlock * lastExtent; } else { ompi_datatype_add( pdt, lastType, lastBlock, lastDisp, lastExtent ); lastType = (ompi_datatype_t*)pTypes[i]; lastExtent = lastType->super.ub - lastType->super.lb; - lastBlock = pBlockLength[i]; - lastDisp = pDisp[i]; + lastBlock = ompi_count_array_get(pBlockLength, i); + lastDisp = ompi_disp_array_get(pDisp, i); endto = lastDisp + lastExtent * 
lastBlock; } } diff --git a/ompi/datatype/ompi_datatype_create_subarray.c b/ompi/datatype/ompi_datatype_create_subarray.c index fcf44407725..bc7412d8040 100644 --- a/ompi/datatype/ompi_datatype_create_subarray.c +++ b/ompi/datatype/ompi_datatype_create_subarray.c @@ -29,9 +29,9 @@ #include "ompi/datatype/ompi_datatype.h" int32_t ompi_datatype_create_subarray(int ndims, - int const* size_array, - int const* subsize_array, - int const* start_array, + const ompi_count_array_t size_array, + const ompi_count_array_t subsize_array, + const ompi_count_array_t start_array, int order, const ompi_datatype_t* oldtype, ompi_datatype_t** newtype) @@ -54,9 +54,9 @@ int32_t ompi_datatype_create_subarray(int ndims, ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newtype); return MPI_SUCCESS; } - ompi_datatype_create_contiguous( subsize_array[0], oldtype, &last_type ); - size = size_array[0]; - displ = start_array[0]; + ompi_datatype_create_contiguous( ompi_count_array_get(subsize_array, 0), oldtype, &last_type ); + size = ompi_count_array_get(size_array, 0); + displ = ompi_count_array_get(start_array, 0); goto replace_subarray_type; } @@ -74,19 +74,22 @@ int32_t ompi_datatype_create_subarray(int ndims, * first dimension data outside the loop, such that we dont have to create * a duplicate of the oldtype just to be able to free it. 
*/ - ompi_datatype_create_vector( subsize_array[i+step], subsize_array[i], size_array[i], + ompi_datatype_create_vector( ompi_count_array_get(subsize_array, i+step), + ompi_count_array_get(subsize_array, i), + ompi_count_array_get(size_array, i), oldtype, newtype ); last_type = *newtype; - size = (MPI_Aint)size_array[i] * (MPI_Aint)size_array[i+step]; - displ = (MPI_Aint)start_array[i] + (MPI_Aint)start_array[i+step] * (MPI_Aint)size_array[i]; + size = (MPI_Aint)ompi_count_array_get(size_array, i) * (MPI_Aint)ompi_count_array_get(size_array, i+step); + displ = (MPI_Aint)ompi_count_array_get(start_array, i) + + (MPI_Aint)ompi_count_array_get(start_array, i+step) * (MPI_Aint)ompi_count_array_get(size_array, i); for( i += 2 * step; i != end_loop; i += step ) { - ompi_datatype_create_hvector( subsize_array[i], 1, size * extent, + ompi_datatype_create_hvector( ompi_count_array_get(subsize_array, i), 1, size * extent, last_type, newtype ); ompi_datatype_destroy( &last_type ); - displ += size * start_array[i]; - size *= size_array[i]; + displ += size * ompi_count_array_get(start_array, i); + size *= ompi_count_array_get(size_array, i); last_type = *newtype; } diff --git a/ompi/datatype/ompi_datatype_create_vector.c b/ompi/datatype/ompi_datatype_create_vector.c index c4829a4b54c..b15679eba0e 100644 --- a/ompi/datatype/ompi_datatype_create_vector.c +++ b/ompi/datatype/ompi_datatype_create_vector.c @@ -28,7 +28,7 @@ #include "ompi/datatype/ompi_datatype.h" -int32_t ompi_datatype_create_vector( int count, int bLength, int stride, +int32_t ompi_datatype_create_vector( size_t count, size_t bLength, ptrdiff_t stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ompi_datatype_t *pTempData, *pData; @@ -39,7 +39,7 @@ int32_t ompi_datatype_create_vector( int count, int bLength, int stride, } pData = ompi_datatype_create( oldType->super.desc.used + 2 ); - if( (bLength == stride) || (1 >= count) ) { /* the elements are contiguous */ + if( (bLength == (size_t)stride) 
|| (1 >= count) ) { /* the elements are contiguous */ ompi_datatype_add( pData, oldType, (size_t)count * bLength, 0, extent ); } else { if( 1 == bLength ) { @@ -57,7 +57,7 @@ int32_t ompi_datatype_create_vector( int count, int bLength, int stride, } -int32_t ompi_datatype_create_hvector( int count, int bLength, ptrdiff_t stride, +int32_t ompi_datatype_create_hvector( size_t count, size_t bLength, ptrdiff_t stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ompi_datatype_t *pTempData, *pData; @@ -68,7 +68,7 @@ int32_t ompi_datatype_create_hvector( int count, int bLength, ptrdiff_t stride, } pTempData = ompi_datatype_create( oldType->super.desc.used + 2 ); - if( ((extent * bLength) == stride) || (1 >= count) ) { /* contiguous */ + if( ((extent * bLength) == (size_t)stride) || (1 >= count) ) { /* contiguous */ pData = pTempData; ompi_datatype_add( pData, oldType, count * bLength, 0, extent ); } else { diff --git a/ompi/datatype/ompi_datatype_match_size.c b/ompi/datatype/ompi_datatype_match_size.c index 1e036c7003b..9797336c549 100644 --- a/ompi/datatype/ompi_datatype_match_size.c +++ b/ompi/datatype/ompi_datatype_match_size.c @@ -26,7 +26,7 @@ extern int32_t ompi_datatype_number_of_predefined_data; -const ompi_datatype_t* ompi_datatype_match_size( int size, uint16_t datakind, uint16_t datalang ) +const ompi_datatype_t* ompi_datatype_match_size( size_t size, uint16_t datakind, uint16_t datalang ) { int32_t i; const ompi_datatype_t* datatype; @@ -45,7 +45,7 @@ const ompi_datatype_t* ompi_datatype_match_size( int size, uint16_t datakind, ui continue; if( (datatype->super.flags & OMPI_DATATYPE_FLAG_DATA_TYPE) != datakind ) continue; - if( (size_t)size == datatype->super.size ) { + if( size == datatype->super.size ) { return datatype; } } diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index a094b6877e9..8639699c4ec 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ 
-438,7 +438,9 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; displ[1] = (ptrdiff_t)(&(s[0].v2)); \ displ[1] -= base; \ \ - ompi_datatype_create_struct( 2, bLength, displ, types, &ptype ); \ + ompi_datatype_create_struct( 2, OMPI_COUNT_ARRAY_CREATE(bLength), \ + OMPI_DISP_ARRAY_CREATE(displ), types, \ + &ptype ); \ displ[0] = (ptrdiff_t)(&(s[1])); \ displ[0] -= base; \ if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \ diff --git a/ompi/datatype/ompi_datatype_sndrcv.c b/ompi/datatype/ompi_datatype_sndrcv.c index 967c7509271..dd263d5e5d7 100644 --- a/ompi/datatype/ompi_datatype_sndrcv.c +++ b/ompi/datatype/ompi_datatype_sndrcv.c @@ -42,8 +42,8 @@ * - communicator * Returns: - MPI_SUCCESS or error code */ -int32_t ompi_datatype_sndrcv( const void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, - void *rbuf, int32_t rcount, const ompi_datatype_t* rdtype) +int32_t ompi_datatype_sndrcv( const void *sbuf, size_t scount, const ompi_datatype_t* sdtype, + void *rbuf, size_t rcount, const ompi_datatype_t* rdtype) { opal_convertor_t send_convertor, recv_convertor; struct iovec iov; @@ -73,11 +73,11 @@ int32_t ompi_datatype_sndrcv( const void *sbuf, int32_t scount, const ompi_datat iov_count = 1; iov.iov_base = (IOVBASE_TYPE*)rbuf; iov.iov_len = scount * sdtype->super.size; - if( (int32_t)iov.iov_len > rcount ) iov.iov_len = rcount; + if( iov.iov_len > rcount ) iov.iov_len = rcount; opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data ); OBJ_DESTRUCT( &send_convertor ); - return ((max_data < (size_t)rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); + return ((max_data < rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); } /* If send packed. 
*/ @@ -90,11 +90,11 @@ int32_t ompi_datatype_sndrcv( const void *sbuf, int32_t scount, const ompi_datat iov_count = 1; iov.iov_base = (IOVBASE_TYPE*)sbuf; iov.iov_len = rcount * rdtype->super.size; - if( (int32_t)iov.iov_len > scount ) iov.iov_len = scount; + if( iov.iov_len > scount ) iov.iov_len = scount; opal_convertor_unpack( &recv_convertor, &iov, &iov_count, &max_data ); OBJ_DESTRUCT( &recv_convertor ); - return (((size_t)scount > max_data) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); + return ((scount > max_data) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); } iov.iov_len = length = 64 * 1024; diff --git a/ompi/mca/coll/base/coll_base_allgatherv.c b/ompi/mca/coll/base/coll_base_allgatherv.c index 24cd84ec616..27bd3b31971 100644 --- a/ompi/mca/coll/base/coll_base_allgatherv.c +++ b/ompi/mca/coll/base/coll_base_allgatherv.c @@ -101,7 +101,8 @@ int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, size_t scount, mca_coll_base_module_t *module) { int line = -1, err = 0, rank, size, sendto, recvfrom, distance, blockcount, i; - int *new_rcounts = NULL, *new_rdispls = NULL, *new_scounts = NULL, *new_sdispls = NULL; + size_t *new_rcounts = NULL, *new_scounts = NULL; + ptrdiff_t *new_rdispls = NULL, *new_sdispls = NULL; ptrdiff_t rlb, rext; char *tmpsend = NULL, *tmprecv = NULL; struct ompi_datatype_t *new_rdtype, *new_sdtype; @@ -142,11 +143,11 @@ int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, size_t scount, blockcount = 1; tmpsend = (char*) rbuf; - new_rcounts = (int*) calloc(4*size, sizeof(int)); + new_rcounts = (size_t*) calloc(2*size, sizeof(size_t) + sizeof(ptrdiff_t)); /* one block backs the 2 count arrays and the 2 displacement arrays */ if (NULL == new_rcounts) { err = -1; line = __LINE__; goto err_hndl; } - new_rdispls = new_rcounts + size; - new_scounts = new_rdispls + size; - new_sdispls = new_scounts + size; + new_scounts = new_rcounts + size; + new_rdispls = (ptrdiff_t*) (new_scounts + size); + new_sdispls = new_rdispls + size; for (distance = 1; distance < size; distance<<=1) { @@ -168,10 +169,12 @@ int 
ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, size_t scount, new_rcounts[i] = ompi_count_array_get(rcounts, tmp_rrank); new_rdispls[i] = ompi_disp_array_get(rdispls, tmp_rrank); } - err = ompi_datatype_create_indexed(blockcount, new_scounts, new_sdispls, + err = ompi_datatype_create_indexed(blockcount, OMPI_COUNT_ARRAY_CREATE(new_scounts), + OMPI_DISP_ARRAY_CREATE(new_sdispls), rdtype, &new_sdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - err = ompi_datatype_create_indexed(blockcount, new_rcounts, new_rdispls, + err = ompi_datatype_create_indexed(blockcount, OMPI_COUNT_ARRAY_CREATE(new_rcounts), + OMPI_DISP_ARRAY_CREATE(new_rdispls), rdtype, &new_rdtype); err = ompi_datatype_commit(&new_sdtype); @@ -215,8 +218,8 @@ int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, size_t scount, * and non-decreasing exchanged data sizes. Described in "Sparbit: a new * logarithmic-cost and data locality-aware MPI Allgather algorithm". * - * Memory requirements: - * Additional memory for N requests. + * Memory requirements: + * Additional memory for N requests. * * Example on 6 nodes, with l representing the highest power of two smaller than N, in this case l = * 4 (more details can be found on the paper): @@ -237,7 +240,7 @@ int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, size_t scount, * [ ] [ ] [4] [ ] [4] [ ] * [ ] [ ] [ ] [5] [ ] [5] * Step 1: Each process sends its own block to process r + l/2 and receives another from r - l/2. - * The block received on the previous step is ignored to avoid a future double-write. + * The block received on the previous step is ignored to avoid a future double-write. 
* # 0 1 2 3 4 5 * [0] [ ] [0] [ ] [0] [ ] * [ ] [1] [ ] [1] [ ] [1] @@ -246,7 +249,7 @@ int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, size_t scount, * [4] [ ] [4] [ ] [4] [ ] * [ ] [5] [ ] [5] [ ] [5] * Step 1: Each process sends all the data it has (3 blocks) to process r + l/4 and similarly - * receives all the data from process r - l/4. + * receives all the data from process r - l/4. * # 0 1 2 3 4 5 * [0] [0] [0] [0] [0] [0] * [1] [1] [1] [1] [1] [1] @@ -267,7 +270,7 @@ int ompi_coll_base_allgatherv_intra_sparbit(const void *sbuf, size_t scount, /* ################# VARIABLE DECLARATION, BUFFER CREATION AND PREPARATION FOR THE ALGORITHM ######################## */ /* list of variable declaration */ - int rank = 0, comm_size = 0, comm_log = 0, exclusion = 0; + int rank = 0, comm_size = 0, comm_log = 0, exclusion = 0; int data_expected = 1, transfer_count = 0, step_requests = 0; int sendto, recvfrom, send_disp, recv_disp; uint32_t last_ignore, ignore_steps, distance = 1; @@ -284,7 +287,7 @@ int ompi_coll_base_allgatherv_intra_sparbit(const void *sbuf, size_t scount, /* printf("utilizando o allgatherv novo!!\n"); */ /* algorithm choice information printing */ - OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:sparbit:allgather_sync_intra rank %d", rank)); comm_size = ompi_comm_size(comm); @@ -297,7 +300,7 @@ int ompi_coll_base_allgatherv_intra_sparbit(const void *sbuf, size_t scount, /* tmprecv and tmpsend are used as abstract pointers to simplify send and receive buffer choice */ tmprecv = (char *) rbuf; if(MPI_IN_PLACE != sbuf){ - tmpsend = (char *) sbuf; + tmpsend = (char *) sbuf; err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv + ompi_disp_array_get(rdispls, rank) * rext, scount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -305,7 +308,7 @@ int ompi_coll_base_allgatherv_intra_sparbit(const void *sbuf, size_t scount, tmpsend = tmprecv; 
requests = (MPI_Request *) malloc(comm_size * sizeof(MPI_Request)); - + /* ################# ALGORITHM LOGIC ######################## */ /* calculate log2 of the total process count */ @@ -317,8 +320,8 @@ int ompi_coll_base_allgatherv_intra_sparbit(const void *sbuf, size_t scount, /* perform the parallel binomial tree distribution steps */ for (int i = 0; i < comm_log; ++i) { - sendto = (rank + distance) % comm_size; - recvfrom = (rank - distance + comm_size) % comm_size; + sendto = (rank + distance) % comm_size; + recvfrom = (rank - distance + comm_size) % comm_size; exclusion = (distance & ignore_steps) == distance; for (transfer_count = 0; transfer_count < data_expected - exclusion; transfer_count++) { @@ -342,12 +345,12 @@ int ompi_coll_base_allgatherv_intra_sparbit(const void *sbuf, size_t scount, } ompi_request_wait_all(step_requests, requests, MPI_STATUSES_IGNORE); - distance >>= 1; + distance >>= 1; /* calculates the data expected for the next step, based on the current number of blocks and eventual exclusions */ data_expected = (data_expected << 1) - exclusion; exclusion = step_requests = 0; } - + free(requests); return OMPI_SUCCESS; @@ -513,7 +516,8 @@ ompi_coll_base_allgatherv_intra_neighborexchange(const void *sbuf, size_t scount int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from; size_t new_scounts[2], new_rcounts[2]; ptrdiff_t new_sdispls[2], new_rdispls[2]; - int tmp_new_scounts[2], tmp_new_rcounts[2], tmp_new_sdispls[2], tmp_new_rdispls[2]; + size_t tmp_new_scounts[2], tmp_new_rcounts[2]; + ptrdiff_t tmp_new_sdispls[2], tmp_new_rdispls[2]; ptrdiff_t rlb, rext; char *tmpsend = NULL, *tmprecv = NULL; struct ompi_datatype_t *new_rdtype, *new_sdtype; @@ -611,7 +615,8 @@ ompi_coll_base_allgatherv_intra_neighborexchange(const void *sbuf, size_t scount tmp_new_scounts[1] = new_scounts[1]; tmp_new_sdispls[0] = new_sdispls[0]; tmp_new_sdispls[1] = new_sdispls[1]; - err = ompi_datatype_create_indexed(2, tmp_new_scounts, tmp_new_sdispls, 
rdtype, + err = ompi_datatype_create_indexed(2, OMPI_COUNT_ARRAY_CREATE(tmp_new_scounts), + OMPI_DISP_ARRAY_CREATE(tmp_new_sdispls), rdtype, &new_sdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } err = ompi_datatype_commit(&new_sdtype); @@ -626,7 +631,8 @@ ompi_coll_base_allgatherv_intra_neighborexchange(const void *sbuf, size_t scount tmp_new_rcounts[1] = new_rcounts[1]; tmp_new_rdispls[0] = new_rdispls[0]; tmp_new_rdispls[1] = new_rdispls[1]; - err = ompi_datatype_create_indexed(2, tmp_new_rcounts, tmp_new_rdispls, rdtype, + err = ompi_datatype_create_indexed(2, OMPI_COUNT_ARRAY_CREATE(tmp_new_rcounts), + OMPI_DISP_ARRAY_CREATE(tmp_new_rdispls), rdtype, &new_rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } err = ompi_datatype_commit(&new_rdtype); @@ -757,7 +763,6 @@ ompi_coll_base_allgatherv_intra_basic_default(const void *sbuf, size_t scount, MPI_Aint extent, lb; char *send_buf = NULL; struct ompi_datatype_t *newtype, *send_type; - int *tmp_rcounts, *tmp_disps; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -801,22 +806,8 @@ ompi_coll_base_allgatherv_intra_basic_default(const void *sbuf, size_t scount, * datatype. 
*/ - /* TODO:BIGCOUNT: Remove temporaries once ompi_datatype interface is updated */ - tmp_rcounts = malloc(size * sizeof(int)); - if (NULL == tmp_rcounts) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - tmp_disps = malloc(size * sizeof(int)); - if (NULL == tmp_disps) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - for (int i = 0; i < size; i++) { - tmp_rcounts[i] = ompi_count_array_get(rcounts, i); - tmp_disps[i] = ompi_disp_array_get(disps, i); - } - err = ompi_datatype_create_indexed(size,tmp_rcounts,tmp_disps,rdtype,&newtype); - free(tmp_rcounts); - free(tmp_disps); + err = ompi_datatype_create_indexed(size, rcounts, disps, + rdtype, &newtype); if (MPI_SUCCESS != err) { return err; } diff --git a/ompi/mca/coll/base/coll_base_reduce_scatter_block.c b/ompi/mca/coll/base/coll_base_reduce_scatter_block.c index f72469d1a00..294d95f646c 100644 --- a/ompi/mca/coll/base/coll_base_reduce_scatter_block.c +++ b/ompi/mca/coll/base/coll_base_reduce_scatter_block.c @@ -202,7 +202,8 @@ ompi_coll_base_reduce_scatter_block_intra_recursivedoubling( struct ompi_datatype_t *dtypesend = NULL, *dtyperecv = NULL; char *tmprecv_raw = NULL, *tmpbuf_raw = NULL, *tmprecv, *tmpbuf; ptrdiff_t span, gap, totalcount, extent; - int blocklens[2], displs[2]; + size_t blocklens[2]; + ptrdiff_t displs[2]; int err = MPI_SUCCESS; int comm_size = ompi_comm_size(comm); int rank = ompi_comm_rank(comm); @@ -270,7 +271,8 @@ ompi_coll_base_reduce_scatter_block_intra_recursivedoubling( rcount * (comm_size - cur_tree_root - mask) : 0; displs[0] = 0; displs[1] = comm_size * rcount - blocklens[1]; - err = ompi_datatype_create_indexed(2, blocklens, displs, dtype, &dtypesend); + err = ompi_datatype_create_indexed(2, OMPI_COUNT_ARRAY_CREATE(blocklens), + OMPI_DISP_ARRAY_CREATE(displs), dtype, &dtypesend); if (MPI_SUCCESS != err) { goto cleanup_and_return; } err = ompi_datatype_commit(&dtypesend); if (MPI_SUCCESS != err) { goto cleanup_and_return; } @@ -281,7 +283,8 @@ 
ompi_coll_base_reduce_scatter_block_intra_recursivedoubling( rcount * (comm_size - remote_tree_root - mask) : 0; displs[0] = 0; displs[1] = comm_size * rcount - blocklens[1]; - err = ompi_datatype_create_indexed(2, blocklens, displs, dtype, &dtyperecv); + err = ompi_datatype_create_indexed(2, OMPI_COUNT_ARRAY_CREATE(blocklens), + OMPI_DISP_ARRAY_CREATE(displs), dtype, &dtyperecv); if (MPI_SUCCESS != err) { goto cleanup_and_return; } err = ompi_datatype_commit(&dtyperecv); if (MPI_SUCCESS != err) { goto cleanup_and_return; } diff --git a/ompi/mca/coll/inter/coll_inter_allgatherv.c b/ompi/mca/coll/inter/coll_inter_allgatherv.c index fa7c9e14301..6e26374f542 100644 --- a/ompi/mca/coll/inter/coll_inter_allgatherv.c +++ b/ompi/mca/coll/inter/coll_inter_allgatherv.c @@ -107,21 +107,8 @@ mca_coll_inter_allgatherv_inter(const void *sbuf, size_t scount, goto exit; } - /* TODO:BIGCOUNT: Remove tehese temporaries once ompi_datatype is updated for bigcount */ - int *tmp_rcounts = malloc(sizeof(int) * size); - int *tmp_disps = malloc(sizeof(int) * size); - if (NULL == tmp_rcounts || NULL == tmp_disps) { - err = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - for (i = 0; i < size; ++i) { - tmp_rcounts[i] = (int) ompi_count_array_get(rcounts, i); - tmp_disps[i] = (int) ompi_disp_array_get(disps, i); - } - ompi_datatype_create_indexed(size,tmp_rcounts,tmp_disps,rdtype,&ndtype); + ompi_datatype_create_indexed(size,rcounts,disps,rdtype,&ndtype); ompi_datatype_commit(&ndtype); - free(tmp_rcounts); - free(tmp_disps); if (0 == rank) { /* Exchange data between roots */ diff --git a/ompi/mca/coll/inter/coll_inter_gatherv.c b/ompi/mca/coll/inter/coll_inter_gatherv.c index 1e1d8840a44..45bddac9081 100644 --- a/ompi/mca/coll/inter/coll_inter_gatherv.c +++ b/ompi/mca/coll/inter/coll_inter_gatherv.c @@ -62,20 +62,8 @@ mca_coll_inter_gatherv_inter(const void *sbuf, size_t scount, size_local = ompi_comm_size(comm); if (MPI_ROOT == root) { /* I am the root, receiving the data from zero. 
*/ - /* TODO:BIGCOUNT: Remove these temporaries once ompi_datatype is updated for bigcount */ - int *tmp_rcounts = malloc(sizeof(int) * size); - int *tmp_disps = malloc(sizeof(int) * size); - if (NULL == tmp_rcounts || NULL == tmp_disps) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - for (i = 0; i < size; ++i) { - tmp_rcounts[i] = ompi_count_array_get(rcounts, i); - tmp_disps[i] = ompi_disp_array_get(disps, i); - } - ompi_datatype_create_indexed(size, tmp_rcounts, tmp_disps, rdtype, &ndtype); + ompi_datatype_create_indexed(size, rcounts, disps, rdtype, &ndtype); ompi_datatype_commit(&ndtype); - free(tmp_rcounts); - free(tmp_disps); err = MCA_PML_CALL(recv(rbuf, 1, ndtype, 0, MCA_COLL_BASE_TAG_GATHERV, diff --git a/ompi/mca/coll/inter/coll_inter_scatterv.c b/ompi/mca/coll/inter/coll_inter_scatterv.c index 5d98e1ea099..927cbb2f3c6 100644 --- a/ompi/mca/coll/inter/coll_inter_scatterv.c +++ b/ompi/mca/coll/inter/coll_inter_scatterv.c @@ -61,6 +61,7 @@ mca_coll_inter_scatterv_inter(const void *sbuf, ompi_count_array_t scounts, size = ompi_comm_remote_size(comm); size_local = ompi_comm_size(comm); + if (MPI_PROC_NULL == root) { /* do nothing */ err = OMPI_SUCCESS; @@ -100,9 +101,9 @@ mca_coll_inter_scatterv_inter(const void *sbuf, ompi_count_array_t scounts, displace[i] = displace[i-1] + counts[i-1]; } } - /* perform the scatterv locally */ OMPI_COUNT_ARRAY_INIT(&counts_arg, counts); OMPI_DISP_ARRAY_INIT(&displace_arg, displace); + /* perform the scatterv locally */ err = comm->c_local_comm->c_coll->coll_scatterv(ptmp, counts_arg, displace_arg, rdtype, rbuf, rcount, rdtype, 0, comm->c_local_comm, @@ -139,20 +140,8 @@ mca_coll_inter_scatterv_inter(const void *sbuf, ompi_count_array_t scounts, return err; } - /* TODO:BIGCOUNT: Remove these temporaries once ompi_datatype is updated for bigcount */ - int *tmp_scounts = malloc(sizeof(int) * size); - int *tmp_disps = malloc(sizeof(int) * size); - if (NULL == tmp_scounts || NULL == tmp_disps) { - return OMPI_ERR_OUT_OF_RESOURCE; - 
} - for (i = 0; i < size; ++i) { - tmp_scounts[i] = (int) ompi_count_array_get(scounts, i); - tmp_disps[i] = (int) ompi_disp_array_get(disps, i); - } - ompi_datatype_create_indexed(size,tmp_scounts,tmp_disps,sdtype,&ndtype); + ompi_datatype_create_indexed(size,scounts,disps,sdtype,&ndtype); ompi_datatype_commit(&ndtype); - free(tmp_scounts); - free(tmp_disps); err = MCA_PML_CALL(send(sbuf, 1, ndtype, 0, MCA_COLL_BASE_TAG_SCATTERV, diff --git a/ompi/mca/common/ompio/common_ompio_file_open.c b/ompi/mca/common/ompio/common_ompio_file_open.c index 9104b175e70..e6cbc71afe0 100644 --- a/ompi/mca/common/ompio/common_ompio_file_open.c +++ b/ompi/mca/common/ompio/common_ompio_file_open.c @@ -120,7 +120,7 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm, goto fn_fail; } } - + mca_common_ompio_set_file_defaults (ompio_fh); ompio_fh->f_split_coll_req = NULL; @@ -187,9 +187,9 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm, } if ( true == use_sharedfp ) { - /* open the file once more for the shared file pointer if required. + /* open the file once more for the shared file pointer if required. ** Can be disabled by the user if no shared file pointer operations - ** are used by his application. + ** are used by his application. */ if ( NULL != ompio_fh->f_sharedfp ) { ret = ompio_fh->f_sharedfp->sharedfp_file_open(comm, @@ -212,7 +212,7 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm, "native", info); - + /* If file has been opened in the append mode, move the internal file pointer of OMPIO to the very end of the file. 
*/ @@ -361,7 +361,7 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh) free (ompio_fh->f_file_convertor); ompio_fh->f_file_convertor = NULL; } - + if (NULL != ompio_fh->f_datarep) { free (ompio_fh->f_datarep); ompio_fh->f_datarep = NULL; @@ -372,7 +372,7 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh) ompio_fh->f_coll_write_time = NULL; } free (ompio_fh->f_fullfilename); - + if ( NULL != ompio_fh->f_coll_read_time ) { free ( ompio_fh->f_coll_read_time ); ompio_fh->f_coll_read_time = NULL; @@ -393,7 +393,7 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh) if ( MPI_DATATYPE_NULL != ompio_fh->f_orig_filetype ){ ompi_datatype_destroy (&ompio_fh->f_orig_filetype); } - + if (MPI_COMM_NULL != ompio_fh->f_comm && !(ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) ) { ompi_comm_free (&ompio_fh->f_comm); } @@ -446,11 +446,11 @@ int mca_common_ompio_set_file_defaults (ompio_file_t *fh) int blocklen[2] = {1, 1}; ptrdiff_t d[2], base; int i, flag; - + fh->f_flags = 0; fh->f_perm = OMPIO_PERM_NULL; fh->f_io_array = NULL; - + fh->f_bytes_per_agg = OMPIO_MCA_GET(fh, bytes_per_agg); opal_info_get (fh->f_info, "cb_buffer_size", &stripe_str, &flag); if ( flag ) { @@ -464,7 +464,7 @@ int mca_common_ompio_set_file_defaults (ompio_file_t *fh) fh->f_atomicity = 0; fh->f_stripe_size = 0; fh->f_stripe_count = 0; - + /* File View */ fh->f_fview.f_flags = 0; fh->f_fview.f_offset = 0; @@ -478,7 +478,7 @@ int mca_common_ompio_set_file_defaults (ompio_file_t *fh) fh->f_etype = MPI_DATATYPE_NULL; fh->f_filetype = MPI_DATATYPE_NULL; fh->f_orig_filetype = MPI_DATATYPE_NULL; - + fh->f_init_procs_per_group = -1; fh->f_init_procs_in_group = NULL; fh->f_procs_per_group = -1; @@ -488,26 +488,26 @@ int mca_common_ompio_set_file_defaults (ompio_file_t *fh) fh->f_num_aggrs = -1; fh->f_aggr_list = NULL; fh->f_datarep = NULL; - + /*Create a derived datatype for the created iovec */ types[0] = &ompi_mpi_long.dt; types[1] = &ompi_mpi_long.dt; - + d[0] = (ptrdiff_t) 
fh->f_fview.f_decoded_iov; d[1] = (ptrdiff_t) &fh->f_fview.f_decoded_iov[0].iov_len; - + base = d[0]; for (i=0 ; i<2 ; i++) { d[i] -= base; } - + ompi_datatype_create_struct (2, - blocklen, - d, + OMPI_COUNT_ARRAY_CREATE(blocklen), + OMPI_DISP_ARRAY_CREATE(d), types, &fh->f_iov_type); ompi_datatype_commit (&fh->f_iov_type); - + return OMPI_SUCCESS; } else { @@ -523,14 +523,14 @@ int mca_common_ompio_file_delete (const char *filename, ompio_file_t *fh = NULL; /* No locking required for file_delete according to my understanding. - One thread will succeed, the other ones silently ignore the + One thread will succeed, the other ones silently ignore the error that the file is already deleted. */ /* Create an incomplete file handle, it will basically only contain the filename. It is needed to select the correct component in the fs framework and call the file_remove - function corresponding to the file type. + function corresponding to the file type. */ ret = mca_common_ompio_create_incomplete_file_handle(filename, &fh); if (OMPI_SUCCESS != ret) { @@ -708,7 +708,7 @@ int mca_common_ompio_decode_datatype (struct ompio_file_t *fh, free (temp_iov); opal_convertor_cleanup (&convertor); - + return OMPI_SUCCESS; } diff --git a/ompi/mca/common/ompio/common_ompio_file_read_all.c b/ompi/mca/common/ompio/common_ompio_file_read_all.c index 1b2f8d6c474..c4fb6d3fdcb 100644 --- a/ompi/mca/common/ompio/common_ompio_file_read_all.c +++ b/ompi/mca/common/ompio/common_ompio_file_read_all.c @@ -234,7 +234,7 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh, fh->f_procs_in_group, fh->f_procs_per_group, fh->f_comm); - + if (OMPI_SUCCESS != ret){ goto exit; } @@ -293,7 +293,7 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh, fh->f_procs_in_group, fh->f_procs_per_group, fh->f_comm); - + if (OMPI_SUCCESS != ret){ goto exit; } @@ -502,6 +502,9 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh, 
*****************************************************************/ bytes_received = 0; + /** + * TODO: replace with big count? + */ while (bytes_to_read_in_cycle) { /* This next block identifies which process is the holder ** of the sorted[current_index] element; @@ -774,8 +777,8 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh, send_req[i] = MPI_REQUEST_NULL; if ( 0 < disp_index[i] ) { ompi_datatype_create_hindexed(disp_index[i], - blocklen_per_process[i], - displs_per_process[i], + OMPI_COUNT_ARRAY_CREATE(blocklen_per_process[i]), + OMPI_DISP_ARRAY_CREATE(displs_per_process[i]), MPI_BYTE, &sendtype[i]); ompi_datatype_commit(&sendtype[i]); @@ -854,8 +857,8 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh, } ompi_datatype_create_hindexed(block_index+1, - blocklength_proc, - displs_proc, + OMPI_COUNT_ARRAY_CREATE(blocklength_proc), + OMPI_DISP_ARRAY_CREATE(displs_proc), MPI_BYTE, &newType); ompi_datatype_commit(&newType); diff --git a/ompi/mca/common/ompio/common_ompio_file_view.c b/ompi/mca/common/ompio/common_ompio_file_view.c index ad569c56f7a..8be378f650e 100644 --- a/ompi/mca/common/ompio/common_ompio_file_view.c +++ b/ompi/mca/common/ompio/common_ompio_file_view.c @@ -47,8 +47,8 @@ static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newt ompi_datatype_destroy (&type); return MPI_ERR_INTERN; } - - ompi_datatype_set_args( type, 0, NULL, 0, NULL, 1, &oldtype, MPI_COMBINER_DUP ); + + ompi_datatype_set_args( type, 0, 0, NULL, 0, OMPI_DISP_ARRAY_NULL, 1, &oldtype, MPI_COMBINER_DUP ); *newtype = type; return OMPI_SUCCESS; @@ -92,7 +92,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh, } shared_fp_base_module->sharedfp_get_position(fh, &disp); } - + fview_clear(&(fh->f_fview)); if (NULL != fh->f_etype) { ompi_datatype_destroy (&fh->f_etype); @@ -129,7 +129,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh, fh->f_file_convertor = opal_convertor_create (opal_local_arch, 0); fh->f_flags |= OMPIO_DATAREP_NATIVE; } 
- + datatype_duplicate (filetype, &(fh->f_orig_filetype)); opal_datatype_get_extent (&filetype->super, &lb, &ftype_extent); opal_datatype_type_size (&filetype->super, &ftype_size); @@ -176,7 +176,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh, // File view is not a multiple of the etype. return MPI_ERR_ARG; } - + // make sure that displacement is not negative, which could // lead to an illegal access. if ( 0 < fh->f_fview.f_iov_count && 0 > (off_t)fh->f_fview.f_decoded_iov[0].iov_base ) { @@ -186,7 +186,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh, return MPI_ERR_IO; } - + if( SIMPLE_PLUS == OMPIO_MCA_GET(fh, grouping_option) ) { fh->f_cc_size = get_contiguous_chunk_size (fh, 1); } @@ -238,11 +238,11 @@ int mca_common_ompio_set_view (ompio_file_t *fh, OBJ_RELEASE(stripe_str); } } - + if ( -1 != OMPIO_MCA_GET(fh, num_aggregators) || -1 != num_cb_nodes) { /* The user requested a particular number of aggregators */ - num_groups = OMPIO_MCA_GET(fh, num_aggregators); + num_groups = OMPIO_MCA_GET(fh, num_aggregators); if ( -1 != num_cb_nodes ) { /* A hint through an MPI Info object trumps an mca parameter value */ num_groups = num_cb_nodes; @@ -253,7 +253,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh, mca_common_ompio_forced_grouping ( fh, num_groups, contg_groups); } else { - if ( SIMPLE != OMPIO_MCA_GET(fh, grouping_option) && + if ( SIMPLE != OMPIO_MCA_GET(fh, grouping_option) && SIMPLE_PLUS != OMPIO_MCA_GET(fh, grouping_option) ) { ret = mca_common_ompio_fview_based_grouping(fh, &num_groups, @@ -266,15 +266,15 @@ int mca_common_ompio_set_view (ompio_file_t *fh, else { int done=0; int ndims; - + if ( fh->f_comm->c_flags & OMPI_COMM_CART ){ ret = fh->f_comm->c_topo->topo.cart.cartdim_get( fh->f_comm, &ndims); if ( OMPI_SUCCESS != ret ){ goto exit; } - if ( ndims > 1 ) { - ret = mca_common_ompio_cart_based_grouping( fh, - &num_groups, + if ( ndims > 1 ) { + ret = mca_common_ompio_cart_based_grouping( fh, + &num_groups, contg_groups); if 
(OMPI_SUCCESS != ret ) { opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_cart_based_grouping failed\n"); @@ -283,7 +283,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh, done=1; } } - + if ( !done ) { ret = mca_common_ompio_simple_grouping(fh, &num_groups, @@ -300,7 +300,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh, int ii, jj; printf("BEFORE finalize_init: comm size = %d num_groups = %d\n", fh->f_size, num_groups); for ( ii=0; ii< num_groups; ii++ ) { - printf("contg_groups[%d].procs_per_contg_group=%d\n", ii, contg_groups[ii].procs_per_contg_group); + printf("contg_groups[%d].procs_per_contg_group=%d\n", ii, contg_groups[ii].procs_per_contg_group); printf("contg_groups[%d].procs_in_contg_group.[", ii); for ( jj=0; jj< contg_groups[ii].procs_per_contg_group; jj++ ) { @@ -399,7 +399,7 @@ OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (ompio_file_t *fh, int flag) } avg[1] = (OMPI_MPI_OFFSET_TYPE) fh->f_fview.f_iov_count; avg[2] = (OMPI_MPI_OFFSET_TYPE) fh->f_fview.f_view_size; - + fh->f_comm->c_coll->coll_allreduce (avg, global_avg, 3, diff --git a/ompi/mca/fcoll/base/fcoll_base_coll_array.c b/ompi/mca/fcoll/base/fcoll_base_coll_array.c index 68f25ace6fb..79444c7d5a1 100644 --- a/ompi/mca/fcoll/base/fcoll_base_coll_array.c +++ b/ompi/mca/fcoll/base/fcoll_base_coll_array.c @@ -50,7 +50,6 @@ int ompi_fcoll_base_coll_allgatherv_array (void *sbuf, int err = OMPI_SUCCESS; ptrdiff_t extent, lb; int i, rank, j; - int *tmp_rcounts = NULL, *tmp_disps = NULL; char *send_buf = NULL; struct ompi_datatype_t *newtype, *send_type; @@ -93,24 +92,11 @@ int ompi_fcoll_base_coll_allgatherv_array (void *sbuf, return err; } - /* TODO:BIGCOUNT: remove tmp_rcounts and tmp_disps once the ompi_datatype - * interface is udpated to use size_t/ptrdiff_t - */ - tmp_rcounts = (int *)malloc(2 * procs_per_group * sizeof(int)); - if (NULL == tmp_rcounts) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - tmp_disps = tmp_rcounts + procs_per_group; - for (i = 0; i < procs_per_group; i++) 
{ - tmp_rcounts[i] = (int) rcounts[i]; - tmp_disps[i] = (int) disps[i]; - } err = ompi_datatype_create_indexed (procs_per_group, - tmp_rcounts, - tmp_disps, + OMPI_COUNT_ARRAY_CREATE(rcounts), + OMPI_DISP_ARRAY_CREATE(disps), rdtype, &newtype); - free(tmp_rcounts); if (MPI_SUCCESS != err) { return err; } @@ -118,7 +104,7 @@ int ompi_fcoll_base_coll_allgatherv_array (void *sbuf, if(MPI_SUCCESS != err) { return err; } - + ompi_fcoll_base_coll_bcast_array (rbuf, 1, newtype, @@ -126,7 +112,7 @@ int ompi_fcoll_base_coll_allgatherv_array (void *sbuf, procs_in_group, procs_per_group, comm); - + ompi_datatype_destroy (&newtype); return OMPI_SUCCESS; @@ -349,7 +335,7 @@ int ompi_fcoll_base_coll_allgather_array (void *sbuf, procs_in_group, procs_per_group, comm); - + if (OMPI_SUCCESS == err) { err = ompi_fcoll_base_coll_bcast_array (rbuf, rcount * procs_per_group, diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c index 2ce3ef5d27f..879f696ecc7 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c @@ -138,7 +138,7 @@ mca_fcoll_dynamic_file_write_all (struct ompio_file_t *fh, count, buf, &max_data, - fh->f_mem_convertor, + fh->f_mem_convertor, &decoded_iov, &iov_count); if (OMPI_SUCCESS != ret ){ @@ -190,7 +190,7 @@ mca_fcoll_dynamic_file_write_all (struct ompio_file_t *fh, fh->f_procs_in_group, fh->f_procs_per_group, fh->f_comm); - + if( OMPI_SUCCESS != ret){ goto exit; } @@ -254,7 +254,7 @@ mca_fcoll_dynamic_file_write_all (struct ompio_file_t *fh, fh->f_procs_in_group, fh->f_procs_per_group, fh->f_comm); - + if( OMPI_SUCCESS != ret){ goto exit; } @@ -784,8 +784,8 @@ mca_fcoll_dynamic_file_write_all (struct ompio_file_t *fh, recv_req[i] = MPI_REQUEST_NULL; if ( 0 < disp_index[i] ) { ompi_datatype_create_hindexed(disp_index[i], - blocklen_per_process[i], - displs_per_process[i], + OMPI_COUNT_ARRAY_CREATE(blocklen_per_process[i]), + 
OMPI_DISP_ARRAY_CREATE(displs_per_process[i]), MPI_BYTE, &recvtype[i]); ompi_datatype_commit(&recvtype[i]); diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c index 1f9b5f8bef7..3779d45ced1 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c @@ -58,7 +58,7 @@ typedef struct mca_io_ompio_aggregator_data { ompi_datatype_t **recvtype, **prev_recvtype; struct iovec *global_iov_array; int current_index, current_position; - int bytes_to_write_in_cycle, bytes_remaining, procs_per_group; + int bytes_to_write_in_cycle, bytes_remaining, procs_per_group; int *procs_in_group, iov_index; int bytes_sent, prev_bytes_sent; struct iovec *decoded_iov; @@ -91,20 +91,20 @@ typedef struct mca_io_ompio_aggregator_data { -static int shuffle_init ( int index, int cycles, int aggregator, int rank, - mca_io_ompio_aggregator_data *data, +static int shuffle_init ( int index, int cycles, int aggregator, int rank, + mca_io_ompio_aggregator_data *data, ompi_request_t **reqs ); static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data ); -int mca_fcoll_dynamic_gen2_break_file_view ( struct iovec *decoded_iov, int iov_count, - struct iovec *local_iov_array, int local_count, +int mca_fcoll_dynamic_gen2_break_file_view ( struct iovec *decoded_iov, int iov_count, + struct iovec *local_iov_array, int local_count, struct iovec ***broken_decoded_iovs, int **broken_iov_counts, - struct iovec ***broken_iov_arrays, int **broken_counts, + struct iovec ***broken_iov_arrays, int **broken_counts, MPI_Aint **broken_total_lengths, - int stripe_count, int stripe_size); + int stripe_count, int stripe_size); -int mca_fcoll_dynamic_gen2_get_configuration (ompio_file_t *fh, int *dynamic_gen2_num_io_procs, +int mca_fcoll_dynamic_gen2_get_configuration (ompio_file_t *fh, int 
*dynamic_gen2_num_io_procs, int **ret_aggregators); @@ -133,12 +133,12 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, ompi_request_t **reqs1=NULL,**reqs2=NULL; ompi_request_t **curr_reqs=NULL,**prev_reqs=NULL; mca_io_ompio_aggregator_data **aggr_data=NULL; - + ptrdiff_t *displs = NULL; int dynamic_gen2_num_io_procs; size_t max_data = 0; MPI_Aint *total_bytes_per_process = NULL; - + struct iovec **broken_iov_arrays=NULL; struct iovec **broken_decoded_iovs=NULL; int *broken_counts=NULL; @@ -157,8 +157,8 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0; mca_common_ompio_print_entry nentry; #endif - - + + /************************************************************************** ** 1. In case the data is not contiguous in memory, decode it into an iovec **************************************************************************/ @@ -167,7 +167,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, /* since we want to overlap 2 iterations, define the bytes_per_cycle to be half of what the user requested */ bytes_per_cycle =bytes_per_cycle/2; - + ret = mca_common_ompio_decode_datatype ((struct ompio_file_t *) fh, datatype, count, @@ -183,7 +183,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, if ( MPI_STATUS_IGNORE != status ) { status->_ucount = max_data; } - + /* difference to the first generation of this function: ** dynamic_gen2_num_io_procs should be the number of io_procs per group ** consequently.Initially, we will have only 1 group. 
@@ -201,7 +201,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, if ( fh->f_stripe_size == 0 ) { - // EDGAR: just a quick heck for testing + // EDGAR: just a quick heck for testing //fh->f_stripe_size = 1048576; fh->f_stripe_size = 65536; } @@ -210,8 +210,8 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, if (OMPI_SUCCESS != ret){ goto exit; } - - aggr_data = (mca_io_ompio_aggregator_data **) malloc ( dynamic_gen2_num_io_procs * + + aggr_data = (mca_io_ompio_aggregator_data **) malloc ( dynamic_gen2_num_io_procs * sizeof(mca_io_ompio_aggregator_data*)); for ( i=0; i< dynamic_gen2_num_io_procs; i++ ) { @@ -242,12 +242,12 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, *************************************************************************/ // broken_iov_arrays[0] contains broken_counts[0] entries to aggregator 0, // broken_iov_arrays[1] contains broken_counts[1] entries to aggregator 1, etc. - ret = mca_fcoll_dynamic_gen2_break_file_view ( decoded_iov, iov_count, - local_iov_array, local_count, + ret = mca_fcoll_dynamic_gen2_break_file_view ( decoded_iov, iov_count, + local_iov_array, local_count, &broken_decoded_iovs, &broken_iov_counts, - &broken_iov_arrays, &broken_counts, + &broken_iov_arrays, &broken_counts, &broken_total_lengths, - dynamic_gen2_num_io_procs, fh->f_stripe_size); + dynamic_gen2_num_io_procs, fh->f_stripe_size); /************************************************************************** @@ -276,7 +276,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + ret = ompi_fcoll_base_coll_allgather_array (broken_total_lengths, dynamic_gen2_num_io_procs, MPI_LONG, @@ -315,7 +315,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, cycles = ceil((double)broken_total_lengths[i]/bytes_per_cycle); } } - + result_counts = (int *) malloc ( dynamic_gen2_num_io_procs * fh->f_procs_per_group * sizeof(int) ); 
if ( NULL == result_counts ) { @@ -334,7 +334,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, dynamic_gen2_num_io_procs, MPI_INT, fh->f_comm, - fh->f_comm->c_coll->coll_allgather_module); + fh->f_comm->c_coll->coll_allgather_module); } else { ret = ompi_fcoll_base_coll_allgather_array (broken_counts, @@ -377,14 +377,14 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + displs[0] = 0; total_fview_count = (int) aggr_data[i]->fview_count[0]; for (j=1 ; jf_procs_per_group ; j++) { total_fview_count += aggr_data[i]->fview_count[j]; displs[j] = displs[j-1] + aggr_data[i]->fview_count[j-1]; } - + #if DEBUG_ON printf("total_fview_count : %d\n", total_fview_count); if (aggregators[i] == fh->f_rank) { @@ -397,7 +397,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, } } #endif - + /* allocate the global iovec */ if (0 != total_fview_count) { aggr_data[i]->global_iov_array = (struct iovec*) malloc (total_fview_count * @@ -406,9 +406,9 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, opal_output(1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; - } + } } - + #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_comm_time = MPI_Wtime(); #endif @@ -445,7 +445,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, end_comm_time = MPI_Wtime(); comm_time += (end_comm_time - start_comm_time); #endif - + /**************************************************************************************** *** 5. Sort the global offset/lengths list based on the offsets. 
*** The result of the sort operation is the 'sorted', an integer array, @@ -463,18 +463,18 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, } ompi_fcoll_base_sort_iovec (aggr_data[i]->global_iov_array, total_fview_count, aggr_data[i]->sorted); } - + if (NULL != local_iov_array){ free(local_iov_array); local_iov_array = NULL; } - + if (NULL != displs){ free(displs); displs=NULL; } - - + + #if DEBUG_ON if (aggregators[i] == fh->f_rank) { uint32_t tv=0; @@ -490,9 +490,9 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, *** 6. Determine the number of cycles required to execute this *** operation *************************************************************/ - + aggr_data[i]->bytes_per_cycle = bytes_per_cycle; - + if (aggregators[i] == fh->f_rank) { aggr_data[i]->disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); if (NULL == aggr_data[i]->disp_index) { @@ -500,7 +500,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + aggr_data[i]->max_disp_index = (int *)calloc (fh->f_procs_per_group, sizeof (int)); if (NULL == aggr_data[i]->max_disp_index) { opal_output (1, "OUT OF MEMORY\n"); @@ -514,15 +514,15 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + aggr_data[i]->displs_per_process = (MPI_Aint **)calloc (fh->f_procs_per_group, sizeof (MPI_Aint*)); if (NULL == aggr_data[i]->displs_per_process) { opal_output (1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - - + + aggr_data[i]->global_buf = (char *) malloc (bytes_per_cycle); aggr_data[i]->prev_global_buf = (char *) malloc (bytes_per_cycle); if (NULL == aggr_data[i]->global_buf || NULL == aggr_data[i]->prev_global_buf){ @@ -530,10 +530,10 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - - aggr_data[i]->recvtype = (ompi_datatype_t **) malloc 
(fh->f_procs_per_group * + + aggr_data[i]->recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); - aggr_data[i]->prev_recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * + aggr_data[i]->prev_recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); if (NULL == aggr_data[i]->recvtype || NULL == aggr_data[i]->prev_recvtype) { opal_output (1, "OUT OF MEMORY\n"); @@ -545,11 +545,11 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, aggr_data[i]->prev_recvtype[l] = MPI_DATATYPE_NULL; } } - + #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_exch = MPI_Wtime(); #endif - } + } reqs1 = (ompi_request_t **)malloc ((fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs *sizeof(ompi_request_t *)); reqs2 = (ompi_request_t **)malloc ((fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs *sizeof(ompi_request_t *)); @@ -568,11 +568,11 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, curr_reqs = reqs1; prev_reqs = reqs2; - + /* Initialize communication for iteration 0 */ if ( cycles > 0 ) { for ( i=0; if_rank, aggr_data[i], + ret = shuffle_init ( 0, cycles, aggregators[i], fh->f_rank, aggr_data[i], &curr_reqs[i*(fh->f_procs_per_group + 1)] ); if ( OMPI_SUCCESS != ret ) { goto exit; @@ -583,11 +583,11 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, for (index = 1; index < cycles; index++) { SWAP_REQUESTS(curr_reqs,prev_reqs); - SWAP_AGGR_POINTERS(aggr_data,dynamic_gen2_num_io_procs); + SWAP_AGGR_POINTERS(aggr_data,dynamic_gen2_num_io_procs); /* Initialize communication for iteration i */ for ( i=0; if_rank, aggr_data[i], + ret = shuffle_init ( index, cycles, aggregators[i], fh->f_rank, aggr_data[i], &curr_reqs[i*(fh->f_procs_per_group + 1)] ); if ( OMPI_SUCCESS != ret ) { goto exit; @@ -595,13 +595,13 @@ int mca_fcoll_dynamic_gen2_file_write_all (struct ompio_file_t *fh, } /* Finish communication for iteration i-1 */ - ret = 
ompi_request_wait_all ( (fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs, + ret = ompi_request_wait_all ( (fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs, prev_reqs, MPI_STATUS_IGNORE); if (OMPI_SUCCESS != ret){ goto exit; } - + /* Write data for iteration i-1 */ for ( i=0; i 0 ) { SWAP_REQUESTS(curr_reqs,prev_reqs); - SWAP_AGGR_POINTERS(aggr_data,dynamic_gen2_num_io_procs); - - ret = ompi_request_wait_all ( (fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs, + SWAP_AGGR_POINTERS(aggr_data,dynamic_gen2_num_io_procs); + + ret = ompi_request_wait_all ( (fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs, prev_reqs, MPI_STATUS_IGNORE); if (OMPI_SUCCESS != ret){ goto exit; } - + /* Write data for iteration i=cycles-1 */ for ( i=0; if_rank) { if (NULL != aggr_data[i]->recvtype){ for (j =0; j< aggr_data[i]->procs_per_group; j++) { @@ -686,7 +686,7 @@ exit : free(aggr_data[i]->recvtype); free(aggr_data[i]->prev_recvtype); } - + free (aggr_data[i]->disp_index); free (aggr_data[i]->max_disp_index); free (aggr_data[i]->global_buf); @@ -695,7 +695,7 @@ exit : free (aggr_data[i]->blocklen_per_process[l]); free (aggr_data[i]->displs_per_process[l]); } - + free (aggr_data[i]->blocklen_per_process); free (aggr_data[i]->displs_per_process); } @@ -703,7 +703,7 @@ exit : free (aggr_data[i]->global_iov_array); free (aggr_data[i]->fview_count); free (aggr_data[i]->decoded_iov); - + free (aggr_data[i]); } free (aggr_data); @@ -729,7 +729,7 @@ exit : free(reqs2); free(result_counts); - + return OMPI_SUCCESS; } @@ -739,14 +739,14 @@ static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator int ret=OMPI_SUCCESS; int last_array_pos=0; int last_pos=0; - + if ( aggregator == fh->f_rank && aggr_data->prev_num_io_entries) { fh->f_flags |= OMPIO_COLLECTIVE_OP; while ( aggr_data->prev_bytes_to_write > 0 ) { ssize_t tret; - aggr_data->prev_bytes_to_write -= mca_fcoll_dynamic_gen2_split_iov_array (fh, aggr_data->prev_io_array, - 
aggr_data->prev_num_io_entries, + aggr_data->prev_bytes_to_write -= mca_fcoll_dynamic_gen2_split_iov_array (fh, aggr_data->prev_io_array, + aggr_data->prev_num_io_entries, &last_array_pos, &last_pos ); tret = fh->f_fbtl->fbtl_pwritev (fh); if ( 0 > tret ) { @@ -760,31 +760,31 @@ static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator printf("fh->f_num_of_io_entries=%d\n", fh->f_num_of_io_entries); printf("[%d]: fh->f_io_array[0].offset = %ld .size = %ld\n", fh->f_rank, (long)fh->f_io_array[0].offset, fh->f_io_array[0].length); - if ( fh->f_num_of_io_entries > 1 ) + if ( fh->f_num_of_io_entries > 1 ) printf("[%d]: fh->f_io_array[1].offset = %ld .size = %ld\n", fh->f_rank, (long)fh->f_io_array[1].offset, fh->f_io_array[1].length); - - + + int n = fh->f_num_of_io_entries-1; - if ( fh->f_num_of_io_entries > 2 ) + if ( fh->f_num_of_io_entries > 2 ) printf("[%d]: fh->f_io_array[n].offset = %ld .size = %ld\n", fh->f_rank, (long)fh->f_io_array[n].offset, fh->f_io_array[n].length); #endif } - fh->f_flags &= ~OMPIO_COLLECTIVE_OP; + fh->f_flags &= ~OMPIO_COLLECTIVE_OP; free ( fh->f_io_array ); free ( aggr_data->prev_io_array); - } + } exit: fh->f_io_array=NULL; fh->f_num_of_io_entries=0; - + return ret; } -static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_io_ompio_aggregator_data *data, +static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_io_ompio_aggregator_data *data, ompi_request_t **reqs ) { int bytes_sent = 0; @@ -809,7 +809,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i *** 7a. 
Getting ready for next cycle: initializing and freeing buffers **********************************************************************/ if (aggregator == rank) { - + if (NULL != data->recvtype){ for (i =0; i< data->procs_per_group; i++) { if ( MPI_DATATYPE_NULL != data->recvtype[i] ) { @@ -818,7 +818,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i } } } - + for(l=0;lprocs_per_group;l++){ data->disp_index[l] = 0; @@ -838,7 +838,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i } } } /* (aggregator == rank */ - + /************************************************************************** *** 7b. Determine the number of bytes to be actually written in this cycle **************************************************************************/ @@ -865,20 +865,20 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i /********************************************************** **Gather the Data from all the processes at the writers ** *********************************************************/ - + #if DEBUG_ON printf("bytes_to_write_in_cycle: %ld, cycle : %d\n", data->bytes_to_write_in_cycle, index); #endif - + /***************************************************************** *** 7c. 
Calculate how much data will be contributed in this cycle *** by each process *****************************************************************/ - + /* The blocklen and displs calculation only done at aggregators!*/ while (data->bytes_to_write_in_cycle) { - + /* This next block identifies which process is the holder ** of the sorted[current_index] element; */ @@ -892,10 +892,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i blocks += data->fview_count[j+1]; } } - + if (data->bytes_remaining) { /* Finish up a partially used buffer from the previous cycle */ - + if (data->bytes_remaining <= data->bytes_to_write_in_cycle) { /* The data fits completely into the block */ if (aggregator == rank) { @@ -904,7 +904,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base + (data->global_iov_array[data->sorted[data->current_index]].iov_len - data->bytes_remaining); - + data->disp_index[data->n] += 1; /* In this cases the length is consumed so allocating for @@ -912,10 +912,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i if ( data->disp_index[data->n] == data->max_disp_index[data->n] ) { data->max_disp_index[data->n] *= 2; data->blocklen_per_process[data->n] = (int *) realloc( - (void *)data->blocklen_per_process[data->n], + (void *)data->blocklen_per_process[data->n], (data->max_disp_index[data->n])*sizeof(int)); data->displs_per_process[data->n] = (MPI_Aint *) realloc( - (void *)data->displs_per_process[data->n], + (void *)data->displs_per_process[data->n], (data->max_disp_index[data->n])*sizeof(MPI_Aint)); } @@ -940,7 +940,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i - data->bytes_remaining); data->disp_index[data->n] += 1; } - + if (data->procs_in_group[data->n] == rank) { bytes_sent += data->bytes_to_write_in_cycle; } @@ -962,7 +962,7 @@ static int 
shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i } if (data->procs_in_group[data->n] == rank) { bytes_sent += data->bytes_to_write_in_cycle; - + } data->bytes_remaining = data->global_iov_array[data->sorted[data->current_index]].iov_len - data->bytes_to_write_in_cycle; @@ -976,7 +976,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i data->global_iov_array[data->sorted[data->current_index]].iov_len; data->displs_per_process[data->n][data->disp_index[data->n]] = (ptrdiff_t) data->global_iov_array[data->sorted[data->current_index]].iov_base; - + data->disp_index[data->n] += 1; /*realloc for next blocklength @@ -985,10 +985,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i if ( data->disp_index[data->n] == data->max_disp_index[data->n] ) { data->max_disp_index[data->n] *= 2; data->blocklen_per_process[data->n] = (int *) realloc( - (void *)data->blocklen_per_process[data->n], + (void *)data->blocklen_per_process[data->n], (data->max_disp_index[data->n]*sizeof(int))); data->displs_per_process[data->n] = (MPI_Aint *)realloc( - (void *)data->displs_per_process[data->n], + (void *)data->displs_per_process[data->n], (data->max_disp_index[data->n]*sizeof(MPI_Aint))); } data->blocklen_per_process[data->n][data->disp_index[data->n]] = 0; @@ -1003,8 +1003,8 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i } } } - - + + /************************************************************************* *** 7d. 
Calculate the displacement on where to put the data and allocate *** the receive buffer (global_buf) @@ -1017,13 +1017,13 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i entries_per_aggregator++ ; } } - + #if DEBUG_ON printf("%d: cycle: %d, bytes_sent: %d\n ",rank,index, bytes_sent); printf("%d : Entries per aggregator : %d\n",rank,entries_per_aggregator); #endif - + if (entries_per_aggregator > 0){ file_offsets_for_agg = (mca_io_ompio_local_io_array *) malloc(entries_per_aggregator*sizeof(mca_io_ompio_local_io_array)); @@ -1032,7 +1032,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + sorted_file_offsets = (int *) malloc (entries_per_aggregator*sizeof(int)); if (NULL == sorted_file_offsets){ @@ -1040,10 +1040,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + /*Moving file offsets to an IO array!*/ temp_index = 0; - + for (i=0;iprocs_per_group; i++){ for(j=0;jdisp_index[i];j++){ if (data->blocklen_per_process[i][j] > 0){ @@ -1053,11 +1053,11 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i file_offsets_for_agg[temp_index].offset = data->displs_per_process[i][j]; temp_index++; - + #if DEBUG_ON printf("************Cycle: %d, Aggregator: %d ***************\n", index+1,rank); - + printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", data->procs_in_group[i],j, data->blocklen_per_process[i][j],j, @@ -1067,33 +1067,33 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i } } } - + /* Sort the displacements for each aggregator*/ local_heap_sort (file_offsets_for_agg, entries_per_aggregator, sorted_file_offsets); - + /*create contiguous memory displacements based on blocklens on the same displs array and map it to this aggregator's actual file-displacements (this is in the io-array created above)*/ 
memory_displacements = (MPI_Aint *) malloc (entries_per_aggregator * sizeof(MPI_Aint)); - + memory_displacements[sorted_file_offsets[0]] = 0; for (i=1; iprocs_per_group * sizeof (int)); if (NULL == temp_disp_index) { opal_output (1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + /*Now update the displacements array with memory offsets*/ #if DEBUG_ON global_count = 0; @@ -1115,14 +1115,14 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i file_offsets_for_agg[sorted_file_offsets[i]].length; #endif } - + if (NULL != temp_disp_index){ free(temp_disp_index); temp_disp_index = NULL; } - + #if DEBUG_ON - + printf("************Cycle: %d, Aggregator: %d ***************\n", index+1,rank); for (i=0;iprocs_per_group; i++){ @@ -1133,7 +1133,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i data->blocklen_per_process[i][j],j, data->displs_per_process[i][j], rank); - + } } } @@ -1160,13 +1160,13 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i reqs[i] = MPI_REQUEST_NULL; if ( 0 < data->disp_index[i] ) { ompi_datatype_create_hindexed(data->disp_index[i], - data->blocklen_per_process[i], - data->displs_per_process[i], + OMPI_COUNT_ARRAY_CREATE(data->blocklen_per_process[i]), + OMPI_DISP_ARRAY_CREATE(data->displs_per_process[i]), MPI_BYTE, &data->recvtype[i]); ompi_datatype_commit(&data->recvtype[i]); opal_datatype_type_size(&data->recvtype[i]->super, &datatype_size); - + if (datatype_size){ ret = MCA_PML_CALL(irecv(data->global_buf, 1, @@ -1240,8 +1240,8 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i if ( 0 <= block_index ) { ompi_datatype_create_hindexed(block_index+1, - blocklength_proc, - displs_proc, + OMPI_COUNT_ARRAY_CREATE(blocklength_proc), + OMPI_DISP_ARRAY_CREATE(displs_proc), MPI_BYTE, &newType); ompi_datatype_commit(&newType); @@ -1263,7 +1263,7 @@ static int shuffle_init ( int index, int cycles, int 
aggregator, int rank, mca_i } } - + #if DEBUG_ON if (aggregator == rank){ printf("************Cycle: %d, Aggregator: %d ***************\n", @@ -1272,7 +1272,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i printf (" RECV %d \n",((int *)data->global_buf)[i]); } #endif - + //#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN // end_comm_time = MPI_Wtime(); // comm_time += (end_comm_time - start_comm_time); @@ -1280,10 +1280,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i /********************************************************** *** 7f. Create the io array, and pass it to fbtl *********************************************************/ - + if (aggregator == rank && entries_per_aggregator>0) { - - + + data->io_array = (mca_common_ompio_io_array_t *) malloc (entries_per_aggregator * sizeof (mca_common_ompio_io_array_t)); if (NULL == data->io_array) { @@ -1291,7 +1291,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + data->num_io_entries = 0; /*First entry for every aggregator*/ data->io_array[0].offset = @@ -1301,7 +1301,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i data->io_array[0].memory_address = data->global_buf+memory_displacements[sorted_file_offsets[0]]; data->num_io_entries++; - + for (i=1;iglobal_buf+memory_displacements[sorted_file_offsets[i]]; data->num_io_entries++; } - + } - + #if DEBUG_ON printf("*************************** %d\n", num_of_io_entries); for (i=0 ; i= max_lengths[owner][0] ) { broken_mem_iovs[owner] = (struct iovec*) realloc ( broken_mem_iovs[owner], - mem_count * block[owner][0] * + mem_count * block[owner][0] * sizeof(struct iovec )); max_lengths[owner][0] = mem_count * block[owner][0]; block[owner][0]++; @@ -1500,7 +1500,7 @@ int mca_fcoll_dynamic_gen2_break_file_view ( struct iovec *mem_iov, int mem_coun broken_file_counts[owner]++; if ( 
broken_file_counts[owner] >= max_lengths[owner][1] ) { broken_file_iovs[owner] = (struct iovec*) realloc ( broken_file_iovs[owner], - file_count * block[owner][1] * + file_count * block[owner][1] * sizeof(struct iovec )); max_lengths[owner][1] = file_count * block[owner][1]; block[owner][1]++; @@ -1511,9 +1511,9 @@ int mca_fcoll_dynamic_gen2_break_file_view ( struct iovec *mem_iov, int mem_coun } while( temp_len > 0 ); i++; - } + } + - /* Step 2: recalculating the total lengths per aggregator */ for ( i=0; i< stripe_count; i++ ) { for ( j=0; jf_stripe_size); off_t endaddr = baseaddr + fh->f_stripe_size; - + if ( 0 == array_pos && 0 == pos ) { fh->f_io_array = (mca_common_ompio_io_array_t *) malloc ( num_entries * sizeof(mca_common_ompio_io_array_t)); if ( NULL == fh->f_io_array ){ @@ -1634,21 +1634,21 @@ int mca_fcoll_dynamic_gen2_split_iov_array ( ompio_file_t *fh, mca_common_ompio_ return -1; } } - + int i=0; do { fh->f_io_array[i].memory_address = (char *)io_array[array_pos].memory_address + pos; fh->f_io_array[i].offset = (char *)io_array[array_pos].offset + pos; off_t length = io_array[array_pos].length - pos; - + if ( ( (off_t)fh->f_io_array[i].offset + length) < endaddr ) { fh->f_io_array[i].length = length; } else { fh->f_io_array[i].length = endaddr - (size_t)fh->f_io_array[i].offset; } - + pos += fh->f_io_array[i].length; bytes_written += fh->f_io_array[i].length; i++; @@ -1658,14 +1658,14 @@ int mca_fcoll_dynamic_gen2_split_iov_array ( ompio_file_t *fh, mca_common_ompio_ array_pos++; } } while ( (array_pos < num_entries) && (((off_t)io_array[array_pos].offset+pos ) < endaddr) ); - + fh->f_num_of_io_entries = i; *ret_array_pos = array_pos; *ret_pos = pos; return bytes_written; } - + static int local_heap_sort (mca_io_ompio_local_io_array *io_array, int num_entries, int *sorted) diff --git a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_read_all.c b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_read_all.c index f6a492e621c..087432c91ea 100644 --- 
a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_read_all.c +++ b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_read_all.c @@ -465,7 +465,7 @@ int mca_fcoll_vulcan_file_read_all (struct ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_read_time = MPI_Wtime(); #endif - for (i = 0; i < fh->f_num_aggrs; i++) { + for (i = 0; i < fh->f_num_aggrs; i++) { ret = read_init (fh, 0, cycles, fh->f_aggr_list[i], fh->f_rank, aggr_data[i], read_sync_type, &req_tmp, use_accelerator_buffer); @@ -840,8 +840,8 @@ static int shuffle_init (int index, int cycles, int aggregator, int rank, mca_io reqs[i] = MPI_REQUEST_NULL; if (0 < data->disp_index[i]) { ompi_datatype_create_hindexed (data->disp_index[i], - data->blocklen_per_process[i], - data->displs_per_process[i], + OMPI_COUNT_ARRAY_CREATE(data->blocklen_per_process[i]), + OMPI_DISP_ARRAY_CREATE(data->displs_per_process[i]), MPI_BYTE, &data->recvtype[i]); ompi_datatype_commit (&data->recvtype[i]); @@ -918,8 +918,8 @@ static int shuffle_init (int index, int cycles, int aggregator, int rank, mca_io if (0 <= block_index) { ompi_datatype_create_hindexed (block_index+1, - blocklength_proc, - displs_proc, + OMPI_COUNT_ARRAY_CREATE(blocklength_proc), + OMPI_DISP_ARRAY_CREATE(displs_proc), MPI_BYTE, &newType); ompi_datatype_commit (&newType); diff --git a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c index b6e9be6d2ca..066fddc547d 100644 --- a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c +++ b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c @@ -72,11 +72,11 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, ompi_request_t **reqs = NULL; ompi_request_t *req_iwrite = MPI_REQUEST_NULL; mca_io_ompio_aggregator_data **aggr_data=NULL; - + ptrdiff_t *displs = NULL; int vulcan_num_io_procs; size_t max_data = 0; - + struct iovec **broken_iov_arrays=NULL; struct iovec **broken_decoded_iovs=NULL; int *broken_counts=NULL; @@ -98,8 +98,8 @@ int 
mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0; mca_common_ompio_print_entry nentry; #endif - - + + /************************************************************************** ** 1. In case the data is not contiguous in memory, decode it into an iovec **************************************************************************/ @@ -124,7 +124,7 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, /* since we want to overlap 2 iterations, define the bytes_per_cycle to be half of what the user requested */ bytes_per_cycle =bytes_per_cycle/2; - + ret = mca_common_ompio_decode_datatype ((struct ompio_file_t *) fh, datatype, count, @@ -136,11 +136,11 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, if (OMPI_SUCCESS != ret ){ goto exit; } - + if ( MPI_STATUS_IGNORE != status ) { status->_ucount = max_data; } - + ret = mca_fcoll_vulcan_get_configuration (fh, vulcan_num_io_procs, max_data); if (OMPI_SUCCESS != ret){ goto exit; @@ -148,9 +148,9 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, opal_output_verbose(10, ompi_fcoll_base_framework.framework_output, "Using %d aggregators for the write_all operation \n", fh->f_num_aggrs); - aggr_data = (mca_io_ompio_aggregator_data **) malloc ( fh->f_num_aggrs * + aggr_data = (mca_io_ompio_aggregator_data **) malloc ( fh->f_num_aggrs * sizeof(mca_io_ompio_aggregator_data*)); - + for ( i=0; i< fh->f_num_aggrs; i++ ) { // At this point we know the number of aggregators. If there is a correlation between // number of aggregators and number of IO nodes, we know how many aggr_data arrays we need @@ -165,7 +165,7 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, aggr_index = i; } } - + /********************************************************************* *** 2. 
Generate the local offsets/lengths array corresponding to *** this write operation @@ -177,22 +177,22 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, if (ret != OMPI_SUCCESS){ goto exit; } - + /************************************************************************* ** 2b. Separate the local_iov_array entries based on the number of aggregators *************************************************************************/ // Modifications for the even distribution: long domain_size; ret = mca_fcoll_vulcan_minmax ( fh, local_iov_array, local_count, fh->f_num_aggrs, &domain_size); - + // broken_iov_arrays[0] contains broken_counts[0] entries to aggregator 0, // broken_iov_arrays[1] contains broken_counts[1] entries to aggregator 1, etc. - ret = mca_fcoll_vulcan_break_file_view ( decoded_iov, iov_count, - local_iov_array, local_count, + ret = mca_fcoll_vulcan_break_file_view ( decoded_iov, iov_count, + local_iov_array, local_count, &broken_decoded_iovs, &broken_iov_counts, - &broken_iov_arrays, &broken_counts, + &broken_iov_arrays, &broken_counts, &broken_total_lengths, - fh->f_num_aggrs, domain_size); + fh->f_num_aggrs, domain_size); /************************************************************************** @@ -211,12 +211,12 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, if( OMPI_SUCCESS != ret){ goto exit; } - + #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN end_comm_time = MPI_Wtime(); comm_time += (end_comm_time - start_comm_time); #endif - + cycles=0; for ( i=0; if_num_aggrs; i++ ) { #if DEBUG_ON @@ -226,13 +226,13 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, cycles = ceil((double)broken_total_lengths[i]/bytes_per_cycle); } } - + result_counts = (int *) malloc ( fh->f_num_aggrs * fh->f_procs_per_group * sizeof(int) ); if ( NULL == result_counts ) { ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_comm_time = MPI_Wtime(); #endif @@ -251,7 +251,7 @@ int 
mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, end_comm_time = MPI_Wtime(); comm_time += (end_comm_time - start_comm_time); #endif - + /************************************************************* *** 4. Allgather the offset/lengths array from all processes *************************************************************/ @@ -273,14 +273,14 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + displs[0] = 0; total_fview_count = (uint32_t) aggr_data[i]->fview_count[0]; for (j=1 ; jf_procs_per_group ; j++) { total_fview_count += aggr_data[i]->fview_count[j]; displs[j] = displs[j-1] + aggr_data[i]->fview_count[j-1]; } - + #if DEBUG_ON printf("total_fview_count : %d\n", total_fview_count); if (fh->f_aggr_list[i] == fh->f_rank) { @@ -293,7 +293,7 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, } } #endif - + /* allocate the global iovec */ if (0 != total_fview_count) { aggr_data[i]->global_iov_array = (struct iovec*) malloc (total_fview_count * @@ -302,9 +302,9 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, opal_output(1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; - } + } } - + #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_comm_time = MPI_Wtime(); #endif @@ -326,7 +326,7 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, end_comm_time = MPI_Wtime(); comm_time += (end_comm_time - start_comm_time); #endif - + /**************************************************************************************** *** 5. Sort the global offset/lengths list based on the offsets. 
*** The result of the sort operation is the 'sorted', an integer array, @@ -344,18 +344,18 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, } ompi_fcoll_base_sort_iovec (aggr_data[i]->global_iov_array, total_fview_count, aggr_data[i]->sorted); } - + if (NULL != local_iov_array){ free(local_iov_array); local_iov_array = NULL; } - + if (NULL != displs){ free(displs); displs=NULL; } - - + + #if DEBUG_ON if (fh->f_aggr_list[i] == fh->f_rank) { uint32_t tv=0; @@ -371,9 +371,9 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, *** 6. Determine the number of cycles required to execute this *** operation *************************************************************/ - + aggr_data[i]->bytes_per_cycle = bytes_per_cycle; - + if (fh->f_aggr_list[i] == fh->f_rank) { aggr_data[i]->disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); if (NULL == aggr_data[i]->disp_index) { @@ -388,21 +388,21 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + aggr_data[i]->blocklen_per_process = (int **)calloc (fh->f_procs_per_group, sizeof (int*)); if (NULL == aggr_data[i]->blocklen_per_process) { opal_output (1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + aggr_data[i]->displs_per_process = (MPI_Aint **)calloc (fh->f_procs_per_group, sizeof (MPI_Aint*)); if (NULL == aggr_data[i]->displs_per_process) { opal_output (1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + if (use_accelerator_buffer) { opal_output_verbose(10, ompi_fcoll_base_framework.framework_output, "Allocating GPU device buffer for aggregation\n"); @@ -429,10 +429,10 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, goto exit; } } - - aggr_data[i]->recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * + + aggr_data[i]->recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); - aggr_data[i]->prev_recvtype = (ompi_datatype_t 
**) malloc (fh->f_procs_per_group * + aggr_data[i]->prev_recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); if (NULL == aggr_data[i]->recvtype || NULL == aggr_data[i]->prev_recvtype) { opal_output (1, "OUT OF MEMORY\n"); @@ -444,7 +444,7 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, aggr_data[i]->prev_recvtype[l] = MPI_DATATYPE_NULL; } } - + #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_exch = MPI_Wtime(); #endif @@ -553,7 +553,7 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh, } } } - + #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN end_exch = MPI_Wtime(); exch_write += end_exch - start_exch; @@ -790,7 +790,7 @@ static int shuffle_init (int index, int num_cycles, int aggregator, int rank, *** by each process *****************************************************************/ mca_fcoll_vulcan_calc_blocklen_disps (data, aggregator, rank, &bytes_sent); - + /************************************************************************* *** 7d. 
Calculate the displacement on where to put the data *************************************************************************/ @@ -833,13 +833,13 @@ static int shuffle_init (int index, int num_cycles, int aggregator, int rank, reqs[i] = MPI_REQUEST_NULL; if (0 < data->disp_index[i]) { ompi_datatype_create_hindexed(data->disp_index[i], - data->blocklen_per_process[i], - data->displs_per_process[i], + OMPI_COUNT_ARRAY_CREATE(data->blocklen_per_process[i]), + OMPI_DISP_ARRAY_CREATE(data->displs_per_process[i]), MPI_BYTE, &data->recvtype[i]); ompi_datatype_commit(&data->recvtype[i]); opal_datatype_type_size(&data->recvtype[i]->super, &datatype_size); - + if (datatype_size) { ret = MCA_PML_CALL(irecv(data->global_buf, 1, @@ -909,8 +909,8 @@ static int shuffle_init (int index, int num_cycles, int aggregator, int rank, if ( 0 <= block_index ) { ompi_datatype_create_hindexed(block_index+1, - blocklength_proc, - displs_proc, + OMPI_COUNT_ARRAY_CREATE(blocklength_proc), + OMPI_DISP_ARRAY_CREATE(displs_proc), MPI_BYTE, &newType); ompi_datatype_commit(&newType); @@ -958,7 +958,7 @@ static int shuffle_init (int index, int num_cycles, int aggregator, int rank, return ret; } - + int mca_fcoll_vulcan_minmax (ompio_file_t *fh, struct iovec *iov, int iov_count, int num_aggregators, long *new_stripe_size) { long min, max, globalmin, globalmax; @@ -974,7 +974,7 @@ int mca_fcoll_vulcan_minmax (ompio_file_t *fh, struct iovec *iov, int iov_count, } fh->f_comm->c_coll->coll_allreduce ( &min, &globalmin, 1, MPI_LONG, MPI_MIN, fh->f_comm, fh->f_comm->c_coll->coll_allreduce_module); - + fh->f_comm->c_coll->coll_allreduce ( &max, &globalmax, 1, MPI_LONG, MPI_MAX, fh->f_comm, fh->f_comm->c_coll->coll_allreduce_module); @@ -988,23 +988,23 @@ int mca_fcoll_vulcan_minmax (ompio_file_t *fh, struct iovec *iov, int iov_count, return OMPI_SUCCESS; } -int mca_fcoll_vulcan_break_file_view ( struct iovec *mem_iov, int mem_count, - struct iovec *file_iov, int file_count, +int 
mca_fcoll_vulcan_break_file_view ( struct iovec *mem_iov, int mem_count, + struct iovec *file_iov, int file_count, struct iovec ***ret_broken_mem_iovs, int **ret_broken_mem_counts, - struct iovec ***ret_broken_file_iovs, int **ret_broken_file_counts, + struct iovec ***ret_broken_file_iovs, int **ret_broken_file_counts, MPI_Aint **ret_broken_total_lengths, int stripe_count, size_t stripe_size) { int i, j, ret=OMPI_SUCCESS; - struct iovec **broken_mem_iovs=NULL; + struct iovec **broken_mem_iovs=NULL; int *broken_mem_counts=NULL; - struct iovec **broken_file_iovs=NULL; + struct iovec **broken_file_iovs=NULL; int *broken_file_counts=NULL; MPI_Aint *broken_total_lengths=NULL; int **block=NULL, **max_lengths=NULL; - - broken_mem_iovs = (struct iovec **) malloc ( stripe_count * sizeof(struct iovec *)); - broken_file_iovs = (struct iovec **) malloc ( stripe_count * sizeof(struct iovec *)); + + broken_mem_iovs = (struct iovec **) malloc ( stripe_count * sizeof(struct iovec *)); + broken_file_iovs = (struct iovec **) malloc ( stripe_count * sizeof(struct iovec *)); if ( NULL == broken_mem_iovs || NULL == broken_file_iovs ) { ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; @@ -1013,7 +1013,7 @@ int mca_fcoll_vulcan_break_file_view ( struct iovec *mem_iov, int mem_count, broken_mem_iovs[i] = (struct iovec*) calloc (1, sizeof(struct iovec )); broken_file_iovs[i] = (struct iovec*) calloc (1, sizeof(struct iovec )); } - + broken_mem_counts = (int *) calloc ( stripe_count, sizeof(int)); broken_file_counts = (int *) calloc ( stripe_count, sizeof(int)); broken_total_lengths = (MPI_Aint *) calloc ( stripe_count, sizeof(MPI_Aint)); @@ -1029,7 +1029,7 @@ int mca_fcoll_vulcan_break_file_view ( struct iovec *mem_iov, int mem_count, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + for ( i=0; i= max_lengths[owner][0] ) { broken_mem_iovs[owner] = (struct iovec*) realloc ( broken_mem_iovs[owner], - mem_count * block[owner][0] * + mem_count * block[owner][0] * sizeof(struct iovec )); 
max_lengths[owner][0] = mem_count * block[owner][0]; block[owner][0]++; @@ -1141,7 +1141,7 @@ int mca_fcoll_vulcan_break_file_view ( struct iovec *mem_iov, int mem_count, broken_file_counts[owner]++; if ( broken_file_counts[owner] >= max_lengths[owner][1] ) { broken_file_iovs[owner] = (struct iovec*) realloc ( broken_file_iovs[owner], - file_count * block[owner][1] * + file_count * block[owner][1] * sizeof(struct iovec )); max_lengths[owner][1] = file_count * block[owner][1]; block[owner][1]++; @@ -1152,9 +1152,9 @@ int mca_fcoll_vulcan_break_file_view ( struct iovec *mem_iov, int mem_count, } while( temp_len > 0 ); i++; - } + } + - /* Step 2: recalculating the total lengths per aggregator */ for ( i=0; i< stripe_count; i++ ) { for ( j=0; jf_size; i++ ) { fh->f_procs_in_group[i]=i; } - + return ret; -} +} void mca_fcoll_vulcan_calc_blocklen_disps (mca_io_ompio_aggregator_data *data, int aggregator, int rank, size_t *bytes_comm) diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c index 506b6897e46..2e13e039f8f 100644 --- a/ompi/mca/io/ompio/io_ompio.c +++ b/ompi/mca/io/ompio/io_ompio.c @@ -183,8 +183,8 @@ int ompi_io_ompio_generate_current_file_view (struct ompio_file_t *fh, d[i] -= base; } ompi_datatype_create_struct (3, - blocklen, - d, + OMPI_COUNT_ARRAY_CREATE(blocklen), + OMPI_DISP_ARRAY_CREATE(d), types, &io_array_type); ompi_datatype_commit (&io_array_type); @@ -563,9 +563,9 @@ int mca_io_ompio_get_mca_parameter_value ( char *mca_parameter_name, int name_le opal_output (1, "Error in mca_io_ompio_get_mca_parameter_value: unknown parameter name"); } - /* Using here OMPI_ERROR_MAX instead of OMPI_ERROR, since -1 (which is OMPI_ERROR) - ** is a valid value for some mca parameters, indicating that the user did not set - ** that parameter value + /* Using here OMPI_ERROR_MAX instead of OMPI_ERROR, since -1 (which is OMPI_ERROR) + ** is a valid value for some mca parameters, indicating that the user did not set + ** that parameter value */ 
return OMPI_ERR_MAX; } diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index 5a4f8136295..a3e508f6946 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -48,8 +48,8 @@ static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newt ompi_datatype_destroy (&type); return MPI_ERR_INTERN; } - - ompi_datatype_set_args( type, 0, NULL, 0, NULL, 1, &oldtype, MPI_COMBINER_DUP ); + + ompi_datatype_set_args( type, 0, 0, NULL, 0, OMPI_DISP_ARRAY_NULL, 1, &oldtype, MPI_COMBINER_DUP ); *newtype = type; return OMPI_SUCCESS; @@ -83,8 +83,8 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, // MPI_DISPLACEMENT_CURRENT is only valid if amode is MPI_MODE_SEQUENTIAL return MPI_ERR_DISP; } - - + + OPAL_THREAD_LOCK(&fp->f_lock); ret = mca_common_ompio_set_view(fh, disp, etype, filetype, datarep, info); OPAL_THREAD_UNLOCK(&fp->f_lock); diff --git a/ompi/mpi/c/type_contiguous.c.in b/ompi/mpi/c/type_contiguous.c.in index cc88f3cab77..13ef58d6ee4 100644 --- a/ompi/mpi/c/type_contiguous.c.in +++ b/ompi/mpi/c/type_contiguous.c.in @@ -32,11 +32,6 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ - PROTOTYPE ERROR_CLASS type_contiguous(COUNT count, DATATYPE oldtype, @@ -56,12 +51,6 @@ PROTOTYPE ERROR_CLASS type_contiguous(COUNT count, } else if( count < 0 ) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COUNT, FUNC_NAME); } -#if OMPI_BIGCOUNT_SRC - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, count); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } -#endif } rc = ompi_datatype_create_contiguous( count, oldtype, newtype ); @@ -69,13 +58,11 @@ PROTOTYPE ERROR_CLASS type_contiguous(COUNT count, /* data description */ { -#if OMPI_BIGCOUNT_SRC - int icount = (int)count; - const int* a_i[1] = 
{&icount}; -#else - const int* a_i[1] = {&count}; -#endif - ompi_datatype_set_args( *newtype, 1, a_i, 0, NULL, 1, &oldtype, MPI_COMBINER_CONTIGUOUS ); + ompi_count_array_t a_i[1] = {OMPI_COUNT_ARRAY_CREATE(&count)}; + ompi_datatype_set_args( *newtype, + (sizeof(count) == sizeof(size_t)) ? 0 : 1, + (sizeof(count) == sizeof(size_t)) ? 1 : 0, + a_i, 0, OMPI_COUNT_ARRAY_NULL, 1, &oldtype, MPI_COMBINER_CONTIGUOUS ); } OMPI_ERRHANDLER_NOHANDLE_RETURN(rc, rc, FUNC_NAME ); diff --git a/ompi/mpi/c/type_create_darray.c.in b/ompi/mpi/c/type_create_darray.c.in index fcf00c8fe14..87e3d8b0066 100644 --- a/ompi/mpi/c/type_create_darray.c.in +++ b/ompi/mpi/c/type_create_darray.c.in @@ -33,11 +33,6 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ - PROTOTYPE ERROR_CLASS type_create_darray(INT size, INT rank, @@ -51,7 +46,6 @@ PROTOTYPE ERROR_CLASS type_create_darray(INT size, DATATYPE_OUT newtype) { int i, rc; - int *igsize_array = NULL; MEMCHECKER( memchecker_datatype(oldtype); @@ -75,14 +69,6 @@ PROTOTYPE ERROR_CLASS type_create_darray(INT size, return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, FUNC_NAME); } if( ndims > 0 ) { -#if OMPI_BIGCOUNT_SRC - for( i = 0; i < ndims; i++ ) { - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, gsize_array[i]); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } - } -#endif for( i = 0; i < ndims; i++ ) { if( (MPI_DISTRIBUTE_BLOCK != distrib_array[i]) && (MPI_DISTRIBUTE_CYCLIC != distrib_array[i]) && @@ -104,30 +90,32 @@ PROTOTYPE ERROR_CLASS type_create_darray(INT size, } } -#if OMPI_BIGCOUNT_SRC - igsize_array = (int *)malloc(ndims * sizeof(int)); - if (NULL == igsize_array) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - for (int ii=0;iiname, sizeof(datatype->name), "COMBINER %s", (*newtype)->name); - a_i[0] = &p; - a_i[1] = &r; - 
ompi_datatype_set_args( datatype, 2, a_i, 0, NULL, 0, NULL, MPI_COMBINER_F90_COMPLEX ); + ompi_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&p), OMPI_COUNT_ARRAY_CREATE(&r)}; + ompi_datatype_set_args( datatype, 2, 0, a_i, 0, OMPI_DISP_ARRAY_NULL, 0, NULL, MPI_COMBINER_F90_COMPLEX ); rc = opal_hash_table_set_value_uint64( &ompi_mpi_f90_complex_hashtable, key, datatype ); if (OMPI_SUCCESS != rc) { diff --git a/ompi/mpi/c/type_create_f90_integer.c.in b/ompi/mpi/c/type_create_f90_integer.c.in index 222794a9884..379369293e4 100644 --- a/ompi/mpi/c/type_create_f90_integer.c.in +++ b/ompi/mpi/c/type_create_f90_integer.c.in @@ -75,7 +75,6 @@ PROTOTYPE ERROR_CLASS type_create_f90_integer(INT r, DATATYPE_OUT newtype) if( *newtype != &ompi_mpi_datatype_null.dt ) { ompi_datatype_t* datatype; - const int* a_i[1]; int rc; if( OPAL_SUCCESS == opal_hash_table_get_value_uint32( &ompi_mpi_f90_integer_hashtable, @@ -97,8 +96,8 @@ PROTOTYPE ERROR_CLASS type_create_f90_integer(INT r, DATATYPE_OUT newtype) snprintf(datatype->name, sizeof(datatype->name), "COMBINER %s", (*newtype)->name); - a_i[0] = &r; - ompi_datatype_set_args( datatype, 1, a_i, 0, NULL, 0, NULL, MPI_COMBINER_F90_INTEGER ); + ompi_count_array_t a_i[1] = {OMPI_COUNT_ARRAY_CREATE(&r)}; + ompi_datatype_set_args( datatype, 1, 0, a_i, 0, OMPI_DISP_ARRAY_NULL, 0, NULL, MPI_COMBINER_F90_INTEGER ); rc = opal_hash_table_set_value_uint32( &ompi_mpi_f90_integer_hashtable, r, datatype ); if (OMPI_SUCCESS != rc) { diff --git a/ompi/mpi/c/type_create_f90_real.c.in b/ompi/mpi/c/type_create_f90_real.c.in index e7d2e28bde9..c9e5d61be8a 100644 --- a/ompi/mpi/c/type_create_f90_real.c.in +++ b/ompi/mpi/c/type_create_f90_real.c.in @@ -84,7 +84,6 @@ PROTOTYPE ERROR_CLASS type_create_f90_real(INT p, INT r, DATATYPE_OUT newtype) if( *newtype != &ompi_mpi_datatype_null.dt ) { ompi_datatype_t* datatype; - const int* a_i[2] = {&p, &r}; int rc; key = (((uint64_t)p_key) << 32) | ((uint64_t)r_key); @@ -107,7 +106,8 @@ PROTOTYPE ERROR_CLASS 
type_create_f90_real(INT p, INT r, DATATYPE_OUT newtype) snprintf(datatype->name, sizeof(datatype->name), "COMBINER %s", (*newtype)->name); - ompi_datatype_set_args( datatype, 2, a_i, 0, NULL, 0, NULL, MPI_COMBINER_F90_REAL ); + ompi_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&p), OMPI_COUNT_ARRAY_CREATE(&r)}; + ompi_datatype_set_args( datatype, 2, 0, a_i, 0, OMPI_DISP_ARRAY_NULL, 0, NULL, MPI_COMBINER_F90_REAL ); rc = opal_hash_table_set_value_uint64( &ompi_mpi_f90_real_hashtable, key, datatype ); if (OMPI_SUCCESS != rc) { diff --git a/ompi/mpi/c/type_create_hindexed.c.in b/ompi/mpi/c/type_create_hindexed.c.in index 79e03f91bb2..1852d7ed442 100644 --- a/ompi/mpi/c/type_create_hindexed.c.in +++ b/ompi/mpi/c/type_create_hindexed.c.in @@ -33,12 +33,6 @@ #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ - - PROTOTYPE ERROR_CLASS type_create_hindexed(COUNT count, COUNT_ARRAY array_of_blocklengths, AINT_COUNT_ARRAY array_of_displacements, @@ -46,8 +40,6 @@ PROTOTYPE ERROR_CLASS type_create_hindexed(COUNT count, DATATYPE_OUT newtype) { int rc, i; - int *iarray_of_blocklengths = NULL; - MPI_Aint *iarray_of_displacements = NULL; MEMCHECKER( memchecker_datatype(oldtype); @@ -73,34 +65,10 @@ PROTOTYPE ERROR_CLASS type_create_hindexed(COUNT count, FUNC_NAME ); } } -#if OMPI_BIGCOUNT_SRC - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, count); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } -#endif - } - -#if OMPI_BIGCOUNT_SRC - iarray_of_blocklengths = (int *)malloc(count * sizeof(int)); - if (NULL == iarray_of_blocklengths) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - iarray_of_displacements = (MPI_Aint *)malloc(count * sizeof(MPI_Aint)); - if (NULL == iarray_of_displacements) { - free( iarray_of_blocklengths); - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - for (i = 0; i < 
(int)count; i++) { - iarray_of_blocklengths[i] = (int)array_of_blocklengths[i]; - iarray_of_displacements[i] = (MPI_Aint)array_of_displacements[i]; } -#else - iarray_of_blocklengths = (int *)array_of_blocklengths; - iarray_of_displacements = (MPI_Aint *)array_of_displacements; -#endif - rc = ompi_datatype_create_hindexed( count, iarray_of_blocklengths, iarray_of_displacements, + rc = ompi_datatype_create_hindexed( count, OMPI_COUNT_ARRAY_CREATE(array_of_blocklengths), + OMPI_COUNT_ARRAY_CREATE(array_of_displacements), oldtype, newtype ); if( rc != MPI_SUCCESS ) { ompi_datatype_destroy( newtype ); @@ -108,15 +76,14 @@ PROTOTYPE ERROR_CLASS type_create_hindexed(COUNT count, } /* data description */ { - const int* a_i[2] = {(int *)&count, iarray_of_blocklengths}; + ompi_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&count), OMPI_COUNT_ARRAY_CREATE(array_of_blocklengths)}; - ompi_datatype_set_args( *newtype, count + 1, a_i, count, iarray_of_displacements, + ompi_datatype_set_args( *newtype, + (sizeof(count) != sizeof(size_t)) ? count + 1 : 0, + (sizeof(count) == sizeof(size_t)) ? 
count + 1 : 0, + a_i, count, OMPI_DISP_ARRAY_CREATE(array_of_displacements), 1, &oldtype, MPI_COMBINER_HINDEXED ); } -#if OMPI_BIGCOUNT_SRC - free(iarray_of_blocklengths); - free(iarray_of_displacements); -#endif return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_hindexed_block.c.in b/ompi/mpi/c/type_create_hindexed_block.c.in index f7bbadd3631..72f4d461638 100644 --- a/ompi/mpi/c/type_create_hindexed_block.c.in +++ b/ompi/mpi/c/type_create_hindexed_block.c.in @@ -25,10 +25,6 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ PROTOTYPE ERROR_CLASS type_create_hindexed_block(COUNT count, COUNT blocklength, @@ -37,7 +33,6 @@ PROTOTYPE ERROR_CLASS type_create_hindexed_block(COUNT count, DATATYPE_OUT newtype) { int rc; - MPI_Aint *iarray_of_displacements = NULL; MEMCHECKER( memchecker_datatype(oldtype); @@ -56,40 +51,23 @@ PROTOTYPE ERROR_CLASS type_create_hindexed_block(COUNT count, return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_TYPE, FUNC_NAME ); } -#if OMPI_BIGCOUNT_SRC - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, count); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } -#endif } -#if OMPI_BIGCOUNT_SRC - iarray_of_displacements = (MPI_Aint *)malloc(count * sizeof(MPI_Aint)); - if (NULL == iarray_of_displacements) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - for (int ii = 0; ii < (int)count; ii++) { - iarray_of_displacements[ii] = (MPI_Aint)array_of_displacements[ii]; - } -#else - iarray_of_displacements = (MPI_Aint *)array_of_displacements; -#endif - - rc = ompi_datatype_create_hindexed_block( count, blocklength, iarray_of_displacements, + rc = ompi_datatype_create_hindexed_block( count, blocklength, OMPI_DISP_ARRAY_CREATE(array_of_displacements), oldtype, newtype ); if( rc != MPI_SUCCESS ) { ompi_datatype_destroy( newtype ); 
OMPI_ERRHANDLER_NOHANDLE_RETURN( rc, rc, FUNC_NAME ); } { - const int* a_i[2] = {(int *)&count, (int *)&blocklength}; - ompi_datatype_set_args( *newtype, 2, a_i, count, iarray_of_displacements, 1, &oldtype, + ompi_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&count), OMPI_COUNT_ARRAY_CREATE(&blocklength)}; + + ompi_datatype_set_args( *newtype, + (sizeof(count) != sizeof(size_t)) ? 2 : 0, + (sizeof(count) == sizeof(size_t)) ? 2 : 0, + a_i, count, OMPI_DISP_ARRAY_CREATE(array_of_displacements), 1, &oldtype, MPI_COMBINER_HINDEXED_BLOCK ); } -#if OMPI_BIGCOUNT_SRC - free(iarray_of_displacements); -#endif return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_hvector.c.in b/ompi/mpi/c/type_create_hvector.c.in index cb93050d05c..5731770da97 100644 --- a/ompi/mpi/c/type_create_hvector.c.in +++ b/ompi/mpi/c/type_create_hvector.c.in @@ -32,10 +32,6 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ PROTOTYPE ERROR_CLASS type_create_hvector(COUNT count, COUNT blocklength, @@ -62,12 +58,6 @@ PROTOTYPE ERROR_CLASS type_create_hvector(COUNT count, return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_TYPE, FUNC_NAME ); } -#if OMPI_BIGCOUNT_SRC - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, count); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } -#endif } rc = ompi_datatype_create_hvector ( count, blocklength, stride, oldtype, @@ -75,10 +65,13 @@ PROTOTYPE ERROR_CLASS type_create_hvector(COUNT count, OMPI_ERRHANDLER_NOHANDLE_CHECK(rc, rc, FUNC_NAME ); { - const int* a_i[2] = {(int *)&count, (int *)&blocklength}; - MPI_Aint a_a[1] = {stride}; + ompi_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&count), OMPI_COUNT_ARRAY_CREATE(&blocklength)}; + ompi_disp_array_t a_a = OMPI_DISP_ARRAY_CREATE(&stride); - ompi_datatype_set_args( *newtype, 2, a_i, 1, a_a, 1, &oldtype, MPI_COMBINER_HVECTOR ); + 
ompi_datatype_set_args( *newtype, + (sizeof(count) != sizeof(size_t)) ? 2 : 0, + (sizeof(count) == sizeof(size_t)) ? 2 : 0, + a_i, 1, a_a, 1, &oldtype, MPI_COMBINER_HVECTOR ); } return MPI_SUCCESS; diff --git a/ompi/mpi/c/type_create_indexed_block.c.in b/ompi/mpi/c/type_create_indexed_block.c.in index 24732bf5086..c6c5e309f18 100644 --- a/ompi/mpi/c/type_create_indexed_block.c.in +++ b/ompi/mpi/c/type_create_indexed_block.c.in @@ -32,11 +32,6 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ - PROTOTYPE ERROR_CLASS type_create_indexed_block(COUNT count, COUNT blocklength, @@ -45,7 +40,6 @@ PROTOTYPE ERROR_CLASS type_create_indexed_block(COUNT count, DATATYPE_OUT newtype) { int rc; - int *iarray_of_displacements = NULL; MEMCHECKER( memchecker_datatype(oldtype); @@ -66,32 +60,23 @@ PROTOTYPE ERROR_CLASS type_create_indexed_block(COUNT count, } } -#if OMPI_BIGCOUNT_SRC - iarray_of_displacements = (int *)malloc(count * sizeof(int)); - if (NULL == iarray_of_displacements) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - for (int ii = 0; ii < (int)count; ii++) { - iarray_of_displacements[ii] = (int)array_of_displacements[ii]; - } -#else - iarray_of_displacements = (int *)array_of_displacements; -#endif - rc = ompi_datatype_create_indexed_block( count, blocklength, iarray_of_displacements, + rc = ompi_datatype_create_indexed_block( count, blocklength, OMPI_COUNT_ARRAY_CREATE(array_of_displacements), oldtype, newtype ); if( rc != MPI_SUCCESS ) { ompi_datatype_destroy( newtype ); OMPI_ERRHANDLER_NOHANDLE_RETURN( rc, rc, FUNC_NAME ); } { - const int* a_i[3] = {(int *)&count, (int *)&blocklength, iarray_of_displacements}; + ompi_count_array_t a_i[3] = {OMPI_COUNT_ARRAY_CREATE(&count), + OMPI_COUNT_ARRAY_CREATE(&blocklength), + OMPI_COUNT_ARRAY_CREATE(array_of_displacements)}; - ompi_datatype_set_args( *newtype, 2 + 
count, a_i, 0, NULL, 1, &oldtype, + ompi_datatype_set_args( *newtype, + (sizeof(count) != sizeof(size_t)) ? count + 2 : 0, + (sizeof(count) == sizeof(size_t)) ? count + 2 : 0, + a_i, 0, OMPI_DISP_ARRAY_NULL, 1, &oldtype, MPI_COMBINER_INDEXED_BLOCK ); } -#if OMPI_BIGCOUNT_SRC - free(iarray_of_displacements); -#endif return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_resized.c.in b/ompi/mpi/c/type_create_resized.c.in index 9e07109ec7b..da6719703d2 100644 --- a/ompi/mpi/c/type_create_resized.c.in +++ b/ompi/mpi/c/type_create_resized.c.in @@ -56,10 +56,8 @@ PROTOTYPE ERROR_CLASS type_create_resized(DATATYPE oldtype, } { - MPI_Aint a_a[2]; - a_a[0] = lb; - a_a[1] = extent; - ompi_datatype_set_args( *newtype, 0, NULL, 2, a_a, 1, &oldtype, MPI_COMBINER_RESIZED ); + MPI_Count a_a[2] = {lb, extent}; + ompi_datatype_set_args( *newtype, 0, 0, NULL, 2, OMPI_DISP_ARRAY_CREATE(a_a), 1, &oldtype, MPI_COMBINER_RESIZED ); } return MPI_SUCCESS; diff --git a/ompi/mpi/c/type_create_struct.c.in b/ompi/mpi/c/type_create_struct.c.in index accea45f603..c18eb6717d8 100644 --- a/ompi/mpi/c/type_create_struct.c.in +++ b/ompi/mpi/c/type_create_struct.c.in @@ -32,20 +32,13 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ - PROTOTYPE ERROR_CLASS type_create_struct(COUNT count, COUNT_ARRAY array_of_blocklengths, AINT_COUNT_ARRAY array_of_displacements, DATATYPE_ARRAY array_of_types, DATATYPE_OUT newtype) { - int i, rc, icount = (int)count; - int *iarray_of_blocklengths = NULL; - MPI_Aint *iarray_of_displacements = NULL; + int i, rc; if ( count > 0 ) { for ( i = 0; i < count; i++ ) { @@ -77,54 +70,25 @@ PROTOTYPE ERROR_CLASS type_create_struct(COUNT count, FUNC_NAME); } } -#if OMPI_BIGCOUNT_SRC - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, count); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } -#endif } -#if 
OMPI_BIGCOUNT_SRC - iarray_of_blocklengths = (int *)malloc(count * sizeof(int)); - if (NULL == iarray_of_blocklengths) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - iarray_of_displacements = (MPI_Aint *)malloc(count * sizeof(MPI_Aint)); - if (NULL == iarray_of_displacements) { - free(iarray_of_blocklengths); - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - for (int ii = 0; ii < (int)count; ii++) { - iarray_of_blocklengths[ii] = (int)array_of_blocklengths[ii]; - iarray_of_displacements[ii] = (MPI_Aint)array_of_displacements[ii]; - } -#else - iarray_of_blocklengths = (int *)array_of_blocklengths; - iarray_of_displacements = (MPI_Aint *)array_of_displacements; -#endif - rc = ompi_datatype_create_struct( icount, iarray_of_blocklengths, iarray_of_displacements, + rc = ompi_datatype_create_struct( count, OMPI_COUNT_ARRAY_CREATE(array_of_blocklengths), + OMPI_DISP_ARRAY_CREATE(array_of_displacements), array_of_types, newtype ); if( rc != MPI_SUCCESS ) { ompi_datatype_destroy( newtype ); -#if OMPI_BIGCOUNT_SRC - free(iarray_of_blocklengths); - free(iarray_of_displacements); -#endif - OMPI_ERRHANDLER_NOHANDLE_RETURN( rc, + OMPI_ERRHANDLER_NOHANDLE_RETURN( rc, rc, FUNC_NAME ); } { - const int* a_i[2] = {(int *)&icount, iarray_of_blocklengths}; - - ompi_datatype_set_args( *newtype, icount + 1, a_i, icount, iarray_of_displacements, - icount, array_of_types, MPI_COMBINER_STRUCT ); + ompi_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&count), OMPI_COUNT_ARRAY_CREATE(array_of_blocklengths)}; + ompi_datatype_set_args( *newtype, + (sizeof(count) != sizeof(size_t)) ? count + 1 : 0, + (sizeof(count) == sizeof(size_t)) ? 
count + 1 : 0, + a_i, count, OMPI_DISP_ARRAY_CREATE(array_of_displacements), + count, array_of_types, MPI_COMBINER_STRUCT ); } -#if OMPI_BIGCOUNT_SRC - free(iarray_of_blocklengths); - free(iarray_of_displacements); -#endif return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_subarray.c.in b/ompi/mpi/c/type_create_subarray.c.in index d25cde247c4..caa5fd47dc5 100644 --- a/ompi/mpi/c/type_create_subarray.c.in +++ b/ompi/mpi/c/type_create_subarray.c.in @@ -33,10 +33,6 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ PROTOTYPE ERROR_CLASS type_create_subarray(INT ndims, COUNT_ARRAY size_array, @@ -47,9 +43,6 @@ PROTOTYPE ERROR_CLASS type_create_subarray(INT ndims, DATATYPE_OUT newtype) { int32_t i, rc; - int *isize_array = NULL; - int *isubsize_array = NULL; - int *istart_array = NULL; MEMCHECKER( memchecker_datatype(oldtype); @@ -67,20 +60,6 @@ PROTOTYPE ERROR_CLASS type_create_subarray(INT ndims, return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, FUNC_NAME); } for( i = 0; i < ndims; i++ ) { -#if OMPI_BIGCOUNT_SRC - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, size_array[i]); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, subsize_array[i]); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, start_array[i]); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } -#endif if( (subsize_array[i] < 1) || (subsize_array[i] > size_array[i]) ) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, FUNC_NAME); } else if( (start_array[i] < 0) || (start_array[i] > (size_array[i] - subsize_array[i])) ) { @@ -89,45 +68,26 @@ PROTOTYPE ERROR_CLASS type_create_subarray(INT ndims, } } -#if OMPI_BIGCOUNT_SRC - isize_array = (int 
*)malloc(ndims * sizeof(int)); - if (NULL == isize_array) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - isubsize_array = (int *)malloc(ndims * sizeof(int)); - if (NULL == isubsize_array) { - free(isize_array); - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - istart_array = (int *)malloc(ndims * sizeof(int)); - if (NULL == istart_array) { - free(isize_array); - free(isubsize_array); - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - for (int ii = 0; ii < ndims; ii++) { - isize_array[ii] = (int)size_array[ii]; - isubsize_array[ii] = (int)subsize_array[ii]; - istart_array[ii] = (int)start_array[ii]; - } -#else - isize_array = (int *)size_array; - isubsize_array = (int *)subsize_array; - istart_array = (int *)start_array; -#endif - rc = ompi_datatype_create_subarray( ndims, isize_array, isubsize_array, istart_array, + rc = ompi_datatype_create_subarray( ndims, OMPI_COUNT_ARRAY_CREATE(size_array), OMPI_COUNT_ARRAY_CREATE(subsize_array), + OMPI_COUNT_ARRAY_CREATE(start_array), order, oldtype, newtype); if( OMPI_SUCCESS == rc ) { - const int* a_i[5] = {&ndims, isize_array, isubsize_array, istart_array, &order}; - - ompi_datatype_set_args( *newtype, 3 * ndims + 2, a_i, 0, NULL, 1, &oldtype, + ompi_count_array_t a_i[5] = {OMPI_COUNT_ARRAY_CREATE(&ndims), + OMPI_COUNT_ARRAY_CREATE(size_array), + OMPI_COUNT_ARRAY_CREATE(subsize_array), + OMPI_COUNT_ARRAY_CREATE(start_array), + OMPI_COUNT_ARRAY_CREATE(&order)}; + size_t ci, cl; + if (sizeof(size_array[0]) == sizeof(size_t)) { + ci = 2; + cl = 3*ndims; + } else { + ci = 3*ndims + 2; + cl = 0; + } + ompi_datatype_set_args( *newtype, ci, cl, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, &oldtype, MPI_COMBINER_SUBARRAY ); } -#if OMPI_BIGCOUNT_SRC - free(isize_array); - free(isubsize_array); - free(istart_array); -#endif OMPI_ERRHANDLER_NOHANDLE_RETURN(rc, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/type_dup.c.in b/ompi/mpi/c/type_dup.c.in index 
41abec68692..d5d5156acd1 100644 --- a/ompi/mpi/c/type_dup.c.in +++ b/ompi/mpi/c/type_dup.c.in @@ -52,7 +52,7 @@ PROTOTYPE ERROR_CLASS type_dup (DATATYPE type, DATATYPE_OUT newtype) OMPI_ERRHANDLER_NOHANDLE_RETURN( ret, ret, FUNC_NAME ); } - ompi_datatype_set_args( *newtype, 0, NULL, 0, NULL, 1, &type, MPI_COMBINER_DUP ); + ompi_datatype_set_args( *newtype, 0, 0, NULL, 0, OMPI_DISP_ARRAY_NULL, 1, &type, MPI_COMBINER_DUP ); /* Copy all the old attributes, if there were any. This is done here (vs. ompi_datatype_duplicate()) because MPI_TYPE_DUP is the diff --git a/ompi/mpi/c/type_get_contents.c b/ompi/mpi/c/type_get_contents.c index b2998818d5d..bb42727cd79 100644 --- a/ompi/mpi/c/type_get_contents.c +++ b/ompi/mpi/c/type_get_contents.c @@ -65,9 +65,11 @@ int MPI_Type_get_contents(MPI_Datatype mtype, } } - rc = ompi_datatype_get_args( mtype, 1, &max_integers, array_of_integers, - &max_addresses, array_of_addresses, - &max_datatypes, array_of_datatypes, NULL ); + size_t ci, cl, ca, cd; + rc = ompi_datatype_get_args( mtype, 1, &ci, array_of_integers, + &cl, NULL, + &ca, array_of_addresses, + &cd, array_of_datatypes, NULL ); if( rc != MPI_SUCCESS ) { OMPI_ERRHANDLER_NOHANDLE_RETURN( MPI_ERR_INTERN, MPI_ERR_INTERN, FUNC_NAME ); diff --git a/ompi/mpi/c/type_get_contents_c.c b/ompi/mpi/c/type_get_contents_c.c index 6be23ccc4c4..822ede64507 100644 --- a/ompi/mpi/c/type_get_contents_c.c +++ b/ompi/mpi/c/type_get_contents_c.c @@ -67,10 +67,11 @@ int MPI_Type_get_contents_c(MPI_Datatype mtype, } } -/* TODO:BIGCOUNT: Need to embiggen ompi_datatype_get_args */ - rc = ompi_datatype_get_args( mtype, 1, (int *)&max_integers, array_of_integers, - (int *)&max_addresses, array_of_addresses, - (int *)&max_datatypes, array_of_datatypes, NULL ); + size_t ci = max_integers, cl = max_large_counts, ca = max_addresses, cd = max_datatypes; + rc = ompi_datatype_get_args( mtype, 1, &ci, array_of_integers, + &cl, array_of_large_counts, + &ca, array_of_addresses, + &cd, array_of_datatypes, NULL ); 
if( rc != MPI_SUCCESS ) { OMPI_ERRHANDLER_NOHANDLE_RETURN( MPI_ERR_INTERN, MPI_ERR_INTERN, FUNC_NAME ); diff --git a/ompi/mpi/c/type_get_envelope.c b/ompi/mpi/c/type_get_envelope.c index 2d6861ec5d0..efcf937f655 100644 --- a/ompi/mpi/c/type_get_envelope.c +++ b/ompi/mpi/c/type_get_envelope.c @@ -61,7 +61,16 @@ int MPI_Type_get_envelope(MPI_Datatype type, } } - rc = ompi_datatype_get_args( type, 0, num_integers, NULL, num_addresses, NULL, - num_datatypes, NULL, combiner ); + size_t ci, cl, ca, cd; + rc = ompi_datatype_get_args( type, 0, &ci, NULL, &cl, NULL, &ca, NULL, + &cd, NULL, combiner ); + /* error out if we have large counts or any of the parameters don't fit */ + if (OMPI_SUCCESS == rc && (ci > INT_MAX || cl > 0 || ca > INT_MAX || cd > INT_MAX)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_TYPE, + FUNC_NAME ); + } + *num_integers = (int)ci; + *num_addresses = (int)ca; + *num_datatypes = (int)cd; OMPI_ERRHANDLER_NOHANDLE_RETURN( rc, rc, FUNC_NAME ); } diff --git a/ompi/mpi/c/type_get_envelope.c.in b/ompi/mpi/c/type_get_envelope.c.in deleted file mode 100644 index e5395d796e4..00000000000 --- a/ompi/mpi/c/type_get_envelope.c.in +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2024 Triad National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mpi/c/bindings.h" -#include "ompi/runtime/params.h" -#include "ompi/communicator/communicator.h" -#include "ompi/errhandler/errhandler.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/memchecker.h" - -PROTOTYPE ERROR_CLASS Type_get_envelope(DATATYPE type, - INT_OUT num_integers, - INT_OUT num_addresses, - INT_OUT num_datatypes, - INT_OUT combiner) -{ - int rc; - - MEMCHECKER( - memchecker_datatype(type); - ); - - if( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == type || MPI_DATATYPE_NULL == type) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_TYPE, - FUNC_NAME ); - } else if (NULL == num_integers || NULL == num_addresses || - NULL == num_datatypes || NULL == combiner) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, - FUNC_NAME ); - } - } - - rc = ompi_datatype_get_args( type, 0, num_integers, NULL, num_addresses, NULL, - num_datatypes, NULL, combiner ); - OMPI_ERRHANDLER_NOHANDLE_RETURN( rc, rc, FUNC_NAME ); -} diff --git a/ompi/mpi/c/type_get_envelope_c.c b/ompi/mpi/c/type_get_envelope_c.c index 24229e327cf..06b159b45db 100644 --- a/ompi/mpi/c/type_get_envelope_c.c +++ b/ompi/mpi/c/type_get_envelope_c.c @@ -61,9 +61,15 @@ int MPI_Type_get_envelope_c(MPI_Datatype type, } } -/* TODO:BIGCOUNT: Need to embiggen ompi_datatype_get_args */ - rc = ompi_datatype_get_args( type, 0, (int *)num_integers, NULL, (int *)num_addresses, NULL, - (int *)num_datatypes, NULL, combiner ); + size_t ci, cl, ca, cd; + rc = ompi_datatype_get_args( type, 0, &ci, NULL, &cl, NULL, &ca, NULL, + &cd, NULL, combiner ); + if( rc == MPI_SUCCESS ) { + *num_integers = (MPI_Count)ci; + *num_addresses = (MPI_Count)ca; + *num_large_counts = (MPI_Count)cl; + *num_datatypes = (MPI_Count)cd; + } OMPI_ERRHANDLER_NOHANDLE_RETURN( rc, rc, FUNC_NAME ); } diff --git a/ompi/mpi/c/type_indexed.c.in b/ompi/mpi/c/type_indexed.c.in index 
c3ff80c1b1c..79d8b897d49 100644 --- a/ompi/mpi/c/type_indexed.c.in +++ b/ompi/mpi/c/type_indexed.c.in @@ -16,6 +16,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2024 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2025 Stony Brook University. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,11 +33,6 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ - PROTOTYPE ERROR_CLASS type_indexed(COUNT count, COUNT_ARRAY array_of_blocklengths, @@ -45,8 +41,6 @@ PROTOTYPE ERROR_CLASS type_indexed(COUNT count, DATATYPE_OUT newtype) { int rc, i; - int *iarray_of_blocklengths; - int *iarray_of_displacements; MEMCHECKER( memchecker_datatype(oldtype); @@ -66,12 +60,6 @@ PROTOTYPE ERROR_CLASS type_indexed(COUNT count, return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, FUNC_NAME); } -#if OMPI_BIGCOUNT_SRC - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, count); - if (OMPI_SUCCESS != rc) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(rc, FUNC_NAME); - } -#endif for( i = 0; i < count; i++ ) { if( array_of_blocklengths[i] < 0 ) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, @@ -80,44 +68,26 @@ PROTOTYPE ERROR_CLASS type_indexed(COUNT count, } } -#if OMPI_BIGCOUNT_SRC - iarray_of_blocklengths = (int *)malloc(count * sizeof(int)); - if (NULL == iarray_of_blocklengths) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - iarray_of_displacements = (int *)malloc(count * sizeof(int)); - if (NULL == iarray_of_displacements) { - free(iarray_of_blocklengths); - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); - } - - for (int ii = 0; ii < (int)count; ii++) { - iarray_of_blocklengths[ii] = (int)array_of_blocklengths[ii]; - iarray_of_displacements[ii] = (int)array_of_displacements[ii]; - } -#else - iarray_of_blocklengths = (int 
*)array_of_blocklengths; - iarray_of_displacements = (int *)array_of_displacements; -#endif - rc = ompi_datatype_create_indexed ( count, iarray_of_blocklengths, - iarray_of_displacements, + rc = ompi_datatype_create_indexed ( count, OMPI_COUNT_ARRAY_CREATE(array_of_blocklengths), + OMPI_DISP_ARRAY_CREATE(array_of_displacements), oldtype, newtype ); if( rc != MPI_SUCCESS ) { ompi_datatype_destroy( newtype ); - OMPI_ERRHANDLER_NOHANDLE_RETURN( rc, + OMPI_ERRHANDLER_NOHANDLE_RETURN( rc, rc, FUNC_NAME ); } { - const int* a_i[3] = {(int *)&count, iarray_of_blocklengths, iarray_of_displacements}; + const ompi_count_array_t a_i[3] = {OMPI_COUNT_ARRAY_CREATE(&count), + OMPI_COUNT_ARRAY_CREATE(array_of_blocklengths), + OMPI_DISP_ARRAY_CREATE(array_of_displacements)}; - ompi_datatype_set_args( *newtype, 2 * count + 1, a_i, 0, NULL, 1, &oldtype, + ompi_datatype_set_args( *newtype, + (sizeof(count) != 8) ? 2 * count + 1 : 0, + (sizeof(count) == 8) ? 2 * count + 1 : 0, + a_i, 0, OMPI_DISP_ARRAY_NULL, 1, &oldtype, MPI_COMBINER_INDEXED ); } -#if OMPI_BIGCOUNT_SRC - free(iarray_of_blocklengths); - free(iarray_of_displacements); -#endif return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_vector.c.in b/ompi/mpi/c/type_vector.c.in index 08bad80d75f..9e9470490a5 100644 --- a/ompi/mpi/c/type_vector.c.in +++ b/ompi/mpi/c/type_vector.c.in @@ -32,10 +32,6 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -/* - * TODO:BIGCOUNT this file will need to be updated once - * the datatype framework supports bigcount - */ PROTOTYPE ERROR_CLASS type_vector(COUNT count, COUNT blocklength, @@ -62,21 +58,20 @@ PROTOTYPE ERROR_CLASS type_vector(COUNT count, OMPI_ERRHANDLER_NOHANDLE_RETURN( MPI_ERR_ARG, MPI_ERR_ARG, FUNC_NAME ); } -#if OMPI_BIGCOUNT_SRC - OMPI_CHECK_MPI_COUNT_INT_CONVERSION_OVERFLOW(rc, count); - if (OMPI_SUCCESS != rc) { - OMPI_ERRHANDLER_NOHANDLE_RETURN(rc, rc, FUNC_NAME); - } -#endif } rc = ompi_datatype_create_vector ( count, blocklength, stride, oldtype, 
newtype ); OMPI_ERRHANDLER_NOHANDLE_CHECK(rc, rc, FUNC_NAME ); { - const int* a_i[3] = {(int *)&count, (int *)&blocklength, (int *)&stride}; + const ompi_count_array_t a_i[3] = {OMPI_COUNT_ARRAY_CREATE(&count), + OMPI_COUNT_ARRAY_CREATE(&blocklength), + OMPI_COUNT_ARRAY_CREATE(&stride)}; - ompi_datatype_set_args( *newtype, 3, a_i, 0, NULL, 1, &oldtype, MPI_COMBINER_VECTOR ); + ompi_datatype_set_args( *newtype, + (sizeof(count) != 8) ? 3 : 0, + (sizeof(count) == 8) ? 3 : 0, + a_i, 0, OMPI_DISP_ARRAY_NULL, 1, &oldtype, MPI_COMBINER_VECTOR ); } return MPI_SUCCESS; diff --git a/ompi/util/count_disp_array.h b/ompi/util/count_disp_array.h index f95d65dc858..ea414d63da2 100644 --- a/ompi/util/count_disp_array.h +++ b/ompi/util/count_disp_array.h @@ -9,125 +9,107 @@ #ifndef OMPI_UTIL_COUNT_DISP_ARRAY_H #define OMPI_UTIL_COUNT_DISP_ARRAY_H -#include -#include -#include +#include "opal/util/count_disp_array.h" /* * NOTE: This code chooses between 64-bit and 32-bit pointers by using the * least significant bit as a flag (which is possible since these * pointers will always be multiples of 4 or 8). 
*/ -typedef intptr_t ompi_count_array_t; +typedef opal_count_array_t ompi_count_array_t; + +#define OMPI_COUNT_ARRAY_NULL OPAL_COUNT_ARRAY_NULL /* Initialize an int variant of the count array */ static inline void ompi_count_array_init(ompi_count_array_t *array, const int *data) { - *array = (intptr_t)data | 0x1L; + opal_count_array_init(array, data); } /* Initialize a bigcount variant of the count array */ static inline void ompi_count_array_init_c(ompi_count_array_t *array, const size_t *data) { - *array = (intptr_t)data; + opal_count_array_init_c(array, data); } -#if OPAL_C_HAVE__GENERIC -#define OMPI_COUNT_ARRAY_INIT(array, data) _Generic((data), \ - int *: ompi_count_array_init, \ - const int *: ompi_count_array_init, \ - size_t *: ompi_count_array_init_c, \ - const size_t *: ompi_count_array_init_c, \ - const MPI_Count *: ompi_count_array_init_c)(array, (const void *) data) -#else -#define OMPI_COUNT_ARRAY_INIT(array, data) \ - do { \ - if (sizeof(*(data)) == sizeof(int)) { \ - ompi_count_array_init(array, (const int *) (data)); \ - } else if (sizeof(*(data)) == sizeof(size_t)) { \ - ompi_count_array_init_c(array, (const size_t *) (data)); \ - } \ - } while (0) -#endif +#define OMPI_COUNT_ARRAY_INIT(array, data) OPAL_COUNT_ARRAY_INIT(array, data) + + +static inline ompi_count_array_t ompi_count_array_create(const int *data) +{ + return opal_count_array_create(data); +} + +static inline ompi_count_array_t ompi_count_array_create_c(const size_t *data) +{ + return opal_count_array_create_c(data); +} + +#define OMPI_COUNT_ARRAY_CREATE(data) OPAL_COUNT_ARRAY_CREATE(data) /* Return if the internal type is 64-bit or not */ static inline bool ompi_count_array_is_64bit(ompi_count_array_t array) { - return !(array & 0x1L) && sizeof(size_t) == 8; + return opal_count_array_is_64bit(array); } static inline const void *ompi_count_array_ptr(ompi_count_array_t array) { - if (OPAL_LIKELY(array & 0x1L)){ - return (const void *)(array & ~0x1L); - } - return (const void *) 
array; + return opal_count_array_ptr(array); } /* Get a count in the array at index i */ static inline size_t ompi_count_array_get(ompi_count_array_t array, size_t i) { - if (OPAL_LIKELY(array & 0x1L)){ - const int *iptr = (const int *)(array & ~0x1L); - return iptr[i]; - } - return ((const size_t *)array)[i]; + return opal_count_array_get(array, i); } -typedef intptr_t ompi_disp_array_t; +typedef opal_disp_array_t ompi_disp_array_t; + +#define OMPI_DISP_ARRAY_NULL OPAL_DISP_ARRAY_NULL /* Initialize an int variant of the disp array */ static inline void ompi_disp_array_init(ompi_disp_array_t *array, const int *data) { - *array = (intptr_t)data | 0x1L; + opal_disp_array_init(array, data); } /* Initialize a bigcount variant of the disp array */ static inline void ompi_disp_array_init_c(ompi_disp_array_t *array, const ptrdiff_t *data) { - *array = (intptr_t)data; + opal_disp_array_init_c(array, data); } -#if OPAL_C_HAVE__GENERIC -#define OMPI_DISP_ARRAY_INIT(array, data) _Generic((data), \ - int *: ompi_disp_array_init, \ - const int *: ompi_disp_array_init, \ - ptrdiff_t *: ompi_disp_array_init_c, \ - const ptrdiff_t *: ompi_disp_array_init_c)(array, data) -#else -#define OMPI_DISP_ARRAY_INIT(array, data) \ - do { \ - if (sizeof(*(data)) == sizeof(int)) { \ - ompi_disp_array_init(array, (const int *) (data)); \ - } else if (sizeof(*(data)) == sizeof(ptrdiff_t)) { \ - ompi_disp_array_init_c(array, (const ptrdiff_t *) (data)); \ - } \ - } while(0) -#endif +#define OMPI_DISP_ARRAY_INIT(array, data) OPAL_DISP_ARRAY_INIT(array, data) + +static inline ompi_disp_array_t ompi_disp_array_create(const int *data) +{ + return opal_disp_array_create(data); +} + +static inline ompi_disp_array_t ompi_disp_array_create_c(const ptrdiff_t *data) +{ + return opal_disp_array_create_c(data); +} + +#define OMPI_DISP_ARRAY_CREATE(data) OPAL_DISP_ARRAY_CREATE(data) /* Return if the internal type is 64-bit or not */ static inline bool ompi_disp_array_is_64bit(ompi_disp_array_t array) { - 
return !(array & 0x1L) && sizeof(ptrdiff_t) == 8; + return opal_disp_array_is_64bit(array); } /* Get a displacement in the array at index i */ static inline ptrdiff_t ompi_disp_array_get(ompi_disp_array_t array, size_t i) { - if (OPAL_LIKELY(array & 0x1L)){ - const int *iptr = (const int *)(array & ~0x1L); - return iptr[i]; - } - return ((const ptrdiff_t *)array)[i]; + return opal_disp_array_get(array, i); } /* Get a direct pointer to the data */ static inline const void *ompi_disp_array_ptr(ompi_disp_array_t array) { - if (OPAL_LIKELY(array & 0x1L)){ - return (const void *)(array & ~0x1L); - } - return (const void *)array; + return opal_disp_array_ptr(array); } #endif diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 8550683a60d..b0447c44a71 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -253,7 +253,7 @@ int32_t opal_convertor_pack(opal_convertor_t *pConv, struct iovec *iov, uint32_t * environment. The convertor contain minimal information, we only * use the bConverted to manage the conversion. */ - uint32_t i; + size_t i; unsigned char *base_pointer; size_t pending_length = pConv->local_size - pConv->bConverted; @@ -303,7 +303,7 @@ int32_t opal_convertor_unpack(opal_convertor_t *pConv, struct iovec *iov, uint32 * environment. The convertor contain minimal information, we only * use the bConverted to manage the conversion. 
*/ - uint32_t i; + size_t i; unsigned char *base_pointer; size_t pending_length = pConv->local_size - pConv->bConverted; diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index 5f7fc53fa7d..6f58d541087 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -194,8 +194,8 @@ OPAL_DECLSPEC extern const opal_datatype_t opal_datatype_unsigned_long; */ int opal_datatype_register_params(void); OPAL_DECLSPEC int32_t opal_datatype_init(void); -OPAL_DECLSPEC opal_datatype_t *opal_datatype_create(int32_t expectedSize); -OPAL_DECLSPEC int32_t opal_datatype_create_desc(opal_datatype_t *datatype, int32_t expectedSize); +OPAL_DECLSPEC opal_datatype_t *opal_datatype_create(ssize_t expectedSize); +OPAL_DECLSPEC int32_t opal_datatype_create_desc(opal_datatype_t *datatype, ssize_t expectedSize); OPAL_DECLSPEC int32_t opal_datatype_commit(opal_datatype_t *pData); OPAL_DECLSPEC int32_t opal_datatype_destroy(opal_datatype_t **); OPAL_DECLSPEC int32_t opal_datatype_is_monotonic(opal_datatype_t *type); diff --git a/opal/datatype/opal_datatype_add.c b/opal/datatype/opal_datatype_add.c index 2618ad3ba5b..3fce2693e03 100644 --- a/opal/datatype/opal_datatype_add.c +++ b/opal/datatype/opal_datatype_add.c @@ -127,7 +127,7 @@ static inline int IMAX(int a, int b) int32_t opal_datatype_add(opal_datatype_t *pdtBase, const opal_datatype_t *pdtAdd, size_t count, ptrdiff_t disp, ptrdiff_t extent) { - uint32_t newLength, place_needed = 0, i; + opal_datatype_count_t newLength, place_needed = 0, i; short localFlags = 0; /* no specific options yet */ dt_elem_desc_t *pLast, *pLoop = NULL; ptrdiff_t lb, ub, true_lb, true_ub, epsilon, old_true_ub; diff --git a/opal/datatype/opal_datatype_create.c b/opal/datatype/opal_datatype_create.c index 536bdb6bd87..feb07af60e5 100644 --- a/opal/datatype/opal_datatype_create.c +++ b/opal/datatype/opal_datatype_create.c @@ -91,7 +91,7 @@ static void opal_datatype_destruct(opal_datatype_t *datatype) 
OBJ_CLASS_INSTANCE(opal_datatype_t, opal_object_t, opal_datatype_construct, opal_datatype_destruct); -opal_datatype_t *opal_datatype_create(int32_t expectedSize) +opal_datatype_t *opal_datatype_create(ssize_t expectedSize) { opal_datatype_t *datatype = (opal_datatype_t *) OBJ_NEW(opal_datatype_t); @@ -107,7 +107,7 @@ opal_datatype_t *opal_datatype_create(int32_t expectedSize) return datatype; } -int32_t opal_datatype_create_desc(opal_datatype_t *datatype, int32_t expectedSize) +int32_t opal_datatype_create_desc(opal_datatype_t *datatype, ssize_t expectedSize) { if (expectedSize == -1) { expectedSize = DT_INCREASE_STACK; diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am index afd657b5b09..e75a6b531c2 100644 --- a/opal/util/Makefile.am +++ b/opal/util/Makefile.am @@ -55,6 +55,7 @@ headers = \ bit_ops.h \ clock_gettime.h \ cmd_line.h \ + count_disp_array.h \ crc.h \ ethtool.h \ error.h \ diff --git a/opal/util/count_disp_array.h b/opal/util/count_disp_array.h new file mode 100644 index 00000000000..6ea9874b659 --- /dev/null +++ b/opal/util/count_disp_array.h @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. + * Copyright (c) 2025 Stony Brook University. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef OPAL_UTIL_COUNT_DISP_ARRAY_H +#define OPAL_UTIL_COUNT_DISP_ARRAY_H + +#include +#include +#include +#include +#include "opal_config.h" + +/* + * NOTE: This code chooses between 64-bit and 32-bit pointers by using the + * least significant bit as a flag (which is possible since these + * pointers will always be multiples of 4 or 8). 
+ */ +typedef intptr_t opal_count_array_t; + +#define OPAL_COUNT_ARRAY_NULL ((opal_count_array_t)0) + +/* Initialize an int variant of the count array */ +static inline void opal_count_array_init(opal_count_array_t *array, const int *data) +{ + *array = (intptr_t)data | 0x1L; +} + +/* Initialize a bigcount variant of the count array */ +static inline void opal_count_array_init_c(opal_count_array_t *array, const size_t *data) +{ + *array = (intptr_t)data; +} + +#if OPAL_C_HAVE__GENERIC +#define OPAL_COUNT_ARRAY_INIT(array, data) _Generic((data), \ + int *: opal_count_array_init, \ + const int *: opal_count_array_init, \ + size_t *: opal_count_array_init_c, \ + const size_t *: opal_count_array_init_c, \ + MPI_Count *: opal_count_array_init_c, \ + const MPI_Count *: opal_count_array_init_c)(array, (const void *) data) +#else +#define OPAL_COUNT_ARRAY_INIT(array, data) \ + do { \ + if (sizeof(*(data)) == sizeof(int)) { \ + opal_count_array_init(array, (const int *) (data)); \ + } else if (sizeof(*(data)) == sizeof(size_t)) { \ + opal_count_array_init_c(array, (const size_t *) (data)); \ + } \ + } while (0) +#endif + + +static inline opal_count_array_t opal_count_array_create(const int *data) +{ + opal_count_array_t array; + opal_count_array_init(&array, data); + return array; +} + +static inline opal_count_array_t opal_count_array_create_c(const size_t *data) +{ + opal_count_array_t array; + opal_count_array_init_c(&array, data); + return array; +} + +static inline opal_count_array_t opal_count_array_create_with_size(const void *data, size_t size) +{ + if (size == sizeof(int)) { + return opal_count_array_create(data); + } else { + return opal_count_array_create_c(data); + } +} + +#define OPAL_COUNT_ARRAY_CREATE(data) opal_count_array_create_with_size((data), sizeof(*(data))) + + +/* Return if the internal type is 64-bit or not */ +static inline bool opal_count_array_is_64bit(opal_count_array_t array) +{ + return !(array & 0x1L) && sizeof(size_t) == 8; +} + +static 
inline size_t opal_count_array_sizeof(opal_count_array_t array) +{ + return opal_count_array_is_64bit(array) ? sizeof(size_t) : sizeof(int); +} + +static inline const void *opal_count_array_ptr(opal_count_array_t array) +{ + if (OPAL_LIKELY(array & 0x1L)){ + return (const void *)(array & ~0x1L); + } + return (const void *) array; +} + +/* Get a count in the array at index i */ +static inline size_t opal_count_array_get(opal_count_array_t array, size_t i) +{ + if (OPAL_LIKELY(array & 0x1L)){ + const int *iptr = (const int *)(array & ~0x1L); + return (size_t)iptr[i]; + } + return ((const size_t *)array)[i]; +} + +/* Set a count in the array at index i */ +static inline void opal_count_array_set(opal_count_array_t array, size_t i, size_t val) +{ + if (OPAL_LIKELY(array & 0x1L)){ + int *iptr = (int *)(array & ~0x1L); + iptr[i] = (int)val; + } else { + size_t *sptr = (size_t *)array; + sptr[i] = val; + } +} + +typedef intptr_t opal_disp_array_t; + +#define OPAL_DISP_ARRAY_NULL ((opal_disp_array_t)0) + +/* Initialize an int variant of the disp array */ +static inline void opal_disp_array_init(opal_disp_array_t *array, const int *data) +{ + *array = (intptr_t)data | 0x1L; +} + +/* Initialize a bigcount variant of the disp array */ +static inline void opal_disp_array_init_c(opal_disp_array_t *array, const ptrdiff_t *data) +{ + *array = (intptr_t)data; +} + +#if OPAL_C_HAVE__GENERIC +#define OPAL_DISP_ARRAY_INIT(array, data) _Generic((data), \ + int *: opal_disp_array_init, \ + const int *: opal_disp_array_init, \ + ptrdiff_t *: opal_disp_array_init_c, \ + const ptrdiff_t *: opal_disp_array_init_c)(array, data) +#else +#define OPAL_DISP_ARRAY_INIT(array, data) \ + do { \ + if (sizeof(*(data)) == sizeof(int)) { \ + opal_disp_array_init(array, (const int *) (data)); \ + } else if (sizeof(*(data)) == sizeof(ptrdiff_t)) { \ + opal_disp_array_init_c(array, (const ptrdiff_t *) (data)); \ + } \ + } while(0) +#endif + + + +static inline opal_disp_array_t 
opal_disp_array_create(const int *data) +{ + opal_disp_array_t array; + opal_disp_array_init(&array, data); + return array; +} + +static inline opal_disp_array_t opal_disp_array_create_c(const ptrdiff_t *data) +{ + opal_disp_array_t array; + opal_disp_array_init_c(&array, data); + return array; +} + +static inline opal_disp_array_t opal_disp_array_create_with_size(const void *data, size_t size) +{ + if (size == sizeof(int)) { + return opal_disp_array_create(data); + } else { + return opal_disp_array_create_c(data); + } +} + +#define OPAL_DISP_ARRAY_CREATE(data) opal_disp_array_create_with_size(data, sizeof(*(data))) + + +/* Return if the internal type is 64-bit or not */ +static inline bool opal_disp_array_is_64bit(opal_disp_array_t array) +{ + return !(array & 0x1L) && sizeof(ptrdiff_t) == 8; +} + +static inline size_t opal_disp_array_sizeof(opal_disp_array_t array) +{ + return opal_disp_array_is_64bit(array) ? sizeof(ptrdiff_t) : sizeof(int); +} + +/* Get a displacement in the array at index i */ +static inline ptrdiff_t opal_disp_array_get(opal_disp_array_t array, size_t i) +{ + if (OPAL_LIKELY(array & 0x1L)){ + const int *iptr = (const int *)(array & ~0x1L); + return iptr[i]; + } + return ((const ptrdiff_t *)array)[i]; +} + +/* Set a displacement in the array at index i */ +static inline void opal_disp_array_set(opal_disp_array_t array, size_t i, ptrdiff_t val) +{ + if (OPAL_LIKELY(array & 0x1L)){ + int *iptr = (int *)(array & ~0x1L); + iptr[i] = (int)val; + } else { + ptrdiff_t *pptr = (ptrdiff_t *)array; + pptr[i] = val; + } +} + + +/* Get a direct pointer to the data */ +static inline const void *opal_disp_array_ptr(opal_disp_array_t array) +{ + if (OPAL_LIKELY(array & 0x1L)){ + return (const void *)(array & ~0x1L); + } + return (const void *)array; +} + +#endif diff --git a/test/datatype/ddt_lib.c b/test/datatype/ddt_lib.c index 0a0e9dc7e01..0901e9823f0 100644 --- a/test/datatype/ddt_lib.c +++ b/test/datatype/ddt_lib.c @@ -134,7 +134,8 @@ ompi_datatype_t 
*upper_matrix(unsigned int mat_size) blocklen[i] = mat_size - i; } - ompi_datatype_create_indexed(mat_size, blocklen, disp, &ompi_mpi_double.dt, &upper); + ompi_datatype_create_indexed(mat_size, OMPI_COUNT_ARRAY_CREATE(blocklen), + OMPI_DISP_ARRAY_CREATE(disp), &ompi_mpi_double.dt, &upper); ompi_datatype_commit(&upper); if (outputFlags & DUMP_DATA_AFTER_COMMIT) { ompi_datatype_dump(upper); @@ -158,7 +159,8 @@ ompi_datatype_t *lower_matrix(unsigned int mat_size) blocklen[i] = i; } - ompi_datatype_create_indexed(mat_size, blocklen, disp, &ompi_mpi_double.dt, &upper); + ompi_datatype_create_indexed(mat_size, OMPI_COUNT_ARRAY_CREATE(blocklen), + OMPI_DISP_ARRAY_CREATE(disp), &ompi_mpi_double.dt, &upper); free(disp); free(blocklen); return upper; @@ -175,7 +177,8 @@ ompi_datatype_t *test_matrix_borders(unsigned int size, unsigned int width) disp[1] = (size - width) * sizeof(double); blocklen[1] = width; - ompi_datatype_create_indexed(2, blocklen, disp, &ompi_mpi_double.dt, &pdt_line); + ompi_datatype_create_indexed(2, OMPI_COUNT_ARRAY_CREATE(blocklen), + OMPI_DISP_ARRAY_CREATE(disp), &ompi_mpi_double.dt, &pdt_line); ompi_datatype_create_contiguous(size, pdt_line, &pdt); OBJ_RELEASE(pdt_line); /*assert( pdt_line == NULL );*/ return pdt; @@ -224,7 +227,8 @@ ompi_datatype_t *test_struct_char_double(void) displ[0] = (char *) &(data.c) - (char *) &(data); displ[1] = (char *) &(data.d) - (char *) &(data); - ompi_datatype_create_struct(2, lengths, displ, types, &pdt); + ompi_datatype_create_struct(2, OMPI_COUNT_ARRAY_CREATE(lengths), + OMPI_DISP_ARRAY_CREATE(displ), types, &pdt); ompi_datatype_commit(&pdt); if (outputFlags & DUMP_DATA_AFTER_COMMIT) { ompi_datatype_dump(pdt); @@ -276,7 +280,8 @@ ompi_datatype_t *test_create_blacs_type(void) { ompi_datatype_t *pdt; - ompi_datatype_create_indexed(18, blacs_length, blacs_indices, &ompi_mpi_int.dt, &pdt); + ompi_datatype_create_indexed(18, OMPI_COUNT_ARRAY_CREATE(blacs_length), + OMPI_DISP_ARRAY_CREATE(blacs_indices), 
&ompi_mpi_int.dt, &pdt); ompi_datatype_commit(&pdt); if (outputFlags & DUMP_DATA_AFTER_COMMIT) { ompi_datatype_dump(pdt); @@ -327,7 +332,8 @@ ompi_datatype_t *test_struct(void) types[1] = pdt1; - ompi_datatype_create_struct(3, lengths, disp, types, &pdt); + ompi_datatype_create_struct(3, OMPI_COUNT_ARRAY_CREATE(lengths), + OMPI_DISP_ARRAY_CREATE(disp), types, &pdt); OBJ_RELEASE(pdt1); /*assert( pdt1 == NULL );*/ if (outputFlags & DUMP_DATA_AFTER_COMMIT) { ompi_datatype_dump(pdt); @@ -356,7 +362,8 @@ ompi_datatype_t *create_struct_constant_gap_resized_ddt(ompi_datatype_t *type) disps[1] -= disps[2]; /* 8 */ disps[0] -= disps[2]; /* 16 */ - ompi_datatype_create_struct(2, blocklens, disps, types, &temp_type); + ompi_datatype_create_struct(2, OMPI_COUNT_ARRAY_CREATE(blocklens), + OMPI_DISP_ARRAY_CREATE(disps), types, &temp_type); ompi_datatype_create_resized(temp_type, 0, sizeof(data[0]), &struct_type); ompi_datatype_commit(&struct_type); OBJ_RELEASE(temp_type); @@ -394,7 +401,7 @@ ompi_datatype_t *create_strange_dt(void) dispi[0] = (int) ((char *) &(v[0].i1) - (char *) &(v[0])); /* 0 */ dispi[1] = (int) (((char *) (&(v[0].i2)) - (char *) &(v[0])) / sizeof(int)); /* 2 */ - ompi_datatype_create_indexed_block(2, 1, dispi, &ompi_mpi_int.dt, &pdtTemp); + ompi_datatype_create_indexed_block(2, 1, OMPI_DISP_ARRAY_CREATE(dispi), &ompi_mpi_int.dt, &pdtTemp); #ifdef USE_RESIZED /* optional */ displ[0] = 0; @@ -411,7 +418,8 @@ ompi_datatype_t *create_strange_dt(void) displ[0] = 0; displ[1] = (long) ((char *) &(t[0].v[0]) - (char *) &(t[0])); displ[2] = (long) ((char *) &(t[0].last) - (char *) &(t[0])); - ompi_datatype_create_struct(3, pBlock, displ, types, &pdtTemp); + ompi_datatype_create_struct(3, OMPI_COUNT_ARRAY_CREATE(pBlock), + OMPI_DISP_ARRAY_CREATE(displ), types, &pdtTemp); #ifdef USE_RESIZED /* optional */ displ[1] = (char *) &(t[1]) - (char *) &(t[0]); diff --git a/test/datatype/ddt_pack.c b/test/datatype/ddt_pack.c index bdb6bc462ea..5deeb53749b 100644 --- 
a/test/datatype/ddt_pack.c +++ b/test/datatype/ddt_pack.c @@ -118,14 +118,14 @@ int main(int argc, char *argv[]) types[1] = &ompi_mpi_int.dt; types[2] = &ompi_mpi_int.dt; types[3] = &ompi_mpi_int.dt; - ret = ompi_datatype_create_struct(4, blen, disp, types, &struct_type); + ret = ompi_datatype_create_struct(4, OMPI_COUNT_ARRAY_CREATE(blen), OMPI_DISP_ARRAY_CREATE(disp), types, &struct_type); if (ret != 0) goto cleanup; { int count = 4; - const int *a_i[2] = {&count, blen}; - ret = ompi_datatype_set_args(struct_type, count + 1, a_i, count, disp, count, types, + const ompi_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&count), OMPI_COUNT_ARRAY_CREATE(blen)}; + ret = ompi_datatype_set_args(struct_type, count + 1, 0, a_i, count, OMPI_DISP_ARRAY_CREATE(disp), count, types, MPI_COMBINER_STRUCT); if (ret != 0) goto cleanup; @@ -190,9 +190,11 @@ int main(int argc, char *argv[]) int count = 2; int blocklength = 1; int stride = 1; - const int *a_i[3] = {&count, &blocklength, &stride}; + const ompi_count_array_t a_i[3] = {OMPI_COUNT_ARRAY_CREATE(&count), + OMPI_COUNT_ARRAY_CREATE(&blocklength), + OMPI_COUNT_ARRAY_CREATE(&stride)}; ompi_datatype_t *type = &ompi_mpi_int.dt; - ret = ompi_datatype_set_args(vec_type, 3, a_i, 0, NULL, 1, &type, MPI_COMBINER_VECTOR); + ret = ompi_datatype_set_args(vec_type, 3, 0, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, &type, MPI_COMBINER_VECTOR); if (ret != 0) goto cleanup; } @@ -251,16 +253,18 @@ int main(int argc, char *argv[]) blen[0] = 0; blen[1] = 20 * sizeof(double); - ret = ompi_datatype_create_indexed_block(2, 10, blen, &ompi_mpi_double.dt, &newType); + ret = ompi_datatype_create_indexed_block(2, 10, OMPI_COUNT_ARRAY_CREATE(blen), &ompi_mpi_double.dt, &newType); if (ret != 0) goto cleanup; { int count = 2; int blocklength = 10; - const int *a_i[3] = {&count, &blocklength, blen}; + const ompi_count_array_t a_i[3] = {OMPI_COUNT_ARRAY_CREATE(&count), + OMPI_COUNT_ARRAY_CREATE(&blocklength), + OMPI_COUNT_ARRAY_CREATE(blen)}; ompi_datatype_t *oldtype 
= &ompi_mpi_double.dt; - ompi_datatype_set_args(newType, 2 + count, a_i, 0, NULL, 1, &oldtype, + ompi_datatype_set_args(newType, 2 + count, 0, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, &oldtype, MPI_COMBINER_INDEXED_BLOCK); if (ret != 0) goto cleanup; @@ -322,15 +326,16 @@ int main(int argc, char *argv[]) disp[0] = 0; disp[1] = 20 * sizeof(double); - ret = ompi_datatype_create_hindexed(2, blen, disp, &ompi_mpi_double.dt, &newType); + ret = ompi_datatype_create_hindexed(2, OMPI_COUNT_ARRAY_CREATE(blen), + OMPI_DISP_ARRAY_CREATE(disp), &ompi_mpi_double.dt, &newType); if (ret != 0) goto cleanup; { int count = 2; - const int *a_i[2] = {&count, blen}; + const ompi_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&count), OMPI_COUNT_ARRAY_CREATE(blen)}; ompi_datatype_t *oldtype = &ompi_mpi_double.dt; - ret = ompi_datatype_set_args(newType, count + 1, a_i, count, disp, 1, &oldtype, + ret = ompi_datatype_set_args(newType, count + 1, 0, a_i, count, OMPI_DISP_ARRAY_CREATE(disp), 1, &oldtype, MPI_COMBINER_HINDEXED); if (ret != 0) goto cleanup; @@ -388,14 +393,14 @@ int main(int argc, char *argv[]) disp[1] = 64; types[0] = &ompi_mpi_int.dt; types[1] = newType; - ret = ompi_datatype_create_struct(2, blen, disp, types, &struct_type); + ret = ompi_datatype_create_struct(2, OMPI_COUNT_ARRAY_CREATE(blen), OMPI_DISP_ARRAY_CREATE(disp), types, &struct_type); if (ret != 0) goto cleanup; { int count = 2; - const int *a_i[2] = {&count, blen}; - ret = ompi_datatype_set_args(struct_type, count + 1, a_i, count, disp, count, types, + const ompi_count_array_t a_i[2] = {OMPI_COUNT_ARRAY_CREATE(&count), OMPI_COUNT_ARRAY_CREATE(blen)}; + ret = ompi_datatype_set_args(struct_type, count + 1, 0, a_i, count, OMPI_DISP_ARRAY_CREATE(disp), count, types, MPI_COMBINER_STRUCT); if (ret != 0) goto cleanup; @@ -461,7 +466,7 @@ int main(int argc, char *argv[]) if (ret != 0) goto cleanup; ompi_datatype_t *type = &ompi_mpi_int.dt; - ret = ompi_datatype_set_args(dup_type, 0, NULL, 0, NULL, 1, &type, MPI_COMBINER_DUP); 
+ ret = ompi_datatype_set_args(dup_type, 0, 0, NULL, 0, OMPI_DISP_ARRAY_NULL, 1, &type, MPI_COMBINER_DUP); if (ret != 0) goto cleanup; packed_ddt_len = ompi_datatype_pack_description_length(dup_type); diff --git a/test/datatype/external32.c b/test/datatype/external32.c index 9ef83afac57..b13b554b7fc 100644 --- a/test/datatype/external32.c +++ b/test/datatype/external32.c @@ -230,10 +230,12 @@ int main(int argc, char *argv[]) ompi_datatype_create_vector(count, blocklength, stride, &ompi_mpi_int.dt, &ddt); { - const int *a_i[3] = {&count, &blocklength, &stride}; + const ompi_count_array_t a_i[3] = {OMPI_COUNT_ARRAY_CREATE(&count), + OMPI_COUNT_ARRAY_CREATE(&blocklength), + OMPI_COUNT_ARRAY_CREATE(&stride)}; ompi_datatype_t *type = &ompi_mpi_int.dt; - ompi_datatype_set_args(ddt, 3, a_i, 0, NULL, 1, &type, MPI_COMBINER_VECTOR); + ompi_datatype_set_args(ddt, 3, 0, a_i, 0, OMPI_DISP_ARRAY_NULL, 1, &type, MPI_COMBINER_VECTOR); } ompi_datatype_commit(&ddt); diff --git a/test/datatype/large_data.c b/test/datatype/large_data.c index 5558d6a6455..93f8ffee7e6 100644 --- a/test/datatype/large_data.c +++ b/test/datatype/large_data.c @@ -105,7 +105,7 @@ int main(int argc, char *argv[]) /* * Large sparse datatype: indexed contiguous */ - ompi_datatype_create_indexed(2, scounts, sdispls, ddt, &stype); + ompi_datatype_create_indexed(2, OMPI_COUNT_ARRAY_CREATE(scounts), OMPI_DISP_ARRAY_CREATE(sdispls), ddt, &stype); ompi_datatype_commit(&stype); packed = count_length_via_convertor_raw("1. INDEX", stype, 1); @@ -121,7 +121,7 @@ int main(int argc, char *argv[]) /* * Large contiguous datatype: indexed contiguous */ - ompi_datatype_create_indexed(2, rcounts, rdispls, ddt, &rtype); + ompi_datatype_create_indexed(2, OMPI_COUNT_ARRAY_CREATE(rcounts), OMPI_DISP_ARRAY_CREATE(rdispls), ddt, &rtype); ompi_datatype_commit(&rtype); packed = count_length_via_convertor_raw("2. 
INDEX", rtype, 1); diff --git a/test/datatype/unpack_ooo.c b/test/datatype/unpack_ooo.c index 7fdfe790916..ec4d89faecd 100644 --- a/test/datatype/unpack_ooo.c +++ b/test/datatype/unpack_ooo.c @@ -264,7 +264,8 @@ static int unpack_ooo(void) len[0] = 1; len[1] = 1; - rc = ompi_datatype_create_struct(2, len, disp, type, &newtype); + rc = ompi_datatype_create_struct(2, OMPI_COUNT_ARRAY_CREATE(len), + OMPI_DISP_ARRAY_CREATE(disp), type, &newtype); if (OMPI_SUCCESS != rc) { fprintf(stderr, "could not create struct\n"); return 1;