33 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
44 * University Research and Technology
55 * Corporation. All rights reserved.
6- * Copyright (c) 2004-2016 The University of Tennessee and The University
6+ * Copyright (c) 2004-2017 The University of Tennessee and The University
77 * of Tennessee Research Foundation. All rights
88 * reserved.
99 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
4343 CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
4444#endif
4545
46- extern int opal_convertor_create_stack_with_pos_general ( opal_convertor_t * convertor ,
47- int starting_point , const int * sizes );
48-
4946static void opal_convertor_construct ( opal_convertor_t * convertor )
5047{
5148 convertor -> pStack = convertor -> static_stack ;
@@ -226,7 +223,7 @@ int32_t opal_convertor_pack( opal_convertor_t* pConv,
226223 if ( OPAL_LIKELY (pConv -> flags & CONVERTOR_NO_OP ) ) {
227224 /**
228225 * We are doing conversion on a contiguous datatype on a homogeneous
229- * environment. The convertor contain minimal informations , we only
226+ * environment. The convertor contain minimal information , we only
230227 * use the bConverted to manage the conversion.
231228 */
232229 uint32_t i ;
@@ -447,31 +444,49 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
447444 return rc ;
448445}
449446
447+ static size_t
448+ opal_datatype_compute_remote_size ( const opal_datatype_t * pData ,
449+ const size_t * sizes )
450+ {
451+ uint32_t typeMask = pData -> bdt_used ;
452+ size_t length = 0 ;
453+
454+ if ( OPAL_UNLIKELY (NULL == pData -> ptypes ) ) {
455+ /* Allocate and fill the array of types used in the datatype description */
456+ opal_datatype_compute_ptypes ( (opal_datatype_t * )pData );
457+ }
458+
459+ for ( int i = OPAL_DATATYPE_FIRST_TYPE ; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED ); i ++ ) {
460+ if ( typeMask & ((uint32_t )1 << i ) ) {
461+ length += (pData -> ptypes [i ] * sizes [i ]);
462+ typeMask ^= ((uint32_t )1 << i );
463+ }
464+ }
465+ return length ;
466+ }
450467
451468/**
452469 * Compute the remote size. If necessary remove the homogeneous flag
453470 * and redirect the convertor description toward the non-optimized
454471 * datatype representation.
455472 */
456- #define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE (convertor , datatype , bdt_mask ) \
457- { \
458- if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \
459- opal_convertor_master_t* master; \
460- int i; \
461- uint32_t mask = datatype->bdt_used; \
462- convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \
463- master = convertor->master; \
464- convertor->remote_size = 0; \
465- for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \
466- if( mask & ((uint32_t)1 << i) ) { \
467- convertor->remote_size += (datatype->btypes[i] * \
468- master->remote_sizes[i]); \
469- mask ^= ((uint32_t)1 << i); \
470- } \
471- } \
472- convertor->remote_size *= convertor->count; \
473- convertor->use_desc = &(datatype->desc); \
474- } \
473+ size_t opal_convertor_compute_remote_size ( opal_convertor_t * pConvertor )
474+ {
475+ opal_datatype_t * datatype = (opal_datatype_t * )pConvertor -> pDesc ;
476+
477+ pConvertor -> remote_size = pConvertor -> local_size ;
478+ if ( OPAL_UNLIKELY (datatype -> bdt_used & pConvertor -> master -> hetero_mask ) ) {
479+ pConvertor -> flags &= (~CONVERTOR_HOMOGENEOUS );
480+ pConvertor -> use_desc = & (datatype -> desc );
481+ if ( 0 == (pConvertor -> flags & CONVERTOR_HAS_REMOTE_SIZE ) ) {
482+ /* This is for a single datatype, we must update it with the count */
483+ pConvertor -> remote_size = opal_datatype_compute_remote_size (datatype ,
484+ pConvertor -> master -> remote_sizes );
485+ pConvertor -> remote_size *= pConvertor -> count ;
486+ }
487+ }
488+ pConvertor -> flags |= CONVERTOR_HAS_REMOTE_SIZE ;
489+ return pConvertor -> remote_size ;
475490}
476491
477492/**
@@ -483,29 +498,26 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
483498 */
484499#define OPAL_CONVERTOR_PREPARE ( convertor , datatype , count , pUserBuf ) \
485500 { \
486- uint32_t bdt_mask; \
487- \
501+ convertor->local_size = count * datatype->size; \
502+ convertor->pBaseBuf = (unsigned char*)pUserBuf; \
503+ convertor->count = count; \
504+ convertor->pDesc = (opal_datatype_t*)datatype; \
505+ convertor->bConverted = 0; \
506+ convertor->use_desc = &(datatype->opt_desc); \
488507 /* If the data is empty we just mark the convertor as \
489508 * completed. With this flag set the pack and unpack functions \
490509 * will not do anything. \
491510 */ \
492511 if ( OPAL_UNLIKELY ((0 == count ) || (0 == datatype -> size )) ) { \
493- convertor -> flags |= OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED ; \
512+ convertor -> flags |= ( OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED | CONVERTOR_HAS_REMOTE_SIZE ); \
494513 convertor -> local_size = convertor -> remote_size = 0 ; \
495514 return OPAL_SUCCESS ; \
496515 } \
497- /* Compute the local in advance */ \
498- convertor -> local_size = count * datatype -> size ; \
499- convertor -> pBaseBuf = (unsigned char * )pUserBuf ; \
500- convertor -> count = count ; \
501516 \
502517 /* Grab the datatype part of the flags */ \
503518 convertor -> flags &= CONVERTOR_TYPE_MASK ; \
504519 convertor -> flags |= (CONVERTOR_DATATYPE_MASK & datatype -> flags ); \
505520 convertor -> flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS ); \
506- convertor -> pDesc = (opal_datatype_t * )datatype ; \
507- convertor -> bConverted = 0 ; \
508- convertor -> use_desc = & (datatype -> opt_desc ); \
509521 \
510522 convertor -> remote_size = convertor -> local_size ; \
511523 if ( OPAL_LIKELY (convertor -> remoteArch == opal_local_arch ) ) { \
@@ -516,9 +528,8 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
516528 } \
517529 } \
518530 \
519- bdt_mask = datatype -> bdt_used & convertor -> master -> hetero_mask ; \
520- OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE ( convertor , datatype , \
521- bdt_mask ); \
531+ assert ( (convertor )-> pDesc == (datatype ) ); \
532+ opal_convertor_compute_remote_size ( convertor ); \
522533 assert ( NULL != convertor -> use_desc -> desc ); \
523534 /* For predefined datatypes (contiguous) do nothing more */ \
524535 /* if checksum is enabled then always continue */ \
@@ -530,7 +541,7 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
530541 } \
531542 convertor -> flags &= ~CONVERTOR_NO_OP ; \
532543 { \
533- uint32_t required_stack_length = datatype -> btypes [ OPAL_DATATYPE_LOOP ] + 1 ; \
544+ uint32_t required_stack_length = datatype -> loops + 1 ; \
534545 \
535546 if ( required_stack_length > convertor -> stack_size ) { \
536547 assert (convertor -> pStack == convertor -> static_stack ); \
@@ -714,8 +725,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos,
714725 opal_output ( 0 , "%d: pos %d count %d disp %ld " , stack_pos , pStack [stack_pos ].index ,
715726 (int )pStack [stack_pos ].count , (long )pStack [stack_pos ].disp );
716727 if ( pStack -> index != -1 )
717- opal_output ( 0 , "\t[desc count %d disp %ld extent %ld]\n" ,
718- pDesc [pStack [stack_pos ].index ].elem .count ,
728+ opal_output ( 0 , "\t[desc count %lu disp %ld extent %ld]\n" ,
729+ ( unsigned long ) pDesc [pStack [stack_pos ].index ].elem .count ,
719730 (long )pDesc [pStack [stack_pos ].index ].elem .disp ,
720731 (long )pDesc [pStack [stack_pos ].index ].elem .extent );
721732 else
0 commit comments