33 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
44 * University Research and Technology
55 * Corporation. All rights reserved.
6- * Copyright (c) 2004-2014 The University of Tennessee and The University
6+ * Copyright (c) 2004-2016 The University of Tennessee and The University
77 * of Tennessee Research Foundation. All rights
88 * reserved.
99 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
1212 * All rights reserved.
1313 * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
1414 * Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
15- * Copyright (c) 2013 Research Organization for Information Science
15+ * Copyright (c) 2013-2016 Research Organization for Information Science
1616 * and Technology (RIST). All rights reserved.
1717 * $COPYRIGHT$
1818 *
@@ -449,16 +449,17 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
449449
450450
451451/**
452- * Compute the remote size.
452+ * Compute the remote size. If necessary remove the homogeneous flag
453+ * and redirect the convertor description toward the non-optimized
454+ * datatype representation.
453455 */
454- #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
455456#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE (convertor , datatype , bdt_mask ) \
456457{ \
457458 if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \
458459 opal_convertor_master_t* master; \
459460 int i; \
460461 uint32_t mask = datatype->bdt_used; \
461- convertor->flags ^= CONVERTOR_HOMOGENEOUS; \
462+ convertor->flags &= (~ CONVERTOR_HOMOGENEOUS); \
462463 master = convertor->master; \
463464 convertor->remote_size = 0; \
464465 for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \
@@ -472,13 +473,6 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
472473 convertor->use_desc = &(datatype->desc); \
473474 } \
474475}
475- #else
476- #define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE (convertor , datatype , bdt_mask ) \
477- { \
478- assert(0 == (bdt_mask)); \
479- (void)bdt_mask; /* silence compiler warning */ \
480- }
481- #endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
482476
483477/**
484478 * This macro will initialize a convertor based on a previously created
@@ -511,16 +505,13 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
511505 convertor -> flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS ); \
512506 convertor -> pDesc = (opal_datatype_t * )datatype ; \
513507 convertor -> bConverted = 0 ; \
514- /* By default consider the optimized description */ \
515508 convertor -> use_desc = & (datatype -> opt_desc ); \
516509 \
517510 convertor -> remote_size = convertor -> local_size ; \
518511 if ( OPAL_LIKELY (convertor -> remoteArch == opal_local_arch ) ) { \
519- if ( (convertor -> flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_NO_GAPS )) == OPAL_DATATYPE_FLAG_NO_GAPS ) { \
520- return OPAL_SUCCESS ; \
521- } \
522- if ( ((convertor -> flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_CONTIGUOUS )) \
523- == OPAL_DATATYPE_FLAG_CONTIGUOUS ) && (1 == count ) ) { \
512+ if ( !(convertor -> flags & CONVERTOR_WITH_CHECKSUM ) && \
513+ ((convertor -> flags & OPAL_DATATYPE_FLAG_NO_GAPS ) || \
514+ ((convertor -> flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) && (1 == count ))) ) { \
524515 return OPAL_SUCCESS ; \
525516 } \
526517 } \
@@ -532,8 +523,9 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
532523 /* For predefined datatypes (contiguous) do nothing more */ \
533524 /* if checksum is enabled then always continue */ \
534525 if ( ((convertor -> flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_NO_GAPS )) \
535- == OPAL_DATATYPE_FLAG_NO_GAPS ) && \
536- (convertor -> flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS )) ) { \
526+ == OPAL_DATATYPE_FLAG_NO_GAPS ) && \
527+ ((convertor -> flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS )) == \
528+ (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS )) ) { \
537529 return OPAL_SUCCESS ; \
538530 } \
539531 convertor -> flags &= ~CONVERTOR_NO_OP ; \
@@ -566,26 +558,24 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
566558 OPAL_CONVERTOR_PREPARE ( convertor , datatype , count , pUserBuf );
567559
568560 if ( convertor -> flags & CONVERTOR_WITH_CHECKSUM ) {
569- #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
570561 if ( !(convertor -> flags & CONVERTOR_HOMOGENEOUS ) ) {
571562 convertor -> fAdvance = opal_unpack_general_checksum ;
572- } else
573- #endif
574- if ( convertor -> pDesc -> flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
575- convertor -> fAdvance = opal_unpack_homogeneous_contig_checksum ;
576563 } else {
577- convertor -> fAdvance = opal_generic_simple_unpack_checksum ;
564+ if ( convertor -> pDesc -> flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
565+ convertor -> fAdvance = opal_unpack_homogeneous_contig_checksum ;
566+ } else {
567+ convertor -> fAdvance = opal_generic_simple_unpack_checksum ;
568+ }
578569 }
579570 } else {
580- #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
581571 if ( !(convertor -> flags & CONVERTOR_HOMOGENEOUS ) ) {
582572 convertor -> fAdvance = opal_unpack_general ;
583- } else
584- #endif
585- if ( convertor -> pDesc -> flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
586- convertor -> fAdvance = opal_unpack_homogeneous_contig ;
587573 } else {
588- convertor -> fAdvance = opal_generic_simple_unpack ;
574+ if ( convertor -> pDesc -> flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
575+ convertor -> fAdvance = opal_unpack_homogeneous_contig ;
576+ } else {
577+ convertor -> fAdvance = opal_generic_simple_unpack ;
578+ }
589579 }
590580 }
591581 return OPAL_SUCCESS ;
@@ -605,24 +595,32 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
605595 OPAL_CONVERTOR_PREPARE ( convertor , datatype , count , pUserBuf );
606596
607597 if ( convertor -> flags & CONVERTOR_WITH_CHECKSUM ) {
608- if ( datatype -> flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
609- if ( ((datatype -> ub - datatype -> lb ) == (OPAL_PTRDIFF_TYPE )datatype -> size )
610- || (1 >= convertor -> count ) )
611- convertor -> fAdvance = opal_pack_homogeneous_contig_checksum ;
612- else
613- convertor -> fAdvance = opal_pack_homogeneous_contig_with_gaps_checksum ;
598+ if ( CONVERTOR_SEND_CONVERSION == (convertor -> flags & (CONVERTOR_SEND_CONVERSION |CONVERTOR_HOMOGENEOUS )) ) {
599+ convertor -> fAdvance = opal_pack_general_checksum ;
614600 } else {
615- convertor -> fAdvance = opal_generic_simple_pack_checksum ;
601+ if ( datatype -> flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
602+ if ( ((datatype -> ub - datatype -> lb ) == (OPAL_PTRDIFF_TYPE )datatype -> size )
603+ || (1 >= convertor -> count ) )
604+ convertor -> fAdvance = opal_pack_homogeneous_contig_checksum ;
605+ else
606+ convertor -> fAdvance = opal_pack_homogeneous_contig_with_gaps_checksum ;
607+ } else {
608+ convertor -> fAdvance = opal_generic_simple_pack_checksum ;
609+ }
616610 }
617611 } else {
618- if ( datatype -> flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
619- if ( ((datatype -> ub - datatype -> lb ) == (OPAL_PTRDIFF_TYPE )datatype -> size )
620- || (1 >= convertor -> count ) )
621- convertor -> fAdvance = opal_pack_homogeneous_contig ;
622- else
623- convertor -> fAdvance = opal_pack_homogeneous_contig_with_gaps ;
612+ if ( CONVERTOR_SEND_CONVERSION == (convertor -> flags & (CONVERTOR_SEND_CONVERSION |CONVERTOR_HOMOGENEOUS )) ) {
613+ convertor -> fAdvance = opal_pack_general ;
624614 } else {
625- convertor -> fAdvance = opal_generic_simple_pack ;
615+ if ( datatype -> flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
616+ if ( ((datatype -> ub - datatype -> lb ) == (OPAL_PTRDIFF_TYPE )datatype -> size )
617+ || (1 >= convertor -> count ) )
618+ convertor -> fAdvance = opal_pack_homogeneous_contig ;
619+ else
620+ convertor -> fAdvance = opal_pack_homogeneous_contig_with_gaps ;
621+ } else {
622+ convertor -> fAdvance = opal_generic_simple_pack ;
623+ }
626624 }
627625 }
628626 return OPAL_SUCCESS ;
@@ -678,15 +676,33 @@ int opal_convertor_clone( const opal_convertor_t* source,
678676
679677void opal_convertor_dump ( opal_convertor_t * convertor )
680678{
681- printf ( "Convertor %p count %d stack position %d bConverted %ld\n" , (void * )convertor ,
682- convertor -> count , convertor -> stack_pos , (unsigned long )convertor -> bConverted );
683- printf ( "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n" ,
684- (unsigned long )convertor -> local_size , (unsigned long )convertor -> remote_size ,
685- convertor -> flags , convertor -> stack_size , convertor -> partial_length );
679+ opal_output ( 0 , "Convertor %p count %d stack position %d bConverted %ld\n"
680+ "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n"
681+ "\tremote_arch %u local_arch %u\n" ,
682+ (void * )convertor ,
683+ convertor -> count , convertor -> stack_pos , (unsigned long )convertor -> bConverted ,
684+ (unsigned long )convertor -> local_size , (unsigned long )convertor -> remote_size ,
685+ convertor -> flags , convertor -> stack_size , convertor -> partial_length ,
686+ convertor -> remoteArch , opal_local_arch );
687+ if ( convertor -> flags & CONVERTOR_RECV ) opal_output ( 0 , "unpack " );
688+ if ( convertor -> flags & CONVERTOR_SEND ) opal_output ( 0 , "pack " );
689+ if ( convertor -> flags & CONVERTOR_SEND_CONVERSION ) opal_output ( 0 , "conversion " );
690+ if ( convertor -> flags & CONVERTOR_HOMOGENEOUS ) opal_output ( 0 , "homogeneous " );
691+ else opal_output ( 0 , "heterogeneous " );
692+ if ( convertor -> flags & CONVERTOR_NO_OP ) opal_output ( 0 , "no_op " );
693+ if ( convertor -> flags & CONVERTOR_WITH_CHECKSUM ) opal_output ( 0 , "checksum " );
694+ if ( convertor -> flags & CONVERTOR_CUDA ) opal_output ( 0 , "CUDA " );
695+ if ( convertor -> flags & CONVERTOR_CUDA_ASYNC ) opal_output ( 0 , "CUDA Async " );
696+ if ( convertor -> flags & CONVERTOR_COMPLETED ) opal_output ( 0 , "COMPLETED " );
697+
686698 opal_datatype_dump ( convertor -> pDesc );
687- printf ( "Actual stack representation\n" );
688- opal_datatype_dump_stack ( convertor -> pStack , convertor -> stack_pos ,
689- convertor -> pDesc -> desc .desc , convertor -> pDesc -> name );
699+ if ( !((0 == convertor -> stack_pos ) &&
700+ ((size_t )convertor -> pStack [convertor -> stack_pos ].index > convertor -> pDesc -> desc .length )) ) {
701+ /* only if the convertor is completely initialized */
702+ opal_output ( 0 , "Actual stack representation\n" );
703+ opal_datatype_dump_stack ( convertor -> pStack , convertor -> stack_pos ,
704+ convertor -> pDesc -> desc .desc , convertor -> pDesc -> name );
705+ }
690706}
691707
692708
0 commit comments