Skip to content

Commit 1f4697d

Browse files
bosilca authored and ggouaillardet committed
Always build support for HETEROGENEOUS environments (this is needed to provide external32 support). Add a pack function that performs send-side conversion (needed on little-endian machines in order to pack into the external32 format). (cherry picked from commit cf2bb20)
1 parent 148929b commit 1f4697d

File tree

4 files changed

+277
-57
lines changed

4 files changed

+277
-57
lines changed

opal/datatype/opal_convertor.c

Lines changed: 69 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2014 The University of Tennessee and The University
6+
* Copyright (c) 2004-2016 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@@ -449,16 +449,17 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
449449

450450

451451
/**
452-
* Compute the remote size.
452+
* Compute the remote size. If necessary remove the homogeneous flag
453+
* and redirect the convertor description toward the non-optimized
454+
* datatype representation.
453455
*/
454-
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
455456
#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \
456457
{ \
457458
if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \
458459
opal_convertor_master_t* master; \
459460
int i; \
460461
uint32_t mask = datatype->bdt_used; \
461-
convertor->flags ^= CONVERTOR_HOMOGENEOUS; \
462+
convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \
462463
master = convertor->master; \
463464
convertor->remote_size = 0; \
464465
for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \
@@ -472,13 +473,6 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
472473
convertor->use_desc = &(datatype->desc); \
473474
} \
474475
}
475-
#else
476-
#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \
477-
{ \
478-
assert(0 == (bdt_mask)); \
479-
(void)bdt_mask; /* silence compiler warning */ \
480-
}
481-
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
482476

483477
/**
484478
* This macro will initialize a convertor based on a previously created
@@ -511,16 +505,13 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
511505
convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \
512506
convertor->pDesc = (opal_datatype_t*)datatype; \
513507
convertor->bConverted = 0; \
514-
/* By default consider the optimized description */ \
515508
convertor->use_desc = &(datatype->opt_desc); \
516509
\
517510
convertor->remote_size = convertor->local_size; \
518511
if( OPAL_LIKELY(convertor->remoteArch == opal_local_arch) ) { \
519-
if( (convertor->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_NO_GAPS)) == OPAL_DATATYPE_FLAG_NO_GAPS ) { \
520-
return OPAL_SUCCESS; \
521-
} \
522-
if( ((convertor->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_CONTIGUOUS)) \
523-
== OPAL_DATATYPE_FLAG_CONTIGUOUS) && (1 == count) ) { \
512+
if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) && \
513+
((convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) || \
514+
((convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && (1 == count))) ) { \
524515
return OPAL_SUCCESS; \
525516
} \
526517
} \
@@ -532,8 +523,9 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
532523
/* For predefined datatypes (contiguous) do nothing more */ \
533524
/* if checksum is enabled then always continue */ \
534525
if( ((convertor->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_NO_GAPS)) \
535-
== OPAL_DATATYPE_FLAG_NO_GAPS) && \
536-
(convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) { \
526+
== OPAL_DATATYPE_FLAG_NO_GAPS) && \
527+
((convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) == \
528+
(CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) { \
537529
return OPAL_SUCCESS; \
538530
} \
539531
convertor->flags &= ~CONVERTOR_NO_OP; \
@@ -566,26 +558,24 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
566558
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );
567559

568560
if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
569-
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
570561
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
571562
convertor->fAdvance = opal_unpack_general_checksum;
572-
} else
573-
#endif
574-
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
575-
convertor->fAdvance = opal_unpack_homogeneous_contig_checksum;
576563
} else {
577-
convertor->fAdvance = opal_generic_simple_unpack_checksum;
564+
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
565+
convertor->fAdvance = opal_unpack_homogeneous_contig_checksum;
566+
} else {
567+
convertor->fAdvance = opal_generic_simple_unpack_checksum;
568+
}
578569
}
579570
} else {
580-
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
581571
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
582572
convertor->fAdvance = opal_unpack_general;
583-
} else
584-
#endif
585-
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
586-
convertor->fAdvance = opal_unpack_homogeneous_contig;
587573
} else {
588-
convertor->fAdvance = opal_generic_simple_unpack;
574+
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
575+
convertor->fAdvance = opal_unpack_homogeneous_contig;
576+
} else {
577+
convertor->fAdvance = opal_generic_simple_unpack;
578+
}
589579
}
590580
}
591581
return OPAL_SUCCESS;
@@ -605,24 +595,32 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
605595
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );
606596

607597
if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
608-
if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
609-
if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size)
610-
|| (1 >= convertor->count) )
611-
convertor->fAdvance = opal_pack_homogeneous_contig_checksum;
612-
else
613-
convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps_checksum;
598+
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
599+
convertor->fAdvance = opal_pack_general_checksum;
614600
} else {
615-
convertor->fAdvance = opal_generic_simple_pack_checksum;
601+
if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
602+
if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size)
603+
|| (1 >= convertor->count) )
604+
convertor->fAdvance = opal_pack_homogeneous_contig_checksum;
605+
else
606+
convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps_checksum;
607+
} else {
608+
convertor->fAdvance = opal_generic_simple_pack_checksum;
609+
}
616610
}
617611
} else {
618-
if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
619-
if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size)
620-
|| (1 >= convertor->count) )
621-
convertor->fAdvance = opal_pack_homogeneous_contig;
622-
else
623-
convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps;
612+
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
613+
convertor->fAdvance = opal_pack_general;
624614
} else {
625-
convertor->fAdvance = opal_generic_simple_pack;
615+
if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
616+
if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size)
617+
|| (1 >= convertor->count) )
618+
convertor->fAdvance = opal_pack_homogeneous_contig;
619+
else
620+
convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps;
621+
} else {
622+
convertor->fAdvance = opal_generic_simple_pack;
623+
}
626624
}
627625
}
628626
return OPAL_SUCCESS;
@@ -678,15 +676,33 @@ int opal_convertor_clone( const opal_convertor_t* source,
678676

679677
void opal_convertor_dump( opal_convertor_t* convertor )
680678
{
681-
printf( "Convertor %p count %d stack position %d bConverted %ld\n", (void*)convertor,
682-
convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted );
683-
printf( "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n",
684-
(unsigned long)convertor->local_size, (unsigned long)convertor->remote_size,
685-
convertor->flags, convertor->stack_size, convertor->partial_length );
679+
opal_output( 0, "Convertor %p count %d stack position %d bConverted %ld\n"
680+
"\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n"
681+
"\tremote_arch %u local_arch %u\n",
682+
(void*)convertor,
683+
convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted,
684+
(unsigned long)convertor->local_size, (unsigned long)convertor->remote_size,
685+
convertor->flags, convertor->stack_size, convertor->partial_length,
686+
convertor->remoteArch, opal_local_arch );
687+
if( convertor->flags & CONVERTOR_RECV ) opal_output( 0, "unpack ");
688+
if( convertor->flags & CONVERTOR_SEND ) opal_output( 0, "pack ");
689+
if( convertor->flags & CONVERTOR_SEND_CONVERSION ) opal_output( 0, "conversion ");
690+
if( convertor->flags & CONVERTOR_HOMOGENEOUS ) opal_output( 0, "homogeneous " );
691+
else opal_output( 0, "heterogeneous ");
692+
if( convertor->flags & CONVERTOR_NO_OP ) opal_output( 0, "no_op ");
693+
if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) opal_output( 0, "checksum ");
694+
if( convertor->flags & CONVERTOR_CUDA ) opal_output( 0, "CUDA ");
695+
if( convertor->flags & CONVERTOR_CUDA_ASYNC ) opal_output( 0, "CUDA Async ");
696+
if( convertor->flags & CONVERTOR_COMPLETED ) opal_output( 0, "COMPLETED ");
697+
686698
opal_datatype_dump( convertor->pDesc );
687-
printf( "Actual stack representation\n" );
688-
opal_datatype_dump_stack( convertor->pStack, convertor->stack_pos,
689-
convertor->pDesc->desc.desc, convertor->pDesc->name );
699+
if( !((0 == convertor->stack_pos) &&
700+
((size_t)convertor->pStack[convertor->stack_pos].index > convertor->pDesc->desc.length)) ) {
701+
/* only if the convertor is completely initialized */
702+
opal_output( 0, "Actual stack representation\n" );
703+
opal_datatype_dump_stack( convertor->pStack, convertor->stack_pos,
704+
convertor->pDesc->desc.desc, convertor->pDesc->name );
705+
}
690706
}
691707

692708

opal/datatype/opal_convertor.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,7 @@ static inline int opal_convertor_cleanup( opal_convertor_t* convertor )
175175
*/
176176
static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConvertor )
177177
{
178-
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
179178
if (OPAL_UNLIKELY(0 == (pConvertor->flags & CONVERTOR_HOMOGENEOUS))) return 1;
180-
#endif
181179
#if OPAL_CUDA_SUPPORT
182180
if( pConvertor->flags & (CONVERTOR_CUDA | CONVERTOR_CUDA_UNIFIED)) return 1;
183181
#endif

0 commit comments

Comments
 (0)