2626 CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
2727#endif
2828
29+ /**
30+ * This function deals only with partial elements. The COUNT points however to the whole leftover count,
31+ * but this function is only expected to operate on an amount less than blength, that would allow the rest
32+ * of the pack process to handle only entire blength blocks (plus the left over).
33+ *
34+ * Return 1 if we are now aligned on a block, 0 otherwise.
35+ */
36+ static inline int
37+ pack_partial_blocklen ( opal_convertor_t * CONVERTOR ,
38+ const dt_elem_desc_t * ELEM ,
39+ size_t * COUNT ,
40+ unsigned char * * memory ,
41+ unsigned char * * packed ,
42+ size_t * SPACE )
43+ {
44+ const ddt_elem_desc_t * _elem = & ((ELEM )-> elem );
45+ size_t do_now_bytes = opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
46+ size_t do_now = * (COUNT );
47+ unsigned char * _memory = (* memory ) + _elem -> disp ;
48+ unsigned char * _packed = * packed ;
49+
50+ assert ( * (COUNT ) <= _elem -> count * _elem -> blocklen );
51+
52+ /**
53+ * First check if we already did something on this element ? The COUNT is the number
54+ * of remaining predefined types in the current elem, not how many predefined types
55+ * should be manipulated in the current call (this number is instead reflected on the
56+ * SPACE).
57+ */
58+ if ( 0 == (do_now = (* COUNT ) % _elem -> blocklen ) )
59+ return 1 ;
60+
61+ size_t left_in_block = do_now ; /* left in the current blocklen */
62+
63+ if ( (do_now_bytes * do_now ) > * (SPACE ) )
64+ do_now = (* SPACE ) / do_now_bytes ;
65+
66+ do_now_bytes *= do_now ;
67+
68+ OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
69+ (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
70+ DO_DEBUG ( opal_output ( 0 , "pack memcpy( %p, %p, %lu ) => space %lu [partial]\n" ,
71+ _packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
72+ MEMCPY_CSUM ( _packed , _memory , do_now_bytes , (CONVERTOR ) );
73+ * (memory ) += (ptrdiff_t )do_now_bytes ;
74+ if ( do_now == left_in_block ) /* compensate if completed a blocklen */
75+ * (memory ) += _elem -> extent - (_elem -> blocklen * opal_datatype_basicDatatypes [_elem -> common .type ]-> size );
76+
77+ * (COUNT ) -= do_now ;
78+ * (SPACE ) -= do_now_bytes ;
79+ * (packed ) += do_now_bytes ;
80+ return (do_now == left_in_block );
81+ }
82+
83+ /**
84+ * Pack entire blocks, plus a possible remainder if SPACE is constrained to less than COUNT elements.
85+ */
2986static inline void
3087pack_predefined_data ( opal_convertor_t * CONVERTOR ,
3188 const dt_elem_desc_t * ELEM ,
@@ -36,27 +93,24 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
3693{
3794 const ddt_elem_desc_t * _elem = & ((ELEM )-> elem );
3895 size_t blocklen_bytes = opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
39- size_t cando_count = * (COUNT ), do_now , do_now_bytes ;
96+ size_t cando_count = * (COUNT ), do_now_bytes ;
4097 unsigned char * _memory = (* memory ) + _elem -> disp ;
4198 unsigned char * _packed = * packed ;
4299
100+ assert ( 0 == (cando_count % _elem -> blocklen ) ); /* no partials here */
43101 assert ( * (COUNT ) <= _elem -> count * _elem -> blocklen );
44102
45103 if ( (blocklen_bytes * cando_count ) > * (SPACE ) )
46104 cando_count = (* SPACE ) / blocklen_bytes ;
47105
48- do_now = * (COUNT ); /* save the COUNT for later */
49106 /* preemptively update the number of COUNT we will return. */
50107 * (COUNT ) -= cando_count ;
51108
52- if ( 1 == _elem -> count ) { /* Everything is contiguous, handle it as a prologue */
53- goto do_epilog ;
54- }
55109 if ( 1 == _elem -> blocklen ) { /* Do as many full blocklen as possible */
56110 for (; cando_count > 0 ; cando_count -- ) {
57111 OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , blocklen_bytes , (CONVERTOR )-> pBaseBuf ,
58112 (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
59- DO_DEBUG ( opal_output ( 0 , "pack 2. memcpy( %p, %p, %lu ) => space %lu\n" ,
113+ DO_DEBUG ( opal_output ( 0 , "pack memcpy( %p, %p, %lu ) => space %lu [blen = 1] \n" ,
60114 (void * )_packed , (void * )_memory , (unsigned long )blocklen_bytes , (unsigned long )(* (SPACE ) - (_packed - * (packed ))) ); );
61115 MEMCPY_CSUM ( _packed , _memory , blocklen_bytes , (CONVERTOR ) );
62116 _packed += blocklen_bytes ;
@@ -65,61 +119,32 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
65119 goto update_and_return ;
66120 }
67121
68- blocklen_bytes *= _elem -> blocklen ;
69- if ( (_elem -> count * _elem -> blocklen ) == cando_count ) {
70- goto skip_prolog ;
71- }
72- /**
73- * First check if we already did something on this element ? The COUNT is the number
74- * of remaining predefined types in the current elem, not how many predefined types
75- * should be manipulated in the current call (this number is instead reflected on the
76- * SPACE).
77- */
78- do_now = do_now % _elem -> blocklen ; /* any partial elements ? */
122+ if ( (1 < _elem -> count ) && (_elem -> blocklen <= cando_count ) ) {
123+ blocklen_bytes *= _elem -> blocklen ;
79124
80- if ( 0 != do_now ) {
81- size_t left_in_block = do_now ; /* left in the current blocklen */
82- do_now = (do_now > cando_count ) ? cando_count : do_now ;
83- do_now_bytes = do_now * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
84-
85- OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
86- (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
87- DO_DEBUG ( opal_output ( 0 , "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n" ,
88- _packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
89- MEMCPY_CSUM ( _packed , _memory , do_now_bytes , (CONVERTOR ) );
90- _memory += (ptrdiff_t )do_now_bytes ;
91- /* compensate if we just completed a blocklen */
92- if ( do_now == left_in_block )
93- _memory += _elem -> extent - blocklen_bytes ;
94- _packed += do_now_bytes ;
95- cando_count -= do_now ;
96- }
97-
98- skip_prolog :
99- /* Do as many full blocklen as possible */
100- for (size_t _i = 0 ; _elem -> blocklen <= cando_count ; _i ++ ) {
101- OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , blocklen_bytes , (CONVERTOR )-> pBaseBuf ,
102- (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
103- DO_DEBUG ( opal_output ( 0 , "pack 2. memcpy( %p, %p, %lu ) => space %lu\n" ,
104- (void * )_packed , (void * )_memory , (unsigned long )blocklen_bytes , (unsigned long )(* (SPACE ) - (_packed - * (packed ))) ); );
105- MEMCPY_CSUM ( _packed , _memory , blocklen_bytes , (CONVERTOR ) );
106- _packed += blocklen_bytes ;
107- _memory += _elem -> extent ;
108- cando_count -= _elem -> blocklen ;
125+ do { /* Do as many full blocklen as possible */
126+ OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , blocklen_bytes , (CONVERTOR )-> pBaseBuf ,
127+ (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
128+ DO_DEBUG ( opal_output ( 0 , "pack 2. memcpy( %p, %p, %lu ) => space %lu\n" ,
129+ (void * )_packed , (void * )_memory , (unsigned long )blocklen_bytes , (unsigned long )(* (SPACE ) - (_packed - * (packed ))) ); );
130+ MEMCPY_CSUM ( _packed , _memory , blocklen_bytes , (CONVERTOR ) );
131+ _packed += blocklen_bytes ;
132+ _memory += _elem -> extent ;
133+ cando_count -= _elem -> blocklen ;
134+ } while (_elem -> blocklen <= cando_count );
109135 }
110136
111137 /**
112138 * As an epilog do anything left from the last blocklen.
113139 */
114140 if ( 0 != cando_count ) {
115-
116- do_epilog :
117- assert ( cando_count < _elem -> blocklen );
141+ assert ( (cando_count < _elem -> blocklen ) ||
142+ ((1 == _elem -> count ) && (cando_count <= _elem -> blocklen )) );
118143 do_now_bytes = cando_count * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
119144 OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
120145 (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
121146 DO_DEBUG ( opal_output ( 0 , "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n" ,
122- (void * )_packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
147+ (void * )_packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE ) - ( _packed - * ( packed )) ) ); );
123148 MEMCPY_CSUM ( _packed , _memory , do_now_bytes , (CONVERTOR ) );
124149 _memory += do_now_bytes ;
125150 _packed += do_now_bytes ;
@@ -159,7 +184,15 @@ static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR,
159184 * (COUNT ) -= _copy_loops ;
160185}
161186
162- #define PACK_PREDEFINED_DATATYPE ( CONVERTOR , /* the convertor */ \
/**
 * Call pack_partial_blocklen() on lvalues: the addresses of COUNT, MEMORY,
 * PACKED and SPACE are forwarded, so the helper updates them in place.
 *
 *   CONVERTOR  the convertor
 *   ELEM       the basic element to be packed
 *   COUNT      the number of elements
 *   MEMORY     the source pointer (char*)
 *   PACKED     the destination pointer (char*)
 *   SPACE      the space in the destination buffer
 *
 * Evaluates to the value returned by pack_partial_blocklen().
 *
 * The opening parenthesis must follow the macro name without any whitespace,
 * otherwise the preprocessor treats this as an object-like macro.
 */
#define PACK_PARTIAL_BLOCKLEN( CONVERTOR, ELEM, COUNT, MEMORY, PACKED, SPACE ) \
    pack_partial_blocklen( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) )
194+
195+ #define PACK_PREDEFINED_DATATYPE ( CONVERTOR , /* the convertor */ \
163196 ELEM , /* the basic element to be packed */ \
164197 COUNT , /* the number of elements */ \
165198 MEMORY , /* the source pointer (char*) */ \
0 commit comments