@@ -60,27 +60,27 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
6060 CREATE_ELEM ( pElemDesc , last .common .type , OPAL_DATATYPE_FLAG_BASIC ,
6161 last .blocklen , last .count , last .disp , last .extent );
6262 pElemDesc ++ ; nbElems ++ ;
63- last .disp += last .count ;
6463 last .count = 0 ;
6564 }
6665 CREATE_LOOP_END ( pElemDesc , nbElems - pStack -> index + 1 , /* # of elems in this loop */
6766 end_loop -> first_elem_disp , end_loop -> size , end_loop -> common .flags );
68- pElemDesc ++ ; nbElems ++ ;
6967 if ( -- stack_pos >= 0 ) { /* still something to do ? */
7068 ddt_loop_desc_t * pStartLoop = & (pTypeDesc -> desc [pStack -> index - 1 ].loop );
71- pStartLoop -> items = end_loop -> items ;
69+ pStartLoop -> items = pElemDesc -> end_loop . items ;
7270 total_disp = pStack -> disp ; /* update the displacement position */
7371 }
72+ pElemDesc ++ ; nbElems ++ ;
7473 pStack -- ; /* go down one position on the stack */
7574 pos_desc ++ ;
7675 continue ;
7776 }
7877 if ( OPAL_DATATYPE_LOOP == pData -> desc .desc [pos_desc ].elem .common .type ) {
7978 ddt_loop_desc_t * loop = (ddt_loop_desc_t * )& (pData -> desc .desc [pos_desc ]);
80- ddt_endloop_desc_t * end_loop = (ddt_endloop_desc_t * )& (pData -> desc .desc [pos_desc + loop -> items ]);
8179 int index = GET_FIRST_NON_LOOP ( & (pData -> desc .desc [pos_desc ]) );
8280
8381 if ( loop -> common .flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
82+ ddt_endloop_desc_t * end_loop = (ddt_endloop_desc_t * )& (pData -> desc .desc [pos_desc + loop -> items ]);
83+
8484 assert (pData -> desc .desc [pos_desc + index ].elem .disp == end_loop -> first_elem_disp );
8585 compress .common .flags = loop -> common .flags ;
8686 compress .common .type = pData -> desc .desc [pos_desc + index ].elem .common .type ;
@@ -99,7 +99,12 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
9999 compress .count = loop -> loops ;
100100 compress .extent = loop -> extent ;
101101 compress .disp = end_loop -> first_elem_disp ;
102-
102+ if ( compress .extent == (ptrdiff_t )(compress .blocklen * opal_datatype_basicDatatypes [compress .common .type ]-> size ) ) {
103+ /* The compressed element is contiguous: collapse it into a single large blocklen */
104+ compress .blocklen *= compress .count ;
105+ compress .extent *= compress .count ;
106+ compress .count = 1 ;
107+ }
103108 /**
104109 * The current loop has been compressed and can now be treated as if it
105110 * was a data element. We can now look if it can be fused with last,
@@ -161,26 +166,43 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
161166 }
162167
163168 /* are the two elements compatible: aka they have very similar values and they
164- * can be merged together by increasing the count. This optimizes the memory
165- * required for storing the datatype description.
169+ * can be merged together by increasing the count, and/or changing the extent.
166170 */
167- if ( ((last .blocklen * opal_datatype_basicDatatypes [last .common .type ]-> size ) ==
168- (current -> blocklen * opal_datatype_basicDatatypes [current -> common .type ]-> size )) &&
169- (current -> disp == (last .disp + (ptrdiff_t )last .count * last .extent )) &&
170- ((current -> count == 1 ) || (last .extent == current -> extent )) ) {
171- last .count += current -> count ;
172- /* find the lowest common denomitaor type */
171+ if ( (last .blocklen * opal_datatype_basicDatatypes [last .common .type ]-> size ) ==
172+ (current -> blocklen * opal_datatype_basicDatatypes [current -> common .type ]-> size ) ) {
173+ ddt_elem_desc_t save = last ; /* safekeep the type and blocklen */
173174 if ( last .common .type != current -> common .type ) {
174175 last .blocklen *= opal_datatype_basicDatatypes [last .common .type ]-> size ;
175176 last .common .type = OPAL_DATATYPE_UINT1 ;
176177 }
177- /* maximize the contiguous pieces */
178- if ( last .extent == (ptrdiff_t )(last .blocklen * opal_datatype_basicDatatypes [last .common .type ]-> size ) ) {
179- last .blocklen *= last .count ;
180- last .count = 1 ;
181- last .extent = last .blocklen * opal_datatype_basicDatatypes [last .common .type ]-> size ;
178+
179+ if ( 1 == last .count ) {
180+ /* we can ignore the extent of the element with count == 1 and merge them together if their displacements match */
181+ if ( 1 == current -> count ) {
182+ last .extent = current -> disp - last .disp ;
183+ last .count ++ ;
184+ continue ;
185+ }
186+ /* can we compute a matching displacement ? */
187+ if ( (last .disp + current -> extent ) == current -> disp ) {
188+ last .extent = current -> extent ;
189+ last .count = current -> count + 1 ;
190+ continue ;
191+ }
182192 }
183- continue ; /* next data */
193+ if ( (last .extent * (ptrdiff_t )last .count + last .disp ) == current -> disp ) {
194+ if ( 1 == current -> count ) {
195+ last .count ++ ;
196+ continue ;
197+ }
198+ if ( last .extent == current -> extent ) {
199+ last .count += current -> count ;
200+ continue ;
201+ }
202+ }
203+ last .blocklen = save .blocklen ;
204+ last .common .type = save .common .type ;
205+ /* try other optimizations */
184206 }
185207 /* are the elements fusionable such that we can fusion the last blocklen of one with the first
186208 * blocklen of the other.
0 commit comments