Skip to content

Commit 34c82c7

Browse files
author
Sergey Oblomov
committed
PML/UCX: reset converter stack on unordered messages
1 parent 74257aa commit 34c82c7

File tree

4 files changed

+20
-0
lines changed

4 files changed

+20
-0
lines changed

ompi/mca/pml/ucx/pml_ucx_datatype.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ static void* pml_ucx_generic_datatype_start_unpack(void *context, void *buffer,
4040

4141
OMPI_DATATYPE_RETAIN(datatype);
4242
convertor->datatype = datatype;
43+
convertor->offset = 0;
4344
opal_convertor_copy_and_prepare_for_recv(ompi_proc_local_proc->super.proc_convertor,
4445
&datatype->super, count, buffer, 0,
4546
&convertor->opal_conv);
@@ -85,8 +86,15 @@ static ucs_status_t pml_ucx_generic_datatype_unpack(void *state, size_t offset,
8586
iov.iov_base = (void*)src;
8687
iov.iov_len = length;
8788

89+
/* If an out-of-order message is detected, apply a workaround: reset the
90+
 * convertor's datatype stack so that the set_position call can rebuild it. */
91+
if ((offset != convertor->offset) &&
92+
!(convertor->opal_conv.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) {
93+
opal_convertor_reset_stack_at_beginning(&convertor->opal_conv);
94+
}
8895
opal_convertor_set_position(&convertor->opal_conv, &offset);
8996
opal_convertor_unpack(&convertor->opal_conv, &iov, &iov_count, &length);
97+
convertor->offset = offset + length;
9098
return UCS_OK;
9199
}
92100

ompi/mca/pml/ucx/pml_ucx_datatype.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ struct pml_ucx_convertor {
1717
opal_free_list_item_t super;
1818
ompi_datatype_t *datatype;
1919
opal_convertor_t opal_conv;
20+
size_t offset;
2021
};
2122

2223

opal/datatype/opal_convertor.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,12 @@ int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
409409
}
410410

411411

412+
/*
 * Reset the stack of the given convertor by delegating to
 * opal_convertor_create_stack_at_begining() with opal_datatype_local_sizes.
 * The return value of that call is passed back to the caller unchanged.
 */
int32_t opal_convertor_reset_stack_at_beginning( opal_convertor_t* convertor )
413+
{
414+
return opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes );
415+
}
416+
417+
412418
int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
413419
size_t* position )
414420
{

opal/datatype/opal_convertor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,11 @@ opal_convertor_set_position( opal_convertor_t* convertor,
339339
return opal_convertor_set_position_nocheck( convertor, position );
340340
}
341341

342+
/*
343+
 * Reset the stack of the given convertor back to the beginning.
 * Returns an int32_t status code.
 * NOTE(review): presumably meant to be called before
 * opal_convertor_set_position() when data arrives out of order - confirm
 * against callers.
344+
 */
345+
OPAL_DECLSPEC int32_t
346+
opal_convertor_reset_stack_at_beginning( opal_convertor_t* convertor );
342347
/*
343348
*
344349
*/

0 commit comments

Comments
 (0)