Skip to content

Commit 5585b03

Browse files
authored
Merge pull request #8569 from AboorvaDevarajan/fix_perf_regression
ompi/datatype: Fix performance regression in reduce collective
2 parents 037b40b + 6784d6d commit 5585b03

File tree

3 files changed

+4
-32
lines changed

3 files changed

+4
-32
lines changed

opal/datatype/opal_datatype_pack.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
 *(COUNT) -= cando_count;

 if(_elem->blocklen < 9) {
-if(OPAL_LIKELY(OPAL_SUCCESS == opal_datatype_pack_predefined_element(&_memory, &_packed, cando_count, _elem))) {
+if((!(CONVERTOR->flags & CONVERTOR_CUDA)) && OPAL_LIKELY(OPAL_SUCCESS ==
+opal_datatype_pack_predefined_element(&_memory, &_packed, cando_count, _elem))) {
 goto update_and_return;
 }
 /* else unrecognized _elem->common.type, use the memcpy path */

opal/datatype/opal_datatype_pack_unpack_predefined.h

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -256,26 +256,6 @@ opal_datatype_unpack_predefined_element( unsigned char** rtn_src,
 unsigned char *src = *rtn_src;
 unsigned char *dest = *rtn_dest;

-#if OPAL_CUDA_SUPPORT
-if (opal_cuda_check_bufs(dest, src)) {
-return OPAL_ERROR;
-}
-/*
-* For checking if elem contains cuda memory, I think it's mostly okay
-* to only check the first element as done above. Although a complete
-* MPI datatype could easily be made to span both gpu and system memory,
-* I don't think that's true for the individual vector elements that make
-* up a datatype's description. The only way I can even conceive of that
-* being untrue is if the element has only two entries with a crazy
-* extent sized to hit both locations. I don't really think that's
-* possible, but I'm checking it anyway below.
-*/
-if (elem->count == 2 && cando_count >= blocklen &&
-(opal_cuda_check_bufs(dest + elem->extent, src)))
-{
-return OPAL_ERROR;
-}
-#endif
 if ((uintptr_t)src % align ||
 (uintptr_t)dest % align ||
 (elem->extent % align && cando_count > blocklen))
@@ -415,16 +395,6 @@ opal_datatype_pack_predefined_element( unsigned char** rtn_src,
 unsigned char *src = *rtn_src;
 unsigned char *dest = *rtn_dest;

-#if OPAL_CUDA_SUPPORT
-if (opal_cuda_check_bufs(dest, src)) {
-return OPAL_ERROR;
-}
-if (elem->count == 2 && cando_count >= blocklen &&
-(opal_cuda_check_bufs(dest, src + elem->extent)))
-{
-return OPAL_ERROR;
-}
-#endif
 if ((uintptr_t)src % align ||
 (uintptr_t)dest % align ||
 (elem->extent % align && cando_count > blocklen))

opal/datatype/opal_datatype_unpack.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR,
 *(COUNT) -= cando_count;

 if( _elem->blocklen < 9 ) {
-if(OPAL_LIKELY(OPAL_SUCCESS == opal_datatype_unpack_predefined_element(&_packed, &_memory, cando_count, _elem))) {
+if((!(CONVERTOR->flags & CONVERTOR_CUDA)) && OPAL_LIKELY(OPAL_SUCCESS ==
+opal_datatype_unpack_predefined_element(&_packed, &_memory, cando_count, _elem))) {
 goto update_and_return;
 }
 /* else unrecognized _elem->common.type, use the memcpy path */

0 commit comments

Comments
 (0)