Skip to content

Commit 60e91e8

Browse files
committed
coll/base: give a boost to ompi_coll_base_sendrecv_nonzero_actual()
Based on the current implementation, a blocking send is faster than its non-blocking counterpart. Switch the exchange function used in the barrier to a blocking send combined with a non-blocking receive. This is similar to 223d755.
1 parent 3e17e2f commit 60e91e8

File tree

1 file changed

+15
-36
lines changed

1 file changed

+15
-36
lines changed

ompi/mca/coll/base/coll_base_util.c

Lines changed: 15 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -39,54 +39,33 @@ int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
3939
ompi_status_public_t* status )
4040

4141
{ /* post receive first, then send, then waitall... should be fast (I hope) */
42-
int err, line = 0, nreqs = 0;
43-
size_t typesize;
44-
ompi_request_t* reqs[2], **req = reqs;
45-
ompi_status_public_t statuses[2];
42+
int err, line = 0;
43+
size_t rtypesize, stypesize;
44+
ompi_request_t *req;
45+
ompi_status_public_t rstatus;
4646

4747
/* post new irecv */
48-
ompi_datatype_type_size(rdatatype, &typesize);
49-
if (0 != rcount && 0 != typesize) {
48+
ompi_datatype_type_size(rdatatype, &rtypesize);
49+
if (0 != rcount && 0 != rtypesize) {
5050
err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag,
51-
comm, req++));
52-
++nreqs;
51+
comm, &req));
5352
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
5453
}
5554

5655
/* send data to children */
57-
ompi_datatype_type_size(sdatatype, &typesize);
58-
if (0 != scount && 0 != typesize) {
59-
err = MCA_PML_CALL(isend( sendbuf, scount, sdatatype, dest, stag,
60-
MCA_PML_BASE_SEND_STANDARD, comm, req++));
61-
++nreqs;
56+
ompi_datatype_type_size(sdatatype, &stypesize);
57+
if (0 != scount && 0 != stypesize) {
58+
err = MCA_PML_CALL(send( sendbuf, scount, sdatatype, dest, stag,
59+
MCA_PML_BASE_SEND_STANDARD, comm));
6260
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
6361
}
6462

65-
if (0 != nreqs) {
66-
err = ompi_request_wait_all( nreqs, reqs, statuses );
67-
if( MPI_ERR_IN_STATUS == err ) { line = __LINE__;
68-
/* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
69-
* code that we can propagate up the stack. Instead, look for the real
70-
* error code from the MPI_ERROR in the status.
71-
*/
72-
int err_index = 0;
73-
if( MPI_SUCCESS == statuses[0].MPI_ERROR
74-
|| MPI_ERR_PENDING == statuses[0].MPI_ERROR ) {
75-
err_index = 1;
76-
}
77-
if (MPI_STATUS_IGNORE != status) {
78-
*status = statuses[err_index];
79-
}
80-
err = statuses[err_index].MPI_ERROR;
81-
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s"
82-
" stage of ompi_coll_base_sendrecv_zero\n",
83-
__FILE__, line, err, (0 == err_index ? "receive" : "send")));
84-
return err;
85-
}
63+
if (0 != rcount && 0 != rtypesize) {
64+
err = ompi_request_wait( &req, &rstatus);
8665
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
8766

8867
if (MPI_STATUS_IGNORE != status) {
89-
*status = statuses[0];
68+
*status = rstatus;
9069
}
9170
} else {
9271
if( MPI_STATUS_IGNORE != status )
@@ -96,7 +75,7 @@ int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
9675
return (MPI_SUCCESS);
9776

9877
error_handler:
99-
/* Error discovered during the posting of the irecv or isend,
78+
/* Error discovered during the posting of the irecv or send,
10079
* and no status is available.
10180
*/
10281
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n",

0 commit comments

Comments
 (0)