Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit dde6702

Browse files
committed
mtl/ofi: Handle -FI_EAGAIN on send and recv operations.
(cherry picked from commit open-mpi/ompi@7adb9b7)
1 parent e0b85ea commit dde6702

File tree

1 file changed

+105
-93
lines changed

1 file changed

+105
-93
lines changed

ompi/mca/mtl/ofi/mtl_ofi.h

Lines changed: 105 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@
3838
#include "mtl_ofi_endpoint.h"
3939
#include "mtl_ofi_compat.h"
4040

41+
#define FI_RETRY_UNTIL_DONE(FUNC) \
42+
do { \
43+
do { \
44+
ret = FUNC; \
45+
if(OPAL_LIKELY(0 == ret)) {break;} \
46+
} while(-FI_EAGAIN == ret); \
47+
} while(0);
48+
4149
BEGIN_C_DECLS
4250

4351
extern mca_mtl_ofi_module_t ompi_mtl_ofi;
@@ -56,7 +64,8 @@ int ompi_mtl_ofi_progress_no_inline(void);
5664
__opal_attribute_always_inline__ static inline int
5765
ompi_mtl_ofi_progress(void)
5866
{
59-
int ret, count = 0;
67+
ssize_t ret;
68+
int count = 0;
6069
struct fi_cq_tagged_entry wc = { 0 };
6170
struct fi_cq_err_entry error = { 0 };
6271
ompi_mtl_ofi_request_t *ofi_req = NULL;
@@ -215,10 +224,10 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
215224
mca_pml_base_send_mode_t mode,
216225
ompi_mtl_ofi_request_t *ofi_req)
217226
{
218-
int ret;
227+
int ompi_ret;
219228
void *start;
220229
size_t length;
221-
ssize_t ret_length;
230+
ssize_t ret;
222231
bool free_after;
223232
uint64_t match_bits;
224233
ompi_proc_t *ompi_proc = NULL;
@@ -228,8 +237,8 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
228237
ompi_proc = ompi_comm_peer_lookup(comm, dest);
229238
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
230239

231-
ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
232-
if (OMPI_SUCCESS != ret) return ret;
240+
ompi_ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
241+
if (OMPI_SUCCESS != ompi_ret) return ompi_ret;
233242

234243
ofi_req->buffer = (free_after) ? start : NULL;
235244
ofi_req->length = length;
@@ -245,19 +254,18 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
245254
ofi_req->completion_count = 2;
246255
MTL_OFI_SET_SEND_BITS(match_bits, comm->c_contextid,
247256
comm->c_my_rank, tag, MTL_OFI_SYNC_SEND);
248-
ret_length = fi_trecv(ompi_mtl_ofi.ep,
249-
NULL,
250-
0,
251-
NULL,
252-
endpoint->peer_fiaddr,
253-
match_bits | MTL_OFI_SYNC_SEND_ACK,
254-
0, /* Exact match, no ignore bits */
255-
(void *) &ack_req->ctx);
256-
if (OPAL_UNLIKELY(ret_length < 0)) {
257+
FI_RETRY_UNTIL_DONE(fi_trecv(ompi_mtl_ofi.ep,
258+
NULL,
259+
0,
260+
NULL,
261+
endpoint->peer_fiaddr,
262+
match_bits | MTL_OFI_SYNC_SEND_ACK,
263+
0, /* Exact match, no ignore bits */
264+
(void *) &ack_req->ctx));
265+
if (OPAL_UNLIKELY(0 > ret)) {
257266
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
258267
"%s:%d: fi_trecv failed: %s(%zd)",
259-
__FILE__, __LINE__,
260-
strerror(errno), ret_length);
268+
__FILE__, __LINE__, fi_strerror(-ret), ret);
261269
return ompi_mtl_ofi_get_error(ret);
262270
}
263271
} else {
@@ -267,32 +275,31 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
267275
}
268276

269277
if (ompi_mtl_ofi.max_inject_size >= length) {
270-
ret_length = fi_tinject(ompi_mtl_ofi.ep,
271-
start,
272-
length,
273-
endpoint->peer_fiaddr,
274-
match_bits);
275-
if (OPAL_UNLIKELY(0 > ret_length)) {
278+
FI_RETRY_UNTIL_DONE(fi_tinject(ompi_mtl_ofi.ep,
279+
start,
280+
length,
281+
endpoint->peer_fiaddr,
282+
match_bits));
283+
if (OPAL_UNLIKELY(0 > ret)) {
276284
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
277-
"%s:%d: fi_tinject failed: %zd",
278-
__FILE__, __LINE__, ret_length);
285+
"%s:%d: fi_tinject failed: %s(%zd)",
286+
__FILE__, __LINE__, fi_strerror(-ret), ret);
279287
return ompi_mtl_ofi_get_error(ret);
280288
}
281289

282290
ofi_req->event_callback(NULL,ofi_req);
283291
} else {
284-
ret_length = fi_tsend(ompi_mtl_ofi.ep,
285-
start,
286-
length,
287-
NULL,
288-
endpoint->peer_fiaddr,
289-
match_bits,
290-
(void *) &ofi_req->ctx);
291-
292-
if (OPAL_UNLIKELY(0 > ret_length)) {
292+
FI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep,
293+
start,
294+
length,
295+
NULL,
296+
endpoint->peer_fiaddr,
297+
match_bits,
298+
(void *) &ofi_req->ctx));
299+
if (OPAL_UNLIKELY(0 > ret)) {
293300
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
294-
"%s:%d: fi_tsend failed: %zd",
295-
__FILE__, __LINE__, ret_length);
301+
"%s:%d: fi_tsend failed: %s(%zd)",
302+
__FILE__, __LINE__, fi_strerror(-ret), ret);
296303
return ompi_mtl_ofi_get_error(ret);
297304
}
298305
}
@@ -388,8 +395,8 @@ __opal_attribute_always_inline__ static inline int
388395
ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
389396
ompi_mtl_ofi_request_t *ofi_req)
390397
{
391-
int ret;
392-
ssize_t ret_length;
398+
int ompi_ret;
399+
ssize_t ret;
393400
ompi_proc_t *ompi_proc = NULL;
394401
mca_mtl_ofi_endpoint_t *endpoint = NULL;
395402
int src;
@@ -419,14 +426,14 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
419426
* Unpack data into recv buffer if necessary.
420427
*/
421428
if (OPAL_UNLIKELY(ofi_req->buffer)) {
422-
ret = ompi_mtl_datatype_unpack(ofi_req->convertor,
423-
ofi_req->buffer,
424-
wc->len);
425-
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
429+
ompi_ret = ompi_mtl_datatype_unpack(ofi_req->convertor,
430+
ofi_req->buffer,
431+
wc->len);
432+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) {
426433
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
427434
"%s:%d: ompi_mtl_datatype_unpack failed: %d",
428-
__FILE__, __LINE__, ret);
429-
status->MPI_ERROR = ret;
435+
__FILE__, __LINE__, ompi_ret);
436+
status->MPI_ERROR = ompi_ret;
430437
}
431438
}
432439

@@ -456,18 +463,17 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
456463
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
457464
ofi_req->remote_addr = endpoint->peer_fiaddr;
458465
}
459-
ret_length = fi_tsend(ompi_mtl_ofi.ep,
460-
NULL,
461-
0,
462-
NULL,
463-
ofi_req->remote_addr,
464-
wc->tag | MTL_OFI_SYNC_SEND_ACK,
465-
(void *) &ofi_req->ctx);
466-
467-
if (OPAL_UNLIKELY(ret_length < 0)) {
466+
FI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep,
467+
NULL,
468+
0,
469+
NULL,
470+
ofi_req->remote_addr,
471+
wc->tag | MTL_OFI_SYNC_SEND_ACK,
472+
(void *) &ofi_req->ctx));
473+
if (OPAL_UNLIKELY(0 > ret)) {
468474
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
469-
"%s:%d: fi_tsend failed: %zd",
470-
__FILE__, __LINE__, ret_length);
475+
"%s:%d: fi_tsend failed: %s(%zd)",
476+
__FILE__, __LINE__, fi_strerror(-ret), ret);
471477
status->MPI_ERROR = OMPI_ERROR;
472478
}
473479
} else {
@@ -513,8 +519,8 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl,
513519
struct opal_convertor_t *convertor,
514520
mca_mtl_request_t *mtl_request)
515521
{
516-
int ret = OMPI_SUCCESS;
517-
ssize_t ret_length;
522+
int ompi_ret = OMPI_SUCCESS;
523+
ssize_t ret;
518524
uint64_t match_bits, mask_bits;
519525
fi_addr_t remote_addr;
520526
ompi_proc_t *ompi_proc = NULL;
@@ -534,9 +540,12 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl,
534540

535541
MTL_OFI_SET_RECV_BITS(match_bits, mask_bits, comm->c_contextid, src, tag);
536542

537-
ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
538-
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
539-
return ret;
543+
ompi_ret = ompi_mtl_datatype_recv_buf(convertor,
544+
&start,
545+
&length,
546+
&free_after);
547+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) {
548+
return ompi_ret;
540549
}
541550

542551
ofi_req->type = OMPI_MTL_OFI_RECV;
@@ -551,22 +560,21 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl,
551560
ofi_req->remote_addr = remote_addr;
552561
ofi_req->match_bits = match_bits;
553562

554-
ret_length = fi_trecv(ompi_mtl_ofi.ep,
555-
start,
556-
length,
557-
NULL,
558-
remote_addr,
559-
match_bits,
560-
mask_bits,
561-
(void *)&ofi_req->ctx);
562-
563-
if (OPAL_UNLIKELY(ret_length < 0)) {
563+
FI_RETRY_UNTIL_DONE(fi_trecv(ompi_mtl_ofi.ep,
564+
start,
565+
length,
566+
NULL,
567+
remote_addr,
568+
match_bits,
569+
mask_bits,
570+
(void *)&ofi_req->ctx));
571+
if (OPAL_UNLIKELY(0 > ret)) {
564572
if (NULL != ofi_req->buffer) {
565573
free(ofi_req->buffer);
566574
}
567575
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
568576
"%s:%d: fi_trecv failed: %s(%zd)",
569-
__FILE__, __LINE__, strerror(errno), ret_length);
577+
__FILE__, __LINE__, fi_strerror(-ret), ret);
570578
return ompi_mtl_ofi_get_error(ret);
571579
}
572580

@@ -637,12 +645,16 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl,
637645
bool free_after;
638646
struct iovec iov;
639647
struct fi_msg_tagged msg;
640-
int ret;
648+
int ompi_ret;
649+
ssize_t ret;
641650
uint64_t msgflags = FI_CLAIM;
642651

643-
ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
644-
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
645-
return ret;
652+
ompi_ret = ompi_mtl_datatype_recv_buf(convertor,
653+
&start,
654+
&length,
655+
&free_after);
656+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) {
657+
return ompi_ret;
646658
}
647659

648660
ofi_req->type = OMPI_MTL_OFI_RECV;
@@ -668,12 +680,12 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl,
668680
msg.context = (void *)&ofi_req->ctx;
669681
msg.data = 0;
670682

671-
ret = fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags);
672-
if (ret < 0) {
683+
FI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags));
684+
if (OPAL_UNLIKELY(0 > ret)) {
673685
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
674-
"%s:%d: unexpected return code from fi_trecvmsg: %d",
675-
__FILE__, __LINE__, ret);
676-
return ompi_mtl_ofi_get_error(-ret);
686+
"%s:%d: unexpected return code from fi_trecvmsg: %s(%zd)",
687+
__FILE__, __LINE__, fi_strerror(-ret), ret);
688+
return ompi_mtl_ofi_get_error(ret);
677689
}
678690

679691
return OMPI_SUCCESS;
@@ -723,7 +735,7 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl,
723735
mca_mtl_ofi_endpoint_t *endpoint = NULL;
724736
fi_addr_t remote_proc = 0;
725737
uint64_t match_bits, mask_bits;
726-
int ret;
738+
ssize_t ret;
727739
struct fi_msg_tagged msg;
728740
uint64_t msgflags = FI_PEEK;
729741

@@ -761,18 +773,18 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl,
761773
ofi_req.completion_count = 1;
762774
ofi_req.match_state = 0;
763775

764-
ret = fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags);
765-
if (ret < 0 && -FI_ENOMSG == ret) {
776+
FI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags));
777+
if (-FI_ENOMSG == ret) {
766778
/**
767779
* The search request completed but no matching message was found.
768780
*/
769781
*flag = 0;
770782
return OMPI_SUCCESS;
771-
} else if (ret < 0) {
783+
} else if (OPAL_UNLIKELY(0 > ret)) {
772784
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
773-
"%s:%d: unexpected return code from fi_trecvmsg: %d",
774-
__FILE__, __LINE__, ret);
775-
return ompi_mtl_ofi_get_error(-ret);
785+
"%s:%d: fi_trecvmsg failed: %s(%zd)",
786+
__FILE__, __LINE__, fi_strerror(-ret), ret);
787+
return ompi_mtl_ofi_get_error(ret);
776788
}
777789

778790
while (0 < ofi_req.completion_count) {
@@ -803,7 +815,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
803815
mca_mtl_ofi_endpoint_t *endpoint = NULL;
804816
fi_addr_t remote_proc = 0;
805817
uint64_t match_bits, mask_bits;
806-
int ret;
818+
ssize_t ret;
807819
struct fi_msg_tagged msg;
808820
uint64_t msgflags = FI_PEEK | FI_CLAIM;
809821

@@ -846,18 +858,18 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
846858
ofi_req->completion_count = 1;
847859
ofi_req->match_state = 0;
848860

849-
ret = fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags);
850-
if (ret < 0 && -FI_ENOMSG == ret) {
861+
FI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags));
862+
if (-FI_ENOMSG == ret) {
851863
/**
852864
* The search request completed but no matching message was found.
853865
*/
854866
*matched = 0;
855867
return OMPI_SUCCESS;
856-
} else if (ret < 0) {
868+
} else if (OPAL_UNLIKELY(0 > ret)) {
857869
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
858-
"%s:%d: unexpected return code from fi_trecvmsg: %d",
859-
__FILE__, __LINE__, ret);
860-
return ompi_mtl_ofi_get_error(-ret);
870+
"%s:%d: fi_trecvmsg failed: %s(%zd)",
871+
__FILE__, __LINE__, fi_strerror(-ret), ret);
872+
return ompi_mtl_ofi_get_error(ret);
861873
}
862874

863875
while (0 < ofi_req->completion_count) {

0 commit comments

Comments
 (0)