diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index bfbb53f6b42..52b21b9354d 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -73,6 +73,7 @@ struct mca_mtl_portals4_module_t { /* free list of rendezvous get fragments */ opal_free_list_t fl_rndv_get_frag; + int get_retransmit_timeout; /** Network interface handle for matched interface */ ptl_handle_ni_t ni_h; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 9b36b091acd..915e3e2fc74 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -202,6 +202,16 @@ ompi_mtl_portals4_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_portals4.max_msg_size_mtl); + ompi_mtl_portals4.get_retransmit_timeout=10000; + (void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version, + "get_retransmit_timeout", + "PtlGET retransmission timeout in usec", + MCA_BASE_VAR_TYPE_INT, + NULL, 0, 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mtl_portals4.get_retransmit_timeout); + OBJ_RELEASE(new_enum); if (0 > ret) { return OMPI_ERR_NOT_SUPPORTED; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 607a5c96271..230b3785532 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -27,6 +27,7 @@ #include "ompi/mca/mtl/base/base.h" #include "ompi/mca/mtl/base/mtl_base_datatype.h" #include "ompi/message/message.h" +#include "opal/mca/timer/base/base.h" #include "mtl_portals4.h" #include "mtl_portals4_endpoint.h" @@ -81,6 +82,7 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, frag->frag_remote_offset = remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl; frag->event_callback = ompi_mtl_portals4_rndv_get_frag_progress; + frag->frag_abs_timeout_usec = 0; 
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d, size %ld) send", i + 1, frag_count, frag->frag_length)); @@ -322,17 +324,41 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev, ompi_mtl_portals4_recv_request_t* ptl_request = (ompi_mtl_portals4_recv_request_t*) rndv_get_frag->request; - assert(ev->type==PTL_EVENT_REPLY); + assert(PTL_EVENT_REPLY == ev->type); OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Recv %lu (0x%lx) got reply event", ptl_request->opcount, ptl_request->hdr_data)); + if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); + if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_DROPPED)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "PTL_EVENT_REPLY with ni_fail_type: %u => cannot retry", + (uint32_t)ev->ni_fail_type); + ret = PTL_FAIL; + goto callback_error; + } + + if (0 == rndv_get_frag->frag_abs_timeout_usec) { + /* this is the first retry of the frag. start the timer. */ + /* instead of recording the start time, record the end time + * and avoid addition on each retry. 
*/ + rndv_get_frag->frag_abs_timeout_usec = opal_timer_base_get_usec() + ompi_mtl_portals4.get_retransmit_timeout; + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "setting frag timeout at %llu", + (unsigned long long) rndv_get_frag->frag_abs_timeout_usec); /* opal_timer_t is an opaque typedef here; cast so the argument always matches the format */ + } else if (opal_timer_base_get_usec() >= rndv_get_frag->frag_abs_timeout_usec) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "timeout retrying GET"); + ret = PTL_FAIL; + goto callback_error; + } + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Rendezvous Get Failed: Reissuing frag #%u", rndv_get_frag->frag_num)); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_request.h b/ompi/mca/mtl/portals4/mtl_portals4_request.h index b7ae187d6ef..c7e3c31e47a 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_request.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_request.h @@ -22,6 +22,7 @@ #include "opal/datatype/opal_convertor.h" #include "ompi/mca/mtl/mtl.h" +#include "opal/mca/timer/base/base.h" struct ompi_mtl_portals4_message_t; struct ompi_mtl_portals4_pending_request_t; @@ -93,6 +94,8 @@ struct ompi_mtl_portals4_rndv_get_frag_t { ptl_process_t frag_target; ptl_hdr_data_t frag_match_bits; ptl_size_t frag_remote_offset; + /* the absolute time at which this frag times out */ + opal_timer_t frag_abs_timeout_usec; int (*event_callback)(ptl_event_t *ev, struct ompi_mtl_portals4_rndv_get_frag_t*);