diff --git a/ompi/mca/mtl/portals4/mtl_portals4.c b/ompi/mca/mtl/portals4/mtl_portals4.c index d0cb7f5269..2d25c8db7d 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.c +++ b/ompi/mca/mtl/portals4/mtl_portals4.c @@ -86,9 +86,11 @@ portals4_init_interface(void) /* Create send and long message (read) portal table entries */ ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, +#if OMPI_MTL_PORTALS4_FLOW_CONTROL + PTL_PT_FLOWCTRL | +#endif PTL_PT_ONLY_USE_ONCE | - PTL_PT_ONLY_TRUNCATE | - PTL_PT_FLOWCTRL, + PTL_PT_ONLY_TRUNCATE, ompi_mtl_portals4.recv_eq_h, REQ_RECV_TABLE_ID, &ompi_mtl_portals4.recv_idx); diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index 2cde423a31..e350d57115 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -42,23 +42,23 @@ struct mca_mtl_portals4_module_t { mca_mtl_base_module_t base; /* add_procs() can get called multiple times. this prevents multiple calls to portals4_init_interface(). */ - int need_init; + int32_t need_init; /* Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false) */ - int use_logical; + int32_t use_logical; /* Use flow control: 1 (true) : 0 (false) */ - int use_flowctl; + int32_t use_flowctl; /** Eager limit; messages greater than this use a rendezvous protocol */ - unsigned long long eager_limit; + uint64_t eager_limit; /** Size of short message blocks */ - unsigned long long recv_short_size; + uint64_t recv_short_size; /** Number of short message blocks which should be created during startup */ - int recv_short_num; + uint32_t recv_short_num; /** Length of the send event queues */ - int send_queue_size; + uint32_t send_queue_size; /** Length of the receive event queues */ - int recv_queue_size; + uint32_t recv_queue_size; /** Protocol for long message transfer */ enum { eager, rndv } protocol; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index b0ee9ae9b5..892bb67b7f 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -224,7 +224,7 @@ ompi_mtl_portals4_component_open(void) ompi_mtl_portals4.send_eq_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE; - + ompi_mtl_portals4.send_md_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL; ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL; @@ -277,6 +277,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, { int ret; ptl_process_t id; + ptl_ni_limits_t actual_limits; if (enable_mpi_threads && ompi_mpi_thread_multiple) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, @@ -298,13 +299,13 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, PTL_NI_LOGICAL | PTL_NI_MATCHING, PTL_PID_ANY, NULL, - NULL, + &actual_limits, &ompi_mtl_portals4.ni_h); else ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_PHYSICAL | PTL_NI_MATCHING, PTL_PID_ANY, NULL, - NULL, + &actual_limits, &ompi_mtl_portals4.ni_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, @@ -313,6 +314,25 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, goto error; } + if (ompi_comm_rank(MPI_COMM_WORLD) == 0) { + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_entries=%d", actual_limits.max_entries); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_unexpected_headers=%d", actual_limits.max_unexpected_headers); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_mds=%d", actual_limits.max_mds); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_eqs=%d", actual_limits.max_eqs); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_cts=%d", actual_limits.max_cts); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_pt_index=%d", actual_limits.max_pt_index); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_iovecs=%d", actual_limits.max_iovecs); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_list_size=%d", actual_limits.max_list_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_triggered_ops=%d", actual_limits.max_triggered_ops); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_msg_size=%ld", actual_limits.max_msg_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_atomic_size=%ld", actual_limits.max_atomic_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_fetch_atomic_size=%ld", actual_limits.max_fetch_atomic_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_waw_ordered_size=%ld", actual_limits.max_waw_ordered_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_war_ordered_size=%ld", actual_limits.max_war_ordered_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_volatile_size=%ld", actual_limits.max_volatile_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "features=%u", actual_limits.features); + } + ret = PtlGetUid(ompi_mtl_portals4.ni_h, &ompi_mtl_portals4.uid); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, @@ -344,6 +364,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, "My nid,pid = %x,%x", id.phys.nid, id.phys.pid)); + ompi_mtl_portals4.base.mtl_max_tag = MTL_PORTALS4_MAX_TAG; return &ompi_mtl_portals4.base; error: diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c index d6ba01918d..0464015dd2 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c @@ -219,15 +219,16 @@ ompi_mtl_portals4_flowctl_init(void) int ompi_mtl_portals4_flowctl_fini(void) { - PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx); - PtlCTFree(ompi_mtl_portals4.flowctl.trigger_ct_h); PtlMEUnlink(ompi_mtl_portals4.flowctl.trigger_me_h); - PtlCTFree(ompi_mtl_portals4.flowctl.alert_ct_h); + PtlCTFree(ompi_mtl_portals4.flowctl.trigger_ct_h); PtlMEUnlink(ompi_mtl_portals4.flowctl.alert_me_h); - PtlCTFree(ompi_mtl_portals4.flowctl.fanin_ct_h); + PtlCTFree(ompi_mtl_portals4.flowctl.alert_ct_h); PtlMEUnlink(ompi_mtl_portals4.flowctl.fanin_me_h); - PtlCTFree(ompi_mtl_portals4.flowctl.fanout_ct_h); + PtlCTFree(ompi_mtl_portals4.flowctl.fanin_ct_h); PtlMEUnlink(ompi_mtl_portals4.flowctl.fanout_me_h); + PtlCTFree(ompi_mtl_portals4.flowctl.fanout_ct_h); + + PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx); return OMPI_SUCCESS; } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 448372917d..077f1f8866 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -91,6 +91,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); + ret = PTL_FAIL; goto callback_error; } @@ -118,7 +119,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, to pull the second part of the message. */ ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit, ((msg_length > ptl_request->delivery_len) ? - ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit, + ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit, ev->initiator, ev->hdr_data, ompi_mtl_portals4.eager_limit, @@ -159,6 +160,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); + ret = PTL_FAIL; goto callback_error; } @@ -204,6 +206,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); + ret = PTL_FAIL; goto callback_error; } @@ -285,7 +288,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength, ((msg_length > ptl_request->delivery_len) ? - ptl_request->delivery_len : msg_length) - ev->mlength, + ptl_request->delivery_len : msg_length) - ev->mlength, ev->initiator, ev->hdr_data, ev->mlength, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c index e35d709c56..f96c5b1eee 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c @@ -37,6 +37,7 @@ static int ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, ompi_mtl_portals4_base_request_t* ptl_base_request) { + int ret = OMPI_SUCCESS; ompi_mtl_portals4_recv_short_request_t *ptl_request = (ompi_mtl_portals4_recv_short_request_t*) ptl_base_request; ompi_mtl_portals4_recv_short_block_t *block = ptl_request->block; @@ -59,10 +60,10 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, opal_list_remove_item(&ompi_mtl_portals4.recv_short_blocks, &block->base); OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); - ompi_mtl_portals4_recv_short_block_free(block); + ret = ompi_mtl_portals4_recv_short_block_free(block); } else { OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); - ompi_mtl_portals4_activate_block(block); + ret = ompi_mtl_portals4_activate_block(block); } break; @@ -85,6 +86,7 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, break; case PTL_EVENT_AUTO_UNLINK: + block->me_h = PTL_INVALID_HANDLE; #if OMPI_ENABLE_THREAD_MULTIPLE OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex); switch (block->status) { @@ -99,12 +101,12 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, opal_list_remove_item(&ompi_mtl_portals4.recv_short_blocks, &block->base); OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); - ompi_mtl_portals4_recv_short_block_free(block); + ret = ompi_mtl_portals4_recv_short_block_free(block); } else { OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output, "mtl:portals4 PTL_EVENT_AUTO_UNLINK received after PTL_EVENT_AUTO_FREE")); - ompi_mtl_portals4_activate_block(block); + ret = ompi_mtl_portals4_activate_block(block); } break; @@ -150,7 +152,7 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, break; } - return OMPI_SUCCESS; + return ret; } @@ -210,6 +212,7 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block) me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_PUT | + PTL_ME_EVENT_COMM_DISABLE | PTL_ME_MANAGE_LOCAL | PTL_ME_MAY_ALIGN; if (ompi_mtl_portals4.use_logical) { @@ -244,7 +247,8 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block) int ompi_mtl_portals4_recv_short_init(void) { - int i; + int ret = OMPI_SUCCESS; + uint32_t i; OBJ_CONSTRUCT(&ompi_mtl_portals4.short_block_mutex, opal_mutex_t); OBJ_CONSTRUCT(&(ompi_mtl_portals4.recv_short_blocks), opal_list_t); @@ -258,10 +262,10 @@ ompi_mtl_portals4_recv_short_init(void) } opal_list_append(&ompi_mtl_portals4.recv_short_blocks, &block->base); - ompi_mtl_portals4_activate_block(block); + ret = ompi_mtl_portals4_activate_block(block); } - return OMPI_SUCCESS; + return ret; } @@ -269,35 +273,45 @@ int ompi_mtl_portals4_recv_short_fini(void) { opal_list_item_t *item; + int ret = OMPI_SUCCESS; OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex); while (NULL != (item = opal_list_remove_first(&ompi_mtl_portals4.recv_short_blocks))) { ompi_mtl_portals4_recv_short_block_t *block = (ompi_mtl_portals4_recv_short_block_t*) item; - ompi_mtl_portals4_recv_short_block_free(block); + ret = ompi_mtl_portals4_recv_short_block_free(block); + ompi_mtl_portals4.active_recv_short_blocks--; } OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); - return OMPI_SUCCESS; + return ret; } int ompi_mtl_portals4_recv_short_link(int count) { + int ret = OMPI_SUCCESS; int active = ompi_mtl_portals4.active_recv_short_blocks; int i; if (active < count) { for (i = 0 ; i < (count - active) ; ++i) { ompi_mtl_portals4_recv_short_block_t *block = - ompi_mtl_portals4_recv_short_block_alloc(false); + ompi_mtl_portals4_recv_short_block_alloc(true); if (NULL == block) { return OMPI_ERR_OUT_OF_RESOURCE; } - ompi_mtl_portals4_activate_block(block); + OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex); + opal_list_append(&ompi_mtl_portals4.recv_short_blocks, + &block->base); + OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output, + "recv_short_link: total=%d active=%d", + (int) opal_list_get_size(&ompi_mtl_portals4.recv_short_blocks), ompi_mtl_portals4.active_recv_short_blocks)); + OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); + ret = ompi_mtl_portals4_activate_block(block); } } - return OMPI_SUCCESS; + return ret; }