diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index 4eb7eec0f6d..c80b2b4a58f 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -21,6 +21,8 @@ #include "ompi/mca/mtl/portals4/mtl_portals4.h" +#define REQ_OSC_TABLE_ID 4 + #define OSC_PORTALS4_MB_DATA 0x0000000000000000ULL #define OSC_PORTALS4_MB_CONTROL 0x1000000000000000ULL @@ -51,6 +53,11 @@ struct ompi_osc_portals4_component_t { ptl_size_t matching_atomic_max; ptl_size_t matching_fetch_atomic_max; ptl_size_t matching_atomic_ordered_size; + ptl_size_t ptl_max_msg_size; /* max size given by portals (cf PtlNIInit) */ + bool no_locks; + ptl_uid_t uid; + opal_mutex_t lock; + opal_condition_t cond; opal_free_list_t requests; /* request free list for the r* communication variants */ }; @@ -80,6 +87,7 @@ struct ompi_osc_portals4_module_t { ptl_handle_ni_t ni_h; /* network interface used by this window */ ptl_pt_index_t pt_idx; /* portal table index used by this window (this will be same across window) */ ptl_handle_ct_t ct_h; /* Counting event handle used for completion in this window */ + int ct_link; /* PTL_EVENT_LINK flag */ ptl_handle_md_t md_h; /* memory descriptor describing all of memory used by this window */ ptl_handle_md_t req_md_h; /* memory descriptor with event completion used by this window */ ptl_handle_me_t data_me_h; /* data match list entry (MB are CID | OSC_PORTALS4_MB_DATA) */ @@ -175,7 +183,7 @@ int ompi_osc_portals4_get_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_datatype, int target_rank, - MPI_Aint target_disp, + OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_datatype, struct ompi_op_t *op, @@ -219,7 +227,7 @@ int ompi_osc_portals4_rget_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_datatype, int target_rank, - MPI_Aint target_disp, + OPAL_PTRDIFF_TYPE target_disp, int target_count, struct 
ompi_datatype_t *target_datatype, struct ompi_op_t *op, diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index 5e2d677db4a..0e23804298e 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -178,6 +178,97 @@ ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt) return 0; } +static ptl_size_t +number_of_fragment(ptl_size_t length, ptl_size_t maxlength) +{ + ptl_size_t nb_frag = length == 0 ? 1 : (length - 1) / maxlength + 1; /* ceil(length / maxlength); a zero-length transfer still counts as one fragment; maxlength must be non-zero */ + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d : %lu fragment(s)", __FUNCTION__, __LINE__, (unsigned long) nb_frag)); + return nb_frag; +} + +static int +splittedPtlPut(ptl_handle_md_t md_h, + ptl_size_t loc_offset, + ptl_size_t length, + ptl_ack_req_t ack_req, + ptl_process_t target_id, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_b, + ptl_size_t rem_offset, + void *usr_ptr, + ptl_hdr_data_t hdr_data) +{ + ptl_size_t length_sent = 0; + do { + ptl_size_t length_frag; + int ret; + + length_frag = (length > mca_osc_portals4_component.ptl_max_msg_size) ? 
+ mca_osc_portals4_component.ptl_max_msg_size : + length; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "Put size : %lu/%lu, offset:%lu", length_frag, length, length_sent)); + ret = PtlPut(md_h, + loc_offset + length_sent, + length_frag, + ack_req, + target_id, + pt_index, + match_b, + rem_offset + length_sent, + usr_ptr, + hdr_data); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlPut failed with return value %d", + __FUNCTION__, __LINE__, ret); + return ret; + } + length -= length_frag; + length_sent += length_frag; + } while (length); + return PTL_OK; +} + +static int +splittedPtlGet(ptl_handle_md_t md_h, + ptl_size_t loc_offset, + ptl_size_t length, + ptl_process_t target_id, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_b, + ptl_size_t rem_offset, + void *usr_ptr) +{ + ptl_size_t length_submitted = 0; + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, "Get")); + + do { + ptl_size_t length_frag; + int ret; + length_frag = (length > mca_osc_portals4_component.ptl_max_msg_size) ? 
+ mca_osc_portals4_component.ptl_max_msg_size : + length; + ret = PtlGet(md_h, + (ptl_size_t) loc_offset + length_submitted, + length_frag, + target_id, + pt_index, + match_b, + rem_offset + length_submitted, + usr_ptr); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlGet failed with return value %d", + __FUNCTION__, __LINE__, ret); + return ret; + } + length -= length_frag; + length_submitted += length_frag; + } while (length); + return PTL_OK; +} int ompi_osc_portals4_rput(const void *origin_addr, @@ -195,13 +286,13 @@ ompi_osc_portals4_rput(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; size_t offset; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "rput: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); @@ -218,22 +309,30 @@ ompi_osc_portals4_rput(const void *origin_addr, "MPI_Rput: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected = 1; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } length *= origin_count; - ret = PtlPut(module->req_md_h, - (ptl_size_t) origin_addr, + request->ops_expected = number_of_fragment(length, 
mca_osc_portals4_component.ptl_max_msg_size); + opal_atomic_add_64(&module->opcount, request->ops_expected); + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + + ret = splittedPtlPut(module->req_md_h, + (ptl_size_t) origin_addr + origin_lb, length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset, + offset + target_lb, request, 0); if (OMPI_SUCCESS != ret) { @@ -262,13 +361,13 @@ ompi_osc_portals4_rget(void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; size_t offset; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); @@ -285,21 +384,28 @@ ompi_osc_portals4_rget(void *origin_addr, "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected = 1; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } length *= origin_count; - ret = PtlGet(module->req_md_h, - (ptl_size_t) origin_addr, + request->ops_expected = number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size); + opal_atomic_add_64(&module->opcount, request->ops_expected); + 
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->req_md_h, + (ptl_size_t) origin_addr + origin_lb, length, peer, module->pt_idx, module->match_bits, - offset, + offset + target_lb, request); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); @@ -328,15 +434,15 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "raccumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s 0x%lx", + "raccumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); @@ -356,7 +462,12 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, } else { ptl_size_t md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; @@ -368,35 +479,48 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, do { size_t msg_length = MIN(module->atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected++; if (MPI_REPLACE == op) { - ret = PtlPut(module->req_md_h, - md_offset + sent, + request->ops_expected += number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size); + opal_atomic_add_64(&module->opcount, 
number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + ret = splittedPtlPut(module->req_md_h, + md_offset + sent + origin_lb, msg_length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request, 0); } else { + request->ops_expected++; + opal_atomic_add_64(&module->opcount, 1); ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Raccumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; - + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Raccumulate: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Atomic", __FUNCTION__, __LINE__)); ret = PtlAtomic(module->req_md_h, - offset + sent, + md_offset + sent + origin_lb, /* local offset comes from md_offset, as in the Put branch above; offset is the target-side displacement */ msg_length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request, 0, ptl_op, @@ -422,7 +546,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_dt, int target, - MPI_Aint target_disp, + OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -434,17 +558,17 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb, result_lb; 
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", + "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", (unsigned long) origin_addr, origin_count, origin_dt->name, (unsigned long) result_addr, result_count, result_dt->name, - target, (int) target_disp, + target, (unsigned long) target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); @@ -468,16 +592,29 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, if (MPI_REPLACE == op) { ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } - ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } + ret = ompi_datatype_type_lb(result_dt, &result_lb); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + + ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Rget_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } length *= origin_count; result_md_offset = (ptl_size_t) result_addr; @@ -489,15 +626,17 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); request->ops_expected++; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->req_md_h, - result_md_offset + sent, + result_md_offset + sent + result_lb, module->md_h, - origin_md_offset + sent, + origin_md_offset + sent + origin_lb, msg_length, peer, 
module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request, 0, NULL, @@ -508,7 +647,12 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, } else if (MPI_NO_OP == op) { ptl_size_t md_offset; - ret = ompi_datatype_type_size(target_dt, &length); + ret = ompi_datatype_get_extent(target_dt, &target_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; @@ -520,23 +664,34 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected++; - - ret = PtlGet(module->req_md_h, - md_offset + sent, + opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size)); + request->ops_expected += number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->req_md_h, + md_offset + sent + result_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request); sent += msg_length; } while (sent < length); } else { ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; @@ -547,10 +702,18 @@ 
ompi_osc_portals4_rget_accumulate(const void *origin_addr, origin_md_offset = (ptl_size_t) origin_addr; ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Rget_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Rget_accumulate: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); @@ -558,15 +721,17 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); request->ops_expected++; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d FetchAtomic", __FUNCTION__, __LINE__)); ret = PtlFetchAtomic(module->req_md_h, - result_md_offset + sent, + result_md_offset + sent + result_lb, module->md_h, - origin_md_offset + sent, + origin_md_offset + sent + origin_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request, 0, ptl_op, @@ -598,13 +763,13 @@ ompi_osc_portals4_put(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; size_t offset; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); @@ -616,20 +781,26 @@ 
ompi_osc_portals4_put(const void *origin_addr, "MPI_Put: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - (void)opal_atomic_add_64(&module->opcount, 1); - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { return ret; } length *= origin_count; - ret = PtlPut(module->md_h, - (ptl_size_t) origin_addr, + opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + ret = splittedPtlPut(module->md_h, + (ptl_size_t) origin_addr + origin_lb, length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset, + offset + target_lb, NULL, 0); if (OMPI_SUCCESS != ret) { @@ -655,13 +826,13 @@ ompi_osc_portals4_get(void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; size_t offset; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "get: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); @@ -673,19 +844,25 @@ ompi_osc_portals4_get(void *origin_addr, "MPI_Get: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - (void)opal_atomic_add_64(&module->opcount, 1); - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if 
(OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { return ret; } length *= origin_count; - ret = PtlGet(module->md_h, - (ptl_size_t) origin_addr, + opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->md_h, + (ptl_size_t) origin_addr + origin_lb, length, peer, module->pt_idx, module->match_bits, - offset, + offset + target_lb, NULL); if (OMPI_SUCCESS != ret) { return ret; @@ -711,15 +888,15 @@ ompi_osc_portals4_accumulate(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", + "accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); @@ -734,7 +911,11 @@ ompi_osc_portals4_accumulate(const void *origin_addr, } else { ptl_size_t md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { return ret; } @@ -745,34 +926,47 @@ ompi_osc_portals4_accumulate(const void *origin_addr, do { size_t msg_length = MIN(module->atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); if (MPI_REPLACE == op) { - ret = 
PtlPut(module->md_h, - md_offset + sent, + opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + ret = splittedPtlPut(module->md_h, + md_offset + sent + origin_lb, msg_length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL, 0); } else { + (void)opal_atomic_add_64(&module->opcount, 1); ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; - + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Accumulate: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Atomic", __FUNCTION__, __LINE__)); ret = PtlAtomic(module->md_h, - md_offset + sent, + md_offset + sent + origin_lb, msg_length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL, 0, ptl_op, @@ -797,7 +991,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_dt, int target, - MPI_Aint target_disp, + OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -807,17 +1001,17 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, 
length, origin_lb, target_lb, result_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", + "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", (unsigned long) origin_addr, origin_count, origin_dt->name, (unsigned long) result_addr, result_count, result_dt->name, - target, (int) target_disp, + target, (unsigned long) target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); @@ -836,14 +1030,24 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, if (MPI_REPLACE == op) { ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); if (OMPI_SUCCESS != ret) { return ret; } - ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { return ret; } + ret = ompi_datatype_type_lb(result_dt, &result_lb); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Get_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } length *= origin_count; result_md_offset = (ptl_size_t) result_addr; @@ -854,15 +1058,17 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, - result_md_offset + sent, + result_md_offset + sent + result_lb, module->md_h, - origin_md_offset + sent, + origin_md_offset + sent + origin_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL, 0, NULL, @@ -873,7 +1079,11 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, } 
else if (MPI_NO_OP == op) { ptl_size_t md_offset; - ret = ompi_datatype_type_size(target_dt, &length); + ret = ompi_datatype_get_extent(target_dt, &target_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); if (OMPI_SUCCESS != ret) { return ret; } @@ -884,22 +1094,31 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); - - ret = PtlGet(module->md_h, - md_offset + sent, + opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->md_h, + md_offset + sent + result_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL); sent += msg_length; } while (sent < length); } else { ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); if (OMPI_SUCCESS != ret) { return ret; } @@ -909,26 +1128,35 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, origin_md_offset = (ptl_size_t) origin_addr; ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Get_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; - + if (OMPI_SUCCESS != ret) { + 
opal_output(ompi_osc_base_framework.framework_output, + "MPI_Get_accumulate: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d FetchAtomic", __FUNCTION__, __LINE__)); ret = PtlFetchAtomic(module->md_h, - result_md_offset + sent, + result_md_offset + sent + result_lb, module->md_h, - origin_md_offset + sent, + origin_md_offset + sent + origin_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL, 0, ptl_op, @@ -964,15 +1192,19 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr, ptl_size_t result_md_offset, origin_md_offset; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, 0x%lx", + "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %lu, 0x%lx", (unsigned long) origin_addr, (unsigned long) compare_addr, (unsigned long) result_addr, - dt->name, target, (int) target_disp, + dt->name, target, (unsigned long) target_disp, (unsigned long) win)); ret = ompi_osc_portals4_get_dt(dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Compare_and_swap: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } offset = get_displacement(module, target) * target_disp; @@ -986,6 +1218,8 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, result_md_offset, module->md_h, @@ -1027,15 +1261,19 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, ptl_datatype_t ptl_dt; OPAL_OUTPUT_VERBOSE((50, 
ompi_osc_base_framework.framework_output, - "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %d, %s, 0x%lx", + "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %lu, %s, 0x%lx", (unsigned long) origin_addr, (unsigned long) result_addr, - dt->name, target, (int) target_disp, + dt->name, target, (unsigned long) target_disp, op->o_name, (unsigned long) win)); ret = ompi_osc_portals4_get_dt(dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Fetch_and_op: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } offset = get_displacement(module, target) * target_disp; @@ -1044,14 +1282,15 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, assert(length <= module->fetch_atomic_max); - (void)opal_atomic_add_64(&module->opcount, 1); - if (MPI_REPLACE == op) { ptl_size_t result_md_offset, origin_md_offset; result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; + (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, result_md_offset, module->md_h, @@ -1071,7 +1310,10 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, md_offset = (ptl_size_t) result_addr; - ret = PtlGet(module->md_h, + opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->md_h, md_offset, length, peer, @@ -1081,13 +1323,20 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, NULL); } else { ptl_size_t result_md_offset, origin_md_offset; + (void)opal_atomic_add_64(&module->opcount, 1); ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + 
opal_output(ompi_osc_base_framework.framework_output, + "MPI_Fetch_and_op: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d FetchAtomic", __FUNCTION__, __LINE__)); ret = PtlFetchAtomic(module->md_h, result_md_offset, module->md_h, diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index db2684765e7..889b20e8255 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -198,7 +198,7 @@ progress_callback(void) "%s:%d: PtlEQGet reported dropped event", __FILE__, __LINE__); goto process; - } else if (PTL_EQ_EMPTY) { + } else if (PTL_EQ_EMPTY == ret) { return 0; } else { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -218,6 +218,13 @@ progress_callback(void) count++; if (NULL != ev.user_ptr) { + /* be sure that we receive the PTL_EVENT_LINK */ + if (ev.type == PTL_EVENT_LINK) { + *(int *)ev.user_ptr = *(int *)ev.user_ptr + 1; + opal_condition_broadcast(&mca_osc_portals4_component.cond); + continue; + } + req = (ompi_osc_portals4_request_t*) ev.user_ptr; opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength); ops = opal_atomic_add_32(&req->ops_committed, 1); @@ -243,7 +250,7 @@ component_open(void) static int component_register(void) { - bool ompi_osc_portals4_no_locks = false; + mca_osc_portals4_component.no_locks = false; (void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version, "no_locks", "Enable optimizations available only if MPI_LOCK is " @@ -252,7 +259,19 @@ component_register(void) MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &ompi_osc_portals4_no_locks); + &mca_osc_portals4_component.no_locks); + + mca_osc_portals4_component.ptl_max_msg_size = PTL_SIZE_MAX; 
+ (void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version, + "max_msg_size", + "Max size supported by portals4 (above that, a message is cut into messages less than that size)", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, + 0, + 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_osc_portals4_component.ptl_max_msg_size); return OMPI_SUCCESS; } @@ -287,6 +306,11 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) /* BWB: FIX ME: Need to make sure our ID matches with the MTL... */ + if (mca_osc_portals4_component.ptl_max_msg_size > actual.max_msg_size) + mca_osc_portals4_component.ptl_max_msg_size = actual.max_msg_size; + OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, + "max_size = %lu", mca_osc_portals4_component.ptl_max_msg_size)); + mca_osc_portals4_component.matching_atomic_max = actual.max_atomic_size; mca_osc_portals4_component.matching_fetch_atomic_max = actual.max_fetch_atomic_size; mca_osc_portals4_component.matching_atomic_ordered_size = @@ -305,7 +329,7 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) ret = PtlPTAlloc(mca_osc_portals4_component.matching_ni_h, 0, mca_osc_portals4_component.matching_eq_h, - 4, + REQ_OSC_TABLE_ID, &mca_osc_portals4_component.matching_pt_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -314,6 +338,13 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) return ret; } + if (mca_osc_portals4_component.matching_pt_idx != REQ_OSC_TABLE_ID) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n", + __FILE__, __LINE__, mca_osc_portals4_component.matching_pt_idx); + return OMPI_ERROR; /* ret is PTL_OK on this path; returning it would report success despite the wrong PT index */ + } + OBJ_CONSTRUCT(&mca_osc_portals4_component.requests, opal_free_list_t); ret = opal_free_list_init (&mca_osc_portals4_component.requests, sizeof(ompi_osc_portals4_request_t), @@ -353,8 +384,18 @@ component_query(struct 
ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct ompi_info_t *info, int flavor) { + int ret; + if (MPI_WIN_FLAVOR_SHARED == flavor) return -1; + ret = PtlGetUid(mca_osc_portals4_component.matching_ni_h, &mca_osc_portals4_component.uid); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlGetUid failed: %d\n", + __FILE__, __LINE__, ret); + return OMPI_ERROR; + } + return 20; } @@ -475,7 +516,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit me.length = size; } me.ct_handle = PTL_CT_NONE; - me.uid = PTL_UID_ANY; + me.uid = mca_osc_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; @@ -486,7 +527,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->pt_idx, &me, PTL_PRIORITY_LIST, - NULL, + &module->ct_link, &module->data_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -498,7 +539,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit me.start = &module->state; me.length = sizeof(module->state); me.ct_handle = PTL_CT_NONE; - me.uid = PTL_UID_ANY; + me.uid = mca_osc_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; @@ -509,7 +550,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->pt_idx, &me, PTL_PRIORITY_LIST, - NULL, + &module->ct_link, &module->control_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -557,6 +598,13 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit PtlAtomicSync(); /* Make sure that everyone's ready 
to receive. */ + OPAL_THREAD_LOCK(&mca_osc_portals4_component.lock); + while (module->ct_link != 2) { + opal_condition_wait(&mca_osc_portals4_component.cond, + &mca_osc_portals4_component.lock); + } + OPAL_THREAD_UNLOCK(&mca_osc_portals4_component.lock); + module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module); @@ -601,6 +649,7 @@ ompi_osc_portals4_free(struct ompi_win_t *win) module->comm->c_coll.coll_barrier_module); /* cleanup */ + PtlMEUnlink(module->control_me_h); PtlMEUnlink(module->data_me_h); PtlMDRelease(module->md_h); PtlMDRelease(module->req_md_h); diff --git a/ompi/mca/osc/portals4/osc_portals4_request.h b/ompi/mca/osc/portals4/osc_portals4_request.h index ef7e7b425f3..ae1be6f44d2 100644 --- a/ompi/mca/osc/portals4/osc_portals4_request.h +++ b/ompi/mca/osc/portals4/osc_portals4_request.h @@ -33,6 +33,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_portals4_request_t); req->super.req_mpi_object.win = win; \ req->super.req_complete = false; \ req->super.req_state = OMPI_REQUEST_ACTIVE; \ + req->super.req_status.MPI_ERROR = MPI_SUCCESS; \ req->ops_expected = 0; \ req->ops_committed = 0; \ } while (0)