From a76566c7543a12213167cd1053ed68c3678f1d49 Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Thu, 7 Jul 2016 10:34:22 +0200 Subject: [PATCH 01/10] osc/portals4: To allocate a PT, use REQ_OSC_TABLE_ID and test that the right ID is allocated --- ompi/mca/osc/portals4/osc_portals4.h | 2 ++ ompi/mca/osc/portals4/osc_portals4_component.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index 4eb7eec0f6d..b01bd867545 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -21,6 +21,8 @@ #include "ompi/mca/mtl/portals4/mtl_portals4.h" +#define REQ_OSC_TABLE_ID 4 + #define OSC_PORTALS4_MB_DATA 0x0000000000000000ULL #define OSC_PORTALS4_MB_CONTROL 0x1000000000000000ULL diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index db2684765e7..c313f9adc04 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -305,7 +305,7 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) ret = PtlPTAlloc(mca_osc_portals4_component.matching_ni_h, 0, mca_osc_portals4_component.matching_eq_h, - 4, + REQ_OSC_TABLE_ID, &mca_osc_portals4_component.matching_pt_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -314,6 +314,13 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) return ret; } + if (mca_osc_portals4_component.matching_pt_idx != REQ_OSC_TABLE_ID) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n", + __FILE__, __LINE__, mca_osc_portals4_component.matching_pt_idx); + return ret; + } + OBJ_CONSTRUCT(&mca_osc_portals4_component.requests, opal_free_list_t); ret = opal_free_list_init (&mca_osc_portals4_component.requests, sizeof(ompi_osc_portals4_request_t), From 
56b36eeb7ea66ce8c986be54c84147192b9294fa Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Thu, 7 Jul 2016 10:52:00 +0200 Subject: [PATCH 02/10] osc/portals4: Format of "target_disp" is OPAL_PTRDIFF_TYPE and %lu is the appropriate format to display it. --- ompi/mca/osc/portals4/osc_portals4.h | 4 +-- ompi/mca/osc/portals4/osc_portals4_comm.c | 44 +++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index b01bd867545..88c67b3c591 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -177,7 +177,7 @@ int ompi_osc_portals4_get_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_datatype, int target_rank, - MPI_Aint target_disp, + OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_datatype, struct ompi_op_t *op, @@ -221,7 +221,7 @@ int ompi_osc_portals4_rget_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_datatype, int target_rank, - MPI_Aint target_disp, + OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_datatype, struct ompi_op_t *op, diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index 5e2d677db4a..100cf9fb87c 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -199,9 +199,9 @@ ompi_osc_portals4_rput(const void *origin_addr, size_t offset; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "rput: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); @@ -266,9 +266,9 @@ ompi_osc_portals4_rget(void *origin_addr, size_t offset; 
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); @@ -334,9 +334,9 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, ptl_datatype_t ptl_dt; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "raccumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s 0x%lx", + "raccumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); @@ -422,7 +422,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_dt, int target, - MPI_Aint target_disp, + OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -440,11 +440,11 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, ptl_datatype_t ptl_dt; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", + "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", (unsigned long) origin_addr, origin_count, origin_dt->name, (unsigned long) result_addr, result_count, result_dt->name, - target, (int) target_disp, + target, (unsigned long) target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); @@ -602,9 +602,9 @@ ompi_osc_portals4_put(const void *origin_addr, size_t offset; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, 
origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); @@ -659,9 +659,9 @@ ompi_osc_portals4_get(void *origin_addr, size_t offset; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "get: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); @@ -717,9 +717,9 @@ ompi_osc_portals4_accumulate(const void *origin_addr, ptl_datatype_t ptl_dt; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", + "accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); @@ -797,7 +797,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_dt, int target, - MPI_Aint target_disp, + OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -813,11 +813,11 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, ptl_datatype_t ptl_dt; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", + "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", (unsigned long) origin_addr, origin_count, origin_dt->name, (unsigned long) result_addr, result_count, result_dt->name, - target, (int) target_disp, + target, (unsigned long) target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); @@ -964,11 +964,11 @@ 
ompi_osc_portals4_compare_and_swap(const void *origin_addr, ptl_size_t result_md_offset, origin_md_offset; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, 0x%lx", + "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %lu, 0x%lx", (unsigned long) origin_addr, (unsigned long) compare_addr, (unsigned long) result_addr, - dt->name, target, (int) target_disp, + dt->name, target, (unsigned long) target_disp, (unsigned long) win)); ret = ompi_osc_portals4_get_dt(dt, &ptl_dt); @@ -1027,10 +1027,10 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, ptl_datatype_t ptl_dt; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %d, %s, 0x%lx", + "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %lu, %s, 0x%lx", (unsigned long) origin_addr, (unsigned long) result_addr, - dt->name, target, (int) target_disp, + dt->name, target, (unsigned long) target_disp, op->o_name, (unsigned long) win)); From e99e7d08ed651166e337b8c5cd888147605ade57 Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Thu, 7 Jul 2016 11:02:07 +0200 Subject: [PATCH 03/10] osc/portals4: For the ME, use the uid from PtlGetUid instead of PTL_UID_ANY --- ompi/mca/osc/portals4/osc_portals4.h | 1 + ompi/mca/osc/portals4/osc_portals4_component.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index 88c67b3c591..d87bdab97de 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -53,6 +53,7 @@ struct ompi_osc_portals4_component_t { ptl_size_t matching_atomic_max; ptl_size_t matching_fetch_atomic_max; ptl_size_t matching_atomic_ordered_size; + ptl_uid_t uid; opal_free_list_t requests; /* request free list for the r* communication variants */ }; diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 
c313f9adc04..793892a3641 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -360,8 +360,18 @@ component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct ompi_info_t *info, int flavor) { + int ret; + if (MPI_WIN_FLAVOR_SHARED == flavor) return -1; + ret = PtlGetUid(mca_osc_portals4_component.matching_ni_h, &mca_osc_portals4_component.uid); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlGetUid failed: %d\n", + __FILE__, __LINE__, ret); + return OMPI_ERROR; + } + return 20; } @@ -482,7 +492,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit me.length = size; } me.ct_handle = PTL_CT_NONE; - me.uid = PTL_UID_ANY; + me.uid = mca_osc_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; @@ -505,7 +515,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit me.start = &module->state; me.length = sizeof(module->state); me.ct_handle = PTL_CT_NONE; - me.uid = PTL_UID_ANY; + me.uid = mca_osc_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; From c4181909a40a36e3621c9415cdc19715632d3eda Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Thu, 7 Jul 2016 11:17:13 +0200 Subject: [PATCH 04/10] osc/portals4: Be sure that the ME are operational (wait for the PTL_EVENT_LINK) --- ompi/mca/osc/portals4/osc_portals4.h | 3 +++ .../mca/osc/portals4/osc_portals4_component.c | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index d87bdab97de..ae493ec3a22 
100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -54,6 +54,8 @@ struct ompi_osc_portals4_component_t { ptl_size_t matching_fetch_atomic_max; ptl_size_t matching_atomic_ordered_size; ptl_uid_t uid; + opal_mutex_t lock; + opal_condition_t cond; opal_free_list_t requests; /* request free list for the r* communication variants */ }; @@ -83,6 +85,7 @@ struct ompi_osc_portals4_module_t { ptl_handle_ni_t ni_h; /* network interface used by this window */ ptl_pt_index_t pt_idx; /* portal table index used by this window (this will be same across window) */ ptl_handle_ct_t ct_h; /* Counting event handle used for completion in this window */ + int ct_link; /* PTL_EVENT_LINK flag */ ptl_handle_md_t md_h; /* memory descriptor describing all of memory used by this window */ ptl_handle_md_t req_md_h; /* memory descriptor with event completion used by this window */ ptl_handle_me_t data_me_h; /* data match list entry (MB are CID | OSC_PORTALS4_MB_DATA) */ diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 793892a3641..ed95aa7c3b1 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -218,6 +218,13 @@ progress_callback(void) count++; if (NULL != ev.user_ptr) { + /* be sure that we receive the PTL_EVENT_LINK */ + if (ev.type == PTL_EVENT_LINK) { + *(int *)ev.user_ptr = *(int *)ev.user_ptr + 1; + opal_condition_broadcast(&mca_osc_portals4_component.cond); + continue; + } + req = (ompi_osc_portals4_request_t*) ev.user_ptr; opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength); ops = opal_atomic_add_32(&req->ops_committed, 1); @@ -503,7 +510,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->pt_idx, &me, PTL_PRIORITY_LIST, - NULL, + &module->ct_link, &module->data_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ 
-526,7 +533,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->pt_idx, &me, PTL_PRIORITY_LIST, - NULL, + &module->ct_link, &module->control_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -574,6 +581,13 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit PtlAtomicSync(); /* Make sure that everyone's ready to receive. */ + OPAL_THREAD_LOCK(&mca_osc_portals4_component.lock); + while (module->ct_link != 2) { + opal_condition_wait(&mca_osc_portals4_component.cond, + &mca_osc_portals4_component.lock); + } + OPAL_THREAD_UNLOCK(&mca_osc_portals4_component.lock); + module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module); @@ -618,6 +632,7 @@ ompi_osc_portals4_free(struct ompi_win_t *win) module->comm->c_coll.coll_barrier_module); /* cleanup */ + PtlMEUnlink(module->control_me_h); PtlMEUnlink(module->data_me_h); PtlMDRelease(module->md_h); PtlMDRelease(module->req_md_h); From aa0d687a0a7fe21a6caade2ae1054b35745c62e7 Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Fri, 8 Jul 2016 13:34:52 +0200 Subject: [PATCH 05/10] osc/portals4: Display an ouput message if ompi_osc_portals4_get_dt() or ompi_osc_portals4_get_op() returns an error --- ompi/mca/osc/portals4/osc_portals4_comm.c | 75 ++++++++++++++++++----- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index 100cf9fb87c..a9b0f6ddc01 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -384,10 +384,18 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, 0); } else { ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Raccumulate: datatype is not currently supported"); + return 
OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Raccumulate: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = PtlAtomic(module->req_md_h, offset + sent, @@ -476,7 +484,9 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - return ret; + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Rget_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; } length *= origin_count; @@ -547,10 +557,18 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, origin_md_offset = (ptl_size_t) origin_addr; ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Rget_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Rget_accumulate: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); @@ -760,10 +778,18 @@ ompi_osc_portals4_accumulate(const void *origin_addr, 0); } else { ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + 
opal_output(ompi_osc_base_framework.framework_output, + "MPI_Accumulate: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = PtlAtomic(module->md_h, md_offset + sent, @@ -842,7 +868,9 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, } ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); if (OMPI_SUCCESS != ret) { - return ret; + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Get_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; } length *= origin_count; @@ -909,11 +937,18 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, origin_md_offset = (ptl_size_t) origin_addr; ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Get_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; - + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Get_accumulate: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); @@ -972,7 +1007,11 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr, (unsigned long) win)); ret = ompi_osc_portals4_get_dt(dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Compare_and_swap: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } offset = get_displacement(module, target) * target_disp; @@ -1035,7 +1074,11 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, (unsigned long) win)); ret = ompi_osc_portals4_get_dt(dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + 
"MPI_Fetch_and_op: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } offset = get_displacement(module, target) * target_disp; @@ -1083,7 +1126,11 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, ptl_size_t result_md_offset, origin_md_offset; ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Fetch_and_op: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; From 025201b45998a25c68a8d3ae23716ea34abb9333 Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Fri, 8 Jul 2016 17:16:54 +0200 Subject: [PATCH 06/10] osc/portals4: set the initial value of req_status.MPI_ERROR to MPI_SUCCESS --- ompi/mca/osc/portals4/osc_portals4_request.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ompi/mca/osc/portals4/osc_portals4_request.h b/ompi/mca/osc/portals4/osc_portals4_request.h index ef7e7b425f3..ae1be6f44d2 100644 --- a/ompi/mca/osc/portals4/osc_portals4_request.h +++ b/ompi/mca/osc/portals4/osc_portals4_request.h @@ -33,6 +33,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_portals4_request_t); req->super.req_mpi_object.win = win; \ req->super.req_complete = false; \ req->super.req_state = OMPI_REQUEST_ACTIVE; \ + req->super.req_status.MPI_ERROR = MPI_SUCCESS; \ req->ops_expected = 0; \ req->ops_committed = 0; \ } while (0) From 7aaf16e7fe6c3d7f66e6db16f2f4def783f3a09c Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Tue, 12 Jul 2016 14:03:21 +0200 Subject: [PATCH 07/10] osc/portals4: Put/Get splitting because Portals4 may restrict sizes --- ompi/mca/osc/portals4/osc_portals4.h | 1 + ompi/mca/osc/portals4/osc_portals4_comm.c | 185 +++++++++++++++--- .../mca/osc/portals4/osc_portals4_component.c | 17 ++ 3 files changed, 175 insertions(+), 28 deletions(-) diff --git a/ompi/mca/osc/portals4/osc_portals4.h 
b/ompi/mca/osc/portals4/osc_portals4.h index ae493ec3a22..b0dea372656 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -53,6 +53,7 @@ struct ompi_osc_portals4_component_t { ptl_size_t matching_atomic_max; ptl_size_t matching_fetch_atomic_max; ptl_size_t matching_atomic_ordered_size; + ptl_size_t ptl_max_msg_size; /* max size given by portals (cf PtlNIInit) */ ptl_uid_t uid; opal_mutex_t lock; opal_condition_t cond; diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index a9b0f6ddc01..7e9fdf070e7 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -178,6 +178,97 @@ ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt) return 0; } +static ptl_size_t +number_of_fragment(ptl_size_t length, ptl_size_t maxlength) +{ + ptl_size_t nb_frag = length == 0 ? 1 : (length - 1) / maxlength + 1; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d : %ld fragment(s)", __FUNCTION__, __LINE__, nb_frag)); + return nb_frag; +} + +static int +splittedPtlPut(ptl_handle_md_t md_h, + ptl_size_t loc_offset, + ptl_size_t length, + ptl_ack_req_t ack_req, + ptl_process_t target_id, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_b, + ptl_size_t rem_offset, + void *usr_ptr, + ptl_hdr_data_t hdr_data) +{ + ptl_size_t length_sent = 0; + do { + ptl_size_t length_frag; + int ret; + + length_frag = (length > mca_osc_portals4_component.ptl_max_msg_size) ? 
+ mca_osc_portals4_component.ptl_max_msg_size : + length; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "Put size : %lu/%lu, offset:%lu", length_frag, length, length_sent)); + ret = PtlPut(md_h, + loc_offset + length_sent, + length_frag, + ack_req, + target_id, + pt_index, + match_b, + rem_offset + length_sent, + usr_ptr, + hdr_data); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlPut failed with return value %d", + __FUNCTION__, __LINE__, ret); + return ret; + } + length -= length_frag; + length_sent += length_frag; + } while (length); + return PTL_OK; +} + +static int +splittedPtlGet(ptl_handle_md_t md_h, + ptl_size_t loc_offset, + ptl_size_t length, + ptl_process_t target_id, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_b, + ptl_size_t rem_offset, + void *usr_ptr) +{ + ptl_size_t length_submitted = 0; + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, "Get")); + + do { + ptl_size_t length_frag; + int ret; + length_frag = (length > mca_osc_portals4_component.ptl_max_msg_size) ? 
+ mca_osc_portals4_component.ptl_max_msg_size : + length; + ret = PtlGet(md_h, + (ptl_size_t) loc_offset + length_submitted, + length_frag, + target_id, + pt_index, + match_b, + rem_offset + length_submitted, + usr_ptr); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlGet failed with return value %d", + __FUNCTION__, __LINE__, ret); + return ret; + } + length -= length_frag; + length_submitted += length_frag; + } while (length); + return PTL_OK; +} int ompi_osc_portals4_rput(const void *origin_addr, @@ -218,15 +309,18 @@ ompi_osc_portals4_rput(const void *origin_addr, "MPI_Rput: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected = 1; ret = ompi_datatype_type_size(origin_dt, &length); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } length *= origin_count; - ret = PtlPut(module->req_md_h, + request->ops_expected = number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size); + opal_atomic_add_64(&module->opcount, request->ops_expected); + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + + ret = splittedPtlPut(module->req_md_h, (ptl_size_t) origin_addr, length, PTL_ACK_REQ, @@ -285,15 +379,17 @@ ompi_osc_portals4_rget(void *origin_addr, "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected = 1; ret = ompi_datatype_type_size(origin_dt, &length); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } length *= origin_count; - ret = PtlGet(module->req_md_h, + request->ops_expected = number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size); + opal_atomic_add_64(&module->opcount, request->ops_expected); + 
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->req_md_h, (ptl_size_t) origin_addr, length, peer, @@ -368,11 +464,13 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, do { size_t msg_length = MIN(module->atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected++; if (MPI_REPLACE == op) { - ret = PtlPut(module->req_md_h, + request->ops_expected += number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size); + opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + ret = splittedPtlPut(module->req_md_h, md_offset + sent, msg_length, PTL_ACK_REQ, @@ -383,6 +481,8 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, request, 0); } else { + request->ops_expected++; + opal_atomic_add_64(&module->opcount, 1); ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); if (OMPI_SUCCESS != ret) { opal_output(ompi_osc_base_framework.framework_output, @@ -396,7 +496,8 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, "MPI_Raccumulate: operation is not currently supported"); return OMPI_ERR_NOT_SUPPORTED; } - + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Atomic", __FUNCTION__, __LINE__)); ret = PtlAtomic(module->req_md_h, offset + sent, msg_length, @@ -499,6 +600,8 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); request->ops_expected++; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->req_md_h, result_md_offset + sent, module->md_h, @@ -530,10 +633,11 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, do { size_t msg_length = MIN(module->fetch_atomic_max, length - 
sent); - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected++; - - ret = PtlGet(module->req_md_h, + opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size)); + request->ops_expected += number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->req_md_h, md_offset + sent, msg_length, peer, @@ -558,14 +662,14 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); if (OMPI_SUCCESS != ret) { - opal_output(ompi_osc_base_framework.framework_output, + opal_output(ompi_osc_base_framework.framework_output, "MPI_Rget_accumulate: datatype is not currently supported"); return OMPI_ERR_NOT_SUPPORTED; } ret = ompi_osc_portals4_get_op(op, &ptl_op); if (OMPI_SUCCESS != ret) { - opal_output(ompi_osc_base_framework.framework_output, + opal_output(ompi_osc_base_framework.framework_output, "MPI_Rget_accumulate: operation is not currently supported"); return OMPI_ERR_NOT_SUPPORTED; } @@ -576,6 +680,8 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); request->ops_expected++; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d FetchAtomic", __FUNCTION__, __LINE__)); ret = PtlFetchAtomic(module->req_md_h, result_md_offset + sent, module->md_h, @@ -634,13 +740,15 @@ ompi_osc_portals4_put(const void *origin_addr, "MPI_Put: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - (void)opal_atomic_add_64(&module->opcount, 1); ret = ompi_datatype_type_size(origin_dt, &length); if (OMPI_SUCCESS != ret) { return ret; } length *= origin_count; - ret = PtlPut(module->md_h, + opal_atomic_add_64(&module->opcount, number_of_fragment(length, 
mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + ret = splittedPtlPut(module->md_h, (ptl_size_t) origin_addr, length, PTL_ACK_REQ, @@ -691,13 +799,15 @@ ompi_osc_portals4_get(void *origin_addr, "MPI_Get: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - (void)opal_atomic_add_64(&module->opcount, 1); ret = ompi_datatype_type_size(origin_dt, &length); if (OMPI_SUCCESS != ret) { return ret; } length *= origin_count; - ret = PtlGet(module->md_h, + opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->md_h, (ptl_size_t) origin_addr, length, peer, @@ -763,10 +873,12 @@ ompi_osc_portals4_accumulate(const void *origin_addr, do { size_t msg_length = MIN(module->atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); if (MPI_REPLACE == op) { - ret = PtlPut(module->md_h, + opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + ret = splittedPtlPut(module->md_h, md_offset + sent, msg_length, PTL_ACK_REQ, @@ -777,6 +889,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, NULL, 0); } else { + (void)opal_atomic_add_64(&module->opcount, 1); ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); if (OMPI_SUCCESS != ret) { opal_output(ompi_osc_base_framework.framework_output, @@ -791,6 +904,8 @@ ompi_osc_portals4_accumulate(const void *origin_addr, return OMPI_ERR_NOT_SUPPORTED; } + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Atomic", __FUNCTION__, __LINE__)); ret = PtlAtomic(module->md_h, md_offset 
+ sent, msg_length, @@ -882,6 +997,8 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, result_md_offset + sent, module->md_h, @@ -912,9 +1029,10 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); - - ret = PtlGet(module->md_h, + opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->md_h, md_offset + sent, msg_length, peer, @@ -955,6 +1073,8 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d FetchAtomic", __FUNCTION__, __LINE__)); ret = PtlFetchAtomic(module->md_h, result_md_offset + sent, module->md_h, @@ -1025,6 +1145,8 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, result_md_offset, module->md_h, @@ -1087,14 +1209,15 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, assert(length <= module->fetch_atomic_max); - (void)opal_atomic_add_64(&module->opcount, 1); - if (MPI_REPLACE == op) { ptl_size_t result_md_offset, origin_md_offset; result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; + (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, 
result_md_offset, module->md_h, @@ -1114,7 +1237,10 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, md_offset = (ptl_size_t) result_addr; - ret = PtlGet(module->md_h, + opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size)); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = splittedPtlGet(module->md_h, md_offset, length, peer, @@ -1124,6 +1250,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, NULL); } else { ptl_size_t result_md_offset, origin_md_offset; + (void)opal_atomic_add_64(&module->opcount, 1); ret = ompi_osc_portals4_get_op(op, &ptl_op); if (OMPI_SUCCESS != ret) { @@ -1135,6 +1262,8 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d FetchAtomic", __FUNCTION__, __LINE__)); ret = PtlFetchAtomic(module->md_h, result_md_offset, module->md_h, diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index ed95aa7c3b1..15fc583344d 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -261,6 +261,18 @@ component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_osc_portals4_no_locks); + mca_osc_portals4_component.ptl_max_msg_size = PTL_SIZE_MAX; + (void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version, + "max_msg_size", + "Max size supported by portals4 (above that, a message is cut into messages less than that size)", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, + 0, + 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_osc_portals4_component.ptl_max_msg_size); + return OMPI_SUCCESS; } @@ -294,6 +306,11 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) /* BWB: FIX ME: Need to 
make sure our ID matches with the MTL... */ + if (mca_osc_portals4_component.ptl_max_msg_size > actual.max_msg_size) + mca_osc_portals4_component.ptl_max_msg_size = actual.max_msg_size; + OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, + "max_size = %lu", mca_osc_portals4_component.ptl_max_msg_size)); + mca_osc_portals4_component.matching_atomic_max = actual.max_atomic_size; mca_osc_portals4_component.matching_fetch_atomic_max = actual.max_fetch_atomic_size; mca_osc_portals4_component.matching_atomic_ordered_size = From 76b38651da6d67a57d00c9a4b234880d767deebd Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Mon, 18 Jul 2016 11:20:50 +0200 Subject: [PATCH 08/10] osc/portals4: For the contiguous datatype, take into account the lower bound before calling portals4 --- ompi/mca/osc/portals4/osc_portals4_comm.c | 177 +++++++++++++++------- 1 file changed, 125 insertions(+), 52 deletions(-) diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index 7e9fdf070e7..0e23804298e 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -286,8 +286,8 @@ ompi_osc_portals4_rput(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; size_t offset; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", @@ -309,7 +309,12 @@ ompi_osc_portals4_rput(const void *origin_addr, "MPI_Rput: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = 
ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; @@ -321,13 +326,13 @@ ompi_osc_portals4_rput(const void *origin_addr, "%s,%d Put", __FUNCTION__, __LINE__)); ret = splittedPtlPut(module->req_md_h, - (ptl_size_t) origin_addr, + (ptl_size_t) origin_addr + origin_lb, length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset, + offset + target_lb, request, 0); if (OMPI_SUCCESS != ret) { @@ -356,8 +361,8 @@ ompi_osc_portals4_rget(void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; size_t offset; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", @@ -379,7 +384,12 @@ ompi_osc_portals4_rget(void *origin_addr, "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; @@ -390,12 +400,12 @@ ompi_osc_portals4_rget(void *origin_addr, OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, "%s,%d Get", __FUNCTION__, __LINE__)); ret = splittedPtlGet(module->req_md_h, - (ptl_size_t) origin_addr, + (ptl_size_t) origin_addr + origin_lb, length, peer, module->pt_idx, module->match_bits, - offset, + offset + target_lb, request); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); @@ -424,10 +434,10 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) 
win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "raccumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s 0x%lx", @@ -452,7 +462,12 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, } else { ptl_size_t md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; @@ -471,13 +486,13 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Put", __FUNCTION__, __LINE__)); ret = splittedPtlPut(module->req_md_h, - md_offset + sent, + md_offset + sent + origin_lb, msg_length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request, 0); } else { @@ -499,13 +514,13 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, "%s,%d Atomic", __FUNCTION__, __LINE__)); ret = PtlAtomic(module->req_md_h, - offset + sent, + offset + sent + origin_lb, msg_length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request, 0, ptl_op, @@ -543,10 +558,10 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb, result_lb; 
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", @@ -577,11 +592,22 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, if (MPI_REPLACE == op) { ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } + ret = ompi_datatype_type_lb(target_dt, &target_lb); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); @@ -603,14 +629,14 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->req_md_h, - result_md_offset + sent, + result_md_offset + sent + result_lb, module->md_h, - origin_md_offset + sent, + origin_md_offset + sent + origin_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request, 0, NULL, @@ -621,7 +647,12 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, } else if (MPI_NO_OP == op) { ptl_size_t md_offset; - ret = ompi_datatype_type_size(target_dt, &length); + ret = ompi_datatype_get_extent(target_dt, &target_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; @@ -638,19 +669,29 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, 
ompi_osc_base_framework.framework_output, "%s,%d Get", __FUNCTION__, __LINE__)); ret = splittedPtlGet(module->req_md_h, - md_offset + sent, + md_offset + sent + result_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request); sent += msg_length; } while (sent < length); } else { ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; @@ -683,14 +724,14 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d FetchAtomic", __FUNCTION__, __LINE__)); ret = PtlFetchAtomic(module->req_md_h, - result_md_offset + sent, + result_md_offset + sent + result_lb, module->md_h, - origin_md_offset + sent, + origin_md_offset + sent + origin_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, request, 0, ptl_op, @@ -722,8 +763,8 @@ ompi_osc_portals4_put(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; size_t offset; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", @@ -740,7 +781,11 @@ ompi_osc_portals4_put(const void *origin_addr, "MPI_Put: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - ret = 
ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { return ret; } @@ -749,13 +794,13 @@ ompi_osc_portals4_put(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Put", __FUNCTION__, __LINE__)); ret = splittedPtlPut(module->md_h, - (ptl_size_t) origin_addr, + (ptl_size_t) origin_addr + origin_lb, length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset, + offset + target_lb, NULL, 0); if (OMPI_SUCCESS != ret) { @@ -781,8 +826,8 @@ ompi_osc_portals4_get(void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; size_t offset; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", @@ -799,7 +844,11 @@ ompi_osc_portals4_get(void *origin_addr, "MPI_Get: transfer of non-contiguous memory is not currently supported.\n"); return OMPI_ERR_NOT_SUPPORTED; } else { - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { return ret; } @@ -808,12 +857,12 @@ ompi_osc_portals4_get(void *origin_addr, OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, "%s,%d Get", __FUNCTION__, __LINE__)); ret = splittedPtlGet(module->md_h, - (ptl_size_t) origin_addr, + (ptl_size_t) origin_addr + origin_lb, length, peer, module->pt_idx, module->match_bits, - offset, + offset + target_lb, NULL); if (OMPI_SUCCESS != ret) { return ret; @@ -839,10 +888,10 @@ ompi_osc_portals4_accumulate(const void *origin_addr, 
ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", @@ -862,7 +911,11 @@ ompi_osc_portals4_accumulate(const void *origin_addr, } else { ptl_size_t md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); if (OMPI_SUCCESS != ret) { return ret; } @@ -879,13 +932,13 @@ ompi_osc_portals4_accumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Put", __FUNCTION__, __LINE__)); ret = splittedPtlPut(module->md_h, - md_offset + sent, + md_offset + sent + origin_lb, msg_length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL, 0); } else { @@ -907,13 +960,13 @@ ompi_osc_portals4_accumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Atomic", __FUNCTION__, __LINE__)); ret = PtlAtomic(module->md_h, - md_offset + sent, + md_offset + sent + origin_lb, msg_length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL, 0, ptl_op, @@ -948,10 +1001,10 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb, result_lb; OPAL_OUTPUT_VERBOSE((50, 
ompi_osc_base_framework.framework_output, "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", @@ -977,7 +1030,15 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, if (MPI_REPLACE == op) { ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); if (OMPI_SUCCESS != ret) { return ret; } @@ -1000,14 +1061,14 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, - result_md_offset + sent, + result_md_offset + sent + result_lb, module->md_h, - origin_md_offset + sent, + origin_md_offset + sent + origin_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL, 0, NULL, @@ -1018,7 +1079,11 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, } else if (MPI_NO_OP == op) { ptl_size_t md_offset; - ret = ompi_datatype_type_size(target_dt, &length); + ret = ompi_datatype_get_extent(target_dt, &target_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); if (OMPI_SUCCESS != ret) { return ret; } @@ -1033,19 +1098,27 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d Get", __FUNCTION__, __LINE__)); ret = splittedPtlGet(module->md_h, - md_offset + sent, + md_offset + sent + result_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL); sent += msg_length; } while (sent < length); } else { ptl_size_t result_md_offset, origin_md_offset; - ret = 
ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(target_dt, &target_lb); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_type_lb(result_dt, &result_lb); if (OMPI_SUCCESS != ret) { return ret; } @@ -1076,14 +1149,14 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, "%s,%d FetchAtomic", __FUNCTION__, __LINE__)); ret = PtlFetchAtomic(module->md_h, - result_md_offset + sent, + result_md_offset + sent + result_lb, module->md_h, - origin_md_offset + sent, + origin_md_offset + sent + origin_lb, msg_length, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL, 0, ptl_op, From 81823d7a63c14ce9e3e84a1f324e1e6b671172d6 Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Mon, 18 Jul 2016 11:51:52 +0200 Subject: [PATCH 09/10] osc/portals4: Store the no_locks parameter in osc_portals4_component.no_locks --- ompi/mca/osc/portals4/osc_portals4.h | 1 + ompi/mca/osc/portals4/osc_portals4_component.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index b0dea372656..c80b2b4a58f 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -54,6 +54,7 @@ struct ompi_osc_portals4_component_t { ptl_size_t matching_fetch_atomic_max; ptl_size_t matching_atomic_ordered_size; ptl_size_t ptl_max_msg_size; /* max size given by portals (cf PtlNIInit) */ + bool no_locks; ptl_uid_t uid; opal_mutex_t lock; opal_condition_t cond; diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 15fc583344d..0bc8c5148e7 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -250,7 +250,7 
@@ component_open(void) static int component_register(void) { - bool ompi_osc_portals4_no_locks = false; + mca_osc_portals4_component.no_locks = false; (void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version, "no_locks", "Enable optimizations available only if MPI_LOCK is " @@ -259,7 +259,7 @@ component_register(void) MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &ompi_osc_portals4_no_locks); + &mca_osc_portals4_component.no_locks); mca_osc_portals4_component.ptl_max_msg_size = PTL_SIZE_MAX; (void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version, From f19a2b961c9da1a8d8be4840b5c48c89f72469a9 Mon Sep 17 00:00:00 2001 From: Pascal Deveze Date: Mon, 18 Jul 2016 13:16:12 +0200 Subject: [PATCH 10/10] osc/portals4: Correct an error in an if statement --- ompi/mca/osc/portals4/osc_portals4_component.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 0bc8c5148e7..889b20e8255 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -198,7 +198,7 @@ progress_callback(void) "%s:%d: PtlEQGet reported dropped event", __FILE__, __LINE__); goto process; - } else if (PTL_EQ_EMPTY) { + } else if (PTL_EQ_EMPTY == ret) { return 0; } else { opal_output_verbose(1, ompi_osc_base_framework.framework_output,