@@ -58,6 +58,9 @@ static int ompi_osc_pt2pt_req_comm_complete (ompi_request_t *request)
5858 "ompi_osc_pt2pt_req_comm_complete called tag = %d" ,
5959 request -> req_status .MPI_TAG ));
6060
61+ /* update the cbdata for ompi_osc_pt2pt_comm_complete */
62+ request -> req_complete_cb_data = pt2pt_request -> module ;
63+
6164 if (0 == OPAL_THREAD_ADD32 (& pt2pt_request -> outstanding_requests , -1 )) {
6265 ompi_osc_pt2pt_request_complete (pt2pt_request , request -> req_status .MPI_ERROR );
6366 }
@@ -218,8 +221,8 @@ static inline int ompi_osc_pt2pt_gacc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, c
218221 ((unsigned long ) target_disp * module -> disp_unit );
219222 int ret ;
220223
221- /* if we are in active target mode wait until all post messages arrive */
222- ompi_osc_pt2pt_sync_wait ( pt2pt_sync );
224+ OPAL_OUTPUT_VERBOSE (( MCA_BASE_VERBOSE_TRACE , ompi_osc_base_framework . framework_output , "ompi_osc_pt2pt_gacc_self: starting local "
225+ "get accumulate" ) );
223226
224227 ompi_osc_pt2pt_accumulate_lock (module );
225228
@@ -250,6 +253,9 @@ static inline int ompi_osc_pt2pt_gacc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, c
250253
251254 ompi_osc_pt2pt_accumulate_unlock (module );
252255
256+ OPAL_OUTPUT_VERBOSE ((MCA_BASE_VERBOSE_TRACE , ompi_osc_base_framework .framework_output , "ompi_osc_pt2pt_gacc_self: local get "
257+ "accumulate complete" ));
258+
253259 if (request ) {
254260 /* NTH: is it ok to use an ompi error code here? */
255261 ompi_osc_pt2pt_request_complete (request , ret );
@@ -310,14 +316,14 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_
310316 payload_len = origin_dt -> super .size * origin_count ;
311317 frag_len = sizeof (ompi_osc_pt2pt_header_put_t ) + ddt_len + payload_len ;
312318
313- ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false);
319+ ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false, true );
314320 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
315321 frag_len = sizeof (ompi_osc_pt2pt_header_put_t ) + ddt_len ;
316- ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , true);
322+ ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , true, false );
317323 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
318324 /* allocate space for the header plus space to store ddt_len */
319325 frag_len = sizeof (ompi_osc_pt2pt_header_put_t ) + 8 ;
320- ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , true);
326+ ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , true, false );
321327 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
322328 return OMPI_ERR_OUT_OF_RESOURCE ;
323329 }
@@ -469,14 +475,14 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count,
469475 payload_len = origin_dt -> super .size * origin_count ;
470476
471477 frag_len = sizeof (* header ) + ddt_len + payload_len ;
472- ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false);
478+ ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false, true );
473479 if (OMPI_SUCCESS != ret ) {
474480 frag_len = sizeof (* header ) + ddt_len ;
475- ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , true);
481+ ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , true, ! request );
476482 if (OMPI_SUCCESS != ret ) {
477483 /* allocate space for the header plus space to store ddt_len */
478484 frag_len = sizeof (* header ) + 8 ;
479- ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , true);
485+ ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , true, ! request );
480486 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
481487 return OMPI_ERR_OUT_OF_RESOURCE ;
482488 }
@@ -488,7 +494,7 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count,
488494 tag = get_rtag (module );
489495 }
490496
491- if (is_long_msg || is_long_datatype ) {
497+ if (is_long_msg ) {
492498 /* wait for synchronization before posting a long message */
493499 ompi_osc_pt2pt_sync_wait (pt2pt_sync );
494500 }
@@ -631,7 +637,7 @@ int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compar
631637 }
632638
633639 frag_len = sizeof (ompi_osc_pt2pt_header_cswap_t ) + ddt_len + payload_len ;
634- ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false);
640+ ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false, false );
635641 if (OMPI_SUCCESS != ret ) {
636642 return OMPI_ERR_OUT_OF_RESOURCE ;
637643 }
@@ -663,9 +669,7 @@ int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compar
663669 return ret ;
664670 }
665671
666- ret = ompi_osc_pt2pt_frag_finish (module , frag );
667-
668- return ret ;
672+ return ompi_osc_pt2pt_frag_finish (module , frag );
669673}
670674
671675
@@ -779,11 +783,11 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co
779783 ddt_len = ompi_datatype_pack_description_length (target_dt );
780784
781785 frag_len = sizeof (ompi_osc_pt2pt_header_get_t ) + ddt_len ;
782- ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false);
786+ ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false, release_req );
783787 if (OMPI_SUCCESS != ret ) {
784788 /* allocate space for the header plus space to store ddt_len */
785789 frag_len = sizeof (ompi_osc_pt2pt_header_put_t ) + 8 ;
786- ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false);
790+ ret = ompi_osc_pt2pt_frag_alloc (module , target , frag_len , & frag , & ptr , false, release_req );
787791 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
788792 return OMPI_ERR_OUT_OF_RESOURCE ;
789793 }
@@ -961,6 +965,11 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin
961965 return OMPI_SUCCESS ;
962966 }
963967
968+ if (!release_req ) {
969+ /* wait for epoch to begin before starting operation */
970+ ompi_osc_pt2pt_sync_wait (pt2pt_sync );
971+ }
972+
964973 /* optimize the self case. TODO: optimize the local case */
965974 if (ompi_comm_rank (module -> comm ) == target_rank ) {
966975 * request = & pt2pt_request -> super ;
@@ -987,14 +996,14 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin
987996 }
988997
989998 frag_len = sizeof (* header ) + ddt_len + payload_len ;
990- ret = ompi_osc_pt2pt_frag_alloc (module , target_rank , frag_len , & frag , & ptr , false);
999+ ret = ompi_osc_pt2pt_frag_alloc (module , target_rank , frag_len , & frag , & ptr , false, release_req );
9911000 if (OMPI_SUCCESS != ret ) {
9921001 frag_len = sizeof (* header ) + ddt_len ;
993- ret = ompi_osc_pt2pt_frag_alloc (module , target_rank , frag_len , & frag , & ptr , true);
1002+ ret = ompi_osc_pt2pt_frag_alloc (module , target_rank , frag_len , & frag , & ptr , true, release_req );
9941003 if (OMPI_SUCCESS != ret ) {
9951004 /* allocate space for the header plus space to store ddt_len */
9961005 frag_len = sizeof (* header ) + 8 ;
997- ret = ompi_osc_pt2pt_frag_alloc (module , target_rank , frag_len , & frag , & ptr , true);
1006+ ret = ompi_osc_pt2pt_frag_alloc (module , target_rank , frag_len , & frag , & ptr , true, release_req );
9981007 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
9991008 return OMPI_ERR_OUT_OF_RESOURCE ;
10001009 }
@@ -1014,11 +1023,6 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin
10141023 /* increment the number of outgoing fragments */
10151024 ompi_osc_signal_outgoing (module , target_rank , pt2pt_request -> outstanding_requests );
10161025
1017- if (!release_req ) {
1018- /* wait for epoch to begin before starting operation */
1019- ompi_osc_pt2pt_sync_wait (pt2pt_sync );
1020- }
1021-
10221026 header = (ompi_osc_pt2pt_header_acc_t * ) ptr ;
10231027 header -> base .flags = 0 ;
10241028 header -> len = frag_len ;
0 commit comments