Skip to content

Commit c464f9f

Browse files
committed
start bug fixes (#1729)
* mpi/start: fix bugs in cm and ob1 start functions There were several problems with the implementation of start in Open MPI: - There are no checks whatsoever on the state of the request(s) provided to MPI_Start/MPI_Start_all. It is erroneous to provide an active request to either of these calls. Since we are already looping over the provided requests there is little overhead in verifying that the request can be started. - Both ob1 and cm were always throwing away the request on the initial call to start and start_all with a particular request. Subsequent calls would see that the request was pml_complete and reuse it. This introduced a leak as the initial request was never freed. Since the only pml request that can be mpi complete but not pml complete is a buffered send the code to reallocate the request has been moved. To detect that a request is indeed mpi complete but not pml complete isend_init in both cm and ob1 now marks the new request as pml complete. - If a new request was needed the callbacks on the original request were not copied over to the new request. This can cause osc/pt2pt to hang as the incoming message callback is never called. Signed-off-by: Nathan Hjelm <[email protected]> * osc/pt2pt: add request for gc after starting a new request Starting a new receive may cause a recursive call into the pt2pt frag receive function. If this happens and the prior request is on the garbage collection list it could cause problems. This commit moves the gc insert until after the new request has been posted. Signed-off-by: Nathan Hjelm <[email protected]> (cherry picked from open-mpi/ompi@e968ddf) Signed-off-by: Nathan Hjelm <[email protected]>
1 parent ab9a53e commit c464f9f

File tree

7 files changed

+107
-143
lines changed

7 files changed

+107
-143
lines changed

ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1660,9 +1660,10 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request)
16601660

16611661
osc_pt2pt_gc_clean (module);
16621662

1663+
ompi_osc_pt2pt_frag_start_receive (module);
1664+
16631665
/* put this request on the garbage colletion list */
16641666
osc_pt2pt_gc_add_request (module, request);
1665-
ompi_osc_pt2pt_frag_start_receive (module);
16661667

16671668
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
16681669
"finished posting receive request"));
@@ -1672,6 +1673,7 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request)
16721673

16731674
int ompi_osc_pt2pt_frag_start_receive (ompi_osc_pt2pt_module_t *module)
16741675
{
1676+
module->frag_request = MPI_REQUEST_NULL;
16751677
return ompi_osc_pt2pt_irecv_w_cb (module->incoming_buffer, mca_osc_pt2pt_component.buffer_size + sizeof (ompi_osc_pt2pt_frag_header_t),
16761678
MPI_BYTE, OMPI_ANY_SOURCE, OSC_PT2PT_FRAG_TAG, module->comm, &module->frag_request,
16771679
ompi_osc_pt2pt_callback, module);
@@ -1731,11 +1733,14 @@ int ompi_osc_pt2pt_irecv_w_cb (void *ptr, int count, ompi_datatype_t *datatype,
17311733

17321734
request->req_complete_cb = cb;
17331735
request->req_complete_cb_data = ctx;
1734-
if (request_out) {
1736+
1737+
ret = MCA_PML_CALL(start(1, &request));
1738+
if (request_out && MPI_REQUEST_NULL != request) {
17351739
*request_out = request;
17361740
}
17371741

1738-
ret = MCA_PML_CALL(start(1, &request));
1742+
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
1743+
"osc pt2pt: pml start returned %d. state: %d", ret, request->req_state));
17391744

17401745
return ret;
17411746
}

ompi/mca/osc/pt2pt/osc_pt2pt_module.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* University of Stuttgart. All rights reserved.
99
* Copyright (c) 2004-2005 The Regents of the University of California.
1010
* All rights reserved.
11-
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
11+
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
1212
* reserved.
1313
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
1414
* Copyright (c) 2015 Research Organization for Information Science
@@ -95,11 +95,12 @@ int ompi_osc_pt2pt_free(ompi_win_t *win)
9595

9696
if (NULL != module->epoch_outgoing_frag_count) free(module->epoch_outgoing_frag_count);
9797

98-
if (NULL != module->frag_request) {
98+
if (NULL != module->frag_request && MPI_REQUEST_NULL != module->frag_request) {
9999
module->frag_request->req_complete_cb = NULL;
100100
ompi_request_cancel (module->frag_request);
101101
ompi_request_free (&module->frag_request);
102102
}
103+
103104
if (NULL != module->comm) {
104105
ompi_comm_free(&module->comm);
105106
}

ompi/mca/pml/cm/pml_cm.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ mca_pml_cm_isend_init(const void* buf,
232232
MCA_PML_CM_HVY_SEND_REQUEST_INIT(sendreq, ompi_proc, comm, tag, dst,
233233
datatype, sendmode, true, false, buf, count);
234234

235+
/* Work around a leak in start by marking this request as complete. The
236+
* problem occured because we do not have a way to differentiate an
237+
* inital request and an incomplete pml request in start. This line
238+
* allows us to detect this state. */
239+
sendreq->req_send.req_base.req_pml_complete = true;
240+
235241
*request = (ompi_request_t*) sendreq;
236242

237243
return OMPI_SUCCESS;

ompi/mca/pml/cm/pml_cm_start.c

Lines changed: 39 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
34
* University Research and Technology
@@ -10,6 +11,8 @@
1011
* Copyright (c) 2004-2006 The Regents of the University of California.
1112
* All rights reserved.
1213
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
14+
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
15+
* reserved.
1316
* $COPYRIGHT$
1417
*
1518
* Additional copyrights may follow
@@ -32,78 +35,14 @@ int
3235
mca_pml_cm_start(size_t count, ompi_request_t** requests)
3336
{
3437
int rc;
35-
size_t i;
36-
for (i = 0 ; i < count ; i++) {
37-
mca_pml_cm_request_t *pml_request =
38-
(mca_pml_cm_request_t*)requests[i];
39-
if (OMPI_REQUEST_PML != requests[i]->req_type) {
40-
continue;
41-
}
42-
if (NULL == pml_request) {
38+
39+
for (size_t i = 0 ; i < count ; i++) {
40+
mca_pml_cm_request_t *pml_request = (mca_pml_cm_request_t*)requests[i];
41+
if (OMPI_REQUEST_PML != requests[i]->req_type || NULL == pml_request) {
4342
continue;
4443
}
45-
/* If the persistent request is currebtly active - obtain the
46-
* request lock and verify the status is incomplete. if the
47-
* pml layer has not completed the request - mark the request
48-
* as free called - so that it will be freed when the request
49-
* completes - and create a new request.
50-
*/
51-
switch (pml_request->req_ompi.req_state) {
52-
case OMPI_REQUEST_INACTIVE:
53-
if (pml_request->req_pml_complete == true)
54-
break;
55-
56-
case OMPI_REQUEST_ACTIVE: {
57-
/* otherwise fall through */
58-
ompi_request_t *request;
5944

60-
OPAL_THREAD_LOCK(&ompi_request_lock);
61-
if (pml_request->req_pml_complete == false) {
62-
/* free request after it completes */
63-
pml_request->req_free_called = true;
64-
} else {
65-
/* can reuse the existing request */
66-
OPAL_THREAD_UNLOCK(&ompi_request_lock);
67-
break;
68-
}
69-
70-
/* allocate a new request */
71-
switch (pml_request->req_pml_type) {
72-
case MCA_PML_CM_REQUEST_SEND_HEAVY: {
73-
mca_pml_cm_hvy_send_request_t* sendreq = (mca_pml_cm_hvy_send_request_t*) pml_request;
74-
rc = mca_pml_cm_isend_init( sendreq->req_addr,
75-
sendreq->req_count,
76-
sendreq->req_send.req_base.req_datatype,
77-
sendreq->req_peer,
78-
sendreq->req_tag,
79-
sendreq->req_send.req_send_mode,
80-
sendreq->req_send.req_base.req_comm,
81-
&request );
82-
break;
83-
}
84-
case MCA_PML_CM_REQUEST_RECV_HEAVY: {
85-
mca_pml_cm_hvy_recv_request_t* recvreq = (mca_pml_cm_hvy_recv_request_t*) pml_request;
86-
rc = mca_pml_cm_irecv_init( recvreq->req_addr,
87-
recvreq->req_count,
88-
recvreq->req_base.req_datatype,
89-
recvreq->req_peer,
90-
recvreq->req_tag,
91-
recvreq->req_base.req_comm,
92-
&request );
93-
break;
94-
}
95-
default:
96-
rc = OMPI_ERR_REQUEST;
97-
break;
98-
}
99-
OPAL_THREAD_UNLOCK(&ompi_request_lock);
100-
if(OMPI_SUCCESS != rc)
101-
return rc;
102-
pml_request = (mca_pml_cm_request_t*)request;
103-
requests[i] = request;
104-
break;
105-
}
106-
default:
45+
if (OMPI_REQUEST_ACTIVE == pml_request->req_ompi.req_state) {
10746
return OMPI_ERR_REQUEST;
10847
}
10948

@@ -113,6 +52,37 @@ mca_pml_cm_start(size_t count, ompi_request_t** requests)
11352
{
11453
mca_pml_cm_hvy_send_request_t* sendreq =
11554
(mca_pml_cm_hvy_send_request_t*)pml_request;
55+
if (!sendreq->req_send.req_base.req_pml_complete) {
56+
ompi_request_t *request;
57+
58+
/* buffered sends can be mpi complete and pml incomplete. to support this
59+
* case we need to allocate a new request. */
60+
rc = mca_pml_cm_isend_init (sendreq->req_addr,
61+
sendreq->req_count,
62+
sendreq->req_send.req_base.req_datatype,
63+
sendreq->req_peer,
64+
sendreq->req_tag,
65+
sendreq->req_send.req_send_mode,
66+
sendreq->req_send.req_base.req_comm,
67+
&request);
68+
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
69+
return rc;
70+
}
71+
72+
/* copy the callback and callback data to the new requests */
73+
request->req_complete_cb = pml_request->req_ompi.req_complete_cb;
74+
request->req_complete_cb_data = pml_request->req_ompi.req_complete_cb_data;
75+
76+
/* ensure the old request gets released */
77+
pml_request->req_free_called = true;
78+
79+
sendreq = (mca_pml_cm_hvy_send_request_t *) request;
80+
requests[i] = request;
81+
}
82+
83+
/* reset the completion flag */
84+
sendreq->req_send.req_base.req_pml_complete = false;
85+
11686
MCA_PML_CM_HVY_SEND_REQUEST_START(sendreq, rc);
11787
if(rc != OMPI_SUCCESS)
11888
return rc;

ompi/mca/pml/ob1/pml_ob1_isend.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ int mca_pml_ob1_isend_init(const void *buf,
6363
&(sendreq)->req_send.req_base,
6464
PERUSE_SEND);
6565

66+
/* Work around a leak in start by marking this request as complete. The
67+
* problem occured because we do not have a way to differentiate an
68+
* inital request and an incomplete pml request in start. This line
69+
* allows us to detect this state. */
70+
sendreq->req_send.req_base.req_pml_complete = true;
71+
6672
*request = (ompi_request_t *) sendreq;
6773
return OMPI_SUCCESS;
6874
}

ompi/mca/pml/ob1/pml_ob1_start.c

Lines changed: 44 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
34
* University Research and Technology
@@ -11,6 +12,8 @@
1112
* All rights reserved.
1213
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
1314
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
15+
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
16+
* reserved.
1417
* $COPYRIGHT$
1518
*
1619
* Additional copyrights may follow
@@ -29,84 +32,25 @@
2932
int mca_pml_ob1_start(size_t count, ompi_request_t** requests)
3033
{
3134
int rc;
32-
size_t i;
33-
bool reuse_old_request = true;
3435

35-
for(i=0; i<count; i++) {
36+
for (size_t i = 0 ; i < count ; ++i) {
3637
mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i];
37-
if(NULL == pml_request) {
38-
continue;
39-
}
40-
if (OMPI_REQUEST_PML != requests[i]->req_type) {
38+
if (NULL == pml_request || OMPI_REQUEST_PML != requests[i]->req_type) {
4139
continue;
4240
}
4341

44-
/* If the persistent request is currently active - obtain the
45-
* request lock and verify the status is incomplete. if the
46-
* pml layer has not completed the request - mark the request
47-
* as free called - so that it will be freed when the request
42+
/* If the persistent request is currently active - verify the status
43+
* is incomplete. if the pml layer has not completed the request - mark
44+
* the request as free called - so that it will be freed when the request
4845
* completes - and create a new request.
4946
*/
5047

5148
#if OPAL_ENABLE_MULTI_THREADS
5249
opal_atomic_rmb();
5350
#endif
54-
reuse_old_request = true;
55-
switch(pml_request->req_ompi.req_state) {
56-
case OMPI_REQUEST_INACTIVE:
57-
if(pml_request->req_pml_complete == true)
58-
break;
59-
/* otherwise fall through */
60-
case OMPI_REQUEST_ACTIVE: {
61-
62-
ompi_request_t *request;
63-
if (pml_request->req_pml_complete == false) {
64-
/* free request after it completes */
65-
pml_request->req_free_called = true;
66-
} else {
67-
/* can reuse the existing request */
68-
break;
69-
}
7051

71-
reuse_old_request = false;
72-
/* allocate a new request */
73-
switch(pml_request->req_type) {
74-
case MCA_PML_REQUEST_SEND: {
75-
mca_pml_base_send_mode_t sendmode =
76-
((mca_pml_base_send_request_t*)pml_request)->req_send_mode;
77-
rc = mca_pml_ob1_isend_init(
78-
pml_request->req_addr,
79-
pml_request->req_count,
80-
pml_request->req_datatype,
81-
pml_request->req_peer,
82-
pml_request->req_tag,
83-
sendmode,
84-
pml_request->req_comm,
85-
&request);
86-
break;
87-
}
88-
case MCA_PML_REQUEST_RECV:
89-
rc = mca_pml_ob1_irecv_init(
90-
pml_request->req_addr,
91-
pml_request->req_count,
92-
pml_request->req_datatype,
93-
pml_request->req_peer,
94-
pml_request->req_tag,
95-
pml_request->req_comm,
96-
&request);
97-
break;
98-
default:
99-
rc = OMPI_ERR_REQUEST;
100-
break;
101-
}
102-
if(OMPI_SUCCESS != rc)
103-
return rc;
104-
pml_request = (mca_pml_base_request_t*)request;
105-
requests[i] = request;
106-
break;
107-
}
108-
default:
109-
return OMPI_ERR_REQUEST;
52+
if (OMPI_REQUEST_ACTIVE == pml_request->req_ompi.req_state) {
53+
return OMPI_ERR_REQUEST;
11054
}
11155

11256
/* start the request */
@@ -119,15 +63,46 @@ int mca_pml_ob1_start(size_t count, ompi_request_t** requests)
11963
pml_request->req_addr, pml_request->req_count,
12064
pml_request->req_datatype);
12165
);
122-
if( reuse_old_request && (sendreq->req_send.req_bytes_packed != 0) ) {
66+
67+
if (!pml_request->req_pml_complete) {
68+
ompi_request_t *request;
69+
70+
/* buffered sends can be mpi complete and pml incomplete. to support this
71+
* case we need to allocate a new request. */
72+
rc = mca_pml_ob1_isend_init (pml_request->req_addr,
73+
pml_request->req_count,
74+
pml_request->req_datatype,
75+
pml_request->req_peer,
76+
pml_request->req_tag,
77+
sendreq->req_send.req_send_mode,
78+
pml_request->req_comm,
79+
&request);
80+
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
81+
return rc;
82+
}
83+
84+
/* copy the callback and callback data to the new requests */
85+
request->req_complete_cb = pml_request->req_ompi.req_complete_cb;
86+
request->req_complete_cb_data = pml_request->req_ompi.req_complete_cb_data;
87+
88+
/* ensure the old request gets released */
89+
pml_request->req_free_called = true;
90+
91+
sendreq = (mca_pml_ob1_send_request_t *) request;
92+
requests[i] = request;
93+
} else if (sendreq->req_send.req_bytes_packed != 0) {
12394
size_t offset = 0;
12495
/**
12596
* Reset the convertor in case we're dealing with the original
12697
* request, which when completed do not reset the convertor.
12798
*/
128-
opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor,
129-
&offset );
99+
opal_convertor_set_position (&sendreq->req_send.req_base.req_convertor,
100+
&offset);
130101
}
102+
103+
/* reset the completion flag */
104+
pml_request->req_pml_complete = false;
105+
131106
MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc);
132107
if(rc != OMPI_SUCCESS)
133108
return rc;

ompi/mpi/c/startall.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ int MPI_Startall(int count, MPI_Request requests[])
8787
}
8888
}
8989
}
90+
9091
ret = MCA_PML_CALL(start(count, requests));
9192

9293
return ret;

0 commit comments

Comments
 (0)