Skip to content

Commit 955c73b

Browse files
hjelmnhppritcha
authored andcommitted
pml/ob1: fix bugs in static request objects
This commit fixes several bugs in the static request objects used by ob1 for blocking send/receive operations. - Fix memory leak when using MPI_THREAD_MULTIPLE. Requests were allocated off the free list but were destructed and NOT returned. - Fix double-destruct of static objects. There is no reason to CONSTRUCT/DESTUCT the static object for each send/receive operation. This adds overhead and no benefit. To keep the code clean helper functions have been added to finalize ob1 send/receive requests. - Remove now unnecessary include of alloca.h. Signed-off-by: Nathan Hjelm <[email protected]> (cherry picked from commit open-mpi/ompi@9a8a876)
1 parent 2185e05 commit 955c73b

File tree

5 files changed

+46
-33
lines changed

5 files changed

+46
-33
lines changed

ompi/mca/pml/ob1/pml_ob1_component.c

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,18 @@ int mca_pml_ob1_component_fini(void)
292292
return OMPI_SUCCESS; /* never selected.. return success.. */
293293
mca_pml_ob1.enabled = false; /* not anymore */
294294

295+
/* return the static receive/send requests to the respective free list and
296+
* let the free list handle destruction. */
297+
if( NULL != mca_pml_ob1_recvreq ) {
298+
opal_free_list_return (&mca_pml_base_recv_requests, (opal_free_list_item_t *) mca_pml_ob1_recvreq);
299+
mca_pml_ob1_recvreq = NULL;
300+
}
301+
302+
if( NULL != mca_pml_ob1_sendreq ) {
303+
opal_free_list_return (&mca_pml_base_send_requests, (opal_free_list_item_t *) mca_pml_ob1_sendreq);
304+
mca_pml_ob1_sendreq = NULL;
305+
}
306+
295307
OBJ_DESTRUCT(&mca_pml_ob1.rdma_pending);
296308
OBJ_DESTRUCT(&mca_pml_ob1.pckt_pending);
297309
OBJ_DESTRUCT(&mca_pml_ob1.recv_pending);
@@ -304,15 +316,6 @@ int mca_pml_ob1_component_fini(void)
304316
OBJ_DESTRUCT(&mca_pml_ob1.lock);
305317
OBJ_DESTRUCT(&mca_pml_ob1.send_ranges);
306318

307-
if( NULL != mca_pml_ob1_recvreq ) {
308-
OBJ_DESTRUCT(mca_pml_ob1_recvreq);
309-
mca_pml_ob1_recvreq = NULL;
310-
}
311-
if( NULL != mca_pml_ob1_sendreq ) {
312-
OBJ_DESTRUCT(mca_pml_ob1_sendreq);
313-
mca_pml_ob1_sendreq = NULL;
314-
}
315-
316319
if( NULL != mca_pml_ob1.allocator ) {
317320
(void)mca_pml_ob1.allocator->alc_finalize(mca_pml_ob1.allocator);
318321
mca_pml_ob1.allocator = NULL;

ompi/mca/pml/ob1/pml_ob1_irecv.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
13-
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
13+
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
1414
* reserved.
1515
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
1616
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
@@ -28,9 +28,6 @@
2828
#include "pml_ob1_recvfrag.h"
2929
#include "ompi/peruse/peruse-internal.h"
3030
#include "ompi/message/message.h"
31-
#if HAVE_ALLOCA_H
32-
#include <alloca.h>
33-
#endif /* HAVE_ALLOCA_H */
3431

3532
mca_pml_ob1_recv_request_t *mca_pml_ob1_recvreq = NULL;
3633

@@ -109,7 +106,6 @@ int mca_pml_ob1_recv(void *addr,
109106
mca_pml_ob1_recvreq = recvreq;
110107
#endif /* !OMPI_ENABLE_THREAD_MULTIPLE */
111108
}
112-
OBJ_CONSTRUCT(recvreq, mca_pml_ob1_recv_request_t);
113109

114110
MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, addr, count, datatype,
115111
src, tag, comm, false);
@@ -126,8 +122,12 @@ int mca_pml_ob1_recv(void *addr,
126122
}
127123

128124
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
129-
MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv);
130-
OBJ_DESTRUCT(recvreq);
125+
126+
#if OMPI_ENABLE_THREAD_MULTIPLE
127+
MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq);
128+
#else
129+
mca_pml_ob1_recv_request_fini (recvreq);
130+
#endif
131131

132132
return rc;
133133
}

ompi/mca/pml/ob1/pml_ob1_isend.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,6 @@
2828
#include "pml_ob1_sendreq.h"
2929
#include "pml_ob1_recvreq.h"
3030
#include "ompi/peruse/peruse-internal.h"
31-
#if HAVE_ALLOCA_H
32-
#include <alloca.h>
33-
#endif /* HAVE_ALLOCA_H */
3431

3532
mca_pml_ob1_send_request_t *mca_pml_ob1_sendreq = NULL;
3633

@@ -232,7 +229,6 @@ int mca_pml_ob1_send(const void *buf,
232229
mca_pml_ob1_sendreq = sendreq;
233230
#endif /* !OMPI_ENABLE_THREAD_MULTIPLE */
234231
}
235-
OBJ_CONSTRUCT(sendreq, mca_pml_ob1_send_request_t);
236232
sendreq->req_send.req_base.req_proc = dst_proc;
237233
sendreq->rdma_frag = NULL;
238234

@@ -252,9 +248,13 @@ int mca_pml_ob1_send(const void *buf,
252248
ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi);
253249

254250
rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
255-
MCA_PML_BASE_SEND_REQUEST_FINI(&sendreq->req_send);
256251
}
257-
OBJ_DESTRUCT(sendreq);
252+
253+
#if OMPI_ENABLE_THREAD_MULTIPLE
254+
MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq);
255+
#else
256+
mca_pml_ob1_send_request_fini (sendreq);
257+
#endif
258258

259259
return rc;
260260
}

ompi/mca/pml/ob1/pml_ob1_recvreq.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,16 +128,21 @@ do { \
128128
ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \
129129
} while (0)
130130

131+
static inline void mca_pml_ob1_recv_request_fini (mca_pml_ob1_recv_request_t *recvreq)
132+
{
133+
MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv);
134+
if ((recvreq)->local_handle) {
135+
mca_bml_base_deregister_mem (recvreq->rdma_bml, recvreq->local_handle);
136+
recvreq->local_handle = NULL;
137+
}
138+
}
139+
131140
/*
132141
* Free the PML receive request
133142
*/
134143
#define MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq) \
135144
{ \
136-
MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \
137-
if ((recvreq)->local_handle) { \
138-
mca_bml_base_deregister_mem ((recvreq)->rdma_bml, (recvreq)->local_handle); \
139-
(recvreq)->local_handle = NULL; \
140-
} \
145+
mca_pml_ob1_recv_request_fini (recvreq); \
141146
opal_free_list_return (&mca_pml_base_recv_requests, \
142147
(opal_free_list_item_t*)(recvreq)); \
143148
}

ompi/mca/pml/ob1/pml_ob1_sendreq.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -215,18 +215,23 @@ do {
215215
&(sendreq->req_send.req_base), PERUSE_SEND); \
216216
} while(0)
217217

218+
static inline void mca_pml_ob1_send_request_fini (mca_pml_ob1_send_request_t *sendreq)
219+
{
220+
/* Let the base handle the reference counts */
221+
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send));
222+
if (sendreq->rdma_frag) {
223+
MCA_PML_OB1_RDMA_FRAG_RETURN (sendreq->rdma_frag);
224+
sendreq->rdma_frag = NULL;
225+
}
226+
}
227+
218228
/*
219229
* Release resources associated with a request
220230
*/
221231

222232
#define MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq) \
223233
do { \
224-
/* Let the base handle the reference counts */ \
225-
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \
226-
if (sendreq->rdma_frag) { \
227-
MCA_PML_OB1_RDMA_FRAG_RETURN (sendreq->rdma_frag); \
228-
sendreq->rdma_frag = NULL; \
229-
} \
234+
mca_pml_ob1_send_request_fini (sendreq); \
230235
opal_free_list_return ( &mca_pml_base_send_requests, \
231236
(opal_free_list_item_t*)sendreq); \
232237
} while(0)

0 commit comments

Comments
 (0)