Skip to content

Commit d5765ab

Browse files
committed
Fix a race condition between registering a callback and completing a request
Using atomic swap operations we make sure that a thread completing a request will atomically mark it for the thread registering a callback. Similarly, the thread registering a callback will register it atomically and check for whether the request has completed. Signed-off-by: Joseph Schuchart <[email protected]>
1 parent 75a42af commit d5765ab

File tree

1 file changed

+20
-12
lines changed

1 file changed

+20
-12
lines changed

ompi/request/request.h

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ typedef struct ompi_request_t ompi_request_t;
139139
#define REQUEST_PENDING (void *)0L
140140
#define REQUEST_COMPLETED (void *)1L
141141

142+
#define REQUEST_CB_PENDING (void *)0L
143+
#define REQUEST_CB_COMPLETED (void *)1L
144+
142145
struct ompi_predefined_request_t {
143146
struct ompi_request_t request;
144147
char padding[PREDEFINED_REQUEST_PAD - sizeof(ompi_request_t)];
@@ -529,12 +532,12 @@ static inline void ompi_request_wait_completion(ompi_request_t *req)
529532
static inline int ompi_request_complete(ompi_request_t* request, bool with_signal)
530533
{
531534
int rc = 0;
532-
533-
if(NULL != request->req_complete_cb) {
534-
/* Set the request cb to NULL to allow resetting in the callback */
535-
ompi_request_complete_fn_t fct = request->req_complete_cb;
536-
request->req_complete_cb = NULL;
537-
rc = fct( request );
535+
ompi_request_complete_fn_t cb;
536+
cb = (ompi_request_complete_fn_t)OPAL_ATOMIC_SWAP_PTR((opal_atomic_intptr_t*)&request->req_complete_cb,
537+
(intptr_t)REQUEST_CB_COMPLETED);
538+
if (REQUEST_CB_PENDING != cb) {
539+
request->req_complete_cb = REQUEST_CB_PENDING;
540+
rc = cb(request);
538541
}
539542

540543
if (0 == rc) {
@@ -558,12 +561,17 @@ static inline int ompi_request_set_callback(ompi_request_t* request,
558561
void* cb_data)
559562
{
560563
request->req_complete_cb_data = cb_data;
561-
request->req_complete_cb = cb;
562-
/* If request is completed and the callback is not called, need to call callback */
563-
if ((NULL != request->req_complete_cb) && (request->req_complete == REQUEST_COMPLETED)) {
564-
ompi_request_complete_fn_t fct = request->req_complete_cb;
565-
request->req_complete_cb = NULL;
566-
return fct( request );
564+
opal_atomic_wmb();
565+
if ((REQUEST_CB_COMPLETED == request->req_complete_cb) ||
566+
(REQUEST_CB_COMPLETED == (void*)OPAL_ATOMIC_SWAP_PTR((opal_atomic_intptr_t*)&request->req_complete_cb,
567+
(intptr_t)cb))) {
568+
if (NULL != cb) {
569+
/* the request was marked at least partially completed, make sure it's fully complete */
570+
while (!REQUEST_COMPLETE(request)) {}
571+
/* Set the request cb to NULL to allow resetting in the callback */
572+
request->req_complete_cb = REQUEST_CB_PENDING;
573+
cb(request);
574+
}
567575
}
568576
return OMPI_SUCCESS;
569577
}

0 commit comments

Comments
 (0)