Skip to content

Commit 5929fa9

Browse files
committed
Improve MPI_Waitsome performance
by avoiding extra atomic exchanges. The fix is based on MPI spec: 12.4.2 Multiple threads completing the same request. A program in which two threads block, waiting on the same request, is erroneous. Similarly, the same request cannot appear in the array of requests of two concurrent MPI_{WAIT|TEST}{ANY|SOME|ALL} calls. In MPI, a request can only be completed once. Any combination of wait or test that violates this rule is erroneous." We add marked flag to the request structure. Only MPI_Waitsome thread will use/access it by any means. PML threads will not see/touch it. So given that any particular request can be used no more than in one MPI_Waitsome we are safe to do this change.
1 parent fb44c3b commit 5929fa9

File tree

2 files changed

+27
-16
lines changed

2 files changed

+27
-16
lines changed

ompi/request/req_wait.c

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -375,8 +375,8 @@ int ompi_request_default_wait_some(size_t count,
375375
ompi_request_t **rptr = NULL;
376376
ompi_request_t *request = NULL;
377377
ompi_wait_sync_t sync;
378-
size_t sync_sets = 0, sync_unsets = 0;
379-
378+
bool will_be_signalled = false;
379+
380380
WAIT_SYNC_INIT(&sync, 1);
381381

382382
*outcount = 0;
@@ -398,9 +398,12 @@ int ompi_request_default_wait_some(size_t count,
398398
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) {
399399
/* If the request is completed go ahead and mark it as such */
400400
assert( REQUEST_COMPLETE(request) );
401+
/* TODO: make sure MPI spec is not strict about index order */
402+
indices[num_requests_done] = i;
401403
num_requests_done++;
404+
REQUEST_UNMARK(request);
402405
} else {
403-
sync_sets++;
406+
REQUEST_MARK(request);
404407
}
405408
}
406409

@@ -420,29 +423,29 @@ int ompi_request_default_wait_some(size_t count,
420423
/* Clean up the synchronization primitives */
421424

422425
rptr = requests;
423-
num_requests_done = 0;
424426
for (size_t i = 0; i < count; i++, rptr++) {
425427
request = *rptr;
426428

427-
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
429+
/* Skip inactive and already accounted requests */
430+
if( request->req_state == OMPI_REQUEST_INACTIVE || !REQUEST_MARKED(request) ) {
428431
continue;
429432
}
430-
/* Atomically mark the request as pending. If this succeed
431-
* then the request was not completed, and it is now marked as
432-
* pending. Otherwise, the request is complete )either it was
433-
* before or it has been meanwhile). The major drawback here
434-
* is that we will do all the atomics operations in all cases.
433+
/* Atomically mark the request as pending.
434+
* If this succeed - then the request was not completed,
435+
* and it is now marked as pending.
436+
* Otherwise, the request is complete meanwhile.
435437
*/
436-
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
438+
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
437439
indices[num_requests_done] = i;
438440
num_requests_done++;
439-
} else {
440-
/* request wasn't finished during this call */
441-
sync_unsets++;
442-
}
441+
/* at least one of requests was completed during this call
442+
* corresponding thread will signal us
443+
*/
444+
will_be_signalled = true;
445+
}
443446
}
444447

445-
if( sync_sets == sync_unsets ){
448+
if( !will_be_signalled ){
446449
/* nobody knows about us,
447450
* set signa-in-progress flag to false
448451
*/

ompi/request/request.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
1616
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
1717
* reserved.
18+
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -112,6 +113,7 @@ struct ompi_request_t {
112113
ompi_request_complete_fn_t req_complete_cb; /**< Called when the request is MPI completed */
113114
void *req_complete_cb_data;
114115
ompi_mpi_object_t req_mpi_object; /**< Pointer to MPI object that created this request */
116+
bool marked;
115117
};
116118

117119
/**
@@ -151,6 +153,12 @@ typedef struct ompi_predefined_request_t ompi_predefined_request_t;
151153

152154

153155
#define REQUEST_COMPLETE(req) (REQUEST_COMPLETED == (req)->req_complete)
156+
157+
#define REQUEST_MARK(req) ( (req)->marked = true )
158+
#define REQUEST_UNMARK(req) ( (req)->marked = false )
159+
#define REQUEST_MARKED(req) ( (req)->marked )
160+
161+
154162
/**
155163
* Finalize a request. This is a macro to avoid function call
156164
* overhead, since this is typically invoked in the critical

0 commit comments

Comments
 (0)