Skip to content

Commit 84e178c

Browse files
authored
Merge pull request #1821 from artpol84/fix_waitsome_v2
MPI_Waitsome performance improvement (version #2)
2 parents b2a2be0 + 732d890 commit 84e178c

File tree

1 file changed

+18
-10
lines changed

1 file changed

+18
-10
lines changed

ompi/request/req_wait.c

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,8 @@ int ompi_request_default_wait_some(size_t count,
407407
num_requests_null_inactive++;
408408
continue;
409409
}
410-
411-
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) {
410+
indices[i] = OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync);
411+
if( !indices[i] ) {
412412
/* If the request is completed go ahead and mark it as such */
413413
assert( REQUEST_COMPLETE(request) );
414414
num_requests_done++;
@@ -439,15 +439,23 @@ int ompi_request_default_wait_some(size_t count,
439439
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
440440
continue;
441441
}
442-
/* Atomically mark the request as pending. If this succeeds
443-
* then the request was not completed, and it is now marked as
444-
* pending. Otherwise, the request is complete (either it was
445-
* before or it has been meanwhile). The major drawback here
446-
* is that we will do all the atomic operations in all cases.
442+
/* Here we have 3 possibilities:
443+
* a) request was found completed in the first loop
444+
* => ( indices[i] == 0 )
445+
* b) request was completed between first loop and this check
446+
* => ( indices[i] == 1 ) and we can NOT atomically mark the
447+
* request as pending.
448+
* c) request wasn't finished yet
449+
* => ( indices[i] == 1 ) and we CAN atomically mark the
450+
* request as pending.
451+
* NOTE that in any case (i >= num_requests_done), as the latter grows
452+
* either slowly (in case of partial completion)
453+
* OR in parallel with `i` (in case of full set completion)
447454
*/
448-
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
449-
indices[num_requests_done] = i;
450-
num_requests_done++;
455+
if( !indices[i] ){
456+
indices[num_requests_done++] = i;
457+
} else if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
458+
indices[num_requests_done++] = i;
451459
}
452460
}
453461
sync_unsets = count - num_requests_null_inactive - num_requests_done;

0 commit comments

Comments
 (0)