Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit 9ea5bf7

Browse files
authored
Merge pull request #1240 from hjelmn/v2.x_request_performance
ompi/request: fix performance regression
2 parents 34bcc3a + fe5643d commit 9ea5bf7

File tree

5 files changed

+61
-61
lines changed

5 files changed

+61
-61
lines changed

ompi/request/request.h

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -375,27 +375,25 @@ static inline int ompi_request_free(ompi_request_t** request)
375375
* Wait for a particular request to complete
376376
*/
377377

378-
#if OPAL_ENABLE_MULTI_THREADS
379378
static inline void ompi_request_wait_completion(ompi_request_t *req)
380379
{
381-
ompi_wait_sync_t sync;
382-
WAIT_SYNC_INIT(&sync, 1);
380+
if (opal_using_threads ()) {
381+
ompi_wait_sync_t sync;
382+
WAIT_SYNC_INIT(&sync, 1);
383383

384-
if(OPAL_ATOMIC_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) {
385-
SYNC_WAIT(&sync);
386-
}
384+
if(OPAL_ATOMIC_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) {
385+
SYNC_WAIT(&sync);
386+
}
387387

388-
assert(REQUEST_COMPLETE(req));
389-
WAIT_SYNC_RELEASE(&sync);
390-
}
391-
#else
392-
static inline void ompi_request_wait_completion(ompi_request_t *req)
393-
{
394-
while(!REQUEST_COMPLETE(req)) {
395-
opal_progress();
388+
assert(REQUEST_COMPLETE(req));
389+
WAIT_SYNC_RELEASE(&sync);
390+
} else {
391+
while(!REQUEST_COMPLETE(req)) {
392+
opal_progress();
393+
}
396394
}
397395
}
398-
#endif
396+
399397
/**
400398
* Signal or mark a request as complete. If with_signal is true this will
401399
* wake any thread pending on the request. If with_signal is false, the
@@ -418,8 +416,8 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa
418416

419417
if( OPAL_LIKELY(with_signal) ) {
420418
if(!OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, REQUEST_COMPLETED)) {
421-
ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWP_PTR(&request->req_complete,
422-
REQUEST_COMPLETED);
419+
ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete,
420+
REQUEST_COMPLETED);
423421
/* In the case where another thread concurrently changed the request to REQUEST_PENDING */
424422
if( REQUEST_PENDING != tmp_sync )
425423
wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR);

opal/runtime/opal_progress.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ opal_progress(void)
225225
events += (callbacks[i])();
226226
}
227227

228-
if ((OPAL_THREAD_ADD32((volatile int32_t *) &num_calls, 1) & callbacks_lp_mask) == 0) {
228+
if (callbacks_lp_len > 0 && (OPAL_THREAD_ADD32((volatile int32_t *) &num_calls, 1) & callbacks_lp_mask) == 0) {
229229
/* run low priority callbacks once every 8 calls to opal_progress() */
230230
for (i = 0 ; i < callbacks_lp_len ; ++i) {
231231
events += (callbacks_lp[i])();

opal/threads/mutex.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,20 @@ OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
331331
#endif
332332

333333

334+
static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
335+
{
336+
if (opal_using_threads ()) {
337+
return opal_atomic_swap_ptr (ptr, newvalue);
338+
}
339+
340+
void *old = ((void **) ptr)[0];
341+
((void **) ptr)[0] = newvalue;
342+
343+
return old;
344+
}
345+
346+
#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)
347+
334348
END_C_DECLS
335349

336350
#endif /* OPAL_MUTEX_H */

opal/threads/wait_sync.c

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* Copyright (c) 2014-2016 The University of Tennessee and The University
44
* of Tennessee Research Foundation. All rights
55
* reserved.
6+
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
7+
* reserved.
68
* $COPYRIGHT$
79
*
810
* Additional copyrights may follow
@@ -21,15 +23,6 @@ static ompi_wait_sync_t* wait_sync_list = NULL;
2123
pthread_mutex_unlock( &(who)->lock); \
2224
} while(0)
2325

24-
25-
int sync_wait_st(ompi_wait_sync_t *sync)
26-
{
27-
while(sync->count > 0) {
28-
opal_progress();
29-
}
30-
return (0 == sync->status) ? OPAL_SUCCESS : OPAL_ERROR;
31-
}
32-
3326
int sync_wait_mt(ompi_wait_sync_t *sync)
3427
{
3528
if(sync->count <= 0)

opal/threads/wait_sync.h

Lines changed: 29 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* Copyright (c) 2014-2016 The University of Tennessee and The University
44
* of Tennessee Research Foundation. All rights
55
* reserved.
6+
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
7+
* reserved.
68
* $COPYRIGHT$
79
*
810
* Additional copyrights may follow
@@ -29,50 +31,42 @@ typedef struct ompi_wait_sync_t {
2931
#define REQUEST_PENDING (void*)0L
3032
#define REQUEST_COMPLETED (void*)1L
3133

32-
#if OPAL_ENABLE_MULTI_THREADS
33-
34-
#define OPAL_ATOMIC_ADD_32(a,b) opal_atomic_add_32(a,b)
35-
#define OPAL_ATOMIC_SWP_PTR(a,b) opal_atomic_swap_ptr(a,b)
36-
#define SYNC_WAIT(sync) sync_wait_mt(sync)
37-
#define PTHREAD_COND_INIT(a,b) pthread_cond_init(a,b)
38-
#define PTHREAD_MUTEX_INIT(a,b) pthread_mutex_init(a,b)
34+
#define SYNC_WAIT(sync) (opal_using_threads() ? sync_wait_mt (sync) : sync_wait_st (sync))
3935

4036
#define WAIT_SYNC_RELEASE(sync) \
41-
do { \
37+
if (opal_using_threads()) { \
4238
pthread_cond_destroy(&(sync)->condition); \
4339
pthread_mutex_destroy(&(sync)->lock); \
44-
} while(0)
40+
}
4541

4642
#define WAIT_SYNC_SIGNAL(sync) \
47-
do { \
43+
if (opal_using_threads()) { \
4844
pthread_mutex_lock(&(sync->lock)); \
4945
pthread_cond_signal(&sync->condition); \
5046
pthread_mutex_unlock(&(sync->lock)); \
51-
} while(0)
52-
53-
#else
47+
}
5448

55-
#define OPAL_ATOMIC_ADD_32(a,b) (*(a) += (b))
56-
#define OPAL_ATOMIC_SWP_PTR(a,b) *(a) = (b)
57-
#define PTHREAD_COND_INIT(a,b)
58-
#define PTHREAD_MUTEX_INIT(a,b)
59-
#define SYNC_WAIT(sync) sync_wait_st(sync)
60-
#define WAIT_SYNC_RELEASE(sync)
61-
#define WAIT_SYNC_SIGNAL(sync)
49+
OPAL_DECLSPEC int sync_wait_mt(ompi_wait_sync_t *sync);
50+
static inline int sync_wait_st (ompi_wait_sync_t *sync)
51+
{
52+
while (sync->count > 0) {
53+
opal_progress();
54+
}
6255

63-
#endif /* OPAL_ENABLE_MULTI_THREADS */
56+
return sync->status;
57+
}
6458

65-
OPAL_DECLSPEC int sync_wait_mt(ompi_wait_sync_t *sync);
66-
OPAL_DECLSPEC int sync_wait_st(ompi_wait_sync_t *sync);
6759

68-
#define WAIT_SYNC_INIT(sync,c) \
69-
do { \
70-
(sync)->count = c; \
71-
(sync)->next = NULL; \
72-
(sync)->prev = NULL; \
73-
(sync)->status = 0; \
74-
PTHREAD_COND_INIT(&(sync)->condition, NULL); \
75-
PTHREAD_MUTEX_INIT(&(sync)->lock, NULL); \
60+
#define WAIT_SYNC_INIT(sync,c) \
61+
do { \
62+
(sync)->count = c; \
63+
(sync)->next = NULL; \
64+
(sync)->prev = NULL; \
65+
(sync)->status = 0; \
66+
if (opal_using_threads()) { \
67+
pthread_cond_init (&(sync)->condition, NULL); \
68+
pthread_mutex_init (&(sync)->lock, NULL); \
69+
} \
7670
} while(0)
7771

7872
/**
@@ -84,12 +78,13 @@ OPAL_DECLSPEC int sync_wait_st(ompi_wait_sync_t *sync);
8478
static inline void wait_sync_update(ompi_wait_sync_t *sync, int updates, int status)
8579
{
8680
if( OPAL_LIKELY(OPAL_SUCCESS == status) ) {
87-
if( 0 != (OPAL_ATOMIC_ADD_32(&sync->count, -updates)) ) {
81+
if( 0 != (OPAL_THREAD_ADD32(&sync->count, -updates)) ) {
8882
return;
8983
}
9084
} else {
91-
OPAL_ATOMIC_CMPSET_32(&(sync->count), 0, 0);
92-
sync->status = -1;
85+
/* this is an error path so just use the atomic */
86+
opal_atomic_swap_32 (&sync->count, 0);
87+
sync->status = OPAL_ERROR;
9388
}
9489
WAIT_SYNC_SIGNAL(sync);
9590
}

0 commit comments

Comments
 (0)