Skip to content

Commit a8c3699

Browse files
committed
Fix performance regression caused by enabling opal thread support
This commit adds opal_using_threads() protection around the atomic operation in OBJ_RETAIN/OBJ_RELEASE. This resolves the performance issues seen when running psm with MPI_THREAD_SINGLE. To avoid issues with header dependencies opal_using_threads() has been moved to a new header (thread_usage.h). The OPAL_THREAD_ADD* and OPAL_THREAD_CMPSET* macros have also been relocated to this header. This commit is cherry-picked off a fix that was submitted for the v1.8 release series but never applied to master. This fixes part of the problem reported by @nysal in #1902. (cherry picked from commit open-mpi/ompi-release@ce91307) Signed-off-by: Nathan Hjelm <[email protected]>
1 parent b8a1ffb commit a8c3699

File tree

4 files changed

+181
-155
lines changed

4 files changed

+181
-155
lines changed

opal/class/opal_object.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
14+
* Copyright (c) 2014 Research Organization for Information Science
15+
* and Technology (RIST). All rights reserved.
1416
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
1517
* reserved.
1618
* $COPYRIGHT$
@@ -121,7 +123,7 @@
121123
#include <assert.h>
122124
#include <stdlib.h>
123125

124-
#include "opal/sys/atomic.h"
126+
#include "opal/threads/thread_usage.h"
125127

126128
BEGIN_C_DECLS
127129

@@ -508,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls)
508510
static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__;
509511
static inline int opal_obj_update(opal_object_t *object, int inc)
510512
{
511-
return opal_atomic_add_32(&(object->obj_reference_count), inc);
513+
return OPAL_THREAD_ADD32(&object->obj_reference_count, inc);
512514
}
513515

514516
END_C_DECLS

opal/threads/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ headers += \
2929
threads/mutex_unix.h \
3030
threads/threads.h \
3131
threads/tsd.h \
32-
threads/wait_sync.h
32+
threads/wait_sync.h \
33+
threads/thread_usage.h
3334

3435
lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \
3536
threads/condition.c \

opal/threads/mutex.h

Lines changed: 1 addition & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@
2828

2929
#include "opal_config.h"
3030

31-
#include "opal/sys/atomic.h"
32-
#include "opal/prefetch.h"
31+
#include "opal/threads/thread_usage.h"
3332

3433
BEGIN_C_DECLS
3534

@@ -41,11 +40,6 @@ BEGIN_C_DECLS
4140
* Functions for locking of critical sections.
4241
*/
4342

44-
/*
45-
* declaring this here so that CL does not complain
46-
*/
47-
OPAL_DECLSPEC extern bool opal_uses_threads;
48-
4943
/**
5044
* Opaque mutex object
5145
*/
@@ -107,67 +101,6 @@ END_C_DECLS
107101

108102
BEGIN_C_DECLS
109103

110-
/**
111-
* Check and see if the process is using multiple threads.
112-
*
113-
* @retval true If the process may have more than one thread.
114-
* @retval false If the process only has a single thread.
115-
*
116-
* The value that this function returns is influenced by:
117-
*
118-
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
119-
* - what the final MPI thread level was determined to be,
120-
* - whether the OMPI or MPI libraries are multi-threaded (Jan 2003:
121-
* they're not),
122-
* - whether configure determined if we have thread support or not
123-
*
124-
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
125-
* functions) invoke opal_set_using_threads() to influence the value of
126-
* this function, depending on their situation. Some examples:
127-
*
128-
* - if configure determined that we do not have threads, then this
129-
* value will always be false.
130-
*
131-
* - if MPI_INIT is invoked, and the ompi libraries are [still]
132-
* single-threaded, this value will be false.
133-
*
134-
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
135-
* thread support, and the final thread level is determined to be
136-
* MPI_THREAD_MULTIPLE, this value will be true.
137-
*
138-
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
139-
* this value will be false.
140-
*
141-
* Hence, this function will return false if there is guaranteed to
142-
* only be one thread in the process. If there is even the
143-
* possibility that we may have multiple threads, true will be
144-
* returned.
145-
*/
146-
#define opal_using_threads() opal_uses_threads
147-
148-
/**
149-
* Set whether the process is using multiple threads or not.
150-
*
151-
* @param have Boolean indicating whether the process is using
152-
* multiple threads or not.
153-
*
154-
* @retval opal_using_threads The new return value from
155-
* opal_using_threads().
156-
*
157-
* This function is used to influence the return value of
158-
* opal_using_threads(). If configure detected that we have thread
159-
* support, the return value of future invocations of
160-
* opal_using_threads() will be the parameter's value. If configure
161-
* detected that we have no thread support, then the retuen from
162-
* opal_using_threads() will always be false.
163-
*/
164-
static inline bool opal_set_using_threads(bool have)
165-
{
166-
opal_uses_threads = have;
167-
return opal_using_threads();
168-
}
169-
170-
171104
/**
172105
* Lock a mutex if opal_using_threads() says that multiple threads may
173106
* be active in the process.
@@ -254,90 +187,6 @@ static inline bool opal_set_using_threads(bool have)
254187
} \
255188
} while (0)
256189

257-
/**
258-
* Use an atomic operation for increment/decrement if opal_using_threads()
259-
* indicates that threads are in use by the application or library.
260-
*/
261-
262-
static inline int32_t
263-
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
264-
{
265-
int32_t ret;
266-
267-
if (OPAL_UNLIKELY(opal_using_threads())) {
268-
ret = opal_atomic_add_32(addr, delta);
269-
} else {
270-
ret = (*addr += delta);
271-
}
272-
273-
return ret;
274-
}
275-
276-
#if OPAL_HAVE_ATOMIC_MATH_64
277-
static inline int64_t
278-
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
279-
{
280-
int64_t ret;
281-
282-
if (OPAL_UNLIKELY(opal_using_threads())) {
283-
ret = opal_atomic_add_64(addr, delta);
284-
} else {
285-
ret = (*addr += delta);
286-
}
287-
288-
return ret;
289-
}
290-
#endif
291-
292-
static inline size_t
293-
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
294-
{
295-
size_t ret;
296-
297-
if (OPAL_UNLIKELY(opal_using_threads())) {
298-
ret = opal_atomic_add_size_t(addr, delta);
299-
} else {
300-
ret = (*addr += delta);
301-
}
302-
303-
return ret;
304-
}
305-
306-
/* BWB: FIX ME: remove if possible */
307-
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)
308-
309-
#if OPAL_HAVE_ATOMIC_CMPSET_32
310-
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
311-
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
312-
#endif
313-
#if OPAL_HAVE_ATOMIC_CMPSET_64
314-
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
315-
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
316-
#endif
317-
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
318-
#define OPAL_ATOMIC_CMPSET(x, y, z) \
319-
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
320-
#endif
321-
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
322-
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
323-
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
324-
#endif
325-
326-
327-
static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
328-
{
329-
if (opal_using_threads ()) {
330-
return opal_atomic_swap_ptr (ptr, newvalue);
331-
}
332-
333-
void *old = ((void **) ptr)[0];
334-
((void **) ptr)[0] = newvalue;
335-
336-
return old;
337-
}
338-
339-
#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)
340-
341190
END_C_DECLS
342191

343192
#endif /* OPAL_MUTEX_H */

opal/threads/thread_usage.h

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4+
* University Research and Technology
5+
* Corporation. All rights reserved.
6+
* Copyright (c) 2004-2007 The University of Tennessee and The University
7+
* of Tennessee Research Foundation. All rights
8+
* reserved.
9+
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
10+
* University of Stuttgart. All rights reserved.
11+
* Copyright (c) 2004-2005 The Regents of the University of California.
12+
* All rights reserved.
13+
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
14+
* Copyright (c) 2014 Research Organization for Information Science
15+
* and Technology (RIST). All rights reserved.
16+
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
17+
* reserved.
18+
* $COPYRIGHT$
19+
*
20+
* Additional copyrights may follow
21+
*
22+
* $HEADER$
23+
*/
24+
25+
#if !defined(OPAL_THREAD_USAGE_H)
26+
#define OPAL_THREAD_USAGE_H
27+
28+
#include "opal_config.h"
29+
30+
#include "opal/sys/atomic.h"
31+
#include "opal/prefetch.h"
32+
33+
OPAL_DECLSPEC extern bool opal_uses_threads;
34+
35+
/**
36+
* Check and see if the process is using multiple threads.
37+
*
38+
* @retval true If the process may have more than one thread.
39+
* @retval false If the process only has a single thread.
40+
*
41+
* The value that this function returns is influenced by:
42+
*
43+
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
44+
* - what the final MPI thread level was determined to be,
45+
* - whether the OMPI or MPI libraries are multi-threaded
46+
*
47+
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
48+
* functions) invoke opal_set_using_threads() to influence the value of
49+
* this function, depending on their situation. Some examples:
50+
*
51+
* - if MPI_INIT is invoked, and the ompi components in use are
52+
* single-threaded, this value will be false.
53+
*
54+
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
55+
* thread support, and the final thread level is determined to be
56+
* MPI_THREAD_MULTIPLE, this value will be true.
57+
*
58+
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
59+
* this value will be false.
60+
*
61+
* Hence, this function will return false if there is guaranteed to
62+
* only be one thread in the process. If there is even the
63+
* possibility that we may have multiple threads, true will be
64+
* returned.
65+
*/
66+
#define opal_using_threads() opal_uses_threads
67+
68+
/**
69+
* Set whether the process is using multiple threads or not.
70+
*
71+
* @param have Boolean indicating whether the process is using
72+
* multiple threads or not.
73+
*
74+
* @retval opal_using_threads The new return value from
75+
* opal_using_threads().
76+
*
77+
* This function is used to influence the return value of
78+
* opal_using_threads(). If configure detected that we have thread
79+
* support, the return value of future invocations of
80+
* opal_using_threads() will be the parameter's value. If configure
81+
* detected that we have no thread support, then the retuen from
82+
* opal_using_threads() will always be false.
83+
*/
84+
static inline bool opal_set_using_threads(bool have)
85+
{
86+
opal_uses_threads = have;
87+
return opal_using_threads();
88+
}
89+
90+
91+
/**
92+
* Use an atomic operation for increment/decrement if opal_using_threads()
93+
* indicates that threads are in use by the application or library.
94+
*/
95+
96+
static inline int32_t
97+
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
98+
{
99+
int32_t ret;
100+
101+
if (OPAL_UNLIKELY(opal_using_threads())) {
102+
ret = opal_atomic_add_32(addr, delta);
103+
} else {
104+
ret = (*addr += delta);
105+
}
106+
107+
return ret;
108+
}
109+
110+
#if OPAL_HAVE_ATOMIC_MATH_64
111+
static inline int64_t
112+
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
113+
{
114+
int64_t ret;
115+
116+
if (OPAL_UNLIKELY(opal_using_threads())) {
117+
ret = opal_atomic_add_64(addr, delta);
118+
} else {
119+
ret = (*addr += delta);
120+
}
121+
122+
return ret;
123+
}
124+
#endif
125+
126+
static inline size_t
127+
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
128+
{
129+
size_t ret;
130+
131+
if (OPAL_UNLIKELY(opal_using_threads())) {
132+
ret = opal_atomic_add_size_t(addr, delta);
133+
} else {
134+
ret = (*addr += delta);
135+
}
136+
137+
return ret;
138+
}
139+
140+
/* BWB: FIX ME: remove if possible */
141+
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)
142+
143+
#if OPAL_HAVE_ATOMIC_CMPSET_32
144+
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
145+
(opal_using_threads() ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
146+
#endif
147+
#if OPAL_HAVE_ATOMIC_CMPSET_64
148+
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
149+
(opal_using_threads() ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
150+
#endif
151+
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
152+
#define OPAL_ATOMIC_CMPSET(x, y, z) \
153+
(opal_using_threads() ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
154+
#endif
155+
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
156+
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
157+
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
158+
#endif
159+
160+
static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
161+
{
162+
if (opal_using_threads ()) {
163+
return opal_atomic_swap_ptr (ptr, newvalue);
164+
}
165+
166+
void *old = ((void **) ptr)[0];
167+
((void **) ptr)[0] = newvalue;
168+
169+
return old;
170+
}
171+
172+
#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)
173+
174+
#endif /* !defined(OPAL_THREAD_USAGE_H) */

0 commit comments

Comments
 (0)