Skip to content

Commit 9df0e66

Browse files
authored
Merge pull request #5222 from hjelmn/v3.1.x_aarch64
v3.1.x aarch64 support
2 parents d596ac2 + 72cb1fc commit 9df0e66

File tree

9 files changed

+220
-125
lines changed

9 files changed

+220
-125
lines changed

README

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Copyright (c) 2010 Oak Ridge National Labs. All rights reserved.
1717
Copyright (c) 2011 University of Houston. All rights reserved.
1818
Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
1919
Copyright (c) 2015 NVIDIA Corporation. All rights reserved.
20-
Copyright (c) 2017 Los Alamos National Security, LLC. All rights
20+
Copyright (c) 2017-2018 Los Alamos National Security, LLC. All rights
2121
reserved.
2222
Copyright (c) 2017 Research Organization for Information Science
2323
and Technology (RIST). All rights reserved.
@@ -143,10 +143,7 @@ General notes
143143
Platform Notes
144144
--------------
145145

146-
- ARM and POWER users may experience intermittent hangs when Open MPI
147-
is compiled with low optimization settings, due to an issue with our
148-
atomic list implementation. We recommend compiling with -O3
149-
optimization, both for performance reasons and to avoid this hang.
146+
- N/A
150147

151148

152149
Compiler Notes

opal/class/opal_fifo.h

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2007 Voltaire All rights reserved.
1414
* Copyright (c) 2010 IBM Corporation. All rights reserved.
15-
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* $COPYRIGHT$
1818
*
@@ -186,9 +186,10 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
186186
static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
187187
opal_list_item_t *item)
188188
{
189+
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
189190
opal_list_item_t *tail_item;
190191

191-
item->opal_list_next = &fifo->opal_fifo_ghost;
192+
item->opal_list_next = (opal_list_item_t *) ghost;
192193

193194
opal_atomic_wmb ();
194195

@@ -197,7 +198,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
197198

198199
opal_atomic_wmb ();
199200

200-
if (&fifo->opal_fifo_ghost == tail_item) {
201+
if (ghost == tail_item) {
201202
/* update the head */
202203
fifo->opal_fifo_head.data.item = item;
203204
} else {
@@ -215,14 +216,24 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
215216
*/
216217
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
217218
{
218-
opal_list_item_t *item, *next;
219+
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
219220

220221
#if OPAL_HAVE_ATOMIC_LLSC_PTR
222+
register opal_list_item_t *item, *next;
223+
int attempt = 0, ret = 0;
224+
221225
/* use load-linked store-conditional to avoid ABA issues */
222226
do {
223-
item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item);
224-
if (&fifo->opal_fifo_ghost == item) {
225-
if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) {
227+
if (++attempt == 5) {
228+
/* deliberately suspend this thread to allow other threads to run. this should
229+
* only occur during periods of contention on the fifo. */
230+
_opal_lifo_release_cpu ();
231+
attempt = 0;
232+
}
233+
234+
opal_atomic_ll_ptr(&fifo->opal_fifo_head.data.item, item);
235+
if (ghost == item) {
236+
if (ghost == fifo->opal_fifo_tail.data.item) {
226237
return NULL;
227238
}
228239

@@ -232,11 +243,12 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
232243
}
233244

234245
next = (opal_list_item_t *) item->opal_list_next;
235-
if (opal_atomic_sc_ptr (&fifo->opal_fifo_head.data.item, next)) {
236-
break;
237-
}
238-
} while (1);
246+
opal_atomic_sc_ptr(&fifo->opal_fifo_head.data.item, next, ret);
247+
} while (!ret);
248+
239249
#else
250+
opal_list_item_t *item, *next;
251+
240252
/* protect against ABA issues by "locking" the head */
241253
do {
242254
if (opal_atomic_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) {
@@ -258,9 +270,9 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
258270
fifo->opal_fifo_head.data.item = next;
259271
#endif
260272

261-
if (&fifo->opal_fifo_ghost == next) {
262-
if (!opal_atomic_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) {
263-
while (&fifo->opal_fifo_ghost == item->opal_list_next) {
273+
if (ghost == next) {
274+
if (!opal_atomic_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, (void *) ghost)) {
275+
while (ghost == item->opal_list_next) {
264276
opal_atomic_rmb ();
265277
}
266278

opal/class/opal_lifo.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2007 Voltaire All rights reserved.
1414
* Copyright (c) 2010 IBM Corporation. All rights reserved.
15-
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* Copyright (c) 2016 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
@@ -204,8 +204,8 @@ static inline void _opal_lifo_release_cpu (void)
204204
*/
205205
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
206206
{
207-
opal_list_item_t *item, *next;
208-
int attempt = 0;
207+
register opal_list_item_t *item, *next;
208+
int attempt = 0, ret;
209209

210210
do {
211211
if (++attempt == 5) {
@@ -215,13 +215,14 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
215215
attempt = 0;
216216
}
217217

218-
item = (opal_list_item_t *) opal_atomic_ll_ptr (&lifo->opal_lifo_head.data.item);
218+
opal_atomic_ll_ptr(&lifo->opal_lifo_head.data.item, item);
219219
if (&lifo->opal_lifo_ghost == item) {
220220
return NULL;
221221
}
222222

223223
next = (opal_list_item_t *) item->opal_list_next;
224-
} while (!opal_atomic_sc_ptr (&lifo->opal_lifo_head.data.item, next));
224+
opal_atomic_sc_ptr(&lifo->opal_lifo_head.data.item, next, ret);
225+
} while (!ret);
225226

226227
opal_atomic_wmb ();
227228

opal/include/opal/sys/arm64/atomic.h

Lines changed: 50 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -150,28 +150,31 @@ static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
150150
return (ret == oldval);
151151
}
152152

153-
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
154-
{
155-
int32_t ret;
156-
157-
__asm__ __volatile__ ("ldaxr %w0, [%1] \n"
158-
: "=&r" (ret)
159-
: "r" (addr));
160-
161-
return ret;
162-
}
163-
164-
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
165-
{
166-
int ret;
167-
168-
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n"
169-
: "=&r" (ret)
170-
: "r" (addr), "r" (newval)
171-
: "cc", "memory");
172-
173-
return ret == 0;
174-
}
153+
#define opal_atomic_ll_32(addr, ret) \
154+
do { \
155+
volatile int32_t *_addr = (addr); \
156+
int32_t _ret; \
157+
\
158+
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
159+
: "=&r" (_ret) \
160+
: "r" (_addr)); \
161+
\
162+
ret = (typeof(ret)) _ret; \
163+
} while (0)
164+
165+
#define opal_atomic_sc_32(addr, newval, ret) \
166+
do { \
167+
volatile int32_t *_addr = (addr); \
168+
int32_t _newval = (int32_t) newval; \
169+
int _ret; \
170+
\
171+
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
172+
: "=&r" (_ret) \
173+
: "r" (_addr), "r" (_newval) \
174+
: "cc", "memory"); \
175+
\
176+
ret = (_ret == 0); \
177+
} while (0)
175178

176179
static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
177180
int64_t oldval, int64_t newval)
@@ -251,28 +254,31 @@ static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
251254
return (ret == oldval);
252255
}
253256

254-
static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
255-
{
256-
int64_t ret;
257-
258-
__asm__ __volatile__ ("ldaxr %0, [%1] \n"
259-
: "=&r" (ret)
260-
: "r" (addr));
261-
262-
return ret;
263-
}
264-
265-
static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
266-
{
267-
int ret;
268-
269-
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n"
270-
: "=&r" (ret)
271-
: "r" (addr), "r" (newval)
272-
: "cc", "memory");
273-
274-
return ret == 0;
275-
}
257+
#define opal_atomic_ll_64(addr, ret) \
258+
do { \
259+
volatile int64_t *_addr = (addr); \
260+
int64_t _ret; \
261+
\
262+
__asm__ __volatile__ ("ldaxr %0, [%1] \n" \
263+
: "=&r" (_ret) \
264+
: "r" (_addr)); \
265+
\
266+
ret = (typeof(ret)) _ret; \
267+
} while (0)
268+
269+
#define opal_atomic_sc_64(addr, newval, ret) \
270+
do { \
271+
volatile int64_t *_addr = (addr); \
272+
int64_t _newval = (int64_t) newval; \
273+
int _ret; \
274+
\
275+
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
276+
: "=&r" (_ret) \
277+
: "r" (_addr), "r" (_newval) \
278+
: "cc", "memory"); \
279+
\
280+
ret = (_ret == 0); \
281+
} while (0)
276282

277283
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
278284
static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \

opal/include/opal/sys/atomic_impl.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,15 +278,15 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr,
278278

279279
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32
280280

281-
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_32((int32_t *) addr)
282-
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) addr, (int32_t) newval)
281+
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((volatile int32_t *) (addr), ret)
282+
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((volatile int32_t *) (addr), (intptr_t) (value), ret)
283283

284284
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
285285

286286
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64
287287

288-
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_64((int64_t *) addr)
289-
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) addr, (int64_t) newval)
288+
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((volatile int64_t *) (addr), ret)
289+
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((volatile int64_t *) (addr), (intptr_t) (value), ret)
290290

291291
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
292292

0 commit comments

Comments
 (0)