Skip to content

Commit 53bf15e

Browse files
committed
opal/asm: updates to powerpc assembly
This commit contains the following changes:
- There is a bug in the PGI 16.x betas for ppc64 that causes them to emit the incorrect instruction for loading 64-bit operands. If the operands are not cast to void *, they are loaded with lwz (load word and zero) instead of ld. This does not affect optimized mode. The workaround is to cast to void *; it was implemented similarly to a workaround for an xlc bug.
- Actually implement 64-bit add/sub. These functions were missing and fell back to the less efficient compare-and-swap implementations.
Thanks to @PHHargrove for helping to track this down. With this update the GCC inline assembly works as expected with PGI on ppc64. Signed-off-by: Nathan Hjelm <[email protected]> (cherry picked from commit a36bdfe)
1 parent be030c3 commit 53bf15e

File tree

1 file changed

+53
-8
lines changed

1 file changed

+53
-8
lines changed

opal/include/opal/sys/powerpc/atomic.h

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2010 IBM Corporation. All rights reserved.
14-
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
14+
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
1515
* reserved.
1616
* $COPYRIGHT$
1717
*
@@ -54,6 +54,9 @@
5454
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
5555
#define OPAL_HAVE_ATOMIC_SWAP_64 1
5656
#define OPAL_HAVE_ATOMIC_LLSC_64 1
57+
#define OPAL_HAVE_ATOMIC_MATH_64 1
58+
#define OPAL_HAVE_ATOMIC_ADD_64 1
59+
#define OPAL_HAVE_ATOMIC_SUB_64 1
5760
#endif
5861

5962

@@ -121,6 +124,16 @@ void opal_atomic_wmb(void)
121124
#define OPAL_ASM_ADDR(a) (a)
122125
#endif
123126

127+
/* OPAL_ASM_VALUE64(x): wrapper applied to a 64-bit value before it is
 * handed to an inline-assembly "r" operand. With __PGI defined the value
 * is cast through intptr_t to void *; otherwise it expands to x unchanged.
 * NOTE(review): neither expansion is wrapped in outer parentheses; the
 * uses visible in this file all parenthesize the macro call themselves --
 * confirm that any other users do the same. */
#if defined(__PGI)
128+
/* work-around for bug in PGI 16.5-16.7 where the compiler fails to
129+
* correctly emit load instructions for 64-bit operands. without this
130+
* it will emit lwz instead of ld to load the 64-bit operand. */
131+
#define OPAL_ASM_VALUE64(x) (void *)(intptr_t) (x)
132+
#else
133+
#define OPAL_ASM_VALUE64(x) x
134+
#endif
135+
136+
124137
static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
125138
int32_t oldval, int32_t newval)
126139
{
@@ -210,6 +223,38 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval
210223
#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64)
211224

212225
#if OPAL_GCC_INLINE_ASSEMBLY
226+
/* Atomically add inc to the 64-bit integer at v and return the updated
 * value (the sum that was stored), using a ldarx/stdcx. retry loop.
 *
 * v    address of the 64-bit value to modify
 * inc  amount to add
 *
 * Only "cc" is listed as clobbered: the asm contains no barrier
 * instruction and declares no "memory" clobber. */
static inline int64_t opal_atomic_add_64 (volatile int64_t* v, int64_t inc)
227+
{
228+
int64_t t;
229+
230+
/* ldarx loads *v with a reservation, add computes the sum in %0,
 * stdcx. stores it only if the reservation still holds, and bne-
 * branches back to label 1 to retry when the store did not succeed. */
__asm__ __volatile__("1: ldarx %0, 0, %3 \n\t"
231+
" add %0, %2, %0 \n\t"
232+
" stdcx. %0, 0, %3 \n\t"
233+
" bne- 1b \n\t"
234+
/* operands: %0 = t (early-clobber output), %1 = *v (read/write memory),
 * %2 = inc, %3 = v */
: "=&r" (t), "+m" (*v)
235+
: "r" (OPAL_ASM_VALUE64(inc)), "r" OPAL_ASM_ADDR(v)
236+
: "cc");
237+
238+
return t;
239+
}
240+
241+
242+
/* Atomically subtract dec from the 64-bit integer at v and return the
 * updated value (the difference that was stored), using a ldarx/stdcx.
 * retry loop.
 *
 * v    address of the 64-bit value to modify
 * dec  amount to subtract
 *
 * Only "cc" is listed as clobbered: the asm contains no barrier
 * instruction and declares no "memory" clobber. */
static inline int64_t opal_atomic_sub_64 (volatile int64_t* v, int64_t dec)
243+
{
244+
int64_t t;
245+
246+
/* subf RT,RA,RB computes RB - RA, so "subf %0,%2,%0" leaves
 * (loaded value - dec) in %0; bne- retries from label 1 when the
 * conditional store did not succeed. */
__asm__ __volatile__(
247+
"1: ldarx %0,0,%3 \n\t"
248+
" subf %0,%2,%0 \n\t"
249+
" stdcx. %0,0,%3 \n\t"
250+
" bne- 1b \n\t"
251+
/* operands: %0 = t (early-clobber output), %1 = *v (read/write memory),
 * %2 = dec, %3 = v */
: "=&r" (t), "+m" (*v)
252+
: "r" (OPAL_ASM_VALUE64(dec)), "r" OPAL_ASM_ADDR(v)
253+
: "cc");
254+
255+
return t;
256+
}
257+
213258
static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
214259
int64_t oldval, int64_t newval)
215260
{
@@ -222,8 +267,8 @@ static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
222267
" stdcx. %4, 0, %2 \n\t"
223268
" bne- 1b \n\t"
224269
"2:"
225-
: "=&r" (ret), "=m" (*addr)
226-
: "r" (addr), "r" (oldval), "r" (newval), "m" (*addr)
270+
: "=&r" (ret), "+m" (*addr)
271+
: "r" (addr), "r" (OPAL_ASM_VALUE64(oldval)), "r" (OPAL_ASM_VALUE64(newval))
227272
: "cc", "memory");
228273

229274
return (ret == oldval);
@@ -242,15 +287,15 @@ static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
242287

243288
/* Attempt a 64-bit store-conditional (stdcx.) of newval to addr.
 *
 * Returns 1 when the conditional store succeeded and 0 when it did not:
 * ret is cleared with li after the stdcx., and the ori that sets it to 1
 * is skipped by bne- when the store failed.
 *
 * NOTE(review): presumably meant to follow a matching
 * opal_atomic_ll_64() on the same address -- confirm against callers. */
static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
244289
{
245-
int32_t ret, foo;
290+
int32_t ret;
246291

247-
__asm__ __volatile__ (" stdcx. %4, 0, %3 \n\t"
292+
__asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t"
248293
" li %0,0 \n\t"
249294
" bne- 1f \n\t"
250295
" ori %0,%0,1 \n\t"
251296
"1:"
252-
: "=r" (ret), "=m" (*addr), "=r" (foo)
253-
: "r" (addr), "r" (newval)
297+
: "=r" (ret)
298+
: "r" (addr), "r" (OPAL_ASM_VALUE64(newval))
254299
: "cc", "memory");
255300
return ret;
256301
}
@@ -287,7 +332,7 @@ static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval
287332
" stdcx. %3, 0, %2 \n\t"
288333
" bne- 1b \n\t"
289334
: "=&r" (ret), "=m" (*addr)
290-
: "r" (addr), "r" (newval)
335+
: "r" (addr), "r" (OPAL_ASM_VALUE64(newval))
291336
: "cc", "memory");
292337

293338
return ret;

0 commit comments

Comments
 (0)