@@ -380,6 +380,7 @@ __atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);])],
380380 fi
381381] )
382382
383+
383384AC_DEFUN ( [ OPAL_CHECK_C11_CSWAP_INT128] , [
384385 OPAL_VAR_SCOPE_PUSH([ atomic_compare_exchange_result atomic_compare_exchange_CFLAGS_save atomic_compare_exchange_LIBS_save] )
385386
@@ -500,20 +501,16 @@ AC_DEFUN([OPAL_CHECK_CMPXCHG16B],[
500501 OPAL_VAR_SCOPE_POP
501502] ) dnl
502503
504+
503505dnl #################################################################
504506dnl
505- dnl OPAL_CHECK_INLINE_GCC
507+ dnl OPAL_CHECK_INLINE_C_GCC([action-if-found], [action-if-not-found])
506508dnl
507509dnl Check if the compiler is capable of doing GCC-style inline
508510dnl assembly. Some compilers emit a warning and ignore the inline
509511dnl assembly (xlc on OS X) and compile without error. Therefore,
510512dnl the test attempts to run the emitted code to check that the
511- dnl assembly is actually run. To run this test, one argument to
512- dnl the macro must be an assembly instruction in gcc format to move
513- dnl the value 0 into the register containing the variable ret.
514- dnl For PowerPC, this would be:
515- dnl
516- dnl "li %0,0" : "=&r"(ret)
513+ dnl assembly is actually run.
517514dnl
518515dnl For testing ia32 assembly, the assembly instruction xaddl is
519516dnl tested. The xaddl instruction is used by some of the atomic
@@ -527,181 +524,193 @@ dnl support
527524dnl
528525dnl #################################################################
529526AC_DEFUN ( [ OPAL_CHECK_INLINE_C_GCC] ,[
530- assembly="$1 "
531- asm_result="unknown"
532-
533- AC_MSG_CHECKING ( [ if $CC supports GCC inline assembly] )
534-
535- if test ! "$assembly" = "" ; then
536- AC_RUN_IFELSE ( [ AC_LANG_PROGRAM ( [ AC_INCLUDES_DEFAULT ] ,[ [
527+ AC_CACHE_CHECK ( [ if $CC supports GCC inline assembly] ,
528+ [ opal_cv_asm_gcc_inline_assembly] ,
529+ [ OPAL_VAR_SCOPE_PUSH([ asm_result opal_gcc_inline_assign OPAL_C_GCC_INLINE_ASSEMBLY] )
530+
531+ asm_result="unknown"
532+
533+ opal_gcc_inline_assign=""
534+ case "${host}" in
535+ x86_64-*x32|i?86-*|x86_64*|amd64*)
536+ opal_gcc_inline_assign='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)'
537+ ;;
538+ aarch64*)
539+ opal_gcc_inline_assign='"mov %0, #0" : "=&r"(ret)'
540+ ;;
541+ powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*)
542+ opal_gcc_inline_assign='"1: li %0,0" : "=&r"(ret)'
543+ ;;
544+ esac
545+
546+ AS_IF ( [ test "$opal_gcc_inline_assign" != ""] ,
547+ [ AC_RUN_IFELSE ( [ AC_LANG_PROGRAM ( [ AC_INCLUDES_DEFAULT ] ,[ [
537548int ret = 1;
538549int negone = -1;
539- __asm__ __volatile__ ($assembly );
550+ __asm__ __volatile__ ($opal_gcc_inline_assign );
540551return ret;
541- ] ] ) ] ,
542- [ asm_result="yes"] , [ asm_result="no"] ,
543- [ asm_result="unknown"] )
544- else
545- assembly="test skipped - assuming no"
546- fi
547-
548- # if we're cross compiling, just try to compile and figure good enough
549- if test "$asm_result" = "unknown" ; then
550- AC_LINK_IFELSE ( [ AC_LANG_PROGRAM ( [ AC_INCLUDES_DEFAULT ] ,[ [
552+ ] ] ) ] ,
553+ [ asm_result="yes"] , [ asm_result="no"] ,
554+ [ asm_result="unknown"] ) ] ,
555+ [ asm_result="no - architecture not supported"] )
556+
557+ # if we're cross compiling, just try to compile and figure good enough
558+ AS_IF ( [ test "$asm_result" = "unknown"] ,
559+ [ AC_LINK_IFELSE ( [ AC_LANG_PROGRAM ( [ AC_INCLUDES_DEFAULT ] ,[ [
551560int ret = 1;
552561int negone = -1;
553- __asm__ __volatile__ ($assembly );
562+ __asm__ __volatile__ ($opal_gcc_inline_assign );
554563return ret;
555- ] ] ) ] ,
556- [ asm_result="yes"] , [ asm_result="no"] )
557- fi
564+ ] ] ) ] ,
565+ [ asm_result="yes"] , [ asm_result="no"] ) ] )
566+ opal_cv_asm_gcc_inline_assembly="$asm_result"
567+ OPAL_VAR_SCOPE_POP] )
558568
559- AC_MSG_RESULT ( [ $asm_result] )
560-
561- if test "$asm_result" = "yes" ; then
562- OPAL_C_GCC_INLINE_ASSEMBLY=1
563- opal_cv_asm_inline_supported="yes"
564- else
565- OPAL_C_GCC_INLINE_ASSEMBLY=0
566- fi
569+ AS_IF ( [ test "$opal_cv_asm_gcc_inline_assembly" = "yes"] ,
570+ [ OPAL_C_GCC_INLINE_ASSEMBLY=1
571+ $1 ] ,
572+ [ OPAL_C_GCC_INLINE_ASSEMBLY=0
573+ $2 ] )
567574
568575 AC_DEFINE_UNQUOTED ( [ OPAL_C_GCC_INLINE_ASSEMBLY] ,
569576 [ $OPAL_C_GCC_INLINE_ASSEMBLY] ,
570577 [ Whether C compiler supports GCC style inline assembly] )
571-
572- unset OPAL_C_GCC_INLINE_ASSEMBLY assembly asm_result
573578] ) dnl
574579
580+
575581dnl #################################################################
576582dnl
577583dnl OPAL_CONFIG_ASM
578584dnl
579- dnl DEFINE OPAL_ASSEMBLY_ARCH to something in sys/architecture.h
580- dnl DEFINE OPAL_ASSEMBLY_FORMAT to string containing correct
581- dnl format for assembly (not user friendly)
582- dnl SUBST OPAL_ASSEMBLY_FORMAT to string containing correct
583- dnl format for assembly (not user friendly)
585+ dnl Configure assembly support. AC_DEFINES the following:
586+ dnl - OPAL_C_GCC_INLINE_ASSEMBLY - 1 if C compiler supports
587+ dnl GCC-style inline assembly
588+ dnl - OPAL_USE_C11_ATOMICS - 1 if atomics implementation should
589+ dnl use C11-style atomics
590+ dnl - OPAL_USE_GCC_BUILTIN_ATOMICS - 1 if atomics implementation
591+ dnl should use GCC built-in style atomics
592+ dnl - OPAL_USE_ASM_ATOMICS - 1 if atomics implementation should
593+ dnl use inline assembly (using GCC-style inline assembly)
594+ dnl for atomics implementation
584595dnl
585596dnl #################################################################
586597AC_DEFUN ( [ OPAL_CONFIG_ASM] ,[
587598 AC_REQUIRE ( [ OPAL_SETUP_CC] )
588599
589- AC_ARG_ENABLE ( [ c11-atomics] ,[ AS_HELP_STRING ( [ --enable-c11-atomics] ,
590- [ Enable use of C11 atomics if available (default: enabled)] ) ] )
600+ OPAL_VAR_SCOPE_PUSH([ atomics_found want_c11_atomics want_gcc_builtin_atomics want_asm_atomics opal_cv_asm_arch result] )
601+
602+ # only assembly style we support today is gcc-style inline
603+ # assembly, find out if it works. We need this even for C11/GCC
604+ # builtin atomics cases, because we use inline assembly for
605+ # timers, LLSC, and 16 byte compare and swap routines.
606+ OPAL_CHECK_INLINE_C_GCC([ gcc_inline=1] , [ gcc_inline=0] )
607+
608+ atomics_found=no
609+ want_c11_atomics=0
610+ want_gcc_builtin_atomics=0
611+ want_asm_atomics=0
612+
613+ AC_ARG_ENABLE ( [ c11-atomics] ,
614+ [ AS_HELP_STRING ( [ --enable-c11-atomics] ,
615+ [ Enable use of C11 atomics if available (default: use if available, disabled by default on 64-bit PowerPC)] ) ] )
591616
592617 AC_ARG_ENABLE ( [ builtin-atomics] ,
593- [ AS_HELP_STRING ( [ --enable-builtin-atomics] ,
594- [ Enable use of GCC built-in atomics (default: autodetect)] ) ] )
595-
596- OPAL_CHECK_C11_CSWAP_INT128
597- opal_cv_asm_builtin="BUILTIN_NO"
598- OPAL_CHECK_GCC_ATOMIC_BUILTINS
599-
600- if test "x$enable_c11_atomics" != "xno" && test "$opal_cv_c11_supported" = "yes" ; then
601- opal_cv_asm_builtin="BUILTIN_C11"
602- OPAL_CHECK_C11_CSWAP_INT128
603- elif test "x$enable_c11_atomics" = "xyes"; then
604- AC_MSG_WARN ( [ C11 atomics were requested but are not supported] )
605- AC_MSG_ERROR ( [ Cannot continue] )
606- elif test "$enable_builtin_atomics" = "yes" ; then
607- if test $opal_cv_have___atomic = "yes" ; then
608- opal_cv_asm_builtin="BUILTIN_GCC"
609- else
610- AC_MSG_WARN ( [ GCC built-in atomics requested but not found.] )
611- AC_MSG_ERROR ( [ Cannot continue] )
612- fi
613- fi
618+ [ AS_HELP_STRING ( [ --enable-builtin-atomics] ,
619+ [ Enable use of GCC built-in atomics. Note that C11 atomics are preferred over built-in atomics. (default: use if available, disabled by default on 64-bit PowerPC)] ) ] )
614620
615- # find our architecture for purposes of assembly stuff
616- opal_cv_asm_arch="UNSUPPORTED"
617- OPAL_GCC_INLINE_ASSIGN=""
621+ AC_ARG_ENABLE ( [ builtin-atomics- for-ppc ] ,
622+ [ AS_HELP_STRING ( [ --enable-builtin-atomics-for-ppc ] ,
623+ [ For performance reasons, 64-bit POWER architectures will not use C11 or GCC built-in atomics, even if --enable-c11-atomics is passed to configure. Enabling this option will re-enable support for both C11 and GCC built-in atomics. ] ) ] )
618624
625+ # See the following github PR and some performance numbers/discussion:
626+ # https://github.com/open-mpi/ompi/pull/8649
627+ #
628+ # This logic is a bit convoluted, but matches existing logic in v4.x.
619629 case "${host}" in
620- x86_64-*x32|i?86-*|x86_64*|amd64*)
621- if test "$ac_cv_sizeof_long" = "4" ; then
622- if test $opal_cv_asm_builtin = BUILTIN_NO ; then
623- AC_MSG_ERROR ( [ IA32 atomics are no longer supported. Use a C11 compiler] )
624- fi
625- opal_cv_asm_arch="IA32"
626- else
627- opal_cv_asm_arch="X86_64"
628- OPAL_CHECK_CMPXCHG16B
629- fi
630- OPAL_GCC_INLINE_ASSIGN='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)'
631- ;;
630+ powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*)
631+ AS_IF ( [ test "$ac_cv_sizeof_long" = "8" -a "$enable_builtin_atomics_for_ppc" != "yes"] ,
632+ [ AS_IF ( [ test "$enable_c11_atomics" != "no" -a "$enable_builtin_atomics" != "no"] ,
633+ [ AC_MSG_NOTICE ( [ Disabling built-in and C11 atomics due to known performance issues on Powerpc] ) ] )
634+ AS_IF ( [ test "$enable_c11_atomics" = "yes" -o "$enable_builtin_atomics" = "yes"] ,
635+ [ AC_MSG_WARN ( [ Ignoring --enable-c11-atomics and --enable-builtin-atomics options on POWER. Set
636+ --enable-builtin-atomics-for-ppc to re-enable.] ) ] )
637+ enable_c11_atomics="no"
638+ enable_builtin_atomics="no"] )
639+ ;;
640+ esac
632641
633- aarch64*)
634- opal_cv_asm_arch="ARM64"
635- OPAL_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
642+ # Option 1 for atomics: C11
643+ #
644+ # We currently always disable C11 atomics with the Intel compilers.
645+ # We know builds older than 20200310 are broken with respect to
646+ # C11 atomics, but have not apparently found a build we are happy
647+ # with. In the future, this should be changed to a check for a
648+ # particular Intel version.
649+ AS_IF ( [ test "$enable_c11_atomics" != "no" -a "$opal_cv_c11_supported" = "yes" -a "$opal_cv_c_compiler_vendor" != "intel"] ,
650+ [ AC_MSG_NOTICE ( [ Using C11 atomics] )
651+ OPAL_CHECK_C11_CSWAP_INT128
652+ want_c11_atomics=1
653+ atomics_found="C11 atomics"] ,
654+ [ test "$enable_c11_atomics" = "yes"] ,
655+ [ AC_MSG_WARN ( [ C11 atomics were requested but are not supported] )
656+ AC_MSG_ERROR ( [ Cannot continue] ) ] )
657+
658+ # Option 2 for atomics: GCC-style Builtin
659+ AS_IF ( [ test "$atomics_found" = "no" -a "$enable_builtin_atomics" != "no"] ,
660+ [ OPAL_CHECK_GCC_ATOMIC_BUILTINS
661+ AS_IF ( [ test $opal_cv_have___atomic = "yes"] ,
662+ [ AC_MSG_NOTICE ( [ Using GCC built-in style atomics] )
663+ atomics_found="GCC built-in style atomics"
664+ want_gcc_builtin_atomics=1] ,
665+ [ test "$enable_builtin_atomics" = "yes"] ,
666+ [ AC_MSG_WARN ( [ GCC built-in atomics requested but not found.] )
667+ AC_MSG_ERROR ( [ Cannot continue] ) ] ) ] )
668+
669+ # Option 3 for atomics: inline assembly
670+ AS_IF ( [ test "$atomics_found" = "no" -a "$gcc_inline" = "1"] ,
671+ [ case "${host}" in
672+ x86_64-*x32|i?86-*|x86_64*|amd64*)
673+ AS_IF ( [ test "$ac_cv_sizeof_long" = "8"] ,
674+ [ OPAL_CHECK_CMPXCHG16B
675+ opal_cv_asm_arch="X86_64"
676+ atomics_found="x86_64 assembly"] )
636677 ;;
637678
638- armv7*|arm-*-linux-gnueabihf|armv6*)
639- if test $opal_cv_asm_builtin = BUILTIN_NO ; then
640- AC_MSG_ERROR ( [ 32-bit ARM atomics are no longer supported. Use a C11 compiler] )
641- fi
642-
643- opal_cv_asm_arch="ARM"
644- OPAL_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
679+ aarch64*)
680+ opal_cv_asm_arch="ARM64"
681+ atomics_found="aarch64 assembly"
645682 ;;
646683
647- powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*)
648- if test "$ac_cv_sizeof_long" = "4" ; then
649- if test $opal_cv_asm_builtin = BUILTIN_NO ; then
650- AC_MSG_ERROR ( [ PowerPC 32-bit atomics are no longer supported. Use a C11 compiler] )
651- fi
652- opal_cv_asm_arch="POWERPC32"
653- elif test "$ac_cv_sizeof_long" = "8" ; then
654- opal_cv_asm_arch="POWERPC64"
655- else
656- AC_MSG_ERROR ( [ Could not determine PowerPC word size: $ac_cv_sizeof_long] )
657- fi
658- OPAL_GCC_INLINE_ASSIGN='"1: li %0,0" : "=&r"(ret)'
659-
660- # See the following github PR and some performance numbers/discussion:
661- # https://github.com/open-mpi/ompi/pull/8649
662- AC_MSG_CHECKING ( [ $opal_cv_asm_arch: Checking if force gcc atomics requested] )
663- if test $force_gcc_atomics_ppc = 0 ; then
664- AC_MSG_RESULT ( [ no] )
665- opal_cv_asm_builtin="BUILTIN_NO"
666- else
667- AC_MSG_RESULT ( [ Yes] )
668- AC_MSG_WARN ( [ $opal_cv_asm_arch: gcc atomics have been known to perform poorly on powerpc.] )
669- fi
670-
684+ powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*)
685+ AS_IF ( [ test "$ac_cv_sizeof_long" = "8"] ,
686+ [ opal_cv_asm_arch="POWERPC64"
687+ atomics_found="PowerPC assembly"] )
671688 ;;
672- *)
673- if test $opal_cv_have___atomic = "yes" ; then
674- opal_cv_asm_builtin="BUILTIN_GCC"
675- else
676- AC_MSG_ERROR ( [ No atomic primitives available for $host] )
677- fi
678- ;;
679- esac
689+ esac
680690
681- if test "$opal_cv_asm_builtin" = "BUILTIN_GCC" ; then
682- AC_DEFINE ( [ OPAL_C_GCC_INLINE_ASSEMBLY] , [ 1] ,
683- [ Whether C compiler supports GCC style inline assembly] )
684- else
685- opal_cv_asm_inline_supported="no"
686- # now that we know our architecture, try to inline assemble
687- OPAL_CHECK_INLINE_C_GCC([ $OPAL_GCC_INLINE_ASSIGN] )
688- fi # if opal_cv_asm_builtin = BUILTIN_GCC
691+ AS_IF ( [ test "$atomics_found" != "no"] ,
692+ [ want_asm_atomics=1] )
693+ AC_MSG_CHECKING ( [ for inline assembly atomics] )
694+ AC_MSG_RESULT ( [ $atomics_found] ) ] )
695+
696+ AS_IF ( [ test "$atomics_found" = "no"] ,
697+ [ AC_MSG_ERROR ( [ No usable atomics implementation found. Cannot continue.] ) ] )
689698
690699 result="OPAL_$opal_cv_asm_arch"
691- AC_MSG_CHECKING ( [ for assembly architecture] )
692- AC_MSG_RESULT ( [ $opal_cv_asm_arch] )
693700 AC_DEFINE_UNQUOTED ( [ OPAL_ASSEMBLY_ARCH] , [ $result] ,
694701 [ Architecture type of assembly to use for atomic operations and CMA] )
695702
696- result="OPAL_$opal_cv_asm_builtin"
697- OPAL_ASSEMBLY_BUILTIN="$opal_cv_asm_builtin"
698- AC_MSG_CHECKING ( [ for builtin atomics] )
699- AC_MSG_RESULT ( [ $opal_cv_asm_builtin] )
700- AC_DEFINE_UNQUOTED ( [ OPAL_ASSEMBLY_BUILTIN] , [ $result] ,
701- [ Whether to use builtin atomics] )
702- AC_SUBST ( [ OPAL_ASSEMBLY_BUILTIN] )
703+ AC_DEFINE_UNQUOTED ( [ OPAL_USE_C11_ATOMICS] ,
704+ [ $want_c11_atomics] ,
705+ [ Whether to use C11 atomics for atomics implementation] )
706+ AC_DEFINE_UNQUOTED ( [ OPAL_USE_GCC_BUILTIN_ATOMICS] ,
707+ [ $want_gcc_builtin_atomics] ,
708+ [ Whether to use GCC-style built-in atomics for atomics implementation] )
709+ AC_DEFINE_UNQUOTED ( [ OPAL_USE_ASM_ATOMICS] ,
710+ [ $want_asm_atomics] ,
711+ [ Whether to use assembly-coded atomics for atomics implementation] )
703712
704- OPAL_SUMMARY_ADD([ [ Miscellaneous] ] ,[ [ Atomics] ] ,[ ] ,[ $opal_cv_asm_builtin ] )
713+ OPAL_SUMMARY_ADD([ [ Miscellaneous] ] ,[ [ Atomics] ] ,[ ] ,[ $atomics_found ] )
705714
706- unset result
715+ OPAL_VAR_SCOPE_POP
707716] ) dnl
0 commit comments