Skip to content

Commit 915383e

Browse files
committed
:@
1 parent df85ce5 commit 915383e

File tree

4 files changed

+168
-29
lines changed

4 files changed

+168
-29
lines changed

Python/cpuinfo.c

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,28 +8,27 @@
88
// For now, we only try to enable SIMD instructions for x86-64 Intel CPUs.
99
// In the future, we should carefully enable support for ARM NEON and POWER
1010
// as well as AMD. See https://sourceforge.net/p/predef/wiki/Architectures.
11+
#define HAS_CPUID_SUPPORT
1112
#if defined(__x86_64__) && defined(__GNUC__)
1213
# include <cpuid.h> // __cpuid_count()
13-
# define HAS_CPUID_SUPPORT
14-
# if defined(__clang__)
15-
# include <immintrin.h> // _xgetbv()
16-
# endif
17-
# define HAS_XGETBV_SUPPORT
14+
# include <immintrin.h> // _xgetbv()
1815
#elif defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
1916
# include <intrin.h> // __cpuidex()
20-
# define HAS_CPUID_SUPPORT
2117
# include <immintrin.h> // _xgetbv()
22-
# define HAS_XGETBV_SUPPORT
2318
#else
2419
# undef HAS_CPUID_SUPPORT
25-
# undef HAS_XGETBV_SUPPORT
2620
#endif
2721

2822
// Below, we declare macros for guarding the detection of SSE, AVX/AVX2
2923
// and AVX-512 instructions. If the compiler does not even recognize the
3024
// corresponding flags or if we are not on an 64-bit platform we do not
3125
// even try to inspect the output of CPUID for those specific features.
3226
#ifdef HAS_CPUID_SUPPORT
27+
#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) \
28+
|| defined(_Py_CPUINFO_USE_XGETBV_OPCODE)
29+
# define HAS_XGETBV_SUPPORT
30+
#endif
31+
3332
#if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \
3433
|| defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \
3534
|| defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \
@@ -159,19 +158,10 @@ static uint64_t /* should only be used after calling cpuid(1, 0, ...) */
159158
get_xgetbv(uint32_t index)
160159
{
161160
assert(index == 0); // only XCR0 is supported for now
162-
# if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__)
163-
# if defined(__clang__)
164-
# if _Py__has_builtin(__builtin_ia32_xgetbv)
161+
#if defined(_Py_CPUINFO_USE_XGETBV_FUNC)
162+
/* directly use the compiler's helper if -mxsave is available */
165163
return (uint64_t)_xgetbv(index);
166-
# else
167-
/*
168-
* Without -mxsave support, directly using xgetbv() with raw opcode
169-
* may still fail on some platforms (e.g., AMD64 + FreeBSD + clang).
170-
* To be on the safe side, we assume that XGETBV is not supported.
171-
*/
172-
return 0;
173-
# endif
174-
# else /* gcc & icc */
164+
#elif defined(__x86_64__) && defined(__GNUC__)
175165
uint32_t eax = 0, edx = 0;
176166
__asm__ volatile(
177167
/* raw opcode for xgetbv for compatibility with older toolchains */
@@ -180,14 +170,15 @@ get_xgetbv(uint32_t index)
180170
: "c" (index)
181171
);
182172
return ((uint64_t)edx << 32) | eax;
183-
# endif
184-
# elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64)
173+
#elif defined(_M_X64)
185174
return (uint64_t)_xgetbv(index);
186-
# else
175+
#else
187176
(void)index;
188177
return 0;
189-
# endif
178+
#endif
190179
}
180+
#else
181+
#define get_xgetbv(_INDEX) 0
191182
#endif
192183

193184
/* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */
@@ -364,14 +355,12 @@ detect_cpuid_xsave_state(_Py_cpuid_features *flags)
364355
assert(flags->maxleaf >= 1);
365356
(void)flags;
366357
// Keep the ordering and newlines as they are declared in the structure.
367-
#ifdef HAS_XGETBV_SUPPORT
368358
uint64_t xcr0 = flags->xsave && flags->osxsave ? get_xgetbv(0) : 0;
369359
flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE);
370360
flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX);
371361
flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK);
372362
flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256);
373363
flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM);
374-
#endif
375364
}
376365
#endif
377366

configure

Lines changed: 114 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

configure.ac

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8061,11 +8061,41 @@ fi
80618061
dnl Check that -mxsave can be used for cpuinfo.c as the latter
80628062
dnl requires to be compiled with this option for xgetbv() support.
80638063
AX_CHECK_COMPILE_FLAG([-mxsave],
8064-
[AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])],
8065-
[AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])],
8066-
[-Werror])
8064+
[AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])],
8065+
[AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])],
8066+
[-Werror])
80678067
AC_SUBST([CORE_CPUINFO_CFLAGS])
80688068

8069+
AC_CACHE_CHECK([_xgetbv(0) is natively supported], [ac_cv_use_xgetbv_func], [
8070+
WITH_SAVE_ENV([
8071+
CFLAGS="$CFLAGS -Werror -mxsave"
8072+
AC_COMPILE_IFELSE(
8073+
[AC_LANG_PROGRAM([[@%:@include <immintrin.h>]], [[_xgetbv(0)]])],
8074+
[ac_cv_use_xgetbv_func=yes],
8075+
[ac_cv_use_xgetbv_func=no])])])
8076+
if test "$ac_cv_use_xgetbv_func" = "yes" ; then
8077+
AC_DEFINE([_Py_CPUINFO_USE_XGETBV_FUNC], [1], [_xgetbv() is preferred])
8078+
fi
8079+
8080+
AC_CACHE_CHECK([xgetbv(0) opcode is supported], [ac_cv_use_xgetbv_opcode], [
8081+
WITH_SAVE_ENV([
8082+
CFLAGS="$CFLAGS -Werror"
8083+
AC_RUN_IFELSE([AC_LANG_PROGRAM([[@%:@include <stdint.h>]], [[
8084+
int main(void)
8085+
{
8086+
uint32_t eax = 0, edx = 0;
8087+
__asm__ __volatile__(
8088+
".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0));
8089+
return 0;
8090+
}
8091+
]])],
8092+
[ac_cv_use_xgetbv_opcode=yes],
8093+
[ac_cv_use_xgetbv_opcode=no],
8094+
[ac_cv_use_xgetbv_opcode=no])])])
8095+
if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then
8096+
AC_DEFINE([_Py_CPUINFO_USE_XGETBV_OPCODE], [1], [XGETBV opcode is preferred])
8097+
fi
8098+
80698099
###############################################################################
80708100
# HACL* compilation and linking configuration (contact: @picnixz)
80718101
#

pyconfig.h.in

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2104,6 +2104,12 @@
21042104
/* Define if '-mssse3' is a valid compiler flag. */
21052105
#undef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS
21062106

2107+
/* _xgetbv() is preferred */
2108+
#undef _Py_CPUINFO_USE_XGETBV_FUNC
2109+
2110+
/* XGETBV opcode is preferred */
2111+
#undef _Py_CPUINFO_USE_XGETBV_OPCODE
2112+
21072113
/* Defined if _Complex C type can be used with libffi. */
21082114
#undef _Py_FFI_SUPPORT_C_COMPLEX
21092115

0 commit comments

Comments
 (0)