Skip to content

Commit acf9f8d

Browse files
committed
x86/crc: drop the avx10_256 functions and rename avx10_512 to avx512
Intel made a late change to the AVX10 specification that removes support for a 256-bit maximum vector length and enumeration of the maximum vector length. AVX10 will imply a maximum vector length of 512 bits. I.e. there won't be any such thing as AVX10/256 or AVX10/512; there will just be AVX10, and it will essentially just consolidate AVX512 features.

As a result of this new development, my strategy of providing both *_avx10_256 and *_avx10_512 functions didn't turn out to be that useful. The only remaining motivation for the 256-bit AVX512 / AVX10 functions is to avoid downclocking on older Intel CPUs. But I already wrote *_avx2 code too (primarily to support CPUs without AVX512), which performs almost as well as *_avx10_256. So we should just use that.

Therefore, remove the *_avx10_256 CRC functions, and rename the *_avx10_512 CRC functions to *_avx512. Make Ice Lake and Tiger Lake use the *_avx2 functions instead of *_avx10_256 which they previously used.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Eric Biggers <[email protected]>
1 parent 981b39d commit acf9f8d

File tree

2 files changed

+13
-25
lines changed

2 files changed

+13
-25
lines changed

arch/x86/lib/crc-pclmul-template.S

Lines changed: 8 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@
138138
.macro _fold_vec acc, data, consts, tmp
139139
_pclmulqdq \consts, HI64_TERMS, \acc, HI64_TERMS, \tmp
140140
_pclmulqdq \consts, LO64_TERMS, \acc, LO64_TERMS, \acc
141-
.if AVX_LEVEL < 10
141+
.if AVX_LEVEL <= 2
142142
_cond_vex pxor, \data, \tmp, \tmp
143143
_cond_vex pxor, \tmp, \acc, \acc
144144
.else
@@ -201,19 +201,16 @@
201201
// \vl is the maximum length of vector register to use in bytes: 16, 32, or 64.
202202
//
203203
// \avx_level is the level of AVX support to use: 0 for SSE only, 2 for AVX2, or
204-
// 10 for AVX10 or AVX512.
204+
// 512 for AVX512.
205205
//
206206
// If \vl == 16 && \avx_level == 0, the generated code requires:
207207
// PCLMULQDQ && SSE4.1. (Note: all known CPUs with PCLMULQDQ also have SSE4.1.)
208208
//
209209
// If \vl == 32 && \avx_level == 2, the generated code requires:
210210
// VPCLMULQDQ && AVX2.
211211
//
212-
// If \vl == 32 && \avx_level == 10, the generated code requires:
213-
// VPCLMULQDQ && (AVX10/256 || (AVX512BW && AVX512VL))
214-
//
215-
// If \vl == 64 && \avx_level == 10, the generated code requires:
216-
// VPCLMULQDQ && (AVX10/512 || (AVX512BW && AVX512VL))
212+
// If \vl == 64 && \avx_level == 512, the generated code requires:
213+
// VPCLMULQDQ && AVX512BW && AVX512VL.
217214
//
218215
// Other \vl and \avx_level combinations are either not supported or not useful.
219216
.macro _crc_pclmul n, lsb_crc, vl, avx_level
@@ -534,7 +531,7 @@
534531
.if LSB_CRC && \n == 64
535532
_cond_vex punpcklqdq, %xmm1, %xmm2, %xmm2
536533
_pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
537-
.if AVX_LEVEL < 10
534+
.if AVX_LEVEL <= 2
538535
_cond_vex pxor, %xmm2, %xmm0, %xmm0
539536
_cond_vex pxor, %xmm1, %xmm0, %xmm0
540537
.else
@@ -574,13 +571,9 @@ SYM_FUNC_START(prefix##_vpclmul_avx2); \
574571
_crc_pclmul n=bits, lsb_crc=lsb, vl=32, avx_level=2; \
575572
SYM_FUNC_END(prefix##_vpclmul_avx2); \
576573
\
577-
SYM_FUNC_START(prefix##_vpclmul_avx10_256); \
578-
_crc_pclmul n=bits, lsb_crc=lsb, vl=32, avx_level=10; \
579-
SYM_FUNC_END(prefix##_vpclmul_avx10_256); \
580-
\
581-
SYM_FUNC_START(prefix##_vpclmul_avx10_512); \
582-
_crc_pclmul n=bits, lsb_crc=lsb, vl=64, avx_level=10; \
583-
SYM_FUNC_END(prefix##_vpclmul_avx10_512);
574+
SYM_FUNC_START(prefix##_vpclmul_avx512); \
575+
_crc_pclmul n=bits, lsb_crc=lsb, vl=64, avx_level=512; \
576+
SYM_FUNC_END(prefix##_vpclmul_avx512);
584577
#else
585578
#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \
586579
SYM_FUNC_START(prefix##_pclmul_sse); \

arch/x86/lib/crc-pclmul-template.h

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,8 @@ crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len, \
2121
const void *consts_ptr); \
2222
crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len, \
2323
const void *consts_ptr); \
24-
crc_t prefix##_vpclmul_avx10_256(crc_t crc, const u8 *p, size_t len, \
25-
const void *consts_ptr); \
26-
crc_t prefix##_vpclmul_avx10_512(crc_t crc, const u8 *p, size_t len, \
27-
const void *consts_ptr); \
24+
crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len, \
25+
const void *consts_ptr); \
2826
DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)
2927

3028
#define INIT_CRC_PCLMUL(prefix) \
@@ -35,13 +33,10 @@ do { \
3533
cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL)) { \
3634
if (boot_cpu_has(X86_FEATURE_AVX512BW) && \
3735
boot_cpu_has(X86_FEATURE_AVX512VL) && \
36+
!boot_cpu_has(X86_FEATURE_PREFER_YMM) && \
3837
cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL)) { \
39-
if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) \
40-
static_call_update(prefix##_pclmul, \
41-
prefix##_vpclmul_avx10_256); \
42-
else \
43-
static_call_update(prefix##_pclmul, \
44-
prefix##_vpclmul_avx10_512); \
38+
static_call_update(prefix##_pclmul, \
39+
prefix##_vpclmul_avx512); \
4540
} else { \
4641
static_call_update(prefix##_pclmul, \
4742
prefix##_vpclmul_avx2); \

0 commit comments

Comments
 (0)