Commit d36cebe
lib/crc32: improve support for arch-specific overrides
Currently the CRC32 library functions are defined as weak symbols, and the arm64 and riscv architectures override them. This method of arch-specific overrides has the limitation that it only works when both the base and arch code are built-in. It also causes the arch-specific code to be silently unused if it is accidentally built with lib-y instead of obj-y; unfortunately the RISC-V code does exactly that.

This commit reorganizes the code to have explicit *_arch() functions that are called when they are enabled, similar to how some of the crypto library code works (e.g. chacha_crypt() calls chacha_crypt_arch()).

Make the existing kconfig choice for the CRC32 implementation also control whether the arch-optimized implementation (if one is available) is enabled or not. Make it enabled by default if CRC32 is also enabled.

The result is that arch-optimized CRC32 library functions will be included automatically when appropriate, but it is now possible to disable them. They can also now be built as a loadable module if the CRC32 library functions happen to be used only by loadable modules, in which case the arch and base CRC32 modules will be automatically loaded via direct symbol dependency when appropriate.

Reviewed-by: Ard Biesheuvel <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Eric Biggers <[email protected]>
1 parent: 0a499a7

11 files changed, 118 insertions(+), 51 deletions(-)
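To make the auto-loading behavior described in the commit message concrete, below is a minimal sketch of a hypothetical consumer module (the module name and printout are illustrative, not part of this commit; crc32_le() and CONFIG_CRC32_ARCH come from the patch itself). Because crc32_le() is now a static inline that calls crc32_le_arch(), building this consumer as a module records a direct symbol dependency, so modprobe pulls in the arch CRC32 module (e.g. crc32-arm64.ko) before loading it.

/*
 * Hypothetical example module (not part of this commit): a modular user
 * of crc32_le() now gains a direct symbol dependency on crc32_le_arch()
 * whenever CONFIG_CRC32_ARCH is enabled, so the arch CRC32 module is
 * loaded automatically.
 */
#include <linux/module.h>
#include <linux/crc32.h>

static int __init crc32_demo_init(void)
{
        static const u8 data[] = "123456789";

        /* With CONFIG_CRC32_ARCH=m, this inline call references
         * crc32_le_arch(), creating the module dependency. */
        pr_info("crc32_demo: crc32_le = %08x\n", crc32_le(~0U, data, 9));
        return 0;
}

static void __exit crc32_demo_exit(void)
{
}

module_init(crc32_demo_init);
module_exit(crc32_demo_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Hypothetical CRC32 library consumer");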

arch/arm64/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ config ARM64
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_CACHE_LINE_SIZE
 	select ARCH_HAS_CC_PLATFORM
+	select ARCH_HAS_CRC32
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE

arch/arm64/lib/Makefile

Lines changed: 2 additions & 1 deletion
@@ -13,7 +13,8 @@ endif
 
 lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
 
-obj-$(CONFIG_CRC32) += crc32.o crc32-glue.o
+obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o
+crc32-arm64-y := crc32.o crc32-glue.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 

arch/arm64/lib/crc32-glue.c

Lines changed: 10 additions & 3 deletions
@@ -2,6 +2,7 @@
 
 #include <linux/crc32.h>
 #include <linux/linkage.h>
+#include <linux/module.h>
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
@@ -21,7 +22,7 @@ asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
 asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
 asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len);
 
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
 		return crc32_le_base(crc, p, len);
@@ -40,8 +41,9 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 
 	return crc32_le_arm64(crc, p, len);
 }
+EXPORT_SYMBOL(crc32_le_arch);
 
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
 		return crc32c_le_base(crc, p, len);
@@ -60,8 +62,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 
 	return crc32c_le_arm64(crc, p, len);
 }
+EXPORT_SYMBOL(crc32c_le_arch);
 
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
 		return crc32_be_base(crc, p, len);
@@ -80,3 +83,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 
 	return crc32_be_arm64(crc, p, len);
 }
+EXPORT_SYMBOL(crc32_be_arch);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("arm64-optimized CRC32 functions");

arch/riscv/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ config RISCV
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_BINFMT_FLAT
+	select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DEBUG_VM_PGTABLE

arch/riscv/lib/Makefile

Lines changed: 1 addition & 2 deletions
@@ -15,8 +15,7 @@ endif
 lib-$(CONFIG_MMU) += uaccess.o
 lib-$(CONFIG_64BIT) += tishift.o
 lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
-lib-$(CONFIG_RISCV_ISA_ZBC) += crc32.o
-
+obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 lib-$(CONFIG_RISCV_ISA_V) += xor.o
 lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o

arch/riscv/lib/crc32.c renamed to arch/riscv/lib/crc32-riscv.c

Lines changed: 10 additions & 3 deletions
@@ -14,6 +14,7 @@
 #include <linux/crc32poly.h>
 #include <linux/crc32.h>
 #include <linux/byteorder/generic.h>
+#include <linux/module.h>
 
 /*
  * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
@@ -217,17 +218,19 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
 	return crc_fb(crc, p, len);
 }
 
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
 {
 	return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
 				crc32_le_base);
 }
+EXPORT_SYMBOL(crc32_le_arch);
 
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 {
 	return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
 				CRC32C_POLY_QT_LE, crc32c_le_base);
 }
+EXPORT_SYMBOL(crc32c_le_arch);
 
 static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
 				     size_t len)
@@ -253,7 +256,7 @@ static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
 	return crc;
 }
 
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
 	size_t offset, head_len, tail_len;
 	unsigned long const *p_ul;
@@ -292,3 +295,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 legacy:
 	return crc32_be_base(crc, p, len);
 }
+EXPORT_SYMBOL(crc32_be_arch);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");

crypto/crc32_generic.c

Lines changed: 2 additions & 2 deletions
@@ -160,12 +160,12 @@ static struct shash_alg algs[] = {{
 static int __init crc32_mod_init(void)
 {
 	/* register the arch flavor only if it differs from the generic one */
-	return crypto_register_shashes(algs, 1 + (&crc32_le != &crc32_le_base));
+	return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
 }
 
 static void __exit crc32_mod_fini(void)
 {
-	crypto_unregister_shashes(algs, 1 + (&crc32_le != &crc32_le_base));
+	crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
 }
 
 subsys_initcall(crc32_mod_init);

crypto/crc32c_generic.c

Lines changed: 2 additions & 2 deletions
@@ -200,12 +200,12 @@ static struct shash_alg algs[] = {{
 static int __init crc32c_mod_init(void)
 {
 	/* register the arch flavor only if it differs from the generic one */
-	return crypto_register_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
+	return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
 }
 
 static void __exit crc32c_mod_fini(void)
 {
-	crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
+	crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
 }
 
 subsys_initcall(crc32c_mod_init);

include/linux/crc32.h

Lines changed: 28 additions & 7 deletions
@@ -8,10 +8,34 @@
 #include <linux/types.h>
 #include <linux/bitrev.h>
 
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len);
+
+static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len)
+{
+	if (IS_ENABLED(CONFIG_CRC32_ARCH))
+		return crc32_le_arch(crc, p, len);
+	return crc32_le_base(crc, p, len);
+}
+
+static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len)
+{
+	if (IS_ENABLED(CONFIG_CRC32_ARCH))
+		return crc32_be_arch(crc, p, len);
+	return crc32_be_base(crc, p, len);
+}
+
+/* TODO: leading underscores should be dropped once callers have been updated */
+static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
+{
+	if (IS_ENABLED(CONFIG_CRC32_ARCH))
+		return crc32c_le_arch(crc, p, len);
+	return crc32c_le_base(crc, p, len);
+}
 
 /**
  * crc32_le_combine - Combine two crc32 check values into one. For two
@@ -38,9 +62,6 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
 	return crc32_le_shift(crc1, len2) ^ crc2;
 }
 
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len);
-
 /**
  * __crc32c_le_combine - Combine two crc32c check values into one. For two
  *                       sequences of bytes, seq1 and seq2 with lengths len1
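A note on how the header above replaces the old weak-symbol dispatch: IS_ENABLED(CONFIG_CRC32_ARCH) is a compile-time constant that is true for both =y and =m, so when the option is off the compiler discards the arch branch entirely and no unresolved reference to crc32_le_arch() is left for the linker. A condensed, annotated restatement of the idiom from the diff above (not new code):

/* Dispatch idiom from include/linux/crc32.h, annotated. */
static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len)
{
	if (IS_ENABLED(CONFIG_CRC32_ARCH))	/* compile-time constant */
		return crc32_le_arch(crc, p, len);	/* arch module or built-in */
	return crc32_le_base(crc, p, len);	/* generic table-based code */
}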

lib/Kconfig

Lines changed: 54 additions & 16 deletions
@@ -190,6 +190,9 @@ config CRC32
 	  the kernel tree does. Such modules that use library CRC32/CRC32c
 	  functions require M here.
 
+config ARCH_HAS_CRC32
+	bool
+
 config CRC32_SELFTEST
 	tristate "CRC32 perform self test on init"
 	depends on CRC32
@@ -202,24 +205,39 @@ config CRC32_SELFTEST
 choice
 	prompt "CRC32 implementation"
 	depends on CRC32
-	default CRC32_SLICEBY8
+	default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32
+	default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32
 	help
-	  This option allows a kernel builder to override the default choice
-	  of CRC32 algorithm.  Choose the default ("slice by 8") unless you
-	  know that you need one of the others.
+	  This option allows you to override the default choice of CRC32
+	  implementation.  Choose the default unless you know that you need one
+	  of the others.
 
-config CRC32_SLICEBY8
+config CRC32_IMPL_ARCH_PLUS_SLICEBY8
+	bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32
+	help
+	  Use architecture-optimized implementation of CRC32.  Fall back to
+	  slice-by-8 in cases where the arch-optimized implementation cannot be
+	  used, e.g. if the CPU lacks support for the needed instructions.
+
+	  This is the default when an arch-optimized implementation exists.
+
+config CRC32_IMPL_ARCH_PLUS_SLICEBY1
+	bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32
+	help
+	  Use architecture-optimized implementation of CRC32, but fall back to
+	  slice-by-1 instead of slice-by-8 in order to reduce the binary size.
+
+config CRC32_IMPL_SLICEBY8
 	bool "Slice by 8 bytes"
 	help
 	  Calculate checksum 8 bytes at a time with a clever slicing algorithm.
-	  This is the fastest algorithm, but comes with a 8KiB lookup table.
-	  Most modern processors have enough cache to hold this table without
-	  thrashing the cache.
-
-	  This is the default implementation choice.  Choose this one unless
-	  you have a good reason not to.
+	  This is much slower than the architecture-optimized implementation of
+	  CRC32 (if the selected arch has one), but it is portable and is the
+	  fastest implementation when no arch-optimized implementation is
+	  available.  It uses an 8KiB lookup table.  Most modern processors have
+	  enough cache to hold this table without thrashing the cache.
 
-config CRC32_SLICEBY4
+config CRC32_IMPL_SLICEBY4
 	bool "Slice by 4 bytes"
 	help
 	  Calculate checksum 4 bytes at a time with a clever slicing algorithm.
@@ -228,15 +246,15 @@ config CRC32_SLICEBY4
 
 	  Only choose this option if you know what you are doing.
 
-config CRC32_SARWATE
-	bool "Sarwate's Algorithm (one byte at a time)"
+config CRC32_IMPL_SLICEBY1
+	bool "Slice by 1 byte (Sarwate's algorithm)"
 	help
 	  Calculate checksum a byte at a time using Sarwate's algorithm.  This
-	  is not particularly fast, but has a small 256 byte lookup table.
+	  is not particularly fast, but has a small 1KiB lookup table.
 
 	  Only choose this option if you know what you are doing.
 
-config CRC32_BIT
+config CRC32_IMPL_BIT
 	bool "Classic Algorithm (one bit at a time)"
 	help
 	  Calculate checksum one bit at a time.  This is VERY slow, but has
@@ -246,6 +264,26 @@ config CRC32_BIT
 
 endchoice
 
+config CRC32_ARCH
+	tristate
+	default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
+
+config CRC32_SLICEBY8
+	bool
+	default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8
+
+config CRC32_SLICEBY4
+	bool
+	default y if CRC32_IMPL_SLICEBY4
+
+config CRC32_SARWATE
+	bool
+	default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
+
+config CRC32_BIT
+	bool
+	default y if CRC32_IMPL_BIT
+
 config CRC64
 	tristate "CRC64 functions"
 	help
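For concreteness, here is how the new symbols could resolve on arm64 with the defaults, as a hypothetical .config fragment derived from the Kconfig logic above (illustrative, not part of the commit):

# Hypothetical arm64 .config fragment with CONFIG_CRC32=y:
CONFIG_ARCH_HAS_CRC32=y
CONFIG_CRC32=y
CONFIG_CRC32_IMPL_ARCH_PLUS_SLICEBY8=y
# CRC32_ARCH inherits the tristate value of CRC32 (it becomes =m if CRC32=m):
CONFIG_CRC32_ARCH=y
# The slice-by-8 tables remain enabled as the fallback implementation:
CONFIG_CRC32_SLICEBY8=y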
