Skip to content

Commit 5aebe00

Browse files
committed
x86/crc32: optimize tail handling for crc32c short inputs
For handling the 0 <= len < sizeof(unsigned long) bytes left at the end, do a 4-2-1 step-down instead of a byte-at-a-time loop. This allows taking advantage of wider CRC instructions. Note that crc32c-3way.S already uses this same optimization too.

crc_kunit shows an improvement of about 25% for len=127.

Suggested-by: "H. Peter Anvin" <[email protected]>
Acked-by: Uros Bizjak <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Eric Biggers <[email protected]>
1 parent 511484f commit 5aebe00

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

arch/x86/lib/crc32-glue.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,15 @@ u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
5757
num_longs != 0; num_longs--, p += sizeof(unsigned long))
5858
asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
5959

60-
for (len %= sizeof(unsigned long); len; len--, p++)
60+
if (sizeof(unsigned long) > 4 && (len & 4)) {
61+
asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
62+
p += 4;
63+
}
64+
if (len & 2) {
65+
asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
66+
p += 2;
67+
}
68+
if (len & 1)
6169
asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
6270

6371
return crc;

0 commit comments

Comments
 (0)