Skip to content

Commit 948392a

Browse files
committed
common: Leverage a better CRC32C implementation
ISA-L provides a few different CRC32C implementations, of which Ceph has only ever linked against one (crc32_iscsi_00). The second CRC32C implementation provided by ISA-L (crc32_iscsi_01) improves upon the first, as Ceph uses it, in two ways: 1) crc32_iscsi_01 explicitly checks for and handles buffers smaller than 8 bytes, and computes the CRC32C value using the hardware-accelerated CRC32 instruction. In comparison, crc32_iscsi_00 prefetches too far when given small buffers, requiring the Ceph code to explicitly check for this case and handle it differently in software. This software-fallback implementation of CRC32 also comes with a different set of LUTs (lookup tables) and is less efficient, as it does not make use of the CRC32 instruction. 2) crc32_iscsi_00 makes use of large LUTs (lookup tables) to perform the modular reduction required to produce the CRC32C value. In contrast, crc32_iscsi_01 uses the PCLMUL instruction set to perform reductions 128 bits at a time with smaller LUTs, resulting in greater throughput and less data cache pollution. Fixes: https://tracker.ceph.com/issues/65791 Signed-off-by: Tyler Stachecki <[email protected]>
1 parent 7dd8e5a commit 948392a

File tree

4 files changed

+30
-0
lines changed

4 files changed

+30
-0
lines changed

src/common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,7 @@ if(HAVE_INTEL)
215215
set(CMAKE_ASM_FLAGS "-i ${PROJECT_SOURCE_DIR}/src/isa-l/include/ ${CMAKE_ASM_FLAGS}")
216216
list(APPEND crc32_srcs
217217
${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_00.asm
218+
${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_01.asm
218219
crc32c_intel_fast_zero_asm.s)
219220
endif(HAVE_NASM_X64)
220221
elseif(HAVE_POWER8)

src/common/crc32c.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,9 @@ ceph_crc32c_func_t ceph_choose_crc32(void)
2424
// use that.
2525
#if defined(__i386__) || defined(__x86_64__)
2626
if (ceph_arch_intel_sse42 && ceph_crc32c_intel_fast_exists()) {
27+
if (ceph_arch_intel_pclmul) {
28+
return ceph_crc32c_intel_fast_pclmul;
29+
}
2730
return ceph_crc32c_intel_fast;
2831
}
2932
#elif defined(__arm__) || defined(__aarch64__)

src/common/crc32c_intel_fast.c

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,25 @@
22
#include "common/crc32c_intel_baseline.h"
33

44
extern unsigned int crc32_iscsi_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_00");
5+
extern unsigned int crc32_iscsi_01(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_01");
56
extern unsigned int crc32_iscsi_zero_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_zero_00");
67

78
#ifdef HAVE_NASM_X64
89

10+
uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
11+
{
12+
if (!buffer)
13+
{
14+
return crc32_iscsi_zero_00(buffer, len, crc);
15+
}
16+
17+
/* Unlike crc32_iscsi_00, crc32_iscsi_01 handles the case where the
18+
* input buffer is less than 8 bytes in its prelude, and does not
19+
* prefetch beyond said buffer.
20+
*/
21+
return crc32_iscsi_01(buffer, len, crc);
22+
}
23+
924
uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
1025
{
1126
uint32_t v;
@@ -43,6 +58,11 @@ int ceph_crc32c_intel_fast_exists(void)
4358
return 0;
4459
}
4560

61+
uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
62+
{
63+
return 0;
64+
}
65+
4666
uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
4767
{
4868
return 0;

src/common/crc32c_intel_fast.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,16 @@ extern int ceph_crc32c_intel_fast_exists(void);
1010

1111
#ifdef __x86_64__
1212

13+
extern uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len);
1314
extern uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len);
1415

1516
#else
1617

18+
static inline uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
19+
{
20+
return 0;
21+
}
22+
1723
static inline uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
1824
{
1925
return 0;

0 commit comments

Comments
 (0)