Skip to content

Commit cd1a52d

Browse files
committed
The CRC module of ISA-L has been accelerated using RISC-V's V, Zbc, Zvbc, and Zvbb instruction sets, implementing data folding and Barrett reduction optimizations.
Signed-off-by: Ji Dong <[email protected]>
1 parent d414b27 commit cd1a52d

35 files changed

+3649
-3
lines changed

configure.ac

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ case "${CPU}" in
7171
AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
7272
)
7373
if test "x$rvv" = "xyes"; then
74-
CFLAGS+=" -march=rv64gcv"
75-
CCASFLAGS+=" -march=rv64gcv"
74+
CFLAGS+=" -march=rv64gcv_zbc_zvbc_zvbb"
75+
CCASFLAGS+=" -march=rv64gcv_zbc_zvbc_zvbb"
7676
fi
7777
AC_MSG_RESULT([$rvv])
7878
;;

crc/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,14 @@
2828
########################################################################
2929

3030
include crc/aarch64/Makefile.am
31+
include crc/riscv64/Makefile.am
3132

3233
lsrc += \
3334
crc/crc_base.c \
3435
crc/crc64_base.c
3536

3637
lsrc_base_aliases += crc/crc_base_aliases.c
3738
lsrc_ppc64le += crc/crc_base_aliases.c
38-
lsrc_riscv64 += crc/crc_base_aliases.c
3939

4040
lsrc_x86_64 += \
4141
crc/crc16_t10dif_01.asm \

crc/riscv64/Makefile.am

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
########################################################################
2+
# Copyright(c) 2025 ZTE Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions
6+
# are met:
7+
# * Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# * Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in
11+
# the documentation and/or other materials provided with the
12+
# distribution.
13+
# * Neither the name of ZTE Corporation nor the names of its
14+
# contributors may be used to endorse or promote products derived
15+
# from this software without specific prior written permission.
16+
#
17+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
#########################################################################
29+
# RISC-V 64 multibinary dispatch layer (runtime selection between the
# vectorized kernels below and the portable base implementations).
lsrc_riscv64 += \
	crc/riscv64/crc_multibinary_riscv.S \
	crc/riscv64/crc_riscv64_dispatcher.c

# CRC kernels accelerated with RISC-V vector carry-less multiply
# (V + Zvbc/Zvbb, plus scalar Zbc for the final Barrett reduction).
lsrc_riscv64 += \
	crc/riscv64/crc16_t10dif_vclmul.S \
	crc/riscv64/crc16_t10dif_copy_vclmul.S \
	crc/riscv64/crc32_ieee_norm_vclmul.S \
	crc/riscv64/crc32_iscsi_refl_vclmul.S \
	crc/riscv64/crc32_gzip_refl_vclmul.S \
	crc/riscv64/crc64_ecma_refl_vclmul.S \
	crc/riscv64/crc64_ecma_norm_vclmul.S \
	crc/riscv64/crc64_iso_refl_vclmul.S \
	crc/riscv64/crc64_iso_norm_vclmul.S \
	crc/riscv64/crc64_jones_refl_vclmul.S \
	crc/riscv64/crc64_jones_norm_vclmul.S \
	crc/riscv64/crc64_rocksoft_refl_vclmul.S \
	crc/riscv64/crc64_rocksoft_norm_vclmul.S
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
########################################################################
2+
# Copyright (c) 2025 ZTE Corporation.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions
6+
# are met:
7+
# * Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# * Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in
11+
# the documentation and/or other materials provided with the
12+
# distribution.
13+
# * Neither the name of ZTE Corporation nor the names of its
14+
# contributors may be used to endorse or promote products derived
15+
# from this software without specific prior written permission.
16+
#
17+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
#########################################################################
29+
.section .text
30+
.align 2
31+
.global crc16_t10dif_copy_vclmul
32+
33+
# Arguments:
34+
# a0: uint16_t crc (seed)
35+
# a1: uint8_t *dst (pointer to data)
36+
# a2: uint8_t *src (pointer to data)
37+
# a3: size_t len (data length)
38+
39+
# Register usage:
40+
41+
crc16_t10dif_copy_vclmul:
42+
# Initialize state
43+
slli a0, a0, 16
44+
# Check if length >= 64
45+
li t1, 64
46+
bgeu a3, t1, .crc_fold
47+
48+
.crc_table_loop_pre:
49+
beq a3, zero, .end
50+
la a7, .LANCHOR0
51+
add a3, a3, a2
52+
53+
.crc_table_loop:
54+
lbu a4, 0(a2)
55+
sb a4, 0(a1)
56+
addi a2, a2, 1
57+
addi a1, a1, 1
58+
sllw a4, a4, 0x18
59+
xor a4, a4, a0
60+
srlw a5, a4, 0x18
61+
sll a5, a5, 0x2
62+
add a5, a5, a7
63+
lw a0, 0(a5)
64+
sllw a4, a4, 0x8
65+
xor a0, a0, a4
66+
bne a2, a3, .crc_table_loop
67+
68+
.end:
69+
slli a0, a0, 32
70+
srli a0, a0, 48
71+
ret
72+
73+
.crc_fold:
74+
# Initialize vector registers
75+
vsetivli zero, 2, e64, m1, ta, ma
76+
vle64.v v6, 0(a2)
77+
addi a2, a2, 16
78+
vle64.v v7, 0(a2)
79+
addi a2, a2, 16
80+
vle64.v v8, 0(a2)
81+
addi a2, a2, 16
82+
vle64.v v9, 0(a2)
83+
addi a2, a2, 16
84+
addi a3, a3, -64
85+
86+
vse64.v v6, (a1)
87+
addi a1, a1, 16
88+
vse64.v v7, (a1)
89+
addi a1, a1, 16
90+
vse64.v v8, (a1)
91+
addi a1, a1, 16
92+
vse64.v v9, (a1)
93+
addi a1, a1, 16
94+
95+
# Prepare initial vector
96+
slli a0, a0, 32
97+
vmv.s.x v4, zero
98+
vrev8.v v6, v6
99+
vrev8.v v7, v7
100+
vrev8.v v8, v8
101+
vrev8.v v9, v9
102+
vslidedown.vi v0, v6, 1
103+
vslidedown.vi v1, v7, 1
104+
vslidedown.vi v2, v8, 1
105+
vslidedown.vi v3, v9, 1
106+
vslideup.vi v0, v6, 1
107+
vslideup.vi v1, v7, 1
108+
vslideup.vi v2, v8, 1
109+
vslideup.vi v3, v9, 1
110+
111+
vmv.v.x v5, a0
112+
vslideup.vi v4, v5, 1
113+
114+
vxor.vv v0, v0, v4
115+
116+
vmv.v.x v8, zero
117+
bltu a3, t1, final_fold
118+
119+
# Load constants
120+
la t0, k_const1
121+
vle64.v v5, 0(t0)
122+
123+
li t0, 64
124+
125+
# Main processing loop
126+
loop_start:
127+
vle64.v v9, (a2)
128+
addi a2, a2, 16
129+
vle64.v v10, (a2)
130+
addi a2, a2, 16
131+
vle64.v v11, (a2)
132+
addi a2, a2, 16
133+
vle64.v v12, (a2)
134+
addi a2, a2, 16
135+
vse64.v v9, (a1)
136+
addi a1, a1, 16
137+
vse64.v v10, (a1)
138+
addi a1, a1, 16
139+
vse64.v v11, (a1)
140+
addi a1, a1, 16
141+
vse64.v v12, (a1)
142+
addi a1, a1, 16
143+
144+
vclmul.vv v4, v0, v5
145+
vclmulh.vv v0, v0, v5
146+
vredxor.vs v0, v0, v8
147+
vredxor.vs v4, v4, v8
148+
vslideup.vi v4, v0, 1
149+
vrev8.v v9, v9
150+
vslidedown.vi v6, v9, 1
151+
vslideup.vi v6, v9, 1
152+
vxor.vv v0, v4, v6
153+
154+
# Process v1-v3 (similar to v0)
155+
156+
vclmul.vv v4, v1, v5
157+
vclmulh.vv v1, v1, v5
158+
vredxor.vs v1, v1, v8
159+
vredxor.vs v4, v4, v8
160+
vslideup.vi v4, v1, 1
161+
vrev8.v v10, v10
162+
vslidedown.vi v6, v10, 1
163+
vslideup.vi v6, v10, 1
164+
vxor.vv v1, v4, v6
165+
166+
vclmul.vv v4, v2, v5
167+
vclmulh.vv v2, v2, v5
168+
vredxor.vs v2, v2, v8
169+
vredxor.vs v4, v4, v8
170+
vslideup.vi v4, v2, 1
171+
vrev8.v v11, v11
172+
vslidedown.vi v6, v11, 1
173+
vslideup.vi v6, v11, 1
174+
vxor.vv v2, v4, v6
175+
176+
vclmul.vv v4, v3, v5
177+
vclmulh.vv v3, v3, v5
178+
vredxor.vs v3, v3, v8
179+
vredxor.vs v4, v4, v8
180+
vslideup.vi v4, v3, 1
181+
vrev8.v v12, v12
182+
vslidedown.vi v6, v12, 1
183+
vslideup.vi v6, v12, 1
184+
vxor.vv v3, v4, v6
185+
186+
addi a3, a3, -64
187+
bge a3, t0, loop_start
188+
189+
final_fold:
190+
la t0, k_const2
191+
vle64.v v5, 0(t0)
192+
vclmul.vv v6, v0, v5
193+
vclmulh.vv v7, v0, v5
194+
vredxor.vs v6, v6, v8
195+
vredxor.vs v7, v7, v8
196+
vslideup.vi v6, v7, 1
197+
vxor.vv v0, v6, v1
198+
vclmul.vv v6, v0, v5
199+
vclmulh.vv v7, v0, v5
200+
vredxor.vs v6, v6, v8
201+
vredxor.vs v7, v7, v8
202+
vslideup.vi v6, v7, 1
203+
vxor.vv v0, v6, v2
204+
vclmul.vv v6, v0, v5
205+
vclmulh.vv v7, v0, v5
206+
vredxor.vs v6, v6, v8
207+
vredxor.vs v7, v7, v8
208+
vslideup.vi v6, v7, 1
209+
vxor.vv v0, v6, v3
210+
211+
# Store result
212+
addi sp, sp, -16
213+
vse64.v v0, (sp)
214+
215+
# 128b -> 64b folding
216+
ld t0, 0(sp)
217+
ld t1, 8(sp)
218+
li t2, 0x2d560000
219+
li t3, 0x13680000
220+
clmul a4, t1, t2
221+
clmulh a5, t1, t2
222+
slli a6, t0, 32
223+
srli a7, t0, 32
224+
xor a4, a4, a6
225+
xor a5, a5, a7
226+
clmul a5, a5, t3
227+
xor a4, a4, a5
228+
229+
# Barrett reduction
230+
srli a5, a4, 32
231+
li t2, 0x1f65a57f9 # x_quo
232+
clmul a5, t2, a5
233+
srli a5, a5, 32
234+
li t3, 0x18bb70000 # x_poly
235+
clmul a5, a5, t3
236+
xor a0, a5, a4
237+
addi sp, sp, 16
238+
239+
tail_processing:
240+
# Process remaining bytes
241+
beqz a3, .end
242+
243+
# Call crc16_t10dif_generic equivalent for remaining bytes
244+
jal x0, .crc_table_loop_pre
245+
246+
.section .rodata
247+
.align 4
248+
initial_vector:
249+
.quad 0x00000000
250+
.quad 0x00000000
251+
252+
k_const1:
253+
.quad 0x87e70000
254+
.quad 0x371d0000
255+
256+
k_const2:
257+
.quad 0xfb0b0000
258+
.quad 0x4c1a0000
259+
260+
.LANCHOR0 = . + 0
261+
.type crc16tab, %object
262+
.size crc16tab, 1024
263+
crc16tab:
264+
.word 0x00000000, 0x8bb70000, 0x9cd90000, 0x176e0000, 0xb2050000, 0x39b20000, 0x2edc0000, 0xa56b0000
265+
.word 0xefbd0000, 0x640a0000, 0x73640000, 0xf8d30000, 0x5db80000, 0xd60f0000, 0xc1610000, 0x4ad60000
266+
.word 0x54cd0000, 0xdf7a0000, 0xc8140000, 0x43a30000, 0xe6c80000, 0x6d7f0000, 0x7a110000, 0xf1a60000
267+
.word 0xbb700000, 0x30c70000, 0x27a90000, 0xac1e0000, 0x09750000, 0x82c20000, 0x95ac0000, 0x1e1b0000
268+
.word 0xa99a0000, 0x222d0000, 0x35430000, 0xbef40000, 0x1b9f0000, 0x90280000, 0x87460000, 0x0cf10000
269+
.word 0x46270000, 0xcd900000, 0xdafe0000, 0x51490000, 0xf4220000, 0x7f950000, 0x68fb0000, 0xe34c0000
270+
.word 0xfd570000, 0x76e00000, 0x618e0000, 0xea390000, 0x4f520000, 0xc4e50000, 0xd38b0000, 0x583c0000
271+
.word 0x12ea0000, 0x995d0000, 0x8e330000, 0x05840000, 0xa0ef0000, 0x2b580000, 0x3c360000, 0xb7810000
272+
.word 0xd8830000, 0x53340000, 0x445a0000, 0xcfed0000, 0x6a860000, 0xe1310000, 0xf65f0000, 0x7de80000
273+
.word 0x373e0000, 0xbc890000, 0xabe70000, 0x20500000, 0x853b0000, 0x0e8c0000, 0x19e20000, 0x92550000
274+
.word 0x8c4e0000, 0x07f90000, 0x10970000, 0x9b200000, 0x3e4b0000, 0xb5fc0000, 0xa2920000, 0x29250000
275+
.word 0x63f30000, 0xe8440000, 0xff2a0000, 0x749d0000, 0xd1f60000, 0x5a410000, 0x4d2f0000, 0xc6980000
276+
.word 0x71190000, 0xfaae0000, 0xedc00000, 0x66770000, 0xc31c0000, 0x48ab0000, 0x5fc50000, 0xd4720000
277+
.word 0x9ea40000, 0x15130000, 0x027d0000, 0x89ca0000, 0x2ca10000, 0xa7160000, 0xb0780000, 0x3bcf0000
278+
.word 0x25d40000, 0xae630000, 0xb90d0000, 0x32ba0000, 0x97d10000, 0x1c660000, 0x0b080000, 0x80bf0000
279+
.word 0xca690000, 0x41de0000, 0x56b00000, 0xdd070000, 0x786c0000, 0xf3db0000, 0xe4b50000, 0x6f020000
280+
.word 0x3ab10000, 0xb1060000, 0xa6680000, 0x2ddf0000, 0x88b40000, 0x03030000, 0x146d0000, 0x9fda0000
281+
.word 0xd50c0000, 0x5ebb0000, 0x49d50000, 0xc2620000, 0x67090000, 0xecbe0000, 0xfbd00000, 0x70670000
282+
.word 0x6e7c0000, 0xe5cb0000, 0xf2a50000, 0x79120000, 0xdc790000, 0x57ce0000, 0x40a00000, 0xcb170000
283+
.word 0x81c10000, 0x0a760000, 0x1d180000, 0x96af0000, 0x33c40000, 0xb8730000, 0xaf1d0000, 0x24aa0000
284+
.word 0x932b0000, 0x189c0000, 0x0ff20000, 0x84450000, 0x212e0000, 0xaa990000, 0xbdf70000, 0x36400000
285+
.word 0x7c960000, 0xf7210000, 0xe04f0000, 0x6bf80000, 0xce930000, 0x45240000, 0x524a0000, 0xd9fd0000
286+
.word 0xc7e60000, 0x4c510000, 0x5b3f0000, 0xd0880000, 0x75e30000, 0xfe540000, 0xe93a0000, 0x628d0000
287+
.word 0x285b0000, 0xa3ec0000, 0xb4820000, 0x3f350000, 0x9a5e0000, 0x11e90000, 0x06870000, 0x8d300000
288+
.word 0xe2320000, 0x69850000, 0x7eeb0000, 0xf55c0000, 0x50370000, 0xdb800000, 0xccee0000, 0x47590000
289+
.word 0x0d8f0000, 0x86380000, 0x91560000, 0x1ae10000, 0xbf8a0000, 0x343d0000, 0x23530000, 0xa8e40000
290+
.word 0xb6ff0000, 0x3d480000, 0x2a260000, 0xa1910000, 0x04fa0000, 0x8f4d0000, 0x98230000, 0x13940000
291+
.word 0x59420000, 0xd2f50000, 0xc59b0000, 0x4e2c0000, 0xeb470000, 0x60f00000, 0x779e0000, 0xfc290000
292+
.word 0x4ba80000, 0xc01f0000, 0xd7710000, 0x5cc60000, 0xf9ad0000, 0x721a0000, 0x65740000, 0xeec30000
293+
.word 0xa4150000, 0x2fa20000, 0x38cc0000, 0xb37b0000, 0x16100000, 0x9da70000, 0x8ac90000, 0x017e0000
294+
.word 0x1f650000, 0x94d20000, 0x83bc0000, 0x080b0000, 0xad600000, 0x26d70000, 0x31b90000, 0xba0e0000
295+
.word 0xf0d80000, 0x7b6f0000, 0x6c010000, 0xe7b60000, 0x42dd0000, 0xc96a0000, 0xde040000, 0x55b30000
296+
297+

0 commit comments

Comments
 (0)