Skip to content

Commit 64e3586

Browse files
committed
x86/crc32: update prototype for crc_pcl()
- Change the len parameter from unsigned int to size_t, so that the library function which takes a size_t can safely use this code.
- Rename to crc32c_x86_3way(), which is much clearer.
- Move the crc parameter to the front, as this is the usual convention.

Reviewed-by: Ard Biesheuvel <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Eric Biggers <[email protected]>
1 parent 0f60a8a commit 64e3586

File tree

2 files changed

+35
-35
lines changed

2 files changed

+35
-35
lines changed

arch/x86/crypto/crc32c-intel_glue.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@
4141
*/
4242
#define CRC32C_PCL_BREAKEVEN 512
4343

44-
asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
45-
unsigned int crc_init);
44+
asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
4645
#endif /* CONFIG_X86_64 */
4746

4847
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -159,7 +158,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
159158
*/
160159
if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
161160
kernel_fpu_begin();
162-
*crcp = crc_pcl(data, len, *crcp);
161+
*crcp = crc32c_x86_3way(*crcp, data, len);
163162
kernel_fpu_end();
164163
} else
165164
*crcp = crc32c_intel_le_hw(*crcp, data, len);
@@ -171,7 +170,7 @@ static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
171170
{
172171
if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
173172
kernel_fpu_begin();
174-
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
173+
*(__le32 *)out = ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len));
175174
kernel_fpu_end();
176175
} else
177176
*(__le32 *)out =

arch/x86/crypto/crc32c-pcl-intel-asm_64.S

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,16 @@
5252
# regular CRC code that does not interleave the CRC instructions.
5353
#define SMALL_SIZE 200
5454

55-
# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
55+
# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
5656

5757
.text
58-
SYM_FUNC_START(crc_pcl)
59-
#define bufp %rdi
60-
#define bufp_d %edi
61-
#define len %esi
62-
#define crc_init %edx
63-
#define crc_init_q %rdx
58+
SYM_FUNC_START(crc32c_x86_3way)
59+
#define crc0 %edi
60+
#define crc0_q %rdi
61+
#define bufp %rsi
62+
#define bufp_d %esi
63+
#define len %rdx
64+
#define len_dw %edx
6465
#define n_misaligned %ecx /* overlaps chunk_bytes! */
6566
#define n_misaligned_q %rcx
6667
#define chunk_bytes %ecx /* overlaps n_misaligned! */
@@ -85,9 +86,9 @@ SYM_FUNC_START(crc_pcl)
8586
.Ldo_align:
8687
movq (bufp), %rax
8788
add n_misaligned_q, bufp
88-
sub n_misaligned, len
89+
sub n_misaligned_q, len
8990
.Lalign_loop:
90-
crc32b %al, crc_init # compute crc32 of 1-byte
91+
crc32b %al, crc0 # compute crc32 of 1-byte
9192
shr $8, %rax # get next byte
9293
dec n_misaligned
9394
jne .Lalign_loop
@@ -102,7 +103,7 @@ SYM_FUNC_START(crc_pcl)
102103

103104
.Lpartial_block:
104105
# Compute floor(len / 24) to get num qwords to process from each lane.
105-
imul $2731, len, %eax # 2731 = ceil(2^16 / 24)
106+
imul $2731, len_dw, %eax # 2731 = ceil(2^16 / 24)
106107
shr $16, %eax
107108
jmp .Lcrc_3lanes
108109

@@ -125,16 +126,16 @@ SYM_FUNC_START(crc_pcl)
125126
# Unroll the loop by a factor of 4 to reduce the overhead of the loop
126127
# bookkeeping instructions, which can compete with crc32q for the ALUs.
127128
.Lcrc_3lanes_4x_loop:
128-
crc32q (bufp), crc_init_q
129+
crc32q (bufp), crc0_q
129130
crc32q (bufp,chunk_bytes_q), crc1
130131
crc32q (bufp,chunk_bytes_q,2), crc2
131-
crc32q 8(bufp), crc_init_q
132+
crc32q 8(bufp), crc0_q
132133
crc32q 8(bufp,chunk_bytes_q), crc1
133134
crc32q 8(bufp,chunk_bytes_q,2), crc2
134-
crc32q 16(bufp), crc_init_q
135+
crc32q 16(bufp), crc0_q
135136
crc32q 16(bufp,chunk_bytes_q), crc1
136137
crc32q 16(bufp,chunk_bytes_q,2), crc2
137-
crc32q 24(bufp), crc_init_q
138+
crc32q 24(bufp), crc0_q
138139
crc32q 24(bufp,chunk_bytes_q), crc1
139140
crc32q 24(bufp,chunk_bytes_q,2), crc2
140141
add $32, bufp
@@ -146,15 +147,15 @@ SYM_FUNC_START(crc_pcl)
146147
jz .Lcrc_3lanes_last_qword
147148

148149
.Lcrc_3lanes_1x_loop:
149-
crc32q (bufp), crc_init_q
150+
crc32q (bufp), crc0_q
150151
crc32q (bufp,chunk_bytes_q), crc1
151152
crc32q (bufp,chunk_bytes_q,2), crc2
152153
add $8, bufp
153154
dec %eax
154155
jnz .Lcrc_3lanes_1x_loop
155156

156157
.Lcrc_3lanes_last_qword:
157-
crc32q (bufp), crc_init_q
158+
crc32q (bufp), crc0_q
158159
crc32q (bufp,chunk_bytes_q), crc1
159160
# SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet
160161

@@ -165,9 +166,9 @@ SYM_FUNC_START(crc_pcl)
165166
lea (K_table-8)(%rip), %rax # first entry is for idx 1
166167
pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2
167168
lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
168-
sub %eax, len # len -= chunk_bytes * 3
169+
sub %rax, len # len -= chunk_bytes * 3
169170

170-
movq crc_init_q, %xmm1 # CRC for block 1
171+
movq crc0_q, %xmm1 # CRC for block 1
171172
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
172173

173174
movq crc1, %xmm2 # CRC for block 2
@@ -176,8 +177,8 @@ SYM_FUNC_START(crc_pcl)
176177
pxor %xmm2,%xmm1
177178
movq %xmm1, %rax
178179
xor (bufp,chunk_bytes_q,2), %rax
179-
mov crc2, crc_init_q
180-
crc32 %rax, crc_init_q
180+
mov crc2, crc0_q
181+
crc32 %rax, crc0_q
181182
lea 8(bufp,chunk_bytes_q,2), bufp
182183

183184
################################################################
@@ -193,34 +194,34 @@ SYM_FUNC_START(crc_pcl)
193194
## 6) Process any remainder without interleaving:
194195
#######################################################################
195196
.Lsmall:
196-
test len, len
197+
test len_dw, len_dw
197198
jz .Ldone
198-
mov len, %eax
199+
mov len_dw, %eax
199200
shr $3, %eax
200201
jz .Ldo_dword
201202
.Ldo_qwords:
202-
crc32q (bufp), crc_init_q
203+
crc32q (bufp), crc0_q
203204
add $8, bufp
204205
dec %eax
205206
jnz .Ldo_qwords
206207
.Ldo_dword:
207-
test $4, len
208+
test $4, len_dw
208209
jz .Ldo_word
209-
crc32l (bufp), crc_init
210+
crc32l (bufp), crc0
210211
add $4, bufp
211212
.Ldo_word:
212-
test $2, len
213+
test $2, len_dw
213214
jz .Ldo_byte
214-
crc32w (bufp), crc_init
215+
crc32w (bufp), crc0
215216
add $2, bufp
216217
.Ldo_byte:
217-
test $1, len
218+
test $1, len_dw
218219
jz .Ldone
219-
crc32b (bufp), crc_init
220+
crc32b (bufp), crc0
220221
.Ldone:
221-
mov crc_init, %eax
222+
mov crc0, %eax
222223
RET
223-
SYM_FUNC_END(crc_pcl)
224+
SYM_FUNC_END(crc32c_x86_3way)
224225

225226
.section .rodata, "a", @progbits
226227
################################################################

0 commit comments

Comments
 (0)