Skip to content

Commit 2e0fbff

Browse files
committed
Calculate 256-byte coefficients
When using 512-bit registers, we need to use coefficient pairs for folding 256-byte distances, as opposed to the 128-byte folding distances used for smaller registers.
1 parent 481e76f commit 2e0fbff

File tree

10 files changed

+122
-89
lines changed

10 files changed

+122
-89
lines changed

src/algorithm.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ unsafe fn process_by_strategy<T: ArchOps, W: EnhancedCrcWidth>(
9090
data: &[u8],
9191
state: &mut CrcState<T::Vector>,
9292
reflector: Reflector<T::Vector>,
93-
keys: [u64; 21],
93+
keys: [u64; 23],
9494
ops: &T,
9595
) -> W::Value
9696
where
@@ -125,7 +125,7 @@ unsafe fn process_large_aligned<T: ArchOps, W: EnhancedCrcWidth>(
125125
bytes: &[u8],
126126
state: &mut CrcState<T::Vector>,
127127
reflector: Reflector<T::Vector>,
128-
keys: [u64; 21],
128+
keys: [u64; 23],
129129
ops: &T,
130130
) -> W::Value
131131
where
@@ -177,7 +177,7 @@ unsafe fn process_simd_chunks<T: ArchOps, W: EnhancedCrcWidth>(
177177
first: &[T::Vector; 8],
178178
rest: &[[T::Vector; 8]],
179179
reflector: &Reflector<T::Vector>,
180-
keys: [u64; 21],
180+
keys: [u64; 23],
181181
ops: &T,
182182
) where
183183
T::Vector: Copy,
@@ -256,7 +256,7 @@ unsafe fn process_exactly_16<T: ArchOps, W: EnhancedCrcWidth>(
256256
data: &[u8],
257257
state: &mut CrcState<T::Vector>,
258258
reflector: &Reflector<T::Vector>,
259-
keys: [u64; 21],
259+
keys: [u64; 23],
260260
ops: &T,
261261
) -> W::Value
262262
where
@@ -356,7 +356,7 @@ unsafe fn process_17_to_31<T: ArchOps, W: EnhancedCrcWidth>(
356356
data: &[u8],
357357
state: &mut CrcState<T::Vector>,
358358
reflector: &Reflector<T::Vector>,
359-
keys: [u64; 21],
359+
keys: [u64; 23],
360360
ops: &T,
361361
) -> W::Value
362362
where
@@ -395,7 +395,7 @@ unsafe fn process_32_to_255<T: ArchOps, W: EnhancedCrcWidth>(
395395
data: &[u8],
396396
state: &mut CrcState<T::Vector>,
397397
reflector: &Reflector<T::Vector>,
398-
keys: [u64; 21],
398+
keys: [u64; 23],
399399
ops: &T,
400400
) -> W::Value
401401
where
@@ -457,7 +457,7 @@ unsafe fn get_last_two_xmms<T: ArchOps, W: EnhancedCrcWidth>(
457457
data: &[u8],
458458
remaining_len: usize,
459459
current_state: T::Vector,
460-
keys: [u64; 21],
460+
keys: [u64; 23],
461461
reflector: &Reflector<T::Vector>,
462462
reflected: bool,
463463
ops: &T,

src/consts.rs

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -43,54 +43,6 @@ pub const CRC_64_NVME: Algorithm<u64> = Algorithm {
4343
residue: 0xf310303b2b6f6e42,
4444
};
4545

46-
pub(crate) const CRC32_EXPONENTS: [u64; 21] = [
47-
0, // unused, just aligns indexes with the literature
48-
32 * 3,
49-
32 * 5,
50-
32 * 31,
51-
32 * 33,
52-
32 * 3,
53-
32 * 2,
54-
0, // mu, generate separately
55-
0, // poly, generate separately
56-
32 * 27,
57-
32 * 29,
58-
32 * 23,
59-
32 * 25,
60-
32 * 19,
61-
32 * 21,
62-
32 * 15,
63-
32 * 17,
64-
32 * 11,
65-
32 * 13,
66-
32 * 7,
67-
32 * 9,
68-
];
69-
70-
pub(crate) const CRC64_EXPONENTS: [u64; 21] = [
71-
0, // unused, just aligns indexes with the literature
72-
64 * 2,
73-
64 * 3,
74-
64 * 16,
75-
64 * 17,
76-
64 * 2,
77-
64,
78-
0, // mu, generate separately
79-
0, // poly, generate separately
80-
64 * 14,
81-
64 * 15,
82-
64 * 12,
83-
64 * 13,
84-
64 * 10,
85-
64 * 11,
86-
64 * 8,
87-
64 * 9,
88-
64 * 6,
89-
64 * 7,
90-
64 * 4,
91-
64 * 5,
92-
];
93-
9446
// for software fallbacks and testing
9547
pub(crate) const RUST_CRC32_AIXM: crc::Crc<u32> = crc::Crc::<u32>::new(&crc::CRC_32_AIXM);
9648

src/crc32/algorithm.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ impl EnhancedCrcWidth for crate::structs::Width32 {
188188
unsafe fn perform_final_reduction<T: ArchOps>(
189189
state: T::Vector,
190190
reflected: bool,
191-
keys: [u64; 21],
191+
keys: [u64; 23],
192192
ops: &T,
193193
) -> Self::Value
194194
where
@@ -237,7 +237,7 @@ pub(crate) unsafe fn process_0_to_15<T: ArchOps, W: EnhancedCrcWidth>(
237237
data: &[u8],
238238
state: &mut CrcState<T::Vector>,
239239
reflector: &Reflector<T::Vector>,
240-
keys: [u64; 21],
240+
keys: [u64; 23],
241241
ops: &T,
242242
) -> W::Value
243243
where

src/crc32/consts.rs

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ pub const CRC32_XFER: CrcParams = CrcParams {
183183
};
184184

185185
// CRC-32/AIXM
186-
pub const KEYS_814141AB_FORWARD: [u64; 21] = [
186+
pub const KEYS_814141AB_FORWARD: [u64; 23] = [
187187
0x0000000000000000,
188188
0x9be9878f00000000,
189189
0x85b2a6e400000000,
@@ -205,10 +205,12 @@ pub const KEYS_814141AB_FORWARD: [u64; 21] = [
205205
0x361f380200000000,
206206
0x6757ee2f00000000,
207207
0xffc42e7700000000,
208+
0xd12a88300000000,
209+
0x93a03b8800000000,
208210
];
209211

210212
// CRC-32/AUTOSAR
211-
pub const KEYS_F4ACFB13_REFLECTED: [u64; 21] = [
213+
pub const KEYS_F4ACFB13_REFLECTED: [u64; 23] = [
212214
0x0000000000000000,
213215
0x000000016130902a,
214216
0x0000000050428a9c,
@@ -230,10 +232,12 @@ pub const KEYS_F4ACFB13_REFLECTED: [u64; 21] = [
230232
0x0000000049cb6c68,
231233
0x00000000c9d55d76,
232234
0x0000000022919656,
235+
0x00000001e97b6a9e,
236+
0x00000000000cbd7c,
233237
];
234238

235239
// CRC-32/BASE91-D
236-
pub const KEYS_A833982B_REFLECTED: [u64; 21] = [
240+
pub const KEYS_A833982B_REFLECTED: [u64; 23] = [
237241
0x0000000000000000,
238242
0x00000001e065d896,
239243
0x00000001aca6d990,
@@ -255,10 +259,12 @@ pub const KEYS_A833982B_REFLECTED: [u64; 21] = [
255259
0x00000001942367fa,
256260
0x00000000c2044564,
257261
0x00000001a07ba234,
262+
0x000000010ffc58e6,
263+
0x000000015920d7a6,
258264
];
259265

260266
// CRC-32/CD-ROM-EDC
261-
pub const KEYS_8001801B_REFLECTED: [u64; 21] = [
267+
pub const KEYS_8001801B_REFLECTED: [u64; 23] = [
262268
0x0000000000000000,
263269
0x00000001d5934102,
264270
0x000000006c90c100,
@@ -280,10 +286,12 @@ pub const KEYS_8001801B_REFLECTED: [u64; 21] = [
280286
0x00000001517f91c2,
281287
0x00000001f75a6182,
282288
0x00000000bd01c000,
289+
0x00000001bcb30820,
290+
0x000000010d925102,
283291
];
284292

285293
// CRC-32/MEF
286-
pub const KEYS_741B8CD7_REFLECTED: [u64; 21] = [
294+
pub const KEYS_741B8CD7_REFLECTED: [u64; 23] = [
287295
0x0000000000000000,
288296
0x000000014b0602f8,
289297
0x000000007b4bc878,
@@ -305,10 +313,12 @@ pub const KEYS_741B8CD7_REFLECTED: [u64; 21] = [
305313
0x0000000097259f1a,
306314
0x00000000adfa5198,
307315
0x000000009c899030,
316+
0x00000001adf2908e,
317+
0x00000001f91b48f0,
308318
];
309319

310320
// CRC-32/XFER
311-
pub const KEYS_000000AF_FORWARD: [u64; 21] = [
321+
pub const KEYS_000000AF_FORWARD: [u64; 23] = [
312322
0x0000000000000000,
313323
0x00295f2300000000,
314324
0xfafa517900000000,
@@ -330,10 +340,12 @@ pub const KEYS_000000AF_FORWARD: [u64; 21] = [
330340
0x784a483800000000,
331341
0x7d21bf2000000000,
332342
0xfaebd3d300000000,
343+
0x25ed382b00000000,
344+
0x6d2b811a00000000,
333345
];
334346

335347
// CRC-32/ISO-HDLC (aka 'crc32'), CRC-32/JAMCRC
336-
const KEYS_04C11DB7_REFLECTED: [u64; 21] = [
348+
const KEYS_04C11DB7_REFLECTED: [u64; 23] = [
337349
0x0000000000000000, // unused placeholder to match 1-based indexing
338350
0x00000000ccaa009e, // (2^(32* 3) mod P(x))' << 1
339351
0x00000001751997d0, // (2^(32* 5) mod P(x))' << 1
@@ -355,10 +367,12 @@ const KEYS_04C11DB7_REFLECTED: [u64; 21] = [
355367
0x000000003db1ecdc, // (2^(32*13) mod P(x))' << 1
356368
0x000000015a546366, // (2^(32* 7) mod P(x))' << 1
357369
0x00000000f1da05aa, // (2^(32* 9) mod P(x))' << 1
370+
0x00000001322d1430,
371+
0x000000011542778a,
358372
];
359373

360374
// CRC-32/ISCSI (aka 'crc32c')
361-
const KEYS_1EDC6F41_REFLECTED: [u64; 21] = [
375+
const KEYS_1EDC6F41_REFLECTED: [u64; 23] = [
362376
0x0000000000000000, // unused placeholder to match 1-based indexing
363377
0x000000014cd00bd6, // (2^(32* 3) mod P(x))' << 1
364378
0x00000000f20c0dfe, // (2^(32* 5) mod P(x))' << 1
@@ -380,10 +394,12 @@ const KEYS_1EDC6F41_REFLECTED: [u64; 21] = [
380394
0x000000001c291d04, // (2^(32*13) mod P(x))' << 1
381395
0x00000000ba4fc28e, // (2^(32* 7) mod P(x))' << 1
382396
0x00000001384aa63a, // (2^(32* 9) mod P(x))' << 1
397+
0x00000000b9e02b86,
398+
0x00000000dcb17aa4,
383399
];
384400

385401
// CRC-32/BZIP2, CRC-32/CKSUM, CRC-32/MPEG-2
386-
const KEYS_04C11DB7_FORWARD: [u64; 21] = [
402+
const KEYS_04C11DB7_FORWARD: [u64; 23] = [
387403
0x0000000000000000, // unused placeholder to match 1-based indexing
388404
0xf200aa6600000000, // 2^(32* 3) mod P(x) << 32
389405
0x17d3315d00000000, // 2^(32* 5) mod P(x) << 32
@@ -405,6 +421,8 @@ const KEYS_04C11DB7_FORWARD: [u64; 21] = [
405421
0x766f1b7800000000, // 2^(32*13) mod P(x) << 32
406422
0xcd8c54b500000000, // 2^(32* 7) mod P(x) << 32
407423
0xab40b71e00000000, // 2^(32* 9) mod P(x) << 32
424+
0x1851689900000000,
425+
0xa3dc855100000000,
408426
];
409427

410428
pub(crate) const SIMD_CONSTANTS: [[u64; 2]; 4] = [

src/crc64/algorithm.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ impl EnhancedCrcWidth for crate::structs::Width64 {
146146
unsafe fn perform_final_reduction<T: ArchOps>(
147147
state: T::Vector,
148148
reflected: bool,
149-
keys: [u64; 21],
149+
keys: [u64; 23],
150150
ops: &T,
151151
) -> Self::Value
152152
where
@@ -213,7 +213,7 @@ pub(crate) unsafe fn process_0_to_15<T: ArchOps, W: EnhancedCrcWidth>(
213213
data: &[u8],
214214
state: &mut CrcState<T::Vector>,
215215
reflector: &Reflector<T::Vector>,
216-
keys: [u64; 21],
216+
keys: [u64; 23],
217217
ops: &T,
218218
) -> W::Value
219219
where

0 commit comments

Comments
 (0)