Skip to content

Commit e75b27d

Browse files
argon2: add parallelism (#547)
Adds a `parallel` feature, with an optional dependency on `rayon`, and parallelizes the filling of blocks. Coordinated shared access to the memory blocks is implemented with a `SegmentViewIter` iterator, which implements either `rayon::iter::ParallelIterator` or `core::iter::Iterator` and returns `SegmentView` views into the Argon2 block memory that are safe to use in parallel. The views alias in the regions that are read-only, but are disjoint in the regions where mutation happens. Effectively, they implement, with a combination of mutable borrowing and runtime checking, the cooperative contract outlined in RFC 9106. This is similar to what was suggested in #380. To avoid aliasing mutable references into the entire buffer of blocks (which would be UB), pointers are used up to the moment where a reference (shared or mutable) into a specific block is returned. At that point, aliasing is no longer possible. The following tests have been run under Miri and pass (modulo unrelated warnings): `reference_argon2i_v0x13_2_8_2` and `reference_argon2id_v0x13_2_8_2`.
1 parent 6527622 commit e75b27d

File tree

6 files changed

+292
-141
lines changed

6 files changed

+292
-141
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

argon2/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ base64ct = "1.7"
2121
blake2 = { version = "0.11.0-rc.0", default-features = false }
2222

2323
# optional dependencies
24+
rayon = { version = "1.7", optional = true }
2425
password-hash = { version = "0.6.0-rc.1", optional = true }
2526
zeroize = { version = "1", default-features = false, optional = true }
2627

@@ -36,6 +37,7 @@ default = ["alloc", "password-hash", "rand"]
3637
alloc = ["password-hash?/alloc"]
3738
std = ["alloc", "password-hash?/os_rng", "base64ct/std"]
3839

40+
parallel = ["dep:rayon"]
3941
rand = ["password-hash?/rand_core"]
4042
simple = ["password-hash"]
4143
zeroize = ["dep:zeroize"]

argon2/src/lib.rs

Lines changed: 104 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
clippy::cast_sign_loss,
1414
clippy::checked_conversions,
1515
clippy::implicit_saturating_sub,
16+
clippy::missing_safety_doc,
1617
clippy::panic,
1718
clippy::panic_in_result_fn,
19+
clippy::undocumented_unsafe_blocks,
1820
clippy::unwrap_used,
1921
missing_docs,
2022
rust_2018_idioms,
@@ -153,6 +155,7 @@ mod algorithm;
153155
mod blake2b_long;
154156
mod block;
155157
mod error;
158+
mod memory;
156159
mod params;
157160
mod version;
158161

@@ -173,6 +176,7 @@ pub use {
173176
use crate::blake2b_long::blake2b_long;
174177
use blake2::{Blake2b512, Digest, digest};
175178
use core::fmt;
179+
use memory::Memory;
176180

177181
#[cfg(all(feature = "alloc", feature = "password-hash"))]
178182
use password_hash::{Decimal, Ident, ParamsString, Salt};
@@ -347,7 +351,7 @@ impl<'key> Argon2<'key> {
347351
mut initial_hash: digest::Output<Blake2b512>,
348352
) -> Result<()> {
349353
let block_count = self.params.block_count();
350-
let memory_blocks = memory_blocks
354+
let mut memory_blocks = memory_blocks
351355
.get_mut(..block_count)
352356
.ok_or(Error::MemoryTooLittle)?;
353357

@@ -381,133 +385,133 @@ impl<'key> Argon2<'key> {
381385

382386
// Run passes on blocks
383387
for pass in 0..iterations {
384-
for slice in 0..SYNC_POINTS {
388+
memory_blocks.for_each_segment(lanes, |mut memory_view, slice, lane| {
385389
let data_independent_addressing = self.algorithm == Algorithm::Argon2i
386390
|| (self.algorithm == Algorithm::Argon2id
387391
&& pass == 0
388392
&& slice < SYNC_POINTS / 2);
389393

390-
for lane in 0..lanes {
391-
let mut address_block = Block::default();
392-
let mut input_block = Block::default();
393-
let zero_block = Block::default();
394+
let mut address_block = Block::default();
395+
let mut input_block = Block::default();
396+
let zero_block = Block::default();
397+
398+
if data_independent_addressing {
399+
input_block.as_mut()[..6].copy_from_slice(&[
400+
pass as u64,
401+
lane as u64,
402+
slice as u64,
403+
block_count as u64,
404+
iterations as u64,
405+
self.algorithm as u64,
406+
]);
407+
}
394408

409+
let first_block = if pass == 0 && slice == 0 {
395410
if data_independent_addressing {
396-
input_block.as_mut()[..6].copy_from_slice(&[
397-
pass as u64,
398-
lane as u64,
399-
slice as u64,
400-
memory_blocks.len() as u64,
401-
iterations as u64,
402-
self.algorithm as u64,
403-
]);
411+
// Generate first set of addresses
412+
self.update_address_block(
413+
&mut address_block,
414+
&mut input_block,
415+
&zero_block,
416+
);
404417
}
405418

406-
let first_block = if pass == 0 && slice == 0 {
407-
if data_independent_addressing {
408-
// Generate first set of addresses
419+
// The first two blocks of each lane are already initialized
420+
2
421+
} else {
422+
0
423+
};
424+
425+
let mut cur_index = lane * lane_length + slice * segment_length + first_block;
426+
let mut prev_index = if slice == 0 && first_block == 0 {
427+
// Last block in current lane
428+
cur_index + lane_length - 1
429+
} else {
430+
// Previous block
431+
cur_index - 1
432+
};
433+
434+
// Fill blocks in the segment
435+
for block in first_block..segment_length {
436+
// Extract entropy
437+
let rand = if data_independent_addressing {
438+
let address_index = block % ADDRESSES_IN_BLOCK;
439+
440+
if address_index == 0 {
409441
self.update_address_block(
410442
&mut address_block,
411443
&mut input_block,
412444
&zero_block,
413445
);
414446
}
415447

416-
// The first two blocks of each lane are already initialized
417-
2
448+
address_block.as_ref()[address_index]
418449
} else {
419-
0
450+
memory_view.get_block(prev_index).as_ref()[0]
420451
};
421452

422-
let mut cur_index = lane * lane_length + slice * segment_length + first_block;
423-
let mut prev_index = if slice == 0 && first_block == 0 {
424-
// Last block in current lane
425-
cur_index + lane_length - 1
453+
// Calculate source block index for compress function
454+
let ref_lane = if pass == 0 && slice == 0 {
455+
// Cannot reference other lanes yet
456+
lane
426457
} else {
427-
// Previous block
428-
cur_index - 1
458+
(rand >> 32) as usize % lanes
429459
};
430460

431-
// Fill blocks in the segment
432-
for block in first_block..segment_length {
433-
// Extract entropy
434-
let rand = if data_independent_addressing {
435-
let address_index = block % ADDRESSES_IN_BLOCK;
436-
437-
if address_index == 0 {
438-
self.update_address_block(
439-
&mut address_block,
440-
&mut input_block,
441-
&zero_block,
442-
);
443-
}
444-
445-
address_block.as_ref()[address_index]
446-
} else {
447-
memory_blocks[prev_index].as_ref()[0]
448-
};
449-
450-
// Calculate source block index for compress function
451-
let ref_lane = if pass == 0 && slice == 0 {
452-
// Cannot reference other lanes yet
453-
lane
454-
} else {
455-
(rand >> 32) as usize % lanes
456-
};
457-
458-
let reference_area_size = if pass == 0 {
459-
// First pass
460-
if slice == 0 {
461-
// First slice
462-
block - 1 // all but the previous
463-
} else if ref_lane == lane {
464-
// The same lane => add current segment
465-
slice * segment_length + block - 1
466-
} else {
467-
slice * segment_length - if block == 0 { 1 } else { 0 }
468-
}
461+
let reference_area_size = if pass == 0 {
462+
// First pass
463+
if slice == 0 {
464+
// First slice
465+
block - 1 // all but the previous
466+
} else if ref_lane == lane {
467+
// The same lane => add current segment
468+
slice * segment_length + block - 1
469469
} else {
470-
// Second pass
471-
if ref_lane == lane {
472-
lane_length - segment_length + block - 1
473-
} else {
474-
lane_length - segment_length - if block == 0 { 1 } else { 0 }
475-
}
476-
};
477-
478-
// 1.2.4. Mapping rand to 0..<reference_area_size-1> and produce
479-
// relative position
480-
let mut map = rand & 0xFFFFFFFF;
481-
map = (map * map) >> 32;
482-
let relative_position = reference_area_size
483-
- 1
484-
- ((reference_area_size as u64 * map) >> 32) as usize;
485-
486-
// 1.2.5 Computing starting position
487-
let start_position = if pass != 0 && slice != SYNC_POINTS - 1 {
488-
(slice + 1) * segment_length
470+
slice * segment_length - if block == 0 { 1 } else { 0 }
471+
}
472+
} else {
473+
// Second pass
474+
if ref_lane == lane {
475+
lane_length - segment_length + block - 1
489476
} else {
490-
0
491-
};
477+
lane_length - segment_length - if block == 0 { 1 } else { 0 }
478+
}
479+
};
492480

493-
let lane_index = (start_position + relative_position) % lane_length;
494-
let ref_index = ref_lane * lane_length + lane_index;
481+
// 1.2.4. Mapping rand to 0..<reference_area_size-1> and produce
482+
// relative position
483+
let mut map = rand & 0xFFFFFFFF;
484+
map = (map * map) >> 32;
485+
let relative_position = reference_area_size
486+
- 1
487+
- ((reference_area_size as u64 * map) >> 32) as usize;
488+
489+
// 1.2.5 Computing starting position
490+
let start_position = if pass != 0 && slice != SYNC_POINTS - 1 {
491+
(slice + 1) * segment_length
492+
} else {
493+
0
494+
};
495495

496-
// Calculate new block
497-
let result =
498-
self.compress(&memory_blocks[prev_index], &memory_blocks[ref_index]);
496+
let lane_index = (start_position + relative_position) % lane_length;
497+
let ref_index = ref_lane * lane_length + lane_index;
499498

500-
if self.version == Version::V0x10 || pass == 0 {
501-
memory_blocks[cur_index] = result;
502-
} else {
503-
memory_blocks[cur_index] ^= &result;
504-
};
499+
// Calculate new block
500+
let result = self.compress(
501+
memory_view.get_block(prev_index),
502+
memory_view.get_block(ref_index),
503+
);
505504

506-
prev_index = cur_index;
507-
cur_index += 1;
508-
}
505+
if self.version == Version::V0x10 || pass == 0 {
506+
*memory_view.get_block_mut(cur_index) = result;
507+
} else {
508+
*memory_view.get_block_mut(cur_index) ^= &result;
509+
};
510+
511+
prev_index = cur_index;
512+
cur_index += 1;
509513
}
510-
}
514+
});
511515
}
512516

513517
Ok(())
@@ -523,6 +527,7 @@ impl<'key> Argon2<'key> {
523527
}
524528

525529
if self.cpu_feat_avx2.get() {
530+
// SAFETY: checked that AVX2 was detected.
526531
return unsafe { compress_avx2(rhs, lhs) };
527532
}
528533
}

0 commit comments

Comments
 (0)