diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index 2c28b345..7de35100 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -1,9 +1,9 @@ #![allow(unsafe_op_in_unsafe_fn)] -use crate::Rounds; +use crate::{Rounds, Variant}; use core::marker::PhantomData; #[cfg(feature = "rng")] -use crate::{ChaChaCore, Variant}; +use crate::ChaChaCore; #[cfg(feature = "cipher")] use crate::{chacha::Block, STATE_WORDS}; @@ -27,10 +27,11 @@ const N: usize = PAR_BLOCKS / 2; #[inline] #[target_feature(enable = "avx2")] #[cfg(feature = "cipher")] -pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) +pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamCipherClosure, + V: Variant, { let state_ptr = state.as_ptr() as *const __m128i; let v = [ @@ -39,13 +40,21 @@ where _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), ]; let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); + c = match size_of::() { + 4 => _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)), + 8 => _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)), + _ => unreachable!() + }; let mut ctr = [c; N]; for i in 0..N { ctr[i] = c; - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); + c = match size_of::() { + 4 => _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)), + 8 => _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)), + _ => unreachable!(), + }; } - let mut backend = Backend:: { + let mut backend = Backend:: { v, ctr, _pd: PhantomData, @@ -54,6 +63,11 @@ where f.call(&mut backend); state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; + match size_of::() { + 4 => {}, + 8 => state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32, + _ => unreachable!() + } } #[inline] @@ -71,13 +85,13 @@ where _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), ]; let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)); let mut ctr = [c; N]; for i in 0..N { ctr[i] = c; - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); } - let mut backend = Backend:: { + let mut backend = Backend:: { v, ctr, _pd: PhantomData, @@ -86,32 +100,37 @@ where backend.rng_gen_par_ks_blocks(buffer); core.state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; + core.state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; } -struct Backend { +struct Backend { v: [__m256i; 3], ctr: [__m256i; N], - _pd: PhantomData, + _pd: PhantomData<(R, V)>, } #[cfg(feature = "cipher")] -impl BlockSizeUser for Backend { +impl BlockSizeUser for Backend { type BlockSize = U64; } #[cfg(feature = "cipher")] -impl ParBlocksSizeUser for Backend { +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } #[cfg(feature = "cipher")] -impl StreamCipherBackend for Backend { +impl StreamCipherBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { unsafe { let res = rounds::(&self.v, &self.ctr); for c in self.ctr.iter_mut() { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)); + *c = match size_of::() { + 4 => _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)), + 8 => _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)), + _ => unreachable!() + }; } let res0: [__m128i; 8] = core::mem::transmute(res[0]); @@ -130,7 +149,11 @@ impl StreamCipherBackend for Backend { let pb = PAR_BLOCKS as i32; for c in self.ctr.iter_mut() { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); + *c = match size_of::() { + 4 => _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)), + 8 => _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)), + _ => unreachable!() + } } let mut block_ptr = blocks.as_mut_ptr() as *mut __m128i; @@ -147,7 +170,7 @@ impl StreamCipherBackend for Backend { } #[cfg(feature = "rng")] -impl Backend { +impl Backend { #[inline(always)] fn rng_gen_par_ks_blocks(&mut self, blocks: &mut [u32; 64]) { unsafe { @@ -155,7 +178,7 @@ impl Backend { let pb = PAR_BLOCKS as i32; for c in self.ctr.iter_mut() { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); + *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)); } let mut block_ptr = blocks.as_mut_ptr() as *mut __m128i; diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 777a01fb..67cbed49 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -4,11 +4,11 @@ //! Adapted from the Crypto++ `chacha_simd` implementation by Jack Lloyd and //! Jeffrey Walton (public domain). -use crate::{Rounds, STATE_WORDS}; +use crate::{Rounds, STATE_WORDS, Variant}; use core::{arch::aarch64::*, marker::PhantomData}; #[cfg(feature = "rand_core")] -use crate::{ChaChaCore, Variant}; +use crate::ChaChaCore; #[cfg(feature = "cipher")] use crate::chacha::Block; @@ -19,13 +19,26 @@ use cipher::{ consts::{U4, U64}, }; -struct Backend { +struct Backend { state: [uint32x4_t; 4], ctrs: [uint32x4_t; 4], - _pd: PhantomData, + _pd: PhantomData<(R, V)>, } -impl Backend { +macro_rules! add_counter { + ($a:expr, $b:expr, $variant:ty) => { + match size_of::<<$variant>::Counter>() { + 4 => vaddq_u32($a, $b), + 8 => vreinterpretq_u32_u64(vaddq_u64( + vreinterpretq_u64_u32($a), + vreinterpretq_u64_u32($b), + )), + _ => unreachable!(), + } + }; +} + +impl Backend { #[inline] unsafe fn new(state: &mut [u32; STATE_WORDS]) -> Self { let state = [ @@ -40,7 +53,7 @@ impl Backend { vld1q_u32([3, 0, 0, 0].as_ptr()), vld1q_u32([4, 0, 0, 0].as_ptr()), ]; - Backend:: { + Backend:: { state, ctrs, _pd: PhantomData, @@ -51,16 +64,24 @@ impl Backend { #[inline] #[cfg(feature = "cipher")] #[target_feature(enable = "neon")] -pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) +pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamCipherClosure, + V: Variant, { - let mut backend = Backend::::new(state); + let mut backend = Backend::::new(state); f.call(&mut backend); - vst1q_u32(state.as_mut_ptr().offset(12), backend.state[3]); + match size_of::() { + 4 => state[12] = vgetq_lane_u32(backend.state[3], 0), + 8 => vst1q_u64( + state.as_mut_ptr().offset(12) as *mut u64, + vreinterpretq_u64_u32(backend.state[3]), + ), + _ => unreachable!(), + } } #[inline] @@ -73,19 +94,22 @@ where R: Rounds, V: Variant, { - let mut backend = Backend::::new(&mut core.state); + let mut backend = Backend::::new(&mut core.state); backend.write_par_ks_blocks(buffer); - vst1q_u32(core.state.as_mut_ptr().offset(12), backend.state[3]); + vst1q_u64( + core.state.as_mut_ptr().offset(12) as *mut u64, + vreinterpretq_u64_u32(backend.state[3]), + ); } #[cfg(feature = "cipher")] -impl BlockSizeUser for Backend { +impl BlockSizeUser for Backend { type BlockSize = U64; } #[cfg(feature = "cipher")] -impl ParBlocksSizeUser for Backend { +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } @@ -97,7 +121,7 @@ macro_rules! add_assign_vec { } #[cfg(feature = "cipher")] -impl StreamCipherBackend for Backend { +impl StreamCipherBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let state3 = self.state[3]; @@ -105,7 +129,7 @@ impl StreamCipherBackend for Backend { self.gen_par_ks_blocks(&mut par); *block = par[0]; unsafe { - self.state[3] = vaddq_u32(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); + self.state[3] = add_counter!(state3, vld1q_u32([1, 0, 0, 0].as_ptr()), V); } } @@ -118,19 +142,19 @@ impl StreamCipherBackend for Backend { self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[0]), + add_counter!(self.state[3], self.ctrs[0], V), ], [ self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[1]), + add_counter!(self.state[3], self.ctrs[1], V), ], [ self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[2]), + add_counter!(self.state[3], self.ctrs[2], V), ], ]; @@ -140,11 +164,16 @@ impl StreamCipherBackend for Backend { for block in 0..4 { // add state to block - for state_row in 0..4 { + for state_row in 0..3 { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - blocks[block][3] = vaddq_u32(blocks[block][3], self.ctrs[block - 1]); + add_assign_vec!( + blocks[block][3], + add_counter!(self.state[3], self.ctrs[block - 1], V) + ); + } else { + add_assign_vec!(blocks[block][3], self.state[3]); } // write blocks to dest for state_row in 0..4 { @@ -154,7 +183,7 @@ impl StreamCipherBackend for Backend { ); } } - self.state[3] = vaddq_u32(self.state[3], self.ctrs[3]); + self.state[3] = add_counter!(self.state[3], self.ctrs[3], V); } } } @@ -180,7 +209,7 @@ macro_rules! extract { }; } -impl Backend { +impl Backend { #[inline(always)] /// Generates `num_blocks` blocks and blindly writes them to `dest_ptr` /// @@ -197,19 +226,19 @@ impl Backend { self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[0]), + add_counter!(self.state[3], self.ctrs[0], V), ], [ self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[1]), + add_counter!(self.state[3], self.ctrs[1], V), ], [ self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[2]), + add_counter!(self.state[3], self.ctrs[2], V), ], ]; @@ -220,11 +249,16 @@ impl Backend { let mut dest_ptr = buffer.as_mut_ptr() as *mut u8; for block in 0..4 { // add state to block - for state_row in 0..4 { + for state_row in 0..3 { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - blocks[block][3] = vaddq_u32(blocks[block][3], self.ctrs[block - 1]); + add_assign_vec!( + blocks[block][3], + add_counter!(self.state[3], self.ctrs[block - 1], V) + ); + } else { + add_assign_vec!(blocks[block][3], self.state[3]); } // write blocks to buffer for state_row in 0..4 { @@ -235,7 +269,7 @@ impl Backend { } dest_ptr = dest_ptr.add(64); } - self.state[3] = vaddq_u32(self.state[3], self.ctrs[3]); + self.state[3] = add_counter!(self.state[3], self.ctrs[3], V); } } diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index e0614138..3ce4f6b1 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -11,6 +11,9 @@ use cipher::{ consts::{U1, U64}, }; +#[cfg(feature = "rng")] +use crate::rng::BLOCK_WORDS; + pub(crate) struct Backend<'a, R: Rounds, V: Variant>(pub(crate) &'a mut ChaChaCore); #[cfg(feature = "cipher")] @@ -28,7 +31,12 @@ impl StreamCipherBackend for Backend<'_, R, V> { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let res = run_rounds::(&self.0.state); - self.0.state[12] = self.0.state[12].wrapping_add(1); + let mut ctr = (u64::from(self.0.state[13]) << 32) | u64::from(self.0.state[12]); + ctr = ctr.wrapping_add(1); + self.0.state[12] = ctr as u32; + if size_of::() == 8 { + self.0.state[13] = (ctr >> 32) as u32 + } for (chunk, val) in block.chunks_exact_mut(4).zip(res.iter()) { chunk.copy_from_slice(&val.to_le_bytes()); @@ -40,13 +48,15 @@ impl StreamCipherBackend for Backend<'_, R, V> { impl Backend<'_, R, V> { #[inline(always)] pub(crate) fn gen_ks_blocks(&mut self, buffer: &mut [u32; 64]) { - for i in 0..4 { + for block in 0..4 { let res = run_rounds::(&self.0.state); - self.0.state[12] = self.0.state[12].wrapping_add(1); + let mut ctr = u64::from(self.0.state[13]) << 32 | u64::from(self.0.state[12]); + ctr = ctr.wrapping_add(1); + self.0.state[12] = ctr as u32; + self.0.state[13] = (ctr >> 32) as u32; - for (word, val) in buffer[i << 4..(i + 1) << 4].iter_mut().zip(res.iter()) { - *word = val.to_le(); - } + buffer[block * BLOCK_WORDS as usize..(block + 1) * BLOCK_WORDS as usize] + .copy_from_slice(&res); } } } diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index dcdd0830..e2f66dc1 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -1,8 +1,8 @@ #![allow(unsafe_op_in_unsafe_fn)] -use crate::Rounds; +use crate::{Rounds, Variant}; #[cfg(feature = "rng")] -use crate::{ChaChaCore, Variant}; +use crate::{ChaChaCore}; #[cfg(feature = "cipher")] use crate::{chacha::Block, STATE_WORDS}; @@ -23,13 +23,14 @@ const PAR_BLOCKS: usize = 4; #[inline] #[target_feature(enable = "sse2")] #[cfg(feature = "cipher")] -pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) +pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamCipherClosure, + V: Variant, { let state_ptr = state.as_ptr() as *const __m128i; - let mut backend = Backend:: { + let mut backend = Backend:: { v: [ _mm_loadu_si128(state_ptr.add(0)), _mm_loadu_si128(state_ptr.add(1)), @@ -42,30 +43,37 @@ where f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; + if size_of::() == 8 { + state[13] = _mm_extract_epi32(backend.v[3], 1) as u32 + } } -struct Backend { +struct Backend { v: [__m128i; 4], - _pd: PhantomData, + _pd: PhantomData<(R, V)>, } #[cfg(feature = "cipher")] -impl BlockSizeUser for Backend { +impl BlockSizeUser for Backend { type BlockSize = U64; } #[cfg(feature = "cipher")] -impl ParBlocksSizeUser for Backend { +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } #[cfg(feature = "cipher")] -impl StreamCipherBackend for Backend { +impl StreamCipherBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { unsafe { - let res = rounds::(&self.v); - self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)); + let res = rounds::(&self.v); + self.v[3] = match size_of::() { + 4 => _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)), + 8 => _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)), + _ => unreachable!() + }; let block_ptr = block.as_mut_ptr() as *mut __m128i; for i in 0..4 { @@ -76,8 +84,12 @@ impl StreamCipherBackend for Backend { #[inline(always)] fn gen_par_ks_blocks(&mut self, blocks: &mut cipher::ParBlocks) { unsafe { - let res = rounds::(&self.v); - self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, PAR_BLOCKS as i32)); + let res = rounds::(&self.v); + self.v[3] = match size_of::() { + 4 => _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, PAR_BLOCKS as i32)), + 8 => _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)), + _ => unreachable!() + }; let blocks_ptr = blocks.as_mut_ptr() as *mut __m128i; for block in 0..PAR_BLOCKS { @@ -98,7 +110,7 @@ where V: Variant, { let state_ptr = core.state.as_ptr() as *const __m128i; - let mut backend = Backend:: { + let mut backend = Backend:: { v: [ _mm_loadu_si128(state_ptr.add(0)), _mm_loadu_si128(state_ptr.add(1)), @@ -111,16 +123,17 @@ where backend.gen_ks_blocks(buffer); core.state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; + core.state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; } #[cfg(feature = "rng")] -impl Backend { +impl Backend { #[inline(always)] fn gen_ks_blocks(&mut self, block: &mut [u32; 64]) { const _: () = assert!(4 * PAR_BLOCKS * size_of::<__m128i>() == size_of::<[u32; 64]>()); unsafe { - let res = rounds::(&self.v); - self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, PAR_BLOCKS as i32)); + let res = rounds::(&self.v); + self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)); let blocks_ptr = block.as_mut_ptr() as *mut __m128i; for block in 0..PAR_BLOCKS { @@ -134,10 +147,14 @@ impl Backend { #[inline] #[target_feature(enable = "sse2")] -unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_BLOCKS] { +unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_BLOCKS] { let mut res = [*v; 4]; for block in 1..PAR_BLOCKS { - res[block][3] = _mm_add_epi32(res[block][3], _mm_set_epi32(0, 0, 0, block as i32)); + res[block][3] = match size_of::() { + 4 => _mm_add_epi32(res[block][3], _mm_set_epi32(0, 0, 0, block as i32)), + 8 => _mm_add_epi64(res[block][3], _mm_set_epi64x(0, block as i64)), + _ => unreachable!() + } } for _ in 0..R::COUNT { @@ -145,11 +162,15 @@ unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_BLOCKS] { } for block in 0..PAR_BLOCKS { - for i in 0..4 { + for i in 0..3 { res[block][i] = _mm_add_epi32(res[block][i], v[i]); } - // add the counter since `v` is lacking updated counter values - res[block][3] = _mm_add_epi32(res[block][3], _mm_set_epi32(0, 0, 0, block as i32)); + let ctr = match size_of::() { + 4 => _mm_add_epi32(v[3], _mm_set_epi32(0, 0, 0, block as i32)), + 8 => _mm_add_epi64(v[3], _mm_set_epi64x(0, block as i64)), + _ => unreachable!() + }; + res[block][3] = _mm_add_epi32(res[block][3], ctr); } res diff --git a/chacha20/src/legacy.rs b/chacha20/src/legacy.rs index c95fd86e..4d49f4ef 100644 --- a/chacha20/src/legacy.rs +++ b/chacha20/src/legacy.rs @@ -13,10 +13,6 @@ pub type LegacyNonce = Array; use crate::variants::Legacy; /// The ChaCha20 stream cipher (legacy "djb" construction with 64-bit nonce). -/// -/// **WARNING:** this implementation uses 32-bit counter, while the original -/// implementation uses 64-bit counter. In other words, it does -/// not allow encrypting of more than 256 GiB of data. pub type ChaCha20Legacy = StreamCipherCoreWrapper; /// /// The ChaCha20 stream cipher (legacy "djb" construction with 64-bit nonce). diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index f29c551e..52463bad 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -128,7 +128,7 @@ mod rng; #[cfg(feature = "xchacha")] mod xchacha; -mod variants; +pub mod variants; use variants::Variant; #[cfg(feature = "cipher")] @@ -206,17 +206,24 @@ cfg_if! { } /// The ChaCha core function. -#[cfg_attr(feature = "rng", derive(Clone))] pub struct ChaChaCore { /// Internal state of the core function state: [u32; STATE_WORDS], /// CPU target feature tokens #[allow(dead_code)] tokens: Tokens, - /// Number of rounds to perform - rounds: PhantomData, - /// the variant of the implementation - variant: PhantomData, + /// Number of rounds to perform and the cipher variant + _pd: PhantomData<(R, V)>, +} + +impl Clone for ChaChaCore { + fn clone(&self) -> Self { + Self { + state: self.state, + tokens: self.tokens, + _pd: PhantomData, + } + } } impl ChaChaCore { @@ -225,17 +232,21 @@ impl ChaChaCore { /// directly. fn new(key: &[u8; 32], iv: &[u8]) -> Self { let mut state = [0u32; STATE_WORDS]; - state[0..4].copy_from_slice(&CONSTANTS); - let key_chunks = key.chunks_exact(4); - for (val, chunk) in state[4..12].iter_mut().zip(key_chunks) { - *val = u32::from_le_bytes(chunk.try_into().unwrap()); + let ctr_size = size_of::() / size_of::(); + let (const_dst, state_rem) = state.split_at_mut(4); + let (key_dst, state_rem) = state_rem.split_at_mut(8); + let (_ctr_dst, iv_dst) = state_rem.split_at_mut(ctr_size); + + const_dst.copy_from_slice(&CONSTANTS); + + for (src, dst) in key.chunks_exact(4).zip(key_dst) { + *dst = u32::from_le_bytes(src.try_into().unwrap()); } - assert_eq!(iv.len(), 4 * (16 - V::NONCE_INDEX)); - let iv_chunks = iv.as_ref().chunks_exact(4); - for (val, chunk) in state[V::NONCE_INDEX..16].iter_mut().zip(iv_chunks) { - *val = u32::from_le_bytes(chunk.try_into().unwrap()); + assert_eq!(size_of_val(iv_dst), size_of_val(iv)); + for (src, dst) in iv.chunks_exact(4).zip(iv_dst) { + *dst = u32::from_le_bytes(src.try_into().unwrap()); } cfg_if! { @@ -258,24 +269,23 @@ impl ChaChaCore { Self { state, tokens, - rounds: PhantomData, - variant: PhantomData, + _pd: PhantomData, } } } #[cfg(feature = "cipher")] impl StreamCipherSeekCore for ChaChaCore { - type Counter = u32; + type Counter = V::Counter; #[inline(always)] fn get_block_pos(&self) -> Self::Counter { - self.state[12] + V::get_block_pos(&self.state[12..]) } #[inline(always)] fn set_block_pos(&mut self, pos: Self::Counter) { - self.state[12] = pos + V::set_block_pos(&mut self.state[12..], pos); } } @@ -283,8 +293,7 @@ impl StreamCipherSeekCore for ChaChaCore { impl StreamCipherCore for ChaChaCore { #[inline(always)] fn remaining_blocks(&self) -> Option { - let rem = u32::MAX - self.get_block_pos(); - rem.try_into().ok() + V::remaining_blocks(self.get_block_pos()) } fn process_with_backend( @@ -298,21 +307,21 @@ impl StreamCipherCore for ChaChaCore { cfg_if! { if #[cfg(chacha20_force_avx2)] { unsafe { - backends::avx2::inner::(&mut self.state, f); + backends::avx2::inner::(&mut self.state, f); } } else if #[cfg(chacha20_force_sse2)] { unsafe { - backends::sse2::inner::(&mut self.state, f); + backends::sse2::inner::(&mut self.state, f); } } else { let (avx2_token, sse2_token) = self.tokens; if avx2_token.get() { unsafe { - backends::avx2::inner::(&mut self.state, f); + backends::avx2::inner::(&mut self.state, f); } } else if sse2_token.get() { unsafe { - backends::sse2::inner::(&mut self.state, f); + backends::sse2::inner::(&mut self.state, f); } } else { f.call(&mut backends::soft::Backend(self)); @@ -321,7 +330,7 @@ impl StreamCipherCore for ChaChaCore { } } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { unsafe { - backends::neon::inner::(&mut self.state, f); + backends::neon::inner::(&mut self.state, f); } } else { f.call(&mut backends::soft::Backend(self)); diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 86f04bac..f58152fe 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -21,13 +21,13 @@ use zeroize::{Zeroize, ZeroizeOnDrop}; use crate::{ ChaChaCore, R8, R12, R20, Rounds, backends, - variants::{Ietf, Variant}, + variants::{Legacy, Variant}, }; use cfg_if::cfg_if; /// Number of 32-bit words per ChaCha block (fixed by algorithm definition). -const BLOCK_WORDS: u8 = 16; +pub(crate) const BLOCK_WORDS: u8 = 16; /// The seed for ChaCha20. Implements ZeroizeOnDrop when the /// zeroize feature is enabled. @@ -81,40 +81,14 @@ impl Debug for Seed { } } -/// A wrapper for set_word_pos() input. -/// -/// Can be constructed from any of the following: -/// * `[u8; 5]` -/// * `u64` -pub struct WordPosInput { - block_pos: u32, - index: usize, -} - -impl From<[u8; 5]> for WordPosInput { - fn from(value: [u8; 5]) -> Self { - Self { - block_pos: u32::from_le_bytes(value[0..4].try_into().unwrap()), - index: (value[4] & 0b1111) as usize, - } - } -} - -impl From for WordPosInput { - fn from(value: u64) -> Self { - Self { - block_pos: u32::from_le_bytes((value >> 4).to_le_bytes()[0..4].try_into().unwrap()), - index: (value.to_le_bytes()[0] & 0b1111) as usize, - } - } -} - /// A wrapper for `stream_id`. /// /// Can be constructed from any of the following: /// * `[u32; 3]` /// * `[u8; 12]` /// * `u128` +/// +/// The arrays should be in little endian order. pub struct StreamId([u32; Self::LEN]); impl StreamId { @@ -122,7 +96,7 @@ impl StreamId { const BYTES: usize = size_of::(); /// The length of the array contained within `StreamId`. - const LEN: usize = 3; + const LEN: usize = 2; } impl From<[u32; Self::LEN]> for StreamId { @@ -148,9 +122,9 @@ impl From<[u8; Self::BYTES]> for StreamId { } } -impl From for StreamId { +impl From for StreamId { #[inline] - fn from(value: u128) -> Self { + fn from(value: u64) -> Self { let result: [u8; Self::BYTES] = value.to_le_bytes()[..Self::BYTES].try_into().unwrap(); result.into() } @@ -159,21 +133,31 @@ impl From for StreamId { /// A wrapper for `block_pos`. /// /// Can be constructed from any of the following: -/// * `[u8; 4]` -/// * `u32` -pub struct BlockPos(u32); +/// * `u64` +/// * `[u8; 8]` +/// * `[u32; 2]` +/// +/// The arrays should be in little endian order. +pub struct BlockPos([u32; 2]); -impl From for BlockPos { +impl From for BlockPos { #[inline] - fn from(value: u32) -> Self { - Self(value.to_le()) + fn from(value: u64) -> Self { + Self([value as u32, (value >> 32) as u32]) } } -impl From<[u8; 4]> for BlockPos { +impl From<[u8; 8]> for BlockPos { #[inline] - fn from(value: [u8; 4]) -> Self { - Self(u32::from_le_bytes(value).to_le()) + fn from(value: [u8; 8]) -> Self { + u64::from_le_bytes(value).into() + } +} + +impl From<[u32; 2]> for BlockPos { + #[inline] + fn from(value: [u32; 2]) -> Self { + Self(value) } } @@ -255,7 +239,7 @@ impl ChaChaCore { } macro_rules! impl_chacha_rng { - ($ChaChaXRng:ident, $ChaChaXCore:ident, $rounds:ident, $abst: ident) => { + ($ChaChaXRng:ident, $ChaChaXCore:ident, $rounds:ident, $abst:ident) => { /// A cryptographically secure random number generator that uses the ChaCha algorithm. /// /// ChaCha is a stream cipher designed by Daniel J. Bernstein[^1], that we use as an RNG. It is @@ -271,10 +255,10 @@ macro_rules! impl_chacha_rng { /// rounds is the minimum potentially secure configuration, and 20 rounds is widely used as a /// conservative choice. /// - /// We use a 32-bit counter and 96-bit stream identifier as in the IETF implementation[^3] - /// except that we use a stream identifier in place of a nonce. A 32-bit counter over 64-byte - /// (16 word) blocks allows 256 GiB of output before cycling, and the stream identifier allows - /// 296 unique streams of output per seed. Both counter and stream are initialized + /// We use a 64-bit counter and 64-bit stream identifier as in Bernstein's implementation[^3] + /// except that we use a stream identifier in place of a nonce. A 64-bit counter over 64-byte + /// (16 word) blocks allows 1 ZiB of output before cycling, and the stream identifier allows + /// 264 unique streams of output per seed. Both counter and stream are initialized /// to zero but may be set via the `set_word_pos` and `set_stream` methods. /// /// The word layout is: @@ -283,7 +267,7 @@ macro_rules! impl_chacha_rng { /// constant constant constant constant /// seed seed seed seed /// seed seed seed seed - /// counter stream_id stream_id stream_id + /// counter counter stream_id stream_id /// ``` /// This implementation uses an output buffer of sixteen `u32` words, and uses /// [`BlockRng`] to implement the [`RngCore`] methods. @@ -302,17 +286,13 @@ macro_rules! impl_chacha_rng { /// let mut rng = ChaCha20Rng::from_seed(seed); /// rng.set_stream(100); /// - /// // you can also use a [u8; 12] in `.set_stream()` - /// rng.set_stream([3u8; 12]); - /// // or a [u32; 3] - /// rng.set_stream([4u32; 3]); - /// + /// // you can also use a [u8; 8] in `.set_stream()` + /// rng.set_stream([3u8; 8]); + /// // or a [u32; 2] + /// rng.set_stream([4u32; 2]); /// /// rng.set_word_pos(5); /// - /// // you can also use a [u8; 5] in `.set_word_pos()` - /// rng.set_word_pos([2u8; 5]); - /// /// let x = rng.next_u32(); /// let mut array = [0u8; 32]; /// rng.fill_bytes(&mut array); @@ -331,9 +311,6 @@ macro_rules! impl_chacha_rng { /// /// [^2]: [eSTREAM: the ECRYPT Stream Cipher Project]( /// http://www.ecrypt.eu.org/stream/) - /// - /// [^3]: Internet Research Task Force, [*ChaCha20 and Poly1305 for IETF Protocols*]( - /// https://www.rfc-editor.org/rfc/rfc8439) #[derive(Clone)] pub struct $ChaChaXRng { /// The ChaChaCore struct @@ -342,8 +319,16 @@ macro_rules! impl_chacha_rng { /// The ChaCha core random number generator #[derive(Clone)] - pub struct $ChaChaXCore(ChaChaCore<$rounds, Ietf>); + pub struct $ChaChaXCore(ChaChaCore<$rounds, Legacy>); + + impl SeedableRng for $ChaChaXCore { + type Seed = Seed; + #[inline] + fn from_seed(seed: Self::Seed) -> Self { + Self(ChaChaCore::<$rounds, Legacy>::new(seed.as_ref(), &[0u8; 8])) + } + } impl SeedableRng for $ChaChaXRng { type Seed = [u8; 32]; @@ -354,21 +339,20 @@ macro_rules! impl_chacha_rng { } } } - - impl BlockRngCore for $ChaChaXCore { - type Item = u32; - type Results = BlockRngResults; - + impl RngCore for $ChaChaXRng { #[inline] - fn generate(&mut self, r: &mut Self::Results) { - self.0.generate(&mut r.0); - #[cfg(target_endian = "big")] - for word in r.0.iter_mut() { - *word = word.to_le(); - } + fn next_u32(&mut self) -> u32 { + self.core.next_u32() + } + #[inline] + fn next_u64(&mut self) -> u64 { + self.core.next_u64() + } + #[inline] + fn fill_bytes(&mut self, dest: &mut [u8]) { + self.core.fill_bytes(dest) } } - impl CryptoBlockRng for $ChaChaXCore {} impl CryptoRng for $ChaChaXRng {} @@ -385,30 +369,6 @@ macro_rules! impl_chacha_rng { } } - impl SeedableRng for $ChaChaXCore { - type Seed = Seed; - - #[inline] - fn from_seed(seed: Self::Seed) -> Self { - Self(ChaChaCore::<$rounds, Ietf>::new(seed.as_ref(), &[0u8; 12])) - } - } - - impl RngCore for $ChaChaXRng { - #[inline] - fn next_u32(&mut self) -> u32 { - self.core.next_u32() - } - #[inline] - fn next_u64(&mut self) -> u64 { - self.core.next_u64() - } - #[inline] - fn fill_bytes(&mut self, dest: &mut [u8]) { - self.core.fill_bytes(dest) - } - } - impl $ChaChaXRng { // The buffer is a 4-block window, i.e. it is always at a block-aligned position in the // stream but if the stream has been sought it may not be self-aligned. @@ -416,35 +376,33 @@ macro_rules! impl_chacha_rng { /// Get the offset from the start of the stream, in 32-bit words. /// /// Since the generated blocks are 64 words (26) long and the - /// counter is 32-bits, the offset is a 36-bit number. Sub-word offsets are + /// counter is 64-bits, the offset is a 68-bit number. Sub-word offsets are /// not supported, hence the result can simply be multiplied by 4 to get a /// byte-offset. #[inline] - pub fn get_word_pos(&self) -> u64 { - let mut result = - u64::from(self.core.core.0.state[12].wrapping_sub(BUF_BLOCKS.into())) << 4; - result += self.core.index() as u64; - // eliminate bits above the 36th bit - result & 0xfffffffff + pub fn get_word_pos(&self) -> u128 { + let mut block_counter = (u64::from(self.core.core.0.state[13]) << 32) + | u64::from(self.core.core.0.state[12]); + block_counter = block_counter.wrapping_sub(BUF_BLOCKS as u64); + let word_pos = + block_counter as u128 * BLOCK_WORDS as u128 + self.core.index() as u128; + // eliminate bits above the 68th bit + word_pos & ((1 << 68) - 1) } - /// Set the offset from the start of the stream, in 32-bit words. This method - /// takes any of the following: - /// * `[u8; 5]` - /// * `u64` + /// Set the offset from the start of the stream, in 32-bit words. /// /// As with `get_word_pos`, we use a 36-bit number. When given a `u64`, we use /// the least significant 4 bits as the RNG's index, and the 32 bits before it /// as the block position. - /// - /// When given a `[u8; 5]`, the word_pos is set similarly, but it is more - /// arbitrary. #[inline] - pub fn set_word_pos>(&mut self, word_offset: W) { - let word_pos: WordPosInput = word_offset.into(); - self.core.core.0.state[12] = word_pos.block_pos; - // generate will increase block_pos by 4 - self.core.generate_and_set(word_pos.index); + pub fn set_word_pos(&mut self, word_offset: u128) { + let index = (word_offset & 0b1111) as usize; + let counter = word_offset >> 4; + //self.set_block_pos(counter as u64); + self.core.core.0.state[12] = counter as u32; + self.core.core.0.state[13] = (counter >> 32) as u32; + self.core.generate_and_set(index); } /// Set the block pos and reset the RNG's index. @@ -452,32 +410,60 @@ macro_rules! impl_chacha_rng { /// The word pos will be equal to `block_pos * 16 words per block`. /// /// This method takes any of the following: - /// * `[u8; 4]` - /// * `u32` + /// * `u64` + /// * `[u8; 8]` + /// * `[u32; 2]` + /// + /// Note: the arrays should be in little endian order. #[inline] + #[allow(unused)] pub fn set_block_pos>(&mut self, block_pos: B) { self.core.reset(); - self.core.core.0.state[12] = block_pos.into().0.to_le() + let block_pos = block_pos.into().0; + self.core.core.0.state[12] = block_pos[0]; + self.core.core.0.state[13] = block_pos[1] } /// Get the block pos. #[inline] - pub fn get_block_pos(&self) -> u32 { - self.core.core.0.state[12] + #[allow(unused)] + pub fn get_block_pos(&self) -> u64 { + self.core.core.0.state[12] as u64 | ((self.core.core.0.state[13] as u64) << 32) } /// Set the stream number. The lower 96 bits are used and the rest are /// discarded. This method takes any of the following: - /// * `[u32; 3]` - /// * `[u8; 12]` - /// * `u128` + /// * `u64` + /// * `[u8; 8]` + /// * `[u32; 2]` + /// + /// Note: the arrays should be in little endian order. /// /// This is initialized to zero; 296 unique streams of output - /// are available per seed/key. + /// are available per seed/key. In theory a 96-bit nonce can be used by + /// passing the last 64-bits to this function and using the first 32-bits as + /// the most significant half of the 64-bit counter, which may be set + /// directly via `set_block_pos` like so: + /// + /// ``` + /// use chacha20::ChaCha20Rng; + /// use rand_core::{SeedableRng, RngCore}; + /// + /// let seed = [2u8; 32]; + /// let mut rng = ChaCha20Rng::from_seed(seed); + /// + /// // set state[12] to 0, state[13] to 1, state[14] to 2, state[15] to 3 + /// rng.set_block_pos([0u32, 1u32]); + /// rng.set_stream([2u32, 3u32]); + /// + /// // confirm that state is set correctly + /// assert_eq!(rng.get_block_pos(), 1 << 32); + /// assert_eq!(rng.get_stream(), (3 << 32) + 2); + /// ``` #[inline] pub fn set_stream>(&mut self, stream: S) { let stream: StreamId = stream.into(); - for (n, val) in self.core.core.0.state[Ietf::NONCE_INDEX..BLOCK_WORDS as usize] + for (n, val) in self.core.core.0.state[14..BLOCK_WORDS as usize] .as_mut() .iter_mut() .zip(stream.0.iter()) @@ -491,9 +477,9 @@ macro_rules! impl_chacha_rng { /// Get the stream number. #[inline] - pub fn get_stream(&self) -> u128 { - let mut result = [0u8; 16]; - for (i, &big) in self.core.core.0.state[Ietf::NONCE_INDEX..BLOCK_WORDS as usize] + pub fn get_stream(&self) -> u64 { + let mut result = [0u8; 8]; + for (i, &big) in self.core.core.0.state[14..BLOCK_WORDS as usize] .iter() .enumerate() { @@ -503,7 +489,7 @@ macro_rules! impl_chacha_rng { result[index + 2] = (big >> 16) as u8; result[index + 3] = (big >> 24) as u8; } - u128::from_le_bytes(result) + u64::from_le_bytes(result) } /// Get the seed. @@ -569,8 +555,8 @@ macro_rules! impl_chacha_rng { #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] pub(crate) struct $ChaChaXRng { seed: crate::rng::Seed, - stream: u128, - word_pos: u64, + stream: u64, + word_pos: u128, } impl From<&super::$ChaChaXRng> for $ChaChaXRng { @@ -596,6 +582,16 @@ macro_rules! impl_chacha_rng { } } } + + impl BlockRngCore for $ChaChaXCore { + type Item = u32; + type Results = BlockRngResults; + + #[inline] + fn generate(&mut self, r: &mut Self::Results) { + self.0.generate(&mut r.0); + } + } }; } @@ -608,6 +604,8 @@ impl_chacha_rng!(ChaCha20Rng, ChaCha20Core, R20, abst20); #[cfg(test)] pub(crate) mod tests { + use hex_literal::hex; + use super::*; const KEY: [u8; 32] = [ @@ -638,13 +636,14 @@ pub(crate) mod tests { let mut rng = ChaCha20Rng::from_seed(KEY); rng.set_stream(1337); // test counter wrapping-add - rng.set_word_pos((2u64).pow(36) - 1); - let mut output = [3u8; 128]; + rng.set_word_pos((1 << 68) - 65); + let mut output = [3u8; 1280]; rng.fill_bytes(&mut output); - assert_ne!(output, [0u8; 128]); + assert_ne!(output, [0u8; 1280]); - assert!(rng.get_word_pos() < 2000 && rng.get_word_pos() != 0); + assert!(rng.get_word_pos() < 2000); + assert!(rng.get_word_pos() != 0); } #[test] @@ -652,12 +651,12 @@ pub(crate) mod tests { let seed = [44u8; 32]; let mut rng = ChaCha20Rng::from_seed(seed); - // test set_stream with [u32; 3] - rng.set_stream([313453u32, 0u32, 0u32]); + // test set_stream with [u32; 2] + rng.set_stream([313453u32, 0u32]); assert_eq!(rng.get_stream(), 313453); // test set_stream with [u8; 12] - rng.set_stream([89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + rng.set_stream([89, 0, 0, 0, 0, 0, 0, 0]); assert_eq!(rng.get_stream(), 89); // test set_stream with u128 @@ -670,16 +669,13 @@ pub(crate) mod tests { // test word_pos = 16 * block_pos assert_eq!(rng.get_word_pos(), 58392 * 16); - // test set_block_pos with [u8; 4] - rng.set_block_pos([77, 0, 0, 0]); + // test set_block_pos with [u8; 8] + rng.set_block_pos([77, 0, 0, 0, 0, 0, 0, 0]); assert_eq!(rng.get_block_pos(), 77); // test set_word_pos with u64 rng.set_word_pos(8888); assert_eq!(rng.get_word_pos(), 8888); - - // test set_word_pos with [u8; 5] - rng.set_word_pos([55, 0, 0, 0, 0]) } #[cfg(feature = "serde1")] @@ -728,7 +724,7 @@ pub(crate) mod tests { #[cfg(feature = "serde1")] #[test] fn test_chacha_serde_format_stability() { - let j = r#"{"seed":[4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8],"stream":27182818284,"word_pos":3141592653}"#; + let j = r#"{"seed":[4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8],"stream":27182818284,"word_pos":314159265359}"#; let r: ChaChaRng = serde_json::from_str(j).unwrap(); let j1 = serde_json::to_string(&r).unwrap(); assert_eq!(j, j1); @@ -851,9 +847,9 @@ pub(crate) mod tests { assert_eq!(results, expected); assert_eq!(rng3.get_word_pos(), expected_end); - // Test block 2 by using `set_block_pos` and [u8; 4] + // Test block 2 by using `set_block_pos` and [u8; 8] let mut rng4 = ChaChaRng::from_seed(seed); - rng4.set_block_pos([2, 0, 0, 0]); + rng4.set_block_pos([2, 0, 0, 0, 0, 0, 0, 0]); results = [0u32; 16]; for i in results.iter_mut() { *i = rng4.next_u32(); @@ -922,8 +918,9 @@ pub(crate) mod tests { let seed = hex!("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"); let mut rng = ChaChaRng::from_seed(seed); - let stream_id = hex!("000000090000004a00000000"); + let stream_id = hex!("0000004a00000000"); rng.set_stream(stream_id); + rng.set_block_pos(hex!("0000000000000009")); // The test vectors omit the first 64-bytes of the keystream let mut discard_first_64 = [0u8; 64]; @@ -942,6 +939,29 @@ pub(crate) mod tests { assert_eq!(results, expected); } + #[test] + fn test_chacha_nonce_2() { + // Test vector 5 from + // https://tools.ietf.org/html/draft-nir-cfrg-chacha20-poly1305-04 + // Although we do not support setting a nonce, we try it here anyway so + // we can use this test vector. + let seed = [0u8; 32]; + let mut rng = ChaChaRng::from_seed(seed); + // 96-bit nonce in LE order is: 0,0,0,0, 0,0,0,0, 0,0,0,2 + rng.set_stream(2u64 << (24 + 32)); + + let mut results = [0u32; 16]; + for i in results.iter_mut() { + *i = rng.next_u32(); + } + let expected = [ + 0x374dc6c2, 0x3736d58c, 0xb904e24a, 0xcd3f93ef, 0x88228b1a, 0x96a4dfb3, 0x5b76ab72, + 0xc727ee54, 0x0e0e978a, 0xf3145c95, 0x1b748ea8, 0xf786c297, 0x99c28f5f, 0x628314e8, + 0x398a19fa, 0x6ded1b53, + ]; + assert_eq!(results, expected); + } + #[test] fn test_chacha_clone_streams() { let seed = [ @@ -976,7 +996,7 @@ pub(crate) mod tests { use super::{BLOCK_WORDS, BUF_BLOCKS}; let mut rng = ChaChaRng::from_seed(Default::default()); // refilling the buffer in set_word_pos will wrap the block counter to 0 - let last_block = (2u64).pow(36) - u64::from(BUF_BLOCKS * BLOCK_WORDS); + let last_block = (1 << 68) - u128::from(BUF_BLOCKS * BLOCK_WORDS); rng.set_word_pos(last_block); assert_eq!(rng.get_word_pos(), last_block); } @@ -986,7 +1006,7 @@ pub(crate) mod tests { use super::BLOCK_WORDS; let mut rng = ChaChaRng::from_seed(Default::default()); // refilling the buffer in set_word_pos will wrap the block counter past 0 - let last_block = (1 << 36) - u64::from(BLOCK_WORDS); + let last_block = (1 << 68) - u128::from(BLOCK_WORDS); rng.set_word_pos(last_block); assert_eq!(rng.get_word_pos(), last_block); } @@ -1057,7 +1077,7 @@ pub(crate) mod tests { "Failed test at start_word_pos = {},\nfailed index: {:?}\nFailing word_pos = {}", debug_start_word_pos, index, - debug_start_word_pos + (index / 4) as u64 + debug_start_word_pos + (index / 4) as u128 ); } } @@ -1091,7 +1111,7 @@ pub(crate) mod tests { "Failed test at start_word_pos = {},\nfailed index: {:?}\nFailing word_pos = {}", debug_start_word_pos, index, - debug_start_word_pos + (index / 4) as u64 + debug_start_word_pos + (index / 4) as u128 ); } } @@ -1117,33 +1137,98 @@ pub(crate) mod tests { #[test] fn stream_id_endianness() { let mut rng = ChaCha20Rng::from_seed([0u8; 32]); - rng.set_stream([3, 3333, 333333]); - let expected = 2059058063; + rng.set_stream([3, 3333]); + let expected = 1152671828; assert_eq!(rng.next_u32(), expected); rng.set_stream(1234567); - let expected = 1254506509; + let expected = 3110319182; assert_eq!(rng.next_u32(), expected); - rng.set_stream([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]); - let expected = 1391671567; + rng.set_stream([1, 2, 3, 4, 5, 6, 7, 8]); + let expected = 3790367479; assert_eq!(rng.next_u32(), expected); } - /// If this test fails, the backend may be + /// If this test fails, the backend may not be /// performing 64-bit addition. #[test] - fn counter_wrapping() { + fn counter_wrapping_64_bit_counter() { let mut rng = ChaChaRng::from_seed([0u8; 32]); // get first four blocks and word pos let mut first_blocks = [0u8; 64 * 4]; rng.fill_bytes(&mut first_blocks); - let word_pos = rng.get_word_pos(); + let first_blocks_end_word_pos = rng.get_word_pos(); + let first_blocks_end_block_counter = rng.get_block_pos(); // get first four blocks after wrapping - rng.set_block_pos(u32::MAX); + rng.set_block_pos([u32::MAX, u32::MAX]); let mut result = [0u8; 64 * 5]; rng.fill_bytes(&mut result); - assert_eq!(word_pos, rng.get_word_pos()); + assert_eq!(first_blocks_end_word_pos, rng.get_word_pos()); + assert_eq!(first_blocks_end_block_counter, rng.get_block_pos() - 3); + + if first_blocks[0..64 * 4].ne(&result[64..]) { + for (i, (a, b)) in first_blocks.iter().zip(result.iter().skip(64)).enumerate() { + if a.ne(b) { + panic!("i = {}\na = {}\nb = {}", i, a, b); + } + } + } assert_eq!(&first_blocks[0..64 * 4], &result[64..]); } + + /// If this test fails, the backend may be doing + /// 32-bit addition. + #[test] + fn counter_not_wrapping_at_32_bits() { + let mut rng = ChaChaRng::from_seed([0u8; 32]); + + // get first four blocks and word pos + let mut first_blocks = [0u8; 64 * 4]; + rng.fill_bytes(&mut first_blocks); + let first_blocks_end_word_pos = rng.get_word_pos(); + + // get first four blocks after the supposed overflow + rng.set_block_pos(u32::MAX as u64); + let mut result = [0u8; 64 * 5]; + rng.fill_bytes(&mut result); + assert_ne!(first_blocks_end_word_pos, rng.get_word_pos()); + assert_eq!( + rng.get_word_pos(), + first_blocks_end_word_pos + (1 << 32) * BLOCK_WORDS as u128 + ); + assert_ne!(&first_blocks[0..64 * 4], &result[64..]); + } + + /// Test vector 8 from https://github.com/pyca/cryptography/blob/main/vectors/cryptography_vectors/ciphers/ChaCha20/counter-overflow.txt + #[test] + fn counter_overflow_1() { + let mut rng = ChaCha20Rng::from_seed([0u8; 32]); + let block_pos = 4294967295; + assert_eq!(block_pos, u32::MAX as u64); + rng.set_block_pos(4294967295); + + let mut output = [0u8; 64 * 3]; + rng.fill_bytes(&mut output); + let expected = hex!( + "ace4cd09e294d1912d4ad205d06f95d9c2f2bfcf453e8753f128765b62215f4d92c74f2f626c6a640c0b1284d839ec81f1696281dafc3e684593937023b58b1d3db41d3aa0d329285de6f225e6e24bd59c9a17006943d5c9b680e3873bdc683a5819469899989690c281cd17c96159af0682b5b903468a61f50228cf09622b5a46f0f6efee15c8f1b198cb49d92b990867905159440cc723916dc0012826981039ce1766aa2542b05db3bd809ab142489d5dbfe1273e7399637b4b3213768aaa" + ); + assert_eq!(expected, output); + } + + /// Test vector 9 from https://github.com/pyca/cryptography/blob/main/vectors/cryptography_vectors/ciphers/ChaCha20/counter-overflow.txt + #[test] + fn counter_wrap_1() { + let mut rng = ChaCha20Rng::from_seed([0u8; 32]); + let block_pos = 18446744073709551615; + assert_eq!(block_pos, u64::MAX); + rng.set_block_pos(block_pos); + + let mut output = [0u8; 64 * 3]; + rng.fill_bytes(&mut output); + let expected = hex!( + "d7918cd8620cf832532652c04c01a553092cfb32e7b3f2f5467ae9674a2e9eec17368ec8027a357c0c51e6ea747121fec45284be0f099d2b3328845607b1768976b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee65869f07e7be5551387a98ba977c732d080dcb0f29a048e3656912c6533e32ee7aed29b721769ce64e43d57133b074d839d531ed1f28510afb45ace10a1f4b794d6f" + ); + assert_eq!(expected, output); + } } diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index 58043a75..9bb0e7a0 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -1,25 +1,82 @@ -//! Distinguishing features of ChaCha variants. -//! -//! To be revisited for the 64-bit counter. - -/// A trait that distinguishes some ChaCha variants -pub trait Variant: Clone { - /// the size of the Nonce in u32s - const NONCE_INDEX: usize; +//! ChaCha variant-specific configurations. + +mod sealed { + pub trait Sealed {} } -#[derive(Clone)] -/// The details pertaining to the IETF variant -pub struct Ietf(); +/// A trait that distinguishes some ChaCha variants. Contains configurations +/// for "Legacy" DJB variant and the IETF variant. +pub trait Variant: sealed::Sealed { + /// The counter's type. + #[cfg(not(feature = "cipher"))] + type Counter; + + /// The counter's type. + #[cfg(feature = "cipher")] + type Counter: cipher::StreamCipherCounter; + + /// Takes a slice of `state[12..NONCE_INDEX]` to convert it into + /// `Self::Counter`. + fn get_block_pos(row: &[u32]) -> Self::Counter; + + /// Breaks down the `Self::Counter` type into a u32 array for setting the + /// block pos. + fn set_block_pos(row: &mut [u32], pos: Self::Counter); + + /// A helper method for calculating the remaining blocks using these types + fn remaining_blocks(block_pos: Self::Counter) -> Option; +} + +/// IETF ChaCha configuration to use a 32-bit counter and 96-bit nonce. +pub enum Ietf {} + +impl sealed::Sealed for Ietf {} + impl Variant for Ietf { - const NONCE_INDEX: usize = 13; + type Counter = u32; + + #[inline(always)] + fn get_block_pos(row: &[u32]) -> u32 { + row[0] + } + + #[inline(always)] + fn set_block_pos(row: &mut [u32], pos: u32) { + row[0] = pos; + } + + #[inline(always)] + fn remaining_blocks(block_pos: u32) -> Option { + let remaining = u32::MAX - block_pos; + remaining.try_into().ok() + } } -#[derive(Clone)] +/// DJB variant specific features: 64-bit counter and 64-bit nonce. +#[cfg(feature = "legacy")] +pub enum Legacy {} + #[cfg(feature = "legacy")] -pub struct Legacy(); +impl sealed::Sealed for Legacy {} #[cfg(feature = "legacy")] impl Variant for Legacy { - const NONCE_INDEX: usize = 14; + type Counter = u64; + + #[inline(always)] + fn get_block_pos(row: &[u32]) -> u64 { + (u64::from(row[1]) << 32) | u64::from(row[0]) + } + + #[inline(always)] + fn set_block_pos(row: &mut [u32], pos: u64) { + row[0] = (pos & 0xFFFF_FFFF).try_into().unwrap(); + row[1] = (pos >> 32).try_into().unwrap(); + } + + #[inline(always)] + fn remaining_blocks(block_pos: u64) -> Option { + let remaining = u64::MAX - block_pos; + remaining.try_into().ok() + } }