Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions scrypt/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ edition = "2024"
rust-version = "1.85"

[dependencies]
cfg-if = "1.0"
pbkdf2 = { version = "0.13.0-rc.0", path = "../pbkdf2" }
salsa20 = { version = "0.11.0-rc.0", default-features = false }
sha2 = { version = "0.11.0-rc.0", default-features = false }
Expand Down
68 changes: 68 additions & 0 deletions scrypt/src/block_mix/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Portable implementation. Compiled whenever no SIMD backend below is
// selected, and additionally under `test` so the SIMD backends can be
// cross-checked against it (see the `tests` module).
#[cfg(any(
    test,
    not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse2"
        ),
        all(target_arch = "aarch64", target_feature = "neon"),
        all(target_arch = "wasm32", target_feature = "simd128"),
    ))
))]
mod soft;

// Select a SIMD backend for the target. Each SIMD backend operates on data
// permuted by `shuffle_in`/`shuffle_out` (see `pivot`); the portable
// fallback works on the natural layout, so its shuffles are no-ops.
cfg_if::cfg_if! {
    if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
        mod pivot;
        mod neon;
        pub(crate) use neon::{scrypt_block_mix, shuffle_in, shuffle_out};
    } else if #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] {
        mod pivot;
        mod simd128;
        pub(crate) use simd128::{scrypt_block_mix, shuffle_in, shuffle_out};
    } else if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))] {
        mod pivot;
        mod sse2;
        pub(crate) use sse2::{scrypt_block_mix, shuffle_in, shuffle_out};
    } else {
        pub(crate) use soft::scrypt_block_mix;

        // The portable implementation needs no data permutation.
        pub(crate) fn shuffle_in(_input: &mut [u8]) {}
        pub(crate) fn shuffle_out(_input: &mut [u8]) {}
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Cross-check the selected backend against the portable `soft`
    /// implementation, and verify `shuffle_out` is the inverse of
    /// `shuffle_in`. On targets with no SIMD backend the dispatched
    /// functions are `soft` itself plus no-op shuffles, so the test
    /// degenerates to a self-check.
    #[test]
    fn test_scrypt_block_mix_abcd_against_soft() {
        // Deterministic 128-byte input (one block pair): 0, 1, 2, ...
        let mut input: [u8; 128] = core::array::from_fn(|i| i as u8);
        for _round in 0..10 {
            let mut output = [0u8; 128];

            let mut expected0 = [0u8; 128];
            let mut expected1 = [0u8; 128]; // check shuffle_out is a correct inverse of shuffle_in
            // Reference result on the unshuffled input.
            soft::scrypt_block_mix(&input, &mut expected0);
            // Run the backend under test on shuffled data...
            shuffle_in(&mut input);
            scrypt_block_mix(&input, &mut output);
            // ...then undo the shuffle on the input and recompute the
            // reference: expected1 differs from expected0 only if
            // shuffle_out failed to invert shuffle_in.
            shuffle_out(&mut input);
            soft::scrypt_block_mix(&input, &mut expected1);
            shuffle_out(&mut output);
            assert_eq!(
                expected0, expected1,
                "expected0 != expected1, shuffle_out is not a correct inverse of shuffle_in?"
            );
            assert_eq!(
                output, expected0,
                "output != expected0, scrypt_block_mix is not correct?"
            );

            // Fold the result back into the input so every round
            // exercises a different state.
            input
                .iter_mut()
                .zip(output.iter())
                .for_each(|(a, b)| *a = a.wrapping_add(*b));
        }
    }
}
88 changes: 88 additions & 0 deletions scrypt/src/block_mix/neon.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
use crate::block_mix::pivot::{INVERSE_PIVOT_ABCD, PIVOT_ABCD};

/// Permute each 64-byte Salsa20 block into the column-major word order
/// expected by the SIMD `scrypt_block_mix`.
pub(crate) fn shuffle_in(b: &mut [u8]) {
    for block in b.chunks_exact_mut(64) {
        // Snapshot the sixteen 32-bit words of this block.
        let mut words = [0u32; 16];
        for (slot, bytes) in words.iter_mut().zip(block.chunks_exact(4)) {
            *slot = u32::from_ne_bytes(bytes.try_into().unwrap());
        }
        // Write the words back in pivoted order.
        for (i, dst) in block.chunks_exact_mut(4).enumerate() {
            dst.copy_from_slice(&words[PIVOT_ABCD[i]].to_ne_bytes());
        }
    }
}

/// Undo `shuffle_in`: restore each 64-byte block's natural word order.
pub(crate) fn shuffle_out(b: &mut [u8]) {
    for block in b.chunks_exact_mut(64) {
        // Snapshot the sixteen 32-bit words of this block.
        let mut words = [0u32; 16];
        for (slot, bytes) in words.iter_mut().zip(block.chunks_exact(4)) {
            *slot = u32::from_ne_bytes(bytes.try_into().unwrap());
        }
        // Write the words back using the inverse permutation.
        for (i, dst) in block.chunks_exact_mut(4).enumerate() {
            dst.copy_from_slice(&words[INVERSE_PIVOT_ABCD[i]].to_ne_bytes());
        }
    }
}

/// BlockMix for scrypt using aarch64 NEON intrinsics.
///
/// Operates on data already permuted into column-major order by
/// `shuffle_in` (see `pivot`); callers must apply `shuffle_out` to undo it.
/// input - the (shuffled) input vector; length must be a multiple of 128.
/// output - the (shuffled) output vector; same length as input.
pub(crate) fn scrypt_block_mix(input: &[u8], output: &mut [u8]) {
    use core::arch::aarch64::*;

    // Rotate each u32 lane left by $amt. `vsraq_n_u32` shifts right and
    // accumulates with an add; since the left- and right-shifted halves
    // occupy disjoint bits, add == or here, giving a two-instruction rotate.
    macro_rules! vrol_u32 {
        ($w:expr, $amt:literal) => {{
            let w = $w;
            vsraq_n_u32(vshlq_n_u32(w, $amt), w, 32 - $amt)
        }};
    }

    // X is initialized with the last 64-byte block of the input.
    let last_block = &input[input.len() - 64..];

    let mut a = unsafe { vld1q_u32(last_block.as_ptr().cast()) };
    let mut b = unsafe { vld1q_u32(last_block.as_ptr().add(16).cast()) };
    let mut c = unsafe { vld1q_u32(last_block.as_ptr().add(32).cast()) };
    let mut d = unsafe { vld1q_u32(last_block.as_ptr().add(48).cast()) };

    for (i, chunk) in input.chunks(64).enumerate() {
        // BlockMix interleave: even-indexed blocks land in the first half
        // of the output, odd-indexed blocks in the second half.
        let pos = if i % 2 == 0 {
            (i / 2) * 64
        } else {
            (i / 2) * 64 + input.len() / 2
        };

        unsafe {
            let chunk_a = vld1q_u32(chunk.as_ptr().cast());
            let chunk_b = vld1q_u32(chunk.as_ptr().add(16).cast());
            let chunk_c = vld1q_u32(chunk.as_ptr().add(32).cast());
            let chunk_d = vld1q_u32(chunk.as_ptr().add(48).cast());

            // X <- X xor B[i]
            a = veorq_u32(a, chunk_a);
            b = veorq_u32(b, chunk_b);
            c = veorq_u32(c, chunk_c);
            d = veorq_u32(d, chunk_d);

            // Keep the pre-round state for the Salsa20 feed-forward below.
            let saves = [a, b, c, d];

            // Eight Salsa20 rounds (rotation constants 7/9/13/18) on the
            // column-major state; the lane rotations plus the b/d swap
            // re-align the diagonals so the same four quarter-round steps
            // serve alternating round orientations.
            for _ in 0..8 {
                b = veorq_u32(b, vrol_u32!(vaddq_u32(a, d), 7));
                c = veorq_u32(c, vrol_u32!(vaddq_u32(b, a), 9));
                d = veorq_u32(d, vrol_u32!(vaddq_u32(c, b), 13));
                a = veorq_u32(a, vrol_u32!(vaddq_u32(d, c), 18));

                d = vextq_u32(d, d, 1);
                c = vextq_u32(c, c, 2);
                b = vextq_u32(b, b, 3);

                (b, d) = (d, b);
            }

            // Salsa20 feed-forward: add the saved pre-round state.
            a = vaddq_u32(a, saves[0]);
            b = vaddq_u32(b, saves[1]);
            c = vaddq_u32(c, saves[2]);
            d = vaddq_u32(d, saves[3]);

            vst1q_u32(output.as_mut_ptr().add(pos).cast(), a);
            vst1q_u32(output.as_mut_ptr().add(pos + 16).cast(), b);
            vst1q_u32(output.as_mut_ptr().add(pos + 32).cast(), c);
            vst1q_u32(output.as_mut_ptr().add(pos + 48).cast(), d);
        }
    }
}
20 changes: 20 additions & 0 deletions scrypt/src/block_mix/pivot.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/// Permute Salsa20 block to column major order
pub(crate) const PIVOT_ABCD: [usize; 16] = [0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11];

/// Inverse of PIVOT_ABCD
pub(crate) const INVERSE_PIVOT_ABCD: [usize; 16] = const {
    // PIVOT_ABCD is a permutation, so its inverse maps each value back to
    // the index holding it: PIVOT_ABCD[i] == j implies INVERSE[j] == i.
    let mut inv = [0; 16];
    let mut i = 0;
    while i < 16 {
        inv[PIVOT_ABCD[i]] = i;
        i += 1;
    }
    inv
};
88 changes: 88 additions & 0 deletions scrypt/src/block_mix/simd128.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
use crate::block_mix::pivot::{INVERSE_PIVOT_ABCD, PIVOT_ABCD};

/// Permute each 64-byte Salsa20 block into the column-major word order
/// expected by the SIMD `scrypt_block_mix`.
pub(crate) fn shuffle_in(b: &mut [u8]) {
    for block in b.chunks_exact_mut(64) {
        // Copy the block's sixteen 32-bit words out first.
        let mut words = [0u32; 16];
        for (slot, bytes) in words.iter_mut().zip(block.chunks_exact(4)) {
            *slot = u32::from_ne_bytes(bytes.try_into().unwrap());
        }
        // Store them back in pivoted order.
        for (i, dst) in block.chunks_exact_mut(4).enumerate() {
            dst.copy_from_slice(&words[PIVOT_ABCD[i]].to_ne_bytes());
        }
    }
}

/// Undo `shuffle_in`: restore each 64-byte block's natural word order.
pub(crate) fn shuffle_out(b: &mut [u8]) {
    for block in b.chunks_exact_mut(64) {
        // Copy the block's sixteen 32-bit words out first.
        let mut words = [0u32; 16];
        for (slot, bytes) in words.iter_mut().zip(block.chunks_exact(4)) {
            *slot = u32::from_ne_bytes(bytes.try_into().unwrap());
        }
        // Store them back using the inverse permutation.
        for (i, dst) in block.chunks_exact_mut(4).enumerate() {
            dst.copy_from_slice(&words[INVERSE_PIVOT_ABCD[i]].to_ne_bytes());
        }
    }
}

/// BlockMix for scrypt using wasm32 simd128 intrinsics.
///
/// Operates on data already permuted into column-major order by
/// `shuffle_in` (see `pivot`); callers must apply `shuffle_out` to undo it.
/// input - the (shuffled) input vector; length must be a multiple of 128.
/// output - the (shuffled) output vector; same length as input.
pub(crate) fn scrypt_block_mix(input: &[u8], output: &mut [u8]) {
    use core::arch::wasm32::*;

    // Rotate each u32 lane left by $amt via shift-left OR shift-right.
    macro_rules! u32x4_rol {
        ($w:expr, $amt:literal) => {{
            let w = $w;
            v128_or(u32x4_shl(w, $amt), u32x4_shr(w, 32 - $amt))
        }};
    }

    // X is initialized with the last 64-byte block of the input.
    let last_block = &input[input.len() - 64..];

    let mut a = unsafe { v128_load(last_block.as_ptr().cast()) };
    let mut b = unsafe { v128_load(last_block.as_ptr().add(16).cast()) };
    let mut c = unsafe { v128_load(last_block.as_ptr().add(32).cast()) };
    let mut d = unsafe { v128_load(last_block.as_ptr().add(48).cast()) };

    for (i, chunk) in input.chunks(64).enumerate() {
        // BlockMix interleave: even-indexed blocks land in the first half
        // of the output, odd-indexed blocks in the second half.
        let pos = if i % 2 == 0 {
            (i / 2) * 64
        } else {
            (i / 2) * 64 + input.len() / 2
        };

        unsafe {
            let chunk_a = v128_load(chunk.as_ptr().cast());
            let chunk_b = v128_load(chunk.as_ptr().add(16).cast());
            let chunk_c = v128_load(chunk.as_ptr().add(32).cast());
            let chunk_d = v128_load(chunk.as_ptr().add(48).cast());

            // X <- X xor B[i]
            a = v128_xor(a, chunk_a);
            b = v128_xor(b, chunk_b);
            c = v128_xor(c, chunk_c);
            d = v128_xor(d, chunk_d);

            // Keep the pre-round state for the Salsa20 feed-forward below.
            let saves = [a, b, c, d];

            // Eight Salsa20 rounds (rotation constants 7/9/13/18) on the
            // column-major state; the lane shuffles plus the b/d swap
            // re-align the diagonals so the same four quarter-round steps
            // serve alternating round orientations.
            for _ in 0..8 {
                b = v128_xor(b, u32x4_rol!(u32x4_add(a, d), 7));
                c = v128_xor(c, u32x4_rol!(u32x4_add(b, a), 9));
                d = v128_xor(d, u32x4_rol!(u32x4_add(c, b), 13));
                a = v128_xor(a, u32x4_rol!(u32x4_add(d, c), 18));

                d = i32x4_shuffle::<1, 2, 3, 0>(d, d);
                c = i32x4_shuffle::<2, 3, 0, 1>(c, c);
                b = i32x4_shuffle::<3, 0, 1, 2>(b, b);

                (b, d) = (d, b);
            }

            // Salsa20 feed-forward: add the saved pre-round state.
            a = u32x4_add(a, saves[0]);
            b = u32x4_add(b, saves[1]);
            c = u32x4_add(c, saves[2]);
            d = u32x4_add(d, saves[3]);

            v128_store(output.as_mut_ptr().add(pos).cast(), a);
            v128_store(output.as_mut_ptr().add(pos + 16).cast(), b);
            v128_store(output.as_mut_ptr().add(pos + 32).cast(), c);
            v128_store(output.as_mut_ptr().add(pos + 48).cast(), d);
        }
    }
}
42 changes: 42 additions & 0 deletions scrypt/src/block_mix/soft.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/// Execute the BlockMix operation
/// input - the input vector. The length must be a multiple of 128.
/// output - the output vector. Must be the same length as input.
/// Execute the BlockMix operation
/// input - the input vector. The length must be a multiple of 128.
/// output - the output vector. Must be the same length as input.
pub(crate) fn scrypt_block_mix(input: &[u8], output: &mut [u8]) {
    use salsa20::{
        SalsaCore,
        cipher::{StreamCipherCore, typenum::U4},
    };

    // Salsa20/8: the reduced-round Salsa20 variant used by scrypt.
    type Salsa20_8 = SalsaCore<U4>;

    let half = input.len() / 2;

    // X starts as the last 64-byte block of the input.
    let mut state = [0u8; 64];
    state.copy_from_slice(&input[input.len() - 64..]);

    let mut mixed = [0u8; 64];

    for (idx, block) in input.chunks(64).enumerate() {
        // X xor B[idx]
        xor(&state, block, &mut mixed);

        // Reinterpret the 64 bytes as sixteen little-endian u32 words.
        let mut words = [0u32; 16];
        for (word, bytes) in words.iter_mut().zip(mixed.chunks_exact(4)) {
            *word = u32::from_le_bytes(bytes.try_into().unwrap());
        }

        // X <- Salsa20/8(X xor B[idx])
        Salsa20_8::from_raw_state(words).write_keystream_block((&mut state).into());

        // Even-indexed blocks go to the first half of the output,
        // odd-indexed blocks to the second half.
        let pos = if idx % 2 == 0 {
            (idx / 2) * 64
        } else {
            (idx / 2) * 64 + half
        };

        output[pos..pos + 64].copy_from_slice(&state);
    }
}

/// Byte-wise XOR of `x` and `y` written into `output`; stops at the
/// shortest of the three slices.
fn xor(x: &[u8], y: &[u8], output: &mut [u8]) {
    for (out, (&a, &b)) in output.iter_mut().zip(x.iter().zip(y.iter())) {
        *out = a ^ b;
    }
}
Loading