diff --git a/.github/workflows/aes.yml b/.github/workflows/aes.yml index 6ff77005..c48bae6e 100644 --- a/.github/workflows/aes.yml +++ b/.github/workflows/aes.yml @@ -16,6 +16,9 @@ defaults: env: CARGO_INCREMENTAL: 0 RUSTFLAGS: "-Dwarnings" + # NOTE: The mirror number changes with each version so keep these in sync + SDE_FULL_VERSION_MIRROR: "859732" + SDE_FULL_VERSION: "9.58.0-2025-06-16" # Cancels CI jobs when new commits are pushed to a PR branch concurrency: @@ -102,6 +105,85 @@ jobs: - run: cargo test --target ${{ matrix.target }} --features hazmat - run: cargo test --target ${{ matrix.target }} --all-features + # Tests for the VAES AVX backend + vaes256: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - target: x86_64-unknown-linux-gnu + rust: stable + RUSTFLAGS: "-Dwarnings --cfg aes_avx256" + env: + CARGO_INCREMENTAL: 0 + RUSTFLAGS: ${{ matrix.RUSTFLAGS }} + steps: + - uses: actions/checkout@v4 + - name: Install Intel SDE + run: | + curl -JLO "https://downloadmirror.intel.com/${{ env.SDE_FULL_VERSION_MIRROR }}/sde-external-${{ env.SDE_FULL_VERSION }}-lin.tar.xz" + tar xvf sde-external-${{ env.SDE_FULL_VERSION }}-lin.tar.xz -C /opt + echo "/opt/sde-external-${{ env.SDE_FULL_VERSION }}-lin" >> $GITHUB_PATH + - uses: RustCrypto/actions/cargo-cache@master + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + targets: ${{ matrix.target }} + # NOTE: Write a `.cargo/config.toml` to configure the target for VAES + # NOTE: We use intel-sde as the runner since not all GitHub CI hosts support AVX512 + - name: write .cargo/config.toml + shell: bash + run: | + cd ../aes/.. + mkdir -p .cargo + echo '[target.${{ matrix.target }}]' > .cargo/config.toml + echo 'runner = "sde64 -future --"' >> .cargo/config.toml + - run: ${{ matrix.deps }} + - run: cargo test --target ${{ matrix.target }} + - run: cargo test --target ${{ matrix.target }} --features hazmat + - run: cargo test --target ${{ matrix.target }} --all-features + + # Tests for the VAES AVX512 backend + vaes512: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - target: x86_64-unknown-linux-gnu + rust: stable + RUSTFLAGS: "-Dwarnings --cfg aes_avx512" + - target: x86_64-unknown-linux-gnu + rust: stable + RUSTFLAGS: "-Dwarnings --cfg aes_avx256 --cfg aes_avx512" + env: + CARGO_INCREMENTAL: 0 + RUSTFLAGS: ${{ matrix.RUSTFLAGS }} + steps: + - uses: actions/checkout@v4 + - name: Install Intel SDE + run: | + curl -JLO "https://downloadmirror.intel.com/${{ env.SDE_FULL_VERSION_MIRROR }}/sde-external-${{ env.SDE_FULL_VERSION }}-lin.tar.xz" + tar xvf sde-external-${{ env.SDE_FULL_VERSION }}-lin.tar.xz -C /opt + echo "/opt/sde-external-${{ env.SDE_FULL_VERSION }}-lin" >> $GITHUB_PATH + - uses: RustCrypto/actions/cargo-cache@master + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + targets: ${{ matrix.target }} + # NOTE: Write a `.cargo/config.toml` to configure the target for VAES + # NOTE: We use intel-sde as the runner since not all GitHub CI hosts support AVX512 + - name: write .cargo/config.toml + shell: bash + run: | + cd ../aes/.. 
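+          # NOTE: `cd ../aes/..` resolves to the repository root (assuming this
+          # step starts in the `aes/` crate directory), so the runner configured
+          # below applies to the whole workspace.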
+ mkdir -p .cargo + echo '[target.${{ matrix.target }}]' > .cargo/config.toml + echo 'runner = "sde64 -future --"' >> .cargo/config.toml + - run: ${{ matrix.deps }} + - run: cargo test --target ${{ matrix.target }} + - run: cargo test --target ${{ matrix.target }} --features hazmat + - run: cargo test --target ${{ matrix.target }} --all-features + # Tests for CPU feature autodetection with fallback to portable software implementation autodetect: runs-on: ubuntu-latest @@ -165,7 +247,6 @@ jobs: - run: cargo test --target ${{ matrix.target }} - run: cargo test --target ${{ matrix.target }} --all-features - # Cross-compiled tests cross: strategy: diff --git a/aes/Cargo.toml b/aes/Cargo.toml index 2818b4b9..f6af59b2 100644 --- a/aes/Cargo.toml +++ b/aes/Cargo.toml @@ -31,7 +31,7 @@ hazmat = [] # Expose cryptographically hazardous APIs [lints.rust.unexpected_cfgs] level = "warn" -check-cfg = ["cfg(aes_compact)", "cfg(aes_force_soft)"] +check-cfg = ["cfg(aes_compact)", "cfg(aes_force_soft)", "cfg(aes_avx256)", "cfg(aes_avx512)"] [package.metadata.docs.rs] all-features = true diff --git a/aes/src/armv8.rs b/aes/src/armv8.rs index 1a79fa25..995e6bae 100644 --- a/aes/src/armv8.rs +++ b/aes/src/armv8.rs @@ -25,6 +25,13 @@ use cipher::{ }; use core::fmt; +pub(crate) mod features { + cpufeatures::new!(features_aes, "aes"); + pub(crate) mod aes { + pub use super::features_aes::*; + } +} + impl_backends!( enc_name = Aes128BackEnc, dec_name = Aes128BackDec, @@ -86,18 +93,6 @@ macro_rules! define_aes_impl { decrypt: $name_back_dec, } - impl $name { - #[inline(always)] - pub(crate) fn get_enc_backend(&self) -> &$name_back_enc { - &self.encrypt - } - - #[inline(always)] - pub(crate) fn get_dec_backend(&self) -> &$name_back_dec { - &self.decrypt - } - } - impl KeySizeUser for $name { type KeySize = $key_size; } @@ -182,13 +177,6 @@ macro_rules! define_aes_impl { backend: $name_back_enc, } - impl $name_enc { - #[inline(always)] - pub(crate) fn get_enc_backend(&self) -> &$name_back_enc { - &self.backend - } - } - impl KeySizeUser for $name_enc { type KeySize = $key_size; } @@ -248,13 +236,6 @@ macro_rules! define_aes_impl { backend: $name_back_dec, } - impl $name_dec { - #[inline(always)] - pub(crate) fn get_dec_backend(&self) -> &$name_back_dec { - &self.backend - } - } - impl KeySizeUser for $name_dec { type KeySize = $key_size; } diff --git a/aes/src/autodetect.rs b/aes/src/autodetect.rs index 802680ab..3f8a2a42 100644 --- a/aes/src/autodetect.rs +++ b/aes/src/autodetect.rs @@ -12,12 +12,10 @@ use core::fmt; use core::mem::ManuallyDrop; #[cfg(target_arch = "aarch64")] -use crate::armv8 as intrinsics; +use crate::armv8 as arch; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -use crate::ni as intrinsics; - -cpufeatures::new!(aes_intrinsics, "aes"); +use crate::x86 as arch; macro_rules! define_aes_impl { ( @@ -29,21 +27,21 @@ macro_rules! define_aes_impl { doc = $doc:expr, ) => { mod $module { - use super::{intrinsics, soft}; + use super::{arch, soft}; use core::mem::ManuallyDrop; pub(super) union Inner { - pub(super) intrinsics: ManuallyDrop, + pub(super) arch: ManuallyDrop, pub(super) soft: ManuallyDrop, } pub(super) union InnerEnc { - pub(super) intrinsics: ManuallyDrop, + pub(super) arch: ManuallyDrop, pub(super) soft: ManuallyDrop, } pub(super) union InnerDec { - pub(super) intrinsics: ManuallyDrop, + pub(super) arch: ManuallyDrop, pub(super) soft: ManuallyDrop, } } @@ -52,7 +50,7 @@ macro_rules! 
define_aes_impl { #[doc = "block cipher"] pub struct $name { inner: $module::Inner, - token: aes_intrinsics::InitToken, + token: arch::features::aes::InitToken, } impl KeySizeUser for $name { @@ -70,9 +68,7 @@ macro_rules! define_aes_impl { use core::ops::Deref; let inner = if enc.token.get() { $module::Inner { - intrinsics: ManuallyDrop::new(unsafe { - enc.inner.intrinsics.deref().into() - }), + arch: ManuallyDrop::new(unsafe { enc.inner.arch.deref().into() }), } } else { $module::Inner { @@ -90,11 +86,11 @@ macro_rules! define_aes_impl { impl KeyInit for $name { #[inline] fn new(key: &Key) -> Self { - let (token, aesni_present) = aes_intrinsics::init_get(); + let (token, aes_features) = arch::features::aes::init_get(); - let inner = if aesni_present { + let inner = if aes_features { $module::Inner { - intrinsics: ManuallyDrop::new(intrinsics::$name::new(key)), + arch: ManuallyDrop::new(arch::$name::new(key)), } } else { $module::Inner { @@ -115,7 +111,7 @@ macro_rules! define_aes_impl { fn clone(&self) -> Self { let inner = if self.token.get() { $module::Inner { - intrinsics: unsafe { self.inner.intrinsics.clone() }, + arch: unsafe { self.inner.arch.clone() }, } } else { $module::Inner { @@ -136,38 +132,20 @@ macro_rules! define_aes_impl { impl BlockCipherEncrypt for $name { fn encrypt_with_backend(&self, f: impl BlockCipherEncClosure) { - unsafe { - if self.token.get() { - #[target_feature(enable = "aes")] - unsafe fn inner( - state: &intrinsics::$name, - f: impl BlockCipherEncClosure, - ) { - f.call(state.get_enc_backend()); - } - inner(&self.inner.intrinsics, f); - } else { - f.call(&self.inner.soft.get_enc_backend()); - } + if self.token.get() { + unsafe { &self.inner.arch }.encrypt_with_backend(f) + } else { + unsafe { &self.inner.soft }.encrypt_with_backend(f) } } } impl BlockCipherDecrypt for $name { fn decrypt_with_backend(&self, f: impl BlockCipherDecClosure) { - unsafe { - if self.token.get() { - #[target_feature(enable = "aes")] - unsafe fn inner( - state: &intrinsics::$name, - f: impl BlockCipherDecClosure, - ) { - f.call(state.get_dec_backend()); - } - inner(&self.inner.intrinsics, f); - } else { - f.call(&self.inner.soft.get_dec_backend()); - } + if self.token.get() { + unsafe { &self.inner.arch }.decrypt_with_backend(f) + } else { + unsafe { &self.inner.soft }.decrypt_with_backend(f) } } } @@ -188,7 +166,7 @@ macro_rules! define_aes_impl { #[inline] fn drop(&mut self) { if self.token.get() { - unsafe { ManuallyDrop::drop(&mut self.inner.intrinsics) }; + unsafe { ManuallyDrop::drop(&mut self.inner.arch) }; } else { unsafe { ManuallyDrop::drop(&mut self.inner.soft) }; }; @@ -202,7 +180,7 @@ macro_rules! define_aes_impl { #[doc = "block cipher (encrypt-only)"] pub struct $name_enc { inner: $module::InnerEnc, - token: aes_intrinsics::InitToken, + token: arch::features::aes::InitToken, } impl KeySizeUser for $name_enc { @@ -212,11 +190,11 @@ macro_rules! define_aes_impl { impl KeyInit for $name_enc { #[inline] fn new(key: &Key) -> Self { - let (token, aesni_present) = aes_intrinsics::init_get(); + let (token, aes_features) = arch::features::aes::init_get(); - let inner = if aesni_present { + let inner = if aes_features { $module::InnerEnc { - intrinsics: ManuallyDrop::new(intrinsics::$name_enc::new(key)), + arch: ManuallyDrop::new(arch::$name_enc::new(key)), } } else { $module::InnerEnc { @@ -237,7 +215,7 @@ macro_rules! 
define_aes_impl { fn clone(&self) -> Self { let inner = if self.token.get() { $module::InnerEnc { - intrinsics: unsafe { self.inner.intrinsics.clone() }, + arch: unsafe { self.inner.arch.clone() }, } } else { $module::InnerEnc { @@ -258,19 +236,10 @@ macro_rules! define_aes_impl { impl BlockCipherEncrypt for $name_enc { fn encrypt_with_backend(&self, f: impl BlockCipherEncClosure) { - unsafe { - if self.token.get() { - #[target_feature(enable = "aes")] - unsafe fn inner( - state: &intrinsics::$name_enc, - f: impl BlockCipherEncClosure, - ) { - f.call(state.get_enc_backend()); - } - inner(&self.inner.intrinsics, f); - } else { - f.call(&self.inner.soft.get_enc_backend()); - } + if self.token.get() { + unsafe { &self.inner.arch }.encrypt_with_backend(f) + } else { + unsafe { &self.inner.soft }.encrypt_with_backend(f) } } } @@ -291,7 +260,7 @@ macro_rules! define_aes_impl { #[inline] fn drop(&mut self) { if self.token.get() { - unsafe { ManuallyDrop::drop(&mut self.inner.intrinsics) }; + unsafe { ManuallyDrop::drop(&mut self.inner.arch) }; } else { unsafe { ManuallyDrop::drop(&mut self.inner.soft) }; }; @@ -305,7 +274,7 @@ macro_rules! define_aes_impl { #[doc = "block cipher (decrypt-only)"] pub struct $name_dec { inner: $module::InnerDec, - token: aes_intrinsics::InitToken, + token: arch::features::aes::InitToken, } impl KeySizeUser for $name_dec { @@ -324,9 +293,7 @@ macro_rules! define_aes_impl { use core::ops::Deref; let inner = if enc.token.get() { $module::InnerDec { - intrinsics: ManuallyDrop::new(unsafe { - enc.inner.intrinsics.deref().into() - }), + arch: ManuallyDrop::new(unsafe { enc.inner.arch.deref().into() }), } } else { $module::InnerDec { @@ -344,11 +311,11 @@ macro_rules! define_aes_impl { impl KeyInit for $name_dec { #[inline] fn new(key: &Key) -> Self { - let (token, aesni_present) = aes_intrinsics::init_get(); + let (token, aes_features) = arch::features::aes::init_get(); - let inner = if aesni_present { + let inner = if aes_features { $module::InnerDec { - intrinsics: ManuallyDrop::new(intrinsics::$name_dec::new(key)), + arch: ManuallyDrop::new(arch::$name_dec::new(key)), } } else { $module::InnerDec { @@ -369,7 +336,7 @@ macro_rules! define_aes_impl { fn clone(&self) -> Self { let inner = if self.token.get() { $module::InnerDec { - intrinsics: unsafe { self.inner.intrinsics.clone() }, + arch: unsafe { self.inner.arch.clone() }, } } else { $module::InnerDec { @@ -390,19 +357,10 @@ macro_rules! define_aes_impl { impl BlockCipherDecrypt for $name_dec { fn decrypt_with_backend(&self, f: impl BlockCipherDecClosure) { - unsafe { - if self.token.get() { - #[target_feature(enable = "aes")] - unsafe fn inner( - state: &intrinsics::$name_dec, - f: impl BlockCipherDecClosure, - ) { - f.call(state.get_dec_backend()); - } - inner(&self.inner.intrinsics, f); - } else { - f.call(&self.inner.soft.get_dec_backend()); - } + if self.token.get() { + unsafe { &self.inner.arch }.decrypt_with_backend(f) + } else { + unsafe { &self.inner.soft }.decrypt_with_backend(f) } } } @@ -423,7 +381,7 @@ macro_rules! 
define_aes_impl { #[inline] fn drop(&mut self) { if self.token.get() { - unsafe { ManuallyDrop::drop(&mut self.inner.intrinsics) }; + unsafe { ManuallyDrop::drop(&mut self.inner.arch) }; } else { unsafe { ManuallyDrop::drop(&mut self.inner.soft) }; }; diff --git a/aes/src/hazmat.rs b/aes/src/hazmat.rs index 068cbd40..39631f83 100644 --- a/aes/src/hazmat.rs +++ b/aes/src/hazmat.rs @@ -21,7 +21,7 @@ pub type Block8 = cipher::array::Array; use crate::armv8::hazmat as intrinsics; #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(aes_force_soft)))] -use crate::ni::hazmat as intrinsics; +use crate::x86::ni::hazmat as intrinsics; #[cfg(all( any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"), diff --git a/aes/src/lib.rs b/aes/src/lib.rs index 01139578..01903b7f 100644 --- a/aes/src/lib.rs +++ b/aes/src/lib.rs @@ -35,15 +35,21 @@ //! runtime. On other platforms the `aes` target feature must be enabled via //! RUSTFLAGS. //! -//! ## `x86`/`x86_64` intrinsics (AES-NI) +//! ## `x86`/`x86_64` intrinsics (AES-NI and VAES) //! By default this crate uses runtime detection on `i686`/`x86_64` targets -//! in order to determine if AES-NI is available, and if it is not, it will -//! fallback to using a constant-time software implementation. +//! in order to determine if AES-NI and VAES are available, and if they are +//! not, it will fallback to using a constant-time software implementation. +//! +//! Passing `RUSTFLAGS=-Ctarget-feature=+aes,+ssse3` explicitly at +//! compile-time will override runtime detection and ensure that AES-NI is +//! used or passing `RUSTFLAGS=-Ctarget-feature=+aes,+avx512f,+ssse3,+vaes` +//! will ensure that AESNI and VAES are always used. +//! +//! Note: Enabling VAES256 or VAES512 still requires specifying `--cfg +//! aes_avx256` or `--cfg aes_avx512` explicitly. //! -//! Passing `RUSTFLAGS=-C target-feature=+aes,+ssse3` explicitly at compile-time -//! will override runtime detection and ensure that AES-NI is always used. //! Programs built in this manner will crash with an illegal instruction on -//! CPUs which do not have AES-NI enabled. +//! CPUs which do not have AES-NI and VAES enabled. //! //! Note: runtime detection is not possible on SGX targets. Please use the //! aforementioned `RUSTFLAGS` to leverage AES-NI and VAES on these targets. @@ -134,8 +140,8 @@ cfg_if! 
{ any(target_arch = "x86", target_arch = "x86_64"), not(aes_force_soft) ))] { + mod x86; mod autodetect; - mod ni; pub use autodetect::*; } else { pub use soft::*; @@ -216,19 +222,19 @@ mod tests { #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(aes_force_soft)))] { - use super::ni; + use super::x86; cpufeatures::new!(aes_intrinsics, "aes"); if aes_intrinsics::get() { - test_for(ni::Aes128::new(&key_128)); - test_for(ni::Aes128Enc::new(&key_128)); - test_for(ni::Aes128Dec::new(&key_128)); - test_for(ni::Aes192::new(&key_192)); - test_for(ni::Aes192Enc::new(&key_192)); - test_for(ni::Aes192Dec::new(&key_192)); - test_for(ni::Aes256::new(&key_256)); - test_for(ni::Aes256Enc::new(&key_256)); - test_for(ni::Aes256Dec::new(&key_256)); + test_for(x86::Aes128::new(&key_128)); + test_for(x86::Aes128Enc::new(&key_128)); + test_for(x86::Aes128Dec::new(&key_128)); + test_for(x86::Aes192::new(&key_192)); + test_for(x86::Aes192Enc::new(&key_192)); + test_for(x86::Aes192Dec::new(&key_192)); + test_for(x86::Aes256::new(&key_256)); + test_for(x86::Aes256Enc::new(&key_256)); + test_for(x86::Aes256Dec::new(&key_256)); } } diff --git a/aes/src/soft.rs b/aes/src/soft.rs index f8d60617..9e3fe2be 100644 --- a/aes/src/soft.rs +++ b/aes/src/soft.rs @@ -45,18 +45,6 @@ macro_rules! define_aes_impl { keys: $fixslice_keys, } - impl $name { - #[inline(always)] - pub(crate) fn get_enc_backend(&self) -> $name_back_enc<'_> { - $name_back_enc(self) - } - - #[inline(always)] - pub(crate) fn get_dec_backend(&self) -> $name_back_dec<'_> { - $name_back_dec(self) - } - } - impl KeySizeUser for $name { type KeySize = $key_size; } @@ -81,13 +69,13 @@ macro_rules! define_aes_impl { impl BlockCipherEncrypt for $name { fn encrypt_with_backend(&self, f: impl BlockCipherEncClosure) { - f.call(&self.get_enc_backend()) + f.call(&$name_back_enc(self)) } } impl BlockCipherDecrypt for $name { fn decrypt_with_backend(&self, f: impl BlockCipherDecClosure) { - f.call(&self.get_dec_backend()) + f.call(&$name_back_dec(self)) } } @@ -135,13 +123,6 @@ macro_rules! define_aes_impl { inner: $name, } - impl $name_enc { - #[inline(always)] - pub(crate) fn get_enc_backend(&self) -> $name_back_enc<'_> { - self.inner.get_enc_backend() - } - } - impl KeySizeUser for $name_enc { type KeySize = $key_size; } @@ -165,7 +146,7 @@ macro_rules! define_aes_impl { impl BlockCipherEncrypt for $name_enc { fn encrypt_with_backend(&self, f: impl BlockCipherEncClosure) { - f.call(&self.get_enc_backend()) + f.call(&mut $name_back_enc(&self.inner)) } } @@ -191,13 +172,6 @@ macro_rules! define_aes_impl { inner: $name, } - impl $name_dec { - #[inline(always)] - pub(crate) fn get_dec_backend(&self) -> $name_back_dec<'_> { - self.inner.get_dec_backend() - } - } - impl KeySizeUser for $name_dec { type KeySize = $key_size; } @@ -237,7 +211,7 @@ macro_rules! 
define_aes_impl { impl BlockCipherDecrypt for $name_dec { fn decrypt_with_backend(&self, f: impl BlockCipherDecClosure) { - f.call(&self.get_dec_backend()); + f.call(&$name_back_dec(&self.inner)); } } diff --git a/aes/src/x86.rs b/aes/src/x86.rs new file mode 100644 index 00000000..6fa1f1fb --- /dev/null +++ b/aes/src/x86.rs @@ -0,0 +1,728 @@ +pub(crate) mod ni; +#[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] +pub(crate) mod vaes256; +#[cfg(all(target_arch = "x86_64", aes_avx512))] +pub(crate) mod vaes512; + +#[cfg(target_arch = "x86")] +use core::arch::x86 as arch; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64 as arch; + +use self::arch::*; +use crate::Block; +#[cfg(all(target_arch = "x86_64", aes_avx512))] +use cipher::consts::U64; +use cipher::{ + AlgorithmName, BlockCipherDecBackend, BlockCipherDecClosure, BlockCipherDecrypt, + BlockCipherEncBackend, BlockCipherEncClosure, BlockCipherEncrypt, BlockSizeUser, InOut, Key, + KeyInit, KeySizeUser, ParBlocksSizeUser, + consts::{U9, U16, U24, U32}, + crypto_common::WeakKeyError, +}; +#[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] +use cipher::{Array, InOutBuf, consts::U30, typenum::Unsigned}; +#[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] +use core::cell::OnceCell; +use core::fmt; + +#[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] +pub(crate) type Block30 = Array; +#[cfg(all(target_arch = "x86_64", aes_avx512))] +pub(crate) type Block64 = Array; + +pub(crate) mod features { + cpufeatures::new!(features_aes, "aes"); + cpufeatures::new!(features_avx, "avx"); + cpufeatures::new!(features_avx512f, "avx512f"); + cpufeatures::new!(features_vaes, "vaes"); + pub(crate) mod aes { + pub use super::features_aes::*; + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + pub(crate) mod avx { + pub use super::features_avx::*; + } + #[cfg(all(target_arch = "x86_64", aes_avx512))] + pub(crate) mod avx512f { + pub use super::features_avx512f::*; + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + pub(crate) mod vaes { + pub use super::features_vaes::*; + } +} + +type Simd128RoundKeys = [__m128i; ROUNDS]; +#[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] +type Simd256RoundKeys = [__m256i; ROUNDS]; +#[cfg(all(target_arch = "x86_64", aes_avx512))] +type Simd512RoundKeys = [__m512i; ROUNDS]; + +#[derive(Clone)] +enum Backend { + Ni, + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + Vaes256, + #[cfg(all(target_arch = "x86_64", aes_avx512))] + Vaes512, +} + +#[derive(Clone, Copy)] +struct Features { + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + avx: self::features::avx::InitToken, + #[cfg(all(target_arch = "x86_64", aes_avx512))] + avx512f: self::features::avx512f::InitToken, + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + vaes: self::features::vaes::InitToken, +} + +impl Features { + fn new() -> Self { + Self { + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + avx: self::features::avx::init(), + #[cfg(all(target_arch = "x86_64", aes_avx512))] + avx512f: self::features::avx512f::init(), + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + vaes: self::features::vaes::init(), + } + } + + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + fn has_vaes256(&self) -> bool { + #[cfg(target_arch = "x86_64")] + if cfg!(aes_avx256) && self.vaes.get() && self.avx.get() { + return true; + } + false + } + + #[cfg(all(target_arch = 
"x86_64", aes_avx512))] + fn has_vaes512(&self) -> bool { + #[cfg(target_arch = "x86_64")] + if cfg!(aes_avx512) && self.vaes.get() && self.avx512f.get() { + return true; + } + false + } + + fn dispatch(&self) -> Backend { + #[cfg(all(target_arch = "x86_64", aes_avx512))] + if self.has_vaes512() { + return self::Backend::Vaes512; + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + if self.has_vaes256() { + return self::Backend::Vaes256; + } + Backend::Ni + } +} + +macro_rules! define_aes_impl { + ( + $name:tt, + $name_enc:ident, + $name_dec:ident, + $name_backend:ident, + $module:tt, + $key_size:ty, + $rounds:tt, + $doc:expr $(,)? + ) => { + mod $name_backend { + use super::*; + + #[derive(Clone)] + pub(crate) struct Ni<'a> { + pub(crate) keys: &'a Simd128RoundKeys<$rounds>, + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + impl<'a> Ni<'a> { + pub const fn par_blocks(&self) -> usize { + ::ParBlocksSize::USIZE + } + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + impl<'a> From<&Vaes256<'a>> for Ni<'a> { + fn from(backend: &Vaes256<'a>) -> Self { + Self { keys: backend.keys } + } + } + + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + #[derive(Clone)] + pub(crate) struct Vaes256<'a> { + #[allow(unused)] // TODO: remove once cfg flags are removed + pub(crate) features: Features, + pub(crate) keys: &'a Simd128RoundKeys<$rounds>, + pub(crate) simd_256_keys: OnceCell>, + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + impl<'a> Vaes256<'a> { + #[allow(unused)] // TODO: remove once cfg flags are removed + pub const fn par_blocks(&self) -> usize { + ::ParBlocksSize::USIZE + } + } + #[cfg(all(target_arch = "x86_64", aes_avx512))] + impl<'a> From<&Vaes512<'a>> for Vaes256<'a> { + fn from(backend: &Vaes512<'a>) -> Self { + Self { + features: backend.features, + keys: backend.keys, + simd_256_keys: OnceCell::new(), + } + } + } + + #[cfg(all(target_arch = "x86_64", aes_avx512))] + pub(crate) struct Vaes512<'a> { + pub(crate) features: Features, + pub(crate) keys: &'a Simd128RoundKeys<$rounds>, + pub(crate) simd_512_keys: OnceCell>, + } + } + + #[doc=$doc] + #[doc = "block cipher"] + #[derive(Clone)] + pub struct $name { + encrypt: $name_enc, + decrypt: $name_dec, + } + + #[cfg(feature = "zeroize")] + impl zeroize::ZeroizeOnDrop for $name {} + + impl KeySizeUser for $name { + type KeySize = $key_size; + } + + impl KeyInit for $name { + #[inline] + fn new(key: &Key) -> Self { + let encrypt = $name_enc::new(key); + let decrypt = $name_dec::from(&encrypt); + Self { encrypt, decrypt } + } + + #[inline] + fn weak_key_test(key: &Key) -> Result<(), WeakKeyError> { + crate::weak_key_test(&key.0) + } + } + + impl From<$name_enc> for $name { + #[inline] + fn from(encrypt: $name_enc) -> $name { + let decrypt = (&encrypt).into(); + Self { encrypt, decrypt } + } + } + + impl From<&$name_enc> for $name { + #[inline] + fn from(encrypt: &$name_enc) -> $name { + let decrypt = encrypt.into(); + let encrypt = encrypt.clone(); + Self { encrypt, decrypt } + } + } + + impl BlockSizeUser for $name { + type BlockSize = U16; + } + + impl BlockCipherEncrypt for $name { + #[inline] + fn encrypt_with_backend(&self, f: impl BlockCipherEncClosure) { + self.encrypt.encrypt_with_backend(f) + } + } + + impl BlockCipherDecrypt for $name { + #[inline] + fn decrypt_with_backend(&self, f: impl BlockCipherDecClosure) { + self.decrypt.decrypt_with_backend(f) + } + } + + impl fmt::Debug for $name { + fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.write_str(concat!(stringify!($name), " { .. }")) + } + } + + impl AlgorithmName for $name { + fn write_alg_name(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(stringify!($name)) + } + } + + #[doc=$doc] + #[doc = "block cipher (encrypt-only)"] + #[derive(Clone)] + pub struct $name_enc { + keys: Simd128RoundKeys<$rounds>, + features: Features, + } + + impl Drop for $name_enc { + fn drop(&mut self) { + #[cfg(feature = "zeroize")] + unsafe { + zeroize::zeroize_flat_type(&mut self.keys) + } + } + } + + #[cfg(feature = "zeroize")] + impl zeroize::ZeroizeOnDrop for $name_enc {} + + impl KeySizeUser for $name_enc { + type KeySize = $key_size; + } + + impl KeyInit for $name_enc { + #[inline] + fn new(key: &Key) -> Self { + // SAFETY: we enforce that this code is called only when + // target features required by `expand` were properly checked. + Self { + keys: unsafe { self::ni::expand::$module::expand_key(key.as_ref()) }, + features: Features::new(), + } + } + + #[inline] + fn weak_key_test(key: &Key) -> Result<(), WeakKeyError> { + crate::weak_key_test(&key.0) + } + } + + impl BlockSizeUser for $name_enc { + type BlockSize = U16; + } + + impl BlockCipherEncrypt for $name_enc { + #[inline] + fn encrypt_with_backend(&self, f: impl BlockCipherEncClosure) { + let features = self.features; + let keys = &self.keys; + match features.dispatch() { + self::Backend::Ni => f.call(&mut $name_backend::Ni { keys }), + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + self::Backend::Vaes256 => f.call(&mut $name_backend::Vaes256 { + features, + keys, + simd_256_keys: OnceCell::new(), + }), + #[cfg(all(target_arch = "x86_64", aes_avx512))] + self::Backend::Vaes512 => f.call(&mut $name_backend::Vaes512 { + features, + keys, + simd_512_keys: OnceCell::new(), + }), + } + } + } + + impl fmt::Debug for $name_enc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.write_str(concat!(stringify!($name_enc), " { .. 
}")) + } + } + + impl AlgorithmName for $name_enc { + fn write_alg_name(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(stringify!($name_enc)) + } + } + + #[doc=$doc] + #[doc = "block cipher (decrypt-only)"] + #[derive(Clone)] + pub struct $name_dec { + keys: Simd128RoundKeys<$rounds>, + features: Features, + } + + impl Drop for $name_dec { + fn drop(&mut self) { + #[cfg(feature = "zeroize")] + unsafe { + zeroize::zeroize_flat_type(&mut self.keys) + } + } + } + + #[cfg(feature = "zeroize")] + impl zeroize::ZeroizeOnDrop for $name_dec {} + + impl KeySizeUser for $name_dec { + type KeySize = $key_size; + } + + impl KeyInit for $name_dec { + #[inline] + fn new(key: &Key) -> Self { + $name_enc::new(key).into() + } + + #[inline] + fn weak_key_test(key: &Key) -> Result<(), WeakKeyError> { + crate::weak_key_test(&key.0) + } + } + + impl From<$name_enc> for $name_dec { + #[inline] + fn from(enc: $name_enc) -> $name_dec { + Self::from(&enc) + } + } + + impl From<&$name_enc> for $name_dec { + #[inline] + fn from(enc: &$name_enc) -> $name_dec { + Self { + keys: unsafe { self::ni::expand::inv_keys(&enc.keys) }, + features: enc.features.clone(), + } + } + } + + impl BlockSizeUser for $name_dec { + type BlockSize = U16; + } + + impl BlockCipherDecrypt for $name_dec { + #[inline] + fn decrypt_with_backend(&self, f: impl BlockCipherDecClosure) { + let features = self.features; + let keys = &self.keys; + match features.dispatch() { + self::Backend::Ni => f.call(&mut $name_backend::Ni { keys }), + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + self::Backend::Vaes256 => f.call(&mut $name_backend::Vaes256 { + features, + keys, + simd_256_keys: OnceCell::new(), + }), + #[cfg(all(target_arch = "x86_64", aes_avx512))] + self::Backend::Vaes512 => f.call(&mut $name_backend::Vaes512 { + features, + keys, + simd_512_keys: OnceCell::new(), + }), + } + } + } + + impl fmt::Debug for $name_dec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.write_str(concat!(stringify!($name_dec), " { .. }")) + } + } + + impl AlgorithmName for $name_dec { + fn write_alg_name(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(stringify!($name_dec)) + } + } + + impl<'a> BlockSizeUser for $name_backend::Ni<'a> { + type BlockSize = U16; + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + impl<'a> BlockSizeUser for $name_backend::Vaes256<'a> { + type BlockSize = U16; + } + #[cfg(all(target_arch = "x86_64", aes_avx512))] + impl<'a> BlockSizeUser for $name_backend::Vaes512<'a> { + type BlockSize = U16; + } + + impl<'a> ParBlocksSizeUser for $name_backend::Ni<'a> { + type ParBlocksSize = U9; + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + impl<'a> ParBlocksSizeUser for $name_backend::Vaes256<'a> { + // Block size of 30 is chosen based on AVX2's 16 YMM registers. + // + // * 1 register holds 2 keys per round (loads interleaved with rounds) + // * 15 registers hold 2 data blocks + // + // This gives (16 - 1 ) * 2 = 30 . + type ParBlocksSize = U30; + } + #[cfg(all(target_arch = "x86_64", aes_avx512))] + impl<'a> ParBlocksSizeUser for $name_backend::Vaes512<'a> { + // Block size of 64 is chosen based on AVX512's 32 ZMM registers. 
+ // + // * 11, 13, 15 registers for keys, correspond to AES-128, AES-192, AES-256 + // * 11, 13, 15 registers hold 4 keys each (no interleaved loading like VAES256) + // * 16 registers hold 4 data blocks + // * 1-4 registers remain unused (could use them but probably not worth it) + // + // This gives (32 - 15 - 1 ) * 4 = 64 . + type ParBlocksSize = U64; + } + + impl<'a> BlockCipherEncBackend for $name_backend::Ni<'a> { + #[inline] + fn encrypt_block(&self, block: InOut<'_, '_, Block>) { + unsafe { + self::ni::encdec::encrypt(self.keys, block); + } + } + #[inline] + fn encrypt_par_blocks(&self, blocks: InOut<'_, '_, cipher::ParBlocks>) { + unsafe { + self::ni::encdec::encrypt_par(self.keys, blocks); + } + } + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + impl<'a> BlockCipherEncBackend for $name_backend::Vaes256<'a> { + #[inline] + fn encrypt_block(&self, block: InOut<'_, '_, Block>) { + unsafe { + self::ni::encdec::encrypt(self.keys, block); + } + } + #[inline] + fn encrypt_par_blocks(&self, blocks: InOut<'_, '_, cipher::ParBlocks>) { + unsafe { + let simd_256_keys = self + .simd_256_keys + .get_or_init(|| vaes256::encdec::broadcast_keys(&self.keys)); + vaes256::encdec::encrypt30(simd_256_keys, blocks); + } + } + #[inline] + fn encrypt_tail_blocks(&self, blocks: InOutBuf<'_, '_, Block>) { + let backend = self; + + let mut rem = blocks.len(); + let (mut iptr, mut optr) = blocks.into_raw(); + + let backend = $name_backend::Ni::from(backend); + while rem >= backend.par_blocks() { + let blocks = unsafe { InOut::from_raw(iptr.cast(), optr.cast()) }; + backend.encrypt_par_blocks(blocks); + rem -= backend.par_blocks(); + iptr = unsafe { iptr.add(backend.par_blocks()) }; + optr = unsafe { optr.add(backend.par_blocks()) }; + } + + while rem > 0 { + let block = unsafe { InOut::from_raw(iptr, optr) }; + backend.encrypt_block(block); + rem -= 1; + iptr = unsafe { iptr.add(1) }; + optr = unsafe { optr.add(1) }; + } + } + } + #[cfg(all(target_arch = "x86_64", aes_avx512))] + impl<'a> BlockCipherEncBackend for $name_backend::Vaes512<'a> { + #[inline] + fn encrypt_block(&self, block: InOut<'_, '_, Block>) { + unsafe { + self::ni::encdec::encrypt(self.keys, block); + } + } + #[inline] + fn encrypt_par_blocks(&self, blocks: InOut<'_, '_, cipher::ParBlocks>) { + unsafe { + let simd_512_keys = self + .simd_512_keys + .get_or_init(|| vaes512::encdec::broadcast_keys(&self.keys)); + vaes512::encdec::encrypt64(simd_512_keys, blocks); + } + } + #[inline] + fn encrypt_tail_blocks(&self, blocks: InOutBuf<'_, '_, Block>) { + let backend = self; + + let mut rem = blocks.len(); + let (mut iptr, mut optr) = blocks.into_raw(); + + let backend = &$name_backend::Vaes256::from(backend); + if backend.features.has_vaes256() { + while rem >= backend.par_blocks() { + let blocks = unsafe { InOut::from_raw(iptr.cast(), optr.cast()) }; + backend.encrypt_par_blocks(blocks); + rem -= backend.par_blocks(); + iptr = unsafe { iptr.add(backend.par_blocks()) }; + optr = unsafe { optr.add(backend.par_blocks()) }; + } + } + + let backend = &$name_backend::Ni::from(backend); + while rem >= backend.par_blocks() { + let blocks = unsafe { InOut::from_raw(iptr.cast(), optr.cast()) }; + backend.encrypt_par_blocks(blocks); + rem -= backend.par_blocks(); + iptr = unsafe { iptr.add(backend.par_blocks()) }; + optr = unsafe { optr.add(backend.par_blocks()) }; + } + + while rem > 0 { + let block = unsafe { InOut::from_raw(iptr, optr) }; + backend.encrypt_block(block); + rem -= 1; + iptr = unsafe { iptr.add(1) }; + optr = 
unsafe { optr.add(1) }; + } + } + } + + impl<'a> BlockCipherDecBackend for $name_backend::Ni<'a> { + #[inline] + fn decrypt_block(&self, block: InOut<'_, '_, Block>) { + unsafe { + self::ni::encdec::decrypt(self.keys, block); + } + } + #[inline] + fn decrypt_par_blocks(&self, blocks: InOut<'_, '_, cipher::ParBlocks>) { + unsafe { + self::ni::encdec::decrypt_par(self.keys, blocks); + } + } + } + #[cfg(all(target_arch = "x86_64", any(aes_avx256, aes_avx512)))] + impl<'a> BlockCipherDecBackend for $name_backend::Vaes256<'a> { + #[inline] + fn decrypt_block(&self, block: InOut<'_, '_, Block>) { + unsafe { + self::ni::encdec::decrypt(self.keys, block); + } + } + #[inline] + fn decrypt_par_blocks(&self, blocks: InOut<'_, '_, cipher::ParBlocks>) { + unsafe { + let simd_256_keys = self + .simd_256_keys + .get_or_init(|| vaes256::encdec::broadcast_keys(&self.keys)); + vaes256::encdec::decrypt30(simd_256_keys, blocks); + } + } + #[inline] + fn decrypt_tail_blocks(&self, blocks: InOutBuf<'_, '_, Block>) { + let backend = self; + + let mut rem = blocks.len(); + let (mut iptr, mut optr) = blocks.into_raw(); + + let backend = $name_backend::Ni::from(backend); + while rem >= backend.par_blocks() { + let blocks = unsafe { InOut::from_raw(iptr.cast(), optr.cast()) }; + backend.decrypt_par_blocks(blocks); + rem -= backend.par_blocks(); + iptr = unsafe { iptr.add(backend.par_blocks()) }; + optr = unsafe { optr.add(backend.par_blocks()) }; + } + + while rem > 0 { + let block = unsafe { InOut::from_raw(iptr, optr) }; + backend.decrypt_block(block); + rem -= 1; + iptr = unsafe { iptr.add(1) }; + optr = unsafe { optr.add(1) }; + } + } + } + #[cfg(all(target_arch = "x86_64", aes_avx512))] + impl<'a> BlockCipherDecBackend for $name_backend::Vaes512<'a> { + #[inline] + fn decrypt_block(&self, block: InOut<'_, '_, Block>) { + unsafe { + self::ni::encdec::decrypt(self.keys, block); + } + } + #[inline] + fn decrypt_par_blocks(&self, blocks: InOut<'_, '_, cipher::ParBlocks>) { + unsafe { + let simd_512_keys = self + .simd_512_keys + .get_or_init(|| vaes512::encdec::broadcast_keys(&self.keys)); + vaes512::encdec::decrypt64(simd_512_keys, blocks); + } + } + #[inline] + fn decrypt_tail_blocks(&self, blocks: InOutBuf<'_, '_, Block>) { + let backend = self; + + let mut rem = blocks.len(); + let (mut iptr, mut optr) = blocks.into_raw(); + + let backend = &$name_backend::Vaes256::from(backend); + if backend.features.has_vaes256() { + while rem >= backend.par_blocks() { + let blocks = unsafe { InOut::from_raw(iptr.cast(), optr.cast()) }; + backend.decrypt_par_blocks(blocks); + rem -= backend.par_blocks(); + iptr = unsafe { iptr.add(backend.par_blocks()) }; + optr = unsafe { optr.add(backend.par_blocks()) }; + } + } + + let backend = &$name_backend::Ni::from(backend); + while rem >= backend.par_blocks() { + let blocks = unsafe { InOut::from_raw(iptr.cast(), optr.cast()) }; + backend.decrypt_par_blocks(blocks); + rem -= backend.par_blocks(); + iptr = unsafe { iptr.add(backend.par_blocks()) }; + optr = unsafe { optr.add(backend.par_blocks()) }; + } + + while rem > 0 { + let block = unsafe { InOut::from_raw(iptr, optr) }; + backend.decrypt_block(block); + rem -= 1; + iptr = unsafe { iptr.add(1) }; + optr = unsafe { optr.add(1) }; + } + } + } + }; +} + +define_aes_impl!( + Aes128, + Aes128Enc, + Aes128Dec, + aes128_backend, + aes128, + U16, + 11, + "AES-128", +); + +define_aes_impl!( + Aes192, + Aes192Enc, + Aes192Dec, + aes192_backend, + aes192, + U24, + 13, + "AES-192", +); + +define_aes_impl!( + Aes256, + Aes256Enc, + Aes256Dec, 
+ aes256_backend, + aes256, + U32, + 15, + "AES-256", +); diff --git a/aes/src/x86/ni.rs b/aes/src/x86/ni.rs new file mode 100644 index 00000000..c35b1b86 --- /dev/null +++ b/aes/src/x86/ni.rs @@ -0,0 +1,24 @@ +//! AES block ciphers implementation using AES-NI instruction set. +//! +//! Ciphers functionality is accessed using `BlockCipher` trait from the +//! [`cipher`](https://docs.rs/cipher) crate. +//! +//! # Vulnerability +//! Lazy FP state restory vulnerability can allow local process to leak content +//! of the FPU register, in which round keys are stored. This vulnerability +//! can be mitigated at the operating system level by installing relevant +//! patches. (i.e. keep your OS updated!) More info: +//! - [Intel advisory](https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00145.html) +//! - [Wikipedia](https://en.wikipedia.org/wiki/Lazy_FP_state_restore) +//! +//! # Related documents +//! - [Intel AES-NI whitepaper](https://software.intel.com/sites/default/files/article/165683/aes-wp-2012-09-22-v01.pdf) +//! - [Use of the AES Instruction Set](https://www.cosic.esat.kuleuven.be/ecrypt/AESday/slides/Use_of_the_AES_Instruction_Set.pdf) + +pub(super) mod encdec; +pub(super) mod expand; +#[cfg(test)] +mod test_expand; + +#[cfg(feature = "hazmat")] +pub(crate) mod hazmat; diff --git a/aes/src/x86/ni/encdec.rs b/aes/src/x86/ni/encdec.rs new file mode 100644 index 00000000..b68ed5f9 --- /dev/null +++ b/aes/src/x86/ni/encdec.rs @@ -0,0 +1,161 @@ +#![allow(unsafe_op_in_unsafe_fn)] + +use crate::Block; +use crate::x86::arch::*; +use cipher::{ + array::{Array, ArraySize}, + inout::InOut, +}; + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn encrypt( + keys: &[__m128i; KEYS], + block: InOut<'_, '_, Block>, +) { + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); + + let (block_in, block_out) = block.into_raw(); + let mut b = _mm_loadu_si128(block_in.cast()); + b = _mm_xor_si128(b, keys[0]); + for &key in &keys[1..KEYS - 1] { + b = _mm_aesenc_si128(b, key); + } + b = _mm_aesenclast_si128(b, keys[KEYS - 1]); + _mm_storeu_si128(block_out.cast(), b); +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn decrypt( + keys: &[__m128i; KEYS], + block: InOut<'_, '_, Block>, +) { + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); + + let (block_in, block_out) = block.into_raw(); + let mut b = _mm_loadu_si128(block_in.cast()); + b = _mm_xor_si128(b, keys[0]); + for &key in &keys[1..KEYS - 1] { + b = _mm_aesdec_si128(b, key); + } + b = _mm_aesdeclast_si128(b, keys[KEYS - 1]); + _mm_storeu_si128(block_out.cast(), b); +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn encrypt_par( + keys: &[__m128i; KEYS], + blocks: InOut<'_, '_, Array>, +) { + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); + + let (blocks_in, blocks_out) = blocks.into_raw(); + let mut b = load(blocks_in); + + // Loop over keys is intentionally not used here to force inlining + xor(&mut b, keys[0]); + aesenc(&mut b, keys[1]); + aesenc(&mut b, keys[2]); + aesenc(&mut b, keys[3]); + aesenc(&mut b, keys[4]); + aesenc(&mut b, keys[5]); + aesenc(&mut b, keys[6]); + aesenc(&mut b, keys[7]); + aesenc(&mut b, keys[8]); + aesenc(&mut b, keys[9]); + if KEYS >= 13 { + aesenc(&mut b, keys[10]); + aesenc(&mut b, keys[11]); + } + if KEYS == 15 { + aesenc(&mut b, keys[12]); + aesenc(&mut b, keys[13]); + } + aesenclast(&mut b, keys[KEYS - 1]); + store(blocks_out, b); +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn decrypt_par( + keys: &[__m128i; KEYS], + blocks: InOut<'_, '_, Array>, +) { + 
assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); + + let (blocks_in, blocks_out) = blocks.into_raw(); + let mut b = load(blocks_in); + + // Loop over keys is intentionally not used here to force inlining + xor(&mut b, keys[0]); + aesdec(&mut b, keys[1]); + aesdec(&mut b, keys[2]); + aesdec(&mut b, keys[3]); + aesdec(&mut b, keys[4]); + aesdec(&mut b, keys[5]); + aesdec(&mut b, keys[6]); + aesdec(&mut b, keys[7]); + aesdec(&mut b, keys[8]); + aesdec(&mut b, keys[9]); + if KEYS >= 13 { + aesdec(&mut b, keys[10]); + aesdec(&mut b, keys[11]); + } + if KEYS == 15 { + aesdec(&mut b, keys[12]); + aesdec(&mut b, keys[13]); + } + aesdeclast(&mut b, keys[KEYS - 1]); + store(blocks_out, b); +} + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn load(blocks: *const Array) -> Array<__m128i, N> { + let p = blocks.cast::<__m128i>(); + let mut res: Array<__m128i, N> = core::mem::zeroed(); + for i in 0..N::USIZE { + res[i] = _mm_loadu_si128(p.add(i)); + } + res +} + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn store(blocks: *mut Array, b: Array<__m128i, N>) { + let p = blocks.cast::<__m128i>(); + for i in 0..N::USIZE { + _mm_storeu_si128(p.add(i), b[i]); + } +} + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn xor(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_xor_si128(*block, key); + } +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn aesenc(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_aesenc_si128(*block, key); + } +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn aesenclast(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_aesenclast_si128(*block, key); + } +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn aesdec(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_aesdec_si128(*block, key); + } +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn aesdeclast(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_aesdeclast_si128(*block, key); + } +} diff --git a/aes/src/x86/ni/expand.rs b/aes/src/x86/ni/expand.rs new file mode 100644 index 00000000..68bdb357 --- /dev/null +++ b/aes/src/x86/ni/expand.rs @@ -0,0 +1,219 @@ +#![allow(unsafe_op_in_unsafe_fn)] + +use crate::x86::arch::*; +use core::mem::{transmute, zeroed}; + +pub(super) type Aes128RoundKeys = [__m128i; 11]; +pub(super) type Aes192RoundKeys = [__m128i; 13]; +pub(super) type Aes256RoundKeys = [__m128i; 15]; + +pub(crate) mod aes128 { + use super::*; + + #[target_feature(enable = "aes")] + pub(crate) unsafe fn expand_key(key: &[u8; 16]) -> Aes128RoundKeys { + unsafe fn expand_round(keys: &mut Aes128RoundKeys, pos: usize) { + let mut t1 = keys[pos - 1]; + let mut t2; + let mut t3; + + t2 = _mm_aeskeygenassist_si128(t1, RK); + t2 = _mm_shuffle_epi32(t2, 0xff); + t3 = _mm_slli_si128(t1, 0x4); + t1 = _mm_xor_si128(t1, t3); + t3 = _mm_slli_si128(t3, 0x4); + t1 = _mm_xor_si128(t1, t3); + t3 = _mm_slli_si128(t3, 0x4); + t1 = _mm_xor_si128(t1, t3); + t1 = _mm_xor_si128(t1, t2); + + keys[pos] = t1; + } + + let mut keys: Aes128RoundKeys = zeroed(); + let k = _mm_loadu_si128(key.as_ptr().cast()); + keys[0] = k; + + let kr = &mut keys; + expand_round::<0x01>(kr, 1); + expand_round::<0x02>(kr, 2); + expand_round::<0x04>(kr, 3); + expand_round::<0x08>(kr, 4); + expand_round::<0x10>(kr, 5); + expand_round::<0x20>(kr, 6); + expand_round::<0x40>(kr, 7); + expand_round::<0x80>(kr, 8); + 
expand_round::<0x1B>(kr, 9); + expand_round::<0x36>(kr, 10); + + keys + } +} + +pub(crate) mod aes192 { + use super::*; + + #[target_feature(enable = "aes")] + pub(crate) unsafe fn expand_key(key: &[u8; 24]) -> Aes192RoundKeys { + unsafe fn shuffle(a: __m128i, b: __m128i, i: usize) -> __m128i { + let a: [u64; 2] = transmute(a); + let b: [u64; 2] = transmute(b); + transmute([a[i], b[0]]) + } + + #[target_feature(enable = "aes")] + unsafe fn expand_round( + mut t1: __m128i, + mut t3: __m128i, + ) -> (__m128i, __m128i) { + let (mut t2, mut t4); + + t2 = _mm_aeskeygenassist_si128(t3, RK); + t2 = _mm_shuffle_epi32(t2, 0x55); + t4 = _mm_slli_si128(t1, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t1 = _mm_xor_si128(t1, t2); + t2 = _mm_shuffle_epi32(t1, 0xff); + t4 = _mm_slli_si128(t3, 0x4); + t3 = _mm_xor_si128(t3, t4); + t3 = _mm_xor_si128(t3, t2); + + (t1, t3) + } + + let mut keys: Aes192RoundKeys = zeroed(); + // We are being extra pedantic here to remove out-of-bound access. + // This should be optimized into movups, movsd sequence. + let (k0, k1l) = { + let mut t = [0u8; 32]; + t[..key.len()].copy_from_slice(key); + ( + _mm_loadu_si128(t.as_ptr().cast()), + _mm_loadu_si128(t.as_ptr().offset(16).cast()), + ) + }; + + keys[0] = k0; + + let (k1_2, k2r) = expand_round::<0x01>(k0, k1l); + keys[1] = shuffle(k1l, k1_2, 0); + keys[2] = shuffle(k1_2, k2r, 1); + + let (k3, k4l) = expand_round::<0x02>(k1_2, k2r); + keys[3] = k3; + + let (k4_5, k5r) = expand_round::<0x04>(k3, k4l); + let k4 = shuffle(k4l, k4_5, 0); + let k5 = shuffle(k4_5, k5r, 1); + keys[4] = k4; + keys[5] = k5; + + let (k6, k7l) = expand_round::<0x08>(k4_5, k5r); + keys[6] = k6; + + let (k7_8, k8r) = expand_round::<0x10>(k6, k7l); + keys[7] = shuffle(k7l, k7_8, 0); + keys[8] = shuffle(k7_8, k8r, 1); + + let (k9, k10l) = expand_round::<0x20>(k7_8, k8r); + keys[9] = k9; + + let (k10_11, k11r) = expand_round::<0x40>(k9, k10l); + keys[10] = shuffle(k10l, k10_11, 0); + keys[11] = shuffle(k10_11, k11r, 1); + + let (k12, _) = expand_round::<0x80>(k10_11, k11r); + keys[12] = k12; + + keys + } +} + +pub(crate) mod aes256 { + use super::*; + + #[target_feature(enable = "aes")] + pub(crate) unsafe fn expand_key(key: &[u8; 32]) -> Aes256RoundKeys { + unsafe fn expand_round(keys: &mut Aes256RoundKeys, pos: usize) { + let mut t1 = keys[pos - 2]; + let mut t2; + let mut t3 = keys[pos - 1]; + let mut t4; + + t2 = _mm_aeskeygenassist_si128(t3, RK); + t2 = _mm_shuffle_epi32(t2, 0xff); + t4 = _mm_slli_si128(t1, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t1 = _mm_xor_si128(t1, t2); + + keys[pos] = t1; + + t4 = _mm_aeskeygenassist_si128(t1, 0x00); + t2 = _mm_shuffle_epi32(t4, 0xaa); + t4 = _mm_slli_si128(t3, 0x4); + t3 = _mm_xor_si128(t3, t4); + t4 = _mm_slli_si128(t4, 0x4); + t3 = _mm_xor_si128(t3, t4); + t4 = _mm_slli_si128(t4, 0x4); + t3 = _mm_xor_si128(t3, t4); + t3 = _mm_xor_si128(t3, t2); + + keys[pos + 1] = t3; + } + + unsafe fn expand_round_last(keys: &mut Aes256RoundKeys, pos: usize) { + let mut t1 = keys[pos - 2]; + let mut t2; + let t3 = keys[pos - 1]; + let mut t4; + + t2 = _mm_aeskeygenassist_si128(t3, RK); + t2 = _mm_shuffle_epi32(t2, 0xff); + t4 = _mm_slli_si128(t1, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = 
_mm_xor_si128(t1, t4); + t1 = _mm_xor_si128(t1, t2); + + keys[pos] = t1; + } + + let mut keys: Aes256RoundKeys = zeroed(); + + let kp = key.as_ptr().cast::<__m128i>(); + keys[0] = _mm_loadu_si128(kp); + keys[1] = _mm_loadu_si128(kp.add(1)); + + let k = &mut keys; + expand_round::<0x01>(k, 2); + expand_round::<0x02>(k, 4); + expand_round::<0x04>(k, 6); + expand_round::<0x08>(k, 8); + expand_round::<0x10>(k, 10); + expand_round::<0x20>(k, 12); + expand_round_last::<0x40>(k, 14); + + keys + } +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn inv_keys(keys: &[__m128i; N]) -> [__m128i; N] { + let mut inv_keys: [__m128i; N] = zeroed(); + inv_keys[0] = keys[N - 1]; + for i in 1..N - 1 { + inv_keys[i] = _mm_aesimc_si128(keys[N - 1 - i]); + } + inv_keys[N - 1] = keys[0]; + inv_keys +} diff --git a/aes/src/x86/ni/hazmat.rs b/aes/src/x86/ni/hazmat.rs new file mode 100644 index 00000000..24a365a5 --- /dev/null +++ b/aes/src/x86/ni/hazmat.rs @@ -0,0 +1,97 @@ +//! Low-level "hazmat" AES functions: AES-NI support. +//! +//! Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256` +//! implementations in this crate, but instead provides raw AES-NI accelerated +//! access to the AES round function gated under the `hazmat` crate feature. +#![allow(unsafe_op_in_unsafe_fn)] + +use crate::hazmat::{Block, Block8}; +use crate::x86::arch::*; +use cipher::array::{Array, ArraySize}; + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn load(blocks: *const Array) -> Array<__m128i, N> { + let p = blocks.cast::<__m128i>(); + let mut res: Array<__m128i, N> = core::mem::zeroed(); + for i in 0..N::USIZE { + res[i] = _mm_loadu_si128(p.add(i)); + } + res +} + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn store(blocks: *mut Array, b: Array<__m128i, N>) { + let p = blocks.cast::<__m128i>(); + for i in 0..N::USIZE { + _mm_storeu_si128(p.add(i), b[i]); + } +} + +/// AES cipher (encrypt) round function. +#[target_feature(enable = "aes")] +pub(crate) unsafe fn cipher_round(block: &mut Block, round_key: &Block) { + // Safety: `loadu` and `storeu` support unaligned access + let b = _mm_loadu_si128(block.as_ptr() as *const __m128i); + let k = _mm_loadu_si128(round_key.as_ptr() as *const __m128i); + let out = _mm_aesenc_si128(b, k); + _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, out); +} + +/// AES cipher (encrypt) round function: parallel version. +#[target_feature(enable = "aes")] +pub(crate) unsafe fn cipher_round_par(blocks: &mut Block8, round_keys: &Block8) { + let xmm_keys = load(round_keys); + let mut xmm_blocks = load(blocks); + + for i in 0..8 { + xmm_blocks[i] = _mm_aesenc_si128(xmm_blocks[i], xmm_keys[i]); + } + + store(blocks, xmm_blocks); +} + +/// AES cipher (encrypt) round function. +#[target_feature(enable = "aes")] +pub(crate) unsafe fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) { + // Safety: `loadu` and `storeu` support unaligned access + let b = _mm_loadu_si128(block.as_ptr() as *const __m128i); + let k = _mm_loadu_si128(round_key.as_ptr() as *const __m128i); + let out = _mm_aesdec_si128(b, k); + _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, out); +} + +/// AES cipher (encrypt) round function: parallel version. 
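+/// This applies the equivalent inverse cipher (decryption) round,
+/// `_mm_aesdec_si128`, to eight blocks in parallel.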
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn equiv_inv_cipher_round_par(blocks: &mut Block8, round_keys: &Block8) {
+    let xmm_keys = load(round_keys);
+    let mut xmm_blocks = load(blocks);
+
+    for i in 0..8 {
+        xmm_blocks[i] = _mm_aesdec_si128(xmm_blocks[i], xmm_keys[i]);
+    }
+
+    store(blocks, xmm_blocks);
+}
+
+/// AES mix columns function.
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn mix_columns(block: &mut Block) {
+    // Safety: `loadu` and `storeu` support unaligned access
+    let mut state = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+
+    // Emulate mix columns by performing three inverse mix columns operations:
+    // the MixColumns matrix has multiplicative order 4, so applying its
+    // inverse three times yields the forward transformation.
+    state = _mm_aesimc_si128(state);
+    state = _mm_aesimc_si128(state);
+    state = _mm_aesimc_si128(state);
+
+    _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, state);
+}
+
+/// AES inverse mix columns function.
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn inv_mix_columns(block: &mut Block) {
+    // Safety: `loadu` and `storeu` support unaligned access
+    let b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+    let out = _mm_aesimc_si128(b);
+    _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, out);
+}
diff --git a/aes/src/x86/ni/test_expand.rs b/aes/src/x86/ni/test_expand.rs
new file mode 100644
index 00000000..6524ef74
--- /dev/null
+++ b/aes/src/x86/ni/test_expand.rs
@@ -0,0 +1,291 @@
+use crate::x86::arch::*;
+use crate::x86::ni::expand::*;
+use hex_literal::hex;
+
+pub(crate) fn check(a: &[__m128i], b: &[[u64; 2]]) {
+    assert_eq!(a.len(), b.len());
+    for (v1, v2) in a.iter().zip(b) {
+        let t1: [u64; 2] = unsafe { core::mem::transmute(*v1) };
+        let t2 = [v2[0].to_be(), v2[1].to_be()];
+        assert_eq!(t1, t2);
+    }
+}
+
+#[test]
+#[cfg_attr(
+    not(target_feature = "aes"),
+    ignore = "requires enabled `aes` target feature"
+)]
+fn aes128_expand_key_test() {
+    let keys = [0x00; 16];
+    check(
+        &unsafe { aes128::expand_key(&keys) },
+        &[
+            [0x0000000000000000, 0x0000000000000000],
+            [0x6263636362636363, 0x6263636362636363],
+            [0x9b9898c9f9fbfbaa, 0x9b9898c9f9fbfbaa],
+            [0x90973450696ccffa, 0xf2f457330b0fac99],
+            [0xee06da7b876a1581, 0x759e42b27e91ee2b],
+            [0x7f2e2b88f8443e09, 0x8dda7cbbf34b9290],
+            [0xec614b851425758c, 0x99ff09376ab49ba7],
+            [0x217517873550620b, 0xacaf6b3cc61bf09b],
+            [0x0ef903333ba96138, 0x97060a04511dfa9f],
+            [0xb1d4d8e28a7db9da, 0x1d7bb3de4c664941],
+            [0xb4ef5bcb3e92e211, 0x23e951cf6f8f188e],
+        ],
+    );
+
+    let keys = [0xff; 16];
+    check(
+        &unsafe { aes128::expand_key(&keys) },
+        &[
+            [0xffffffffffffffff, 0xffffffffffffffff],
+            [0xe8e9e9e917161616, 0xe8e9e9e917161616],
+            [0xadaeae19bab8b80f, 0x525151e6454747f0],
+            [0x090e2277b3b69a78, 0xe1e7cb9ea4a08c6e],
+            [0xe16abd3e52dc2746, 0xb33becd8179b60b6],
+            [0xe5baf3ceb766d488, 0x045d385013c658e6],
+            [0x71d07db3c6b6a93b, 0xc2eb916bd12dc98d],
+            [0xe90d208d2fbb89b6, 0xed5018dd3c7dd150],
+            [0x96337366b988fad0, 0x54d8e20d68a5335d],
+            [0x8bf03f233278c5f3, 0x66a027fe0e0514a3],
+            [0xd60a3588e472f07b, 0x82d2d7858cd7c326],
+        ],
+    );
+
+    let keys = hex!("000102030405060708090a0b0c0d0e0f");
+    check(
+        &unsafe { aes128::expand_key(&keys) },
+        &[
+            [0x0001020304050607, 0x08090a0b0c0d0e0f],
+            [0xd6aa74fdd2af72fa, 0xdaa678f1d6ab76fe],
+            [0xb692cf0b643dbdf1, 0xbe9bc5006830b3fe],
+            [0xb6ff744ed2c2c9bf, 0x6c590cbf0469bf41],
+            [0x47f7f7bc95353e03, 0xf96c32bcfd058dfd],
+            [0x3caaa3e8a99f9deb, 0x50f3af57adf622aa],
+            [0x5e390f7df7a69296, 0xa7553dc10aa31f6b],
+            [0x14f9701ae35fe28c, 0x440adf4d4ea9c026],
+            [0x47438735a41c65b9, 0xe016baf4aebf7ad2],
+            [0x549932d1f0855768, 0x1093ed9cbe2c974e],
+            [0x13111d7fe3944a17, 0xf307a78b4d2b30c5],
+        ],
+    );
+
+    let keys = hex!("6920e299a5202a6d656e636869746f2a");
+    check(
+        &unsafe { aes128::expand_key(&keys) },
+        &[
+            [0x6920e299a5202a6d, 0x656e636869746f2a],
+            [0xfa8807605fa82d0d, 0x3ac64e6553b2214f],
+            [0xcf75838d90ddae80, 0xaa1be0e5f9a9c1aa],
+            [0x180d2f1488d08194, 0x22cb6171db62a0db],
+            [0xbaed96ad323d1739, 0x10f67648cb94d693],
+            [0x881b4ab2ba265d8b, 0xaad02bc36144fd50],
+            [0xb34f195d096944d6, 0xa3b96f15c2fd9245],
+            [0xa7007778ae6933ae, 0x0dd05cbbcf2dcefe],
+            [0xff8bccf251e2ff5c, 0x5c32a3e7931f6d19],
+            [0x24b7182e7555e772, 0x29674495ba78298c],
+            [0xae127cdadb479ba8, 0xf220df3d4858f6b1],
+        ],
+    );
+
+    let keys = hex!("2b7e151628aed2a6abf7158809cf4f3c");
+    check(
+        &unsafe { aes128::expand_key(&keys) },
+        &[
+            [0x2b7e151628aed2a6, 0xabf7158809cf4f3c],
+            [0xa0fafe1788542cb1, 0x23a339392a6c7605],
+            [0xf2c295f27a96b943, 0x5935807a7359f67f],
+            [0x3d80477d4716fe3e, 0x1e237e446d7a883b],
+            [0xef44a541a8525b7f, 0xb671253bdb0bad00],
+            [0xd4d1c6f87c839d87, 0xcaf2b8bc11f915bc],
+            [0x6d88a37a110b3efd, 0xdbf98641ca0093fd],
+            [0x4e54f70e5f5fc9f3, 0x84a64fb24ea6dc4f],
+            [0xead27321b58dbad2, 0x312bf5607f8d292f],
+            [0xac7766f319fadc21, 0x28d12941575c006e],
+            [0xd014f9a8c9ee2589, 0xe13f0cc8b6630ca6],
+        ],
+    );
+}
+
+#[test]
+#[cfg_attr(
+    not(target_feature = "aes"),
+    ignore = "requires enabled `aes` target feature"
+)]
+fn aes192_expand_key_test() {
+    let keys = [0x00; 24];
+    check(
+        &unsafe { aes192::expand_key(&keys) },
+        &[
+            [0x0000000000000000, 0x0000000000000000],
+            [0x0000000000000000, 0x6263636362636363],
+            [0x6263636362636363, 0x6263636362636363],
+            [0x9b9898c9f9fbfbaa, 0x9b9898c9f9fbfbaa],
+            [0x9b9898c9f9fbfbaa, 0x90973450696ccffa],
+            [0xf2f457330b0fac99, 0x90973450696ccffa],
+            [0xc81d19a9a171d653, 0x53858160588a2df9],
+            [0xc81d19a9a171d653, 0x7bebf49bda9a22c8],
+            [0x891fa3a8d1958e51, 0x198897f8b8f941ab],
+            [0xc26896f718f2b43f, 0x91ed1797407899c6],
+            [0x59f00e3ee1094f95, 0x83ecbc0f9b1e0830],
+            [0x0af31fa74a8b8661, 0x137b885ff272c7ca],
+            [0x432ac886d834c0b6, 0xd2c7df11984c5970],
+        ],
+    );
+
+    let keys = [0xff; 24];
+    check(
+        &unsafe { aes192::expand_key(&keys) },
+        &[
+            [0xffffffffffffffff, 0xffffffffffffffff],
+            [0xffffffffffffffff, 0xe8e9e9e917161616],
+            [0xe8e9e9e917161616, 0xe8e9e9e917161616],
+            [0xadaeae19bab8b80f, 0x525151e6454747f0],
+            [0xadaeae19bab8b80f, 0xc5c2d8ed7f7a60e2],
+            [0x2d2b3104686c76f4, 0xc5c2d8ed7f7a60e2],
+            [0x1712403f686820dd, 0x454311d92d2f672d],
+            [0xe8edbfc09797df22, 0x8f8cd3b7e7e4f36a],
+            [0xa2a7e2b38f88859e, 0x67653a5ef0f2e57c],
+            [0x2655c33bc1b13051, 0x6316d2e2ec9e577c],
+            [0x8bfb6d227b09885e, 0x67919b1aa620ab4b],
+            [0xc53679a929a82ed5, 0xa25343f7d95acba9],
+            [0x598e482fffaee364, 0x3a989acd1330b418],
+        ],
+    );
+
+    let keys = hex!("000102030405060708090a0b0c0d0e0f1011121314151617");
+    check(
+        &unsafe { aes192::expand_key(&keys) },
+        &[
+            [0x0001020304050607, 0x08090a0b0c0d0e0f],
+            [0x1011121314151617, 0x5846f2f95c43f4fe],
+            [0x544afef55847f0fa, 0x4856e2e95c43f4fe],
+            [0x40f949b31cbabd4d, 0x48f043b810b7b342],
+            [0x58e151ab04a2a555, 0x7effb5416245080c],
+            [0x2ab54bb43a02f8f6, 0x62e3a95d66410c08],
+            [0xf501857297448d7e, 0xbdf1c6ca87f33e3c],
+            [0xe510976183519b69, 0x34157c9ea351f1e0],
+            [0x1ea0372a99530916, 0x7c439e77ff12051e],
+            [0xdd7e0e887e2fff68, 0x608fc842f9dcc154],
+            [0x859f5f237a8d5a3d, 0xc0c02952beefd63a],
+            [0xde601e7827bcdf2c, 0xa223800fd8aeda32],
+            [0xa4970a331a78dc09, 0xc418c271e3a41d5d],
+        ],
+    );
+
+    let keys = hex!("8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b");
+    check(
+        &unsafe { aes192::expand_key(&keys) },
+        &[
+            [0x8e73b0f7da0e6452, 0xc810f32b809079e5],
+            [0x62f8ead2522c6b7b, 0xfe0c91f72402f5a5],
+            [0xec12068e6c827f6b, 0x0e7a95b95c56fec2],
+            [0x4db7b4bd69b54118, 0x85a74796e92538fd],
+            [0xe75fad44bb095386, 0x485af05721efb14f],
+            [0xa448f6d94d6dce24, 0xaa326360113b30e6],
+            [0xa25e7ed583b1cf9a, 0x27f939436a94f767],
+            [0xc0a69407d19da4e1, 0xec1786eb6fa64971],
+            [0x485f703222cb8755, 0xe26d135233f0b7b3],
+            [0x40beeb282f18a259, 0x6747d26b458c553e],
+            [0xa7e1466c9411f1df, 0x821f750aad07d753],
+            [0xca4005388fcc5006, 0x282d166abc3ce7b5],
+            [0xe98ba06f448c773c, 0x8ecc720401002202],
+        ],
+    );
+}
+
+#[test]
+#[cfg_attr(
+    not(target_feature = "aes"),
+    ignore = "requires enabled `aes` target feature"
+)]
+fn aes256_expand_key_test() {
+    let keys = [0x00; 32];
+    check(
+        &unsafe { aes256::expand_key(&keys) },
+        &[
+            [0x0000000000000000, 0x0000000000000000],
+            [0x0000000000000000, 0x0000000000000000],
+            [0x6263636362636363, 0x6263636362636363],
+            [0xaafbfbfbaafbfbfb, 0xaafbfbfbaafbfbfb],
+            [0x6f6c6ccf0d0f0fac, 0x6f6c6ccf0d0f0fac],
+            [0x7d8d8d6ad7767691, 0x7d8d8d6ad7767691],
+            [0x5354edc15e5be26d, 0x31378ea23c38810e],
+            [0x968a81c141fcf750, 0x3c717a3aeb070cab],
+            [0x9eaa8f28c0f16d45, 0xf1c6e3e7cdfe62e9],
+            [0x2b312bdf6acddc8f, 0x56bca6b5bdbbaa1e],
+            [0x6406fd52a4f79017, 0x553173f098cf1119],
+            [0x6dbba90b07767584, 0x51cad331ec71792f],
+            [0xe7b0e89c4347788b, 0x16760b7b8eb91a62],
+            [0x74ed0ba1739b7e25, 0x2251ad14ce20d43b],
+            [0x10f80a1753bf729c, 0x45c979e7cb706385],
+        ],
+    );
+
+    let keys = [0xff; 32];
+    check(
+        &unsafe { aes256::expand_key(&keys) },
+        &[
+            [0xffffffffffffffff, 0xffffffffffffffff],
+            [0xffffffffffffffff, 0xffffffffffffffff],
+            [0xe8e9e9e917161616, 0xe8e9e9e917161616],
+            [0x0fb8b8b8f0474747, 0x0fb8b8b8f0474747],
+            [0x4a4949655d5f5f73, 0xb5b6b69aa2a0a08c],
+            [0x355858dcc51f1f9b, 0xcaa7a7233ae0e064],
+            [0xafa80ae5f2f75596, 0x4741e30ce5e14380],
+            [0xeca0421129bf5d8a, 0xe318faa9d9f81acd],
+            [0xe60ab7d014fde246, 0x53bc014ab65d42ca],
+            [0xa2ec6e658b5333ef, 0x684bc946b1b3d38b],
+            [0x9b6c8a188f91685e, 0xdc2d69146a702bde],
+            [0xa0bd9f782beeac97, 0x43a565d1f216b65a],
+            [0xfc22349173b35ccf, 0xaf9e35dbc5ee1e05],
+            [0x0695ed132d7b4184, 0x6ede24559cc8920f],
+            [0x546d424f27de1e80, 0x88402b5b4dae355e],
+        ],
+    );
+
+    let keys = hex!("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f");
+    check(
+        &unsafe { aes256::expand_key(&keys) },
+        &[
+            [0x0001020304050607, 0x08090a0b0c0d0e0f],
+            [0x1011121314151617, 0x18191a1b1c1d1e1f],
+            [0xa573c29fa176c498, 0xa97fce93a572c09c],
+            [0x1651a8cd0244beda, 0x1a5da4c10640bade],
+            [0xae87dff00ff11b68, 0xa68ed5fb03fc1567],
+            [0x6de1f1486fa54f92, 0x75f8eb5373b8518d],
+            [0xc656827fc9a79917, 0x6f294cec6cd5598b],
+            [0x3de23a75524775e7, 0x27bf9eb45407cf39],
+            [0x0bdc905fc27b0948, 0xad5245a4c1871c2f],
+            [0x45f5a66017b2d387, 0x300d4d33640a820a],
+            [0x7ccff71cbeb4fe54, 0x13e6bbf0d261a7df],
+            [0xf01afafee7a82979, 0xd7a5644ab3afe640],
+            [0x2541fe719bf50025, 0x8813bbd55a721c0a],
+            [0x4e5a6699a9f24fe0, 0x7e572baacdf8cdea],
+            [0x24fc79ccbf0979e9, 0x371ac23c6d68de36],
+        ],
+    );
+
+    let keys = hex!("603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4");
+    check(
+        &unsafe { aes256::expand_key(&keys) },
+        &[
+            [0x603deb1015ca71be, 0x2b73aef0857d7781],
+            [0x1f352c073b6108d7, 0x2d9810a30914dff4],
+            [0x9ba354118e6925af, 0xa51a8b5f2067fcde],
+            [0xa8b09c1a93d194cd, 0xbe49846eb75d5b9a],
+            [0xd59aecb85bf3c917, 0xfee94248de8ebe96],
+            [0xb5a9328a2678a647, 0x983122292f6c79b3],
+            [0x812c81addadf48ba, 0x24360af2fab8b464],
+            [0x98c5bfc9bebd198e, 0x268c3ba709e04214],
+            [0x68007bacb2df3316, 0x96e939e46c518d80],
+            [0xc814e20476a9fb8a, 0x5025c02d59c58239],
+            [0xde1369676ccc5a71, 0xfa2563959674ee15],
+            [0x5886ca5d2e2f31d7, 0x7e0af1fa27cf73c3],
+            [0x749c47ab18501dda, 0xe2757e4f7401905a],
+            [0xcafaaae3e4d59b34, 0x9adf6acebd10190d],
+            [0xfe4890d1e6188d0b, 0x046df344706c631e],
+        ],
+    );
+}
diff --git a/aes/src/x86/vaes256.rs b/aes/src/x86/vaes256.rs
new file mode 100644
index 00000000..5664066f
--- /dev/null
+++ b/aes/src/x86/vaes256.rs
@@ -0,0 +1 @@
+pub(super) mod encdec;
diff --git a/aes/src/x86/vaes256/encdec.rs b/aes/src/x86/vaes256/encdec.rs
new file mode 100644
index 00000000..303df83e
--- /dev/null
+++ b/aes/src/x86/vaes256/encdec.rs
@@ -0,0 +1,83 @@
+use crate::x86::{Block30, Simd128RoundKeys, Simd256RoundKeys, arch::*};
+use cipher::inout::InOut;
+use core::mem::MaybeUninit;
+
+#[target_feature(enable = "avx2")]
+#[inline]
+pub(crate) unsafe fn broadcast_keys<const KEYS: usize>(
+    keys: &Simd128RoundKeys<KEYS>,
+) -> Simd256RoundKeys<KEYS> {
+    keys.map(|key| _mm256_broadcastsi128_si256(key))
+}
+
+#[target_feature(enable = "avx2,vaes")]
+#[inline]
+pub(crate) unsafe fn encrypt30<const KEYS: usize>(
+    keys: &Simd256RoundKeys<KEYS>,
+    blocks: InOut<'_, '_, Block30>,
+) {
+    assert!(KEYS == 11 || KEYS == 13 || KEYS == 15);
+
+    let (iptr, optr) = blocks.into_raw();
+    let iptr = iptr.cast::<__m256i>();
+    let optr = optr.cast::<__m256i>();
+
+    let mut data: [MaybeUninit<__m256i>; 15] = unsafe { MaybeUninit::uninit().assume_init() };
+
+    (0..15).for_each(|i| {
+        data[i].write(unsafe { iptr.add(i).read_unaligned() });
+    });
+    let mut data: [__m256i; 15] = unsafe { ::core::mem::transmute(data) };
+
+    for vec in &mut data {
+        *vec = _mm256_xor_si256(*vec, keys[0]);
+    }
+    for key in &keys[1..KEYS - 1] {
+        for vec in &mut data {
+            *vec = _mm256_aesenc_epi128(*vec, *key);
+        }
+    }
+    for vec in &mut data {
+        *vec = _mm256_aesenclast_epi128(*vec, keys[KEYS - 1]);
+    }
+
+    (0..15).for_each(|i| {
+        unsafe { optr.add(i).write_unaligned(data[i]) };
+    });
+}
+
+#[target_feature(enable = "avx2,vaes")]
+#[inline]
+pub(crate) unsafe fn decrypt30<const KEYS: usize>(
+    keys: &Simd256RoundKeys<KEYS>,
+    blocks: InOut<'_, '_, Block30>,
+) {
+    assert!(KEYS == 11 || KEYS == 13 || KEYS == 15);
+
+    let (iptr, optr) = blocks.into_raw();
+    let iptr = iptr.cast::<__m256i>();
+    let optr = optr.cast::<__m256i>();
+
+    let mut data: [MaybeUninit<__m256i>; 15] = unsafe { MaybeUninit::uninit().assume_init() };
+
+    (0..15).for_each(|i| {
+        data[i].write(unsafe { iptr.add(i).read_unaligned() });
+    });
+    let mut data: [__m256i; 15] = unsafe { ::core::mem::transmute(data) };
+
+    for vec in &mut data {
+        *vec = _mm256_xor_si256(*vec, keys[0]);
+    }
+    for key in &keys[1..KEYS - 1] {
+        for vec in &mut data {
+            *vec = _mm256_aesdec_epi128(*vec, *key);
+        }
+    }
+    for vec in &mut data {
+        *vec = _mm256_aesdeclast_epi128(*vec, keys[KEYS - 1]);
+    }
+
+    (0..15).for_each(|i| {
+        unsafe { optr.add(i).write_unaligned(data[i]) };
+    });
+}
diff --git a/aes/src/x86/vaes512.rs b/aes/src/x86/vaes512.rs
new file mode 100644
index 00000000..5664066f
--- /dev/null
+++ b/aes/src/x86/vaes512.rs
@@ -0,0 +1 @@
+pub(super) mod encdec;
diff --git a/aes/src/x86/vaes512/encdec.rs b/aes/src/x86/vaes512/encdec.rs
new file mode 100644
index 00000000..f74358a6
--- /dev/null
+++ b/aes/src/x86/vaes512/encdec.rs
@@ -0,0 +1,85 @@
+#![allow(unsafe_op_in_unsafe_fn)]
+
+use crate::x86::{Block64, Simd128RoundKeys, Simd512RoundKeys, arch::*};
+use cipher::inout::InOut;
+use core::mem::MaybeUninit;
+
+#[target_feature(enable = "avx512f")]
+#[inline]
+pub(crate) unsafe fn broadcast_keys<const KEYS: usize>(
+    keys: &Simd128RoundKeys<KEYS>,
+) -> Simd512RoundKeys<KEYS> {
+    keys.map(|key| _mm512_broadcast_i32x4(key))
+}
+
+#[target_feature(enable = "avx512f,vaes")]
+#[inline]
+pub(crate) unsafe fn encrypt64<const KEYS: usize>(
+    keys: &Simd512RoundKeys<KEYS>,
+    blocks: InOut<'_, '_, Block64>,
+) {
+    assert!(KEYS == 11 || KEYS == 13 || KEYS == 15);
+
+    let (iptr, optr) = blocks.into_raw();
+    let iptr = iptr.cast::<__m512i>();
+    let optr = optr.cast::<__m512i>();
+
+    let mut data: [MaybeUninit<__m512i>; 16] = MaybeUninit::uninit().assume_init();
+
+    (0..16).for_each(|i| {
+        data[i].write(iptr.add(i).read_unaligned());
+    });
+    let mut data: [__m512i; 16] = unsafe { ::core::mem::transmute(data) };
+
+    for vec in &mut data {
+        *vec = _mm512_xor_si512(*vec, keys[0]);
+    }
+    for key in &keys[1..KEYS - 1] {
+        for vec in &mut data {
+            *vec = _mm512_aesenc_epi128(*vec, *key);
+        }
+    }
+    for vec in &mut data {
+        *vec = _mm512_aesenclast_epi128(*vec, keys[KEYS - 1]);
+    }
+
+    (0..16).for_each(|i| {
+        optr.add(i).write_unaligned(data[i]);
+    });
+}
+
+#[target_feature(enable = "avx512f,vaes")]
+#[inline]
+pub(crate) unsafe fn decrypt64<const KEYS: usize>(
+    keys: &Simd512RoundKeys<KEYS>,
+    blocks: InOut<'_, '_, Block64>,
+) {
+    assert!(KEYS == 11 || KEYS == 13 || KEYS == 15);
+
+    let (iptr, optr) = blocks.into_raw();
+    let iptr = iptr.cast::<__m512i>();
+    let optr = optr.cast::<__m512i>();
+
+    let mut data: [MaybeUninit<__m512i>; 16] = MaybeUninit::uninit().assume_init();
+
+    (0..16).for_each(|i| {
+        data[i].write(iptr.add(i).read_unaligned());
+    });
+    let mut data: [__m512i; 16] = unsafe { ::core::mem::transmute(data) };
+
+    for vec in &mut data {
+        *vec = _mm512_xor_si512(*vec, keys[0]);
+    }
+    for key in &keys[1..KEYS - 1] {
+        for vec in &mut data {
+            *vec = _mm512_aesdec_epi128(*vec, *key);
+        }
+    }
+    for vec in &mut data {
+        *vec = _mm512_aesdeclast_epi128(*vec, keys[KEYS - 1]);
+    }
+
+    (0..16).for_each(|i| {
+        optr.add(i).write_unaligned(data[i]);
+    });
+}
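
A minimal usage sketch of the AVX-512 helpers above: broadcast the per-round `__m128i` keys into `__m512i` lanes once, then push a 64-block chunk through the schedule in a single pass. It assumes `Simd128RoundKeys<KEYS>`/`Simd512RoundKeys<KEYS>` are `[__m128i; KEYS]`/`[__m512i; KEYS]` and that `Block64` is the 64-block array consumed by `encrypt64`; the wrapper name `encrypt_chunk` is hypothetical and not part of the diff.

// Hypothetical wrapper illustrating the intended calling pattern for the
// vaes512 helpers; in a real backend the broadcast would be done once per key
// schedule rather than once per chunk.
#[target_feature(enable = "avx512f,vaes")]
unsafe fn encrypt_chunk(keys128: &Simd128RoundKeys<11>, blocks: &mut Block64) {
    // Replicate each 128-bit AES-128 round key into all four lanes of a ZMM register.
    let keys512 = broadcast_keys(keys128);
    // Encrypt all 64 blocks (16 x __m512i) in place.
    encrypt64(&keys512, blocks.into());
}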