From 2b2ad090f63bba9a261d572238af2706b32b8aa9 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 12:42:54 +0100 Subject: [PATCH 01/19] Save the version before running the x86 generator --- Cargo.lock | 15 +- Cargo.toml | 2 + fearless_simd_core/Cargo.toml | 17 + fearless_simd_core/gen/Cargo.toml | 6 + fearless_simd_core/gen/src/data.rs | 2 + fearless_simd_core/gen/src/data/x86.rs | 370 ++++++++++++++++++++ fearless_simd_core/gen/src/main.rs | 166 +++++++++ fearless_simd_core/gen/templates/aarch64.rs | 0 fearless_simd_core/gen/templates/x86.rs | 90 +++++ fearless_simd_core/src/lib.rs | 282 +++++++++++++++ fearless_simd_core/src/trampoline.rs | 231 ++++++++++++ fearless_simd_core/src/x86/mod.rs | 15 + fearless_simd_core/src/x86/v1/fxsr.rs | 80 +++++ fearless_simd_core/src/x86/v1/mod.rs | 38 ++ fearless_simd_core/src/x86/v1/sse.rs | 90 +++++ 15 files changed, 1402 insertions(+), 2 deletions(-) create mode 100644 fearless_simd_core/Cargo.toml create mode 100644 fearless_simd_core/gen/Cargo.toml create mode 100644 fearless_simd_core/gen/src/data.rs create mode 100644 fearless_simd_core/gen/src/data/x86.rs create mode 100644 fearless_simd_core/gen/src/main.rs create mode 100644 fearless_simd_core/gen/templates/aarch64.rs create mode 100644 fearless_simd_core/gen/templates/x86.rs create mode 100644 fearless_simd_core/src/lib.rs create mode 100644 fearless_simd_core/src/trampoline.rs create mode 100644 fearless_simd_core/src/x86/mod.rs create mode 100644 fearless_simd_core/src/x86/v1/fxsr.rs create mode 100644 fearless_simd_core/src/x86/v1/mod.rs create mode 100644 fearless_simd_core/src/x86/v1/sse.rs diff --git a/Cargo.lock b/Cargo.lock index 161950a6..672a0913 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -60,9 +60,9 @@ checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" [[package]] name = "bytemuck" -version = "1.23.1" +version = "1.24.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422" +checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" [[package]] name = "cc" @@ -133,6 +133,17 @@ dependencies = [ "libm", ] +[[package]] +name = "fearless_simd_core" +version = "0.1.0" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "fearless_simd_core_gen" +version = "0.1.0" + [[package]] name = "fearless_simd_dev_macros" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 81395978..e84d0a2e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,8 @@ resolver = "2" members = [ "fearless_simd", + "fearless_simd_core", + "fearless_simd_core/gen", "fearless_simd_dev_macros", "fearless_simd_gen", "fearless_simd_tests", diff --git a/fearless_simd_core/Cargo.toml b/fearless_simd_core/Cargo.toml new file mode 100644 index 00000000..e16a9823 --- /dev/null +++ b/fearless_simd_core/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "fearless_simd_core" +version = "0.1.0" +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dev-dependencies] +bytemuck = { version = "1.24.0", features = ["must_cast"] } + +[lints] +workspace = true + +[features] +default = ["std"] +std = [] diff --git a/fearless_simd_core/gen/Cargo.toml b/fearless_simd_core/gen/Cargo.toml new file mode 100644 index 00000000..5617f7be --- /dev/null +++ b/fearless_simd_core/gen/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "fearless_simd_core_gen" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/fearless_simd_core/gen/src/data.rs b/fearless_simd_core/gen/src/data.rs new file mode 100644 index 00000000..87b9ec46 --- /dev/null +++ b/fearless_simd_core/gen/src/data.rs @@ -0,0 +1,2 @@ +mod x86; +pub(crate) use x86::{X86_FEATURES, X86_TEMPLATE}; diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs new file mode 100644 index 
00000000..39e59990 --- /dev/null +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -0,0 +1,370 @@ +use crate::Feature; + +macro_rules! f { + ($(#[doc = $doc_addition: literal])* + struct ::$module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*] + fn $example_function_name: ident + $($additional_impls: tt)* + ) => { + Feature { + struct_name: stringify!($struct_name), + feature_name: $feature_name, + directly_implicitly_enabled: &[$($implicitly_enabled),*], + extra_docs: concat!($($doc_addition, "\n",)*), + example_function_name: stringify!($example_function_name), + feature_docs_name: $display_name, + additional_impls: stringify!($($additional_impls)*), + module: stringify!($module) + } + } +} + +pub(crate) const X86_TEMPLATE: &str = include_str!("../../templates/x86.rs"); + +// Data taken from: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 +// (specifically, at https://github.com/rust-lang/reference/blob/1d930e1d5a27e114b4d22a50b0b6cd3771b92e31/src/attributes/codegen.md#x86-or-x86_64) +// TODO: Do we need to add their license attribution to our license? 
+// TODO: Check set against https://doc.rust-lang.org/stable/std/macro.is_x86_feature_detected.html +// In particular, we're missing lahfsahf +pub(crate) const X86_FEATURES: &[Feature] = &[ + f!( + /// [ADX] --- Multi-Precision Add-Carry Instruction Extensions + /// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX + struct ::adx::Adx("ADX"): "adx" + [] + fn uses_adx + ), + f!( + /// [AES] --- Advanced Encryption Standard + /// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set + struct ::crypto::Aes("AES"): "aes" + ["sse2"] + fn uses_aes + ), + f!( + /// [AVX] --- Advanced Vector Extensions + /// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions + struct ::avx::Avx("AVX"): "avx" + ["sse4.2"] + fn uses_avx + ), + f!( + /// [AVX2] --- Advanced Vector Extensions 2 + /// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2 + struct ::avx::Avx2("AVX2"): "avx2" + ["avx"] + fn uses_avx2 + ), + f!( + /// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions + /// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16 + struct ::avx512::Avx512bf16("AVX512-BF16"): "avx512bf16" + ["avx512bw"] + fn uses_avx512bf16 + ), + f!( + /// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms + /// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG + struct ::avx512::Avx512bitalg("AVX512-BITALG"): "avx512bitalg" + ["avx512bw"] + fn uses_avx512bitalg + ), + f!( + /// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions + /// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI + struct ::avx512::Avx512bw("AVX512-BW"): "avx512bw" + ["avx512f"] + fn uses_avx512bw + ), + f!( + /// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions + /// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection + struct ::avx512::Avx512cd("AVX512-CD"): "avx512cd" + ["avx512f"] + fn uses_avx512cd + ), + f!( + /// 
[AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions + /// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI + struct ::avx512::Avx512dq("AVX512-DQ"): "avx512dq" + ["avx512f"] + fn uses_avx512dq + ), + f!( + /// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation + /// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512 + struct ::avx512::Avx512f("AVX512-F"): "avx512f" + ["avx2", "fma", "f16c"] + fn uses_avx512f + ), + f!( + /// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions + /// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16 + struct ::avx512::Avx512fp16("AVX512-FP16"): "avx512fp16" + ["avx512bw"] + fn uses_avx512fp16 + ), + f!( + /// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add + /// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA + struct ::avx512::Avx512ifma("AVX512-IFMA"): "avx512ifma" + ["avx512f"] + fn uses_avx512ifma + ), + f!( + /// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions + /// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI + struct ::avx512::Avx512vbmi("AVX512-VBMI"): "avx512vbmi" + ["avx512bw"] + fn uses_avx512vbmi + ), + f!( + /// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2 + /// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2 + struct ::avx512::Avx512vbmi2("AVX512-VBMI2"): "avx512vbmi2" + ["avx512bw"] + fn uses_avx512vbmi2 + ), + f!( + /// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions + /// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512 + struct ::avx512::Avx512vl("AVX512-VL"): "avx512vl" + ["avx512f"] + fn uses_avx512vl + ), + f!( + /// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions + /// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI + struct ::avx512::Avx512vnni("AVX512-VNNI"): 
"avx512vnni" + ["avx512f"] + fn uses_avx512vnni + ), + f!( + /// [AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers + /// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT + struct ::avx512::Avx512vp2intersect("AVX512-VP2INTERSECT"): "avx512vp2intersect" + ["avx512f"] + fn uses_avx512vp2intersect + ), + f!( + /// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction + /// [AVX512-VPOPCNTDQ]:https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG + struct ::avx512::Avx512vpopcntdq("AVX512-VPOPCNTDQ"): "avx512vpopcntdq" + ["avx512f"] + fn uses_avx512vpopcntdq + ), + f!( + /// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add + /// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxifma("AVX-IFMA"): "avxifma" + ["avx2"] + fn uses_avxifma + ), + f!( + /// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions + /// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxneconvert("AVX-NE-CONVERT"): "avxneconvert" + ["avx2"] + fn uses_avxneconvert + ), + f!( + /// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions + /// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxvnni("AVX-VNNI"): "avxvnni" + ["avx2"] + fn uses_avxvnni + ), + f!( + /// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers + /// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxvnniint16("AVX-VNNI-INT16"): "avxvnniint16" + ["avx2"] + fn uses_avxvnniint16 + ), + f!( + /// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers + /// 
[AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxvnniint8("AVX-VNNI-INT8"): "avxvnniint8" + ["avx2"] + fn uses_avxvnniint8 + ), + f!( + /// [BMI1] --- Bit Manipulation Instruction Sets + /// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets + struct ::v3::Bmi1("BMI1"): "bmi1" + [] + fn uses_bmi1 + ), + f!( + /// [BMI2] --- Bit Manipulation Instruction Sets 2 + /// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2 + struct ::v3::Bmi2("BMI2"): "bmi2" + [] + fn uses_bmi2 + ), + f!( + /// ["cmpxchg16b"] --- Compare and exchange 16 bytes (128 bits) of data atomically + /// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + struct ::v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + [] + fn uses_cmpxchg16b + ), + f!( + /// [F16C] --- 16-bit floating point conversion instructions + /// [F16C]: https://en.wikipedia.org/wiki/F16C + struct ::v3::F16c("F16C"): "f16c" + ["avx"] + fn uses_f16c + ), + f!( + /// [FMA3] --- Three-operand fused multiply-add + /// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set + struct ::v3::Fma("FMA3"): "fma" + ["avx"] + fn uses_fma + ), + f!( + /// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State + /// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave + struct ::sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + [] + fn uses_fxsr + ), + f!( + /// [GFNI] --- Galois Field New Instructions + /// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI + struct ::crypto::Gfni("GFNI"): "gfni" + ["sse2"] + fn uses_gfni + ), + f!( + /// [KEYLOCKER] --- Intel Key Locker Instructions + /// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions + struct ::crypto::Keylocker("KEYLOCKER"): "kl" + [] + fn uses_keylocker + ), + f!( + /// ["lzcnt"] --- Leading zeros count + /// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt + struct 
::v3::Lzcnt("`lzcnt`"): "lzcnt" + [] + fn uses_lzcnt + ), + f!( + /// ["movbe"] --- Move data after swapping bytes + /// ["movbe"]: https://www.felixcloutier.com/x86/movbe + struct ::v3::Movbe("`movbe`"): "movbe" + [] + fn uses_movbe + ), + f!( + /// ["pclmulqdq"] --- Packed carry-less multiplication quadword + /// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq + struct ::crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"] + fn uses_pclmulqdq + ), + f!( + /// ["popcnt"] --- Count of bits set to 1 + /// ["popcnt"]: https://www.felixcloutier.com/x86/popcnt + struct ::v2::Popcnt("`popcnt`"): "popcnt" + [] + fn uses_popcnt + ), + f!( + /// ["rdrand"] --- Read random number + /// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand + struct ::crypto::Rdrand("`rdrand`"): "rdrand" + [] + fn uses_rdrand + ), + f!( + /// ["rdseed"] --- Read random seed + /// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand + struct ::crypto::Rdseed("`rdseed`"): "rdseed" + [] + fn uses_rdseed + ), + f!( + /// [SHA] --- Secure Hash Algorithm + /// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions + struct ::crypto::Sha("SHA"): "sha" + ["sse2"] + fn uses_sha + ), + f!( + /// [SHA512] --- Secure Hash Algorithm with 512-bit digest + /// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions + struct ::crypto::Sha512("SHA512"): "sha512" + ["avx2"] + fn uses_sha512 + ), + f!( + /// [SM3] --- ShangMi 3 Hash Algorithm + /// [SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions + struct ::crypto::Sm3("SM3"): "sm3" + ["avx"] + fn uses_sm3 + ), + f!( + /// [SM4] --- ShangMi 4 Cipher Algorithm + /// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions + struct ::crypto::Sm4("SM4"): "sm4" + ["avx2"] + fn uses_sm4 + ), + f!( + /// [SSE] --- Streaming SIMD Extensions + /// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions + struct ::sse::Sse("SSE"): "sse" + [] + 
fn uses_sse + ), + f!( + /// [SSE2] --- Streaming SIMD Extensions 2 + /// [SSE2]: https://en.wikipedia.org/wiki/SSE2 + struct ::sse::Sse2("SSE2"): "sse2" + ["sse"] + fn uses_sse2 + ), + f!( + /// [SSE3] --- Streaming SIMD Extensions 3 + /// [SSE3]: https://en.wikipedia.org/wiki/SSE3 + struct ::sse::Sse3("SSE3"): "sse3" + ["sse2"] + fn uses_sse3 + ), + f!( + /// [SSE4.1] --- Streaming SIMD Extensions 4.1 + /// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1 + struct ::sse::Sse4_1("SSE4.1"): "sse4.1" + ["ssse3"] + fn uses_sse4 + ), + f!( + /// [SSE4.2] --- Streaming SIMD Extensions 4.2 + /// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2 + struct ::sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"] + fn uses_sse4 + ), + // // TODO: This only exists from 1.91 and above (current beta) + // f!( + // /// [SSE4a] --- Streaming SIMD Extensions 4a + // /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a + // struct Sse4a("SSE4a"): "sse4a" + ["sse3"] + // fn uses_sse4a + // ), + f!( + /// [SSSE3] --- Supplemental Streaming SIMD Extensions 3 + /// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3 + struct ::sse::SupplementalSse3("SSSE3"): "ssse3" + ["sse3"] + fn uses_ssse3 + ), + f!( + /// [TBM] --- Trailing Bit Manipulation + /// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation) + struct ::discontinued::Tbm("TBM"): "tbm" + [] + fn uses_tbm + ), + f!( + /// [VAES] --- Vector AES Instructions + /// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES + struct ::crypto::Vaes("VAES"): "vaes" + ["avx2", "aes"] + fn uses_vaes + ), + f!( + /// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords + /// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ + struct ::crypto::Vpclmulqdq("VPCLMULQDQ"): "vpclmulqdq" + ["avx", "pclmulqdq"] + fn uses_vpclmulqdq + ), + f!( + /// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions + /// [KEYLOCKER_WIDE]: 
https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions + struct ::crypto::WideKeylocker("KEYLOCKER_WIDE"): "widekl" + ["kl"] + fn uses_wide_keylocker + ), + f!( + /// [`xsave`] --- Save processor extended states + /// ["xsave"]: https://www.felixcloutier.com/x86/xsave + struct ::xsave::Xsave("`xsave`"): "xsave" + [] + fn uses_xsave + ), + f!( + /// ["xsavec"] --- Save processor extended states with compaction + /// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec + struct ::xsave::Xsavec("`xsavec`"): "xsavec" + [] + fn uses_xsavec + ), + f!( + /// ["xsaveopt"] --- Save processor extended states optimized + /// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt + struct ::xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + [] + fn uses_xsaveopt + ), + f!( + /// ["xsaves"] --- Save processor extended states supervisor + /// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves + struct ::xsave::Xsaves("`xsaves`"): "xsaves" + [] + fn uses_xsaves + ), +]; + +#[test] +fn all_features_included() {} diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs new file mode 100644 index 00000000..2961bd33 --- /dev/null +++ b/fearless_simd_core/gen/src/main.rs @@ -0,0 +1,166 @@ +mod data; + +use std::fmt::{Write, format}; +use std::fs; +use std::{ + cell::RefCell, + collections::HashMap, + fs::create_dir_all, + io, + path::{Path, PathBuf}, +}; + +fn main() { + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let src_dir = manifest_dir.ancestors().nth(1).unwrap().join("src"); + generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, data::X86_FEATURES).unwrap(); +} + +fn generate_for_arch( + root_dir: &Path, + arch_module_name: &str, + template: &str, + features: &'static [Feature], +) -> io::Result<()> { + let arch_dir = root_dir.join(arch_module_name); + let features = normalize_features(features); + for feature in &features { + let mut new_docs = String::new(); + for line in 
feature.feature.extra_docs.lines() { + writeln!(&mut new_docs, "///{line}").unwrap(); + } + let enabled_feature_docs = format!("`{}`", feature.children.join("`, `")); + let enabled_feature_str_list = format!(r#""{}""#, feature.children.join(r#"", ""#)); + let mut from_impls = String::new(); + for child in &feature.children { + let from_feature = features + .iter() + .find(|it| it.feature.feature_name == *child) + .unwrap(); + let type_path = format!( + "crate::{arch_module_name}::{}::{}", + from_feature.feature.module, from_feature.feature.struct_name + ); + write!( + from_impls, + r#"\n\ + impl From for {type_path} {{ + fn from(value: Self) -> {type_path} {{ + trampoline!([Self = value] => "{{FEATURE_ID}}", fn() -> {type_path} {{ {{type_path}}::new() }}) + }} + }}\n + "# + ).unwrap(); + } + let mut result = format!( + "// This file is automatically generated by `fearless_simd_core_gen`.\n\ + // Its template can be found in `fearless_simd_core/gen/templates`.\n\n\ + {template}" + ); + // We replace the from impls first, as they use template variables from the rest of this. 
+ result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("{FEATURE_DOCS_NAME}", feature.feature.feature_docs_name); + result = result.replace("/// {NEW_DOCS}", &new_docs); + result = result.replace("{FEATURE_ID}", feature.feature.feature_name); + result = result.replace("{ENABLED_FEATURES_DOCS_LIST}", &enabled_feature_docs); + result = result.replace( + "{EXAMPLE_FUNCTION_NAME}", + feature.feature.example_function_name, + ); + result = result.replace("FEATURE_STRUCT_NAME", feature.feature.struct_name); + result = result.replace("{ENABLED_FEATURES_STR_LIST}", &enabled_feature_str_list); + let module_dir = arch_dir.join(feature.feature.module); + create_dir_all(&module_dir)?; + let mut file = module_dir.join(feature.feature.feature_name); + file.set_extension("rs"); + fs::write(file, result)?; + } + Ok(()) +} + +#[derive(Debug)] +struct Feature { + /// The name of the struct to be generated. + struct_name: &'static str, + /// The Rust name for the feature, e.g. `"sse"`. + feature_name: &'static str, + /// The array of features which are implicitly enabled by this feature. + /// Note that this array does not include transitive enabled features. + directly_implicitly_enabled: &'static [&'static str], + /// Any additional docs which we want to add to the module. + extra_docs: &'static str, + /// The name of the function used in the examples. + /// Ideally, we'd make this optional, but that starts making the templating look more complicated. + example_function_name: &'static str, + /// The "display name" for the feature, used inside the docs. + feature_docs_name: &'static str, + /// Extra code added at the end. + /// Used for implicitly enabled features. + additional_impls: &'static str, + /// The module (if any) this feature will belong to. + /// + /// (Note that imports into the module are checked to exist, but not automatically inserted). + module: &'static str, +} + +/// Implementation detail intermediate struct of `normalize_features`. 
+struct MaybeNormalizedFeature { + /// The actual feature. + feature: &'static Feature, + /// The fully deduplicated, sorted list of target features enabled by this feature, with all + /// implicitly enabled features resolved transitively (`None` until it has been computed). + /// + /// Note that this *excludes* the parent target feature. + // We use a RefCell here as we know there cannot be loops. + children: RefCell<Option<Vec<&'static str>>>, +} + +#[derive(Debug)] +struct NormalizedFeature { + feature: &'static Feature, + children: Vec<&'static str>, +} + +fn normalize_features(features: &'static [Feature]) -> Vec<NormalizedFeature> { + let mut state = HashMap::new(); + for feature in features { + state.insert( + feature.feature_name, + MaybeNormalizedFeature { + feature, + children: RefCell::new(None), + }, + ); + } + fn handle_item(state: &HashMap<&str, MaybeNormalizedFeature>, item: &MaybeNormalizedFeature) { + // We hold the mutable borrow across the recursion, so a dependency cycle panics instead of looping forever. + let mut borrowed_children = item.children.borrow_mut(); + if borrowed_children.is_some() { + return; + } + let mut new_children = Vec::new(); + for child in item.feature.directly_implicitly_enabled { + new_children.push(*child); + let child = state + .get(child) + .expect("Every implicitly enabled feature should exist."); + handle_item(state, child); + new_children.extend_from_slice(child.children.borrow().as_ref().unwrap()); + } + new_children.sort(); + new_children.dedup(); + *borrowed_children = Some(new_children); + } + for feature in state.values() { + handle_item(&state, feature); + } + let mut output = Vec::new(); + for (_, feature) in state { + output.push(NormalizedFeature { + feature: feature.feature, + children: feature.children.into_inner().unwrap(), + }); + } + output.sort_by_key(|it| it.feature.feature_name); + output +} diff --git a/fearless_simd_core/gen/templates/aarch64.rs b/fearless_simd_core/gen/templates/aarch64.rs new file mode 100644 index 00000000..e69de29b diff --git a/fearless_simd_core/gen/templates/x86.rs 
b/fearless_simd_core/gen/templates/x86.rs new file mode 100644 index 00000000..a37207a3 --- /dev/null +++ b/fearless_simd_core/gen/templates/x86.rs @@ -0,0 +1,90 @@ +//! The {FEATURE_DOCS_NAME} target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// {NEW_DOCS} +/// +/// A token indicating that the current CPU has the `{FEATURE_ID}` target feature. +/// +/// This feature also enables {ENABLED_FEATURES_DOCS_LIST}; +/// the tokens for these features can be created using [`From`] implementations. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "{FEATURE_ID}")] +/// fn {EXAMPLE_FUNCTION_NAME}() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct FEATURE_STRUCT_NAME { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for FEATURE_STRUCT_NAME { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""{FEATURE_ID}" enabled."#) + } +} + +unsafe impl TargetFeatureToken for FEATURE_STRUCT_NAME { + const FEATURES: &[&str] = &[{ENABLED_FEATURES_STR_LIST}]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + trampoline!([Self = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl FEATURE_STRUCT_NAME { + #[cfg(feature = "std")] + /// Create a new token if the `"{FEATURE_ID}"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option<Self> { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("{FEATURE_ID}") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "{FEATURE_ID}")] + /// Create a new token for the "{FEATURE_ID}" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// {FEATURE_ID} is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "{FEATURE_DOCS_NAME}" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} +/*{FROM_IMPLS}*/ + +const _: () = { + assert!( + core::mem::size_of::<FEATURE_STRUCT_NAME>() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs new file mode 100644 index 00000000..e212b240 --- /dev/null +++ b/fearless_simd_core/src/lib.rs @@ -0,0 +1,282 @@ +//! Tooling for Rust's target features. + +// LINEBENDER LINT SET - lib.rs - v4 +// See https://linebender.org/wiki/canonical-lints/ +// These lints shouldn't apply to examples or tests. +#![cfg_attr(not(test), warn(unused_crate_dependencies))] +// These lints shouldn't apply to examples. +#![warn(clippy::print_stdout, clippy::print_stderr)] +// Targeting e.g. 32-bit means structs containing usize can give false positives for 64-bit. +#![cfg_attr(target_pointer_width = "64", warn(clippy::trivially_copy_pass_by_ref))] +// END LINEBENDER LINT SET +#![cfg_attr(docsrs, feature(doc_cfg))] +#![no_std] + +// TODO: Do we want both an `x86` and `x86_64` module? 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))] +pub mod x86; + +pub mod trampoline; + +#[cfg(feature = "std")] +extern crate std; + +/// Token proving that a set of target features is available. +/// +/// Note that this trait is only meaningful when there are values of this type. +/// That is, to enable the target features in `FEATURES`, you *must* have a value +/// of this type. +/// +/// Values of types which implement this trait are passed as the tokens to [`trampoline!`], +/// which is a safe abstraction over enabling target features. +/// +/// # Safety +/// +/// To construct a value of a type implementing this trait, you must have proven that each +/// target feature in `FEATURES` is available. +pub unsafe trait TargetFeatureToken: Copy { + /// The set of target features which are enabled for this run, if + /// you have a value of this type. + const FEATURES: &[&str]; + + /// Enable the target features in `FEATURES` for a single run of `f`, and run it. + /// + /// `f` must be marked `#[inline(always)]` for this to work. + /// + /// Note that this does *not* enable the target features on the Rust side (e.g. for calling). + /// To do so, you should instead use [`trampoline!`] directly - this is a convenience wrapper around `trampoline` + /// for cases where the dispatch of simd values is handled elsewhere. + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R; +} + +/// Run an operation in a context with specific target features enabled, validated with [`TargetFeatureToken`] values. +/// +/// This is effectively a stable implementation of the "Struct Target Features" Rust feature, +/// which at the time of writing is in neither stable nor nightly Rust. +/// This macro can be used both to dispatch to SIMD code and to write explicit SIMD code, safely in either case. 
+/// +/// # Reference +/// +/// These reference examples presume that you have (values in brackets are the "variables"): +/// +/// - An expression (`token`) of a type (`Token`) which is `TargetFeatureToken` for some target features (`"f1,f2,f3"`); +/// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset of those target features (`"f1,f2"`); +/// - Local values of types corresponding to the argument types (`a` of type `[f32; 4]`) +/// +/// ```rust,ignore +/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// ``` +/// +/// Multiple tokens are also supported by providing them in a sequence in square brackets: +/// +/// ```rust,ignore +/// trampoline!([Token = token, Sse = my_sse] => "f1,f2,sse", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// ``` +/// +/// A more advanced syntax is available if you need to use generics. +/// That syntax is explained in comments around the macro's definition, which can be seen above. +/// For reference, the implementation used to implement [`vectorize`](TargetFeatureToken::vectorize) for `"sse"` is: +/// +/// ```rust,ignore +/// trampoline!([Self = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) +/// ``` +/// +/// There is also support for where clauses after the return type. +/// +/// # Motivation +/// +/// In Fearless SIMD, this macro has two primary use cases: +/// +/// 1) To dispatch to a specialised SIMD implementation of a function using target specific +/// instructions which will be more efficient than generic version written using the portable subset. +/// 2) To implement the portable subset of SIMD operations. +/// +/// To expand on use case 1, when using Fearless SIMD you will often be writing functions which are +/// instantiated for multiple different SIMD levels (using generics). 
+/// However, for certain SIMD levels, there may be specific instructions which solve your problem more +/// efficiently than using the generic implementations (as an example, consider SHA256 hashing, which has +/// built-in instructions on several architectures). +/// However, in such generic implementations, the Rust type system doesn't know which target features are enabled, +/// so it would ordinarily require writing code to: +/// +/// - detect whether a specific target feature is supported. +/// - unsafely, enter a context where the target feature is enabled in a way which makes the type system aware of this. +/// +/// This macro provides a way to do the second safely once you have completed the first. +/// +/// # Example +/// +/// This expands upon the example in the reference, written out completely. +/// +/// ```rust,ignore +/// // Just once, acquire a token. +/// let token = Token::try_new(); +/// // Later, dispatch based on whether that token is available, potentially multiple times: +/// +/// /// Perform some computation using SIMD. +/// #[target_feature(enable = "f1,f2")] +/// fn uses_simd(val: [f32; 4]) -> [f32; 4] { +/// // ... +/// } +/// +/// let a = [1., 2., 3., 4.]; +/// let Some(token) = token else { return scalar_fallback(a) }; +/// +/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// ``` +/// +/// Note that a function only operating on 128 bits is probably too small for it to be +/// worthwhile checking whether a token exists just for that one call. +/// However, if you have amortised the cost of that check between many function calls, +/// the `trampoline!` macro itself compiles down to a function call. +/// (This would be the case when this macro is being used to implement the portable subset of SIMD operations.) +/// +// TODO: We could write an example for each of ARM, x86, and conditionally compile it in? 
+/// Note that our examples are all ignored as there is no target feature which is available on every platform, +/// but we need these docs to compile for users on any platform. +/// +/// # Soundness +/// +/// This macro is designed to be sound, i.e. no input to this macro can lead to undefined behaviour +/// without using the `unsafe` keyword. +/// +/// The operation provided will only ever be immediately called once on the same thread as the macro caller, +/// so safety justifications within the operation can rely on the context of the call site of this macro. +/// The shorthand format does not allow calling unsafe functions. +#[macro_export] +macro_rules! trampoline { + // [Sse = sse] for "sse", <(u32)> fn<(T: Int)>(a: [T; 4]) -> T where (...) {...} + ( + // The token types, with an expression to get a value of that token kind. + [$($token_type: path = $token: expr),+$(,)?] + // The target feature to enable. Must be a string literal. + => $to_enable: literal, + // The generic arguments to instantiate the call to the generated function with. + // Note the inner brackets, needed because we can't write a parser for this in macros. + $(<($($generic_instantiation: tt)+)>)? + // The generic parameters to give the inner generated function. + // Brackets needed as above. + fn$(<($($generic_args: tt)*)>)? + // The arguments to the function, with provided explicit values, plus return type and where clause. + ($($arg_name: ident: $arg_type: ty = $arg_value: expr),*$(,)?) $(-> $ret: ty)? + // The where clause of the generated function. + // Note the inner brackets after `where`, needed as above. + $(where ($($where: tt)*))? + // The operation to run inside the context with the target feature enabled. + $op: block + ) => {{ + #[target_feature(enable = $to_enable)] + #[inline] + // TODO: Do we want any other attributes here? + // Soundness: We wrap the $op in a wrapping block, to ensure that any inner attributes don't apply to the function. 
+ // This ensures that the user can't add `#![target_feature(enable = "xxx")]` to their block. + // Soundness: Either of generic_args and `$where` could be used to exit the function item early, so aren't + // inside an unsafe block. + fn trampoline_impl$(<$($generic_args)*>)?($($arg_name: $arg_type),*) $(-> $ret)? $(where $($where)*)? { $op } + + $( + // We validate that we actually have a token of each claimed type. + let _: $token_type = $token; + )+ + const { + // And that the claimed types justify enabling the enabled target features. + $crate::trampoline::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+]) + // TODO: Better failure message here (i.e. at least concatting the set of requested features) + .unwrap(); + } + + $( + // Soundness: We evaluate `$arg_value` outside of the unsafe block to ensure it doesn't + // accidentally gain an unsafe capability. + #[allow(clippy::redundant_locals, reason="Required for consistency/safety.")] + let $arg_name = $arg_value; + )* + // Safety: We have validated that the target features enabled in `trampoline_impl` are enabled, + // because we have values of token types which implement $crate::TargetFeatureToken + // Soundness: `$generic_instantiation` could be used to exit the path expression early. As `<>` are + // not treated as "real" brackets by macros, this isn't practical to detect and avoid statically. + // To try and ensure that this can't turn into unsoundness, the + // `trampoline_impl::<$generic_instantiation>` is evaluated outside of an unsafe block. + // In theory, if a user could make the value of `func` be an `unsafe` fn pointer or + // item type, this would still be unsound. + // However, we haven't found a way for this to compile given the trailing `>`, + // so aren't aware of any actual unsoundness. But note that this hasn't been rigorously proven, + // and new Rust features could open this up wider.
+ let func = trampoline_impl$(::<$($generic_instantiation)*>)?; + unsafe { func($($arg_name),*) } + }}; + // Sse = sse => "sse", sse_do_x(a: [f32; 4], b: [f32; 4]) -> [f32; 4] + ($token_type: path = $token: expr => $to_enable: literal, $function: ident($($arg_name: ident: $arg_type: ty),*$(,)?) $(-> $ret: ty)?) => { + $crate::trampoline!( + [$token_type = $token] + => $to_enable, + $function($($arg_name: $arg_type),*) $(-> $ret)? + ) + }; + // [Sse = sse] => "sse", sse_do_x(a: [f32; 4], b: [f32; 4]) -> [f32; 4] + ([$($token_type: path = $token: expr),+$(,)?] => $to_enable: literal, $function: ident($($arg_name: ident: $arg_type: ty),*$(,)?) $(-> $ret: ty)?) => { + $crate::trampoline!( + [$($token_type = $token),+] + => $to_enable, + fn($($arg_name: $arg_type = $arg_name),*) $(-> $ret)? { $function($($arg_name),*) } + ) + }; +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(test)] +mod example_expansion { + use core::arch::x86_64::{__m128, _mm_mul_ps}; + + use crate::x86::{self, v1::Sse}; + + #[target_feature(enable = "sse")] + fn sse_mul_f32s(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { + let a: __m128 = bytemuck::must_cast(a); + let b: __m128 = bytemuck::must_cast(b); + bytemuck::must_cast(_mm_mul_ps(a, b)) + } + + #[test] + // This is a test so that it is runnable + fn example_output() { + let Some(sse) = x86::v1::Sse::try_new() else { + panic!("Example code") + }; + let a = [10_f32, 20_f32, 30_f32, 40_f32]; + let b = [4_f32, 5_f32, 6_f32, 7_f32]; + + // Both of these example expansions, the former using the shorthand form: + let res = + trampoline!(Sse = sse => "sse", sse_mul_f32s(a: [f32; 4], b: [f32; 4]) -> [f32; 4]); + assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]); + let res = trampoline!([Sse = sse] => "sse", fn(a: [f32; 4] = a, b: [f32; 4] = b) -> [f32; 4] { sse_mul_f32s(a, b)}); + assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]); + // will expand to: + #[expect(unused_braces, reason = "Required for macro soundness.")] + // Start 
expansion: + let res = { + #[target_feature(enable = "sse")] + #[inline] + fn trampoline_impl(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { + { sse_mul_f32s(a, b) } + } + let _: Sse = sse; + const { + crate::trampoline::is_feature_subset( + "sse", + [::FEATURES], + ) + .unwrap(); + } + #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] + let a = a; + #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] + let b = b; + let func = trampoline_impl; + unsafe { func(a, b) } + }; + // End expansion + assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]); + } +} diff --git a/fearless_simd_core/src/trampoline.rs b/fearless_simd_core/src/trampoline.rs new file mode 100644 index 00000000..14c73f62 --- /dev/null +++ b/fearless_simd_core/src/trampoline.rs @@ -0,0 +1,231 @@ +//! Support for the safety checks in [`trampoline!`](crate::trampoline!). +//! +//! Methods to compute whether each feature in a target feature string (e.g. "sse2,fma") +//! is supported by a set of target features. +//! +//! The [`trampoline`](crate::trampoline!) macro takes both a target feature string, +//! and one (or more) [`TargetFeatureToken`](crate::TargetFeatureToken). +//! It uses the functions in this module to validate that the target feature string is +//! supported by the provided tokens. +//! +//! Because evaluating whether this is safe needs to happen at compile time (for both performance +//! and predictability), the methods in this file are written as `const` functions. +//! This leads to a bit of weirdness, including treating strings as `&[u8]` internally, as that +//! actually allows slicing (i.e. reading individual bytes). As far as I know, that isn't +//! currently possible in const contexts for strings. +//! Note that the code is still written to be UTF-8 compatible, although we believe that +//! all currently supported target features are ASCII anyway. + +/// The result of `is_feature_subset`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[must_use] +pub enum SubsetResult { + /// The required features are a subset of the permitted features. + Yes, + /// The required features are not all available. + No { + /// The feature which was found to be missing (there may be several such features). + failing: &'static str, + }, +} + +impl SubsetResult { + /// A utility method to panic if the target features aren't supported. + // TODO: How much more context would we be able to give if we inlined this? + pub const fn unwrap(self) { + match self { + Self::Yes => (), + // This is const, so we can't actually format out the failing value :( + Self::No { .. } => panic!("Tokens provided are missing a necessary target feature."), + } + } +} + +/// Determine whether the features in the target feature string `required` are a subset of the features in `permitted`. +/// See the module level docs [self]. +/// +/// We require static lifetimes as this is primarily internal to the macro. +pub const fn is_feature_subset( + required: &'static str, + permitted: [&[&'static str]; N], +) -> SubsetResult { + let mut required_bytes = required.as_bytes(); + let mut finished = false; + 'input_feature: while !finished { + let mut comma_idx = 0; + // Find the first comma in required_bytes, or the end of the string. + while comma_idx < required_bytes.len() && required_bytes[comma_idx] != b',' { + comma_idx += 1; + } + // `comma_idx` is now the index of the comma, e.g. if the string was "sse,", idx would be 3 + // This is the feature we need to validate exists in permitted. + let (to_find, remaining_required) = &required_bytes.split_at(comma_idx); + if let [comma, rest @ ..] = remaining_required { + if *comma != b',' { + panic!("Internal failure of expected behaviour."); + } else { + required_bytes = rest; + } + } else { + // Exit out of the loop after this iteration. + // Note that for input of `""`` and "sse,", we still need to search + // for the input target feature `` (i.e. 
the empty string), to match Rust's behaviour here. + finished = true; + } + + let mut local_permitted = permitted.as_slice(); + while let [to_test, rest @ ..] = local_permitted { + local_permitted = rest; + if str_array_contains(to_test, to_find) { + continue 'input_feature; + } + } + // We tried all of the items, and `to_find` wasn't one of them. + // Therefore, at least one of the features in the requested features wasn't supported + return SubsetResult::No { + failing: match core::str::from_utf8(to_find) { + Ok(x) => x, + Err(_) => panic!( + "We either found a comma or the end of the string, so before then should have been valid UTF-8." + ), + }, + }; + } + // We found all of the required features. + SubsetResult::Yes +} + +const fn str_array_contains(mut haystack: &[&str], needle: &[u8]) -> bool { + while let [to_test, rest @ ..] = haystack { + haystack = rest; + if byte_arrays_eq(to_test.as_bytes(), needle) { + return true; + } + } + false +} + +const fn byte_arrays_eq(lhs: &[u8], rhs: &[u8]) -> bool { + if lhs.len() != rhs.len() { + return false; + } + let mut idx = 0; + while idx < lhs.len() { + if lhs[idx] != rhs[idx] { + return false; + } + idx += 1; + } + true +} + +#[cfg(test)] +mod tests { + use super::{SubsetResult, is_feature_subset}; + + /// Test if each feature in the feature string `required` is an element in `permitted`. + /// + /// Should be equivalent to [`is_feature_subset`], but not written to be const compatible. + fn is_feature_subset_simple( + required: &'static str, + permitted: [&[&'static str]; N], + ) -> SubsetResult { + 'feature: for feature in required.split(',') { + for permitted_group in &permitted { + for permitted_feature in *permitted_group { + if feature == *permitted_feature { + continue 'feature; + } + } + } + // We tried all permitted feature, and this item wasn't present. + return SubsetResult::No { failing: feature }; + } + SubsetResult::Yes + } + + /// Expect `is_feature_subset` to succeed. 
+ #[track_caller] + fn expect_success(required: &'static str, permitted: [&[&'static str]; N]) { + let res1 = is_feature_subset(required, permitted); + assert_eq!(res1, SubsetResult::Yes, "Const version failed."); + // Sanity check against the "trivially correct" version. + let res2 = is_feature_subset_simple(required, permitted); + assert_eq!(res2, SubsetResult::Yes, "Simpler version failed."); + } + + /// Expect `is_feature_subset` to fail (with only a single possible failure). + #[track_caller] + fn expect_failure( + required: &'static str, + permitted: [&[&'static str]; N], + failing: &'static str, + ) { + let res1 = is_feature_subset(required, permitted); + assert_eq!(res1, SubsetResult::No { failing }, "Const version failed."); + // Sanity check against the "trivially correct" version. + let res2 = is_feature_subset_simple(required, permitted); + assert_eq!( + res2, + SubsetResult::No { failing }, + "Simpler version failed." + ); + } + + /// Expect `is_feature_subset` to fail, possibly with multiple potential missing features. + #[track_caller] + fn expect_any_failure(required: &'static str, permitted: [&[&'static str]; N]) { + let res1 = is_feature_subset(required, permitted); + assert!( + matches!(res1, SubsetResult::No { .. }), + "Const version failed." + ); + // Sanity check against the "trivially correct" version. + let res2 = is_feature_subset_simple(required, permitted); + assert!( + matches!(res2, SubsetResult::No { .. }), + "Simpler version failed." 
+ ); + } + + #[test] + fn simple_cases() { + expect_success("a,b,c", [&["a", "b", "c"]]); + expect_failure("a,b,c", [&["a", "b"]], "c"); + expect_success("c,a,b", [&["a", "b", "c"]]); + expect_failure("c,a,b", [&["a", "b"]], "c"); + expect_success("a,b", [&["a", "b", "c"]]); + expect_failure("a,b", [&["a", "c"]], "b"); + + // Check it correctly catches more than single item failures + expect_success("a1,a2,a3", [&["a1", "a2", "a3"]]); + expect_failure("a1,a2,a3", [&["a1", "a2"]], "a3"); + expect_success("a3,a1,a2", [&["a1", "a2", "a3"]]); + expect_failure("a3,a1,a2", [&["a1", "a2"]], "a3"); + expect_success("a1,a2", [&["a1", "a2", "a3"]]); + expect_failure("a1,a2", [&["a1", "a3"]], "a2"); + + // Check it doesn't have false positives with prefixes + expect_failure("a1,a2,a3", [&["a1", "a2", "a"]], "a3"); + expect_any_failure("a3,a1,a2", [&["a"]]); + expect_success("a1,a2", [&["a1", "a2", "a3"]]); + expect_failure("a1,a2", [&["a1", "a3"]], "a2"); + + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + } + + #[test] + fn empty_feature() { + expect_failure("a,b,", [&["a", "b"]], ""); + expect_failure("", [&["a", "b"]], ""); + } + + #[test] + fn non_ascii_features() { + expect_success("café", [&["café"]]); + expect_failure("café", [&["cafe"]], "café"); + } +} diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs new file mode 100644 index 00000000..10caa27a --- /dev/null +++ b/fearless_simd_core/src/x86/mod.rs @@ -0,0 +1,15 @@ +//! Target feature tokens for the x86 and x86-64 CPU families. +//! +//! The general computation CPU features associated with each [microarchitecture level] can +//! be found in their corresponding modules: +//! +//! - [`v1`] for x86-64-v1. +//! - [`v2`] for x86-64-v2. +//! - [`v3`] for x86-64-v3. +//! - [`v4`] for x86-64-v4. +//! +//!
Tokens for target features which are not associated with these levels can be found in this module. +//! +//! [microarchitecture level]: https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels + +pub mod v1; diff --git a/fearless_simd_core/src/x86/v1/fxsr.rs b/fearless_simd_core/src/x86/v1/fxsr.rs new file mode 100644 index 00000000..a4c99085 --- /dev/null +++ b/fearless_simd_core/src/x86/v1/fxsr.rs @@ -0,0 +1,80 @@ +//! The FXSR target feature. + +use core::fmt::Debug; + +use crate::{TargetFeatureToken, trampoline}; + +/// A token indicating that the current CPU has the FXSR target feature. +/// +/// The Rust target feature name for this feature is `fxsr`. +/// For example, this can be used to [`trampoline!`] into: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fxsr")] +/// fn uses_fxsr() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Fxsr { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Fxsr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""fxsr" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Fxsr { + const FEATURES: &[&str] = &["fxsr"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + trampoline!([Self = self] => "fxsr", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Fxsr { + #[cfg(feature = "std")] + /// Create a new token if the "fxsr" target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected?
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("fxsr") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "fxsr")] + /// Create a new token for the "fxsr" target feature is enabled. + /// + /// This method is useful to get a new token if you have an + /// external proof that FXSR is available. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fxsr" target feature is available. + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs new file mode 100644 index 00000000..0e360b50 --- /dev/null +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -0,0 +1,38 @@ +//! Target features enabled in the `x86-64-v1` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This is the baseline for x86-64 support. + +mod sse; +pub use sse::Sse; + +mod fxsr; +pub use fxsr::Fxsr; + +/// A token that the current CPU is on the x86-64-v1 microarchitecture level. +// TODO: (This is currently incomplete) +pub struct V1 { + pub sse: Sse, + pub fxsr: Fxsr, +} + +impl V1 { + /// Create a new token if the current CPU is at the x86-64-v1 microarchitecture level or better. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + #[cfg(feature = "std")] + pub fn try_new() -> Option { + // TODO: Caching + Some(Self { + fxsr: Fxsr::try_new()?, + sse: Sse::try_new()?, + }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v1/sse.rs b/fearless_simd_core/src/x86/v1/sse.rs new file mode 100644 index 00000000..67f2160c --- /dev/null +++ b/fearless_simd_core/src/x86/v1/sse.rs @@ -0,0 +1,90 @@ +//! The SSE target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// A token indicating that the current CPU has the SSE target feature. +/// +/// The Rust target feature name for this feature is `sse`. +/// +/// See for more information about these instructions. +/// This feature also implictily enables +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse")] +/// fn uses_sse() { +/// // ... +/// } +/// ``` +/// +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse { + const FEATURES: &[&str] = &["sse"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + trampoline!([Self = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse { + #[cfg(feature = "std")] + /// Create a new token if the `"sse"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse")] + /// Create a new token for the "sse" target feature is enabled. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE is available. This could happen if you have a token for a target feature + /// which [implicitly enables] `sse`. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; From 78d23d8f79ea48e047018e77568f1da06ab03443 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 14:43:24 +0100 Subject: [PATCH 02/19] Remove old v1 items --- fearless_simd_core/src/x86/v1/fxsr.rs | 80 ------------------------ fearless_simd_core/src/x86/v1/sse.rs | 90 --------------------------- 2 files changed, 170 deletions(-) delete mode 100644 fearless_simd_core/src/x86/v1/fxsr.rs delete mode 100644 fearless_simd_core/src/x86/v1/sse.rs diff --git a/fearless_simd_core/src/x86/v1/fxsr.rs b/fearless_simd_core/src/x86/v1/fxsr.rs deleted file mode 100644 index a4c99085..00000000 --- a/fearless_simd_core/src/x86/v1/fxsr.rs +++ /dev/null @@ -1,80 +0,0 @@ -//! The FXSR target feature. - -use core::fmt::Debug; - -use crate::{TargetFeatureToken, trampoline}; - -/// A token indicating that the current CPU has the FXSR target feature. -/// -/// The Rust target feature name for this feature is `fxsr`. 
-/// For example, this can be used to [`trampoline!`] into: -/// -/// ```rust -/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -/// #[target_feature(enable = "fxsr")] -/// fn uses_fxsr() { -/// // ... -/// } -/// ``` -#[derive(Copy, Clone, Hash, PartialEq, Eq)] -pub struct Fxsr { - // We don't use non_exhaustive because we don't want this struct to be constructible. - // in different modules in this crate. - _private: (), -} - -impl Debug for Fxsr { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, r#""fxsr" enabled."#) - } -} - -unsafe impl TargetFeatureToken for Fxsr { - const FEATURES: &[&str] = &["fxsr"]; - - #[inline(always)] - fn vectorize(self, f: impl FnOnce() -> R) -> R { - trampoline!([Self = self] => "fxsr", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) - } -} - -impl Fxsr { - #[cfg(feature = "std")] - /// Create a new token if the "fxsr" target feature is detected as enabled. - /// - /// This does not do any caching internally, although note that the standard - /// library does internally cache the features it detects. - // TODO: Consider a manual override feature/env var? - pub fn try_new() -> Option { - // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - if std::arch::is_x86_feature_detected!("fxsr") { - // Safety: The required CPU feature was detected. - unsafe { Some(Self::new()) } - } else { - None - } - } - - #[target_feature(enable = "fxsr")] - /// Create a new token for the "fxsr" target feature is enabled. - /// - /// This method is useful to get a new token if you have an - /// external proof that FXSR is available. - /// - /// # Safety - /// - /// No conditions other than those inherited from the target feature attribute, - /// i.e. that the "fxsr" target feature is available. 
- pub fn new() -> Self { - Self { _private: () } - } -} - -const _: () = { - assert!( - core::mem::size_of::() == 0, - "Target feature tokens should be zero sized." - ); -}; diff --git a/fearless_simd_core/src/x86/v1/sse.rs b/fearless_simd_core/src/x86/v1/sse.rs deleted file mode 100644 index 67f2160c..00000000 --- a/fearless_simd_core/src/x86/v1/sse.rs +++ /dev/null @@ -1,90 +0,0 @@ -//! The SSE target feature. - -use crate::{TargetFeatureToken, trampoline}; - -use core::fmt::Debug; - -/// A token indicating that the current CPU has the SSE target feature. -/// -/// The Rust target feature name for this feature is `sse`. -/// -/// See for more information about these instructions. -/// This feature also implictily enables -/// -/// # Example -/// -/// This can be used to [`trampoline!`] into: -/// -/// ```rust -/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -/// #[target_feature(enable = "sse")] -/// fn uses_sse() { -/// // ... -/// } -/// ``` -/// -#[derive(Copy, Clone, Hash, PartialEq, Eq)] -pub struct Sse { - // We don't use non_exhaustive because we don't want this struct to be constructible. - // in different modules in this crate. - _private: (), -} - -impl Debug for Sse { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, r#""sse" enabled."#) - } -} - -unsafe impl TargetFeatureToken for Sse { - const FEATURES: &[&str] = &["sse"]; - - #[inline(always)] - fn vectorize(self, f: impl FnOnce() -> R) -> R { - trampoline!([Self = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) - } -} - -impl Sse { - #[cfg(feature = "std")] - /// Create a new token if the `"sse"` target feature is detected as enabled. - /// - /// This does not do any caching internally, although note that the standard - /// library does internally cache the features it detects. - // TODO: Consider a manual override feature/env var? - pub fn try_new() -> Option { - // Feature flag required to make docs compile. 
- // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - if std::arch::is_x86_feature_detected!("sse") { - // Safety: The required CPU feature was detected. - unsafe { Some(Self::new()) } - } else { - None - } - } - - #[target_feature(enable = "sse")] - /// Create a new token for the "sse" target feature is enabled. - /// - /// This method is useful to get a new token if you have an external proof that - /// SSE is available. This could happen if you have a token for a target feature - /// which [implicitly enables] `sse`. - /// - /// # Safety - /// - /// No conditions other than those inherited from the target feature attribute, - /// i.e. that the "sse" target feature is available. - /// - /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 - pub fn new() -> Self { - Self { _private: () } - } -} - -const _: () = { - assert!( - core::mem::size_of::() == 0, - "Target feature tokens should be zero sized." - ); -}; From 9cfa3345c085a88997f73a115428b1d723461e35 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 14:43:37 +0100 Subject: [PATCH 03/19] Improve the generator --- fearless_simd_core/gen/src/data/x86.rs | 170 ++++++++++++++++-------- fearless_simd_core/gen/src/main.rs | 38 ++++-- fearless_simd_core/gen/templates/x86.rs | 14 +- 3 files changed, 144 insertions(+), 78 deletions(-) diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 39e59990..346a9f79 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -2,7 +2,7 @@ use crate::Feature; macro_rules! 
f { ($(#[doc = $doc_addition: literal])* - struct ::$module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*] + struct $module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*] fn $example_function_name: ident $($additional_impls: tt)* ) => { @@ -29,339 +29,397 @@ pub(crate) const X86_TEMPLATE: &str = include_str!("../../templates/x86.rs"); pub(crate) const X86_FEATURES: &[Feature] = &[ f!( /// [ADX] --- Multi-Precision Add-Carry Instruction Extensions + /// /// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX - struct ::adx::Adx("ADX"): "adx" + [] + struct adx::Adx("ADX"): "adx" + [] fn uses_adx ), f!( /// [AES] --- Advanced Encryption Standard + /// /// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set - struct ::crypto::Aes("AES"): "aes" + ["sse2"] + struct crypto::Aes("AES"): "aes" + ["sse2"] fn uses_aes ), f!( /// [AVX] --- Advanced Vector Extensions + /// /// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions - struct ::avx::Avx("AVX"): "avx" + ["sse4.2"] + struct avx::Avx("AVX"): "avx" + ["sse4.2"] fn uses_avx ), f!( /// [AVX2] --- Advanced Vector Extensions 2 + /// /// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2 - struct ::avx::Avx2("AVX2"): "avx2" + ["avx"] + struct avx::Avx2("AVX2"): "avx2" + ["avx"] fn uses_avx2 ), f!( /// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions + /// /// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16 - struct ::avx512::Avx512bf16("AVX512-BF16"): "avx512bf16" + ["avx512bw"] + struct avx512::Avx512bf16("AVX512-BF16"): "avx512bf16" + ["avx512bw"] fn uses_avx512bf16 ), f!( /// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms + /// + /// /// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG - struct ::avx512::Avx512bitalg("AVX512-BITALG"): "avx512bitalg" + ["avx512bw"] + struct 
avx512::Avx512bitalg("AVX512-BITALG"): "avx512bitalg" + ["avx512bw"] fn uses_avx512bitalg ), f!( /// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions + /// /// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI - struct ::avx512::Avx512bw("AVX512-BW"): "avx512bw" + ["avx512f"] + struct avx512::Avx512bw("AVX512-BW"): "avx512bw" + ["avx512f"] fn uses_avx512bw ), f!( /// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions + /// /// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection - struct ::avx512::Avx512cd("AVX512-CD"): "avx512cd" + ["avx512f"] + struct avx512::Avx512cd("AVX512-CD"): "avx512cd" + ["avx512f"] fn uses_avx512cd ), f!( /// [AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions + /// /// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI - struct ::avx512::Avx512dq("AVX512-DQ"): "avx512dq" + ["avx512f"] + struct avx512::Avx512dq("AVX512-DQ"): "avx512dq" + ["avx512f"] fn uses_avx512dq ), f!( /// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation + /// /// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512 - struct ::avx512::Avx512f("AVX512-F"): "avx512f" + ["avx2", "fma", "f16c"] + struct avx512::Avx512f("AVX512-F"): "avx512f" + ["avx2", "fma", "f16c"] fn uses_avx512f ), f!( /// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions + /// /// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16 - struct ::avx512::Avx512fp16("AVX512-FP16"): "avx512fp16" + ["avx512bw"] + struct avx512::Avx512fp16("AVX512-FP16"): "avx512fp16" + ["avx512bw"] fn uses_avx512fp16 ), f!( /// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add + /// /// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA - struct ::avx512::Avx512ifma("AVX512-IFMA"): "avx512ifma" + ["avx512f"] + struct avx512::Avx512ifma("AVX512-IFMA"): "avx512ifma" + ["avx512f"] fn 
uses_avx512ifma ), f!( /// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions + /// /// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI - struct ::avx512::Avx512vbmi("AVX512-VBMI"): "avx512vbmi" + ["avx512bw"] + struct avx512::Avx512vbmi("AVX512-VBMI"): "avx512vbmi" + ["avx512bw"] fn uses_avx512vbmi ), f!( /// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2 + /// /// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2 - struct ::avx512::Avx512vbmi2("AVX512-VBMI2"): "avx512vbmi2" + ["avx512bw"] + struct avx512::Avx512vbmi2("AVX512-VBMI2"): "avx512vbmi2" + ["avx512bw"] fn uses_avx512vbmi2 ), f!( /// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions + /// /// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512 - struct ::avx512::Avx512vl("AVX512-VL"): "avx512vl" + ["avx512f"] + struct avx512::Avx512vl("AVX512-VL"): "avx512vl" + ["avx512f"] fn uses_avx512vl ), f!( /// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions + /// /// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI - struct ::avx512::Avx512vnni("AVX512-VNNI"): "avx512vnni" + ["avx512f"] + struct avx512::Avx512vnni("AVX512-VNNI"): "avx512vnni" + ["avx512f"] fn uses_avx512vnni ), f!( /// [AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers + /// /// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT - struct ::avx512::Avx512vp2intersect("AVX512-VP2INTERSECT"): "avx512vp2intersect" + ["avx512f"] + struct avx512::Avx512vp2intersect("AVX512-VP2INTERSECT"): "avx512vp2intersect" + ["avx512f"] fn uses_avx512vp2intersect ), f!( /// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction + /// /// [AVX512-VPOPCNTDQ]:https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG - struct 
::avx512::Avx512vpopcntdq("AVX512-VPOPCNTDQ"): "avx512vpopcntdq" + ["avx512f"] + struct avx512::Avx512vpopcntdq("AVX512-VPOPCNTDQ"): "avx512vpopcntdq" + ["avx512f"] fn uses_avx512vpopcntdq ), f!( /// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add + /// /// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxifma("AVX-IFMA"): "avxifma" + ["avx2"] + struct avx::Avxifma("AVX-IFMA"): "avxifma" + ["avx2"] fn uses_avxifma ), f!( /// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions + /// /// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxneconvert("AVX-NE-CONVERT"): "avxneconvert" + ["avx2"] + struct avx::Avxneconvert("AVX-NE-CONVERT"): "avxneconvert" + ["avx2"] fn uses_avxneconvert ), f!( /// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions + /// /// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxvnni("AVX-VNNI"): "avxvnni" + ["avx2"] + struct avx::Avxvnni("AVX-VNNI"): "avxvnni" + ["avx2"] fn uses_avxvnni ), f!( /// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers + /// /// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxvnniint16("AVX-VNNI-INT16"): "avxvnniint16" + ["avx2"] + struct avx::Avxvnniint16("AVX-VNNI-INT16"): "avxvnniint16" + ["avx2"] fn uses_avxvnniint16 ), f!( /// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers + /// /// [AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxvnniint8("AVX-VNNI-INT8"): "avxvnniint8" + ["avx2"] + struct avx::Avxvnniint8("AVX-VNNI-INT8"): "avxvnniint8" + ["avx2"] fn uses_avxvnniint8 ), f!( /// [BMI1] --- Bit 
Manipulation Instruction Sets + /// /// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets - struct ::v3::Bmi1(" 1"): "bmi1" + [] + struct v3::Bmi1(" 1"): "bmi1" + [] fn uses_bmi1 ), f!( /// [BMI2] --- Bit Manipulation Instruction Sets 2 + /// /// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2 - struct ::v3::Bmi2("BMI2"): "bmi2" + [] + struct v3::Bmi2("BMI2"): "bmi2" + [] fn uses_bmi2 ), f!( /// ["cmpxchg16b"] --- Compares and exchange 16 bytes (128 bits) of data atomically + /// /// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b - struct ::v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + [] + struct v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + [] fn uses_cmpxchg16b ), f!( /// [F16C] --- 16-bit floating point conversion instructions + /// /// [F16C]: https://en.wikipedia.org/wiki/F16C - struct ::v3::F16c("F16C"): "f16c" + ["avx"] + struct v3::F16c("F16C"): "f16c" + ["avx"] fn uses_f16c ), f!( /// [FMA3] --- Three-operand fused multiply-add + /// /// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set - struct ::v3::Fma("FMA3"): "fma" + ["avx"] + struct v3::Fma("FMA3"): "fma" + ["avx"] fn uses_fma ), f!( /// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State + /// /// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave, - struct ::sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + [] + /// ["fxrstor"]: https://www.felixcloutier.com/x86/fxrstor, + struct sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + [] fn uses_fxsr ), f!( /// [GFNI] --- Galois Field New Instructions + /// /// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI - struct ::crypto::Gfni("GFNI"): "gfni" + ["sse2"] + struct crypto::Gfni("GFNI"): "gfni" + ["sse2"] fn uses_gfni ), f!( /// [KEYLOCKER] --- Intel Key Locker Instructions + /// /// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions - struct ::crypto::Keylocker("KEYLOCKER"): "kl" + [] 
+ struct crypto::Keylocker("KEYLOCKER"): "kl" + [] fn uses_keylocker ), f!( /// ["lzcnt"] --- Leading zeros count + /// /// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt - struct ::v3::Lzcnt("`lzcnt`"): "lzcnt" + [] + struct v3::Lzcnt("`lzcnt`"): "lzcnt" + [] fn uses_lzcnt ), f!( /// ["movbe"] --- Move data after swapping bytes + /// /// ["movbe"]: https://www.felixcloutier.com/x86/movbe - struct ::v3::Movbe("`movbe`"): "movbe" + [] + struct v3::Movbe("`movbe`"): "movbe" + [] fn uses_movbe ), f!( /// ["pclmulqdq"] --- Packed carry-less multiplication quadword + /// /// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq - struct ::crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"] + struct crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"] fn uses_pclmulqdq ), f!( /// ["popcnt"] --- Count of bits set to 1 + /// /// ["popcnt"]: https://www.felixcloutier.com/x86/popcnt - struct ::v2::Popcnt("`popcnt`"): "popcnt" + [] + struct v2::Popcnt("`popcnt`"): "popcnt" + [] fn uses_popcnt ), f!( /// ["rdrand"] --- Read random number + /// /// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand - struct ::crypto::Rdrand("`rdrand`"): "rdrand" + [] + struct crypto::Rdrand("`rdrand`"): "rdrand" + [] fn uses_rdrand ), f!( /// ["rdseed"] --- Read random seed + /// /// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand - struct ::crypto::Rdseed("`rdseed"): "rdseed" + [] + struct crypto::Rdseed("`rdseed"): "rdseed" + [] fn uses_rdseed ), f!( /// [SHA] --- Secure Hash Algorithm + /// /// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions - struct ::crypto::Sha("SHA"): "sha" + ["sse2"] + struct crypto::Sha("SHA"): "sha" + ["sse2"] fn uses_sha ), f!( /// [SHA512] --- Secure Hash Algorithm with 512-bit digest + /// /// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions - struct ::crypto::Sha512("SHA512"): "sha512" + ["avx2"] + struct crypto::Sha512("SHA512"): "sha512" + ["avx2"] fn uses_sha512 ), f!( /// [SM3] --- ShangMi 3 Hash Algorithm + /// /// 
[SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions - struct ::crypto::Sm3("SM3"): "sm3" + ["avx"] + struct crypto::Sm3("SM3"): "sm3" + ["avx"] fn uses_sm3 ), f!( /// [SM4] --- ShangMi 4 Cipher Algorithm + /// /// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions - struct ::crypto::Sm4("SM4"): "sm4" + ["avx2"] + struct crypto::Sm4("SM4"): "sm4" + ["avx2"] fn uses_sm4 ), f!( /// [SSE] --- Streaming SIMD Extensions + /// /// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions - struct ::sse::Sse("SSE"): "sse" + [] + struct sse::Sse("SSE"): "sse" + [] fn uses_sse ), f!( /// [SSE2] --- Streaming SIMD Extensions 2 + /// /// [SSE2]: https://en.wikipedia.org/wiki/SSE2 - struct ::sse::Sse2("SSE2"): "sse2" + ["sse"] + struct sse::Sse2("SSE2"): "sse2" + ["sse"] fn uses_sse2 ), f!( /// [SSE3] --- Streaming SIMD Extensions 3 + /// /// [SSE3]: https://en.wikipedia.org/wiki/SSE3 - struct ::sse::Sse3("SSE3"): "sse3" + ["sse2"] + struct sse::Sse3("SSE3"): "sse3" + ["sse2"] fn uses_sse3 ), f!( /// [SSE4.1] --- Streaming SIMD Extensions 4.1 + /// /// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1 - struct ::sse::Sse4_1("SSE4.1"): "sse4.1" + ["ssse3"] + struct sse::Sse4_1("SSE4.1"): "sse4.1" + ["ssse3"] fn uses_sse4 ), f!( /// [SSE4.2] --- StreamingSIMDExtensions 4.2 + /// /// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2 - struct ::sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"] + struct sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"] fn uses_sse4 ), // // TODO: This only exists from 1.91 and above (current beta) // f!( // /// [SSE4a] --- StreamingSIMDExtensions 4a + // /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a // struct Sse4a("SSE4a"): "sse4a" + ["sse3"] // fn uses_sse4a // ), f!( /// [SSSE3] --- Supplemental StreamingSIMDExtensions 3 + /// /// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3 - struct ::sse::SupplementalSse3("SSSE3"): "ssse3" + 
["sse3"] + struct sse::SupplementalSse3("SSSE3"): "ssse3" + ["sse3"] fn uses_ssse3 ), f!( /// [TBM] --- Trailing Bit Manipulation + /// /// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation) - struct ::discontinued::Tbm("TBM"): "tbm" + [] + struct discontinued::Tbm("TBM"): "tbm" + [] fn uses_tbm ), f!( /// [VAES] --- Vector AES Instructions + /// /// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES - struct ::crypto::Vaes("VAES"): "vaes" + ["avx2", "aes"] + struct crypto::Vaes("VAES"): "vaes" + ["avx2", "aes"] fn uses_vaes ), f!( /// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords + /// /// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ - struct ::crypto::Vpclmulqdq("VPCLMULQDQ"): "vpclmulqdq" + ["avx", "pclmulqdq"] + struct crypto::Vpclmulqdq("VPCLMULQDQ"): "vpclmulqdq" + ["avx", "pclmulqdq"] fn uses_vpclmulqdq ), f!( /// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions + /// /// [KEYLOCKER_WIDE]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions - struct ::crypto::WideKeylocker("KEYLOCKER_WIDE"): "widekl" + ["kl"] + struct crypto::WideKeylocker("WIDE KEYLOCKER"): "widekl" + ["kl"] fn uses_wide_keylocker ), f!( /// [`xsave`] --- Save processor extended states + /// /// ["xsave"]: https://www.felixcloutier.com/x86/xsave - struct ::xsave::Xsave("`xsave`"): "xsave" + [] + struct xsave::Xsave("`xsave`"): "xsave" + [] fn uses_xsave ), f!( /// ["xsavec"] --- Save processor extended states with compaction + /// /// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec - struct ::xsave::Xsavec("`xsavec`"): "xsavec" + [] + struct xsave::Xsavec("`xsavec`"): "xsavec" + [] fn uses_xsavec ), f!( /// ["xsaveopt"] --- Save processor extended states optimized + /// /// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt - struct ::xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + [] + struct xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + [] fn 
uses_xsaveopt ), f!( /// ["xsaves"] --- Save processor extended states supervisor + /// /// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves - struct ::xsave::Xsaves("`xsaves`"): "xsaves" + [] + struct xsave::Xsaves("`xsaves`"): "xsaves" + [] fn uses_xsaves ), ]; diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index 2961bd33..1c0c8d43 100644 --- a/fearless_simd_core/gen/src/main.rs +++ b/fearless_simd_core/gen/src/main.rs @@ -1,6 +1,6 @@ mod data; -use std::fmt::{Write, format}; +use std::fmt::Write; use std::fs; use std::{ cell::RefCell, @@ -29,8 +29,16 @@ fn generate_for_arch( for line in feature.feature.extra_docs.lines() { writeln!(&mut new_docs, "///{line}").unwrap(); } - let enabled_feature_docs = format!("`{}`", feature.children.join("`, `")); - let enabled_feature_str_list = format!(r#""{}""#, feature.children.join(r#"", ""#)); + let enabled_feature_str_list = format!( + r#""{}", {}"#, + feature.feature.feature_name, + feature + .children + .iter() + .map(|it| format!(r#""{it}""#)) + .collect::>() + .join(", ") + ); let mut from_impls = String::new(); for child in &feature.children { let from_feature = features @@ -43,13 +51,13 @@ fn generate_for_arch( ); write!( from_impls, - r#"\n\ - impl From for {type_path} {{ - fn from(value: Self) -> {type_path} {{ - trampoline!([Self = value] => "{{FEATURE_ID}}", fn() -> {type_path} {{ {{type_path}}::new() }}) - }} - }}\n - "# + "\n\ +impl From for {type_path} {{ + fn from(value: FEATURE_STRUCT_NAME) -> Self {{ + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([FEATURE_STRUCT_NAME = value] => \"{{FEATURE_ID}}\", fn() -> {type_path} {{ {type_path}::new() }}) + }} +}}\n" ).unwrap(); } let mut result = format!( @@ -60,18 +68,20 @@ fn generate_for_arch( // We replace the from impls first, as they use template variables from the rest of this. 
result = result.replace("/*{FROM_IMPLS}*/", &from_impls); result = result.replace("{FEATURE_DOCS_NAME}", feature.feature.feature_docs_name); - result = result.replace("/// {NEW_DOCS}", &new_docs); + result = result.replace("/// {NEW_DOCS}\n", &new_docs); result = result.replace("{FEATURE_ID}", feature.feature.feature_name); - result = result.replace("{ENABLED_FEATURES_DOCS_LIST}", &enabled_feature_docs); result = result.replace( "{EXAMPLE_FUNCTION_NAME}", feature.feature.example_function_name, ); result = result.replace("FEATURE_STRUCT_NAME", feature.feature.struct_name); - result = result.replace("{ENABLED_FEATURES_STR_LIST}", &enabled_feature_str_list); + result = result.replace( + r#""{ENABLED_FEATURES_STR_LIST}""#, + &enabled_feature_str_list, + ); let module_dir = arch_dir.join(feature.feature.module); create_dir_all(&module_dir)?; - let mut file = module_dir.join(feature.feature.feature_name); + let mut file = module_dir.join(feature.feature.feature_name.replace(".", "_")); file.set_extension("rs"); fs::write(file, result)?; } diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index a37207a3..48a294d6 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -8,12 +8,9 @@ use core::fmt::Debug; /// /// A token indicating that the current CPU has the `{FEATURE_ID}` target feature. /// -/// This feature also enables {ENABLED_FEATURES_DOCS_LIST}; -/// the tokens for these features can be created using [`From`] implementations. 
-/// /// # Example /// -/// This can be used to [`trampoline!`] into: +/// This can be used to [`trampoline!`] into functions like: /// /// ```rust /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -35,16 +32,17 @@ impl Debug for FEATURE_STRUCT_NAME { } } -unsafe impl TargetFeatureToken for Sse { - const FEATURES: &[&str] = &[{ ENABLED_FEATURES_STR_LIST }]; +unsafe impl TargetFeatureToken for FEATURE_STRUCT_NAME { + const FEATURES: &[&str] = &["{ENABLED_FEATURES_STR_LIST}"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - trampoline!([Self = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + // Because we want this constant to be eagerly evaluated. + trampoline!([FEATURE_STRUCT_NAME = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } -impl Sse { +impl FEATURE_STRUCT_NAME { #[cfg(feature = "std")] /// Create a new token if the `"{FEATURE_ID}"` target feature is detected as enabled. /// From 594a525b65eb2444f5fc09041a67a1c92675e8cd Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 14:50:50 +0100 Subject: [PATCH 04/19] Minor fixups in the generator Also removes unused additional impl support --- fearless_simd_core/gen/src/data/x86.rs | 4 +--- fearless_simd_core/gen/src/main.rs | 3 --- fearless_simd_core/gen/templates/x86.rs | 6 +++--- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 346a9f79..52c13c4d 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -4,7 +4,6 @@ macro_rules! 
f { ($(#[doc = $doc_addition: literal])* struct $module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*] fn $example_function_name: ident - $($additional_impls: tt)* ) => { Feature { struct_name: stringify!($struct_name), @@ -13,7 +12,6 @@ macro_rules! f { extra_docs: concat!($($doc_addition, "\n",)*), example_function_name: stringify!($example_function_name), feature_docs_name: $display_name, - additional_impls: stringify!($($additional_impls)*), module: stringify!($module) } } @@ -285,7 +283,7 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ /// ["rdseed"] --- Read random seed /// /// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand - struct crypto::Rdseed("`rdseed"): "rdseed" + [] + struct crypto::Rdseed("`rdseed`"): "rdseed" + [] fn uses_rdseed ), f!( diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index 1c0c8d43..aec7057f 100644 --- a/fearless_simd_core/gen/src/main.rs +++ b/fearless_simd_core/gen/src/main.rs @@ -104,9 +104,6 @@ struct Feature { example_function_name: &'static str, /// The "display name" for the feature, used inside the docs. feature_docs_name: &'static str, - /// Extra code added at the end. - /// Used for implicitly enabled features. - additional_impls: &'static str, /// The module (if any) this feature will belong to. /// /// (Note that imports into the module are checked to exist, but not automatically inserted). diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index 48a294d6..7742b8e9 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -62,16 +62,16 @@ impl FEATURE_STRUCT_NAME { } #[target_feature(enable = "{FEATURE_ID}")] - /// Create a new token for the "{FEATURE_ID}" target feature is enabled. + /// Create a new token for the "{FEATURE_ID}" target feature. 
/// /// This method is useful to get a new token if you have an external proof that - /// {FEATURE_ID} is available. This could happen if you are in a target feature + /// {FEATURE_DOCS_NAME} is available. This could happen if you are in a target feature /// function called by an external library user. /// /// # Safety /// /// No conditions other than those inherited from the target feature attribute, - /// i.e. that the "{FEATURE_DOCS_NAME}" target feature is available. + /// i.e. that the "{FEATURE_ID}" target feature is available. /// /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions pub fn new() -> Self { From 7b595b44f51bcb713bed15984c0be0421f28c9d7 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:51:21 +0100 Subject: [PATCH 05/19] Add the generated x86 code --- fearless_simd_core/src/x86/adx/adx.rs | 92 +++++++++ fearless_simd_core/src/x86/adx/mod.rs | 8 + fearless_simd_core/src/x86/avx/avx.rs | 134 ++++++++++++ fearless_simd_core/src/x86/avx/avx2.rs | 143 +++++++++++++ fearless_simd_core/src/x86/avx/avxifma.rs | 150 ++++++++++++++ .../src/x86/avx/avxneconvert.rs | 158 +++++++++++++++ fearless_simd_core/src/x86/avx/avxvnni.rs | 150 ++++++++++++++ .../src/x86/avx/avxvnniint16.rs | 158 +++++++++++++++ fearless_simd_core/src/x86/avx/avxvnniint8.rs | 158 +++++++++++++++ fearless_simd_core/src/x86/avx/mod.rs | 24 +++ .../src/x86/avx512/avx512bf16.rs | 190 +++++++++++++++++ .../src/x86/avx512/avx512bitalg.rs | 191 ++++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512bw.rs | 172 ++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512cd.rs | 172 ++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512dq.rs | 172 ++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512f.rs | 164 +++++++++++++++ .../src/x86/avx512/avx512fp16.rs | 190 +++++++++++++++++ 
.../src/x86/avx512/avx512ifma.rs | 182 +++++++++++++++++ .../src/x86/avx512/avx512vbmi.rs | 190 +++++++++++++++++ .../src/x86/avx512/avx512vbmi2.rs | 190 +++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512vl.rs | 172 ++++++++++++++++ .../src/x86/avx512/avx512vnni.rs | 182 +++++++++++++++++ .../src/x86/avx512/avx512vp2intersect.rs | 182 +++++++++++++++++ .../src/x86/avx512/avx512vpopcntdq.rs | 182 +++++++++++++++++ fearless_simd_core/src/x86/avx512/mod.rs | 41 ++++ fearless_simd_core/src/x86/crypto/aes.rs | 106 ++++++++++ fearless_simd_core/src/x86/crypto/gfni.rs | 106 ++++++++++ fearless_simd_core/src/x86/crypto/kl.rs | 92 +++++++++ fearless_simd_core/src/x86/crypto/mod.rs | 38 ++++ .../src/x86/crypto/pclmulqdq.rs | 106 ++++++++++ fearless_simd_core/src/x86/crypto/rdrand.rs | 92 +++++++++ fearless_simd_core/src/x86/crypto/rdseed.rs | 92 +++++++++ fearless_simd_core/src/x86/crypto/sha.rs | 106 ++++++++++ fearless_simd_core/src/x86/crypto/sha512.rs | 150 ++++++++++++++ fearless_simd_core/src/x86/crypto/sm3.rs | 143 +++++++++++++ fearless_simd_core/src/x86/crypto/sm4.rs | 150 ++++++++++++++ fearless_simd_core/src/x86/crypto/vaes.rs | 157 ++++++++++++++ .../src/x86/crypto/vpclmulqdq.rs | 158 +++++++++++++++ fearless_simd_core/src/x86/crypto/widekl.rs | 99 +++++++++ .../src/x86/discontinued/mod.rs | 3 + .../src/x86/discontinued/tbm.rs | 93 +++++++++ fearless_simd_core/src/x86/mod.rs | 8 + fearless_simd_core/src/x86/sse/fxsr.rs | 93 +++++++++ fearless_simd_core/src/x86/sse/mod.rs | 24 +++ fearless_simd_core/src/x86/sse/sse.rs | 92 +++++++++ fearless_simd_core/src/x86/sse/sse2.rs | 99 +++++++++ fearless_simd_core/src/x86/sse/sse3.rs | 106 ++++++++++ fearless_simd_core/src/x86/sse/sse4_1.rs | 120 +++++++++++ fearless_simd_core/src/x86/sse/sse4_2.rs | 127 ++++++++++++ fearless_simd_core/src/x86/sse/ssse3.rs | 113 +++++++++++ fearless_simd_core/src/x86/v1/mod.rs | 9 +- fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 92 +++++++++ fearless_simd_core/src/x86/v2/mod.rs | 5 
+ fearless_simd_core/src/x86/v2/popcnt.rs | 92 +++++++++ fearless_simd_core/src/x86/v3/bmi1.rs | 92 +++++++++ fearless_simd_core/src/x86/v3/bmi2.rs | 92 +++++++++ fearless_simd_core/src/x86/v3/f16c.rs | 143 +++++++++++++ fearless_simd_core/src/x86/v3/fma.rs | 143 +++++++++++++ fearless_simd_core/src/x86/v3/lzcnt.rs | 92 +++++++++ fearless_simd_core/src/x86/v3/mod.rs | 17 ++ fearless_simd_core/src/x86/v3/movbe.rs | 92 +++++++++ fearless_simd_core/src/x86/xsave/xsave.rs | 93 +++++++++ fearless_simd_core/src/x86/xsave/xsavec.rs | 93 +++++++++ fearless_simd_core/src/x86/xsave/xsaveopt.rs | 93 +++++++++ fearless_simd_core/src/x86/xsave/xsaves.rs | 93 +++++++++ 65 files changed, 7455 insertions(+), 6 deletions(-) create mode 100644 fearless_simd_core/src/x86/adx/adx.rs create mode 100644 fearless_simd_core/src/x86/adx/mod.rs create mode 100644 fearless_simd_core/src/x86/avx/avx.rs create mode 100644 fearless_simd_core/src/x86/avx/avx2.rs create mode 100644 fearless_simd_core/src/x86/avx/avxifma.rs create mode 100644 fearless_simd_core/src/x86/avx/avxneconvert.rs create mode 100644 fearless_simd_core/src/x86/avx/avxvnni.rs create mode 100644 fearless_simd_core/src/x86/avx/avxvnniint16.rs create mode 100644 fearless_simd_core/src/x86/avx/avxvnniint8.rs create mode 100644 fearless_simd_core/src/x86/avx/mod.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512bf16.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512bitalg.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512bw.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512cd.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512dq.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512f.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512fp16.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512ifma.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vbmi.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vbmi2.rs create mode 
100644 fearless_simd_core/src/x86/avx512/avx512vl.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vnni.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs create mode 100644 fearless_simd_core/src/x86/avx512/mod.rs create mode 100644 fearless_simd_core/src/x86/crypto/aes.rs create mode 100644 fearless_simd_core/src/x86/crypto/gfni.rs create mode 100644 fearless_simd_core/src/x86/crypto/kl.rs create mode 100644 fearless_simd_core/src/x86/crypto/mod.rs create mode 100644 fearless_simd_core/src/x86/crypto/pclmulqdq.rs create mode 100644 fearless_simd_core/src/x86/crypto/rdrand.rs create mode 100644 fearless_simd_core/src/x86/crypto/rdseed.rs create mode 100644 fearless_simd_core/src/x86/crypto/sha.rs create mode 100644 fearless_simd_core/src/x86/crypto/sha512.rs create mode 100644 fearless_simd_core/src/x86/crypto/sm3.rs create mode 100644 fearless_simd_core/src/x86/crypto/sm4.rs create mode 100644 fearless_simd_core/src/x86/crypto/vaes.rs create mode 100644 fearless_simd_core/src/x86/crypto/vpclmulqdq.rs create mode 100644 fearless_simd_core/src/x86/crypto/widekl.rs create mode 100644 fearless_simd_core/src/x86/discontinued/mod.rs create mode 100644 fearless_simd_core/src/x86/discontinued/tbm.rs create mode 100644 fearless_simd_core/src/x86/sse/fxsr.rs create mode 100644 fearless_simd_core/src/x86/sse/mod.rs create mode 100644 fearless_simd_core/src/x86/sse/sse.rs create mode 100644 fearless_simd_core/src/x86/sse/sse2.rs create mode 100644 fearless_simd_core/src/x86/sse/sse3.rs create mode 100644 fearless_simd_core/src/x86/sse/sse4_1.rs create mode 100644 fearless_simd_core/src/x86/sse/sse4_2.rs create mode 100644 fearless_simd_core/src/x86/sse/ssse3.rs create mode 100644 fearless_simd_core/src/x86/v2/cmpxchg16b.rs create mode 100644 fearless_simd_core/src/x86/v2/mod.rs create mode 100644 fearless_simd_core/src/x86/v2/popcnt.rs create mode 100644 
fearless_simd_core/src/x86/v3/bmi1.rs create mode 100644 fearless_simd_core/src/x86/v3/bmi2.rs create mode 100644 fearless_simd_core/src/x86/v3/f16c.rs create mode 100644 fearless_simd_core/src/x86/v3/fma.rs create mode 100644 fearless_simd_core/src/x86/v3/lzcnt.rs create mode 100644 fearless_simd_core/src/x86/v3/mod.rs create mode 100644 fearless_simd_core/src/x86/v3/movbe.rs create mode 100644 fearless_simd_core/src/x86/xsave/xsave.rs create mode 100644 fearless_simd_core/src/x86/xsave/xsavec.rs create mode 100644 fearless_simd_core/src/x86/xsave/xsaveopt.rs create mode 100644 fearless_simd_core/src/x86/xsave/xsaves.rs diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs new file mode 100644 index 00000000..b10b6379 --- /dev/null +++ b/fearless_simd_core/src/x86/adx/adx.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The ADX target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [ADX] --- Multi-Precision Add-Carry Instruction Extensions +/// +/// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX +/// +/// A token indicating that the current CPU has the `adx` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "adx")] +/// fn uses_adx() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Adx { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Adx { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""adx" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Adx { + const FEATURES: &[&str] = &["adx"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Adx = self] => "adx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Adx { + #[cfg(feature = "std")] + /// Create a new token if the `"adx"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("adx") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "adx")] + /// Create a new token for the "adx" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// ADX is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "adx" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/adx/mod.rs b/fearless_simd_core/src/x86/adx/mod.rs new file mode 100644 index 00000000..3c74dc60 --- /dev/null +++ b/fearless_simd_core/src/x86/adx/mod.rs @@ -0,0 +1,8 @@ +//! The "adx" target feature. + +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." +)] +mod adx; +pub use adx::Adx; diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs new file mode 100644 index 00000000..baa56e36 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avx.rs @@ -0,0 +1,134 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX] --- Advanced Vector Extensions +/// +/// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions +/// +/// A token indicating that the current CPU has the `avx` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx")] +/// fn uses_avx() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx { + const FEATURES: &[&str] = &["avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Avx = self] => "avx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx { + #[cfg(feature = "std")] + /// Create a new token if the `"avx"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx")] + /// Create a new token for the "avx" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs new file mode 100644 index 00000000..b6f252bf --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avx2.rs @@ -0,0 +1,143 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX2] --- Advanced Vector Extensions 2 +/// +/// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2 +/// +/// A token indicating that the current CPU has the `avx2` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx2")] +/// fn uses_avx2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx2 { + const FEATURES: &[&str] = &[ + "avx2", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx2 = self] => "avx2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx2 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx2")] + /// Create a new token for the "avx2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX2 is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs new file mode 100644 index 00000000..12fda758 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxifma.rs @@ -0,0 +1,150 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-IFMA target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add +/// +/// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxifma` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxifma")] +/// fn uses_avxifma() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxifma { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Avxifma { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxifma" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxifma { + const FEATURES: &[&str] = &[ + "avxifma", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxifma = self] => "avxifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxifma { + #[cfg(feature = "std")] + /// Create a new token if the `"avxifma"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxifma") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxifma")] + /// Create a new token for the "avxifma" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-IFMA is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxifma" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs new file mode 100644 index 00000000..a2adef3c --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs @@ -0,0 +1,158 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-NE-CONVERT target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions +/// +/// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxneconvert` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxneconvert")] +/// fn uses_avxneconvert() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxneconvert { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxneconvert { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxneconvert" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxneconvert { + const FEATURES: &[&str] = &[ + "avxneconvert", + "avx", + "avx2", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxneconvert = self] => "avxneconvert", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxneconvert { + #[cfg(feature = "std")] + /// Create a new token if the `"avxneconvert"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxneconvert") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxneconvert")] + /// Create a new token for the "avxneconvert" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-NE-CONVERT is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxneconvert" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs new file mode 100644 index 00000000..48148a0c --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxvnni.rs @@ -0,0 +1,150 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-VNNI target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions +/// +/// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxvnni` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxvnni")] +/// fn uses_avxvnni() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxvnni { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxvnni { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxvnni" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxvnni { + const FEATURES: &[&str] = &[ + "avxvnni", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxvnni = self] => "avxvnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxvnni { + #[cfg(feature = "std")] + /// Create a new token if the `"avxvnni"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxvnni") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxvnni")] + /// Create a new token for the "avxvnni" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-VNNI is available. 
This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxvnni" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs new file mode 100644 index 00000000..36b16a41 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs @@ -0,0 +1,158 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-VNNI-INT16 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers +/// +/// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxvnniint16` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxvnniint16")] +/// fn uses_avxvnniint16() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxvnniint16 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxvnniint16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxvnniint16" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxvnniint16 { + const FEATURES: &[&str] = &[ + "avxvnniint16", + "avx", + "avx2", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxvnniint16 = self] => "avxvnniint16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxvnniint16 { + #[cfg(feature = "std")] + /// Create a new token if the `"avxvnniint16"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxvnniint16") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxvnniint16")] + /// Create a new token for the "avxvnniint16" target feature. 
+ /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-VNNI-INT16 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxvnniint16" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs new file mode 100644 index 00000000..2a0eaf4a --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs @@ -0,0 +1,158 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-VNNI-INT8 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers +/// +/// [AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxvnniint8` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxvnniint8")] +/// fn uses_avxvnniint8() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxvnniint8 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxvnniint8 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxvnniint8" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxvnniint8 { + const FEATURES: &[&str] = &[ + "avxvnniint8", + "avx", + "avx2", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxvnniint8 = self] => "avxvnniint8", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxvnniint8 { + #[cfg(feature = "std")] + /// Create a new token if the `"avxvnniint8"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxvnniint8") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxvnniint8")] + /// Create a new token for the "avxvnniint8" target feature. 
+ /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-VNNI-INT8 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxvnniint8" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/mod.rs b/fearless_simd_core/src/x86/avx/mod.rs new file mode 100644 index 00000000..f047d055 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/mod.rs @@ -0,0 +1,24 @@ +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." 
+)] +mod avx; +pub use avx::Avx; + +mod avx2; +pub use avx2::Avx2; + +mod avxifma; +pub use avxifma::Avxifma; + +mod avxneconvert; +pub use avxneconvert::Avxneconvert; + +mod avxvnni; +pub use avxvnni::Avxvnni; + +mod avxvnniint8; +pub use avxvnniint8::Avxvnniint8; + +mod avxvnniint16; +pub use avxvnniint16::Avxvnniint16; diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs new file mode 100644 index 00000000..52396972 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs @@ -0,0 +1,190 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-BF16 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions +/// +/// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16 +/// +/// A token indicating that the current CPU has the `avx512bf16` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bf16")] +/// fn uses_avx512bf16() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512bf16 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate.
+ _private: (), +} + +impl Debug for Avx512bf16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512bf16" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512bf16 { + const FEATURES: &[&str] = &[ + "avx512bf16", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512bf16 = self] => "avx512bf16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512bf16 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512bf16"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // This target-arch `cfg` is required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512bf16") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512bf16")] + /// Create a new token for the "avx512bf16" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-BF16 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bf16" target feature is available.
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs new file mode 100644 index 00000000..66bb543a --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs @@ -0,0 +1,191 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-BITALG target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms +/// +/// +/// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG +/// +/// A token indicating that the current CPU has the `avx512bitalg` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bitalg")] +/// fn uses_avx512bitalg() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512bitalg { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512bitalg { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512bitalg" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512bitalg { + const FEATURES: &[&str] = &[ + "avx512bitalg", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated.
+ trampoline!([Avx512bitalg = self] => "avx512bitalg", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512bitalg { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512bitalg"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // This target-arch `cfg` is required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512bitalg") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512bitalg")] + /// Create a new token for the "avx512bitalg" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-BITALG is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bitalg" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs new file mode 100644 index 00000000..7213b3da --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs @@ -0,0 +1,172 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-BW target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions +/// +/// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI +/// +/// A token indicating that the current CPU has the `avx512bw` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bw")] +/// fn uses_avx512bw() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512bw { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512bw { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512bw" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512bw { + const FEATURES: &[&str] = &[ + "avx512bw", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512bw = self] => "avx512bw", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512bw { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512bw"` target feature is detected as enabled.
+ /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // This target-arch `cfg` is required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512bw") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512bw")] + /// Create a new token for the "avx512bw" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-BW is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bw" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs new file mode 100644 index 00000000..b3b9c8c3 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs @@ -0,0 +1,172 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-CD target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions +/// +/// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection +/// +/// A token indicating that the current CPU has the `avx512cd` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512cd")] +/// fn uses_avx512cd() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512cd { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512cd { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512cd" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512cd { + const FEATURES: &[&str] = &[ + "avx512cd", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512cd = self] => "avx512cd", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512cd { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512cd"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // This target-arch `cfg` is required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512cd") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512cd")] + /// Create a new token for the "avx512cd" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-CD is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e.
that the "avx512cd" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs new file mode 100644 index 00000000..fb6d3670 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs @@ -0,0 +1,172 @@ +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-DQ target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions +/// +/// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI +/// +/// A token indicating that the current CPU has the `avx512dq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512dq")] +/// fn uses_avx512dq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512dq { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512dq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512dq" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512dq { + const FEATURES: &[&str] = &[ + "avx512dq", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512dq = self] => "avx512dq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512dq { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512dq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // This target-arch `cfg` is required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected?
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512dq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512dq")] + /// Create a new token for the "avx512dq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-DQ is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512dq" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs new file mode 100644 index 00000000..fa6adb77 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512f.rs @@ -0,0 +1,164 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-F target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation +/// +/// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512 +/// +/// A token indicating that the current CPU has the `avx512f` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512f")] +/// fn uses_avx512f() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512f { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512f { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512f" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512f { + const FEATURES: &[&str] = &[ + "avx512f", "avx", "avx2", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Avx512f = self] => "avx512f", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512f { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512f"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512f") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512f")] + /// Create a new token for the "avx512f" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-F is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512f" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs new file mode 100644 index 00000000..f3ed6089 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs @@ -0,0 +1,190 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-FP16 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions +/// +/// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16 +/// +/// A token indicating that the current CPU has the `avx512fp16` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512fp16")] +/// fn uses_avx512fp16() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512fp16 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Avx512fp16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512fp16" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512fp16 { + const FEATURES: &[&str] = &[ + "avx512fp16", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512fp16 = self] => "avx512fp16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512fp16 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512fp16"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512fp16") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512fp16")] + /// Create a new token for the "avx512fp16" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-FP16 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512fp16" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs new file mode 100644 index 00000000..b7ab646c --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs @@ -0,0 +1,182 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-IFMA target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add +/// +/// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA +/// +/// A token indicating that the current CPU has the `avx512ifma` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512ifma")] +/// fn uses_avx512ifma() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512ifma { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512ifma { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512ifma" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512ifma { + const FEATURES: &[&str] = &[ + "avx512ifma", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512ifma = self] => "avx512ifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512ifma { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512ifma"` target feature is detected as enabled. 
+ /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512ifma") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512ifma")] + /// Create a new token for the "avx512ifma" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-IFMA is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512ifma" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs new file mode 100644 index 00000000..9d0ad4da --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs @@ -0,0 +1,190 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VBMI target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions +/// +/// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI +/// +/// A token indicating that the current CPU has the `avx512vbmi` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vbmi")] +/// fn uses_avx512vbmi() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vbmi { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vbmi { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vbmi" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vbmi { + const FEATURES: &[&str] = &[ + "avx512vbmi", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vbmi = self] => "avx512vbmi", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vbmi { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vbmi"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vbmi") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vbmi")] + /// Create a new token for the "avx512vbmi" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VBMI is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vbmi" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs new file mode 100644 index 00000000..cfff6b25 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs @@ -0,0 +1,190 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VBMI2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2 +/// +/// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2 +/// +/// A token indicating that the current CPU has the `avx512vbmi2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vbmi2")] +/// fn uses_avx512vbmi2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vbmi2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Avx512vbmi2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vbmi2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vbmi2 { + const FEATURES: &[&str] = &[ + "avx512vbmi2", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vbmi2 = self] => "avx512vbmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vbmi2 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vbmi2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vbmi2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vbmi2")] + /// Create a new token for the "avx512vbmi2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VBMI2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vbmi2" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs new file mode 100644 index 00000000..ddfd7a1c --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs @@ -0,0 +1,172 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VL target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions +/// +/// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512 +/// +/// A token indicating that the current CPU has the `avx512vl` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vl")] +/// fn uses_avx512vl() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vl { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vl { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vl" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vl { + const FEATURES: &[&str] = &[ + "avx512vl", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vl = self] => "avx512vl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vl { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vl"` target feature is detected as enabled. 
+ /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vl") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vl")] + /// Create a new token for the "avx512vl" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VL is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vl" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vnni.rs b/fearless_simd_core/src/x86/avx512/avx512vnni.rs new file mode 100644 index 00000000..528282d9 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vnni.rs @@ -0,0 +1,182 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VNNI target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions +/// +/// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI +/// +/// A token indicating that the current CPU has the `avx512vnni` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vnni")] +/// fn uses_avx512vnni() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vnni { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vnni { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vnni" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vnni { + const FEATURES: &[&str] = &[ + "avx512vnni", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vnni = self] => "avx512vnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vnni { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vnni"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vnni") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vnni")] + /// Create a new token for the "avx512vnni" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VNNI is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vnni" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs new file mode 100644 index 00000000..73344f75 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs @@ -0,0 +1,182 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VP2INTERSECT target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers +/// +/// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT +/// +/// A token indicating that the current CPU has the `avx512vp2intersect` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vp2intersect")] +/// fn uses_avx512vp2intersect() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vp2intersect { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vp2intersect { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vp2intersect" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vp2intersect { + const FEATURES: &[&str] = &[ + "avx512vp2intersect", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Avx512vp2intersect = self] => "avx512vp2intersect", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vp2intersect { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vp2intersect"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vp2intersect") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vp2intersect")] + /// Create a new token for the "avx512vp2intersect" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VP2INTERSECT is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vp2intersect" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs new file mode 100644 index 00000000..7f96f8a7 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs @@ -0,0 +1,182 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VPOPCNTDQ target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction +/// +/// [AVX512-VPOPCNTDQ]:https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG +/// +/// A token indicating that the current CPU has the `avx512vpopcntdq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vpopcntdq")] +/// fn uses_avx512vpopcntdq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vpopcntdq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vpopcntdq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vpopcntdq" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vpopcntdq { + const FEATURES: &[&str] = &[ + "avx512vpopcntdq", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vpopcntdq = self] => "avx512vpopcntdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vpopcntdq { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vpopcntdq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vpopcntdq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vpopcntdq")] + /// Create a new token for the "avx512vpopcntdq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VPOPCNTDQ is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vpopcntdq" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/mod.rs b/fearless_simd_core/src/x86/avx512/mod.rs new file mode 100644 index 00000000..1044da40 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/mod.rs @@ -0,0 +1,41 @@ +mod avx512bf16; +pub use avx512bf16::Avx512bf16; + +mod avx512bitalg; +pub use avx512bitalg::Avx512bitalg; + +mod avx512bw; +pub use avx512bw::Avx512bw; + +mod avx512cd; +pub use avx512cd::Avx512cd; + +mod avx512dq; +pub use avx512dq::Avx512dq; + +mod avx512f; +pub use avx512f::Avx512f; + +mod avx512fp16; +pub use avx512fp16::Avx512fp16; + +mod avx512ifma; +pub use avx512ifma::Avx512ifma; + +mod avx512vbmi; +pub use avx512vbmi::Avx512vbmi; + +mod avx512vbmi2; +pub use avx512vbmi2::Avx512vbmi2; + +mod avx512vl; +pub use avx512vl::Avx512vl; + +mod avx512vnni; +pub use avx512vnni::Avx512vnni; + +mod avx512vp2intersect; +pub use avx512vp2intersect::Avx512vp2intersect; + +mod avx512vpopcntdq; +pub use avx512vpopcntdq::Avx512vpopcntdq; diff --git a/fearless_simd_core/src/x86/crypto/aes.rs b/fearless_simd_core/src/x86/crypto/aes.rs new file mode 100644 index 00000000..af937bef 
--- /dev/null +++ b/fearless_simd_core/src/x86/crypto/aes.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AES target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AES] --- Advanced Encryption Standard +/// +/// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set +/// +/// A token indicating that the current CPU has the `aes` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "aes")] +/// fn uses_aes() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Aes { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Aes { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""aes" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Aes { + const FEATURES: &[&str] = &["aes", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Aes = self] => "aes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Aes { + #[cfg(feature = "std")] + /// Create a new token if the `"aes"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("aes") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "aes")] + /// Create a new token for the "aes" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AES is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "aes" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Aes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Aes = value] => "aes", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Aes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Aes = value] => "aes", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/gfni.rs b/fearless_simd_core/src/x86/crypto/gfni.rs new file mode 100644 index 00000000..63c73e81 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/gfni.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The GFNI target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [GFNI] --- Galois Field New Instructions +/// +/// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI +/// +/// A token indicating that the current CPU has the `gfni` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "gfni")] +/// fn uses_gfni() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Gfni { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Gfni { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""gfni" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Gfni { + const FEATURES: &[&str] = &["gfni", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Gfni = self] => "gfni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Gfni { + #[cfg(feature = "std")] + /// Create a new token if the `"gfni"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("gfni") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "gfni")] + /// Create a new token for the "gfni" target feature. 
+ /// + /// This method is useful to get a new token if you have an external proof that + /// GFNI is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "gfni" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Gfni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Gfni = value] => "gfni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Gfni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Gfni = value] => "gfni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/kl.rs b/fearless_simd_core/src/x86/crypto/kl.rs new file mode 100644 index 00000000..5e5d3d4f --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/kl.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The KEYLOCKER target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [KEYLOCKER] --- Intel Key Locker Instructions +/// +/// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions +/// +/// A token indicating that the current CPU has the `kl` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "kl")] +/// fn uses_keylocker() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Keylocker { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Keylocker { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""kl" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Keylocker { + const FEATURES: &[&str] = &["kl"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Keylocker = self] => "kl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Keylocker { + #[cfg(feature = "std")] + /// Create a new token if the `"kl"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("kl") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "kl")] + /// Create a new token for the "kl" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// KEYLOCKER is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "kl" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/mod.rs b/fearless_simd_core/src/x86/crypto/mod.rs new file mode 100644 index 00000000..cfb34d3b --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/mod.rs @@ -0,0 +1,38 @@ +mod aes; +pub use aes::Aes; + +mod gfni; +pub use gfni::Gfni; + +mod kl; +pub use kl::Keylocker; + +mod pclmulqdq; +pub use pclmulqdq::Pclmulqdq; + +mod rdrand; +pub use rdrand::Rdrand; + +mod rdseed; +pub use rdseed::Rdseed; + +mod sha; +pub use sha::Sha; + +mod sha512; +pub use sha512::Sha512; + +mod sm3; +pub use sm3::Sm3; + +mod sm4; +pub use sm4::Sm4; + +mod vaes; +pub use vaes::Vaes; + +mod vpclmulqdq; +pub use vpclmulqdq::Vpclmulqdq; + +mod widekl; +pub use widekl::WideKeylocker; diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs new file mode 100644 index 00000000..56d47f7e --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `pclmulqdq` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["pclmulqdq"] --- Packed carry-less multiplication quadword +/// +/// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq +/// +/// A token indicating that the current CPU has the `pclmulqdq` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "pclmulqdq")] +/// fn uses_pclmulqdq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Pclmulqdq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Pclmulqdq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""pclmulqdq" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Pclmulqdq { + const FEATURES: &[&str] = &["pclmulqdq", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Pclmulqdq = self] => "pclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Pclmulqdq { + #[cfg(feature = "std")] + /// Create a new token if the `"pclmulqdq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("pclmulqdq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "pclmulqdq")] + /// Create a new token for the "pclmulqdq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `pclmulqdq` is available. 
This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "pclmulqdq" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Pclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Pclmulqdq = value] => "pclmulqdq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Pclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Pclmulqdq = value] => "pclmulqdq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs new file mode 100644 index 00000000..9003251d --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `rdrand` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["rdrand"] --- Read random number +/// +/// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand +/// +/// A token indicating that the current CPU has the `rdrand` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "rdrand")] +/// fn uses_rdrand() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Rdrand { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Rdrand { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""rdrand" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Rdrand { + const FEATURES: &[&str] = &["rdrand"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Rdrand = self] => "rdrand", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Rdrand { + #[cfg(feature = "std")] + /// Create a new token if the `"rdrand"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("rdrand") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "rdrand")] + /// Create a new token for the "rdrand" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `rdrand` is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "rdrand" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs new file mode 100644 index 00000000..26389a35 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `rdseed` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["rdseed"] --- Read random seed +/// +/// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand +/// +/// A token indicating that the current CPU has the `rdseed` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "rdseed")] +/// fn uses_rdseed() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Rdseed { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Rdseed { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""rdseed" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Rdseed { + const FEATURES: &[&str] = &["rdseed"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Rdseed = self] => "rdseed", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Rdseed { + #[cfg(feature = "std")] + /// Create a new token if the `"rdseed"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("rdseed") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "rdseed")] + /// Create a new token for the "rdseed" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `rdseed` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "rdseed" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sha.rs b/fearless_simd_core/src/x86/crypto/sha.rs new file mode 100644 index 00000000..8c53b001 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sha.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SHA target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SHA] --- Secure Hash Algorithm +/// +/// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions +/// +/// A token indicating that the current CPU has the `sha` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sha")] +/// fn uses_sha() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sha { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sha { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sha" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sha { + const FEATURES: &[&str] = &["sha", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Sha = self] => "sha", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sha { + #[cfg(feature = "std")] + /// Create a new token if the `"sha"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sha") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sha")] + /// Create a new token for the "sha" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SHA is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sha" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sha) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha = value] => "sha", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sha) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sha = value] => "sha", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sha512.rs b/fearless_simd_core/src/x86/crypto/sha512.rs new file mode 100644 index 00000000..6968d4a4 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sha512.rs @@ -0,0 +1,150 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SHA512 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SHA512] --- Secure Hash Algorithm with 512-bit digest +/// +/// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions +/// +/// A token indicating that the current CPU has the `sha512` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sha512")] +/// fn uses_sha512() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sha512 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sha512 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sha512" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sha512 { + const FEATURES: &[&str] = &[ + "sha512", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Sha512 = self] => "sha512", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sha512 { + #[cfg(feature = "std")] + /// Create a new token if the `"sha512"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sha512") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sha512")] + /// Create a new token for the "sha512" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SHA512 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sha512" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sm3.rs b/fearless_simd_core/src/x86/crypto/sm3.rs new file mode 100644 index 00000000..3292d72d --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sm3.rs @@ -0,0 +1,143 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SM3 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SM3] --- ShangMi 3 Hash Algorithm +/// +/// [SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions +/// +/// A token indicating that the current CPU has the `sm3` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sm3")] +/// fn uses_sm3() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sm3 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sm3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sm3" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sm3 { + const FEATURES: &[&str] = &[ + "sm3", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sm3 = self] => "sm3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sm3 { + #[cfg(feature = "std")] + /// Create a new token if the `"sm3"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. 
+ // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sm3") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sm3")] + /// Create a new token for the "sm3" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SM3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sm3" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sm4.rs b/fearless_simd_core/src/x86/crypto/sm4.rs new file mode 100644 index 00000000..81e2db9a --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sm4.rs @@ -0,0 +1,150 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SM4 target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SM4] --- ShangMi 4 Cipher Algorithm +/// +/// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions +/// +/// A token indicating that the current CPU has the `sm4` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sm4")] +/// fn uses_sm4() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sm4 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sm4 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sm4" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sm4 { + const FEATURES: &[&str] = &[ + "sm4", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sm4 = self] => "sm4", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sm4 { + #[cfg(feature = "std")] + /// Create a new token if the `"sm4"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sm4") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sm4")] + /// Create a new token for the "sm4" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SM4 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sm4" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/vaes.rs b/fearless_simd_core/src/x86/crypto/vaes.rs new file mode 100644 index 00000000..fca0a918 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/vaes.rs @@ -0,0 +1,157 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The VAES target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [VAES] --- Vector AES Instructions +/// +/// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES +/// +/// A token indicating that the current CPU has the `vaes` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "vaes")] +/// fn uses_vaes() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Vaes { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Vaes { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""vaes" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Vaes { + const FEATURES: &[&str] = &[ + "vaes", "aes", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Vaes = self] => "vaes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Vaes { + #[cfg(feature = "std")] + /// Create a new token if the `"vaes"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("vaes") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "vaes")] + /// Create a new token for the "vaes" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// VAES is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "vaes" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::crypto::Aes { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::crypto::Aes { crate::x86::crypto::Aes::new() }) + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs new file mode 100644 index 00000000..d50b93f8 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs @@ -0,0 +1,158 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The VPCLMULQDQ target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords +/// +/// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ +/// +/// A token indicating that the current CPU has the `vpclmulqdq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "vpclmulqdq")] +/// fn uses_vpclmulqdq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Vpclmulqdq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Vpclmulqdq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""vpclmulqdq" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Vpclmulqdq { + const FEATURES: &[&str] = &[ + "vpclmulqdq", + "avx", + "pclmulqdq", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Vpclmulqdq = self] => "vpclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Vpclmulqdq { + #[cfg(feature = "std")] + /// Create a new token if the `"vpclmulqdq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("vpclmulqdq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "vpclmulqdq")] + /// Create a new token for the "vpclmulqdq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// VPCLMULQDQ is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "vpclmulqdq" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::crypto::Pclmulqdq { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::crypto::Pclmulqdq { crate::x86::crypto::Pclmulqdq::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/widekl.rs b/fearless_simd_core/src/x86/crypto/widekl.rs new file mode 100644 index 00000000..ff179dff --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/widekl.rs @@ -0,0 +1,99 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The WIDE KEYLOCKER target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions +/// +/// [KEYLOCKER_WIDE]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions +/// +/// A token indicating that the current CPU has the `widekl` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "widekl")] +/// fn uses_wide_keylocker() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct WideKeylocker { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for WideKeylocker { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""widekl" enabled."#) + } +} + +unsafe impl TargetFeatureToken for WideKeylocker { + const FEATURES: &[&str] = &["widekl", "kl"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([WideKeylocker = self] => "widekl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl WideKeylocker { + #[cfg(feature = "std")] + /// Create a new token if the `"widekl"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("widekl") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "widekl")] + /// Create a new token for the "widekl" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// WIDE KEYLOCKER is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "widekl" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::crypto::Keylocker { + fn from(value: WideKeylocker) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([WideKeylocker = value] => "widekl", fn() -> crate::x86::crypto::Keylocker { crate::x86::crypto::Keylocker::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs new file mode 100644 index 00000000..9afa91dc --- /dev/null +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -0,0 +1,3 @@ +// Stable in beta, but not current stable +// mod tbm; +// pub use tbm::Tbm; diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs new file mode 100644 index 00000000..e0d8bb5b --- /dev/null +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The TBM target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [TBM] --- Trailing Bit Manipulation +/// +/// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation) +/// +/// A token indicating that the current CPU has the `tbm` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "tbm")] +/// fn uses_tbm() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Tbm { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Tbm { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""tbm" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Tbm { + const FEATURES: &[&str] = &["tbm", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Tbm = self] => "tbm", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Tbm { + #[cfg(feature = "std")] + /// Create a new token if the `"tbm"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("tbm") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "tbm")] + /// Create a new token for the "tbm" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// TBM is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "tbm" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index 10caa27a..ea94a80d 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -12,4 +12,12 @@ //! //! [microarchitecture level]: https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels +pub mod adx; +pub mod avx; +pub mod avx512; +pub mod crypto; +pub mod discontinued; +pub mod sse; pub mod v1; +pub mod v2; +pub mod v3; diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs new file mode 100644 index 00000000..fbc5493f --- /dev/null +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `fxsave + fxrstor` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State +/// +/// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave +/// ["fxrstor"]: https://www.felixcloutier.com/x86/fxrstor +/// +/// A token indicating that the current CPU has the `fxsr` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fxsr")] +/// fn uses_fxsr() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Fxsr { + // We don't use non_exhaustive because we don't want this struct to be constructible.
+ // in different modules in this crate. + _private: (), +} + +impl Debug for Fxsr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""fxsr" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Fxsr { + const FEATURES: &[&str] = &["fxsr"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Fxsr = self] => "fxsr", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Fxsr { + #[cfg(feature = "std")] + /// Create a new token if the `"fxsr"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("fxsr") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "fxsr")] + /// Create a new token for the "fxsr" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `fxsave + fxrstor` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fxsr" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs new file mode 100644 index 00000000..ce9a3aec --- /dev/null +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -0,0 +1,24 @@ +mod fxsr; +pub use fxsr::Fxsr; + +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." +)] +mod sse; +pub use sse::Sse; + +mod sse2; +pub use sse2::Sse2; + +mod sse3; +pub use sse3::Sse3; + +mod ssse3; +pub use ssse3::SupplementalSse3; + +mod sse4_1; +pub use sse4_1::Sse4_1; + +mod sse4_2; +pub use sse4_2::Sse4_2; diff --git a/fearless_simd_core/src/x86/sse/sse.rs b/fearless_simd_core/src/x86/sse/sse.rs new file mode 100644 index 00000000..d3473a33 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE] --- Streaming SIMD Extensions +/// +/// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions +/// +/// A token indicating that the current CPU has the `sse` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse")] +/// fn uses_sse() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse { + // We don't use non_exhaustive because we don't want this struct to be constructible. 
+ // in different modules in this crate. + _private: (), +} + +impl Debug for Sse { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse { + const FEATURES: &[&str] = &["sse"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse { + #[cfg(feature = "std")] + /// Create a new token if the `"sse"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse")] + /// Create a new token for the "sse" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse2.rs b/fearless_simd_core/src/x86/sse/sse2.rs new file mode 100644 index 00000000..137bf28c --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse2.rs @@ -0,0 +1,99 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE2] --- Streaming SIMD Extensions 2 +/// +/// [SSE2]: https://en.wikipedia.org/wiki/SSE2 +/// +/// A token indicating that the current CPU has the `sse2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse2")] +/// fn uses_sse2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse2 { + const FEATURES: &[&str] = &["sse2", "sse"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Sse2 = self] => "sse2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse2 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse2")] + /// Create a new token for the "sse2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse2 = value] => "sse2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse3.rs b/fearless_simd_core/src/x86/sse/sse3.rs new file mode 100644 index 00000000..8c497ec3 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse3.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE3 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE3] --- Streaming SIMD Extensions 3 +/// +/// [SSE3]: https://en.wikipedia.org/wiki/SSE3 +/// +/// A token indicating that the current CPU has the `sse3` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse3")] +/// fn uses_sse3() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse3 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse3" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse3 { + const FEATURES: &[&str] = &["sse3", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sse3 = self] => "sse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse3 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse3"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse3") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse3")] + /// Create a new token for the "sse3" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse3" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse3 = value] => "sse3", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse3 = value] => "sse3", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse4_1.rs b/fearless_simd_core/src/x86/sse/sse4_1.rs new file mode 100644 index 00000000..02ec84f6 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse4_1.rs @@ -0,0 +1,120 @@ +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE4.1 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE4.1] --- Streaming SIMD Extensions 4.1 +/// +/// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1 +/// +/// A token indicating that the current CPU has the `sse4.1` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse4.1")] +/// fn uses_sse4() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse4_1 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse4_1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse4.1" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse4_1 { + const FEATURES: &[&str] = &["sse4.1", "sse", "sse2", "sse3", "ssse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sse4_1 = self] => "sse4.1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse4_1 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse4.1"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse4.1") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse4.1")] + /// Create a new token for the "sse4.1" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE4.1 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse4.1" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse4_2.rs b/fearless_simd_core/src/x86/sse/sse4_2.rs new file mode 100644 index 00000000..cf18d18e --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse4_2.rs @@ -0,0 +1,127 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE4.2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE4.2] --- Streaming SIMD Extensions 4.2 +/// +/// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2 +/// +/// A token indicating that the current CPU has the `sse4.2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse4.2")] +/// fn uses_sse4() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse4_2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse4_2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse4.2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse4_2 { + const FEATURES: &[&str] = &["sse4.2", "sse", "sse2", "sse3", "sse4.1", "ssse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated.
+ trampoline!([Sse4_2 = self] => "sse4.2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse4_2 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse4.2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse4.2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse4.2")] + /// Create a new token for the "sse4.2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE4.2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse4.2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/ssse3.rs b/fearless_simd_core/src/x86/sse/ssse3.rs new file mode 100644 index 00000000..5ba3e9b5 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/ssse3.rs @@ -0,0 +1,113 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSSE3 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSSE3] --- Supplemental Streaming SIMD Extensions 3 +/// +/// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3 +/// +/// A token indicating that the current CPU has the `ssse3` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "ssse3")] +/// fn uses_ssse3() { +/// // ...
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct SupplementalSse3 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for SupplementalSse3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""ssse3" enabled."#) + } +} + +unsafe impl TargetFeatureToken for SupplementalSse3 { + const FEATURES: &[&str] = &["ssse3", "sse", "sse2", "sse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([SupplementalSse3 = self] => "ssse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl SupplementalSse3 { + #[cfg(feature = "std")] + /// Create a new token if the `"ssse3"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("ssse3") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "ssse3")] + /// Create a new token for the "ssse3" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSSE3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "ssse3" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: SupplementalSse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([SupplementalSse3 = value] => "ssse3", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: SupplementalSse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([SupplementalSse3 = value] => "ssse3", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: SupplementalSse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([SupplementalSse3 = value] => "ssse3", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs index 0e360b50..140fdd54 100644 --- a/fearless_simd_core/src/x86/v1/mod.rs +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -2,14 +2,11 @@ //! //! This is the baseline for x86-64 support. -mod sse; -pub use sse::Sse; - -mod fxsr; -pub use fxsr::Fxsr; +pub use crate::x86::sse::Fxsr; +pub use crate::x86::sse::Sse; /// A token that the current CPU is on the x86-64-v1 microarchitecture level. 
-// TODO: (This is currently incomplete) +// TODO: (This is currently incomplete) pub struct V1 { pub sse: Sse, pub fxsr: Fxsr, diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs new file mode 100644 index 00000000..b831e349 --- /dev/null +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `cmpxchg16b` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["cmpxchg16b"] --- Compares and exchange 16 bytes (128 bits) of data atomically +/// +/// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b +/// +/// A token indicating that the current CPU has the `cmpxchg16b` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "cmpxchg16b")] +/// fn uses_cmpxchg16b() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Cmpxchg16b { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Cmpxchg16b { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""cmpxchg16b" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Cmpxchg16b { + const FEATURES: &[&str] = &["cmpxchg16b"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Cmpxchg16b = self] => "cmpxchg16b", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Cmpxchg16b { + #[cfg(feature = "std")] + /// Create a new token if the `"cmpxchg16b"` target feature is detected as enabled. 
+ /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("cmpxchg16b") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "cmpxchg16b")] + /// Create a new token for the "cmpxchg16b" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `cmpxchg16b` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "cmpxchg16b" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v2/mod.rs b/fearless_simd_core/src/x86/v2/mod.rs new file mode 100644 index 00000000..197738fe --- /dev/null +++ b/fearless_simd_core/src/x86/v2/mod.rs @@ -0,0 +1,5 @@ +mod cmpxchg16b; +pub use cmpxchg16b::Cmpxchg16b; + +mod popcnt; +pub use popcnt::Popcnt; diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs new file mode 100644 index 00000000..5e78adce --- /dev/null +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `popcnt` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["popcnt"] --- Count of bits set to 1 +/// +/// ["popcnt"]: https://www.felixcloutier.com/x86/popcnt +/// +/// A token indicating that the current CPU has the `popcnt` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "popcnt")] +/// fn uses_popcnt() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Popcnt { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Popcnt { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""popcnt" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Popcnt { + const FEATURES: &[&str] = &["popcnt"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Popcnt = self] => "popcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Popcnt { + #[cfg(feature = "std")] + /// Create a new token if the `"popcnt"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("popcnt") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "popcnt")] + /// Create a new token for the "popcnt" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `popcnt` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "popcnt" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/bmi1.rs b/fearless_simd_core/src/x86/v3/bmi1.rs new file mode 100644 index 00000000..a8cf99d7 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/bmi1.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. 
+ +//! The BMI1 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [BMI1] --- Bit Manipulation Instruction Sets +/// +/// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets +/// +/// A token indicating that the current CPU has the `bmi1` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "bmi1")] +/// fn uses_bmi1() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Bmi1 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Bmi1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""bmi1" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Bmi1 { + const FEATURES: &[&str] = &["bmi1"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Bmi1 = self] => "bmi1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Bmi1 { + #[cfg(feature = "std")] + /// Create a new token if the `"bmi1"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("bmi1") { + // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "bmi1")] + /// Create a new token for the "bmi1" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// BMI1 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "bmi1" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/bmi2.rs b/fearless_simd_core/src/x86/v3/bmi2.rs new file mode 100644 index 00000000..82502ff8 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/bmi2.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The BMI2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [BMI2] --- Bit Manipulation Instruction Sets 2 +/// +/// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2 +/// +/// A token indicating that the current CPU has the `bmi2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "bmi2")] +/// fn uses_bmi2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Bmi2 { + // We don't use non_exhaustive because we don't want this struct to be constructible.
+ // in different modules in this crate. + _private: (), +} + +impl Debug for Bmi2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""bmi2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Bmi2 { + const FEATURES: &[&str] = &["bmi2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Bmi2 = self] => "bmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Bmi2 { + #[cfg(feature = "std")] + /// Create a new token if the `"bmi2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("bmi2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "bmi2")] + /// Create a new token for the "bmi2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// BMI2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "bmi2" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/f16c.rs b/fearless_simd_core/src/x86/v3/f16c.rs new file mode 100644 index 00000000..a1bc7268 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/f16c.rs @@ -0,0 +1,143 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The F16C target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [F16C] --- 16-bit floating point conversion instructions +/// +/// [F16C]: https://en.wikipedia.org/wiki/F16C +/// +/// A token indicating that the current CPU has the `f16c` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "f16c")] +/// fn uses_f16c() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct F16c { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for F16c { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""f16c" enabled."#) + } +} + +unsafe impl TargetFeatureToken for F16c { + const FEATURES: &[&str] = &[ + "f16c", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([F16c = self] => "f16c", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl F16c { + #[cfg(feature = "std")] + /// Create a new token if the `"f16c"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("f16c") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "f16c")] + /// Create a new token for the "f16c" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// F16C is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "f16c" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/fma.rs b/fearless_simd_core/src/x86/v3/fma.rs new file mode 100644 index 00000000..dc232d73 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/fma.rs @@ -0,0 +1,143 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The FMA3 target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [FMA3] --- Three-operand fused multiply-add +/// +/// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set +/// +/// A token indicating that the current CPU has the `fma` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fma")] +/// fn uses_fma() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Fma { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Fma { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""fma" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Fma { + const FEATURES: &[&str] = &[ + "fma", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Fma = self] => "fma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Fma { + #[cfg(feature = "std")] + /// Create a new token if the `"fma"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("fma") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "fma")] + /// Create a new token for the "fma" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// FMA3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fma" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs new file mode 100644 index 00000000..01ce658d --- /dev/null +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `lzcnt` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["lzcnt"] --- Leading zeros count +/// +/// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt +/// +/// A token indicating that the current CPU has the `lzcnt` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "lzcnt")] +/// fn uses_lzcnt() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Lzcnt { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Lzcnt { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""lzcnt" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Lzcnt { + const FEATURES: &[&str] = &["lzcnt"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Lzcnt = self] => "lzcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Lzcnt { + #[cfg(feature = "std")] + /// Create a new token if the `"lzcnt"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("lzcnt") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "lzcnt")] + /// Create a new token for the "lzcnt" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `lzcnt` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "lzcnt" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/mod.rs b/fearless_simd_core/src/x86/v3/mod.rs new file mode 100644 index 00000000..16d577e3 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/mod.rs @@ -0,0 +1,17 @@ +mod bmi1; +pub use bmi1::Bmi1; + +mod bmi2; +pub use bmi2::Bmi2; + +mod f16c; +pub use f16c::F16c; + +mod fma; +pub use fma::Fma; + +mod lzcnt; +pub use lzcnt::Lzcnt; + +mod movbe; +pub use movbe::Movbe; diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs new file mode 100644 index 00000000..6e542dc1 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `movbe` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["movbe"] --- Move data after swapping bytes +/// +/// ["movbe"]: https://www.felixcloutier.com/x86/movbe +/// +/// A token indicating that the current CPU has the `movbe` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "movbe")] +/// fn uses_movbe() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Movbe { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Movbe { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""movbe" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Movbe { + const FEATURES: &[&str] = &["movbe"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Movbe = self] => "movbe", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Movbe { + #[cfg(feature = "std")] + /// Create a new token if the `"movbe"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("movbe") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "movbe")] + /// Create a new token for the "movbe" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `movbe` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "movbe" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs new file mode 100644 index 00000000..9d222867 --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `xsave` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["xsave"] --- Save processor extended states +/// +/// ["xsave"]: https://www.felixcloutier.com/x86/xsave +/// +/// A token indicating that the current CPU has the `xsave` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsave")] +/// fn uses_xsave() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsave { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsave { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsave" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Xsave { + const FEATURES: &[&str] = &["xsave", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Xsave = self] => "xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsave { + #[cfg(feature = "std")] + /// Create a new token if the `"xsave"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsave") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsave")] + /// Create a new token for the "xsave" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsave` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsave" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs new file mode 100644 index 00000000..6d11df47 --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `xsavec` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["xsavec"] --- Save processor extended states with compaction +/// +/// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec +/// +/// A token indicating that the current CPU has the `xsavec` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsavec")] +/// fn uses_xsavec() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsavec { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsavec { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsavec" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Xsavec { + const FEATURES: &[&str] = &["xsavec", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Xsavec = self] => "xsavec", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsavec { + #[cfg(feature = "std")] + /// Create a new token if the `"xsavec"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsavec") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsavec")] + /// Create a new token for the "xsavec" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsavec` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsavec" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs new file mode 100644 index 00000000..b63d444b --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `xsaveopt` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["xsaveopt"] --- Save processor extended states optimized +/// +/// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt +/// +/// A token indicating that the current CPU has the `xsaveopt` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsaveopt")] +/// fn uses_xsaveopt() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsaveopt { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsaveopt { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsaveopt" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Xsaveopt { + const FEATURES: &[&str] = &["xsaveopt", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Xsaveopt = self] => "xsaveopt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsaveopt { + #[cfg(feature = "std")] + /// Create a new token if the `"xsaveopt"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsaveopt") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsaveopt")] + /// Create a new token for the "xsaveopt" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsaveopt` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsaveopt" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs new file mode 100644 index 00000000..60633bef --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `xsaves` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["xsaves"] --- Save processor extended states supervisor +/// +/// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves +/// +/// A token indicating that the current CPU has the `xsaves` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsaves")] +/// fn uses_xsaves() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsaves { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsaves { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsaves" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Xsaves { + const FEATURES: &[&str] = &["xsaves", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Xsaves = self] => "xsaves", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsaves { + #[cfg(feature = "std")] + /// Create a new token if the `"xsaves"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsaves") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsaves")] + /// Create a new token for the "xsaves" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsaves` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsaves" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; From 75153cdad0063bd5e9b9cd60a2cbcb10a519784e Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:51:26 +0100 Subject: [PATCH 06/19] Fixup some docs --- fearless_simd_core/src/lib.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index e212b240..75bc9acb 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -35,7 +35,7 @@ extern crate std; /// To construct a value of a type implementing this trait, you must have proven that each /// target feature in `FEATURES` is available. pub unsafe trait TargetFeatureToken: Copy { - /// The set of target features which are enabled for this run, if + /// The set of target features which the current CPU has, if /// you have a value of this type. const FEATURES: &[&str]; @@ -78,7 +78,7 @@ pub unsafe trait TargetFeatureToken: Copy { /// For reference, the implementation used to implement [`vectorize`](TargetFeatureToken::vectorize) for `"sse"` is: /// /// ```rust,ignore -/// trampoline!([Self = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) +/// trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) /// ``` /// /// There is also support for where clauses after the return type. @@ -178,12 +178,15 @@ macro_rules! trampoline { // We validate that we actually have a token of each claimed type. let _: $token_type = $token; )+ - const { + // We use a const item rather than a const block to ensure that the check is eagerly evaluated. + // This does mean that you can no longer use tokens "generically", but it's hard to think of + // cases where that would be usable anyway. + const _: () = { // And that the claimed types justify enabling the enabled target features. 
$crate::trampoline::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+]) // TODO: Better failure message here (i.e. 
at least concatting the set of requested features) .unwrap(); - } + }; $( // Soundness: We use `arg_value` outside of the macro body to ensure it doesn't From 119e9651e375fef9ee4d459af4b79294e3334bcc Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Wed, 15 Oct 2025 12:23:23 +0100 Subject: [PATCH 07/19] Save some generator changes, including adding x86_v{1,2,3,4} --- fearless_simd_core/gen/src/data.rs | 4 +- fearless_simd_core/gen/src/data/x86.rs | 28 ++- fearless_simd_core/gen/src/main.rs | 193 +++++++++++++++++- fearless_simd_core/gen/templates/x86.rs | 5 +- fearless_simd_core/gen/templates/x86_level.rs | 91 +++++++++ 5 files changed, 313 insertions(+), 8 deletions(-) create mode 100644 fearless_simd_core/gen/templates/x86_level.rs diff --git a/fearless_simd_core/gen/src/data.rs b/fearless_simd_core/gen/src/data.rs index 87b9ec46..fe46d862 100644 --- a/fearless_simd_core/gen/src/data.rs +++ b/fearless_simd_core/gen/src/data.rs @@ -1,2 +1,4 @@ mod x86; -pub(crate) use x86::{X86_FEATURES, X86_TEMPLATE}; +pub(crate) use x86::{ + X86_FEATURES, X86_LEVEL_TEMPLATE, X86_TEMPLATE, X86_V1, X86_V2, X86_V3, X86_V4, +}; diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 52c13c4d..e8c5f85c 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -422,5 +422,29 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ ), ]; -#[test] -fn all_features_included() {} +// All taken from + +pub(crate) const X86_LEVEL_TEMPLATE: &str = include_str!("../../templates/x86_level.rs"); + +/// The target features required in the x86-64-v1 level. +// Rust doesn't have target features for "cmov", "cmpxchg8b", "fpu", "sce", and "mmx". +// The first four are all assumed, and the final is not implemented because +// it's practically impossible to use correctly (and there's no reason to). 
+pub(crate) const X86_V1: &[&str] = &["fxsr", "sse", "sse2"]; +/// The target features required in the x86-64-v2 level, in addition to those already in [`V1`]. +pub(crate) const X86_V2: &[&str] = &[ + "sse3", + "ssse3", + "sse4.1", + "sse4.2", + "popcnt", + "cmpxchg16b", + // The lahfsahf target feature is currently in Rust beta. + // "lahfsahf", +]; +/// The target features required in the x86-64-v3 level, excluding those already in [`V2`]. +pub(crate) const X86_V3: &[&str] = &[ + "avx", "avx2", "bmi1", "bmi2", "f16c", "fma", "lzcnt", "movbe", "xsave", +]; +/// The target features required in the x86-64-v4 level, excluding those already in [`V3`]. +pub(crate) const X86_V4: &[&str] = &["avx512f", "avx512bw", "avx512cd", "avx512dq", "avx512vl"]; diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index aec7057f..fe075fdf 100644 --- a/fearless_simd_core/gen/src/main.rs +++ b/fearless_simd_core/gen/src/main.rs @@ -1,7 +1,9 @@ mod data; +use std::collections::HashSet; use std::fmt::Write; use std::fs; +use std::hash::RandomState; use std::{ cell::RefCell, collections::HashMap, @@ -10,21 +12,34 @@ use std::{ path::{Path, PathBuf}, }; +use crate::data::X86_LEVEL_TEMPLATE; + fn main() { let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let src_dir = manifest_dir.ancestors().nth(1).unwrap().join("src"); - generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, data::X86_FEATURES).unwrap(); + { + let x86_features = normalize_features(data::X86_FEATURES); + generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, &x86_features).unwrap(); + let mut features: Vec<&'static str> = Vec::new(); + features.extend(data::X86_V1); + generate_x86_level(&src_dir, "v1", &x86_features, &features).unwrap(); + features.extend(data::X86_V2); + generate_x86_level(&src_dir, "v2", &x86_features, &features).unwrap(); + features.extend(data::X86_V3); + generate_x86_level(&src_dir, "v3", &x86_features, &features).unwrap(); + features.extend(data::X86_V4); +
generate_x86_level(&src_dir, "v4", &x86_features, &features).unwrap(); + } } fn generate_for_arch( root_dir: &Path, arch_module_name: &str, template: &str, - features: &'static [Feature], + features: &[NormalizedFeature], ) -> io::Result<()> { let arch_dir = root_dir.join(arch_module_name); - let features = normalize_features(features); - for feature in &features { + for feature in features { let mut new_docs = String::new(); for line in feature.feature.extra_docs.lines() { writeln!(&mut new_docs, "///{line}").unwrap(); @@ -79,6 +94,7 @@ impl From for {type_path} {{ r#""{ENABLED_FEATURES_STR_LIST}""#, &enabled_feature_str_list, ); + let module_dir = arch_dir.join(feature.feature.module); create_dir_all(&module_dir)?; let mut file = module_dir.join(feature.feature.feature_name.replace(".", "_")); @@ -88,6 +104,175 @@ impl From for {type_path} {{ Ok(()) } +/// Generate the code for an X86 microarchitecture level. +fn generate_x86_level( + root_dir: &Path, + level: &'static str, + all_features: &[NormalizedFeature], + required_features: &[&'static str], +) -> io::Result<()> { + // Precalculate the sets of features we need to support. + // Intermediate value for + let mut superset = HashSet::new(); + for feature in required_features { + superset.insert(*feature); + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + superset.extend(&normalized.children); + } + + // Every single target feature supported on this level, including those implied. + // (In all likelihood, this is the same as `required_features`, but I'd rather validate that manually) + let mut superset = superset.into_iter().collect::>(); + superset.sort(); + let mut lcd = HashSet::<_, RandomState>::from_iter(superset.iter().copied()); + // We make the assumption that features are a tree, that is, there's no case where `A->B` and `B->A`. + // However, even if that didn't hold, we at least use a consistent ordering here. 
+ // We test from the superset to be safe; this should be equivalent to using `required_features`, though. + for feature in &superset { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + for feature in &normalized.children { + // If the feature is a child of another required feature, we know we don't need it for this version. + // We don't care whether or not it was actually removed. + lcd.remove(*feature); + } + } + // The set of features which are strictly required. + // This is used to create the target feature string, so that it can be as short as possible. + let mut lcd = lcd.into_iter().collect::>(); + lcd.sort(); + // Now that we have lcd and superset, we can preprocess what we need for the actual file. + + let level_struct_name = level.to_uppercase(); + // The target_feature(enable = "...") string. + let lcd_contents = lcd.join(", "); + // The fields of the new struct. + let lcd_field_definitions = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + normalized.feature.module, normalized.feature.struct_name + ); + format!("{feature}: {type_path},\n") + }) + .collect::(); + // The enabled FEATURES. + let superset_list = superset + .iter() + .map(|it| format!(r#""{it}""#)) + .collect::>() + .join(", "); + // First argument to `trampoline!` + let lcd_trampoline = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + normalized.feature.module, normalized.feature.struct_name + ); + format!("{type_path} = self.{feature}") + }) + .collect::>() + .join(","); + // The version of the struct initializer in `try_new`. 
+ let struct_initializer_try_new = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + normalized.feature.module, normalized.feature.struct_name + ); + // We rely on rustfmt to get the tab spacing right. + format!("\t{feature}: {type_path}::try_new()?,\n") + }) + .collect::(); + // The version of the struct initializer in `new`. + let struct_initializer_new = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + normalized.feature.module, normalized.feature.struct_name + ); + format!("\t{feature}: {type_path}::new(),\n") + }) + .collect::(); + + let mut from_impls = String::new(); + for child in &superset { + let from_feature = all_features + .iter() + .find(|it| it.feature.feature_name == *child) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + from_feature.feature.module, from_feature.feature.struct_name + ); + write!( + from_impls, + "\n\ +impl From for {type_path} {{ + fn from(value: LEVEL_STRUCT_NAME) -> Self {{ + // This serves as a correctness check of the implicitly enabled features. + trampoline!([LEVEL_STRUCT_NAME = value] => \"{{LEVEL_FEATURE_LCD_CONTENTS}}\", fn() -> {type_path} {{ {type_path}::new() }}) + }} +}}\n" + ).unwrap(); + } + + let mut result = format!( + "// This file is automatically generated by `fearless_simd_core_gen`.\n\ + // Its template can be found in `fearless_simd_core/gen/templates`.\n\n\ + {X86_LEVEL_TEMPLATE}" + ); + // We replace the from impls first, as they use template variables from the rest of this. 
+ result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace( + "/*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/", + &lcd_field_definitions, + ); + result = result.replace(r#""{LEVEL_FEATURE_SUPERSET_LIST}""#, &superset_list); + result = result.replace("{LEVEL_FEATURE_LCD_TRAMPOLINE}", &lcd_trampoline); + + result = result.replace( + "/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/", + &struct_initializer_try_new, + ); + result = result.replace( + "/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/", + &struct_initializer_new, + ); + + let arch_dir = root_dir.join("x86"); + let module_dir = arch_dir.join(level); + create_dir_all(&module_dir)?; + let output_path = module_dir.join("level.rs"); + fs::write(output_path, result)?; + Ok(()) +} + #[derive(Debug)] struct Feature { /// The name of the struct to be generated. diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index 7742b8e9..4c170fc8 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -32,12 +32,15 @@ impl Debug for FEATURE_STRUCT_NAME { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for FEATURE_STRUCT_NAME { const FEATURES: &[&str] = &["{ENABLED_FEATURES_STR_LIST}"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([FEATURE_STRUCT_NAME = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs new file mode 100644 index 00000000..be72bf89 --- /dev/null +++ b/fearless_simd_core/gen/templates/x86_level.rs @@ -0,0 +1,91 @@ +//! The x86-64-{LEVEL_ID} microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-{LEVEL_ID} microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")] +/// fn uses_x86_64_{LEVEL_ID}() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct LEVEL_STRUCT_NAME { + /*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/ + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for LEVEL_STRUCT_NAME { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-{LEVEL_ID} enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. 
+unsafe impl TargetFeatureToken for LEVEL_STRUCT_NAME { + const FEATURES: &[&str] = &["{LEVEL_FEATURE_SUPERSET_LIST}"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string. + trampoline!([{LEVEL_FEATURE_LCD_TRAMPOLINE}] => "{LEVEL_FEATURE_LCD_CONTENTS}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl LEVEL_STRUCT_NAME { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-{LEVEL_ID} target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + /*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/ + }) + } + + #[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")] + /// Create a new token for the x86-64-{LEVEL_ID} microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-{LEVEL_ID} is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "{LEVEL_FEATURE_LCD_CONTENTS}" target feature is available. + pub fn new() -> Self { + Self { + /*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/ + } + } +} +/*{FROM_IMPLS}*/ + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized."
+ ); +}; From b184befc9d8361fea2785bd2cc046714382befcc Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Wed, 15 Oct 2025 12:49:28 +0100 Subject: [PATCH 08/19] Fixup the generator and mostly finalize levels --- fearless_simd_core/gen/src/main.rs | 42 +-- fearless_simd_core/gen/templates/x86_level.rs | 2 + fearless_simd_core/src/x86/adx/adx.rs | 5 +- fearless_simd_core/src/x86/avx/avx.rs | 5 +- fearless_simd_core/src/x86/avx/avx2.rs | 5 +- fearless_simd_core/src/x86/avx/avxifma.rs | 5 +- .../src/x86/avx/avxneconvert.rs | 5 +- fearless_simd_core/src/x86/avx/avxvnni.rs | 5 +- .../src/x86/avx/avxvnniint16.rs | 5 +- fearless_simd_core/src/x86/avx/avxvnniint8.rs | 5 +- .../src/x86/avx512/avx512bf16.rs | 5 +- .../src/x86/avx512/avx512bitalg.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512bw.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512cd.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512dq.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512f.rs | 5 +- .../src/x86/avx512/avx512fp16.rs | 5 +- .../src/x86/avx512/avx512ifma.rs | 5 +- .../src/x86/avx512/avx512vbmi.rs | 5 +- .../src/x86/avx512/avx512vbmi2.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512vl.rs | 5 +- .../src/x86/avx512/avx512vnni.rs | 5 +- .../src/x86/avx512/avx512vp2intersect.rs | 5 +- .../src/x86/avx512/avx512vpopcntdq.rs | 5 +- fearless_simd_core/src/x86/crypto/aes.rs | 5 +- fearless_simd_core/src/x86/crypto/gfni.rs | 5 +- fearless_simd_core/src/x86/crypto/kl.rs | 5 +- .../src/x86/crypto/pclmulqdq.rs | 5 +- fearless_simd_core/src/x86/crypto/rdrand.rs | 5 +- fearless_simd_core/src/x86/crypto/rdseed.rs | 5 +- fearless_simd_core/src/x86/crypto/sha.rs | 5 +- fearless_simd_core/src/x86/crypto/sha512.rs | 5 +- fearless_simd_core/src/x86/crypto/sm3.rs | 5 +- fearless_simd_core/src/x86/crypto/sm4.rs | 5 +- fearless_simd_core/src/x86/crypto/vaes.rs | 5 +- .../src/x86/crypto/vpclmulqdq.rs | 5 +- fearless_simd_core/src/x86/crypto/widekl.rs | 5 +- 
.../src/x86/discontinued/tbm.rs | 5 +- fearless_simd_core/src/x86/mod.rs | 3 + fearless_simd_core/src/x86/sse/fxsr.rs | 5 +- fearless_simd_core/src/x86/sse/sse.rs | 5 +- fearless_simd_core/src/x86/sse/sse2.rs | 5 +- fearless_simd_core/src/x86/sse/sse3.rs | 5 +- fearless_simd_core/src/x86/sse/sse4_1.rs | 5 +- fearless_simd_core/src/x86/sse/sse4_2.rs | 5 +- fearless_simd_core/src/x86/sse/ssse3.rs | 5 +- fearless_simd_core/src/x86/v1/level.rs | 119 +++++++ fearless_simd_core/src/x86/v1/mod.rs | 31 +- fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 5 +- fearless_simd_core/src/x86/v2/level.rs | 179 ++++++++++ fearless_simd_core/src/x86/v2/mod.rs | 12 + fearless_simd_core/src/x86/v2/popcnt.rs | 5 +- fearless_simd_core/src/x86/v3/bmi1.rs | 5 +- fearless_simd_core/src/x86/v3/bmi2.rs | 5 +- fearless_simd_core/src/x86/v3/f16c.rs | 5 +- fearless_simd_core/src/x86/v3/fma.rs | 5 +- fearless_simd_core/src/x86/v3/level.rs | 279 +++++++++++++++ fearless_simd_core/src/x86/v3/lzcnt.rs | 5 +- fearless_simd_core/src/x86/v3/mod.rs | 17 + fearless_simd_core/src/x86/v3/movbe.rs | 5 +- fearless_simd_core/src/x86/v4/level.rs | 325 ++++++++++++++++++ fearless_simd_core/src/x86/v4/mod.rs | 27 ++ fearless_simd_core/src/x86/xsave/mod.rs | 15 + fearless_simd_core/src/x86/xsave/xsave.rs | 5 +- fearless_simd_core/src/x86/xsave/xsavec.rs | 5 +- fearless_simd_core/src/x86/xsave/xsaveopt.rs | 5 +- fearless_simd_core/src/x86/xsave/xsaves.rs | 5 +- 67 files changed, 1220 insertions(+), 106 deletions(-) create mode 100644 fearless_simd_core/src/x86/v1/level.rs create mode 100644 fearless_simd_core/src/x86/v2/level.rs create mode 100644 fearless_simd_core/src/x86/v3/level.rs create mode 100644 fearless_simd_core/src/x86/v4/level.rs create mode 100644 fearless_simd_core/src/x86/v4/mod.rs create mode 100644 fearless_simd_core/src/x86/xsave/mod.rs diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index fe075fdf..c3ef8b9e 100644 --- a/fearless_simd_core/gen/src/main.rs +++ 
b/fearless_simd_core/gen/src/main.rs @@ -150,7 +150,7 @@ fn generate_x86_level( let level_struct_name = level.to_uppercase(); // The target_feature(enable = "...") string. - let lcd_contents = lcd.join(", "); + let lcd_contents = lcd.join(","); // The fields of the new struct. let lcd_field_definitions = lcd .iter() @@ -159,11 +159,13 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *feature) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - normalized.feature.module, normalized.feature.struct_name - ); - format!("{feature}: {type_path},\n") + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); + format!( + "/// The contained proof that {} is available.\n\ + pub {feature}: {type_path},\n", + normalized.feature.feature_docs_name + ) }) .collect::(); // The enabled FEATURES. @@ -180,14 +182,12 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *feature) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - normalized.feature.module, normalized.feature.struct_name - ); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); format!("{type_path} = self.{feature}") }) .collect::>() - .join(","); + .join(", "); // The version of the struct initializer in `try_new`. let struct_initializer_try_new = lcd .iter() @@ -196,10 +196,8 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *feature) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - normalized.feature.module, normalized.feature.struct_name - ); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); // We rely on rustfmt to get the tab spacing right. 
format!("\t{feature}: {type_path}::try_new()?,\n") }) @@ -212,10 +210,8 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *feature) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - normalized.feature.module, normalized.feature.struct_name - ); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); format!("\t{feature}: {type_path}::new(),\n") }) .collect::(); @@ -226,10 +222,7 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *child) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - from_feature.feature.module, from_feature.feature.struct_name - ); + let type_path = format!("crate::x86::{level}::{}", from_feature.feature.struct_name); write!( from_impls, "\n\ @@ -249,6 +242,9 @@ impl From for {type_path} {{ ); // We replace the from impls first, as they use template variables from the rest of this. result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("LEVEL_STRUCT_NAME", &level_struct_name); + result = result.replace("{LEVEL_ID}", level); + result = result.replace("{LEVEL_FEATURE_LCD_CONTENTS}", &lcd_contents); result = result.replace( "/*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/", &lcd_field_definitions, diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs index be72bf89..c0212411 100644 --- a/fearless_simd_core/gen/templates/x86_level.rs +++ b/fearless_simd_core/gen/templates/x86_level.rs @@ -81,6 +81,8 @@ impl LEVEL_STRUCT_NAME { } } } +// TODO: From impls to convert into lower x86 versions. 
+ /*{FROM_IMPLS}*/ const _: () = { diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs index b10b6379..10e7b599 100644 --- a/fearless_simd_core/src/x86/adx/adx.rs +++ b/fearless_simd_core/src/x86/adx/adx.rs @@ -37,12 +37,15 @@ impl Debug for Adx { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Adx { const FEATURES: &[&str] = &["adx"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Adx = self] => "adx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs index baa56e36..36804722 100644 --- a/fearless_simd_core/src/x86/avx/avx.rs +++ b/fearless_simd_core/src/x86/avx/avx.rs @@ -37,12 +37,15 @@ impl Debug for Avx { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx { const FEATURES: &[&str] = &["avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avx = self] => "avx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs index b6f252bf..d3fec9f9 100644 --- a/fearless_simd_core/src/x86/avx/avx2.rs +++ b/fearless_simd_core/src/x86/avx/avx2.rs @@ -37,6 +37,8 @@ impl Debug for Avx2 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx2 { const FEATURES: &[&str] = &[ "avx2", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Avx2 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx2 = self] => "avx2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs index 12fda758..c6e1964d 100644 --- a/fearless_simd_core/src/x86/avx/avxifma.rs +++ b/fearless_simd_core/src/x86/avx/avxifma.rs @@ -37,6 +37,8 @@ impl Debug for Avxifma { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxifma { const FEATURES: &[&str] = &[ "avxifma", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Avxifma { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avxifma = self] => "avxifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs index a2adef3c..9f94fc89 100644 --- a/fearless_simd_core/src/x86/avx/avxneconvert.rs +++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs @@ -37,6 +37,8 @@ impl Debug for Avxneconvert { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxneconvert { const FEATURES: &[&str] = &[ "avxneconvert", @@ -52,7 +54,8 @@ unsafe impl TargetFeatureToken for Avxneconvert { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avxneconvert = self] => "avxneconvert", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs index 48148a0c..5e10181e 100644 --- a/fearless_simd_core/src/x86/avx/avxvnni.rs +++ b/fearless_simd_core/src/x86/avx/avxvnni.rs @@ -37,6 +37,8 @@ impl Debug for Avxvnni { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxvnni { const FEATURES: &[&str] = &[ "avxvnni", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Avxvnni { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avxvnni = self] => "avxvnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs index 36b16a41..dab23460 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint16.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs @@ -37,6 +37,8 @@ impl Debug for Avxvnniint16 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxvnniint16 { const FEATURES: &[&str] = &[ "avxvnniint16", @@ -52,7 +54,8 @@ unsafe impl TargetFeatureToken for Avxvnniint16 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avxvnniint16 = self] => "avxvnniint16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs index 2a0eaf4a..ff887660 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint8.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs @@ -37,6 +37,8 @@ impl Debug for Avxvnniint8 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxvnniint8 { const FEATURES: &[&str] = &[ "avxvnniint8", @@ -52,7 +54,8 @@ unsafe impl TargetFeatureToken for Avxvnniint8 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avxvnniint8 = self] => "avxvnniint8", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs index 52396972..705252a5 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bf16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs @@ -37,6 +37,8 @@ impl Debug for Avx512bf16 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512bf16 { const FEATURES: &[&str] = &[ "avx512bf16", @@ -56,7 +58,8 @@ unsafe impl TargetFeatureToken for Avx512bf16 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512bf16 = self] => "avx512bf16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs index 66bb543a..578fd883 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs @@ -38,6 +38,8 @@ impl Debug for Avx512bitalg { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512bitalg { const FEATURES: &[&str] = &[ "avx512bitalg", @@ -57,7 +59,8 @@ unsafe impl TargetFeatureToken for Avx512bitalg { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avx512bitalg = self] => "avx512bitalg", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs index 7213b3da..29b05829 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bw.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs @@ -37,6 +37,8 @@ impl Debug for Avx512bw { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512bw { const FEATURES: &[&str] = &[ "avx512bw", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", @@ -45,7 +47,8 @@ unsafe impl TargetFeatureToken for Avx512bw { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512bw = self] => "avx512bw", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs index b3b9c8c3..e7ed2389 100644 --- a/fearless_simd_core/src/x86/avx512/avx512cd.rs +++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs @@ -37,6 +37,8 @@ impl Debug for Avx512cd { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512cd { const FEATURES: &[&str] = &[ "avx512cd", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", @@ -45,7 +47,8 @@ unsafe impl TargetFeatureToken for Avx512cd { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. 
+ // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512cd = self] => "avx512cd", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs index fb6d3670..92b8c87b 100644 --- a/fearless_simd_core/src/x86/avx512/avx512dq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs @@ -37,6 +37,8 @@ impl Debug for Avx512dq { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512dq { const FEATURES: &[&str] = &[ "avx512dq", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", @@ -45,7 +47,8 @@ unsafe impl TargetFeatureToken for Avx512dq { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512dq = self] => "avx512dq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs index fa6adb77..bad51a08 100644 --- a/fearless_simd_core/src/x86/avx512/avx512f.rs +++ b/fearless_simd_core/src/x86/avx512/avx512f.rs @@ -37,6 +37,8 @@ impl Debug for Avx512f { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512f { const FEATURES: &[&str] = &[ "avx512f", "avx", "avx2", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Avx512f { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512f = self] => "avx512f", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs index f3ed6089..b3ba12d5 100644 --- a/fearless_simd_core/src/x86/avx512/avx512fp16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs @@ -37,6 +37,8 @@ impl Debug for Avx512fp16 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512fp16 { const FEATURES: &[&str] = &[ "avx512fp16", @@ -56,7 +58,8 @@ unsafe impl TargetFeatureToken for Avx512fp16 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512fp16 = self] => "avx512fp16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs index b7ab646c..330e16a8 100644 --- a/fearless_simd_core/src/x86/avx512/avx512ifma.rs +++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs @@ -37,6 +37,8 @@ impl Debug for Avx512ifma { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512ifma { const FEATURES: &[&str] = &[ "avx512ifma", @@ -55,7 +57,8 @@ unsafe impl TargetFeatureToken for Avx512ifma { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512ifma = self] => "avx512ifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs index 9d0ad4da..2811eb14 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vbmi { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vbmi { const FEATURES: &[&str] = &[ "avx512vbmi", @@ -56,7 +58,8 @@ unsafe impl TargetFeatureToken for Avx512vbmi { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vbmi = self] => "avx512vbmi", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs index cfff6b25..aa209c2d 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vbmi2 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vbmi2 { const FEATURES: &[&str] = &[ "avx512vbmi2", @@ -56,7 +58,8 @@ unsafe impl TargetFeatureToken for Avx512vbmi2 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vbmi2 = self] => "avx512vbmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs index ddfd7a1c..4089a4df 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vl.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vl { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vl { const FEATURES: &[&str] = &[ "avx512vl", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", @@ -45,7 +47,8 @@ unsafe impl TargetFeatureToken for Avx512vl { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vl = self] => "avx512vl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vnni.rs b/fearless_simd_core/src/x86/avx512/avx512vnni.rs index 528282d9..9703f9b6 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vnni.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vnni.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vnni { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vnni { const FEATURES: &[&str] = &[ "avx512vnni", @@ -55,7 +57,8 @@ unsafe impl TargetFeatureToken for Avx512vnni { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vnni = self] => "avx512vnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs index 73344f75..6a3bdd17 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vp2intersect { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vp2intersect { const FEATURES: &[&str] = &[ "avx512vp2intersect", @@ -55,7 +57,8 @@ unsafe impl TargetFeatureToken for Avx512vp2intersect { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vp2intersect = self] => "avx512vp2intersect", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs index 7f96f8a7..ada57947 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vpopcntdq { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
unsafe impl TargetFeatureToken for Avx512vpopcntdq { const FEATURES: &[&str] = &[ "avx512vpopcntdq", @@ -55,7 +57,8 @@ unsafe impl TargetFeatureToken for Avx512vpopcntdq { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vpopcntdq = self] => "avx512vpopcntdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/aes.rs b/fearless_simd_core/src/x86/crypto/aes.rs index af937bef..afe9a2ee 100644 --- a/fearless_simd_core/src/x86/crypto/aes.rs +++ b/fearless_simd_core/src/x86/crypto/aes.rs @@ -37,12 +37,15 @@ impl Debug for Aes { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Aes { const FEATURES: &[&str] = &["aes", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Aes = self] => "aes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/gfni.rs b/fearless_simd_core/src/x86/crypto/gfni.rs index 63c73e81..3e00a923 100644 --- a/fearless_simd_core/src/x86/crypto/gfni.rs +++ b/fearless_simd_core/src/x86/crypto/gfni.rs @@ -37,12 +37,15 @@ impl Debug for Gfni { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
unsafe impl TargetFeatureToken for Gfni { const FEATURES: &[&str] = &["gfni", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Gfni = self] => "gfni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/kl.rs b/fearless_simd_core/src/x86/crypto/kl.rs index 5e5d3d4f..722eb6db 100644 --- a/fearless_simd_core/src/x86/crypto/kl.rs +++ b/fearless_simd_core/src/x86/crypto/kl.rs @@ -37,12 +37,15 @@ impl Debug for Keylocker { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Keylocker { const FEATURES: &[&str] = &["kl"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Keylocker = self] => "kl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs index 56d47f7e..ca80c141 100644 --- a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -37,12 +37,15 @@ impl Debug for Pclmulqdq { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Pclmulqdq { const FEATURES: &[&str] = &["pclmulqdq", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Pclmulqdq = self] => "pclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs index 9003251d..46d05c8c 100644 --- a/fearless_simd_core/src/x86/crypto/rdrand.rs +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -37,12 +37,15 @@ impl Debug for Rdrand { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Rdrand { const FEATURES: &[&str] = &["rdrand"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Rdrand = self] => "rdrand", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs index 26389a35..a4ba70f3 100644 --- a/fearless_simd_core/src/x86/crypto/rdseed.rs +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -37,12 +37,15 @@ impl Debug for Rdseed { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Rdseed { const FEATURES: &[&str] = &["rdseed"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Rdseed = self] => "rdseed", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/sha.rs b/fearless_simd_core/src/x86/crypto/sha.rs index 8c53b001..3479ce3d 100644 --- a/fearless_simd_core/src/x86/crypto/sha.rs +++ b/fearless_simd_core/src/x86/crypto/sha.rs @@ -37,12 +37,15 @@ impl Debug for Sha { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sha { const FEATURES: &[&str] = &["sha", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Sha = self] => "sha", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/sha512.rs b/fearless_simd_core/src/x86/crypto/sha512.rs index 6968d4a4..818ef884 100644 --- a/fearless_simd_core/src/x86/crypto/sha512.rs +++ b/fearless_simd_core/src/x86/crypto/sha512.rs @@ -37,6 +37,8 @@ impl Debug for Sha512 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sha512 { const FEATURES: &[&str] = &[ "sha512", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Sha512 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Sha512 = self] => "sha512", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/sm3.rs b/fearless_simd_core/src/x86/crypto/sm3.rs index 3292d72d..fff96832 100644 --- a/fearless_simd_core/src/x86/crypto/sm3.rs +++ b/fearless_simd_core/src/x86/crypto/sm3.rs @@ -37,6 +37,8 @@ impl Debug for Sm3 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sm3 { const FEATURES: &[&str] = &[ "sm3", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Sm3 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Sm3 = self] => "sm3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/sm4.rs b/fearless_simd_core/src/x86/crypto/sm4.rs index 81e2db9a..66a48b3d 100644 --- a/fearless_simd_core/src/x86/crypto/sm4.rs +++ b/fearless_simd_core/src/x86/crypto/sm4.rs @@ -37,6 +37,8 @@ impl Debug for Sm4 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sm4 { const FEATURES: &[&str] = &[ "sm4", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Sm4 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Sm4 = self] => "sm4", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/vaes.rs b/fearless_simd_core/src/x86/crypto/vaes.rs index fca0a918..16ddb321 100644 --- a/fearless_simd_core/src/x86/crypto/vaes.rs +++ b/fearless_simd_core/src/x86/crypto/vaes.rs @@ -37,6 +37,8 @@ impl Debug for Vaes { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Vaes { const FEATURES: &[&str] = &[ "vaes", "aes", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Vaes { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Vaes = self] => "vaes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs index d50b93f8..342af9d3 100644 --- a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs @@ -37,6 +37,8 @@ impl Debug for Vpclmulqdq { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Vpclmulqdq { const FEATURES: &[&str] = &[ "vpclmulqdq", @@ -52,7 +54,8 @@ unsafe impl TargetFeatureToken for Vpclmulqdq { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Vpclmulqdq = self] => "vpclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/widekl.rs b/fearless_simd_core/src/x86/crypto/widekl.rs index ff179dff..a9601bde 100644 --- a/fearless_simd_core/src/x86/crypto/widekl.rs +++ b/fearless_simd_core/src/x86/crypto/widekl.rs @@ -37,12 +37,15 @@ impl Debug for WideKeylocker { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for WideKeylocker { const FEATURES: &[&str] = &["widekl", "kl"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([WideKeylocker = self] => "widekl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs index e0d8bb5b..572f756e 100644 --- a/fearless_simd_core/src/x86/discontinued/tbm.rs +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -37,12 +37,15 @@ impl Debug for Tbm { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Tbm { const FEATURES: &[&str] = &["tbm", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Tbm = self] => "tbm", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index ea94a80d..bdea8907 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -18,6 +18,9 @@ pub mod avx512; pub mod crypto; pub mod discontinued; pub mod sse; +pub mod xsave; + pub mod v1; pub mod v2; pub mod v3; +pub mod v4; diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs index fbc5493f..a36ec709 100644 --- a/fearless_simd_core/src/x86/sse/fxsr.rs +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -38,12 +38,15 @@ impl Debug for Fxsr { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Fxsr { const FEATURES: &[&str] = &["fxsr"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Fxsr = self] => "fxsr", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse.rs b/fearless_simd_core/src/x86/sse/sse.rs index d3473a33..ad020577 100644 --- a/fearless_simd_core/src/x86/sse/sse.rs +++ b/fearless_simd_core/src/x86/sse/sse.rs @@ -37,12 +37,15 @@ impl Debug for Sse { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse { const FEATURES: &[&str] = &["sse"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. 
+ // This means we can't use `Self = self` here, unfortunately. trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse2.rs b/fearless_simd_core/src/x86/sse/sse2.rs index 137bf28c..c86ce42f 100644 --- a/fearless_simd_core/src/x86/sse/sse2.rs +++ b/fearless_simd_core/src/x86/sse/sse2.rs @@ -37,12 +37,15 @@ impl Debug for Sse2 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse2 { const FEATURES: &[&str] = &["sse2", "sse"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Sse2 = self] => "sse2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse3.rs b/fearless_simd_core/src/x86/sse/sse3.rs index 8c497ec3..27789c9a 100644 --- a/fearless_simd_core/src/x86/sse/sse3.rs +++ b/fearless_simd_core/src/x86/sse/sse3.rs @@ -37,12 +37,15 @@ impl Debug for Sse3 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse3 { const FEATURES: &[&str] = &["sse3", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Sse3 = self] => "sse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse4_1.rs b/fearless_simd_core/src/x86/sse/sse4_1.rs index 02ec84f6..3f2b75bc 100644 --- a/fearless_simd_core/src/x86/sse/sse4_1.rs +++ b/fearless_simd_core/src/x86/sse/sse4_1.rs @@ -37,12 +37,15 @@ impl Debug for Sse4_1 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse4_1 { const FEATURES: &[&str] = &["sse4.1", "sse", "sse2", "sse3", "ssse3"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Sse4_1 = self] => "sse4.1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse4_2.rs b/fearless_simd_core/src/x86/sse/sse4_2.rs index cf18d18e..0794c2e1 100644 --- a/fearless_simd_core/src/x86/sse/sse4_2.rs +++ b/fearless_simd_core/src/x86/sse/sse4_2.rs @@ -37,12 +37,15 @@ impl Debug for Sse4_2 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse4_2 { const FEATURES: &[&str] = &["sse4.2", "sse", "sse2", "sse3", "sse4.1", "ssse3"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Sse4_2 = self] => "sse4.2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/ssse3.rs b/fearless_simd_core/src/x86/sse/ssse3.rs index 5ba3e9b5..e5a214b9 100644 --- a/fearless_simd_core/src/x86/sse/ssse3.rs +++ b/fearless_simd_core/src/x86/sse/ssse3.rs @@ -37,12 +37,15 @@ impl Debug for SupplementalSse3 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for SupplementalSse3 { const FEATURES: &[&str] = &["ssse3", "sse", "sse2", "sse3"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([SupplementalSse3 = self] => "ssse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v1/level.rs b/fearless_simd_core/src/x86/v1/level.rs new file mode 100644 index 00000000..58095f88 --- /dev/null +++ b/fearless_simd_core/src/x86/v1/level.rs @@ -0,0 +1,119 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v1 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v1 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fxsr,sse2")] +/// fn uses_x86_64_v1() { +/// // ... 
+/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V1 { + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v1::Fxsr, + /// The contained proof that SSE2 is available. + pub sse2: crate::x86::v1::Sse2, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for V1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v1 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V1 { + const FEATURES: &[&str] = &["fxsr", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string. + trampoline!([crate::x86::v1::Fxsr = self.fxsr, crate::x86::v1::Sse2 = self.sse2] => "fxsr,sse2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V1 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v1 target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + fxsr: crate::x86::v1::Fxsr::try_new()?, + sse2: crate::x86::v1::Sse2::try_new()?, + }) + } + + #[target_feature(enable = "fxsr,sse2")] + /// Create a new token for the x86-64-v1 microarchitecture level.
+ /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v1 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fxsr,sse2" target feature is available. + pub fn new() -> Self { + Self { + fxsr: crate::x86::v1::Fxsr::new(), + sse2: crate::x86::v1::Sse2::new(), + } + } +} + +impl From for crate::x86::v1::Fxsr { + fn from(value: V1) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V1 = value] => "fxsr,sse2", fn() -> crate::x86::v1::Fxsr { crate::x86::v1::Fxsr::new() }) + } +} + +impl From for crate::x86::v1::Sse { + fn from(value: V1) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V1 = value] => "fxsr,sse2", fn() -> crate::x86::v1::Sse { crate::x86::v1::Sse::new() }) + } +} + +impl From for crate::x86::v1::Sse2 { + fn from(value: V1) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V1 = value] => "fxsr,sse2", fn() -> crate::x86::v1::Sse2 { crate::x86::v1::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs index 140fdd54..1a6309e0 100644 --- a/fearless_simd_core/src/x86/v1/mod.rs +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -4,32 +4,7 @@ pub use crate::x86::sse::Fxsr; pub use crate::x86::sse::Sse; +pub use crate::x86::sse::Sse2; -/// A token that the current CPU is on the x86-64-v1 microarchitecture level. 
-// TODO: (This is currently incomplete) -pub struct V1 { - pub sse: Sse, - pub fxsr: Fxsr, -} - -impl V1 { - /// Create a new token if the current CPU is at the x86-64-v1 microarchitecture level or better. - /// - /// This does not do any caching internally, although note that the standard - /// library does internally cache the features it detects. - #[cfg(feature = "std")] - pub fn try_new() -> Option { - // TODO: Caching - Some(Self { - fxsr: Fxsr::try_new()?, - sse: Sse::try_new()?, - }) - } -} - -const _: () = { - assert!( - core::mem::size_of::() == 0, - "Target feature tokens should be zero sized." - ); -}; +mod level; +pub use level::V1; diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs index b831e349..42415dc4 100644 --- a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -37,12 +37,15 @@ impl Debug for Cmpxchg16b { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Cmpxchg16b { const FEATURES: &[&str] = &["cmpxchg16b"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Cmpxchg16b = self] => "cmpxchg16b", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v2/level.rs b/fearless_simd_core/src/x86/v2/level.rs new file mode 100644 index 00000000..cc64c315 --- /dev/null +++ b/fearless_simd_core/src/x86/v2/level.rs @@ -0,0 +1,179 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v2 microarchitecture level. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v2 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "cmpxchg16b,fxsr,popcnt,sse4.2")] +/// fn uses_x86_64_v2() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V2 { + /// The contained proof that `cmpxchg16b` is available. + pub cmpxchg16b: crate::x86::v2::Cmpxchg16b, + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v2::Fxsr, + /// The contained proof that `popcnt` is available. + pub popcnt: crate::x86::v2::Popcnt, + /// The contained proof that SSE4.2 is available. + pub sse4_2: crate::x86::v2::Sse4_2, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for V2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v2 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. 
+unsafe impl TargetFeatureToken for V2 { + const FEATURES: &[&str] = &[ + "cmpxchg16b", + "fxsr", + "popcnt", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string. + trampoline!([crate::x86::v2::Cmpxchg16b = self.cmpxchg16b, crate::x86::v2::Fxsr = self.fxsr, crate::x86::v2::Popcnt = self.popcnt, crate::x86::v2::Sse4_2 = self.sse4_2] => "cmpxchg16b,fxsr,popcnt,sse4.2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V2 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v2 target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + cmpxchg16b: crate::x86::v2::Cmpxchg16b::try_new()?, + fxsr: crate::x86::v2::Fxsr::try_new()?, + popcnt: crate::x86::v2::Popcnt::try_new()?, + sse4_2: crate::x86::v2::Sse4_2::try_new()?, + }) + } + + #[target_feature(enable = "cmpxchg16b,fxsr,popcnt,sse4.2")] + /// Create a new token for the x86-64-v2 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "cmpxchg16b,fxsr,popcnt,sse4.2" target feature is available.
+ pub fn new() -> Self { + Self { + cmpxchg16b: crate::x86::v2::Cmpxchg16b::new(), + fxsr: crate::x86::v2::Fxsr::new(), + popcnt: crate::x86::v2::Popcnt::new(), + sse4_2: crate::x86::v2::Sse4_2::new(), + } + } +} + +impl From for crate::x86::v2::Cmpxchg16b { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Cmpxchg16b { crate::x86::v2::Cmpxchg16b::new() }) + } +} + +impl From for crate::x86::v2::Fxsr { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Fxsr { crate::x86::v2::Fxsr::new() }) + } +} + +impl From for crate::x86::v2::Popcnt { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Popcnt { crate::x86::v2::Popcnt::new() }) + } +} + +impl From for crate::x86::v2::Sse { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse { crate::x86::v2::Sse::new() }) + } +} + +impl From for crate::x86::v2::Sse2 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse2 { crate::x86::v2::Sse2::new() }) + } +} + +impl From for crate::x86::v2::Sse3 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse3 { crate::x86::v2::Sse3::new() }) + } +} + +impl From for crate::x86::v2::Sse4_1 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse4_1 { crate::x86::v2::Sse4_1::new() }) + } +} + +impl From for crate::x86::v2::Sse4_2 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse4_2 { crate::x86::v2::Sse4_2::new() }) + } +} + +impl From for crate::x86::v2::SupplementalSse3 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::SupplementalSse3 { crate::x86::v2::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v2/mod.rs b/fearless_simd_core/src/x86/v2/mod.rs index 197738fe..60c6a651 100644 --- a/fearless_simd_core/src/x86/v2/mod.rs +++ b/fearless_simd_core/src/x86/v2/mod.rs @@ -1,5 +1,17 @@ +pub use crate::x86::sse::Sse3; +pub use crate::x86::sse::Sse4_1; +pub use crate::x86::sse::Sse4_2; +pub use crate::x86::sse::SupplementalSse3; +// TODO: Do we actually want to re-export from the previous level here? 
+pub use crate::x86::v1::Fxsr; +pub use crate::x86::v1::Sse; +pub use crate::x86::v1::Sse2; + mod cmpxchg16b; pub use cmpxchg16b::Cmpxchg16b; mod popcnt; pub use popcnt::Popcnt; + +mod level; +pub use level::V2; diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs index 5e78adce..e5666b6a 100644 --- a/fearless_simd_core/src/x86/v2/popcnt.rs +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -37,12 +37,15 @@ impl Debug for Popcnt { } } +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Popcnt { const FEATURES: &[&str] = &["popcnt"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Popcnt = self] => "popcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/bmi1.rs b/fearless_simd_core/src/x86/v3/bmi1.rs index a8cf99d7..92362dbe 100644 --- a/fearless_simd_core/src/x86/v3/bmi1.rs +++ b/fearless_simd_core/src/x86/v3/bmi1.rs @@ -37,12 +37,15 @@ impl Debug for Bmi1 { } } +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Bmi1 { const FEATURES: &[&str] = &["bmi1"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately.
trampoline!([Bmi1 = self] => "bmi1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/bmi2.rs b/fearless_simd_core/src/x86/v3/bmi2.rs index 82502ff8..55b97dcc 100644 --- a/fearless_simd_core/src/x86/v3/bmi2.rs +++ b/fearless_simd_core/src/x86/v3/bmi2.rs @@ -37,12 +37,15 @@ impl Debug for Bmi2 { } } +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Bmi2 { const FEATURES: &[&str] = &["bmi2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Bmi2 = self] => "bmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/f16c.rs b/fearless_simd_core/src/x86/v3/f16c.rs index a1bc7268..aacb30e9 100644 --- a/fearless_simd_core/src/x86/v3/f16c.rs +++ b/fearless_simd_core/src/x86/v3/f16c.rs @@ -37,6 +37,8 @@ impl Debug for F16c { } } +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for F16c { const FEATURES: &[&str] = &[ "f16c", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for F16c { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately.
trampoline!([F16c = self] => "f16c", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/fma.rs b/fearless_simd_core/src/x86/v3/fma.rs index dc232d73..e051013b 100644 --- a/fearless_simd_core/src/x86/v3/fma.rs +++ b/fearless_simd_core/src/x86/v3/fma.rs @@ -37,6 +37,8 @@ impl Debug for Fma { } } +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Fma { const FEATURES: &[&str] = &[ "fma", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Fma { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Fma = self] => "fma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/level.rs b/fearless_simd_core/src/x86/v3/level.rs new file mode 100644 index 00000000..53ce8485 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/level.rs @@ -0,0 +1,279 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v3 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v3 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// .
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave")] +/// fn uses_x86_64_v3() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V3 { + /// The contained proof that AVX2 is available. + pub avx2: crate::x86::v3::Avx2, + /// The contained proof that BMI1 is available. + pub bmi1: crate::x86::v3::Bmi1, + /// The contained proof that BMI2 is available. + pub bmi2: crate::x86::v3::Bmi2, + /// The contained proof that `cmpxchg16b` is available. + pub cmpxchg16b: crate::x86::v3::Cmpxchg16b, + /// The contained proof that F16C is available. + pub f16c: crate::x86::v3::F16c, + /// The contained proof that FMA3 is available. + pub fma: crate::x86::v3::Fma, + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v3::Fxsr, + /// The contained proof that `lzcnt` is available. + pub lzcnt: crate::x86::v3::Lzcnt, + /// The contained proof that `movbe` is available. + pub movbe: crate::x86::v3::Movbe, + /// The contained proof that `popcnt` is available. + pub popcnt: crate::x86::v3::Popcnt, + /// The contained proof that `xsave` is available. + pub xsave: crate::x86::v3::Xsave, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields.
+} + +impl Debug for V3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v3 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V3 { + const FEATURES: &[&str] = &[ + "avx", + "avx2", + "bmi1", + "bmi2", + "cmpxchg16b", + "f16c", + "fma", + "fxsr", + "lzcnt", + "movbe", + "popcnt", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + "xsave", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string. + trampoline!([crate::x86::v3::Avx2 = self.avx2, crate::x86::v3::Bmi1 = self.bmi1, crate::x86::v3::Bmi2 = self.bmi2, crate::x86::v3::Cmpxchg16b = self.cmpxchg16b, crate::x86::v3::F16c = self.f16c, crate::x86::v3::Fma = self.fma, crate::x86::v3::Fxsr = self.fxsr, crate::x86::v3::Lzcnt = self.lzcnt, crate::x86::v3::Movbe = self.movbe, crate::x86::v3::Popcnt = self.popcnt, crate::x86::v3::Xsave = self.xsave] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V3 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v3 target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option { + Some(Self { + avx2: crate::x86::v3::Avx2::try_new()?, + bmi1: crate::x86::v3::Bmi1::try_new()?, + bmi2: crate::x86::v3::Bmi2::try_new()?, + cmpxchg16b: crate::x86::v3::Cmpxchg16b::try_new()?, + f16c: crate::x86::v3::F16c::try_new()?, + fma: crate::x86::v3::Fma::try_new()?, + fxsr: crate::x86::v3::Fxsr::try_new()?, + lzcnt: crate::x86::v3::Lzcnt::try_new()?, + movbe: crate::x86::v3::Movbe::try_new()?, + popcnt: crate::x86::v3::Popcnt::try_new()?, + xsave: crate::x86::v3::Xsave::try_new()?, + }) + } + + #[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave")] + /// Create a new token for the x86-64-v3 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave" target feature is available. + pub fn new() -> Self { + Self { + avx2: crate::x86::v3::Avx2::new(), + bmi1: crate::x86::v3::Bmi1::new(), + bmi2: crate::x86::v3::Bmi2::new(), + cmpxchg16b: crate::x86::v3::Cmpxchg16b::new(), + f16c: crate::x86::v3::F16c::new(), + fma: crate::x86::v3::Fma::new(), + fxsr: crate::x86::v3::Fxsr::new(), + lzcnt: crate::x86::v3::Lzcnt::new(), + movbe: crate::x86::v3::Movbe::new(), + popcnt: crate::x86::v3::Popcnt::new(), + xsave: crate::x86::v3::Xsave::new(), + } + } +} + +impl From for crate::x86::v3::Avx { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Avx { crate::x86::v3::Avx::new() }) + } +} + +impl From for crate::x86::v3::Avx2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Avx2 { crate::x86::v3::Avx2::new() }) + } +} + +impl From for crate::x86::v3::Bmi1 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Bmi1 { crate::x86::v3::Bmi1::new() }) + } +} + +impl From for crate::x86::v3::Bmi2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Bmi2 { crate::x86::v3::Bmi2::new() }) + } +} + +impl From for crate::x86::v3::Cmpxchg16b { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Cmpxchg16b { crate::x86::v3::Cmpxchg16b::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::v3::Fxsr { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Fxsr { crate::x86::v3::Fxsr::new() }) + } +} + +impl From for crate::x86::v3::Lzcnt { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Lzcnt { crate::x86::v3::Lzcnt::new() }) + } +} + +impl From for crate::x86::v3::Movbe { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Movbe { crate::x86::v3::Movbe::new() }) + } +} + +impl From for crate::x86::v3::Popcnt { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Popcnt { crate::x86::v3::Popcnt::new() }) + } +} + +impl From for crate::x86::v3::Sse { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse { crate::x86::v3::Sse::new() }) + } +} + +impl From for crate::x86::v3::Sse2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse2 { crate::x86::v3::Sse2::new() }) + } +} + +impl From for crate::x86::v3::Sse3 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse3 { crate::x86::v3::Sse3::new() }) + } +} + +impl From for crate::x86::v3::Sse4_1 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse4_1 { crate::x86::v3::Sse4_1::new() }) + } +} + +impl From for crate::x86::v3::Sse4_2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse4_2 { crate::x86::v3::Sse4_2::new() }) + } +} + +impl From for crate::x86::v3::SupplementalSse3 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::SupplementalSse3 { crate::x86::v3::SupplementalSse3::new() }) + } +} + +impl From for crate::x86::v3::Xsave { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Xsave { crate::x86::v3::Xsave::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs index 01ce658d..c92a0c9f 100644 --- a/fearless_simd_core/src/x86/v3/lzcnt.rs +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -37,12 +37,15 @@ impl Debug for Lzcnt { } } +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Lzcnt { const FEATURES: &[&str] = &["lzcnt"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Lzcnt = self] => "lzcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/mod.rs b/fearless_simd_core/src/x86/v3/mod.rs index 16d577e3..8c158a70 100644 --- a/fearless_simd_core/src/x86/v3/mod.rs +++ b/fearless_simd_core/src/x86/v3/mod.rs @@ -1,3 +1,17 @@ +pub use crate::x86::v1::Fxsr; +pub use crate::x86::v1::Sse; +pub use crate::x86::v1::Sse2; +pub use crate::x86::v2::Cmpxchg16b; +pub use crate::x86::v2::Popcnt; +pub use crate::x86::v2::Sse3; +pub use crate::x86::v2::Sse4_1; +pub use crate::x86::v2::Sse4_2; +pub use crate::x86::v2::SupplementalSse3; + +pub use crate::x86::avx::Avx; +pub use crate::x86::avx::Avx2; +pub use crate::x86::xsave::Xsave; + mod bmi1; pub use bmi1::Bmi1; @@ -15,3 +29,6 @@ pub use lzcnt::Lzcnt; mod movbe; pub use movbe::Movbe; + +mod level; +pub use level::V3; diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs index 6e542dc1..cea0faf9 100644 --- a/fearless_simd_core/src/x86/v3/movbe.rs +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -37,12 +37,15 @@ impl Debug for Movbe { } } +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled.
unsafe impl TargetFeatureToken for Movbe { const FEATURES: &[&str] = &["movbe"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Movbe = self] => "movbe", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v4/level.rs b/fearless_simd_core/src/x86/v4/level.rs new file mode 100644 index 00000000..96250c58 --- /dev/null +++ b/fearless_simd_core/src/x86/v4/level.rs @@ -0,0 +1,325 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v4 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v4 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave")] +/// fn uses_x86_64_v4() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V4 { + /// The contained proof that AVX512-BW is available. + pub avx512bw: crate::x86::v4::Avx512bw, + /// The contained proof that AVX512-CD is available.
+ pub avx512cd: crate::x86::v4::Avx512cd, + /// The contained proof that AVX512-DQ is available. + pub avx512dq: crate::x86::v4::Avx512dq, + /// The contained proof that AVX512-VL is available. + pub avx512vl: crate::x86::v4::Avx512vl, + /// The contained proof that BMI1 is available. + pub bmi1: crate::x86::v4::Bmi1, + /// The contained proof that BMI2 is available. + pub bmi2: crate::x86::v4::Bmi2, + /// The contained proof that `cmpxchg16b` is available. + pub cmpxchg16b: crate::x86::v4::Cmpxchg16b, + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v4::Fxsr, + /// The contained proof that `lzcnt` is available. + pub lzcnt: crate::x86::v4::Lzcnt, + /// The contained proof that `movbe` is available. + pub movbe: crate::x86::v4::Movbe, + /// The contained proof that `popcnt` is available. + pub popcnt: crate::x86::v4::Popcnt, + /// The contained proof that `xsave` is available. + pub xsave: crate::x86::v4::Xsave, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for V4 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v4 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V4 { + const FEATURES: &[&str] = &[ + "avx", + "avx2", + "avx512bw", + "avx512cd", + "avx512dq", + "avx512f", + "avx512vl", + "bmi1", + "bmi2", + "cmpxchg16b", + "f16c", + "fma", + "fxsr", + "lzcnt", + "movbe", + "popcnt", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + "xsave", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string.
+ trampoline!([crate::x86::v4::Avx512bw = self.avx512bw, crate::x86::v4::Avx512cd = self.avx512cd, crate::x86::v4::Avx512dq = self.avx512dq, crate::x86::v4::Avx512vl = self.avx512vl, crate::x86::v4::Bmi1 = self.bmi1, crate::x86::v4::Bmi2 = self.bmi2, crate::x86::v4::Cmpxchg16b = self.cmpxchg16b, crate::x86::v4::Fxsr = self.fxsr, crate::x86::v4::Lzcnt = self.lzcnt, crate::x86::v4::Movbe = self.movbe, crate::x86::v4::Popcnt = self.popcnt, crate::x86::v4::Xsave = self.xsave] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V4 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v4 target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + avx512bw: crate::x86::v4::Avx512bw::try_new()?, + avx512cd: crate::x86::v4::Avx512cd::try_new()?, + avx512dq: crate::x86::v4::Avx512dq::try_new()?, + avx512vl: crate::x86::v4::Avx512vl::try_new()?, + bmi1: crate::x86::v4::Bmi1::try_new()?, + bmi2: crate::x86::v4::Bmi2::try_new()?, + cmpxchg16b: crate::x86::v4::Cmpxchg16b::try_new()?, + fxsr: crate::x86::v4::Fxsr::try_new()?, + lzcnt: crate::x86::v4::Lzcnt::try_new()?, + movbe: crate::x86::v4::Movbe::try_new()?, + popcnt: crate::x86::v4::Popcnt::try_new()?, + xsave: crate::x86::v4::Xsave::try_new()?, + }) + } + + #[target_feature( + enable = "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave" + )] + /// Create a new token for the x86-64-v4 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v4 is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave" target feature is available. + pub fn new() -> Self { + Self { + avx512bw: crate::x86::v4::Avx512bw::new(), + avx512cd: crate::x86::v4::Avx512cd::new(), + avx512dq: crate::x86::v4::Avx512dq::new(), + avx512vl: crate::x86::v4::Avx512vl::new(), + bmi1: crate::x86::v4::Bmi1::new(), + bmi2: crate::x86::v4::Bmi2::new(), + cmpxchg16b: crate::x86::v4::Cmpxchg16b::new(), + fxsr: crate::x86::v4::Fxsr::new(), + lzcnt: crate::x86::v4::Lzcnt::new(), + movbe: crate::x86::v4::Movbe::new(), + popcnt: crate::x86::v4::Popcnt::new(), + xsave: crate::x86::v4::Xsave::new(), + } + } +} + +impl From for crate::x86::v4::Avx { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx { crate::x86::v4::Avx::new() }) + } +} + +impl From for crate::x86::v4::Avx2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx2 { crate::x86::v4::Avx2::new() }) + } +} + +impl From for crate::x86::v4::Avx512bw { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512bw { crate::x86::v4::Avx512bw::new() }) + } +} + +impl From for crate::x86::v4::Avx512cd { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512cd { crate::x86::v4::Avx512cd::new() }) + } +} + +impl From for crate::x86::v4::Avx512dq { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512dq { crate::x86::v4::Avx512dq::new() }) + } +} + +impl From for crate::x86::v4::Avx512f { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512f { crate::x86::v4::Avx512f::new() }) + } +} + +impl From for crate::x86::v4::Avx512vl { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512vl { crate::x86::v4::Avx512vl::new() }) + } +} + +impl From for crate::x86::v4::Bmi1 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Bmi1 { crate::x86::v4::Bmi1::new() }) + } +} + +impl From for crate::x86::v4::Bmi2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Bmi2 { crate::x86::v4::Bmi2::new() }) + } +} + +impl From for crate::x86::v4::Cmpxchg16b { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Cmpxchg16b { crate::x86::v4::Cmpxchg16b::new() }) + } +} + +impl From for crate::x86::v4::F16c { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::F16c { crate::x86::v4::F16c::new() }) + } +} + +impl From for crate::x86::v4::Fma { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Fma { crate::x86::v4::Fma::new() }) + } +} + +impl From for crate::x86::v4::Fxsr { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Fxsr { crate::x86::v4::Fxsr::new() }) + } +} + +impl From for crate::x86::v4::Lzcnt { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Lzcnt { crate::x86::v4::Lzcnt::new() }) + } +} + +impl From for crate::x86::v4::Movbe { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Movbe { crate::x86::v4::Movbe::new() }) + } +} + +impl From for crate::x86::v4::Popcnt { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Popcnt { crate::x86::v4::Popcnt::new() }) + } +} + +impl From for crate::x86::v4::Sse { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse { crate::x86::v4::Sse::new() }) + } +} + +impl From for crate::x86::v4::Sse2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse2 { crate::x86::v4::Sse2::new() }) + } +} + +impl From for crate::x86::v4::Sse3 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse3 { crate::x86::v4::Sse3::new() }) + } +} + +impl From for crate::x86::v4::Sse4_1 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse4_1 { crate::x86::v4::Sse4_1::new() }) + } +} + +impl From for crate::x86::v4::Sse4_2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse4_2 { crate::x86::v4::Sse4_2::new() }) + } +} + +impl From for crate::x86::v4::SupplementalSse3 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::SupplementalSse3 { crate::x86::v4::SupplementalSse3::new() }) + } +} + +impl From for crate::x86::v4::Xsave { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Xsave { crate::x86::v4::Xsave::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v4/mod.rs b/fearless_simd_core/src/x86/v4/mod.rs new file mode 100644 index 00000000..e401ecb2 --- /dev/null +++ b/fearless_simd_core/src/x86/v4/mod.rs @@ -0,0 +1,27 @@ +pub use crate::x86::v1::Fxsr; +pub use crate::x86::v1::Sse; +pub use crate::x86::v1::Sse2; +pub use crate::x86::v2::Cmpxchg16b; +pub use crate::x86::v2::Popcnt; +pub use crate::x86::v2::Sse3; +pub use crate::x86::v2::Sse4_1; +pub use crate::x86::v2::Sse4_2; +pub use crate::x86::v2::SupplementalSse3; +pub use crate::x86::v3::Avx; +pub use crate::x86::v3::Avx2; +pub use crate::x86::v3::Bmi1; +pub use crate::x86::v3::Bmi2; +pub use crate::x86::v3::F16c; +pub use crate::x86::v3::Fma; +pub use crate::x86::v3::Lzcnt; +pub use crate::x86::v3::Movbe; +pub use crate::x86::v3::Xsave; + +pub use crate::x86::avx512::Avx512bw; +pub use crate::x86::avx512::Avx512cd; +pub use crate::x86::avx512::Avx512dq; +pub use crate::x86::avx512::Avx512f; +pub use crate::x86::avx512::Avx512vl; + +mod level; +pub use level::V4; diff --git a/fearless_simd_core/src/x86/xsave/mod.rs b/fearless_simd_core/src/x86/xsave/mod.rs new file mode 100644 index 00000000..aee24a74 --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/mod.rs @@ -0,0 +1,15 @@ +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." +)] +mod xsave; +pub use xsave::Xsave; + +mod xsavec; +pub use xsavec::Xsavec; + +mod xsaveopt; +pub use xsaveopt::Xsaveopt; + +pub use xsaves::Xsaves; +mod xsaves; diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 9d222867..3c57c87e 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -37,12 +37,15 @@ impl Debug for Xsave { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
unsafe impl TargetFeatureToken for Xsave { const FEATURES: &[&str] = &["xsave", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Xsave = self] => "xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index 6d11df47..b7e5393a 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -37,12 +37,15 @@ impl Debug for Xsavec { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Xsavec { const FEATURES: &[&str] = &["xsavec", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Xsavec = self] => "xsavec", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index b63d444b..8efabdca 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -37,12 +37,15 @@ impl Debug for Xsaveopt { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Xsaveopt { const FEATURES: &[&str] = &["xsaveopt", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated.
+ // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Xsaveopt = self] => "xsaveopt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index 60633bef..1a49e04c 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -37,12 +37,15 @@ impl Debug for Xsaves { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Xsaves { const FEATURES: &[&str] = &["xsaves", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately.
trampoline!([Xsaves = self] => "xsaves", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } From b40ee761e58c1ea762a434f2195012c7819345d1 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Wed, 15 Oct 2025 13:22:59 +0100 Subject: [PATCH 09/19] Re-run update --- fearless_simd_core/src/x86/v1/level.rs | 1 + fearless_simd_core/src/x86/v2/level.rs | 1 + fearless_simd_core/src/x86/v3/level.rs | 1 + fearless_simd_core/src/x86/v4/level.rs | 1 + fearless_simd_core/src/x86/xsave/xsave.rs | 3 +-- fearless_simd_core/src/x86/xsave/xsavec.rs | 3 +-- fearless_simd_core/src/x86/xsave/xsaveopt.rs | 3 +-- fearless_simd_core/src/x86/xsave/xsaves.rs | 3 +-- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fearless_simd_core/src/x86/v1/level.rs b/fearless_simd_core/src/x86/v1/level.rs index 58095f88..86fc6141 100644 --- a/fearless_simd_core/src/x86/v1/level.rs +++ b/fearless_simd_core/src/x86/v1/level.rs @@ -89,6 +89,7 @@ impl V1 { } } } +// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v1::Fxsr { fn from(value: V1) -> Self { diff --git a/fearless_simd_core/src/x86/v2/level.rs b/fearless_simd_core/src/x86/v2/level.rs index cc64c315..5f01a232 100644 --- a/fearless_simd_core/src/x86/v2/level.rs +++ b/fearless_simd_core/src/x86/v2/level.rs @@ -107,6 +107,7 @@ impl V2 { } } } +// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v2::Cmpxchg16b { fn from(value: V2) -> Self { diff --git a/fearless_simd_core/src/x86/v3/level.rs b/fearless_simd_core/src/x86/v3/level.rs index 53ce8485..ad607a4f 100644 --- a/fearless_simd_core/src/x86/v3/level.rs +++ b/fearless_simd_core/src/x86/v3/level.rs @@ -144,6 +144,7 @@ impl V3 { } } } +// TODO: From impls to convert into lower x86 versions. 
impl From for crate::x86::v3::Avx { fn from(value: V3) -> Self { diff --git a/fearless_simd_core/src/x86/v4/level.rs b/fearless_simd_core/src/x86/v4/level.rs index 96250c58..b3acc6f1 100644 --- a/fearless_simd_core/src/x86/v4/level.rs +++ b/fearless_simd_core/src/x86/v4/level.rs @@ -155,6 +155,7 @@ impl V4 { } } } +// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v4::Avx { fn from(value: V4) -> Self { diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 3c57c87e..19c8f2f5 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -40,7 +40,7 @@ impl Debug for Xsave { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. unsafe impl TargetFeatureToken for Xsave { - const FEATURES: &[&str] = &["xsave", ]; + const FEATURES: &[&str] = &["xsave"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -87,7 +87,6 @@ impl Xsave { } } - const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index b7e5393a..df8033d1 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -40,7 +40,7 @@ impl Debug for Xsavec { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. 
unsafe impl TargetFeatureToken for Xsavec { - const FEATURES: &[&str] = &["xsavec", ]; + const FEATURES: &[&str] = &["xsavec"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -87,7 +87,6 @@ impl Xsavec { } } - const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index 8efabdca..563c26c5 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -40,7 +40,7 @@ impl Debug for Xsaveopt { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. unsafe impl TargetFeatureToken for Xsaveopt { - const FEATURES: &[&str] = &["xsaveopt", ]; + const FEATURES: &[&str] = &["xsaveopt"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -87,7 +87,6 @@ impl Xsaveopt { } } - const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index 1a49e04c..8f365d8c 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -40,7 +40,7 @@ impl Debug for Xsaves { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. 
unsafe impl TargetFeatureToken for Xsaves { - const FEATURES: &[&str] = &["xsaves", ]; + const FEATURES: &[&str] = &["xsaves"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -87,7 +87,6 @@ impl Xsaves { } } - const _: () = { assert!( core::mem::size_of::() == 0, From 7f52b7bc9d27c133ccd885bc323b830d9743dbe1 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 10:16:09 +0100 Subject: [PATCH 10/19] Bump MSRV to allow avx512 support --- .github/workflows/ci.yml | 4 ++-- CHANGELOG.md | 2 +- Cargo.toml | 2 +- README.md | 2 +- fearless_simd/README.md | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a6a3d50f..ffeb4b90 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,12 +3,12 @@ env: # version like 1.70. Note that we only specify MAJOR.MINOR and not PATCH so that bugfixes still # come automatically. If the version specified here is no longer the latest stable version, # then please feel free to submit a PR that adjusts it along with the potential clippy fixes. - RUST_STABLE_VER: "1.88" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 + RUST_STABLE_VER: "1.90" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 # The purpose of checking with the minimum supported Rust toolchain is to detect its staleness. # If the compilation fails, then the version specified here needs to be bumped up to reality. # Be sure to also update the rust-version property in the workspace Cargo.toml file, # plus all the README.md files of the affected packages. - RUST_MIN_VER: "1.88" + RUST_MIN_VER: "1.89" # List of packages that will be checked with the minimum supported Rust version. # This should be limited to packages that are intended for publishing. 
RUST_MIN_VER_PKGS: "-p fearless_simd" diff --git a/CHANGELOG.md b/CHANGELOG.md index 14c1fb20..db6d6a96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ You can find its changes [documented below](#030-2025-10-14). ## [Unreleased] -This release has an [MSRV][] of 1.88. +This release has an [MSRV][] of 1.89. ### Added diff --git a/Cargo.toml b/Cargo.toml index e84d0a2e..b1f1ab45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ license = "Apache-2.0 OR MIT" repository = "https://github.com/linebender/fearless_simd" # Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files # and with the MSRV in the `Unreleased` section of CHANGELOG.md. -rust-version = "1.88" +rust-version = "1.89" [workspace.lints] diff --git a/README.md b/README.md index 7c3d95fe..4749d6f9 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ It benefited from conversations with Luca Versari, though he is not responsible ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. diff --git a/fearless_simd/README.md b/fearless_simd/README.md index 160ce842..953e4827 100644 --- a/fearless_simd/README.md +++ b/fearless_simd/README.md @@ -139,7 +139,7 @@ At least one of `std` and `libm` is required; `std` overrides `libm`. ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. Future versions of Fearless SIMD might increase the Rust version requirement. 
It will not be treated as a breaking change and as such can even happen with small patch releases. From 6da5c54e17c3c4a5312509fbbb9b41088d8fe1f5 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:04:55 +0100 Subject: [PATCH 11/19] Misc cleanups to get ready to launch --- .clippy.toml | 2 + .github/workflows/ci.yml | 2 +- Cargo.lock | 4 +- fearless_simd_core/Cargo.toml | 5 +- fearless_simd_core/LICENSE-APACHE | 176 +++++++++++++++++++++++++ fearless_simd_core/LICENSE-MIT | 25 ++++ fearless_simd_core/README.md | 105 +++++++++++++++ fearless_simd_core/gen/Cargo.toml | 9 +- fearless_simd_core/gen/src/data/x86.rs | 6 +- fearless_simd_core/src/lib.rs | 53 +++++++- fearless_simd_core/src/x86/mod.rs | 5 + fearless_simd_gen/src/mk_simd_types.rs | 45 ++++--- 12 files changed, 404 insertions(+), 33 deletions(-) create mode 100644 fearless_simd_core/LICENSE-APACHE create mode 100644 fearless_simd_core/LICENSE-MIT create mode 100644 fearless_simd_core/README.md diff --git a/.clippy.toml b/.clippy.toml index 4781d68c..89821835 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -8,3 +8,5 @@ trivial-copy-size-limit = 16 # END LINEBENDER LINT SET + +doc-valid-idents = ["ShangMi", ".."] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ffeb4b90..5c1919d0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ env: RUST_MIN_VER: "1.89" # List of packages that will be checked with the minimum supported Rust version. # This should be limited to packages that are intended for publishing. - RUST_MIN_VER_PKGS: "-p fearless_simd" + RUST_MIN_VER_PKGS: "-p fearless_simd -p fearless_simd_core" # List of features that depend on the standard library and will be excluded from no_std checks. FEATURES_DEPENDING_ON_STD: "std,default" # List of packages that can not target Wasm. 
diff --git a/Cargo.lock b/Cargo.lock index 672a0913..ce10218f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -135,14 +135,14 @@ dependencies = [ [[package]] name = "fearless_simd_core" -version = "0.1.0" +version = "0.3.0" dependencies = [ "bytemuck", ] [[package]] name = "fearless_simd_core_gen" -version = "0.1.0" +version = "0.0.0" [[package]] name = "fearless_simd_dev_macros" diff --git a/fearless_simd_core/Cargo.toml b/fearless_simd_core/Cargo.toml index e16a9823..1342c218 100644 --- a/fearless_simd_core/Cargo.toml +++ b/fearless_simd_core/Cargo.toml @@ -1,6 +1,9 @@ [package] name = "fearless_simd_core" -version = "0.1.0" +version = "0.3.0" +description = "Safely run custom #[target_feature] functions" +keywords = ["simd", "target_feature"] +categories = ["hardware-support"] edition.workspace = true license.workspace = true repository.workspace = true diff --git a/fearless_simd_core/LICENSE-APACHE b/fearless_simd_core/LICENSE-APACHE new file mode 100644 index 00000000..d9a10c0d --- /dev/null +++ b/fearless_simd_core/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/fearless_simd_core/LICENSE-MIT b/fearless_simd_core/LICENSE-MIT new file mode 100644 index 00000000..f3d84348 --- /dev/null +++ b/fearless_simd_core/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 Raph Levien + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/fearless_simd_core/README.md b/fearless_simd_core/README.md new file mode 100644 index 00000000..45052d28 --- /dev/null +++ b/fearless_simd_core/README.md @@ -0,0 +1,105 @@ +
+ +# Fearless SIMD Core + +**Target Features in Rust's type system** + +[![Latest published version.](https://img.shields.io/crates/v/fearless_simd.svg)](https://crates.io/crates/fearless_simd) +[![Documentation build status.](https://img.shields.io/docsrs/fearless_simd.svg)](https://docs.rs/fearless_simd) +[![Apache 2.0 or MIT license.](https://img.shields.io/badge/license-Apache--2.0_OR_MIT-blue.svg)](#license) +\ +[![Linebender Zulip, #simd channel.](https://img.shields.io/badge/Linebender-%23simd-blue?logo=Zulip)](https://xi.zulipchat.com/#narrow/channel/514230-simd) +[![GitHub Actions CI status.](https://img.shields.io/github/actions/workflow/status/linebender/fearless_simd/ci.yml?logo=github&label=CI)](https://github.com/linebender/fearless_simd/actions) +[![Dependency staleness status.](https://deps.rs/crate/fearless_simd/latest/status.svg)](https://deps.rs/crate/fearless_simd/) + +
+ + + + + + + +An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust. + +This crate introduces the [`trampoline!`] macro, which allows running code in a +statically validated `#[target_feature(enable="some_features")]` environment, based on +externally provided tokens. +This abstraction is designed to be combined with target features 1.1, the recent update +in the Rust compiler to allow calling `#[target_feature]` functions safely from within +other `#[target_feature]` functions. +As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`]. + +This crate also has modules which contain tokens for each Rust target feature. +These allow safely validating that a target feature is available, and obtaining a token. +These are grouped by architecture: + +- [`x86`] contains the tokens for both the x86 and x86-64 targets. + It also contains tokens for each x86-64 microarchitecture level, see [`x86::V1`] for details. + + +# Examples + +At the time of writing, it is not possible to turn scalar values into SIMD vector types safely using +only the standard library. +These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. + + + +Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature. +This is also the case for WASM with `wasm_simd`, but note that this crate +[isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to +call `#[target_feature]` functions on that platform. + +# Crate Feature Flags + + + +# Implementation + +The tokens provided to [`trampoline!`] implement the [`TargetFeatureToken`] trait, +which indicates that a value of that token is only possible to construct if the set +of target features it specifies are enabled. +This means that the macro can use the existence of these token values as +safety proofs that calling a function with those target features is safe. 
+ +This safety proof happens entirely in const evaluation, so if there's a mistake with the +proof, it will cause a compilation error. +The code generated by this macro is thus a function containing the provided code, marked +with `#[target_feature]`, and a call to this newly generated function. + +[attributes.codegen.target_feature.wasm]: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.wasm + + + +## Minimum supported Rust Version (MSRV) + +This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. + +Future versions of Fearless SIMD might increase the Rust version requirement. +It will not be treated as a breaking change and as such can even happen with small patch releases. + +## Community + +[![Linebender Zulip, #simd channel.](https://img.shields.io/badge/Linebender-%23simd-blue?logo=Zulip)](https://xi.zulipchat.com/#narrow/channel/514230-simd) + +Discussion of Fearless SIMD development happens in the [Linebender Zulip](https://xi.zulipchat.com/), specifically in [#simd](https://xi.zulipchat.com/#narrow/channel/514230-simd). +All public content can be read without logging in. + +Contributions are welcome by pull request. +The [Rust code of conduct] applies. + +## License + +Licensed under either of + +- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or ) +- MIT license ([LICENSE-MIT](LICENSE-MIT) or ) + +at your option. + +[Rust Code of Conduct]: https://www.rust-lang.org/policies/code-of-conduct diff --git a/fearless_simd_core/gen/Cargo.toml b/fearless_simd_core/gen/Cargo.toml index 5617f7be..65644e6e 100644 --- a/fearless_simd_core/gen/Cargo.toml +++ b/fearless_simd_core/gen/Cargo.toml @@ -1,6 +1,11 @@ [package] name = "fearless_simd_core_gen" -version = "0.1.0" -edition = "2024" +description = "Internal code generator for the Fearless SIMD Core crate." 
+edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +publish = false [dependencies] diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index e8c5f85c..b6719c22 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -431,7 +431,7 @@ pub(crate) const X86_LEVEL_TEMPLATE: &str = include_str!("../../templates/x86_le // The first four are all assumed, and the final is not implemented because // it's practically impossible to use correctly (and there's no reason to). pub(crate) const X86_V1: &[&str] = &["fxsr", "sse", "sse2"]; -/// The target features required in the x86-64-v1 level, in addition to those already in [`V1`]. +/// The target features required in the x86-64-v2 level, in addition to those already in [`X86_V1`]. pub(crate) const X86_V2: &[&str] = &[ "sse3", "ssse3", @@ -442,9 +442,9 @@ pub(crate) const X86_V2: &[&str] = &[ // The lahfahf target feature is currently in Rust beta. // "lahfsahf", ]; -/// The target features required in the x86-64-v3 level, excluding those already in [`V2`]. +/// The target features required in the x86-64-v3 level, excluding those already in [`X86_V2`]. pub(crate) const X86_V3: &[&str] = &[ "avx", "avx2", "bmi1", "bmi2", "f16c", "fma", "lzcnt", "movbe", "xsave", ]; -/// The target features required in the x86-64-v4 level, excluding those already in [`V3`]. +/// The target features required in the x86-64-v4 level, excluding those already in [`X86_V3`]. pub(crate) const X86_V4: &[&str] = &["avx512f", "avx512bw", "avx512cd", "avx512dq", "avx512vl"]; diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 75bc9acb..67ce0e0a 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -1,4 +1,52 @@ -//! Tooling for Rust's target features. +//! An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust. +//! +//! 
This crate introduces the [`trampoline!`] macro, which allows running code in a +//! statically validated `#[target_feature(enable="some_features")]` environment, based on +//! externally provided tokens. +//! This abstraction is designed to be combined with target features 1.1, the recent update +//! in the Rust compiler to allow calling `#[target_feature]` functions safely from within +//! other `#[target_feature]` functions. +//! As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`]. +//! +//! This crate also has modules which contain tokens for each Rust target feature. +//! These allow safely validating that a target feature is available, and obtaining a token. +//! These are grouped by architecture: +//! +//! - [`x86`] contains the tokens for both the x86 and x86-64 targets. +//! It also contains tokens for each x86-64 microarchitecture level, see [`x86::V1`] for details. +//! +//! +//! # Examples +//! +//! At the time of writing, it is not possible to turn scalar values into SIMD vector types safely using +//! only the standard library. +//! These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. +//! +//! +//! +//! Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature. +//! This is also the case for WASM with `wasm_simd`, but note that this crate +//! [isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to +//! call `#[target_feature]` functions on that platform. +//! +//! # Crate Feature Flags +//! +//! +//! +//! # Implementation +//! +//! The tokens provided to [`trampoline!`] implement the [`TargetFeatureToken`] trait, +//! which indicates that a value of that token is only possible to construct if the set +//! of target features it specifies are enabled. +//! This means that the macro can use the existence of these token values as +//! safety proofs that calling a function with those target features is safe. +//! +//! 
This safety proof happens entirely in const evaluation, so if there's a mistake with the +//! proof, it will cause a compilation error. +//! The code generated by this macro is thus a function containing the provided code, marked +//! with `#[target_feature]`, and a call to this newly generated function. +//! +//! [attributes.codegen.target_feature.wasm]: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.wasm // LINEBENDER LINT SET - lib.rs - v4 // See https://linebender.org/wiki/canonical-lints/ @@ -229,6 +277,9 @@ macro_rules! trampoline { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(test)] mod example_expansion { + #[cfg(target_arch = "x86")] + use core::arch::x86::{__m128, _mm_mul_ps}; + #[cfg(target_arch = "x86_64")] use core::arch::x86_64::{__m128, _mm_mul_ps}; use crate::x86::{self, v1::Sse}; diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index bdea8907..77ec914b 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -24,3 +24,8 @@ pub mod v1; pub mod v2; pub mod v3; pub mod v4; + +pub use v1::V1; +pub use v2::V2; +pub use v3::V3; +pub use v4::V4; diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index b1935285..ee71e374 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -207,29 +207,28 @@ fn simd_impl(ty: &VecType) -> TokenStream { | OpSig::Cvt(_, _) | OpSig::Reinterpret(_, _) | OpSig::Shift - ) { - if let Some(args) = sig.vec_trait_args() { - let ret_ty = sig.ret_ty(ty, TyFlavor::VecImpl); - let call_args = match sig { - OpSig::Unary | OpSig::Cvt(_, _) | OpSig::Reinterpret(_, _) => quote! { self }, - OpSig::Binary | OpSig::Compare | OpSig::Combine => { - quote! { self, rhs.simd_into(self.simd) } - } - OpSig::Shift => { - quote! { self, shift } - } - OpSig::Ternary => { - quote! 
{ self, op1.simd_into(self.simd), op2.simd_into(self.simd) } - } - _ => quote! { todo!() }, - }; - methods.push(quote! { - #[inline(always)] - pub fn #method_name(#args) -> #ret_ty { - self.simd.#trait_method(#call_args) - } - }); - } + ) && let Some(args) = sig.vec_trait_args() + { + let ret_ty = sig.ret_ty(ty, TyFlavor::VecImpl); + let call_args = match sig { + OpSig::Unary | OpSig::Cvt(_, _) | OpSig::Reinterpret(_, _) => quote! { self }, + OpSig::Binary | OpSig::Compare | OpSig::Combine => { + quote! { self, rhs.simd_into(self.simd) } + } + OpSig::Shift => { + quote! { self, shift } + } + OpSig::Ternary => { + quote! { self, op1.simd_into(self.simd), op2.simd_into(self.simd) } + } + _ => quote! { todo!() }, + }; + methods.push(quote! { + #[inline(always)] + pub fn #method_name(#args) -> #ret_ty { + self.simd.#trait_method(#call_args) + } + }); } } let vec_impl = simd_vec_impl(ty); From 4b1ae94870afb7c916f342cf87dd726d07604737 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:23:49 +0100 Subject: [PATCH 12/19] Handle sse4a and tbm consistently --- Cargo.toml | 1 + fearless_simd_core/gen/src/data/x86.rs | 15 ++- .../src/x86/discontinued/mod.rs | 2 +- fearless_simd_core/src/x86/sse/mod.rs | 4 + fearless_simd_core/src/x86/sse/sse4a.rs | 116 ++++++++++++++++++ 5 files changed, 129 insertions(+), 9 deletions(-) create mode 100644 fearless_simd_core/src/x86/sse/sse4a.rs diff --git a/Cargo.toml b/Cargo.toml index b1f1ab45..eb1d3283 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ license = "Apache-2.0 OR MIT" repository = "https://github.com/linebender/fearless_simd" # Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files # and with the MSRV in the `Unreleased` section of CHANGELOG.md. +# When increasing past 1.91, also uncomment the `discontinued::tbm` and `sse::sse4a` modules/imports in Fearless SIMD Core. 
rust-version = "1.89" [workspace.lints] diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index b6719c22..6740063d 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -349,14 +349,13 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ struct sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"] fn uses_sse4 ), - // // TODO: This only exists from 1.91 and above (current beta) - // f!( - // /// [SSE4a] --- StreamingSIMDExtensions 4a - - // /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a - // struct Sse4a("SSE4a"): "sse4a" + ["sse3"] - // fn uses_sse4a - // ), + f!( + /// [SSE4a] --- StreamingSIMDExtensions 4a + /// + /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a + struct sse::Sse4a("SSE4a"): "sse4a" + ["sse3"] + fn uses_sse4a + ), f!( /// [SSSE3] --- Supplemental StreamingSIMDExtensions 3 /// diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs index 9afa91dc..7869229d 100644 --- a/fearless_simd_core/src/x86/discontinued/mod.rs +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -1,3 +1,3 @@ -// Stable in beta, but not current stable +// These will be stabilised in 1.91. // mod tbm; // pub use tbm::Tbm; diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs index ce9a3aec..0470258b 100644 --- a/fearless_simd_core/src/x86/sse/mod.rs +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -17,6 +17,10 @@ pub use sse3::Sse3; mod ssse3; pub use ssse3::SupplementalSse3; +// These will be stabilised in 1.91. +// mod sse4a; +// pub use sse4a::Sse4a; + mod sse4_1; pub use sse4_1::Sse4_1; diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs new file mode 100644 index 00000000..b0475fad --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -0,0 +1,116 @@ +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE4a target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE4a] --- StreamingSIMDExtensions 4a +/// +/// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a +/// +/// A token indicating that the current CPU has the `sse4a` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse4a")] +/// fn uses_sse4a() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse4a { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse4a { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse4a" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Sse4a { + const FEATURES: &[&str] = &["sse4a", "sse", "sse2", "sse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sse4a = self] => "sse4a", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse4a { + #[cfg(feature = "std")] + /// Create a new token if the `"sse4a"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse4a") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse4a")] + /// Create a new token for the "sse4a" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE4a is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse4a" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse4a) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4a = value] => "sse4a", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse4a) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4a = value] => "sse4a", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sse4a) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4a = value] => "sse4a", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; From dc9053938fbadcf8a7176d5868a0bf9a75859e44 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 14:54:32 +0100 Subject: [PATCH 13/19] Add the final missing docs --- fearless_simd_core/gen/src/data/x86.rs | 48 +++++++++---------- fearless_simd_core/src/lib.rs | 4 +- fearless_simd_core/src/x86/adx/mod.rs | 2 +- fearless_simd_core/src/x86/avx/mod.rs | 7 +++ fearless_simd_core/src/x86/avx512/mod.rs | 6 +++ fearless_simd_core/src/x86/crypto/mod.rs | 4 ++ .../src/x86/crypto/pclmulqdq.rs | 4 +- fearless_simd_core/src/x86/crypto/rdrand.rs | 4 +- fearless_simd_core/src/x86/crypto/rdseed.rs | 4 +- .../src/x86/discontinued/mod.rs | 7 +++ fearless_simd_core/src/x86/mod.rs | 21 +++++--- fearless_simd_core/src/x86/sse/fxsr.rs | 6 +-- fearless_simd_core/src/x86/sse/mod.rs | 9 ++++ fearless_simd_core/src/x86/sse/sse4a.rs | 1 + fearless_simd_core/src/x86/v1/mod.rs | 7 ++- fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 4 +- fearless_simd_core/src/x86/v2/mod.rs | 9 +++- fearless_simd_core/src/x86/v2/popcnt.rs | 4 +- fearless_simd_core/src/x86/v3/lzcnt.rs | 4 +- fearless_simd_core/src/x86/v3/mod.rs | 8 ++++ fearless_simd_core/src/x86/v3/movbe.rs | 4 +- fearless_simd_core/src/x86/v4/mod.rs | 8 ++++ fearless_simd_core/src/x86/xsave/mod.rs | 2 + fearless_simd_core/src/x86/xsave/xsave.rs | 2 +- fearless_simd_core/src/x86/xsave/xsavec.rs | 4 +- fearless_simd_core/src/x86/xsave/xsaveopt.rs | 4 +- fearless_simd_core/src/x86/xsave/xsaves.rs | 4 +- 27 files changed, 131 insertions(+), 60 deletions(-) diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 6740063d..e2b1e9f2 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -202,9 +202,9 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ fn uses_bmi2 ), f!( - /// ["cmpxchg16b"] --- Compares and exchange 16 bytes (128 bits) of data atomically + /// [`cmpxchg16b`] --- Compares and 
exchange 16 bytes (128 bits) of data atomically /// - /// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + /// [`cmpxchg16b`]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b struct v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + [] fn uses_cmpxchg16b ), @@ -223,10 +223,10 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ fn uses_fma ), f!( - /// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State + /// [`fxsave`] and [`fxrstor`] --- Save and restore x87 FPU, MMX Technology, and SSE State /// - /// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave, - /// ["fxrstor"]: https://www.felixcloutier.com/x86/fxrstor, + /// [`fxsave`]: https://www.felixcloutier.com/x86/fxsave, + /// [`fxrstor`]: https://www.felixcloutier.com/x86/fxrstor, struct sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + [] fn uses_fxsr ), @@ -245,44 +245,44 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ fn uses_keylocker ), f!( - /// ["lzcnt"] --- Leading zeros count + /// [`lzcnt`] --- Leading zeros count /// - /// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt + /// [`lzcnt`]: https://www.felixcloutier.com/x86/lzcnt struct v3::Lzcnt("`lzcnt`"): "lzcnt" + [] fn uses_lzcnt ), f!( - /// ["movbe"] --- Move data after swapping bytes + /// [`movbe`] --- Move data after swapping bytes /// - /// ["movbe"]: https://www.felixcloutier.com/x86/movbe + /// [`movbe`]: https://www.felixcloutier.com/x86/movbe struct v3::Movbe("`movbe`"): "movbe" + [] fn uses_movbe ), f!( - /// ["pclmulqdq"] --- Packed carry-less multiplication quadword + /// [`pclmulqdq`] --- Packed carry-less multiplication quadword /// - /// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq + /// [`pclmulqdq`]: https://www.felixcloutier.com/x86/pclmulqdq struct crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"] fn uses_pclmulqdq ), f!( - /// ["popcnt"] --- Count of bits set to 1 + /// [`popcnt`] --- Count of bits set to 1 /// - /// ["popcnt"]: 
https://www.felixcloutier.com/x86/popcnt + /// [`popcnt`]: https://www.felixcloutier.com/x86/popcnt struct v2::Popcnt("`popcnt`"): "popcnt" + [] fn uses_popcnt ), f!( - /// ["rdrand"] --- Read random number + /// [`rdrand`] --- Read random number /// - /// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand + /// [`rdrand`]: https://en.wikipedia.org/wiki/RdRand struct crypto::Rdrand("`rdrand`"): "rdrand" + [] fn uses_rdrand ), f!( - /// ["rdseed"] --- Read random seed + /// [`rdseed`] --- Read random seed /// - /// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand + /// [`rdseed`]: https://en.wikipedia.org/wiki/RdRand struct crypto::Rdseed("`rdseed`"): "rdseed" + [] fn uses_rdseed ), @@ -394,28 +394,28 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ f!( /// [`xsave`] --- Save processor extended states /// - /// ["xsave"]: https://www.felixcloutier.com/x86/xsave + /// [`xsave`]: https://www.felixcloutier.com/x86/xsave struct xsave::Xsave("`xsave`"): "xsave" + [] fn uses_xsave ), f!( - /// ["xsavec"] --- Save processor extended states with compaction + /// [`xsavec`] --- Save processor extended states with compaction /// - /// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec + /// [`xsavec`]: https://www.felixcloutier.com/x86/xsavec struct xsave::Xsavec("`xsavec`"): "xsavec" + [] fn uses_xsavec ), f!( - /// ["xsaveopt"] --- Save processor extended states optimized + /// [`xsaveopt`] --- Save processor extended states optimized /// - /// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt + /// [`xsaveopt`]: https://www.felixcloutier.com/x86/xsaveopt struct xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + [] fn uses_xsaveopt ), f!( - /// ["xsaves"] --- Save processor extended states supervisor + /// [`xsaves`] --- Save processor extended states supervisor /// - /// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves + /// [`xsaves`]: https://www.felixcloutier.com/x86/xsaves struct xsave::Xsaves("`xsaves`"): "xsaves" + [] fn uses_xsaves ), diff --git 
a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 67ce0e0a..0638e8e6 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -18,8 +18,8 @@ //! //! # Examples //! -//! At the time of writing, it is not possible to turn scalar values into SIMD vector types safely using -//! only the standard library. +//! At the time of writing, it is not possible to turn scalar values into SIMD +//! vector types safely using only the standard library. //! These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. //! //! diff --git a/fearless_simd_core/src/x86/adx/mod.rs b/fearless_simd_core/src/x86/adx/mod.rs index 3c74dc60..0fc70629 100644 --- a/fearless_simd_core/src/x86/adx/mod.rs +++ b/fearless_simd_core/src/x86/adx/mod.rs @@ -1,4 +1,4 @@ -//! The "adx" target feature. +//! The "adx" target feature, used for arbitrary precision integer addition. #[expect( clippy::module_inception, diff --git a/fearless_simd_core/src/x86/avx/mod.rs b/fearless_simd_core/src/x86/avx/mod.rs index f047d055..3261000e 100644 --- a/fearless_simd_core/src/x86/avx/mod.rs +++ b/fearless_simd_core/src/x86/avx/mod.rs @@ -1,3 +1,10 @@ +//! Target features related to the Advanced Vector Extensions target features (before AVX-512). +//! +//! These are most commonly used through the [x86-64-v3](crate::x86::V3) microarchitecture level. +//! +//! These support SIMD registers of up to 256 bits. +//! For the 512 bit extension, see [`avx512`](crate::x86::avx512). + #[expect( clippy::module_inception, reason = "The inner module is automatically generated." diff --git a/fearless_simd_core/src/x86/avx512/mod.rs b/fearless_simd_core/src/x86/avx512/mod.rs index 1044da40..abc53fde 100644 --- a/fearless_simd_core/src/x86/avx512/mod.rs +++ b/fearless_simd_core/src/x86/avx512/mod.rs @@ -1,3 +1,9 @@ +//! Target features related to the 512-bit extensions to [AVX](crate::x86::avx). +//! +//! 
Many of these are part of the [x86-64-v4](crate::x86::V4) microarchitecture level. +//! +//! These support SIMD registers of up to 512 bits. + mod avx512bf16; pub use avx512bf16::Avx512bf16; diff --git a/fearless_simd_core/src/x86/crypto/mod.rs b/fearless_simd_core/src/x86/crypto/mod.rs index cfb34d3b..39a3c923 100644 --- a/fearless_simd_core/src/x86/crypto/mod.rs +++ b/fearless_simd_core/src/x86/crypto/mod.rs @@ -1,3 +1,7 @@ +//! Cryptography-related target features, including hashing, random number generation, and encryption. +//! +//! These are not generally part of the standardised microarchitecture levels. + mod aes; pub use aes::Aes; diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs index ca80c141..4ad6e376 100644 --- a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["pclmulqdq"] --- Packed carry-less multiplication quadword +/// [`pclmulqdq`] --- Packed carry-less multiplication quadword /// -/// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq +/// [`pclmulqdq`]: https://www.felixcloutier.com/x86/pclmulqdq /// /// A token indicating that the current CPU has the `pclmulqdq` target feature. /// diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs index 46d05c8c..38d211bb 100644 --- a/fearless_simd_core/src/x86/crypto/rdrand.rs +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["rdrand"] --- Read random number +/// [`rdrand`] --- Read random number /// -/// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand +/// [`rdrand`]: https://en.wikipedia.org/wiki/RdRand /// /// A token indicating that the current CPU has the `rdrand` target feature. 
/// diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs index a4ba70f3..08730295 100644 --- a/fearless_simd_core/src/x86/crypto/rdseed.rs +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["rdseed"] --- Read random seed +/// [`rdseed`] --- Read random seed /// -/// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand +/// [`rdseed`]: https://en.wikipedia.org/wiki/RdRand /// /// A token indicating that the current CPU has the `rdseed` target feature. /// diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs index 7869229d..ed82fed0 100644 --- a/fearless_simd_core/src/x86/discontinued/mod.rs +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -1,3 +1,10 @@ +//! Discontinued x86-64 target features. +//! +//! That is target features which were present on some CPUs, but later CPU families from the +//! same vendor did not include them. +//! +//! For more information, see + // These will be stabilised in 1.91. // mod tbm; // pub use tbm::Tbm; diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index 77ec914b..206589e8 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -1,14 +1,21 @@ //! Target feature tokens for the x86 and x86-64 CPU families. //! -//! The general compuotation CPU features associated with each [microarchitecture level] can -//! be found in their corresponding modules: +//! The general computation [microarchitecture level]s each have a level in this module. +//! These levels are useful for most users of this crate, as they provide useful categories +//! of supported instructions. //! -//! - [`v1`] for x86-64-v1. -//! - [`v2`] for x86-64-v2. -//! - [`v3`] for x86-64-v3. -//! - [`v4`] for x86-64-v4. +//! - [`V1`] for x86-64-v1. +//! - [`V2`] for x86-64-v2. +//! - [`V3`] for x86-64-v3. +//! 
- [`V4`] for x86-64-v4. //! -//! Tokens for target features which not associated with these levels can be found in this module. +//! We don't yet provide a way to select the best of these for the current CPU, +//! but that is planned. +//! +//! Tokens for individual target features, including those not associated with these levels, +//! can be found in the modules under this module. +//! These are less likely to be directly useful for most users, but are provided for use +//! cases which require them (probably especially those under [`crypto`]). //! //! [microarchitecture level]: https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs index a36ec709..c1315c39 100644 --- a/fearless_simd_core/src/x86/sse/fxsr.rs +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -7,10 +7,10 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State +/// [`fxsave`] and [`fxrstor`] --- Save and restore x87 FPU, MMX Technology, and SSE State /// -/// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave, -/// ["fxrstor"]: https://www.felixcloutier.com/x86/fxrstor, +/// [`fxsave`]: https://www.felixcloutier.com/x86/fxsave +/// [`fxrstor`]: https://www.felixcloutier.com/x86/fxrstor /// /// A token indicating that the current CPU has the `fxsr` target feature. /// diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs index 0470258b..03e0320c 100644 --- a/fearless_simd_core/src/x86/sse/mod.rs +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -1,3 +1,12 @@ +//! Target features related to Streaming SIMD Extensions. +//! +//! These are the predecessors to the [AVX](crate::x86::avx) instructions. +//! +//! These are most commonly used through the [x86-64-v2](crate::x86::V2) microarchitecture level. +//!
Some of these features are also included in [x86-64-v1](crate::x86::V1). +//! +//! These support SIMD registers of up to 128 bits. + mod fxsr; pub use fxsr::Fxsr; diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs index b0475fad..f3562cab 100644 --- a/fearless_simd_core/src/x86/sse/sse4a.rs +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -108,6 +108,7 @@ impl From for crate::x86::sse::Sse3 { } } + const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs index 1a6309e0..02d27c5a 100644 --- a/fearless_simd_core/src/x86/v1/mod.rs +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -1,6 +1,11 @@ //! Target features enabled in the `x86-64-v1` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! -//! This is the baseline for x86-64 support. +//! This can usually be treated as the baseline for x86-64 support; all of the target features in this module are enabled by +//! default on Rust's x86-64 targets (such as `x86_64-unknown-linux-gnu`). +//! +//! This module also contains [`V1`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. 
pub use crate::x86::sse::Fxsr; pub use crate::x86::sse::Sse; diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs index 42415dc4..dc123141 100644 --- a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["cmpxchg16b"] --- Compares and exchange 16 bytes (128 bits) of data atomically +/// [`cmpxchg16b`] --- Compares and exchange 16 bytes (128 bits) of data atomically /// -/// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b +/// [`cmpxchg16b`]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b /// /// A token indicating that the current CPU has the `cmpxchg16b` target feature. /// diff --git a/fearless_simd_core/src/x86/v2/mod.rs b/fearless_simd_core/src/x86/v2/mod.rs index 60c6a651..666414b2 100644 --- a/fearless_simd_core/src/x86/v2/mod.rs +++ b/fearless_simd_core/src/x86/v2/mod.rs @@ -1,8 +1,15 @@ +//! Target features enabled in the `x86-64-v2` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This module also contains [`V2`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. +//! +//! This level also implies that `x86-64-v1` is available. + pub use crate::x86::sse::Sse3; pub use crate::x86::sse::Sse4_1; pub use crate::x86::sse::Sse4_2; pub use crate::x86::sse::SupplementalSse3; -// TODO: Do we actually want to re-export from the previous level here? 
pub use crate::x86::v1::Fxsr; pub use crate::x86::v1::Sse; pub use crate::x86::v1::Sse2; diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs index e5666b6a..0a81347a 100644 --- a/fearless_simd_core/src/x86/v2/popcnt.rs +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["popcnt"] --- Count of bits set to 1 +/// [`popcnt`] --- Count of bits set to 1 /// -/// ["popcnt"]: https://www.felixcloutier.com/x86/popcnt +/// [`popcnt`]: https://www.felixcloutier.com/x86/popcnt /// /// A token indicating that the current CPU has the `popcnt` target feature. /// diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs index c92a0c9f..e9c629dc 100644 --- a/fearless_simd_core/src/x86/v3/lzcnt.rs +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["lzcnt"] --- Leading zeros count +/// [`lzcnt`] --- Leading zeros count /// -/// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt +/// [`lzcnt`]: https://www.felixcloutier.com/x86/lzcnt /// /// A token indicating that the current CPU has the `lzcnt` target feature. /// diff --git a/fearless_simd_core/src/x86/v3/mod.rs b/fearless_simd_core/src/x86/v3/mod.rs index 8c158a70..1a3ec5ed 100644 --- a/fearless_simd_core/src/x86/v3/mod.rs +++ b/fearless_simd_core/src/x86/v3/mod.rs @@ -1,3 +1,11 @@ +//! Target features enabled in the `x86-64-v3` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This module also contains [`V3`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. +//! +//! This level also implies that `x86-64-v2` is available. 
+ pub use crate::x86::v1::Fxsr; pub use crate::x86::v1::Sse; pub use crate::x86::v1::Sse2; diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs index cea0faf9..f5270f4d 100644 --- a/fearless_simd_core/src/x86/v3/movbe.rs +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["movbe"] --- Move data after swapping bytes +/// [`movbe`] --- Move data after swapping bytes /// -/// ["movbe"]: https://www.felixcloutier.com/x86/movbe +/// [`movbe`]: https://www.felixcloutier.com/x86/movbe /// /// A token indicating that the current CPU has the `movbe` target feature. /// diff --git a/fearless_simd_core/src/x86/v4/mod.rs b/fearless_simd_core/src/x86/v4/mod.rs index e401ecb2..8946fdfb 100644 --- a/fearless_simd_core/src/x86/v4/mod.rs +++ b/fearless_simd_core/src/x86/v4/mod.rs @@ -1,3 +1,11 @@ +//! Target features enabled in the `x86-64-v4` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This module also contains [`V4`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. +//! +//! This level also implies that `x86-64-v3` is available. + pub use crate::x86::v1::Fxsr; pub use crate::x86::v1::Sse; pub use crate::x86::v1::Sse2; diff --git a/fearless_simd_core/src/x86/xsave/mod.rs b/fearless_simd_core/src/x86/xsave/mod.rs index aee24a74..f75aaa4c 100644 --- a/fearless_simd_core/src/x86/xsave/mod.rs +++ b/fearless_simd_core/src/x86/xsave/mod.rs @@ -1,3 +1,5 @@ +//! Target features relating to saving processor state, as used to implement operating systems. + #[expect( clippy::module_inception, reason = "The inner module is automatically generated." 
diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 19c8f2f5..1fba1b9e 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -9,7 +9,7 @@ use core::fmt::Debug; /// [`xsave`] --- Save processor extended states /// -/// ["xsave"]: https://www.felixcloutier.com/x86/xsave +/// [`xsave`]: https://www.felixcloutier.com/x86/xsave /// /// A token indicating that the current CPU has the `xsave` target feature. /// diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index df8033d1..1fa01186 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["xsavec"] --- Save processor extended states with compaction +/// [`xsavec`] --- Save processor extended states with compaction /// -/// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec +/// [`xsavec`]: https://www.felixcloutier.com/x86/xsavec /// /// A token indicating that the current CPU has the `xsavec` target feature. /// diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index 563c26c5..7dab1087 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["xsaveopt"] --- Save processor extended states optimized +/// [`xsaveopt`] --- Save processor extended states optimized /// -/// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt +/// [`xsaveopt`]: https://www.felixcloutier.com/x86/xsaveopt /// /// A token indicating that the current CPU has the `xsaveopt` target feature. 
/// diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index 8f365d8c..d26309bc 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["xsaves"] --- Save processor extended states supervisor +/// [`xsaves`] --- Save processor extended states supervisor /// -/// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves +/// [`xsaves`]: https://www.felixcloutier.com/x86/xsaves /// /// A token indicating that the current CPU has the `xsaves` target feature. /// From 060c5bc3d30adfaa8c14b6cd6ccece8dea15e952 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:23:43 +0100 Subject: [PATCH 14/19] Add copyright headers --- fearless_simd_core/gen/src/data.rs | 3 +++ fearless_simd_core/gen/src/data/x86.rs | 3 +++ fearless_simd_core/gen/src/main.rs | 20 +++++++++---------- fearless_simd_core/gen/templates/aarch64.rs | 4 ++++ fearless_simd_core/gen/templates/x86.rs | 5 +++++ fearless_simd_core/gen/templates/x86_level.rs | 5 +++++ fearless_simd_core/src/lib.rs | 3 +++ fearless_simd_core/src/trampoline.rs | 3 +++ fearless_simd_core/src/x86/adx/adx.rs | 4 +++- fearless_simd_core/src/x86/adx/mod.rs | 3 +++ fearless_simd_core/src/x86/avx/avx.rs | 4 +++- fearless_simd_core/src/x86/avx/avx2.rs | 4 +++- fearless_simd_core/src/x86/avx/avxifma.rs | 4 +++- .../src/x86/avx/avxneconvert.rs | 4 +++- fearless_simd_core/src/x86/avx/avxvnni.rs | 4 +++- .../src/x86/avx/avxvnniint16.rs | 4 +++- fearless_simd_core/src/x86/avx/avxvnniint8.rs | 4 +++- fearless_simd_core/src/x86/avx/mod.rs | 3 +++ .../src/x86/avx512/avx512bf16.rs | 4 +++- .../src/x86/avx512/avx512bitalg.rs | 4 +++- fearless_simd_core/src/x86/avx512/avx512bw.rs | 4 +++- fearless_simd_core/src/x86/avx512/avx512cd.rs | 4 +++- fearless_simd_core/src/x86/avx512/avx512dq.rs | 4 +++- 
fearless_simd_core/src/x86/avx512/avx512f.rs | 4 +++- .../src/x86/avx512/avx512fp16.rs | 4 +++- .../src/x86/avx512/avx512ifma.rs | 4 +++- .../src/x86/avx512/avx512vbmi.rs | 4 +++- .../src/x86/avx512/avx512vbmi2.rs | 4 +++- fearless_simd_core/src/x86/avx512/avx512vl.rs | 4 +++- .../src/x86/avx512/avx512vnni.rs | 4 +++- .../src/x86/avx512/avx512vp2intersect.rs | 4 +++- .../src/x86/avx512/avx512vpopcntdq.rs | 4 +++- fearless_simd_core/src/x86/avx512/mod.rs | 3 +++ fearless_simd_core/src/x86/crypto/aes.rs | 4 +++- fearless_simd_core/src/x86/crypto/gfni.rs | 4 +++- fearless_simd_core/src/x86/crypto/kl.rs | 4 +++- fearless_simd_core/src/x86/crypto/mod.rs | 3 +++ .../src/x86/crypto/pclmulqdq.rs | 4 +++- fearless_simd_core/src/x86/crypto/rdrand.rs | 4 +++- fearless_simd_core/src/x86/crypto/rdseed.rs | 4 +++- fearless_simd_core/src/x86/crypto/sha.rs | 4 +++- fearless_simd_core/src/x86/crypto/sha512.rs | 4 +++- fearless_simd_core/src/x86/crypto/sm3.rs | 4 +++- fearless_simd_core/src/x86/crypto/sm4.rs | 4 +++- fearless_simd_core/src/x86/crypto/vaes.rs | 4 +++- .../src/x86/crypto/vpclmulqdq.rs | 4 +++- fearless_simd_core/src/x86/crypto/widekl.rs | 4 +++- .../src/x86/discontinued/mod.rs | 3 +++ .../src/x86/discontinued/tbm.rs | 4 +++- fearless_simd_core/src/x86/mod.rs | 3 +++ fearless_simd_core/src/x86/sse/fxsr.rs | 4 +++- fearless_simd_core/src/x86/sse/mod.rs | 3 +++ fearless_simd_core/src/x86/sse/sse.rs | 4 +++- fearless_simd_core/src/x86/sse/sse2.rs | 4 +++- fearless_simd_core/src/x86/sse/sse3.rs | 4 +++- fearless_simd_core/src/x86/sse/sse4_1.rs | 4 +++- fearless_simd_core/src/x86/sse/sse4_2.rs | 4 +++- fearless_simd_core/src/x86/sse/sse4a.rs | 4 +++- fearless_simd_core/src/x86/sse/ssse3.rs | 4 +++- fearless_simd_core/src/x86/v1/level.rs | 3 +++ fearless_simd_core/src/x86/v1/mod.rs | 3 +++ fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 4 +++- fearless_simd_core/src/x86/v2/level.rs | 3 +++ fearless_simd_core/src/x86/v2/mod.rs | 3 +++ fearless_simd_core/src/x86/v2/popcnt.rs | 4 
+++- fearless_simd_core/src/x86/v3/bmi1.rs | 4 +++- fearless_simd_core/src/x86/v3/bmi2.rs | 4 +++- fearless_simd_core/src/x86/v3/f16c.rs | 4 +++- fearless_simd_core/src/x86/v3/fma.rs | 4 +++- fearless_simd_core/src/x86/v3/level.rs | 3 +++ fearless_simd_core/src/x86/v3/lzcnt.rs | 4 +++- fearless_simd_core/src/x86/v3/mod.rs | 3 +++ fearless_simd_core/src/x86/v3/movbe.rs | 4 +++- fearless_simd_core/src/x86/v4/level.rs | 3 +++ fearless_simd_core/src/x86/v4/mod.rs | 3 +++ fearless_simd_core/src/x86/xsave/mod.rs | 3 +++ fearless_simd_core/src/x86/xsave/xsave.rs | 4 +++- fearless_simd_core/src/x86/xsave/xsavec.rs | 4 +++- fearless_simd_core/src/x86/xsave/xsaveopt.rs | 4 +++- fearless_simd_core/src/x86/xsave/xsaves.rs | 4 +++- 80 files changed, 252 insertions(+), 66 deletions(-) diff --git a/fearless_simd_core/gen/src/data.rs b/fearless_simd_core/gen/src/data.rs index fe46d862..fb5726bb 100644 --- a/fearless_simd_core/gen/src/data.rs +++ b/fearless_simd_core/gen/src/data.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + mod x86; pub(crate) use x86::{ X86_FEATURES, X86_LEVEL_TEMPLATE, X86_TEMPLATE, X86_V1, X86_V2, X86_V3, X86_V4, diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index e2b1e9f2..1b96b5a8 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + use crate::Feature; macro_rules! 
f { diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index c3ef8b9e..35d8c233 100644 --- a/fearless_simd_core/gen/src/main.rs +++ b/fearless_simd_core/gen/src/main.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + mod data; use std::collections::HashSet; @@ -75,13 +78,10 @@ impl From for {type_path} {{ }}\n" ).unwrap(); } - let mut result = format!( - "// This file is automatically generated by `fearless_simd_core_gen`.\n\ - // Its template can be found in `fearless_simd_core/gen/templates`.\n\n\ - {template}" - ); + let mut result = String::from(template); // We replace the from impls first, as they use template variables from the rest of this. result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("// {AUTOGEN_COMMENT}\n", AUTOGEN_COMMENT); result = result.replace("{FEATURE_DOCS_NAME}", feature.feature.feature_docs_name); result = result.replace("/// {NEW_DOCS}\n", &new_docs); result = result.replace("{FEATURE_ID}", feature.feature.feature_name); @@ -235,13 +235,10 @@ impl From for {type_path} {{ ).unwrap(); } - let mut result = format!( - "// This file is automatically generated by `fearless_simd_core_gen`.\n\ - // Its template can be found in `fearless_simd_core/gen/templates`.\n\n\ - {X86_LEVEL_TEMPLATE}" - ); + let mut result = String::from(X86_LEVEL_TEMPLATE); // We replace the from impls first, as they use template variables from the rest of this. 
result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("// {AUTOGEN_COMMENT}", AUTOGEN_COMMENT); result = result.replace("LEVEL_STRUCT_NAME", &level_struct_name); result = result.replace("{LEVEL_ID}", level); result = result.replace("{LEVEL_FEATURE_LCD_CONTENTS}", &lcd_contents); @@ -269,6 +266,9 @@ impl From for {type_path} {{ Ok(()) } +const AUTOGEN_COMMENT: &str = "// This file is automatically generated by `fearless_simd_core_gen`.\n\ + // Its template can be found in `fearless_simd_core/gen/templates`."; + #[derive(Debug)] struct Feature { /// The name of the struct to be generated. diff --git a/fearless_simd_core/gen/templates/aarch64.rs b/fearless_simd_core/gen/templates/aarch64.rs index e69de29b..e8fbb89a 100644 --- a/fearless_simd_core/gen/templates/aarch64.rs +++ b/fearless_simd_core/gen/templates/aarch64.rs @@ -0,0 +1,4 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// {AUTOGEN_COMMENT} diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index 4c170fc8..918a0533 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -1,3 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// {AUTOGEN_COMMENT} + //! The {FEATURE_DOCS_NAME} target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs index c0212411..4aabad26 100644 --- a/fearless_simd_core/gen/templates/x86_level.rs +++ b/fearless_simd_core/gen/templates/x86_level.rs @@ -1,3 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// {AUTOGEN_COMMENT} + //! The x86-64-{LEVEL_ID} microarchitecture level. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 0638e8e6..7e3efbfb 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust. //! //! This crate introduces the [`trampoline!`] macro, which allows running code in a diff --git a/fearless_simd_core/src/trampoline.rs b/fearless_simd_core/src/trampoline.rs index 14c73f62..dde5a625 100644 --- a/fearless_simd_core/src/trampoline.rs +++ b/fearless_simd_core/src/trampoline.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Support for the safety checks in [`trampoline!`](crate::trampoline!). //! //! Methods to compute whether a each feature in a target feature string (e.g. "sse2,fma") diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs index 10e7b599..4cd2cd11 100644 --- a/fearless_simd_core/src/x86/adx/adx.rs +++ b/fearless_simd_core/src/x86/adx/adx.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The ADX target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/adx/mod.rs b/fearless_simd_core/src/x86/adx/mod.rs index 0fc70629..22a8b231 100644 --- a/fearless_simd_core/src/x86/adx/mod.rs +++ b/fearless_simd_core/src/x86/adx/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! The "adx" target feature, used for arbitrary precision integer addition. 
#[expect( diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs index 36804722..9e03dd5d 100644 --- a/fearless_simd_core/src/x86/avx/avx.rs +++ b/fearless_simd_core/src/x86/avx/avx.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs index d3fec9f9..2d243579 100644 --- a/fearless_simd_core/src/x86/avx/avx2.rs +++ b/fearless_simd_core/src/x86/avx/avx2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX2 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs index c6e1964d..d356ee46 100644 --- a/fearless_simd_core/src/x86/avx/avxifma.rs +++ b/fearless_simd_core/src/x86/avx/avxifma.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-IFMA target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs index 9f94fc89..41f67016 100644 --- a/fearless_simd_core/src/x86/avx/avxneconvert.rs +++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-NE-CONVERT target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs index 5e10181e..ed631a65 100644 --- a/fearless_simd_core/src/x86/avx/avxvnni.rs +++ b/fearless_simd_core/src/x86/avx/avxvnni.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-VNNI target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs index dab23460..270472f2 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint16.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-VNNI-INT16 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs index ff887660..907d4923 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint8.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-VNNI-INT8 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/mod.rs b/fearless_simd_core/src/x86/avx/mod.rs index 3261000e..65fe5757 100644 --- a/fearless_simd_core/src/x86/avx/mod.rs +++ b/fearless_simd_core/src/x86/avx/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features related to the Advanced Vector Extensions target features (before AVX-512). //! //! These are most commonly used through the [x86-64-v3](crate::x86::V3) microarchitecture level. diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs index 705252a5..ec932968 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bf16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-BF16 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs index 578fd883..d88e5582 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-BITALG target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs index 29b05829..9a07e5fa 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bw.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-BW target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs index e7ed2389..cce3c766 100644 --- a/fearless_simd_core/src/x86/avx512/avx512cd.rs +++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-CD target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs index 92b8c87b..5b444aa1 100644 --- a/fearless_simd_core/src/x86/avx512/avx512dq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-DQ target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs index bad51a08..6dfa381b 100644 --- a/fearless_simd_core/src/x86/avx512/avx512f.rs +++ b/fearless_simd_core/src/x86/avx512/avx512f.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-F target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs index b3ba12d5..9a03a700 100644 --- a/fearless_simd_core/src/x86/avx512/avx512fp16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-FP16 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs index 330e16a8..1c8866f4 100644 --- a/fearless_simd_core/src/x86/avx512/avx512ifma.rs +++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-IFMA target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs index 2811eb14..3e5dde51 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VBMI target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs index aa209c2d..d1187a91 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VBMI2 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs index 4089a4df..aa6b95b5 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vl.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VL target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vnni.rs b/fearless_simd_core/src/x86/avx512/avx512vnni.rs index 9703f9b6..d5856997 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vnni.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vnni.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VNNI target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs index 6a3bdd17..676ec580 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VP2INTERSECT target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs index ada57947..c1b39eee 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VPOPCNTDQ target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/mod.rs b/fearless_simd_core/src/x86/avx512/mod.rs index abc53fde..5a0ca606 100644 --- a/fearless_simd_core/src/x86/avx512/mod.rs +++ b/fearless_simd_core/src/x86/avx512/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features related to the 512-bit extensions to [AVX](crate::x86::avx). //! //! Many of these are part of the [x86-64-v4](crate::x86::V4) microarchitecture level. diff --git a/fearless_simd_core/src/x86/crypto/aes.rs b/fearless_simd_core/src/x86/crypto/aes.rs index afe9a2ee..bcc8145d 100644 --- a/fearless_simd_core/src/x86/crypto/aes.rs +++ b/fearless_simd_core/src/x86/crypto/aes.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AES target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/gfni.rs b/fearless_simd_core/src/x86/crypto/gfni.rs index 3e00a923..8267392b 100644 --- a/fearless_simd_core/src/x86/crypto/gfni.rs +++ b/fearless_simd_core/src/x86/crypto/gfni.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The GFNI target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/kl.rs b/fearless_simd_core/src/x86/crypto/kl.rs index 722eb6db..c1023e40 100644 --- a/fearless_simd_core/src/x86/crypto/kl.rs +++ b/fearless_simd_core/src/x86/crypto/kl.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The KEYLOCKER target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/mod.rs b/fearless_simd_core/src/x86/crypto/mod.rs index 39a3c923..7154cf89 100644 --- a/fearless_simd_core/src/x86/crypto/mod.rs +++ b/fearless_simd_core/src/x86/crypto/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Cryptogryphy related target features, including hashing, random number generation, and encryption. //! //! These are not generally part of the standardised microarchitecture levels. 
diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs index 4ad6e376..357f8e4f 100644 --- a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `pclmulqdq` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs index 38d211bb..0f3800a0 100644 --- a/fearless_simd_core/src/x86/crypto/rdrand.rs +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `rdrand` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs index 08730295..b7ca6b56 100644 --- a/fearless_simd_core/src/x86/crypto/rdseed.rs +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `rdseed` target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/sha.rs b/fearless_simd_core/src/x86/crypto/sha.rs index 3479ce3d..3e9e2cab 100644 --- a/fearless_simd_core/src/x86/crypto/sha.rs +++ b/fearless_simd_core/src/x86/crypto/sha.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SHA target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/sha512.rs b/fearless_simd_core/src/x86/crypto/sha512.rs index 818ef884..9aff5b82 100644 --- a/fearless_simd_core/src/x86/crypto/sha512.rs +++ b/fearless_simd_core/src/x86/crypto/sha512.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SHA512 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/sm3.rs b/fearless_simd_core/src/x86/crypto/sm3.rs index fff96832..2ad3d2ec 100644 --- a/fearless_simd_core/src/x86/crypto/sm3.rs +++ b/fearless_simd_core/src/x86/crypto/sm3.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SM3 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/sm4.rs b/fearless_simd_core/src/x86/crypto/sm4.rs index 66a48b3d..14479e31 100644 --- a/fearless_simd_core/src/x86/crypto/sm4.rs +++ b/fearless_simd_core/src/x86/crypto/sm4.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SM4 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/vaes.rs b/fearless_simd_core/src/x86/crypto/vaes.rs index 16ddb321..9e634b77 100644 --- a/fearless_simd_core/src/x86/crypto/vaes.rs +++ b/fearless_simd_core/src/x86/crypto/vaes.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The VAES target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs index 342af9d3..66e135b2 100644 --- a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The VPCLMULQDQ target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/widekl.rs b/fearless_simd_core/src/x86/crypto/widekl.rs index a9601bde..1acfb8d9 100644 --- a/fearless_simd_core/src/x86/crypto/widekl.rs +++ b/fearless_simd_core/src/x86/crypto/widekl.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The WIDE KEYLOCKER target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs index ed82fed0..f1cc4f63 100644 --- a/fearless_simd_core/src/x86/discontinued/mod.rs +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Discontinued x86-64 target features. //! //! That is target features which were present on some CPUs, but later CPU families from the diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs index 572f756e..ab875c82 100644 --- a/fearless_simd_core/src/x86/discontinued/tbm.rs +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The TBM target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index 206589e8..951a532c 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! 
Target feature tokens for the x86 and x86-64 CPU families. //! //! The general computation [microarchitecture level]s each have a level in this module. diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs index c1315c39..d7dfe791 100644 --- a/fearless_simd_core/src/x86/sse/fxsr.rs +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `fxsave + fxrstor` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs index 03e0320c..ecd7d84b 100644 --- a/fearless_simd_core/src/x86/sse/mod.rs +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features related to Streaming SIMD Extensions. //! //! These are the predecessors to the [AVX](crate::x86::avx) instructions. diff --git a/fearless_simd_core/src/x86/sse/sse.rs b/fearless_simd_core/src/x86/sse/sse.rs index ad020577..8f40bb76 100644 --- a/fearless_simd_core/src/x86/sse/sse.rs +++ b/fearless_simd_core/src/x86/sse/sse.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse2.rs b/fearless_simd_core/src/x86/sse/sse2.rs index c86ce42f..089bda1a 100644 --- a/fearless_simd_core/src/x86/sse/sse2.rs +++ b/fearless_simd_core/src/x86/sse/sse2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE2 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse3.rs b/fearless_simd_core/src/x86/sse/sse3.rs index 27789c9a..c86b454c 100644 --- a/fearless_simd_core/src/x86/sse/sse3.rs +++ b/fearless_simd_core/src/x86/sse/sse3.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE3 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse4_1.rs b/fearless_simd_core/src/x86/sse/sse4_1.rs index 3f2b75bc..cbce281f 100644 --- a/fearless_simd_core/src/x86/sse/sse4_1.rs +++ b/fearless_simd_core/src/x86/sse/sse4_1.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE4.1 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse4_2.rs b/fearless_simd_core/src/x86/sse/sse4_2.rs index 0794c2e1..6054559d 100644 --- a/fearless_simd_core/src/x86/sse/sse4_2.rs +++ b/fearless_simd_core/src/x86/sse/sse4_2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE4.2 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs index f3562cab..d6bf8769 100644 --- a/fearless_simd_core/src/x86/sse/sse4a.rs +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE4a target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/ssse3.rs b/fearless_simd_core/src/x86/sse/ssse3.rs index e5a214b9..45305bf7 100644 --- a/fearless_simd_core/src/x86/sse/ssse3.rs +++ b/fearless_simd_core/src/x86/sse/ssse3.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSSE3 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v1/level.rs b/fearless_simd_core/src/x86/v1/level.rs index 86fc6141..889db039 100644 --- a/fearless_simd_core/src/x86/v1/level.rs +++ b/fearless_simd_core/src/x86/v1/level.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs index 02d27c5a..58d5dcf8 100644 --- a/fearless_simd_core/src/x86/v1/mod.rs +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features enabled in the `x86-64-v1` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! //! This can usually be treated as the baseline for x86-64 support; all of the target features in this module are enabled by diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs index dc123141..1b6d002a 100644 --- a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `cmpxchg16b` target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v2/level.rs b/fearless_simd_core/src/x86/v2/level.rs index 5f01a232..97c66b7b 100644 --- a/fearless_simd_core/src/x86/v2/level.rs +++ b/fearless_simd_core/src/x86/v2/level.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. diff --git a/fearless_simd_core/src/x86/v2/mod.rs b/fearless_simd_core/src/x86/v2/mod.rs index 666414b2..a5032b1b 100644 --- a/fearless_simd_core/src/x86/v2/mod.rs +++ b/fearless_simd_core/src/x86/v2/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features enabled in the `x86-64-v2` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! //! This module also contains [`V2`], which is a token indicating that this level is available. diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs index 0a81347a..d7c71c75 100644 --- a/fearless_simd_core/src/x86/v2/popcnt.rs +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `popcnt` target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/bmi1.rs b/fearless_simd_core/src/x86/v3/bmi1.rs index 92362dbe..f887a9dc 100644 --- a/fearless_simd_core/src/x86/v3/bmi1.rs +++ b/fearless_simd_core/src/x86/v3/bmi1.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The 1 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/bmi2.rs b/fearless_simd_core/src/x86/v3/bmi2.rs index 55b97dcc..af3eeefe 100644 --- a/fearless_simd_core/src/x86/v3/bmi2.rs +++ b/fearless_simd_core/src/x86/v3/bmi2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The BMI2 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/f16c.rs b/fearless_simd_core/src/x86/v3/f16c.rs index aacb30e9..53ad8318 100644 --- a/fearless_simd_core/src/x86/v3/f16c.rs +++ b/fearless_simd_core/src/x86/v3/f16c.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The F16C target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/fma.rs b/fearless_simd_core/src/x86/v3/fma.rs index e051013b..44d01709 100644 --- a/fearless_simd_core/src/x86/v3/fma.rs +++ b/fearless_simd_core/src/x86/v3/fma.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The FMA3 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/level.rs b/fearless_simd_core/src/x86/v3/level.rs index ad607a4f..f24edb4e 100644 --- a/fearless_simd_core/src/x86/v3/level.rs +++ b/fearless_simd_core/src/x86/v3/level.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs index e9c629dc..d5283fc6 100644 --- a/fearless_simd_core/src/x86/v3/lzcnt.rs +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `lzcnt` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/mod.rs b/fearless_simd_core/src/x86/v3/mod.rs index 1a3ec5ed..c67583a9 100644 --- a/fearless_simd_core/src/x86/v3/mod.rs +++ b/fearless_simd_core/src/x86/v3/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! 
Target features enabled in the `x86-64-v3` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! //! This module also contains [`V3`], which is a token indicating that this level is available. diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs index f5270f4d..91ee22bb 100644 --- a/fearless_simd_core/src/x86/v3/movbe.rs +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `movbe` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v4/level.rs b/fearless_simd_core/src/x86/v4/level.rs index b3acc6f1..1e6cad45 100644 --- a/fearless_simd_core/src/x86/v4/level.rs +++ b/fearless_simd_core/src/x86/v4/level.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. diff --git a/fearless_simd_core/src/x86/v4/mod.rs b/fearless_simd_core/src/x86/v4/mod.rs index 8946fdfb..7f3cd1ee 100644 --- a/fearless_simd_core/src/x86/v4/mod.rs +++ b/fearless_simd_core/src/x86/v4/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features enabled in the `x86-64-v4` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! //! This module also contains [`V4`], which is a token indicating that this level is available. 
diff --git a/fearless_simd_core/src/x86/xsave/mod.rs b/fearless_simd_core/src/x86/xsave/mod.rs index f75aaa4c..406dd54c 100644 --- a/fearless_simd_core/src/x86/xsave/mod.rs +++ b/fearless_simd_core/src/x86/xsave/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features relating to saving processor state, as used to implement operating systems. #[expect( diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 1fba1b9e..1507efb7 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `xsave` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index 1fa01186..b84f7912 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `xsavec` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index 7dab1087..3454d9ca 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. 
// Its template can be found in `fearless_simd_core/gen/templates`. - //! The `xsaveopt` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index d26309bc..c5277468 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `xsaves` target feature. use crate::{TargetFeatureToken, trampoline}; From 674227ec968d2471da00e6e3c101baaae9c31128 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:32:31 +0100 Subject: [PATCH 15/19] Add a CI check for the new generator --- .github/workflows/ci.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5c1919d0..229965e2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -192,6 +192,13 @@ jobs: - name: run code generator run: cargo run --bin fearless_simd_gen + - name: run core code generator + run: cargo run --bin fearless_simd_core_gen + + - name: Reformat (Fearless SIMD Core) + # The code generator for Fearless SIMD Core does not do this. 
+ run: cargo fmt -p fearless_simd_core + - name: check for uncommitted changes run: git diff --exit-code From 769e44e4a1f9482bcb6bb63b81da6f2b419101d3 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 17 Oct 2025 13:31:08 +0100 Subject: [PATCH 16/19] Fixup docs on `vectorize` --- fearless_simd_core/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 7e3efbfb..418b49d9 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -94,9 +94,9 @@ pub unsafe trait TargetFeatureToken: Copy { /// /// `f` must be marked `#[inline(always)]` for this to work. /// - /// Note that this does *not* enable the target features on the Rust side (e.g. for calling). + /// Note that this does *not* enable the target features on the Rust side (i.e. for calling intrinsics safely). /// To do so, you should instead use [`trampoline!`] directly - this is a convenience wrapper around `trampoline` - /// for cases where the dispatch of simd values is handled elsewhere. + /// for cases where either autovectorisation is sufficient, or dispatch to simd intrinsics is handled elsewhere. fn vectorize(self, f: impl FnOnce() -> R) -> R; } From af91c1e90f80d51b242fb3ee7d8aa3a3908299ca Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:56:10 +0100 Subject: [PATCH 17/19] Address various review feedback and other cleanups Rename `trampoline.rs` to `support.rs` The old name conflicted with the name of the macro, leading to it being harder to find the docs of the macro itself. 
Remove unneeded reference Remove entire note on 128 bytes being too small The point it was making was: - Fairly hard to explain - Not necessarily true Add a few more test cases Co-authored-by: Taj Pereira --- fearless_simd_core/src/lib.rs | 81 +++++++++++-------- .../src/{trampoline.rs => support.rs} | 19 ++++- 2 files changed, 64 insertions(+), 36 deletions(-) rename fearless_simd_core/src/{trampoline.rs => support.rs} (91%) diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 418b49d9..d5bcf004 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -11,30 +11,32 @@ //! other `#[target_feature]` functions. //! As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`]. //! -//! This crate also has modules which contain tokens for each Rust target features. -//! These allow safely validating that a target feature is available, and obtaining a token. +//! This crate also has modules which contain a token for each Rust target feature. +//! These each have a `try_new` constructor, which validates whether the corresponding +//! target feature is available, then creates a token if it is. //! These are grouped by architecture: //! //! - [`x86`] contains the tokens for both the x86 and x86-64 targets. -//! It also contains tokens for each x86-64 microarchitecture level, see [`x86::V1`] for details. +//! It also contains a token for each x86-64 microarchitecture level, see [`x86::V1`] for details. //! //! //! # Examples //! -//! At the time of writing, it is not possible to turn scalar values into SIMD +//! At the time of writing, it is not possible to turn scalar values into SIMD //! vector types safely using only the standard library. //! These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. //! -//! +//! Note: These examples are currently pending. +//! //! //! Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature. //! 
This is also the case for WASM with `wasm_simd`, but note that this crate //! [isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to -//! call `#[target_features]` on that platform. +//! call `#[target_feature]` functions on that platform. //! //! # Crate Feature Flags //! -//! +//! //! //! # Implementation //! @@ -67,18 +69,18 @@ #[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))] pub mod x86; -pub mod trampoline; +pub mod support; #[cfg(feature = "std")] extern crate std; -/// Token that a set of target feature is available. +/// Token which proves that a set of target feature is available. /// /// Note that this trait is only meaningful when there are values of this type. /// That is, to enable the target features in `FEATURES`, you *must* have a value /// of this type. /// -/// Values which implement this trait are used in the second argument to [`trampoline!`], +/// Values which implement this trait are used in the first argument to [`trampoline!`], /// which is a safe abstraction over enabling target features. /// /// # Safety @@ -104,43 +106,58 @@ pub unsafe trait TargetFeatureToken: Copy { /// /// This is effectively a stable implementation of the "Struct Target Features" Rust feature, /// which at the time of writing is neither in stable or nightly Rust. -/// This macro can be used to make SIMD dispatch safe in addition to make explicit SIMD, both safely. +/// This macro can be used to make both SIMD dispatch and explicit SIMD safe. /// /// # Reference /// -/// These reference examples presume that you have (values in brackets are the "variables"): +/// These reference examples presume that you have the following. 
+/// The parts of the examples referring to each prerequisite are provided in the brackets: /// -/// - An expression (`token`) of a type (`Token`) which is `TargetFeatureToken` for some target features (`"f1,f2,f3"`); -/// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset of those target features (`"f1,f2"`); +/// - An expression (`token`) of a type (`Token`) which implements `TargetFeatureToken` for some target features (`"f1,f2,f3"`); +/// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset +/// of those target features (annotated `#[target_feature(enable = "f1,f2")]`); /// - Local values of types corresponding to the argument types (`a` of type `[f32; 4]`) /// /// ```rust,ignore -/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// trampoline!(Token = token => "f1,f2,f3", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// // Or equivalently, as `uses_simd` doesn't require `f3`: +/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]); /// ``` /// -/// Multiple tokens are also supported by providing them in a sequence in square brackets: +/// Multiple tokens are also supported by providing them in a sequence in square brackets. 
+/// The target feature string must be a subset of the total features made available by the tokens: /// /// ```rust,ignore /// trampoline!([Token = token, Sse = my_sse] => "f1,f2,sse", uses_simd(a: [f32; 4]) -> [f32; 4]) /// ``` /// +/// This is fully validated for safety, so the following example would fail to compile: +/// +/// ```rust,ignore,compile_fail +/// // ERROR: call to function `uses_simd` with `#[target_feature]` is unsafe and requires unsafe block +/// // in order for the call to be safe, the context requires the following additional target feature: f2 +/// trampoline!(Token = token => "f1", uses_simd(a: [f32; 4]) -> [f32; 4]); +/// ``` +/// /// A more advanced syntax is available if you need to use generics. -/// That syntax is explained in comments around the macro's definition, which can be seen above. +/// That syntax is explained in comments around the macro's definition. /// For reference, the implementation used to implement [`vectorize`](TargetFeatureToken::vectorize) for `"sse"` is: /// /// ```rust,ignore /// trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) /// ``` /// -/// There is also support for where clauses after the return type. +/// There is also support for a where clause, after the return type. /// /// # Motivation /// -/// In Fearless SIMD, this macro has two primary use cases: +/// In Fearless SIMD, this macro is used in three ways primary use cases: /// -/// 1) To dispatch to a specialised SIMD implementation of a function using target specific -/// instructions which will be more efficient than generic version written using the portable subset. +/// 1) By end-users, to dispatch to a specialised SIMD implementation of a function using target specific +/// instructions, which will be more efficient than generic version written using the portable subset. /// 2) To implement the portable subset of SIMD operations. 
+/// 3) To implement the `dispatch!` macro and `Simd::vectorize`, which allows SIMD intrinsics to +/// be correctly inlined when writing portable SIMD code. /// /// To expand on use case 1, when using Fearless SIMD you will often be writing functions which are /// instantiated for multiple different SIMD levels (using generics). @@ -176,12 +193,6 @@ pub unsafe trait TargetFeatureToken: Copy { /// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]) /// ``` /// -/// Note that a function only operating on 128 bytes is probably too small for checking -/// whether a token exists just for it is worthwhile. -/// However, if you have amorphised the cost of that check between many function calls, -/// the `trampoline!` macro itself compiles down to a function call. -/// (This would be the case when this macro is being used to implement the portable subset of SIMD operations) -/// // TODO: We could write an example for each of ARM, x86, and conditionally compile it in? /// Note that our examples are all ignored as there is no target feature which is available on every platform, /// but we need these docs to compile for users on any platform. @@ -229,12 +240,14 @@ macro_rules! trampoline { // We validate that we actually have a token of each claimed type. let _: $token_type = $token; )+ - // We use a const item rather than a const block to ensure that. - // This does mean that you can no longer use tokens "generically", but it's hard to think of - // cases where that would be usable anyway. + // We use a const item rather than a const block to ensure that the const evaluation happens eagerly, + // ensuring that we don't create functions which look valid but actually will always fail when actually codegenned. + // This does mean that you can't use tokens "generically", but it's hard to think of cases where that + // would be usable anyway. 
For any case where that is valid, you can always manually create the + // "subsetted" token/tokens beforehand using the `From` impls. const _: () = { // And that the claimed types justify enabling the enabled target features. - $crate::trampoline::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+]) + $crate::support::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+]) // TODO: Better failure message here (i.e. at least concatting the set of requested features) .unwrap(); }; @@ -319,13 +332,13 @@ mod example_expansion { { sse_mul_f32s(a, b) } } let _: Sse = sse; - const { - crate::trampoline::is_feature_subset( + const _: () = { + crate::support::is_feature_subset( "sse", [::FEATURES], ) .unwrap(); - } + }; #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] let a = a; #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] diff --git a/fearless_simd_core/src/trampoline.rs b/fearless_simd_core/src/support.rs similarity index 91% rename from fearless_simd_core/src/trampoline.rs rename to fearless_simd_core/src/support.rs index dde5a625..96331d16 100644 --- a/fearless_simd_core/src/trampoline.rs +++ b/fearless_simd_core/src/support.rs @@ -45,7 +45,7 @@ impl SubsetResult { } /// Determine whether the features in the target feature string `required` are a subset of the features in `permitted`. -/// See the module level docs [self]. +/// See [the module level docs][self]. /// /// We require static lifetimes as this is primarily internal to the macro. pub const fn is_feature_subset( @@ -62,7 +62,7 @@ pub const fn is_feature_subset( } // `comma_idx` is now the index of the comma, e.g. if the string was "sse,", idx would be 3 // This is the feature we need to validate exists in permitted. 
- let (to_find, remaining_required) = &required_bytes.split_at(comma_idx); + let (to_find, remaining_required) = required_bytes.split_at(comma_idx); if let [comma, rest @ ..] = remaining_required { if *comma != b',' { panic!("Internal failure of expected behaviour."); @@ -199,6 +199,8 @@ mod tests { expect_failure("c,a,b", [&["a", "b"]], "c"); expect_success("a,b", [&["a", "b", "c"]]); expect_failure("a,b", [&["a", "c"]], "b"); + expect_success("a,b,a,a", [&["a", "b", "c"]]); + expect_success("a,b,c", [&["c"], &["b"], &["a"]]); // Check it correctly catches more than single item failures expect_success("a1,a2,a3", [&["a1", "a2", "a3"]]); @@ -220,10 +222,23 @@ mod tests { expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); } + #[test] + fn incorrect_token() { + // The permitted list here only allows features which are the literal `a1,a2` + // This is completely impossible to pass, but it's worth checking + expect_any_failure("a1,a2", [&["a1,a2"]]); + } + #[test] fn empty_feature() { expect_failure("a,b,", [&["a", "b"]], ""); expect_failure("", [&["a", "b"]], ""); + + // We succeed if the empty target feature is allowed; any case where this is relevant will always + // be validated away by rustc anyway, as there is no target with the target feature `""`. + // As such, there's no harm in being flexible here. 
+ expect_success("", [&[""]]); + expect_success(",,,,,,", [&[""]]); } #[test] From 355009b1b91acd6ce088d984f281454299f1525a Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:59:01 +0100 Subject: [PATCH 18/19] Clean up the stuff about licensing --- fearless_simd_core/gen/src/data/x86.rs | 5 ++--- fearless_simd_core/gen/templates/x86.rs | 1 - fearless_simd_core/gen/templates/x86_level.rs | 1 - fearless_simd_core/src/lib.rs | 1 - fearless_simd_core/src/support.rs | 2 +- fearless_simd_core/src/x86/adx/adx.rs | 1 - fearless_simd_core/src/x86/avx/avx.rs | 1 - fearless_simd_core/src/x86/avx/avx2.rs | 1 - fearless_simd_core/src/x86/avx/avxifma.rs | 1 - fearless_simd_core/src/x86/avx/avxneconvert.rs | 1 - fearless_simd_core/src/x86/avx/avxvnni.rs | 1 - fearless_simd_core/src/x86/avx/avxvnniint16.rs | 1 - fearless_simd_core/src/x86/avx/avxvnniint8.rs | 1 - fearless_simd_core/src/x86/avx512/avx512bf16.rs | 1 - fearless_simd_core/src/x86/avx512/avx512bitalg.rs | 1 - fearless_simd_core/src/x86/avx512/avx512bw.rs | 1 - fearless_simd_core/src/x86/avx512/avx512cd.rs | 1 - fearless_simd_core/src/x86/avx512/avx512dq.rs | 1 - fearless_simd_core/src/x86/avx512/avx512f.rs | 1 - fearless_simd_core/src/x86/avx512/avx512fp16.rs | 1 - fearless_simd_core/src/x86/avx512/avx512ifma.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vbmi.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vbmi2.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vl.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vnni.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs | 1 - fearless_simd_core/src/x86/crypto/aes.rs | 1 - fearless_simd_core/src/x86/crypto/gfni.rs | 1 - fearless_simd_core/src/x86/crypto/kl.rs | 1 - fearless_simd_core/src/x86/crypto/pclmulqdq.rs | 1 - fearless_simd_core/src/x86/crypto/rdrand.rs | 1 - fearless_simd_core/src/x86/crypto/rdseed.rs | 1 - 
fearless_simd_core/src/x86/crypto/sha.rs | 1 - fearless_simd_core/src/x86/crypto/sha512.rs | 1 - fearless_simd_core/src/x86/crypto/sm3.rs | 1 - fearless_simd_core/src/x86/crypto/sm4.rs | 1 - fearless_simd_core/src/x86/crypto/vaes.rs | 1 - fearless_simd_core/src/x86/crypto/vpclmulqdq.rs | 1 - fearless_simd_core/src/x86/crypto/widekl.rs | 1 - fearless_simd_core/src/x86/discontinued/tbm.rs | 1 - fearless_simd_core/src/x86/mod.rs | 7 +++++++ fearless_simd_core/src/x86/sse/fxsr.rs | 1 - fearless_simd_core/src/x86/sse/sse.rs | 1 - fearless_simd_core/src/x86/sse/sse2.rs | 1 - fearless_simd_core/src/x86/sse/sse3.rs | 1 - fearless_simd_core/src/x86/sse/sse4_1.rs | 1 - fearless_simd_core/src/x86/sse/sse4_2.rs | 1 - fearless_simd_core/src/x86/sse/sse4a.rs | 1 - fearless_simd_core/src/x86/sse/ssse3.rs | 1 - fearless_simd_core/src/x86/v1/level.rs | 1 - fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 1 - fearless_simd_core/src/x86/v2/level.rs | 1 - fearless_simd_core/src/x86/v2/popcnt.rs | 1 - fearless_simd_core/src/x86/v3/bmi1.rs | 1 - fearless_simd_core/src/x86/v3/bmi2.rs | 1 - fearless_simd_core/src/x86/v3/f16c.rs | 1 - fearless_simd_core/src/x86/v3/fma.rs | 1 - fearless_simd_core/src/x86/v3/level.rs | 1 - fearless_simd_core/src/x86/v3/lzcnt.rs | 1 - fearless_simd_core/src/x86/v3/movbe.rs | 1 - fearless_simd_core/src/x86/v4/level.rs | 1 - fearless_simd_core/src/x86/xsave/xsave.rs | 1 - fearless_simd_core/src/x86/xsave/xsavec.rs | 1 - fearless_simd_core/src/x86/xsave/xsaveopt.rs | 1 - fearless_simd_core/src/x86/xsave/xsaves.rs | 1 - 66 files changed, 10 insertions(+), 67 deletions(-) diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 1b96b5a8..841dac60 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -22,11 +22,10 @@ macro_rules! 
f { pub(crate) const X86_TEMPLATE: &str = include_str!("../../templates/x86.rs"); -// Data taken from: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 +// Data adapted from: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 // (specifically, at https://github.com/rust-lang/reference/blob/1d930e1d5a27e114b4d22a50b0b6cd3771b92e31/src/attributes/codegen.md#x86-or-x86_64) -// TODO: Do we need to add their license attribution to our license? // TODO: Check set against https://doc.rust-lang.org/stable/std/macro.is_x86_feature_detected.html -// In particular, we're missing lahfsahf +// In particular, we seem to be missing lahfsahf (not stable?) pub(crate) const X86_FEATURES: &[Feature] = &[ f!( /// [ADX] --- Multi-Precision Add-Carry Instruction Extensions diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index 918a0533..777dcae3 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -59,7 +59,6 @@ impl FEATURE_STRUCT_NAME { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("{FEATURE_ID}") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs index 4aabad26..51aaa6d9 100644 --- a/fearless_simd_core/gen/templates/x86_level.rs +++ b/fearless_simd_core/gen/templates/x86_level.rs @@ -86,7 +86,6 @@ impl LEVEL_STRUCT_NAME { } } } -// TODO: From impls to convert into lower x86 versions. 
/*{FROM_IMPLS}*/ diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index d5bcf004..924bbd0b 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -65,7 +65,6 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![no_std] -// TODO: Do we want both an `x86` and `x86_64` module? #[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))] pub mod x86; diff --git a/fearless_simd_core/src/support.rs b/fearless_simd_core/src/support.rs index 96331d16..cf530b3a 100644 --- a/fearless_simd_core/src/support.rs +++ b/fearless_simd_core/src/support.rs @@ -236,7 +236,7 @@ mod tests { // We succeed if the empty target feature is allowed; any case where this is relevant will always // be validated away by rustc anyway, as there is no target with the target feature `""`. - // As such, there's no harm in being flexible here. + // As such, there's no harm in being flexible here.git expect_success("", [&[""]]); expect_success(",,,,,,", [&[""]]); } diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs index 4cd2cd11..339dbbd0 100644 --- a/fearless_simd_core/src/x86/adx/adx.rs +++ b/fearless_simd_core/src/x86/adx/adx.rs @@ -61,7 +61,6 @@ impl Adx { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("adx") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs index 9e03dd5d..3885ebbc 100644 --- a/fearless_simd_core/src/x86/avx/avx.rs +++ b/fearless_simd_core/src/x86/avx/avx.rs @@ -61,7 +61,6 @@ impl Avx { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. 
- // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs index 2d243579..9cfdaab6 100644 --- a/fearless_simd_core/src/x86/avx/avx2.rs +++ b/fearless_simd_core/src/x86/avx/avx2.rs @@ -63,7 +63,6 @@ impl Avx2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx2") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs index d356ee46..870bb988 100644 --- a/fearless_simd_core/src/x86/avx/avxifma.rs +++ b/fearless_simd_core/src/x86/avx/avxifma.rs @@ -63,7 +63,6 @@ impl Avxifma { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxifma") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs index 41f67016..0009d58f 100644 --- a/fearless_simd_core/src/x86/avx/avxneconvert.rs +++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs @@ -71,7 +71,6 @@ impl Avxneconvert { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxneconvert") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs index ed631a65..e385386e 100644 --- a/fearless_simd_core/src/x86/avx/avxvnni.rs +++ b/fearless_simd_core/src/x86/avx/avxvnni.rs @@ -63,7 +63,6 @@ impl Avxvnni { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxvnni") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs index 270472f2..e213c938 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint16.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs @@ -71,7 +71,6 @@ impl Avxvnniint16 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxvnniint16") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs index 907d4923..7caa7251 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint8.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs @@ -71,7 +71,6 @@ impl Avxvnniint8 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxvnniint8") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs index ec932968..62bbaa69 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bf16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs @@ -75,7 +75,6 @@ impl Avx512bf16 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512bf16") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs index d88e5582..226cffab 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs @@ -76,7 +76,6 @@ impl Avx512bitalg { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512bitalg") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs index 9a07e5fa..b5aab6f6 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bw.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs @@ -64,7 +64,6 @@ impl Avx512bw { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512bw") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs index cce3c766..39c81d5f 100644 --- a/fearless_simd_core/src/x86/avx512/avx512cd.rs +++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs @@ -64,7 +64,6 @@ impl Avx512cd { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512cd") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs index 5b444aa1..abc3c32a 100644 --- a/fearless_simd_core/src/x86/avx512/avx512dq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs @@ -64,7 +64,6 @@ impl Avx512dq { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512dq") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs index 6dfa381b..a25c9255 100644 --- a/fearless_simd_core/src/x86/avx512/avx512f.rs +++ b/fearless_simd_core/src/x86/avx512/avx512f.rs @@ -63,7 +63,6 @@ impl Avx512f { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512f") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs index 9a03a700..b76df903 100644 --- a/fearless_simd_core/src/x86/avx512/avx512fp16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs @@ -75,7 +75,6 @@ impl Avx512fp16 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512fp16") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs index 1c8866f4..dd74a8a2 100644 --- a/fearless_simd_core/src/x86/avx512/avx512ifma.rs +++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs @@ -74,7 +74,6 @@ impl Avx512ifma { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512ifma") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs index 3e5dde51..38eb6e99 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs @@ -75,7 +75,6 @@ impl Avx512vbmi { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vbmi") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs index d1187a91..b172416d 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs @@ -75,7 +75,6 @@ impl Avx512vbmi2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vbmi2") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs index aa6b95b5..983bc3fc 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vl.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs @@ -64,7 +64,6 @@ impl Avx512vl { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vl") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vnni.rs b/fearless_simd_core/src/x86/avx512/avx512vnni.rs index d5856997..f0037c2f 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vnni.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vnni.rs @@ -74,7 +74,6 @@ impl Avx512vnni { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vnni") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs index 676ec580..5294dcbe 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs @@ -74,7 +74,6 @@ impl Avx512vp2intersect { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vp2intersect") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs index c1b39eee..f16c735f 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs @@ -74,7 +74,6 @@ impl Avx512vpopcntdq { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vpopcntdq") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/aes.rs b/fearless_simd_core/src/x86/crypto/aes.rs index bcc8145d..65a1aeaa 100644 --- a/fearless_simd_core/src/x86/crypto/aes.rs +++ b/fearless_simd_core/src/x86/crypto/aes.rs @@ -61,7 +61,6 @@ impl Aes { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("aes") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/gfni.rs b/fearless_simd_core/src/x86/crypto/gfni.rs index 8267392b..f8b12371 100644 --- a/fearless_simd_core/src/x86/crypto/gfni.rs +++ b/fearless_simd_core/src/x86/crypto/gfni.rs @@ -61,7 +61,6 @@ impl Gfni { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("gfni") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/kl.rs b/fearless_simd_core/src/x86/crypto/kl.rs index c1023e40..45361b81 100644 --- a/fearless_simd_core/src/x86/crypto/kl.rs +++ b/fearless_simd_core/src/x86/crypto/kl.rs @@ -61,7 +61,6 @@ impl Keylocker { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("kl") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs index 357f8e4f..31d7f60a 100644 --- a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -61,7 +61,6 @@ impl Pclmulqdq { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("pclmulqdq") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs index 0f3800a0..c37ab595 100644 --- a/fearless_simd_core/src/x86/crypto/rdrand.rs +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -61,7 +61,6 @@ impl Rdrand { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("rdrand") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs index b7ca6b56..2f052c0b 100644 --- a/fearless_simd_core/src/x86/crypto/rdseed.rs +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -61,7 +61,6 @@ impl Rdseed { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("rdseed") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/sha.rs b/fearless_simd_core/src/x86/crypto/sha.rs index 3e9e2cab..1788683d 100644 --- a/fearless_simd_core/src/x86/crypto/sha.rs +++ b/fearless_simd_core/src/x86/crypto/sha.rs @@ -61,7 +61,6 @@ impl Sha { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sha") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/sha512.rs b/fearless_simd_core/src/x86/crypto/sha512.rs index 9aff5b82..f1116dc2 100644 --- a/fearless_simd_core/src/x86/crypto/sha512.rs +++ b/fearless_simd_core/src/x86/crypto/sha512.rs @@ -63,7 +63,6 @@ impl Sha512 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sha512") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/sm3.rs b/fearless_simd_core/src/x86/crypto/sm3.rs index 2ad3d2ec..663f3467 100644 --- a/fearless_simd_core/src/x86/crypto/sm3.rs +++ b/fearless_simd_core/src/x86/crypto/sm3.rs @@ -63,7 +63,6 @@ impl Sm3 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sm3") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/sm4.rs b/fearless_simd_core/src/x86/crypto/sm4.rs index 14479e31..af2a6cfc 100644 --- a/fearless_simd_core/src/x86/crypto/sm4.rs +++ b/fearless_simd_core/src/x86/crypto/sm4.rs @@ -63,7 +63,6 @@ impl Sm4 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sm4") { // Safety: The required CPU feature was detected. 
diff --git a/fearless_simd_core/src/x86/crypto/vaes.rs b/fearless_simd_core/src/x86/crypto/vaes.rs index 9e634b77..7f47b744 100644 --- a/fearless_simd_core/src/x86/crypto/vaes.rs +++ b/fearless_simd_core/src/x86/crypto/vaes.rs @@ -63,7 +63,6 @@ impl Vaes { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("vaes") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs index 66e135b2..8467fe8b 100644 --- a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs @@ -71,7 +71,6 @@ impl Vpclmulqdq { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("vpclmulqdq") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/widekl.rs b/fearless_simd_core/src/x86/crypto/widekl.rs index 1acfb8d9..5c74f3d9 100644 --- a/fearless_simd_core/src/x86/crypto/widekl.rs +++ b/fearless_simd_core/src/x86/crypto/widekl.rs @@ -61,7 +61,6 @@ impl WideKeylocker { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("widekl") { // Safety: The required CPU feature was detected. 
diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs index ab875c82..65deed65 100644 --- a/fearless_simd_core/src/x86/discontinued/tbm.rs +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -61,7 +61,6 @@ impl Tbm { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("tbm") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index 951a532c..929f9c8c 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -20,6 +20,13 @@ //! These are less likely to be directly useful for most users, but are provided for use //! cases which require them (probably especially those under [`crypto`]). //! +//! Both the x86 and x86-64 CPU families are supported in this module as their code is entirely identical, +//! including using the same [`std::is_x86_feature_detected`] macro. +//! Note that this is not the case for `std::arch`; for example, [`core::arch::x86_64::_mm_crc32_u64`] is +//! only available on x86-64. +//! +//! Documentation for features is adapted from the Rust reference. +//! //! [microarchitecture level]: https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels pub mod adx; diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs index d7dfe791..50fa9a79 100644 --- a/fearless_simd_core/src/x86/sse/fxsr.rs +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -62,7 +62,6 @@ impl Fxsr { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("fxsr") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse.rs b/fearless_simd_core/src/x86/sse/sse.rs index 8f40bb76..5937ece6 100644 --- a/fearless_simd_core/src/x86/sse/sse.rs +++ b/fearless_simd_core/src/x86/sse/sse.rs @@ -61,7 +61,6 @@ impl Sse { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse2.rs b/fearless_simd_core/src/x86/sse/sse2.rs index 089bda1a..1d151568 100644 --- a/fearless_simd_core/src/x86/sse/sse2.rs +++ b/fearless_simd_core/src/x86/sse/sse2.rs @@ -61,7 +61,6 @@ impl Sse2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse2") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse3.rs b/fearless_simd_core/src/x86/sse/sse3.rs index c86b454c..c501bc3a 100644 --- a/fearless_simd_core/src/x86/sse/sse3.rs +++ b/fearless_simd_core/src/x86/sse/sse3.rs @@ -61,7 +61,6 @@ impl Sse3 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse3") { // Safety: The required CPU feature was detected. 
diff --git a/fearless_simd_core/src/x86/sse/sse4_1.rs b/fearless_simd_core/src/x86/sse/sse4_1.rs index cbce281f..451a45d3 100644 --- a/fearless_simd_core/src/x86/sse/sse4_1.rs +++ b/fearless_simd_core/src/x86/sse/sse4_1.rs @@ -61,7 +61,6 @@ impl Sse4_1 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse4.1") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse4_2.rs b/fearless_simd_core/src/x86/sse/sse4_2.rs index 6054559d..feba99a8 100644 --- a/fearless_simd_core/src/x86/sse/sse4_2.rs +++ b/fearless_simd_core/src/x86/sse/sse4_2.rs @@ -61,7 +61,6 @@ impl Sse4_2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse4.2") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs index d6bf8769..2bb3e346 100644 --- a/fearless_simd_core/src/x86/sse/sse4a.rs +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -61,7 +61,6 @@ impl Sse4a { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse4a") { // Safety: The required CPU feature was detected. 
diff --git a/fearless_simd_core/src/x86/sse/ssse3.rs b/fearless_simd_core/src/x86/sse/ssse3.rs index 45305bf7..22b35582 100644 --- a/fearless_simd_core/src/x86/sse/ssse3.rs +++ b/fearless_simd_core/src/x86/sse/ssse3.rs @@ -61,7 +61,6 @@ impl SupplementalSse3 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("ssse3") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v1/level.rs b/fearless_simd_core/src/x86/v1/level.rs index 889db039..e431a73b 100644 --- a/fearless_simd_core/src/x86/v1/level.rs +++ b/fearless_simd_core/src/x86/v1/level.rs @@ -92,7 +92,6 @@ impl V1 { } } } -// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v1::Fxsr { fn from(value: V1) -> Self { diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs index 1b6d002a..1fd68ceb 100644 --- a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -61,7 +61,6 @@ impl Cmpxchg16b { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("cmpxchg16b") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v2/level.rs b/fearless_simd_core/src/x86/v2/level.rs index 97c66b7b..f3455c4e 100644 --- a/fearless_simd_core/src/x86/v2/level.rs +++ b/fearless_simd_core/src/x86/v2/level.rs @@ -110,7 +110,6 @@ impl V2 { } } } -// TODO: From impls to convert into lower x86 versions. 
impl From for crate::x86::v2::Cmpxchg16b { fn from(value: V2) -> Self { diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs index d7c71c75..29066049 100644 --- a/fearless_simd_core/src/x86/v2/popcnt.rs +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -61,7 +61,6 @@ impl Popcnt { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("popcnt") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/bmi1.rs b/fearless_simd_core/src/x86/v3/bmi1.rs index f887a9dc..875a1002 100644 --- a/fearless_simd_core/src/x86/v3/bmi1.rs +++ b/fearless_simd_core/src/x86/v3/bmi1.rs @@ -61,7 +61,6 @@ impl Bmi1 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("bmi1") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/bmi2.rs b/fearless_simd_core/src/x86/v3/bmi2.rs index af3eeefe..b83d5d44 100644 --- a/fearless_simd_core/src/x86/v3/bmi2.rs +++ b/fearless_simd_core/src/x86/v3/bmi2.rs @@ -61,7 +61,6 @@ impl Bmi2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("bmi2") { // Safety: The required CPU feature was detected. 
diff --git a/fearless_simd_core/src/x86/v3/f16c.rs b/fearless_simd_core/src/x86/v3/f16c.rs index 53ad8318..1733f5ab 100644 --- a/fearless_simd_core/src/x86/v3/f16c.rs +++ b/fearless_simd_core/src/x86/v3/f16c.rs @@ -63,7 +63,6 @@ impl F16c { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("f16c") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/fma.rs b/fearless_simd_core/src/x86/v3/fma.rs index 44d01709..09479f33 100644 --- a/fearless_simd_core/src/x86/v3/fma.rs +++ b/fearless_simd_core/src/x86/v3/fma.rs @@ -63,7 +63,6 @@ impl Fma { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("fma") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/level.rs b/fearless_simd_core/src/x86/v3/level.rs index f24edb4e..c5d2f8c7 100644 --- a/fearless_simd_core/src/x86/v3/level.rs +++ b/fearless_simd_core/src/x86/v3/level.rs @@ -147,7 +147,6 @@ impl V3 { } } } -// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v3::Avx { fn from(value: V3) -> Self { diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs index d5283fc6..f81f8df4 100644 --- a/fearless_simd_core/src/x86/v3/lzcnt.rs +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -61,7 +61,6 @@ impl Lzcnt { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. 
- // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("lzcnt") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs index 91ee22bb..37df1e1f 100644 --- a/fearless_simd_core/src/x86/v3/movbe.rs +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -61,7 +61,6 @@ impl Movbe { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("movbe") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v4/level.rs b/fearless_simd_core/src/x86/v4/level.rs index 1e6cad45..db146467 100644 --- a/fearless_simd_core/src/x86/v4/level.rs +++ b/fearless_simd_core/src/x86/v4/level.rs @@ -158,7 +158,6 @@ impl V4 { } } } -// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v4::Avx { fn from(value: V4) -> Self { diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 1507efb7..d24692c2 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -61,7 +61,6 @@ impl Xsave { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("xsave") { // Safety: The required CPU feature was detected. 
diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index b84f7912..5b91126b 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -61,7 +61,6 @@ impl Xsavec { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("xsavec") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index 3454d9ca..00505619 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -61,7 +61,6 @@ impl Xsaveopt { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("xsaveopt") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index c5277468..000054c7 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -61,7 +61,6 @@ impl Xsaves { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("xsaves") { // Safety: The required CPU feature was detected. 
From 3af92fb7af801435e5a090bf50fcbd1dc58224b3 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 3 Nov 2025 11:58:36 +0000 Subject: [PATCH 19/19] Add SSE4a and TBM with MSRV 1.91 --- .clippy.toml | 2 +- .github/workflows/ci.yml | 4 ++-- CHANGELOG.md | 2 +- Cargo.toml | 3 +-- README.md | 2 +- fearless_simd/README.md | 2 +- fearless_simd_core/README.md | 2 +- fearless_simd_core/src/x86/discontinued/mod.rs | 5 ++--- fearless_simd_core/src/x86/discontinued/tbm.rs | 3 +-- fearless_simd_core/src/x86/sse/mod.rs | 5 ++--- fearless_simd_core/src/x86/sse/sse4a.rs | 1 - 11 files changed, 13 insertions(+), 18 deletions(-) diff --git a/.clippy.toml b/.clippy.toml index 89821835..bee24f27 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -9,4 +9,4 @@ trivial-copy-size-limit = 16 # END LINEBENDER LINT SET -doc-valid-idents = ["ShangMi", ".."] +doc-valid-idents = ["ShangMi", "SSE4a", ".."] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 229965e2..24420569 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,12 +3,12 @@ env: # version like 1.70. Note that we only specify MAJOR.MINOR and not PATCH so that bugfixes still # come automatically. If the version specified here is no longer the latest stable version, # then please feel free to submit a PR that adjusts it along with the potential clippy fixes. - RUST_STABLE_VER: "1.90" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 + RUST_STABLE_VER: "1.91" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 # The purpose of checking with the minimum supported Rust toolchain is to detect its staleness. # If the compilation fails, then the version specified here needs to be bumped up to reality. # Be sure to also update the rust-version property in the workspace Cargo.toml file, # plus all the README.md files of the affected packages. 
- RUST_MIN_VER: "1.89" + RUST_MIN_VER: "1.91" # List of packages that will be checked with the minimum supported Rust version. # This should be limited to packages that are intended for publishing. RUST_MIN_VER_PKGS: "-p fearless_simd -p fearless_simd_core" diff --git a/CHANGELOG.md b/CHANGELOG.md index db6d6a96..9ad6c367 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ You can find its changes [documented below](#030-2025-10-14). ## [Unreleased] -This release has an [MSRV][] of 1.89. +This release has an [MSRV][] of 1.91. ### Added diff --git a/Cargo.toml b/Cargo.toml index eb1d3283..b9e817fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,8 +15,7 @@ license = "Apache-2.0 OR MIT" repository = "https://github.com/linebender/fearless_simd" # Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files # and with the MSRV in the `Unreleased` section of CHANGELOG.md. -# When increasing past 1.91, also uncomment the `discontinued::tbm` and `sse::sse4a` modules/imports in Fearless SIMD Core. -rust-version = "1.89" +rust-version = "1.91" [workspace.lints] diff --git a/README.md b/README.md index 4749d6f9..dcccda46 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ It benefited from conversations with Luca Versari, though he is not responsible ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. diff --git a/fearless_simd/README.md b/fearless_simd/README.md index 953e4827..f1f3ed19 100644 --- a/fearless_simd/README.md +++ b/fearless_simd/README.md @@ -139,7 +139,7 @@ At least one of `std` and `libm` is required; `std` overrides `libm`. 
## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. diff --git a/fearless_simd_core/README.md b/fearless_simd_core/README.md index 45052d28..142fea2c 100644 --- a/fearless_simd_core/README.md +++ b/fearless_simd_core/README.md @@ -78,7 +78,7 @@ with `#[target_feature]`, and a call to this newly generated function. ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs index f1cc4f63..1072d9ca 100644 --- a/fearless_simd_core/src/x86/discontinued/mod.rs +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -8,6 +8,5 @@ //! //! For more information, see -// These will be stabilised in 1.91. -// mod tbm; -// pub use tbm::Tbm; +mod tbm; +pub use tbm::Tbm; diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs index 65deed65..797afca4 100644 --- a/fearless_simd_core/src/x86/discontinued/tbm.rs +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -42,7 +42,7 @@ impl Debug for Tbm { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. 
unsafe impl TargetFeatureToken for Tbm { - const FEATURES: &[&str] = &["tbm", ]; + const FEATURES: &[&str] = &["tbm"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -88,7 +88,6 @@ impl Tbm { } } - const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs index ecd7d84b..d3d6b5e5 100644 --- a/fearless_simd_core/src/x86/sse/mod.rs +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -29,9 +29,8 @@ pub use sse3::Sse3; mod ssse3; pub use ssse3::SupplementalSse3; -// These will be stabilised in 1.91. -// mod sse4a; -// pub use sse4a::Sse4a; +mod sse4a; +pub use sse4a::Sse4a; mod sse4_1; pub use sse4_1::Sse4_1; diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs index 2bb3e346..8c8abc80 100644 --- a/fearless_simd_core/src/x86/sse/sse4a.rs +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -109,7 +109,6 @@ impl From for crate::x86::sse::Sse3 { } } - const _: () = { assert!( core::mem::size_of::() == 0,