From 2b2ad090f63bba9a261d572238af2706b32b8aa9 Mon Sep 17 00:00:00 2001
From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com>
Date: Tue, 14 Oct 2025 12:42:54 +0100
Subject: [PATCH 01/19] Save the version before running the x86 generator
---
Cargo.lock | 15 +-
Cargo.toml | 2 +
fearless_simd_core/Cargo.toml | 17 +
fearless_simd_core/gen/Cargo.toml | 6 +
fearless_simd_core/gen/src/data.rs | 2 +
fearless_simd_core/gen/src/data/x86.rs | 370 ++++++++++++++++++++
fearless_simd_core/gen/src/main.rs | 166 +++++++++
fearless_simd_core/gen/templates/aarch64.rs | 0
fearless_simd_core/gen/templates/x86.rs | 90 +++++
fearless_simd_core/src/lib.rs | 282 +++++++++++++++
fearless_simd_core/src/trampoline.rs | 231 ++++++++++++
fearless_simd_core/src/x86/mod.rs | 15 +
fearless_simd_core/src/x86/v1/fxsr.rs | 80 +++++
fearless_simd_core/src/x86/v1/mod.rs | 38 ++
fearless_simd_core/src/x86/v1/sse.rs | 90 +++++
15 files changed, 1402 insertions(+), 2 deletions(-)
create mode 100644 fearless_simd_core/Cargo.toml
create mode 100644 fearless_simd_core/gen/Cargo.toml
create mode 100644 fearless_simd_core/gen/src/data.rs
create mode 100644 fearless_simd_core/gen/src/data/x86.rs
create mode 100644 fearless_simd_core/gen/src/main.rs
create mode 100644 fearless_simd_core/gen/templates/aarch64.rs
create mode 100644 fearless_simd_core/gen/templates/x86.rs
create mode 100644 fearless_simd_core/src/lib.rs
create mode 100644 fearless_simd_core/src/trampoline.rs
create mode 100644 fearless_simd_core/src/x86/mod.rs
create mode 100644 fearless_simd_core/src/x86/v1/fxsr.rs
create mode 100644 fearless_simd_core/src/x86/v1/mod.rs
create mode 100644 fearless_simd_core/src/x86/v1/sse.rs
diff --git a/Cargo.lock b/Cargo.lock
index 161950a6..672a0913 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -60,9 +60,9 @@ checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee"
[[package]]
name = "bytemuck"
-version = "1.23.1"
+version = "1.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
+checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4"
[[package]]
name = "cc"
@@ -133,6 +133,17 @@ dependencies = [
"libm",
]
+[[package]]
+name = "fearless_simd_core"
+version = "0.1.0"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "fearless_simd_core_gen"
+version = "0.1.0"
+
[[package]]
name = "fearless_simd_dev_macros"
version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 81395978..e84d0a2e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,6 +2,8 @@
resolver = "2"
members = [
"fearless_simd",
+ "fearless_simd_core",
+ "fearless_simd_core/gen",
"fearless_simd_dev_macros",
"fearless_simd_gen",
"fearless_simd_tests",
diff --git a/fearless_simd_core/Cargo.toml b/fearless_simd_core/Cargo.toml
new file mode 100644
index 00000000..e16a9823
--- /dev/null
+++ b/fearless_simd_core/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "fearless_simd_core"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+
+[dev-dependencies]
+bytemuck = { version = "1.24.0", features = ["must_cast"] }
+
+[lints]
+workspace = true
+
+[features]
+default = ["std"]
+std = []
diff --git a/fearless_simd_core/gen/Cargo.toml b/fearless_simd_core/gen/Cargo.toml
new file mode 100644
index 00000000..5617f7be
--- /dev/null
+++ b/fearless_simd_core/gen/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "fearless_simd_core_gen"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
diff --git a/fearless_simd_core/gen/src/data.rs b/fearless_simd_core/gen/src/data.rs
new file mode 100644
index 00000000..87b9ec46
--- /dev/null
+++ b/fearless_simd_core/gen/src/data.rs
@@ -0,0 +1,2 @@
+mod x86;
+pub(crate) use x86::{X86_FEATURES, X86_TEMPLATE};
diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs
new file mode 100644
index 00000000..39e59990
--- /dev/null
+++ b/fearless_simd_core/gen/src/data/x86.rs
@@ -0,0 +1,370 @@
+use crate::Feature;
+
+// Builds one `Feature` table entry from a declaration-like syntax:
+//
+//     f!(
+//         /// docs...
+//         struct ::module::StructName("Display name"): "feature" + ["implied", ...]
+//         fn example_function_name
+//     )
+//
+// Each `///` line is matched through its `#[doc = "..."]` form and concatenated, one per line,
+// into `extra_docs`. Any tokens following the `fn` line are stringified into `additional_impls`.
+macro_rules! f {
+ ($(#[doc = $doc_addition: literal])*
+ struct ::$module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*]
+ fn $example_function_name: ident
+ $($additional_impls: tt)*
+ ) => {
+ Feature {
+ struct_name: stringify!($struct_name),
+ feature_name: $feature_name,
+ directly_implicitly_enabled: &[$($implicitly_enabled),*],
+ extra_docs: concat!($($doc_addition, "\n",)*),
+ example_function_name: stringify!($example_function_name),
+ feature_docs_name: $display_name,
+ additional_impls: stringify!($($additional_impls)*),
+ module: stringify!($module)
+ }
+ }
+}
+
+pub(crate) const X86_TEMPLATE: &str = include_str!("../../templates/x86.rs");
+
+// Data taken from: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86
+// (specifically, at https://github.com/rust-lang/reference/blob/1d930e1d5a27e114b4d22a50b0b6cd3771b92e31/src/attributes/codegen.md#x86-or-x86_64)
+// TODO: Do we need to add their license attribution to our license?
+// TODO: Check set against https://doc.rust-lang.org/stable/std/macro.is_x86_feature_detected.html
+// In particular, we're missing lahfsahf
+pub(crate) const X86_FEATURES: &[Feature] = &[
+ f!(
+ /// [ADX] --- Multi-Precision Add-Carry Instruction Extensions
+ /// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX
+ struct ::adx::Adx("ADX"): "adx" + []
+ fn uses_adx
+ ),
+ f!(
+ /// [AES] --- Advanced Encryption Standard
+ /// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set
+ struct ::crypto::Aes("AES"): "aes" + ["sse2"]
+ fn uses_aes
+ ),
+ f!(
+ /// [AVX] --- Advanced Vector Extensions
+ /// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
+ struct ::avx::Avx("AVX"): "avx" + ["sse4.2"]
+ fn uses_avx
+ ),
+ f!(
+ /// [AVX2] --- Advanced Vector Extensions 2
+ /// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2
+ struct ::avx::Avx2("AVX2"): "avx2" + ["avx"]
+ fn uses_avx2
+ ),
+ f!(
+ /// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions
+ /// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16
+ struct ::avx512::Avx512bf16("AVX512-BF16"): "avx512bf16" + ["avx512bw"]
+ fn uses_avx512bf16
+ ),
+ f!(
+ /// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms
+ /// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG
+ struct ::avx512::Avx512bitalg("AVX512-BITALG"): "avx512bitalg" + ["avx512bw"]
+ fn uses_avx512bitalg
+ ),
+ f!(
+ /// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions
+ /// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI
+ struct ::avx512::Avx512bw("AVX512-BW"): "avx512bw" + ["avx512f"]
+ fn uses_avx512bw
+ ),
+ f!(
+ /// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions
+ /// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection
+ struct ::avx512::Avx512cd("AVX512-CD"): "avx512cd" + ["avx512f"]
+ fn uses_avx512cd
+ ),
+ f!(
+ /// [AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions
+ /// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI
+ struct ::avx512::Avx512dq("AVX512-DQ"): "avx512dq" + ["avx512f"]
+ fn uses_avx512dq
+ ),
+ f!(
+ /// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation
+ /// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512
+ struct ::avx512::Avx512f("AVX512-F"): "avx512f" + ["avx2", "fma", "f16c"]
+ fn uses_avx512f
+ ),
+ f!(
+ /// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions
+ /// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16
+ struct ::avx512::Avx512fp16("AVX512-FP16"): "avx512fp16" + ["avx512bw"]
+ fn uses_avx512fp16
+ ),
+ f!(
+ /// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add
+ /// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA
+ struct ::avx512::Avx512ifma("AVX512-IFMA"): "avx512ifma" + ["avx512f"]
+ fn uses_avx512ifma
+ ),
+ f!(
+ /// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions
+ /// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI
+ struct ::avx512::Avx512vbmi("AVX512-VBMI"): "avx512vbmi" + ["avx512bw"]
+ fn uses_avx512vbmi
+ ),
+ f!(
+ /// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2
+ /// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2
+ struct ::avx512::Avx512vbmi2("AVX512-VBMI2"): "avx512vbmi2" + ["avx512bw"]
+ fn uses_avx512vbmi2
+ ),
+ f!(
+ /// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions
+ /// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512
+ struct ::avx512::Avx512vl("AVX512-VL"): "avx512vl" + ["avx512f"]
+ fn uses_avx512vl
+ ),
+ f!(
+ /// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions
+ /// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI
+ struct ::avx512::Avx512vnni("AVX512-VNNI"): "avx512vnni" + ["avx512f"]
+ fn uses_avx512vnni
+ ),
+ f!(
+ /// [AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers
+ /// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT
+ struct ::avx512::Avx512vp2intersect("AVX512-VP2INTERSECT"): "avx512vp2intersect" + ["avx512f"]
+ fn uses_avx512vp2intersect
+ ),
+ f!(
+ /// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction
+ /// [AVX512-VPOPCNTDQ]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG
+ struct ::avx512::Avx512vpopcntdq("AVX512-VPOPCNTDQ"): "avx512vpopcntdq" + ["avx512f"]
+ fn uses_avx512vpopcntdq
+ ),
+ f!(
+ /// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add
+ /// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct ::avx::Avxifma("AVX-IFMA"): "avxifma" + ["avx2"]
+ fn uses_avxifma
+ ),
+ f!(
+ /// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions
+ /// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct ::avx::Avxneconvert("AVX-NE-CONVERT"): "avxneconvert" + ["avx2"]
+ fn uses_avxneconvert
+ ),
+ f!(
+ /// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions
+ /// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct ::avx::Avxvnni("AVX-VNNI"): "avxvnni" + ["avx2"]
+ fn uses_avxvnni
+ ),
+ f!(
+ /// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers
+ /// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct ::avx::Avxvnniint16("AVX-VNNI-INT16"): "avxvnniint16" + ["avx2"]
+ fn uses_avxvnniint16
+ ),
+ f!(
+ /// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers
+ /// [AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct ::avx::Avxvnniint8("AVX-VNNI-INT8"): "avxvnniint8" + ["avx2"]
+ fn uses_avxvnniint8
+ ),
+ f!(
+ /// [BMI1] --- Bit Manipulation Instruction Sets
+ /// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets
+ struct ::v3::Bmi1("BMI1"): "bmi1" + []
+ fn uses_bmi1
+ ),
+ f!(
+ /// [BMI2] --- Bit Manipulation Instruction Sets 2
+ /// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2
+ struct ::v3::Bmi2("BMI2"): "bmi2" + []
+ fn uses_bmi2
+ ),
+ f!(
+ /// ["cmpxchg16b"] --- Compare and exchange 16 bytes (128 bits) of data atomically
+ /// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
+ struct ::v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + []
+ fn uses_cmpxchg16b
+ ),
+ f!(
+ /// [F16C] --- 16-bit floating point conversion instructions
+ /// [F16C]: https://en.wikipedia.org/wiki/F16C
+ struct ::v3::F16c("F16C"): "f16c" + ["avx"]
+ fn uses_f16c
+ ),
+ f!(
+ /// [FMA3] --- Three-operand fused multiply-add
+ /// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set
+ struct ::v3::Fma("FMA3"): "fma" + ["avx"]
+ fn uses_fma
+ ),
+ f!(
+ /// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State
+ /// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave
+ /// ["fxrstor"]: https://www.felixcloutier.com/x86/fxrstor
+ struct ::sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + []
+ fn uses_fxsr
+ ),
+ f!(
+ /// [GFNI] --- Galois Field New Instructions
+ /// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI
+ struct ::crypto::Gfni("GFNI"): "gfni" + ["sse2"]
+ fn uses_gfni
+ ),
+ f!(
+ /// [KEYLOCKER] --- Intel Key Locker Instructions
+ /// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions
+ struct ::crypto::Keylocker("KEYLOCKER"): "kl" + []
+ fn uses_keylocker
+ ),
+ f!(
+ /// ["lzcnt"] --- Leading zeros count
+ /// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt
+ struct ::v3::Lzcnt("`lzcnt`"): "lzcnt" + []
+ fn uses_lzcnt
+ ),
+ f!(
+ /// ["movbe"] --- Move data after swapping bytes
+ /// ["movbe"]: https://www.felixcloutier.com/x86/movbe
+ struct ::v3::Movbe("`movbe`"): "movbe" + []
+ fn uses_movbe
+ ),
+ f!(
+ /// ["pclmulqdq"] --- Packed carry-less multiplication quadword
+ /// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq
+ struct ::crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"]
+ fn uses_pclmulqdq
+ ),
+ f!(
+ /// ["popcnt"] --- Count of bits set to 1
+ /// ["popcnt"]: https://www.felixcloutier.com/x86/popcnt
+ struct ::v2::Popcnt("`popcnt`"): "popcnt" + []
+ fn uses_popcnt
+ ),
+ f!(
+ /// ["rdrand"] --- Read random number
+ /// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand
+ struct ::crypto::Rdrand("`rdrand`"): "rdrand" + []
+ fn uses_rdrand
+ ),
+ f!(
+ /// ["rdseed"] --- Read random seed
+ /// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand
+ struct ::crypto::Rdseed("`rdseed`"): "rdseed" + []
+ fn uses_rdseed
+ ),
+ f!(
+ /// [SHA] --- Secure Hash Algorithm
+ /// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions
+ struct ::crypto::Sha("SHA"): "sha" + ["sse2"]
+ fn uses_sha
+ ),
+ f!(
+ /// [SHA512] --- Secure Hash Algorithm with 512-bit digest
+ /// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions
+ struct ::crypto::Sha512("SHA512"): "sha512" + ["avx2"]
+ fn uses_sha512
+ ),
+ f!(
+ /// [SM3] --- ShangMi 3 Hash Algorithm
+ /// [SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions
+ struct ::crypto::Sm3("SM3"): "sm3" + ["avx"]
+ fn uses_sm3
+ ),
+ f!(
+ /// [SM4] --- ShangMi 4 Cipher Algorithm
+ /// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions
+ struct ::crypto::Sm4("SM4"): "sm4" + ["avx2"]
+ fn uses_sm4
+ ),
+ f!(
+ /// [SSE] --- Streaming SIMD Extensions
+ /// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions
+ struct ::sse::Sse("SSE"): "sse" + []
+ fn uses_sse
+ ),
+ f!(
+ /// [SSE2] --- Streaming SIMD Extensions 2
+ /// [SSE2]: https://en.wikipedia.org/wiki/SSE2
+ struct ::sse::Sse2("SSE2"): "sse2" + ["sse"]
+ fn uses_sse2
+ ),
+ f!(
+ /// [SSE3] --- Streaming SIMD Extensions 3
+ /// [SSE3]: https://en.wikipedia.org/wiki/SSE3
+ struct ::sse::Sse3("SSE3"): "sse3" + ["sse2"]
+ fn uses_sse3
+ ),
+ f!(
+ /// [SSE4.1] --- Streaming SIMD Extensions 4.1
+ /// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1
+ struct ::sse::Sse4_1("SSE4.1"): "sse4.1" + ["ssse3"]
+ fn uses_sse4
+ ),
+ f!(
+ /// [SSE4.2] --- Streaming SIMD Extensions 4.2
+ /// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2
+ struct ::sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"]
+ fn uses_sse4
+ ),
+ // // TODO: This only exists from 1.91 and above (current beta)
+ // f!(
+ // /// [SSE4a] --- StreamingSIMDExtensions 4a
+ // /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a
+ // struct Sse4a("SSE4a"): "sse4a" + ["sse3"]
+ // fn uses_sse4a
+ // ),
+ f!(
+ /// [SSSE3] --- Supplemental Streaming SIMD Extensions 3
+ /// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3
+ struct ::sse::SupplementalSse3("SSSE3"): "ssse3" + ["sse3"]
+ fn uses_ssse3
+ ),
+ f!(
+ /// [TBM] --- Trailing Bit Manipulation
+ /// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation)
+ struct ::discontinued::Tbm("TBM"): "tbm" + []
+ fn uses_tbm
+ ),
+ f!(
+ /// [VAES] --- Vector AES Instructions
+ /// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES
+ struct ::crypto::Vaes("VAES"): "vaes" + ["avx2", "aes"]
+ fn uses_vaes
+ ),
+ f!(
+ /// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords
+ /// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ
+ struct ::crypto::Vpclmulqdq("VPCLMULQDQ"): "vpclmulqdq" + ["avx", "pclmulqdq"]
+ fn uses_vpclmulqdq
+ ),
+ f!(
+ /// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions
+ /// [KEYLOCKER_WIDE]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions
+ struct ::crypto::WideKeylocker("KEYLOCKER_WIDE"): "widekl" + ["kl"]
+ fn uses_wide_keylocker
+ ),
+ f!(
+ /// ["xsave"] --- Save processor extended states
+ /// ["xsave"]: https://www.felixcloutier.com/x86/xsave
+ struct ::xsave::Xsave("`xsave`"): "xsave" + []
+ fn uses_xsave
+ ),
+ f!(
+ /// ["xsavec"] --- Save processor extended states with compaction
+ /// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec
+ struct ::xsave::Xsavec("`xsavec`"): "xsavec" + []
+ fn uses_xsavec
+ ),
+ f!(
+ /// ["xsaveopt"] --- Save processor extended states optimized
+ /// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt
+ struct ::xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + []
+ fn uses_xsaveopt
+ ),
+ f!(
+ /// ["xsaves"] --- Save processor extended states supervisor
+ /// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves
+ struct ::xsave::Xsaves("`xsaves`"): "xsaves" + []
+ fn uses_xsaves
+ ),
+];
+
+// Placeholder test: the body is empty, so it currently verifies nothing.
+// TODO(review): presumably meant to check `X86_FEATURES` against the complete set of x86 target
+// features (see the TODOs above the table) -- confirm the intent and implement.
+#[test]
+fn all_features_included() {}
diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs
new file mode 100644
index 00000000..2961bd33
--- /dev/null
+++ b/fearless_simd_core/gen/src/main.rs
@@ -0,0 +1,166 @@
+mod data;
+
+use std::fmt::{Write, format};
+use std::fs;
+use std::{
+ cell::RefCell,
+ collections::HashMap,
+ fs::create_dir_all,
+ io,
+ path::{Path, PathBuf},
+};
+
+/// Entry point: regenerates the x86 feature modules under the `src` directory that sits next to
+/// this generator crate's own directory.
+fn main() {
+    // `CARGO_MANIFEST_DIR` is this generator crate's directory; its parent holds the `src` tree
+    // that the generated files are written into.
+    let gen_crate_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
+    let output_root = gen_crate_dir.parent().unwrap().join("src");
+    let outcome = generate_for_arch(&output_root, "x86", data::X86_TEMPLATE, data::X86_FEATURES);
+    outcome.unwrap();
+}
+
+/// Generates one Rust source file per target feature of an architecture.
+///
+/// Every feature in `features` is first passed through `normalize_features`. For each resulting
+/// feature, `template` is instantiated by plain textual placeholder substitution and written to
+/// `<root_dir>/<arch_module_name>/<feature module>/<feature name>.rs`, where any `.` in the feature
+/// name is replaced by `_`. Missing directories are created on the way.
+///
+/// # Errors
+///
+/// Returns any I/O error raised while creating a module directory or writing a file.
+///
+/// # Panics
+///
+/// Panics if a feature lists a child feature which is not present in the normalized feature list.
+fn generate_for_arch(
+    root_dir: &Path,
+    arch_module_name: &str,
+    template: &str,
+    features: &'static [Feature],
+) -> io::Result<()> {
+    let arch_dir = root_dir.join(arch_module_name);
+    let features = normalize_features(features);
+    for feature in &features {
+        // Re-emit the extra docs as doc-comment lines, ready to be spliced into the template.
+        let mut new_docs = String::new();
+        for line in feature.feature.extra_docs.lines() {
+            writeln!(&mut new_docs, "///{line}").unwrap();
+        }
+        let enabled_feature_docs = format!("`{}`", feature.children.join("`, `"));
+        let enabled_feature_str_list = format!(r#""{}""#, feature.children.join(r#"", ""#));
+
+        // One `From` impl per implied feature, converting this feature's token into the implied one.
+        let mut from_impls = String::new();
+        for child in &feature.children {
+            let from_feature = features
+                .iter()
+                .find(|it| it.feature.feature_name == *child)
+                .unwrap();
+            let type_path = format!(
+                "crate::{arch_module_name}::{}::{}",
+                from_feature.feature.module, from_feature.feature.struct_name
+            );
+            // This is a raw string, so escapes such as `\n` would be emitted literally; real line
+            // breaks are used instead. `FEATURE_STRUCT_NAME` and `{{FEATURE_ID}}` are deliberately
+            // left as template placeholders, to be substituted below together with the rest of
+            // the template.
+            // NOTE(review): assumes `trampoline!` takes `[Type = value]` bindings -- confirm
+            // against `trampoline.rs`.
+            write!(
+                from_impls,
+                r#"
+impl From<FEATURE_STRUCT_NAME> for {type_path} {{
+    fn from(value: FEATURE_STRUCT_NAME) -> Self {{
+        trampoline!([FEATURE_STRUCT_NAME = value] => "{{FEATURE_ID}}", fn() -> {type_path} {{ {type_path}::new() }})
+    }}
+}}
+"#
+            )
+            .unwrap();
+        }
+
+        let header = format!(
+            "// This file is automatically generated by `fearless_simd_core_gen`.\n\
+            // Its template can be found in `fearless_simd_core/gen/templates`.\n\n\
+            {template}"
+        );
+        // We replace the from impls first, as they use template variables from the rest of this.
+        let result = header
+            .replace("/*{FROM_IMPLS}*/", &from_impls)
+            .replace("{FEATURE_DOCS_NAME}", feature.feature.feature_docs_name)
+            .replace("/// {NEW_DOCS}", &new_docs)
+            .replace("{FEATURE_ID}", feature.feature.feature_name)
+            .replace("{ENABLED_FEATURES_DOCS_LIST}", &enabled_feature_docs)
+            .replace(
+                "{EXAMPLE_FUNCTION_NAME}",
+                feature.feature.example_function_name,
+            )
+            .replace("FEATURE_STRUCT_NAME", feature.feature.struct_name)
+            .replace("{ENABLED_FEATURES_STR_LIST}", &enabled_feature_str_list);
+
+        let module_dir = arch_dir.join(feature.feature.module);
+        create_dir_all(&module_dir)?;
+        // `PathBuf::set_extension` would treat the `.1` of `sse4.1` as an existing extension and
+        // replace it, so `sse4.1` and `sse4.2` would both end up in `sse4.rs`. Build the file name
+        // explicitly instead, mapping `.` to `_` so that the stem is also a valid module name.
+        let file_name = format!("{}.rs", feature.feature.feature_name.replace('.', "_"));
+        fs::write(module_dir.join(file_name), result)?;
+    }
+    Ok(())
+}
+
+/// The static description of a single target feature.
+///
+/// `generate_for_arch` instantiates the architecture template once per `Feature`.
+#[derive(Debug)]
+struct Feature {
+ /// The name of the struct to be generated (substituted for `FEATURE_STRUCT_NAME` in the template).
+ struct_name: &'static str,
+ /// The Rust name for the feature, e.g. `"sse"` (substituted for `{FEATURE_ID}` in the template).
+ feature_name: &'static str,
+ /// The array of features which are implicitly enabled by this feature.
+ /// Note that this array does not include transitively enabled features.
+ directly_implicitly_enabled: &'static [&'static str],
+ /// Any additional docs which we want to add to the module.
+ /// Each line is re-emitted as a `///` line in place of the template's `/// {NEW_DOCS}` marker.
+ extra_docs: &'static str,
+ /// The name of the function used in the examples (substituted for `{EXAMPLE_FUNCTION_NAME}`).
+ /// Ideally, we'd make this optional, but that starts making the templating look more complicated.
+ example_function_name: &'static str,
+ /// The "display name" for the feature, used inside the docs (substituted for `{FEATURE_DOCS_NAME}`).
+ feature_docs_name: &'static str,
+ /// Extra code added at the end.
+ /// Used for implicitly enabled features.
+ // NOTE(review): not read by the visible part of `generate_for_arch` -- confirm where this is consumed.
+ additional_impls: &'static str,
+ /// The module this feature will belong to; also used as the name of the directory the
+ /// generated file is written into.
+ ///
+ /// (Note that imports into the module are checked to exist, but not automatically inserted).
+ module: &'static str,
+}
+
+/// Implementation detail intermediate struct of `normalize_features`.
+struct MaybeNormalizedFeature {
+ /// The actual feature.
+ feature: &'static Feature,
+ /// The fully deduplicated, sorted list of target features enabled by this feature, including with all
+ /// implicitly enabled features resolved.
+ ///
+ /// Note that this *excludes* the parent target feature.
+ // We use a RefCell here as we know there cannot be loops.
+ children: RefCell