diff --git a/.clippy.toml b/.clippy.toml
index 4781d68c..bee24f27 100644
--- a/.clippy.toml
+++ b/.clippy.toml
@@ -8,3 +8,5 @@
trivial-copy-size-limit = 16
# END LINEBENDER LINT SET
+
+doc-valid-idents = ["ShangMi", "SSE4a", ".."]
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a6a3d50f..24420569 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -3,15 +3,15 @@ env:
# version like 1.70. Note that we only specify MAJOR.MINOR and not PATCH so that bugfixes still
# come automatically. If the version specified here is no longer the latest stable version,
# then please feel free to submit a PR that adjusts it along with the potential clippy fixes.
- RUST_STABLE_VER: "1.88" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7
+ RUST_STABLE_VER: "1.91" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7
# The purpose of checking with the minimum supported Rust toolchain is to detect its staleness.
# If the compilation fails, then the version specified here needs to be bumped up to reality.
# Be sure to also update the rust-version property in the workspace Cargo.toml file,
# plus all the README.md files of the affected packages.
- RUST_MIN_VER: "1.88"
+ RUST_MIN_VER: "1.91"
# List of packages that will be checked with the minimum supported Rust version.
# This should be limited to packages that are intended for publishing.
- RUST_MIN_VER_PKGS: "-p fearless_simd"
+ RUST_MIN_VER_PKGS: "-p fearless_simd -p fearless_simd_core"
# List of features that depend on the standard library and will be excluded from no_std checks.
FEATURES_DEPENDING_ON_STD: "std,default"
# List of packages that can not target Wasm.
@@ -192,6 +192,13 @@ jobs:
- name: run code generator
run: cargo run --bin fearless_simd_gen
+ - name: run core code generator
+ run: cargo run --bin fearless_simd_core_gen
+
+ - name: Reformat (Fearless SIMD Core)
+ # The code generator for Fearless SIMD Core does not do this.
+ run: cargo fmt -p fearless_simd_core
+
- name: check for uncommitted changes
run: git diff --exit-code
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14c1fb20..9ad6c367 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@ You can find its changes [documented below](#030-2025-10-14).
## [Unreleased]
-This release has an [MSRV][] of 1.88.
+This release has an [MSRV][] of 1.91.
### Added
diff --git a/Cargo.lock b/Cargo.lock
index 161950a6..ce10218f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -60,9 +60,9 @@ checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee"
[[package]]
name = "bytemuck"
-version = "1.23.1"
+version = "1.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
+checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4"
[[package]]
name = "cc"
@@ -133,6 +133,17 @@ dependencies = [
"libm",
]
+[[package]]
+name = "fearless_simd_core"
+version = "0.3.0"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "fearless_simd_core_gen"
+version = "0.0.0"
+
[[package]]
name = "fearless_simd_dev_macros"
version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 81395978..b9e817fd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,6 +2,8 @@
resolver = "2"
members = [
"fearless_simd",
+ "fearless_simd_core",
+ "fearless_simd_core/gen",
"fearless_simd_dev_macros",
"fearless_simd_gen",
"fearless_simd_tests",
@@ -13,7 +15,7 @@ license = "Apache-2.0 OR MIT"
repository = "https://github.com/linebender/fearless_simd"
# Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files
# and with the MSRV in the `Unreleased` section of CHANGELOG.md.
-rust-version = "1.88"
+rust-version = "1.91"
[workspace.lints]
diff --git a/README.md b/README.md
index 7c3d95fe..dcccda46 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@ It benefited from conversations with Luca Versari, though he is not responsible
## Minimum supported Rust Version (MSRV)
-This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later.
+This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later.
Future versions of Fearless SIMD might increase the Rust version requirement.
It will not be treated as a breaking change and as such can even happen with small patch releases.
diff --git a/fearless_simd/README.md b/fearless_simd/README.md
index 160ce842..f1f3ed19 100644
--- a/fearless_simd/README.md
+++ b/fearless_simd/README.md
@@ -139,7 +139,7 @@ At least one of `std` and `libm` is required; `std` overrides `libm`.
## Minimum supported Rust Version (MSRV)
-This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later.
+This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later.
Future versions of Fearless SIMD might increase the Rust version requirement.
It will not be treated as a breaking change and as such can even happen with small patch releases.
diff --git a/fearless_simd_core/Cargo.toml b/fearless_simd_core/Cargo.toml
new file mode 100644
index 00000000..1342c218
--- /dev/null
+++ b/fearless_simd_core/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "fearless_simd_core"
+version = "0.3.0"
+description = "Safely run custom #[target_feature] functions"
+keywords = ["simd", "target_feature"]
+categories = ["hardware-support"]
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+
+[dev-dependencies]
+bytemuck = { version = "1.24.0", features = ["must_cast"] }
+
+[lints]
+workspace = true
+
+[features]
+default = ["std"]
+std = []
diff --git a/fearless_simd_core/LICENSE-APACHE b/fearless_simd_core/LICENSE-APACHE
new file mode 100644
index 00000000..d9a10c0d
--- /dev/null
+++ b/fearless_simd_core/LICENSE-APACHE
@@ -0,0 +1,176 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
diff --git a/fearless_simd_core/LICENSE-MIT b/fearless_simd_core/LICENSE-MIT
new file mode 100644
index 00000000..f3d84348
--- /dev/null
+++ b/fearless_simd_core/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2018 Raph Levien
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/fearless_simd_core/README.md b/fearless_simd_core/README.md
new file mode 100644
index 00000000..142fea2c
--- /dev/null
+++ b/fearless_simd_core/README.md
@@ -0,0 +1,105 @@
+
+
+# Fearless SIMD Core
+
+**Target Features in Rust's type system**
+
+[](https://crates.io/crates/fearless_simd)
+[](https://docs.rs/fearless_simd)
+[](#license)
+\
+[](https://xi.zulipchat.com/#narrow/channel/514230-simd)
+[](https://github.com/linebender/fearless_simd/actions)
+[](https://deps.rs/crate/fearless_simd/)
+
+
+
+
+
+
+
+
+
+An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust.
+
+This crate introduces the [`trampoline!`] macro, which allows running code in a
+statically validated `#[target_feature(enable="some_features")]` environment, based on
+externally provided tokens.
+This abstraction is designed to be combined with target features 1.1, the recent update
+in the Rust compiler to allow calling `#[target_feature]` functions safely from within
+other `#[target_feature]` functions.
+As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`].
+
+This crate also has modules which contain tokens for each Rust target feature.
+These allow safely validating that a target feature is available, and obtaining a token.
+These are grouped by architecture:
+
+- [`x86`] contains the tokens for both the x86 and x86-64 targets.
+ It also contains tokens for each x86-64 microarchitecture level, see [`x86::V1`] for details.
+
+
+# Examples
+
+At the time of writing, it is not possible to turn scalar values into SIMD vector types safely using
+only the standard library.
+These examples use [bytemuck](https://crates.io/crates/bytemuck) for this.
+
+
+
+Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature.
+This is also the case for WASM with `wasm_simd`, but note that this crate
+[isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to
+call `#[target_feature]` functions on that platform.
+
+# Crate Feature Flags
+
+
+
+# Implementation
+
+The tokens provided to [`trampoline!`] implement the [`TargetFeatureToken`] trait,
+which indicates that a value of that token is only possible to construct if the set
+of target features it specifies are enabled.
+This means that the macro can use the existence of these token values as
+safety proofs that calling a function with those target features is safe.
+
+This safety proof happens entirely in const evaluation, so if there's a mistake with the
+proof, it will cause a compilation error.
+The code generated by this macro is thus a function containing the provided code, marked
+with `#[target_feature]`, and a call to this newly generated function.
+
+[attributes.codegen.target_feature.wasm]: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.wasm
+
+
+
+## Minimum supported Rust Version (MSRV)
+
+This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later.
+
+Future versions of Fearless SIMD might increase the Rust version requirement.
+It will not be treated as a breaking change and as such can even happen with small patch releases.
+
+## Community
+
+[](https://xi.zulipchat.com/#narrow/channel/514230-simd)
+
+Discussion of Fearless SIMD development happens in the [Linebender Zulip](https://xi.zulipchat.com/), specifically in [#simd](https://xi.zulipchat.com/#narrow/channel/514230-simd).
+All public content can be read without logging in.
+
+Contributions are welcome by pull request.
+The [Rust code of conduct] applies.
+
+## License
+
+Licensed under either of
+
+- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or <https://www.apache.org/licenses/LICENSE-2.0>)
+- MIT license ([LICENSE-MIT](LICENSE-MIT) or <https://opensource.org/licenses/MIT>)
+
+at your option.
+
+[Rust Code of Conduct]: https://www.rust-lang.org/policies/code-of-conduct
diff --git a/fearless_simd_core/gen/Cargo.toml b/fearless_simd_core/gen/Cargo.toml
new file mode 100644
index 00000000..65644e6e
--- /dev/null
+++ b/fearless_simd_core/gen/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "fearless_simd_core_gen"
+description = "Internal code generator for the Fearless SIMD Core crate."
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+
+publish = false
+
+[dependencies]
diff --git a/fearless_simd_core/gen/src/data.rs b/fearless_simd_core/gen/src/data.rs
new file mode 100644
index 00000000..fb5726bb
--- /dev/null
+++ b/fearless_simd_core/gen/src/data.rs
@@ -0,0 +1,7 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+mod x86;
+pub(crate) use x86::{
+ X86_FEATURES, X86_LEVEL_TEMPLATE, X86_TEMPLATE, X86_V1, X86_V2, X86_V3, X86_V4,
+};
diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs
new file mode 100644
index 00000000..841dac60
--- /dev/null
+++ b/fearless_simd_core/gen/src/data/x86.rs
@@ -0,0 +1,451 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+use crate::Feature;
+
+macro_rules! f { // Expands one struct-like feature declaration into a `Feature` value.
+ ($(#[doc = $doc_addition: literal])* // `///` lines on an entry are matched here as `#[doc = "..."]` attributes.
+ struct $module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*]
+ fn $example_function_name: ident
+ ) => {
+ Feature {
+ struct_name: stringify!($struct_name),
+ feature_name: $feature_name,
+ directly_implicitly_enabled: &[$($implicitly_enabled),*],
+ extra_docs: concat!($($doc_addition, "\n",)*), // Re-joins the captured doc lines, each followed by a newline.
+ example_function_name: stringify!($example_function_name),
+ feature_docs_name: $display_name,
+ module: stringify!($module)
+ }
+ }
+}
+
+pub(crate) const X86_TEMPLATE: &str = include_str!("../../templates/x86.rs");
+
+// Data adapted from: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86
+// (specifically, at https://github.com/rust-lang/reference/blob/1d930e1d5a27e114b4d22a50b0b6cd3771b92e31/src/attributes/codegen.md#x86-or-x86_64)
+// TODO: Check set against https://doc.rust-lang.org/stable/std/macro.is_x86_feature_detected.html
+// In particular, we seem to be missing lahfsahf (not stable?)
+pub(crate) const X86_FEATURES: &[Feature] = &[
+ f!(
+ /// [ADX] --- Multi-Precision Add-Carry Instruction Extensions
+ ///
+ /// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX
+ struct adx::Adx("ADX"): "adx" + []
+ fn uses_adx
+ ),
+ f!(
+ /// [AES] --- Advanced Encryption Standard
+ ///
+ /// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set
+ struct crypto::Aes("AES"): "aes" + ["sse2"]
+ fn uses_aes
+ ),
+ f!(
+ /// [AVX] --- Advanced Vector Extensions
+ ///
+ /// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
+ struct avx::Avx("AVX"): "avx" + ["sse4.2"]
+ fn uses_avx
+ ),
+ f!(
+ /// [AVX2] --- Advanced Vector Extensions 2
+ ///
+ /// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2
+ struct avx::Avx2("AVX2"): "avx2" + ["avx"]
+ fn uses_avx2
+ ),
+ f!(
+ /// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions
+ ///
+ /// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16
+ struct avx512::Avx512bf16("AVX512-BF16"): "avx512bf16" + ["avx512bw"]
+ fn uses_avx512bf16
+ ),
+ f!(
+ /// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms
+ ///
+ ///
+ /// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG
+ struct avx512::Avx512bitalg("AVX512-BITALG"): "avx512bitalg" + ["avx512bw"]
+ fn uses_avx512bitalg
+ ),
+ f!(
+ /// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions
+ ///
+ /// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI
+ struct avx512::Avx512bw("AVX512-BW"): "avx512bw" + ["avx512f"]
+ fn uses_avx512bw
+ ),
+ f!(
+ /// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions
+ ///
+ /// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection
+ struct avx512::Avx512cd("AVX512-CD"): "avx512cd" + ["avx512f"]
+ fn uses_avx512cd
+ ),
+ f!(
+ /// [AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions
+ ///
+ /// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI
+ struct avx512::Avx512dq("AVX512-DQ"): "avx512dq" + ["avx512f"]
+ fn uses_avx512dq
+ ),
+ f!(
+ /// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation
+ ///
+ /// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512
+ struct avx512::Avx512f("AVX512-F"): "avx512f" + ["avx2", "fma", "f16c"]
+ fn uses_avx512f
+ ),
+ f!(
+ /// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions
+ ///
+ /// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16
+ struct avx512::Avx512fp16("AVX512-FP16"): "avx512fp16" + ["avx512bw"]
+ fn uses_avx512fp16
+ ),
+ f!(
+ /// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add
+ ///
+ /// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA
+ struct avx512::Avx512ifma("AVX512-IFMA"): "avx512ifma" + ["avx512f"]
+ fn uses_avx512ifma
+ ),
+ f!(
+ /// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions
+ ///
+ /// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI
+ struct avx512::Avx512vbmi("AVX512-VBMI"): "avx512vbmi" + ["avx512bw"]
+ fn uses_avx512vbmi
+ ),
+ f!(
+ /// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2
+ ///
+ /// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2
+ struct avx512::Avx512vbmi2("AVX512-VBMI2"): "avx512vbmi2" + ["avx512bw"]
+ fn uses_avx512vbmi2
+ ),
+ f!(
+ /// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions
+ ///
+ /// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512
+ struct avx512::Avx512vl("AVX512-VL"): "avx512vl" + ["avx512f"]
+ fn uses_avx512vl
+ ),
+ f!(
+ /// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions
+ ///
+ /// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI
+ struct avx512::Avx512vnni("AVX512-VNNI"): "avx512vnni" + ["avx512f"]
+ fn uses_avx512vnni
+ ),
+ f!(
+ /// [AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers
+ ///
+ /// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT
+ struct avx512::Avx512vp2intersect("AVX512-VP2INTERSECT"): "avx512vp2intersect" + ["avx512f"]
+ fn uses_avx512vp2intersect
+ ),
+ f!(
+ /// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction
+ ///
+ /// [AVX512-VPOPCNTDQ]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG
+ struct avx512::Avx512vpopcntdq("AVX512-VPOPCNTDQ"): "avx512vpopcntdq" + ["avx512f"]
+ fn uses_avx512vpopcntdq
+ ),
+ f!(
+ /// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add
+ ///
+ /// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct avx::Avxifma("AVX-IFMA"): "avxifma" + ["avx2"]
+ fn uses_avxifma
+ ),
+ f!(
+ /// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions
+ ///
+ /// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct avx::Avxneconvert("AVX-NE-CONVERT"): "avxneconvert" + ["avx2"]
+ fn uses_avxneconvert
+ ),
+ f!(
+ /// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions
+ ///
+ /// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct avx::Avxvnni("AVX-VNNI"): "avxvnni" + ["avx2"]
+ fn uses_avxvnni
+ ),
+ f!(
+ /// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers
+ ///
+ /// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct avx::Avxvnniint16("AVX-VNNI-INT16"): "avxvnniint16" + ["avx2"]
+ fn uses_avxvnniint16
+ ),
+ f!(
+ /// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers
+ ///
+ /// [AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+ struct avx::Avxvnniint8("AVX-VNNI-INT8"): "avxvnniint8" + ["avx2"]
+ fn uses_avxvnniint8
+ ),
+ f!(
+ /// [BMI1] --- Bit Manipulation Instruction Sets
+ ///
+ /// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets
+ struct v3::Bmi1("BMI1"): "bmi1" + []
+ fn uses_bmi1
+ ),
+ f!(
+ /// [BMI2] --- Bit Manipulation Instruction Sets 2
+ ///
+ /// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2
+ struct v3::Bmi2("BMI2"): "bmi2" + []
+ fn uses_bmi2
+ ),
+ f!(
+ /// [`cmpxchg16b`] --- Compare and exchange 16 bytes (128 bits) of data atomically
+ ///
+ /// [`cmpxchg16b`]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
+ struct v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + []
+ fn uses_cmpxchg16b
+ ),
+ f!(
+ /// [F16C] --- 16-bit floating point conversion instructions
+ ///
+ /// [F16C]: https://en.wikipedia.org/wiki/F16C
+ struct v3::F16c("F16C"): "f16c" + ["avx"]
+ fn uses_f16c
+ ),
+ f!(
+ /// [FMA3] --- Three-operand fused multiply-add
+ ///
+ /// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set
+ struct v3::Fma("FMA3"): "fma" + ["avx"]
+ fn uses_fma
+ ),
+ f!(
+ /// [`fxsave`] and [`fxrstor`] --- Save and restore x87 FPU, MMX Technology, and SSE State
+ ///
+ /// [`fxsave`]: https://www.felixcloutier.com/x86/fxsave
+ /// [`fxrstor`]: https://www.felixcloutier.com/x86/fxrstor
+ struct sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + []
+ fn uses_fxsr
+ ),
+ f!(
+ /// [GFNI] --- Galois Field New Instructions
+ ///
+ /// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI
+ struct crypto::Gfni("GFNI"): "gfni" + ["sse2"]
+ fn uses_gfni
+ ),
+ f!(
+ /// [KEYLOCKER] --- Intel Key Locker Instructions
+ ///
+ /// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions
+ struct crypto::Keylocker("KEYLOCKER"): "kl" + ["sse2"]
+ fn uses_keylocker
+ ),
+ f!(
+ /// [`lzcnt`] --- Leading zeros count
+ ///
+ /// [`lzcnt`]: https://www.felixcloutier.com/x86/lzcnt
+ struct v3::Lzcnt("`lzcnt`"): "lzcnt" + []
+ fn uses_lzcnt
+ ),
+ f!(
+ /// [`movbe`] --- Move data after swapping bytes
+ ///
+ /// [`movbe`]: https://www.felixcloutier.com/x86/movbe
+ struct v3::Movbe("`movbe`"): "movbe" + []
+ fn uses_movbe
+ ),
+ f!(
+ /// [`pclmulqdq`] --- Packed carry-less multiplication quadword
+ ///
+ /// [`pclmulqdq`]: https://www.felixcloutier.com/x86/pclmulqdq
+ struct crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"]
+ fn uses_pclmulqdq
+ ),
+ f!(
+ /// [`popcnt`] --- Count of bits set to 1
+ ///
+ /// [`popcnt`]: https://www.felixcloutier.com/x86/popcnt
+ struct v2::Popcnt("`popcnt`"): "popcnt" + []
+ fn uses_popcnt
+ ),
+ f!(
+ /// [`rdrand`] --- Read random number
+ ///
+ /// [`rdrand`]: https://en.wikipedia.org/wiki/RdRand
+ struct crypto::Rdrand("`rdrand`"): "rdrand" + []
+ fn uses_rdrand
+ ),
+ f!(
+ /// [`rdseed`] --- Read random seed
+ ///
+ /// [`rdseed`]: https://en.wikipedia.org/wiki/RdRand
+ struct crypto::Rdseed("`rdseed`"): "rdseed" + []
+ fn uses_rdseed
+ ),
+ f!(
+ /// [SHA] --- Secure Hash Algorithm
+ ///
+ /// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions
+ struct crypto::Sha("SHA"): "sha" + ["sse2"]
+ fn uses_sha
+ ),
+ f!(
+ /// [SHA512] --- Secure Hash Algorithm with 512-bit digest
+ ///
+ /// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions
+ struct crypto::Sha512("SHA512"): "sha512" + ["avx2"]
+ fn uses_sha512
+ ),
+ f!(
+ /// [SM3] --- ShangMi 3 Hash Algorithm
+ ///
+ /// [SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions
+ struct crypto::Sm3("SM3"): "sm3" + ["avx"]
+ fn uses_sm3
+ ),
+ f!(
+ /// [SM4] --- ShangMi 4 Cipher Algorithm
+ ///
+ /// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions
+ struct crypto::Sm4("SM4"): "sm4" + ["avx2"]
+ fn uses_sm4
+ ),
+ f!(
+ /// [SSE] --- Streaming SIMD Extensions
+ ///
+ /// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions
+ struct sse::Sse("SSE"): "sse" + []
+ fn uses_sse
+ ),
+ f!(
+ /// [SSE2] --- Streaming SIMD Extensions 2
+ ///
+ /// [SSE2]: https://en.wikipedia.org/wiki/SSE2
+ struct sse::Sse2("SSE2"): "sse2" + ["sse"]
+ fn uses_sse2
+ ),
+ f!(
+ /// [SSE3] --- Streaming SIMD Extensions 3
+ ///
+ /// [SSE3]: https://en.wikipedia.org/wiki/SSE3
+ struct sse::Sse3("SSE3"): "sse3" + ["sse2"]
+ fn uses_sse3
+ ),
+ f!(
+ /// [SSE4.1] --- Streaming SIMD Extensions 4.1
+ ///
+ /// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1
+ struct sse::Sse4_1("SSE4.1"): "sse4.1" + ["ssse3"]
+ fn uses_sse4
+ ),
+ f!(
+ /// [SSE4.2] --- StreamingSIMD Extensions 4.2
+ ///
+ /// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2
+ struct sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"]
+ fn uses_sse4
+ ),
+ f!(
+ /// [SSE4a] --- StreamingSIMD Extensions 4a
+ ///
+ /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a
+ struct sse::Sse4a("SSE4a"): "sse4a" + ["sse3"]
+ fn uses_sse4a
+ ),
+ f!(
+ /// [SSSE3] --- Supplemental StreamingSIMD Extensions 3
+ ///
+ /// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3
+ struct sse::SupplementalSse3("SSSE3"): "ssse3" + ["sse3"]
+ fn uses_ssse3
+ ),
+ f!(
+ /// [TBM] --- Trailing Bit Manipulation
+ ///
+ /// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation)
+ struct discontinued::Tbm("TBM"): "tbm" + []
+ fn uses_tbm
+ ),
+ f!(
+ /// [VAES] --- Vector AES Instructions
+ ///
+ /// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES
+ struct crypto::Vaes("VAES"): "vaes" + ["avx2", "aes"]
+ fn uses_vaes
+ ),
+ f!(
+ /// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords
+ ///
+ /// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ
+ struct crypto::Vpclmulqdq("VPCLMULQDQ"): "vpclmulqdq" + ["avx", "pclmulqdq"]
+ fn uses_vpclmulqdq
+ ),
+ f!(
+ /// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions
+ ///
+ /// [KEYLOCKER_WIDE]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions
+ struct crypto::WideKeylocker("WIDE KEYLOCKER"): "widekl" + ["kl"]
+ fn uses_wide_keylocker
+ ),
+ f!(
+ /// [`xsave`] --- Save processor extended states
+ ///
+ /// [`xsave`]: https://www.felixcloutier.com/x86/xsave
+ struct xsave::Xsave("`xsave`"): "xsave" + []
+ fn uses_xsave
+ ),
+ f!(
+ /// [`xsavec`] --- Save processor extended states with compaction
+ ///
+ /// [`xsavec`]: https://www.felixcloutier.com/x86/xsavec
+ struct xsave::Xsavec("`xsavec`"): "xsavec" + []
+ fn uses_xsavec
+ ),
+ f!(
+ /// [`xsaveopt`] --- Save processor extended states optimized
+ ///
+ /// [`xsaveopt`]: https://www.felixcloutier.com/x86/xsaveopt
+ struct xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + []
+ fn uses_xsaveopt
+ ),
+ f!(
+ /// [`xsaves`] --- Save processor extended states supervisor
+ ///
+ /// [`xsaves`]: https://www.felixcloutier.com/x86/xsaves
+ struct xsave::Xsaves("`xsaves`"): "xsaves" + []
+ fn uses_xsaves
+ ),
+];
+
+// All taken from
+
+pub(crate) const X86_LEVEL_TEMPLATE: &str = include_str!("../../templates/x86_level.rs");
+
+/// The target features required in the x86-64-v1 level.
+// Rust doesn't have target features for "cmov", "cmpxchg8b", "fpu", "sce", and "mmx".
+// The first four are all assumed, and the final is not implemented because
+// it's practically impossible to use correctly (and there's no reason to).
+pub(crate) const X86_V1: &[&str] = &["fxsr", "sse", "sse2"];
+/// The target features required in the x86-64-v2 level, in addition to those already in [`X86_V1`].
+pub(crate) const X86_V2: &[&str] = &[
+ "sse3",
+ "ssse3",
+ "sse4.1",
+ "sse4.2",
+ "popcnt",
+ "cmpxchg16b",
+ // The lahfsahf target feature is currently in Rust beta.
+ // "lahfsahf",
+];
+/// The target features required in the x86-64-v3 level, excluding those already in [`X86_V2`].
+pub(crate) const X86_V3: &[&str] = &[
+ "avx", "avx2", "bmi1", "bmi2", "f16c", "fma", "lzcnt", "movbe", "xsave",
+];
+/// The target features required in the x86-64-v4 level, excluding those already in [`X86_V3`].
+pub(crate) const X86_V4: &[&str] = &["avx512f", "avx512bw", "avx512cd", "avx512dq", "avx512vl"];
diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs
new file mode 100644
index 00000000..35d8c233
--- /dev/null
+++ b/fearless_simd_core/gen/src/main.rs
@@ -0,0 +1,354 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+mod data;
+
+use std::collections::HashSet;
+use std::fmt::Write;
+use std::fs;
+use std::hash::RandomState;
+use std::{
+ cell::RefCell,
+ collections::HashMap,
+ fs::create_dir_all,
+ io,
+ path::{Path, PathBuf},
+};
+
+use crate::data::X86_LEVEL_TEMPLATE;
+
+fn main() {
+ let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+ let src_dir = manifest_dir.ancestors().nth(1).unwrap().join("src");
+ {
+ let x86_features = normalize_features(data::X86_FEATURES);
+ generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, &x86_features).unwrap();
+ let mut features: Vec<&'static str> = Vec::new();
+ features.extend(data::X86_V1);
+ generate_x86_level(&src_dir, "v1", &x86_features, &features).unwrap();
+ features.extend(data::X86_V2);
+ generate_x86_level(&src_dir, "v2", &x86_features, &features).unwrap();
+ features.extend(data::X86_V3);
+ generate_x86_level(&src_dir, "v3", &x86_features, &features).unwrap();
+ features.extend(data::X86_V4);
+ generate_x86_level(&src_dir, "v4", &x86_features, &features).unwrap();
+ }
+}
+
+fn generate_for_arch(
+ root_dir: &Path,
+ arch_module_name: &str,
+ template: &str,
+ features: &[NormalizedFeature],
+) -> io::Result<()> {
+ let arch_dir = root_dir.join(arch_module_name);
+ for feature in features {
+ let mut new_docs = String::new();
+ for line in feature.feature.extra_docs.lines() {
+ writeln!(&mut new_docs, "///{line}").unwrap();
+ }
+ let enabled_feature_str_list = format!(
+ r#""{}", {}"#,
+ feature.feature.feature_name,
+ feature
+ .children
+ .iter()
+ .map(|it| format!(r#""{it}""#))
+ .collect::>()
+ .join(", ")
+ );
+ let mut from_impls = String::new();
+ for child in &feature.children {
+ let from_feature = features
+ .iter()
+ .find(|it| it.feature.feature_name == *child)
+ .unwrap();
+ let type_path = format!(
+ "crate::{arch_module_name}::{}::{}",
+ from_feature.feature.module, from_feature.feature.struct_name
+ );
+ write!(
+ from_impls,
+ "\n\
+impl From for {type_path} {{
+ fn from(value: FEATURE_STRUCT_NAME) -> Self {{
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([FEATURE_STRUCT_NAME = value] => \"{{FEATURE_ID}}\", fn() -> {type_path} {{ {type_path}::new() }})
+ }}
+}}\n"
+ ).unwrap();
+ }
+ let mut result = String::from(template);
+ // We replace the from impls first, as they use template variables from the rest of this.
+ result = result.replace("/*{FROM_IMPLS}*/", &from_impls);
+ result = result.replace("// {AUTOGEN_COMMENT}\n", AUTOGEN_COMMENT);
+ result = result.replace("{FEATURE_DOCS_NAME}", feature.feature.feature_docs_name);
+ result = result.replace("/// {NEW_DOCS}\n", &new_docs);
+ result = result.replace("{FEATURE_ID}", feature.feature.feature_name);
+ result = result.replace(
+ "{EXAMPLE_FUNCTION_NAME}",
+ feature.feature.example_function_name,
+ );
+ result = result.replace("FEATURE_STRUCT_NAME", feature.feature.struct_name);
+ result = result.replace(
+ r#""{ENABLED_FEATURES_STR_LIST}""#,
+ &enabled_feature_str_list,
+ );
+
+ let module_dir = arch_dir.join(feature.feature.module);
+ create_dir_all(&module_dir)?;
+ let mut file = module_dir.join(feature.feature.feature_name.replace(".", "_"));
+ file.set_extension("rs");
+ fs::write(file, result)?;
+ }
+ Ok(())
+}
+
+/// Generate the code for an X86 microarchitecture level.
+fn generate_x86_level(
+ root_dir: &Path,
+ level: &'static str,
+ all_features: &[NormalizedFeature],
+ required_features: &[&'static str],
+) -> io::Result<()> {
+ // Precalculate the sets of features we need to support.
+ // Intermediate value for `superset` below; a set is used here so that features are deduplicated.
+ let mut superset = HashSet::new();
+ for feature in required_features {
+ superset.insert(*feature);
+ let normalized = all_features
+ .iter()
+ .find(|it| it.feature.feature_name == *feature)
+ .unwrap();
+ superset.extend(&normalized.children);
+ }
+
+ // Every single target feature supported on this level, including those implied.
+ // (In all likelihood, this is the same as `required_features`, but I'd rather validate that manually)
+ let mut superset = superset.into_iter().collect::>();
+ superset.sort();
+ let mut lcd = HashSet::<_, RandomState>::from_iter(superset.iter().copied());
+ // We make the assumption that features are a tree, that is, there's no case where `A->B` and `B->A`.
+ // However, even if that didn't hold, we at least use a consistent ordering here.
+ // We test from the superset to be safe; this should be equivalent to using `required_features`, though.
+ for feature in &superset {
+ let normalized = all_features
+ .iter()
+ .find(|it| it.feature.feature_name == *feature)
+ .unwrap();
+ for feature in &normalized.children {
+ // If the feature is a child of another required feature, we know we don't need it for this version.
+ // We don't care whether or not it was actually removed.
+ lcd.remove(*feature);
+ }
+ }
+ // The set of features which are strictly required.
+ // This is used to create the target feature string, so that it can be as short as possible.
+ let mut lcd = lcd.into_iter().collect::>();
+ lcd.sort();
+ // Now that we have lcd and superset, we can preprocess what we need for the actual file.
+
+ let level_struct_name = level.to_uppercase();
+ // The target_feature(enable = "...") string.
+ let lcd_contents = lcd.join(",");
+ // The fields of the new struct.
+ let lcd_field_definitions = lcd
+ .iter()
+ .map(|feature| {
+ let normalized = all_features
+ .iter()
+ .find(|it| it.feature.feature_name == *feature)
+ .unwrap();
+ let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name);
+ let feature = feature.replace(".", "_");
+ format!(
+ "/// The contained proof that {} is available.\n\
+ pub {feature}: {type_path},\n",
+ normalized.feature.feature_docs_name
+ )
+ })
+ .collect::();
+ // The enabled FEATURES.
+ let superset_list = superset
+ .iter()
+ .map(|it| format!(r#""{it}""#))
+ .collect::>()
+ .join(", ");
+ // First argument to `trampoline!`
+ let lcd_trampoline = lcd
+ .iter()
+ .map(|feature| {
+ let normalized = all_features
+ .iter()
+ .find(|it| it.feature.feature_name == *feature)
+ .unwrap();
+ let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name);
+ let feature = feature.replace(".", "_");
+ format!("{type_path} = self.{feature}")
+ })
+ .collect::>()
+ .join(", ");
+ // The version of the struct initializer in `try_new`.
+ let struct_initializer_try_new = lcd
+ .iter()
+ .map(|feature| {
+ let normalized = all_features
+ .iter()
+ .find(|it| it.feature.feature_name == *feature)
+ .unwrap();
+ let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name);
+ let feature = feature.replace(".", "_");
+ // We rely on rustfmt to get the tab spacing right.
+ format!("\t{feature}: {type_path}::try_new()?,\n")
+ })
+ .collect::();
+ // The version of the struct initializer in `new`.
+ let struct_initializer_new = lcd
+ .iter()
+ .map(|feature| {
+ let normalized = all_features
+ .iter()
+ .find(|it| it.feature.feature_name == *feature)
+ .unwrap();
+ let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name);
+ let feature = feature.replace(".", "_");
+ format!("\t{feature}: {type_path}::new(),\n")
+ })
+ .collect::();
+
+ let mut from_impls = String::new();
+ for child in &superset {
+ let from_feature = all_features
+ .iter()
+ .find(|it| it.feature.feature_name == *child)
+ .unwrap();
+ let type_path = format!("crate::x86::{level}::{}", from_feature.feature.struct_name);
+ write!(
+ from_impls,
+ "\n\
+impl From for {type_path} {{
+ fn from(value: LEVEL_STRUCT_NAME) -> Self {{
+ // This serves as a correctness check of the implicitly enabled features.
+ trampoline!([LEVEL_STRUCT_NAME = value] => \"{{LEVEL_FEATURE_LCD_CONTENTS}}\", fn() -> {type_path} {{ {type_path}::new() }})
+ }}
+}}\n"
+ ).unwrap();
+ }
+
+ let mut result = String::from(X86_LEVEL_TEMPLATE);
+ // We replace the from impls first, as they use template variables from the rest of this.
+ result = result.replace("/*{FROM_IMPLS}*/", &from_impls);
+ result = result.replace("// {AUTOGEN_COMMENT}", AUTOGEN_COMMENT);
+ result = result.replace("LEVEL_STRUCT_NAME", &level_struct_name);
+ result = result.replace("{LEVEL_ID}", level);
+ result = result.replace("{LEVEL_FEATURE_LCD_CONTENTS}", &lcd_contents);
+ result = result.replace(
+ "/*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/",
+ &lcd_field_definitions,
+ );
+ result = result.replace(r#""{LEVEL_FEATURE_SUPERSET_LIST}""#, &superset_list);
+ result = result.replace("{LEVEL_FEATURE_LCD_TRAMPOLINE}", &lcd_trampoline);
+
+ result = result.replace(
+ "/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/",
+ &struct_initializer_try_new,
+ );
+ result = result.replace(
+ "/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/",
+ &struct_initializer_new,
+ );
+
+ let arch_dir = root_dir.join("x86");
+ let module_dir = arch_dir.join(level);
+ create_dir_all(&module_dir)?;
+ let output_path = module_dir.join("level.rs");
+ fs::write(output_path, result)?;
+ Ok(())
+}
+
+const AUTOGEN_COMMENT: &str = "// This file is automatically generated by `fearless_simd_core_gen`.\n\
+ // Its template can be found in `fearless_simd_core/gen/templates`.";
+
+#[derive(Debug)]
+struct Feature {
+ /// The name of the struct to be generated.
+ struct_name: &'static str,
+ /// The Rust name for the feature, e.g. `"sse"`.
+ feature_name: &'static str,
+ /// The array of features which are implicitly enabled by this feature.
+ /// Note that this array does not include transitive enabled features.
+ directly_implicitly_enabled: &'static [&'static str],
+ /// Any additional docs which we want to add to the module.
+ extra_docs: &'static str,
+ /// The name of the function used in the examples.
+ /// Ideally, we'd make this optional, but that starts making the templating look more complicated.
+ example_function_name: &'static str,
+ /// The "display name" for the feature, used inside the docs.
+ feature_docs_name: &'static str,
+ /// The module (if any) this feature will belong to.
+ ///
+ /// (Note that imports into the module are checked to exist, but not automatically inserted).
+ module: &'static str,
+}
+
+/// Implementation detail intermediate struct of `normalize_features`.
+struct MaybeNormalizedFeature {
+ /// The actual feature.
+ feature: &'static Feature,
+ /// The fully deduplicated, sorted list of target features enabled by this feature, with all
+ /// implicitly enabled features resolved (`None` until `handle_item` has computed it).
+ ///
+ /// Note that this *excludes* the parent target feature.
+ // We use a RefCell here as we know there cannot be loops.
+ children: RefCell>>,
+}
+
+#[derive(Debug)]
+struct NormalizedFeature {
+ feature: &'static Feature,
+ children: Vec<&'static str>,
+}
+
+fn normalize_features(features: &'static [Feature]) -> Vec {
+ let mut state = HashMap::new();
+ for feature in features {
+ state.insert(
+ feature.feature_name,
+ MaybeNormalizedFeature {
+ feature,
+ children: RefCell::new(None),
+ },
+ );
+ }
+ fn handle_item(state: &HashMap<&str, MaybeNormalizedFeature>, item: &MaybeNormalizedFeature) {
+ // We borrow for the entire lifetime to avoid infinite loops.
+ let mut borrowed_children = item.children.borrow_mut();
+ if borrowed_children.is_some() {
+ return;
+ }
+ let mut new_children = Vec::new();
+ for child in item.feature.directly_implicitly_enabled {
+ new_children.push(*child);
+ let child = state
+ .get(child)
+ .expect("Every implicitly enabled feature should exist.");
+ handle_item(state, child);
+ new_children.extend_from_slice(child.children.borrow().as_ref().unwrap());
+ }
+ new_children.sort();
+ new_children.dedup();
+ *borrowed_children = Some(new_children);
+ }
+ for feature in state.values() {
+ handle_item(&state, feature);
+ }
+ let mut output = Vec::new();
+ for (_, feature) in state {
+ output.push(NormalizedFeature {
+ feature: feature.feature,
+ children: feature.children.into_inner().unwrap(),
+ });
+ }
+ output.sort_by_key(|it| it.feature.feature_name);
+ output
+}
diff --git a/fearless_simd_core/gen/templates/aarch64.rs b/fearless_simd_core/gen/templates/aarch64.rs
new file mode 100644
index 00000000..e8fbb89a
--- /dev/null
+++ b/fearless_simd_core/gen/templates/aarch64.rs
@@ -0,0 +1,4 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// {AUTOGEN_COMMENT}
diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs
new file mode 100644
index 00000000..777dcae3
--- /dev/null
+++ b/fearless_simd_core/gen/templates/x86.rs
@@ -0,0 +1,95 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// {AUTOGEN_COMMENT}
+
+//! The {FEATURE_DOCS_NAME} target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// {NEW_DOCS}
+///
+/// A token indicating that the current CPU has the `{FEATURE_ID}` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "{FEATURE_ID}")]
+/// fn {EXAMPLE_FUNCTION_NAME}() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct FEATURE_STRUCT_NAME {
+ // We don't use non_exhaustive because we don't want this struct to be constructible.
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for FEATURE_STRUCT_NAME {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ write!(f, r#""{FEATURE_ID}" enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target feature is enabled.
+unsafe impl TargetFeatureToken for FEATURE_STRUCT_NAME {
+ const FEATURES: &[&str] = &["{ENABLED_FEATURES_STR_LIST}"];
+
+ #[inline(always)]
+ fn vectorize(self, f: impl FnOnce() -> R) -> R {
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item.
+ // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([FEATURE_STRUCT_NAME = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl FEATURE_STRUCT_NAME {
+ #[cfg(feature = "std")]
+ /// Create a new token if the `"{FEATURE_ID}"` target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option {
+ // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ if std::arch::is_x86_feature_detected!("{FEATURE_ID}") {
+ // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) }
+ } else {
+ None
+ }
+ }
+
+ #[target_feature(enable = "{FEATURE_ID}")]
+ /// Create a new token for the "{FEATURE_ID}" target feature.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// {FEATURE_DOCS_NAME} is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "{FEATURE_ID}" target feature is available.
+ ///
+ /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+ pub fn new() -> Self {
+ Self { _private: () }
+ }
+}
+/*{FROM_IMPLS}*/
+
+const _: () = {
+ assert!(
+ core::mem::size_of::() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs
new file mode 100644
index 00000000..51aaa6d9
--- /dev/null
+++ b/fearless_simd_core/gen/templates/x86_level.rs
@@ -0,0 +1,97 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// {AUTOGEN_COMMENT}
+
+//! The x86-64-{LEVEL_ID} microarchitecture level.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+// TODO: Level specific docs?
+/// A token indicating that the current CPU has the x86-64-{LEVEL_ID} microarchitecture level.
+///
+/// For more details on the microarchitecture levels, see
+/// .
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")]
+/// fn uses_x86_64_{LEVEL_ID}() {
+/// // ...
+/// }
+/// ```
+///
+/// This struct internally contains only the minimal features required to enable this level.
+/// This is done to ensure that the fewest target features are checked.
+/// However, it can be turned into any target feature it implies using the from impls.
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct LEVEL_STRUCT_NAME {
+ /*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/
+ // This struct explicitly is not non_exhaustive, because it is
+ // completely safe to construct from the fields.
+}
+
+impl Debug for LEVEL_STRUCT_NAME {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ write!(f, r#"x86-64-{LEVEL_ID} enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proofs that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for LEVEL_STRUCT_NAME {
+ const FEATURES: &[&str] = &["{LEVEL_FEATURE_SUPERSET_LIST}"];
+
+ #[inline(always)]
+ fn vectorize(self, f: impl FnOnce() -> R) -> R {
+ // We use the explicitly written out form here as validation that the set of
+ // features we've created correctly mapes to the target feature string.
+ trampoline!([{LEVEL_FEATURE_LCD_TRAMPOLINE}] => "{LEVEL_FEATURE_LCD_CONTENTS}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl LEVEL_STRUCT_NAME {
+ #[cfg(feature = "std")]
+ /// Create a new token if the x86-64-{LEVEL_ID} target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option {
+ Some(Self {
+ /*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/
+ })
+ }
+
+ #[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")]
+ /// Create a new token for the x86-64-{LEVEL_ID} microarchitecture level.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// x86-64-{LEVEL_ID} is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "{LEVEL_FEATURE_LCD_CONTENTS}" target feature is available.
+ pub fn new() -> Self {
+ Self {
+ /*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/
+ }
+ }
+}
+
+/*{FROM_IMPLS}*/
+
+const _: () = {
+ assert!(
+ core::mem::size_of::() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs
new file mode 100644
index 00000000..924bbd0b
--- /dev/null
+++ b/fearless_simd_core/src/lib.rs
@@ -0,0 +1,351 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust.
+//!
+//! This crate introduces the [`trampoline!`] macro, which allows running code in a
+//! statically validated `#[target_feature(enable="some_features")]` environment, based on
+//! externally provided tokens.
+//! This abstraction is designed to be combined with target features 1.1, the recent update
+//! in the Rust compiler to allow calling `#[target_feature]` functions safely from within
+//! other `#[target_feature]` functions.
+//! As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`].
+//!
+//! This crate also has modules which contain a token for each Rust target feature.
+//! These each have a `try_new` constructor, which validates whether the corresponding
+//! target feature is available, then creates a token if it is.
+//! These are grouped by architecture:
+//!
+//! - [`x86`] contains the tokens for both the x86 and x86-64 targets.
+//! It also contains a token for each x86-64 microarchitecture level, see [`x86::V1`] for details.
+//!
+//!
+//! # Examples
+//!
+//! At the time of writing, it is not possible to turn scalar values into SIMD
+//! vector types safely using only the standard library.
+//! These examples use [bytemuck](https://crates.io/crates/bytemuck) for this.
+//!
+//! Note: These examples are currently pending.
+//!
+//!
+//! Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature.
+//! This is also the case for WASM with `wasm_simd`, but note that this crate
+//! [isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to
+//! call `#[target_feature]` functions on that platform.
+//!
+//! # Crate Feature Flags
+//!
+//!
+//!
+//! # Implementation
+//!
+//! The tokens provided to [`trampoline!`] implement the [`TargetFeatureToken`] trait,
+//! which indicates that a value of that token is only possible to construct if the set
+//! of target features it specifies are enabled.
+//! This means that the macro can use the existence of these token values as
+//! safety proofs that calling a function with those target features is safe.
+//!
+//! This safety proof happens entirely in const evaluation, so if there's a mistake with the
+//! proof, it will cause a compilation error.
+//! The code generated by this macro is thus a function containing the provided code, marked
+//! with `#[target_feature]`, and a call to this newly generated function.
+//!
+//! [attributes.codegen.target_feature.wasm]: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.wasm
+
+// LINEBENDER LINT SET - lib.rs - v4
+// See https://linebender.org/wiki/canonical-lints/
+// These lints shouldn't apply to examples or tests.
+#![cfg_attr(not(test), warn(unused_crate_dependencies))]
+// These lints shouldn't apply to examples.
+#![warn(clippy::print_stdout, clippy::print_stderr)]
+// Targeting e.g. 32-bit means structs containing usize can give false positives for 64-bit.
+#![cfg_attr(target_pointer_width = "64", warn(clippy::trivially_copy_pass_by_ref))]
+// END LINEBENDER LINT SET
+#![cfg_attr(docsrs, feature(doc_cfg))]
+#![no_std]
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))]
+pub mod x86;
+
+pub mod support;
+
+#[cfg(feature = "std")]
+extern crate std;
+
+/// Token which proves that a set of target features is available.
+///
+/// Note that this trait is only meaningful when there are values of this type.
+/// That is, to enable the target features in `FEATURES`, you *must* have a value
+/// of this type.
+///
+/// Values which implement this trait are used in the first argument to [`trampoline!`],
+/// which is a safe abstraction over enabling target features.
+///
+/// # Safety
+///
+/// To construct a value of a type implementing this trait, you must have proven that each
+/// target feature in `FEATURES` is available.
+pub unsafe trait TargetFeatureToken: Copy {
+ /// The set of target features which the current CPU has, if
+ /// you have a value of this type.
+ const FEATURES: &[&str];
+
+ /// Enable the target features in `FEATURES` for a single run of `f`, and run it.
+ ///
+ /// `f` must be marked `#[inline(always)]` for this to work.
+ ///
+ /// Note that this does *not* enable the target features on the Rust side (i.e. for calling intrinsics safely).
+ /// To do so, you should instead use [`trampoline!`] directly - this is a convenience wrapper around `trampoline`
+ /// for cases where either autovectorisation is sufficient, or dispatch to simd intrinsics is handled elsewhere.
+ fn vectorize<R>(self, f: impl FnOnce() -> R) -> R;
+}
+
+/// Run an operation in a context with specific target features enabled, validated with [`TargetFeatureToken`] values.
+///
+/// This is effectively a stable implementation of the "Struct Target Features" Rust feature,
+/// which at the time of writing is in neither stable nor nightly Rust.
+/// This macro can be used to make both SIMD dispatch and explicit SIMD safe.
+///
+/// # Reference
+///
+/// These reference examples presume that you have the following.
+/// The parts of the examples referring to each prerequisite are provided in the brackets:
+///
+/// - An expression (`token`) of a type (`Token`) which implements `TargetFeatureToken` for some target features (`"f1,f2,f3"`);
+/// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset
+/// of those target features (annotated `#[target_feature(enable = "f1,f2")]`);
+/// - Local values of types corresponding to the argument types (`a` of type `[f32; 4]`)
+///
+/// ```rust,ignore
+/// trampoline!(Token = token => "f1,f2,f3", uses_simd(a: [f32; 4]) -> [f32; 4])
+/// // Or equivalently, as `uses_simd` doesn't require `f3`:
+/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]);
+/// ```
+///
+/// Multiple tokens are also supported by providing them in a sequence in square brackets.
+/// The target feature string must be a subset of the total features made available by the tokens:
+///
+/// ```rust,ignore
+/// trampoline!([Token = token, Sse = my_sse] => "f1,f2,sse", uses_simd(a: [f32; 4]) -> [f32; 4])
+/// ```
+///
+/// This is fully validated for safety, so the following example would fail to compile:
+///
+/// ```rust,ignore,compile_fail
+/// // ERROR: call to function `uses_simd` with `#[target_feature]` is unsafe and requires unsafe block
+/// // in order for the call to be safe, the context requires the following additional target feature: f2
+/// trampoline!(Token = token => "f1", uses_simd(a: [f32; 4]) -> [f32; 4]);
+/// ```
+///
+/// A more advanced syntax is available if you need to use generics.
+/// That syntax is explained in comments around the macro's definition.
+/// For reference, the implementation used to implement [`vectorize`](TargetFeatureToken::vectorize) for `"sse"` is:
+///
+/// ```rust,ignore
+/// trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+/// ```
+///
+/// There is also support for a where clause, after the return type.
+///
+/// # Motivation
+///
+/// In Fearless SIMD, this macro has three primary use cases:
+///
+/// 1) By end-users, to dispatch to a specialised SIMD implementation of a function using target specific
+/// instructions, which will be more efficient than the generic version written using the portable subset.
+/// 2) To implement the portable subset of SIMD operations.
+/// 3) To implement the `dispatch!` macro and `Simd::vectorize`, which allows SIMD intrinsics to
+/// be correctly inlined when writing portable SIMD code.
+///
+/// To expand on use case 1, when using Fearless SIMD you will often be writing functions which are
+/// instantiated for multiple different SIMD levels (using generics).
+/// However, for certain SIMD levels, there may be specific instructions which solve your problem more
+/// efficiently than using the generic implementations (as an example, consider SHA256 hashing, which has
+/// built-in instructions on several architectures).
+/// However, in such generic implementations, the Rust type system doesn't know which target features are enabled,
+/// so it would ordinarily require writing code to:
+///
+/// - detect whether a specific target feature is supported.
+/// - unsafely, enter a context where the target feature is enabled in a way which makes the type system aware of this.
+///
+/// This macro provides a way to do the second safely once you have completed the first.
+///
+/// # Example
+///
+/// This expands upon the example in the reference, written out completely.
+///
+/// ```rust,ignore
+/// // Just once, acquire a token.
+/// let token = Token::try_new();
+/// // Later, dispatch based on whether that token is available, potentially multiple times:
+///
+/// /// Perform some computation using SIMD.
+/// #[target_feature(enable = "f1,f2")]
+/// fn uses_simd(val: [f32; 4]) -> [f32; 4] {
+/// // ...
+/// }
+///
+/// let a = [1., 2., 3., 4.];
+/// let Some(token) = token else { return scalar_fallback(a) };
+///
+/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4])
+/// ```
+///
+// TODO: We could write an example for each of ARM, x86, and conditionally compile it in?
+/// Note that our examples are all ignored as there is no target feature which is available on every platform,
+/// but we need these docs to compile for users on any platform.
+///
+/// # Soundness
+///
+/// This macro is designed to be sound, i.e. no input to this macro can lead to undefined behaviour
+/// without using the `unsafe` keyword.
+///
+/// The operation provided will only ever be immediately called once on the same thread as the macro caller,
+/// so safety justifications within the operation can rely on the context of the call site of this macro.
+/// The shorthand format does not allow calling unsafe functions.
+#[macro_export]
+macro_rules! trampoline {
+ // [Sse = sse] => "sse", <(u32)> fn<(T: Int)>(a: [T; 4] = a) -> T where (...) {...}
+ (
+ // The token types, with an expression to get a value of that token kind.
+ [$($token_type: path = $token: expr),+$(,)?]
+ // The target feature to enable. Must be a string literal.
+ => $to_enable: literal,
+ // The generic arguments to instantiate the call to the generated function with.
+ // Note the inner brackets, needed because we can't write a parser for this in macros.
+ $(<($($generic_instantiation: tt)+)>)?
+ // The generic parameters to give the inner generated function.
+ // Brackets needed as above.
+ fn$(<($($generic_args: tt)*)>)?
+ // The arguments to the function, with provided explicit values, plus return type and where clause.
+ ($($arg_name: ident: $arg_type: ty = $arg_value: expr),*$(,)?) $(-> $ret: ty)?
+ // The where clause of the generated function.
+ // Note the inner brackets after `where`, needed as above.
+ $(where ($($where: tt)*))?
+ // The operation to run inside the context with the target feature enabled.
+ $op: block
+ ) => {{
+ #[target_feature(enable = $to_enable)]
+ #[inline]
+ // TODO: Do we want any other attributes here?
+ // Soundness: We wrap the $op in a wrapping block, to ensure that any inner attributes don't apply to the function.
+ // This ensures that the user can't add `#![target_feature(enable = "xxx")]` to their block.
+ // Soundness: Either of generic_args and `$where` could be used to exit the function item early, so aren't
+ // inside an unsafe block.
+ fn trampoline_impl$(<$($generic_args)*>)?($($arg_name: $arg_type),*) $(-> $ret)? $(where $($where)*)? { $op }
+
+ $(
+ // We validate that we actually have a token of each claimed type.
+ let _: $token_type = $token;
+ )+
+ // We use a const item rather than a const block to ensure that the const evaluation happens eagerly,
+ // ensuring that we don't create functions which look valid but actually will always fail when actually codegenned.
+ // This does mean that you can't use tokens "generically", but it's hard to think of cases where that
+ // would be usable anyway. For any case where that is valid, you can always manually create the
+ // "subsetted" token/tokens beforehand using the `From` impls.
+ const _: () = {
+ // And that the claimed types justify enabling the enabled target features.
+ $crate::support::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+])
+ // TODO: Better failure message here (i.e. at least concatting the set of requested features)
+ .unwrap();
+ };
+
+ $(
+ // Soundness: We use `arg_value` outside of the macro body to ensure it doesn't
+ // accidentally gain an unsafe capability.
+ #[allow(clippy::redundant_locals, reason="Required for consistency/safety.")]
+ let $arg_name = $arg_value;
+ )*
+ // Safety: We have validated that the target features enabled in `trampoline_impl` are enabled,
+ // because we have values of token types which implement $crate::TargetFeatureToken
+ // Soundness: `$generic_args` could be used to exit the path expression early. As `<>` are
+ // not treated as "real" brackets by macros, this isn't practical to detect and avoid statically.
+ // To try and ensure that this can't turn into unsoundness, the
+ // `trampoline_impl::<$generic_instantiation>` is evaluated outside of an unsafe block.
+ // In theory, if a user could make the value of `func` be an `unsafe` fn pointer or
+ // item type, this would still be unsound.
+ // However, we haven't found a way for this to compile given the trailing `>`,
+ // so aren't aware of any actual unsoundness. But note that this hasn't been rigorously proven,
+ // and new Rust features could open this up wider.
+ let func = trampoline_impl$(::<$($generic_instantiation)*>)?;
+ unsafe { func($($arg_name),*) }
+ }};
+ // Sse = sse => "sse", sse_do_x(a: [f32; 4], b: [f32; 4]) -> [f32; 4]
+ ($token_type: path = $token: expr => $to_enable: literal, $function: ident($($arg_name: ident: $arg_type: ty),*$(,)?) $(-> $ret: ty)?) => {
+ $crate::trampoline!(
+ [$token_type = $token]
+ => $to_enable,
+ $function($($arg_name: $arg_type),*) $(-> $ret)?
+ )
+ };
+ // [Sse = sse] => "sse", sse_do_x(a: [f32; 4], b: [f32; 4]) -> [f32; 4]
+ ([$($token_type: path = $token: expr),+$(,)?] => $to_enable: literal, $function: ident($($arg_name: ident: $arg_type: ty),*$(,)?) $(-> $ret: ty)?) => {
+ $crate::trampoline!(
+ [$($token_type = $token),+]
+ => $to_enable,
+ fn($($arg_name: $arg_type = $arg_name),*) $(-> $ret)? { $function($($arg_name),*) }
+ )
+ };
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[cfg(test)]
+mod example_expansion {
+ #[cfg(target_arch = "x86")]
+ use core::arch::x86::{__m128, _mm_mul_ps};
+ #[cfg(target_arch = "x86_64")]
+ use core::arch::x86_64::{__m128, _mm_mul_ps};
+
+ use crate::x86::{self, v1::Sse};
+
+ #[target_feature(enable = "sse")]
+ fn sse_mul_f32s(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
+ let a: __m128 = bytemuck::must_cast(a);
+ let b: __m128 = bytemuck::must_cast(b);
+ bytemuck::must_cast(_mm_mul_ps(a, b))
+ }
+
+ #[test]
+ // This is a test so that it is runnable
+ fn example_output() {
+ let Some(sse) = x86::v1::Sse::try_new() else {
+ panic!("Example code")
+ };
+ let a = [10_f32, 20_f32, 30_f32, 40_f32];
+ let b = [4_f32, 5_f32, 6_f32, 7_f32];
+
+ // Both of these example expansions, the former using the shorthand form:
+ let res =
+ trampoline!(Sse = sse => "sse", sse_mul_f32s(a: [f32; 4], b: [f32; 4]) -> [f32; 4]);
+ assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]);
+ let res = trampoline!([Sse = sse] => "sse", fn(a: [f32; 4] = a, b: [f32; 4] = b) -> [f32; 4] { sse_mul_f32s(a, b)});
+ assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]);
+ // will expand to:
+ #[expect(unused_braces, reason = "Required for macro soundness.")]
+ // Start expansion:
+ let res = {
+ #[target_feature(enable = "sse")]
+ #[inline]
+ fn trampoline_impl(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
+ { sse_mul_f32s(a, b) }
+ }
+ let _: Sse = sse;
+ const _: () = {
+ crate::support::is_feature_subset(
+ "sse",
+ [<Sse as crate::TargetFeatureToken>::FEATURES],
+ )
+ .unwrap();
+ };
+ #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")]
+ let a = a;
+ #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")]
+ let b = b;
+ let func = trampoline_impl;
+ unsafe { func(a, b) }
+ };
+ // End expansion
+ assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]);
+ }
+}
diff --git a/fearless_simd_core/src/support.rs b/fearless_simd_core/src/support.rs
new file mode 100644
index 00000000..cf530b3a
--- /dev/null
+++ b/fearless_simd_core/src/support.rs
@@ -0,0 +1,249 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! Support for the safety checks in [`trampoline!`](crate::trampoline!).
+//!
+//! Methods to compute whether each feature in a target feature string (e.g. "sse2,fma")
+//! is supported by a set of target features.
+//!
+//! The [`trampoline`](crate::trampoline!) macro takes both a target feature string,
+//! and one (or more) [`TargetFeatureToken`](crate::TargetFeatureToken).
+//! It uses the functions in this module to validate that the target feature string is
+//! supported by the provided tokens.
+//!
+//! Because evaluating whether this is safe needs to happen at compile time (for both performance
+//! and predictability), the methods in this file are written as `const` functions.
+//! This leads to a bit of weirdness, including treating strings as `&[u8]` internally, as that
+//! actually allows slicing (i.e. reading individual bytes). As far as I know, that isn't
+//! currently possible in const contexts for strings.
+//! Note that the code is still written to be UTF-8 compatible, although we believe that
+//! all currently supported target features are ASCII anyway.
+
+/// The result of `is_feature_subset`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[must_use]
+pub enum SubsetResult {
+ /// The required features are a subset of the permitted features.
+ Yes,
+ /// The required features are not all available.
+ No {
+ /// The feature which was found to be missing (there may be several such features).
+ failing: &'static str,
+ },
+}
+
+impl SubsetResult {
+ /// A utility method to panic if the target features aren't supported.
+ // TODO: How much more context would we be able to give if we inlined this?
+ pub const fn unwrap(self) {
+ match self {
+ Self::Yes => (),
+ // This is const, so we can't actually format out the failing value :(
+ Self::No { .. } => panic!("Tokens provided are missing a necessary target feature."),
+ }
+ }
+}
+
+/// Determine whether the features in the target feature string `required` are a subset of the features in `permitted`.
+/// See [the module level docs][self].
+///
+/// We require static lifetimes as this is primarily internal to the macro.
+pub const fn is_feature_subset<const N: usize>(
+ required: &'static str,
+ permitted: [&[&'static str]; N],
+) -> SubsetResult {
+ let mut required_bytes = required.as_bytes();
+ let mut finished = false;
+ 'input_feature: while !finished {
+ let mut comma_idx = 0;
+ // Find the first comma in required_bytes, or the end of the string.
+ while comma_idx < required_bytes.len() && required_bytes[comma_idx] != b',' {
+ comma_idx += 1;
+ }
+ // `comma_idx` is now the index of the comma, e.g. if the string was "sse,", idx would be 3
+ // This is the feature we need to validate exists in permitted.
+ let (to_find, remaining_required) = required_bytes.split_at(comma_idx);
+ if let [comma, rest @ ..] = remaining_required {
+ if *comma != b',' {
+ panic!("Internal failure of expected behaviour.");
+ } else {
+ required_bytes = rest;
+ }
+ } else {
+ // Exit out of the loop after this iteration.
+ // Note that for inputs of `""` and `"sse,"`, we still need to search
+ // for the input target feature `` (i.e. the empty string), to match Rust's behaviour here.
+ finished = true;
+ }
+
+ let mut local_permitted = permitted.as_slice();
+ while let [to_test, rest @ ..] = local_permitted {
+ local_permitted = rest;
+ if str_array_contains(to_test, to_find) {
+ continue 'input_feature;
+ }
+ }
+ // We tried all of the items, and `to_find` wasn't one of them.
+ // Therefore, at least one of the features in the requested features wasn't supported
+ return SubsetResult::No {
+ failing: match core::str::from_utf8(to_find) {
+ Ok(x) => x,
+ Err(_) => panic!(
+ "We either found a comma or the end of the string, so before then should have been valid UTF-8."
+ ),
+ },
+ };
+ }
+ // We found all of the required features.
+ SubsetResult::Yes
+}
+
+const fn str_array_contains(mut haystack: &[&str], needle: &[u8]) -> bool {
+ while let [to_test, rest @ ..] = haystack {
+ haystack = rest;
+ if byte_arrays_eq(to_test.as_bytes(), needle) {
+ return true;
+ }
+ }
+ false
+}
+
+const fn byte_arrays_eq(lhs: &[u8], rhs: &[u8]) -> bool {
+ if lhs.len() != rhs.len() {
+ return false;
+ }
+ let mut idx = 0;
+ while idx < lhs.len() {
+ if lhs[idx] != rhs[idx] {
+ return false;
+ }
+ idx += 1;
+ }
+ true
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{SubsetResult, is_feature_subset};
+
+ /// Test if each feature in the feature string `required` is an element in `permitted`.
+ ///
+ /// Should be equivalent to [`is_feature_subset`], but not written to be const compatible.
+ fn is_feature_subset_simple<const N: usize>(
+ required: &'static str,
+ permitted: [&[&'static str]; N],
+ ) -> SubsetResult {
+ 'feature: for feature in required.split(',') {
+ for permitted_group in &permitted {
+ for permitted_feature in *permitted_group {
+ if feature == *permitted_feature {
+ continue 'feature;
+ }
+ }
+ }
+ // We tried all permitted features, and this item wasn't present.
+ return SubsetResult::No { failing: feature };
+ }
+ SubsetResult::Yes
+ }
+
+ /// Expect `is_feature_subset` to succeed.
+ #[track_caller]
+ fn expect_success<const N: usize>(required: &'static str, permitted: [&[&'static str]; N]) {
+ let res1 = is_feature_subset(required, permitted);
+ assert_eq!(res1, SubsetResult::Yes, "Const version failed.");
+ // Sanity check against the "trivially correct" version.
+ let res2 = is_feature_subset_simple(required, permitted);
+ assert_eq!(res2, SubsetResult::Yes, "Simpler version failed.");
+ }
+
+ /// Expect `is_feature_subset` to fail (with only a single possible failure).
+ #[track_caller]
+ fn expect_failure<const N: usize>(
+ required: &'static str,
+ permitted: [&[&'static str]; N],
+ failing: &'static str,
+ ) {
+ let res1 = is_feature_subset(required, permitted);
+ assert_eq!(res1, SubsetResult::No { failing }, "Const version failed.");
+ // Sanity check against the "trivially correct" version.
+ let res2 = is_feature_subset_simple(required, permitted);
+ assert_eq!(
+ res2,
+ SubsetResult::No { failing },
+ "Simpler version failed."
+ );
+ }
+
+ /// Expect `is_feature_subset` to fail, possibly with multiple potential missing features.
+ #[track_caller]
+ fn expect_any_failure<const N: usize>(required: &'static str, permitted: [&[&'static str]; N]) {
+ let res1 = is_feature_subset(required, permitted);
+ assert!(
+ matches!(res1, SubsetResult::No { .. }),
+ "Const version failed."
+ );
+ // Sanity check against the "trivially correct" version.
+ let res2 = is_feature_subset_simple(required, permitted);
+ assert!(
+ matches!(res2, SubsetResult::No { .. }),
+ "Simpler version failed."
+ );
+ }
+
+ #[test]
+ fn simple_cases() {
+ expect_success("a,b,c", [&["a", "b", "c"]]);
+ expect_failure("a,b,c", [&["a", "b"]], "c");
+ expect_success("c,a,b", [&["a", "b", "c"]]);
+ expect_failure("c,a,b", [&["a", "b"]], "c");
+ expect_success("a,b", [&["a", "b", "c"]]);
+ expect_failure("a,b", [&["a", "c"]], "b");
+ expect_success("a,b,a,a", [&["a", "b", "c"]]);
+ expect_success("a,b,c", [&["c"], &["b"], &["a"]]);
+
+ // Check it correctly catches more than single item failures
+ expect_success("a1,a2,a3", [&["a1", "a2", "a3"]]);
+ expect_failure("a1,a2,a3", [&["a1", "a2"]], "a3");
+ expect_success("a3,a1,a2", [&["a1", "a2", "a3"]]);
+ expect_failure("a3,a1,a2", [&["a1", "a2"]], "a3");
+ expect_success("a1,a2", [&["a1", "a2", "a3"]]);
+ expect_failure("a1,a2", [&["a1", "a3"]], "a2");
+
+ // Check it doesn't have false positives with prefixes
+ expect_failure("a1,a2,a3", [&["a1", "a2", "a"]], "a3");
+ expect_any_failure("a3,a1,a2", [&["a"]]);
+ expect_success("a1,a2", [&["a1", "a2", "a3"]]);
+ expect_failure("a1,a2", [&["a1", "a3"]], "a2");
+
+ expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b");
+ expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b");
+ expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b");
+ expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b");
+ }
+
+ #[test]
+ fn incorrect_token() {
+ // The permitted list here only allows features which are the literal `a1,a2`
+ // This is completely impossible to pass, but it's worth checking
+ expect_any_failure("a1,a2", [&["a1,a2"]]);
+ }
+
+ #[test]
+ fn empty_feature() {
+ expect_failure("a,b,", [&["a", "b"]], "");
+ expect_failure("", [&["a", "b"]], "");
+
+ // We succeed if the empty target feature is allowed; any case where this is relevant will always
+ // be validated away by rustc anyway, as there is no target with the target feature `""`.
+ // As such, there's no harm in being flexible here.
+ expect_success("", [&[""]]);
+ expect_success(",,,,,,", [&[""]]);
+ }
+
+ #[test]
+ fn non_ascii_features() {
+ expect_success("café", [&["café"]]);
+ expect_failure("café", [&["cafe"]], "café");
+ }
+}
diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs
new file mode 100644
index 00000000..339dbbd0
--- /dev/null
+++ b/fearless_simd_core/src/x86/adx/adx.rs
@@ -0,0 +1,96 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The ADX target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [ADX] --- Multi-Precision Add-Carry Instruction Extensions
+///
+/// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX
+///
+/// A token indicating that the current CPU has the `adx` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "adx")]
+/// fn uses_adx() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Adx {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Adx {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ write!(f, r#""adx" enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proof that the requisite
+// target feature is enabled.
+unsafe impl TargetFeatureToken for Adx {
+ const FEATURES: &[&str] = &["adx"];
+
+ #[inline(always)]
+ fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+ // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+ // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Adx = self] => "adx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl Adx {
+ #[cfg(feature = "std")]
+ /// Create a new token if the `"adx"` target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option<Self> {
+ // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ if std::arch::is_x86_feature_detected!("adx") {
+ // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) }
+ } else {
+ None
+ }
+ }
+
+ #[target_feature(enable = "adx")]
+ /// Create a new token for the "adx" target feature.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// ADX is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "adx" target feature is available.
+ ///
+ /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+ pub fn new() -> Self {
+ Self { _private: () }
+ }
+}
+
+const _: () = {
+ assert!(
+ core::mem::size_of::<Adx>() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/src/x86/adx/mod.rs b/fearless_simd_core/src/x86/adx/mod.rs
new file mode 100644
index 00000000..22a8b231
--- /dev/null
+++ b/fearless_simd_core/src/x86/adx/mod.rs
@@ -0,0 +1,11 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! The "adx" target feature, used for arbitrary precision integer addition.
+
+#[expect(
+ clippy::module_inception,
+ reason = "The inner module is automatically generated."
+)]
+mod adx;
+pub use adx::Adx;
diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs
new file mode 100644
index 00000000..3885ebbc
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx/avx.rs
@@ -0,0 +1,138 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX] --- Advanced Vector Extensions
+///
+/// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
+///
+/// A token indicating that the current CPU has the `avx` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx")]
+/// fn uses_avx() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Avx {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ write!(f, r#""avx" enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx {
+ const FEATURES: &[&str] = &["avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3"];
+
+ #[inline(always)]
+ fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+ // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+ // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Avx = self] => "avx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl Avx {
+ #[cfg(feature = "std")]
+ /// Create a new token if the `"avx"` target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option<Self> {
+ // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ if std::arch::is_x86_feature_detected!("avx") {
+ // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) }
+ } else {
+ None
+ }
+ }
+
+ #[target_feature(enable = "avx")]
+ /// Create a new token for the "avx" target feature.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// AVX is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "avx" target feature is available.
+ ///
+ /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+ pub fn new() -> Self {
+ Self { _private: () }
+ }
+}
+
+impl From<Avx> for crate::x86::sse::Sse {
+ fn from(value: Avx) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+ }
+}
+
+impl From<Avx> for crate::x86::sse::Sse2 {
+ fn from(value: Avx) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+ }
+}
+
+impl From<Avx> for crate::x86::sse::Sse3 {
+ fn from(value: Avx) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+ }
+}
+
+impl From<Avx> for crate::x86::sse::Sse4_1 {
+ fn from(value: Avx) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+ }
+}
+
+impl From<Avx> for crate::x86::sse::Sse4_2 {
+ fn from(value: Avx) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+ }
+}
+
+impl From<Avx> for crate::x86::sse::SupplementalSse3 {
+ fn from(value: Avx) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+ }
+}
+
+const _: () = {
+ assert!(
+ core::mem::size_of::<Avx>() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs
new file mode 100644
index 00000000..9cfdaab6
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx/avx2.rs
@@ -0,0 +1,147 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX2 target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX2] --- Advanced Vector Extensions 2
+///
+/// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2
+///
+/// A token indicating that the current CPU has the `avx2` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx2")]
+/// fn uses_avx2() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx2 {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Avx2 {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ f.write_str(r#""avx2" enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx2 {
+ const FEATURES: &[&str] = &[
+ "avx2", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3",
+ ];
+
+ #[inline(always)]
+ fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+ // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+ // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Avx2 = self] => "avx2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl Avx2 {
+ #[cfg(feature = "std")]
+ /// Create a new token if the `"avx2"` target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option<Self> {
+ // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ if std::arch::is_x86_feature_detected!("avx2") {
+ // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) }
+ } else {
+ None
+ }
+ }
+
+ #[target_feature(enable = "avx2")]
+ /// Create a new token for the "avx2" target feature.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// AVX2 is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "avx2" target feature is available.
+ ///
+ /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+ pub fn new() -> Self {
+ Self { _private: () }
+ }
+}
+
+impl From<Avx2> for crate::x86::avx::Avx {
+ fn from(value: Avx2) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+ }
+}
+
+impl From<Avx2> for crate::x86::sse::Sse {
+ fn from(value: Avx2) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+ }
+}
+
+impl From<Avx2> for crate::x86::sse::Sse2 {
+ fn from(value: Avx2) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+ }
+}
+
+impl From<Avx2> for crate::x86::sse::Sse3 {
+ fn from(value: Avx2) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+ }
+}
+
+impl From<Avx2> for crate::x86::sse::Sse4_1 {
+ fn from(value: Avx2) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+ }
+}
+
+impl From<Avx2> for crate::x86::sse::Sse4_2 {
+ fn from(value: Avx2) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+ }
+}
+
+impl From<Avx2> for crate::x86::sse::SupplementalSse3 {
+ fn from(value: Avx2) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+ }
+}
+
+const _: () = {
+ assert!(
+ core::mem::size_of::<Avx2>() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs
new file mode 100644
index 00000000..870bb988
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx/avxifma.rs
@@ -0,0 +1,154 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX-IFMA target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add
+///
+/// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+///
+/// A token indicating that the current CPU has the `avxifma` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avxifma")]
+/// fn uses_avxifma() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avxifma {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Avxifma {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ f.write_str(r#""avxifma" enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avxifma {
+ const FEATURES: &[&str] = &[
+ "avxifma", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3",
+ ];
+
+ #[inline(always)]
+ fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+ // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+ // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Avxifma = self] => "avxifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl Avxifma {
+ #[cfg(feature = "std")]
+ /// Create a new token if the `"avxifma"` target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option<Self> {
+ // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ if std::arch::is_x86_feature_detected!("avxifma") {
+ // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) }
+ } else {
+ None
+ }
+ }
+
+ #[target_feature(enable = "avxifma")]
+ /// Create a new token for the "avxifma" target feature.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// AVX-IFMA is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "avxifma" target feature is available.
+ ///
+ /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+ pub fn new() -> Self {
+ Self { _private: () }
+ }
+}
+
+impl From<Avxifma> for crate::x86::avx::Avx {
+ fn from(value: Avxifma) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+ }
+}
+
+impl From<Avxifma> for crate::x86::avx::Avx2 {
+ fn from(value: Avxifma) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+ }
+}
+
+impl From<Avxifma> for crate::x86::sse::Sse {
+ fn from(value: Avxifma) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+ }
+}
+
+impl From<Avxifma> for crate::x86::sse::Sse2 {
+ fn from(value: Avxifma) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+ }
+}
+
+impl From<Avxifma> for crate::x86::sse::Sse3 {
+ fn from(value: Avxifma) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+ }
+}
+
+impl From<Avxifma> for crate::x86::sse::Sse4_1 {
+ fn from(value: Avxifma) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+ }
+}
+
+impl From<Avxifma> for crate::x86::sse::Sse4_2 {
+ fn from(value: Avxifma) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+ }
+}
+
+impl From<Avxifma> for crate::x86::sse::SupplementalSse3 {
+ fn from(value: Avxifma) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+ }
+}
+
+const _: () = {
+ assert!(
+ core::mem::size_of::<Avxifma>() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs
new file mode 100644
index 00000000..0009d58f
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs
@@ -0,0 +1,162 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX-NE-CONVERT target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions
+///
+/// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+///
+/// A token indicating that the current CPU has the `avxneconvert` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avxneconvert")]
+/// fn uses_avxneconvert() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avxneconvert {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Avxneconvert {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ f.write_str(r#""avxneconvert" enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avxneconvert {
+ const FEATURES: &[&str] = &[
+ "avxneconvert",
+ "avx",
+ "avx2",
+ "sse",
+ "sse2",
+ "sse3",
+ "sse4.1",
+ "sse4.2",
+ "ssse3",
+ ];
+
+ #[inline(always)]
+ fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+ // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+ // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Avxneconvert = self] => "avxneconvert", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl Avxneconvert {
+ #[cfg(feature = "std")]
+ /// Create a new token if the `"avxneconvert"` target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option<Self> {
+ // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ if std::arch::is_x86_feature_detected!("avxneconvert") {
+ // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) }
+ } else {
+ None
+ }
+ }
+
+ #[target_feature(enable = "avxneconvert")]
+ /// Create a new token for the "avxneconvert" target feature.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// AVX-NE-CONVERT is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "avxneconvert" target feature is available.
+ ///
+ /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+ pub fn new() -> Self {
+ Self { _private: () }
+ }
+}
+
+impl From<Avxneconvert> for crate::x86::avx::Avx {
+ fn from(value: Avxneconvert) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+ }
+}
+
+impl From<Avxneconvert> for crate::x86::avx::Avx2 {
+ fn from(value: Avxneconvert) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+ }
+}
+
+impl From<Avxneconvert> for crate::x86::sse::Sse {
+ fn from(value: Avxneconvert) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+ }
+}
+
+impl From<Avxneconvert> for crate::x86::sse::Sse2 {
+ fn from(value: Avxneconvert) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+ }
+}
+
+impl From<Avxneconvert> for crate::x86::sse::Sse3 {
+ fn from(value: Avxneconvert) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+ }
+}
+
+impl From<Avxneconvert> for crate::x86::sse::Sse4_1 {
+ fn from(value: Avxneconvert) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+ }
+}
+
+impl From<Avxneconvert> for crate::x86::sse::Sse4_2 {
+ fn from(value: Avxneconvert) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+ }
+}
+
+impl From<Avxneconvert> for crate::x86::sse::SupplementalSse3 {
+ fn from(value: Avxneconvert) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+ }
+}
+
+const _: () = {
+ assert!(
+ core::mem::size_of::<Avxneconvert>() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs
new file mode 100644
index 00000000..e385386e
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx/avxvnni.rs
@@ -0,0 +1,154 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX-VNNI target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions
+///
+/// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+///
+/// A token indicating that the current CPU has the `avxvnni` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avxvnni")]
+/// fn uses_avxvnni() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avxvnni {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Avxvnni {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ f.write_str(r#""avxvnni" enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avxvnni {
+ const FEATURES: &[&str] = &[
+ "avxvnni", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3",
+ ];
+
+ #[inline(always)]
+ fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+ // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+ // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Avxvnni = self] => "avxvnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl Avxvnni {
+ #[cfg(feature = "std")]
+ /// Create a new token if the `"avxvnni"` target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option<Self> {
+ // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ if std::arch::is_x86_feature_detected!("avxvnni") {
+ // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) }
+ } else {
+ None
+ }
+ }
+
+ #[target_feature(enable = "avxvnni")]
+ /// Create a new token for the "avxvnni" target feature.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// AVX-VNNI is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "avxvnni" target feature is available.
+ ///
+ /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+ pub fn new() -> Self {
+ Self { _private: () }
+ }
+}
+
+impl From<Avxvnni> for crate::x86::avx::Avx {
+ fn from(value: Avxvnni) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+ }
+}
+
+impl From<Avxvnni> for crate::x86::avx::Avx2 {
+ fn from(value: Avxvnni) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+ }
+}
+
+impl From<Avxvnni> for crate::x86::sse::Sse {
+ fn from(value: Avxvnni) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+ }
+}
+
+impl From<Avxvnni> for crate::x86::sse::Sse2 {
+ fn from(value: Avxvnni) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+ }
+}
+
+impl From<Avxvnni> for crate::x86::sse::Sse3 {
+ fn from(value: Avxvnni) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+ }
+}
+
+impl From<Avxvnni> for crate::x86::sse::Sse4_1 {
+ fn from(value: Avxvnni) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+ }
+}
+
+impl From<Avxvnni> for crate::x86::sse::Sse4_2 {
+ fn from(value: Avxvnni) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+ }
+}
+
+impl From<Avxvnni> for crate::x86::sse::SupplementalSse3 {
+ fn from(value: Avxvnni) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+ }
+}
+
+const _: () = {
+ assert!(
+ core::mem::size_of::<Avxvnni>() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs
new file mode 100644
index 00000000..e213c938
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs
@@ -0,0 +1,162 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX-VNNI-INT16 target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers
+///
+/// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+///
+/// A token indicating that the current CPU has the `avxvnniint16` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avxvnniint16")]
+/// fn uses_avxvnniint16() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avxvnniint16 {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Avxvnniint16 {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ f.write_str(r#""avxvnniint16" enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avxvnniint16 {
+ const FEATURES: &[&str] = &[
+ "avxvnniint16",
+ "avx",
+ "avx2",
+ "sse",
+ "sse2",
+ "sse3",
+ "sse4.1",
+ "sse4.2",
+ "ssse3",
+ ];
+
+ #[inline(always)]
+ fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+ // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+ // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Avxvnniint16 = self] => "avxvnniint16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl Avxvnniint16 {
+ #[cfg(feature = "std")]
+ /// Create a new token if the `"avxvnniint16"` target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option<Self> {
+ // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ if std::arch::is_x86_feature_detected!("avxvnniint16") {
+ // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) }
+ } else {
+ None
+ }
+ }
+
+ #[target_feature(enable = "avxvnniint16")]
+ /// Create a new token for the "avxvnniint16" target feature.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// AVX-VNNI-INT16 is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "avxvnniint16" target feature is available.
+ ///
+ /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+ pub fn new() -> Self {
+ Self { _private: () }
+ }
+}
+
+impl From<Avxvnniint16> for crate::x86::avx::Avx {
+ fn from(value: Avxvnniint16) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+ }
+}
+
+impl From<Avxvnniint16> for crate::x86::avx::Avx2 {
+ fn from(value: Avxvnniint16) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+ }
+}
+
+impl From<Avxvnniint16> for crate::x86::sse::Sse {
+ fn from(value: Avxvnniint16) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+ }
+}
+
+impl From<Avxvnniint16> for crate::x86::sse::Sse2 {
+ fn from(value: Avxvnniint16) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+ }
+}
+
+impl From<Avxvnniint16> for crate::x86::sse::Sse3 {
+ fn from(value: Avxvnniint16) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+ }
+}
+
+impl From<Avxvnniint16> for crate::x86::sse::Sse4_1 {
+ fn from(value: Avxvnniint16) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+ }
+}
+
+impl From<Avxvnniint16> for crate::x86::sse::Sse4_2 {
+ fn from(value: Avxvnniint16) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+ }
+}
+
+impl From<Avxvnniint16> for crate::x86::sse::SupplementalSse3 {
+ fn from(value: Avxvnniint16) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+ }
+}
+
+const _: () = {
+ assert!(
+ core::mem::size_of::<Avxvnniint16>() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs
new file mode 100644
index 00000000..7caa7251
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs
@@ -0,0 +1,162 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX-VNNI-INT8 target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers
+///
+/// [AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA
+///
+/// A token indicating that the current CPU has the `avxvnniint8` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avxvnniint8")]
+/// fn uses_avxvnniint8() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avxvnniint8 {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Avxvnniint8 {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ f.write_str(r#""avxvnniint8" enabled."#)
+ }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avxvnniint8 {
+ const FEATURES: &[&str] = &[
+ "avxvnniint8",
+ "avx",
+ "avx2",
+ "sse",
+ "sse2",
+ "sse3",
+ "sse4.1",
+ "sse4.2",
+ "ssse3",
+ ];
+
+ #[inline(always)]
+ fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+ // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+ // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Avxvnniint8 = self] => "avxvnniint8", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+ }
+}
+
+impl Avxvnniint8 {
+ #[cfg(feature = "std")]
+ /// Create a new token if the `"avxvnniint8"` target feature is detected as enabled.
+ ///
+ /// This does not do any caching internally, although note that the standard
+ /// library does internally cache the features it detects.
+ // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option<Self> {
+ // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ if std::arch::is_x86_feature_detected!("avxvnniint8") {
+ // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) }
+ } else {
+ None
+ }
+ }
+
+ #[target_feature(enable = "avxvnniint8")]
+ /// Create a new token for the "avxvnniint8" target feature.
+ ///
+ /// This method is useful to get a new token if you have an external proof that
+ /// AVX-VNNI-INT8 is available. This could happen if you are in a target feature
+ /// function called by an external library user.
+ ///
+ /// # Safety
+ ///
+ /// No conditions other than those inherited from the target feature attribute,
+ /// i.e. that the "avxvnniint8" target feature is available.
+ ///
+ /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+ pub fn new() -> Self {
+ Self { _private: () }
+ }
+}
+
+impl From<Avxvnniint8> for crate::x86::avx::Avx {
+ fn from(value: Avxvnniint8) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+ }
+}
+
+impl From<Avxvnniint8> for crate::x86::avx::Avx2 {
+ fn from(value: Avxvnniint8) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+ }
+}
+
+impl From<Avxvnniint8> for crate::x86::sse::Sse {
+ fn from(value: Avxvnniint8) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+ }
+}
+
+impl From<Avxvnniint8> for crate::x86::sse::Sse2 {
+ fn from(value: Avxvnniint8) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+ }
+}
+
+impl From<Avxvnniint8> for crate::x86::sse::Sse3 {
+ fn from(value: Avxvnniint8) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+ }
+}
+
+impl From<Avxvnniint8> for crate::x86::sse::Sse4_1 {
+ fn from(value: Avxvnniint8) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+ }
+}
+
+impl From<Avxvnniint8> for crate::x86::sse::Sse4_2 {
+ fn from(value: Avxvnniint8) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+ }
+}
+
+impl From<Avxvnniint8> for crate::x86::sse::SupplementalSse3 {
+ fn from(value: Avxvnniint8) -> Self {
+ // This also serves as a correctness check of the implicitly enabled features.
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+ }
+}
+
+const _: () = {
+ assert!(
+ core::mem::size_of::<Avxvnniint8>() == 0,
+ "Target feature tokens should be zero sized."
+ );
+};
diff --git a/fearless_simd_core/src/x86/avx/mod.rs b/fearless_simd_core/src/x86/avx/mod.rs
new file mode 100644
index 00000000..65fe5757
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx/mod.rs
@@ -0,0 +1,34 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! Target features belonging to the Advanced Vector Extensions family (before AVX-512).
+//!
+//! These are most commonly used through the [x86-64-v3](crate::x86::V3) microarchitecture level.
+//!
+//! These support SIMD registers of up to 256 bits.
+//! For the 512 bit extension, see [`avx512`](crate::x86::avx512).
+
+#[expect(
+ clippy::module_inception,
+ reason = "The inner module is automatically generated."
+)]
+mod avx;
+pub use avx::Avx;
+
+mod avx2;
+pub use avx2::Avx2;
+
+mod avxifma;
+pub use avxifma::Avxifma;
+
+mod avxneconvert;
+pub use avxneconvert::Avxneconvert;
+
+mod avxvnni;
+pub use avxvnni::Avxvnni;
+
+mod avxvnniint8;
+pub use avxvnniint8::Avxvnniint8;
+
+mod avxvnniint16;
+pub use avxvnniint16::Avxvnniint16;
diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs
new file mode 100644
index 00000000..62bbaa69
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs
@@ -0,0 +1,194 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-BF16 target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions
+///
+/// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16
+///
+/// A token indicating that the current CPU has the `avx512bf16` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512bf16")]
+/// fn uses_avx512bf16() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512bf16 {
+ // We don't use `non_exhaustive` because we don't want this struct to be constructible
+ // in different modules in this crate; a private field restricts that to this module.
+ _private: (),
+}
+
+impl Debug for Avx512bf16 {
+    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        formatter.write_str(r#""avx512bf16" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512bf16 {
+    const FEATURES: &[&str] = &[
+        "avx512bf16",
+        "avx",
+        "avx2",
+        "avx512bw",
+        "avx512f",
+        "f16c",
+        "fma",
+        "sse",
+        "sse2",
+        "sse3",
+        "sse4.1",
+        "sse4.2",
+        "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512bf16 = self] => "avx512bf16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512bf16 {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512bf16"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512bf16") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512bf16")]
+    /// Create a new token for the "avx512bf16" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-BF16 is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512bf16" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::avx::Avx {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::avx512::Avx512bw {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::avx512::Avx512f {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::v3::F16c {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::v3::Fma {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::sse::Sse {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512bf16> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512bf16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512bf16>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs
new file mode 100644
index 00000000..226cffab
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs
@@ -0,0 +1,195 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-BITALG target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms
+///
+///
+/// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG
+///
+/// A token indicating that the current CPU has the `avx512bitalg` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512bitalg")]
+/// fn uses_avx512bitalg() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512bitalg {
+ // We don't use `non_exhaustive` because we don't want this struct to be constructible
+ // in different modules in this crate; a private field restricts that to this module.
+ _private: (),
+}
+
+impl Debug for Avx512bitalg {
+    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        formatter.write_str(r#""avx512bitalg" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512bitalg {
+    const FEATURES: &[&str] = &[
+        "avx512bitalg",
+        "avx",
+        "avx2",
+        "avx512bw",
+        "avx512f",
+        "f16c",
+        "fma",
+        "sse",
+        "sse2",
+        "sse3",
+        "sse4.1",
+        "sse4.2",
+        "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512bitalg = self] => "avx512bitalg", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512bitalg {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512bitalg"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512bitalg") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512bitalg")]
+    /// Create a new token for the "avx512bitalg" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-BITALG is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512bitalg" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::avx::Avx {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::avx512::Avx512bw {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::avx512::Avx512f {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::v3::F16c {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::v3::Fma {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::sse::Sse {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512bitalg> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512bitalg) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512bitalg>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs
new file mode 100644
index 00000000..b5aab6f6
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs
@@ -0,0 +1,176 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-BW target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions
+///
+/// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI
+///
+/// A token indicating that the current CPU has the `avx512bw` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512bw")]
+/// fn uses_avx512bw() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512bw {
+ // We don't use `non_exhaustive` because we don't want this struct to be constructible
+ // in different modules in this crate; a private field restricts that to this module.
+ _private: (),
+}
+
+impl Debug for Avx512bw {
+    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        formatter.write_str(r#""avx512bw" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512bw {
+    const FEATURES: &[&str] = &[
+        "avx512bw", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1",
+        "sse4.2", "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512bw = self] => "avx512bw", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512bw {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512bw"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512bw") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512bw")]
+    /// Create a new token for the "avx512bw" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-BW is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512bw" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512bw> for crate::x86::avx::Avx {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::avx512::Avx512f {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::v3::F16c {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::v3::Fma {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::sse::Sse {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512bw> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512bw) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512bw>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs
new file mode 100644
index 00000000..39c81d5f
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs
@@ -0,0 +1,176 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-CD target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions
+///
+/// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection
+///
+/// A token indicating that the current CPU has the `avx512cd` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512cd")]
+/// fn uses_avx512cd() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512cd {
+ // We don't use `non_exhaustive` because we don't want this struct to be constructible
+ // in different modules in this crate; a private field restricts that to this module.
+ _private: (),
+}
+
+impl Debug for Avx512cd {
+    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        formatter.write_str(r#""avx512cd" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512cd {
+    const FEATURES: &[&str] = &[
+        "avx512cd", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1",
+        "sse4.2", "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512cd = self] => "avx512cd", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512cd {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512cd"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512cd") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512cd")]
+    /// Create a new token for the "avx512cd" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-CD is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512cd" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512cd> for crate::x86::avx::Avx {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::avx512::Avx512f {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::v3::F16c {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::v3::Fma {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::sse::Sse {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512cd> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512cd) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512cd>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs
new file mode 100644
index 00000000..abc3c32a
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs
@@ -0,0 +1,176 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-DQ target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions
+///
+/// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI
+///
+/// A token indicating that the current CPU has the `avx512dq` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512dq")]
+/// fn uses_avx512dq() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512dq {
+    // We don't use `non_exhaustive` because we don't want this struct to be constructible
+    // in different modules in this crate.
+    _private: (),
+}
+
+impl Debug for Avx512dq {
+    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        out.write_str(r#""avx512dq" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512dq {
+    const FEATURES: &[&str] = &[
+        "avx512dq", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1",
+        "sse4.2", "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512dq = self] => "avx512dq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512dq {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512dq"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512dq") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512dq")]
+    /// Create a new token for the "avx512dq" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-DQ is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512dq" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512dq> for crate::x86::avx::Avx {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::avx512::Avx512f {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::v3::F16c {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::v3::Fma {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::sse::Sse {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512dq> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512dq) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512dq>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs
new file mode 100644
index 00000000..a25c9255
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512f.rs
@@ -0,0 +1,168 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-F target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation
+///
+/// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512
+///
+/// A token indicating that the current CPU has the `avx512f` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512f")]
+/// fn uses_avx512f() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512f {
+    // We don't use `non_exhaustive` because we don't want this struct to be constructible
+    // in different modules in this crate.
+    _private: (),
+}
+
+impl Debug for Avx512f {
+    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        out.write_str(r#""avx512f" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512f {
+    const FEATURES: &[&str] = &[
+        "avx512f", "avx", "avx2", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512f = self] => "avx512f", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512f {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512f"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512f") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512f")]
+    /// Create a new token for the "avx512f" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-F is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512f" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512f> for crate::x86::avx::Avx {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512f> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512f> for crate::x86::v3::F16c {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512f> for crate::x86::v3::Fma {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512f> for crate::x86::sse::Sse {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512f> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512f> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512f> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512f> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512f> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512f) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512f>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs
new file mode 100644
index 00000000..b76df903
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs
@@ -0,0 +1,194 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-FP16 target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions
+///
+/// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16
+///
+/// A token indicating that the current CPU has the `avx512fp16` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512fp16")]
+/// fn uses_avx512fp16() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512fp16 {
+    // We don't use `non_exhaustive` because we don't want this struct to be constructible
+    // in different modules in this crate.
+    _private: (),
+}
+
+impl Debug for Avx512fp16 {
+    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        out.write_str(r#""avx512fp16" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512fp16 {
+    const FEATURES: &[&str] = &[
+        "avx512fp16",
+        "avx",
+        "avx2",
+        "avx512bw",
+        "avx512f",
+        "f16c",
+        "fma",
+        "sse",
+        "sse2",
+        "sse3",
+        "sse4.1",
+        "sse4.2",
+        "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512fp16 = self] => "avx512fp16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512fp16 {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512fp16"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512fp16") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512fp16")]
+    /// Create a new token for the "avx512fp16" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-FP16 is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512fp16" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::avx::Avx {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::avx512::Avx512bw {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::avx512::Avx512f {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::v3::F16c {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::v3::Fma {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::sse::Sse {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512fp16> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512fp16) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512fp16>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs
new file mode 100644
index 00000000..dd74a8a2
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs
@@ -0,0 +1,186 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-IFMA target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add
+///
+/// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA
+///
+/// A token indicating that the current CPU has the `avx512ifma` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512ifma")]
+/// fn uses_avx512ifma() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512ifma {
+    // We don't use `non_exhaustive` because we don't want this struct to be constructible
+    // in different modules in this crate.
+    _private: (),
+}
+
+impl Debug for Avx512ifma {
+    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        out.write_str(r#""avx512ifma" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512ifma {
+    const FEATURES: &[&str] = &[
+        "avx512ifma",
+        "avx",
+        "avx2",
+        "avx512f",
+        "f16c",
+        "fma",
+        "sse",
+        "sse2",
+        "sse3",
+        "sse4.1",
+        "sse4.2",
+        "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512ifma = self] => "avx512ifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512ifma {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512ifma"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512ifma") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512ifma")]
+    /// Create a new token for the "avx512ifma" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-IFMA is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512ifma" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::avx::Avx {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::avx512::Avx512f {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::v3::F16c {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::v3::Fma {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::sse::Sse {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512ifma> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512ifma) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512ifma>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs
new file mode 100644
index 00000000..38eb6e99
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs
@@ -0,0 +1,194 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-VBMI target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions
+///
+/// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI
+///
+/// A token indicating that the current CPU has the `avx512vbmi` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512vbmi")]
+/// fn uses_avx512vbmi() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512vbmi {
+    // We don't use `non_exhaustive` because we don't want this struct to be constructible
+    // in different modules in this crate.
+    _private: (),
+}
+
+impl Debug for Avx512vbmi {
+    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        out.write_str(r#""avx512vbmi" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512vbmi {
+    const FEATURES: &[&str] = &[
+        "avx512vbmi",
+        "avx",
+        "avx2",
+        "avx512bw",
+        "avx512f",
+        "f16c",
+        "fma",
+        "sse",
+        "sse2",
+        "sse3",
+        "sse4.1",
+        "sse4.2",
+        "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512vbmi = self] => "avx512vbmi", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512vbmi {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512vbmi"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512vbmi") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512vbmi")]
+    /// Create a new token for the "avx512vbmi" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-VBMI is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512vbmi" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::avx::Avx {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::avx512::Avx512bw {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::avx512::Avx512f {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::v3::F16c {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::v3::Fma {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::sse::Sse {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512vbmi> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512vbmi) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512vbmi>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs
new file mode 100644
index 00000000..b172416d
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs
@@ -0,0 +1,194 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-VBMI2 target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2
+///
+/// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2
+///
+/// A token indicating that the current CPU has the `avx512vbmi2` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512vbmi2")]
+/// fn uses_avx512vbmi2() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512vbmi2 {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Avx512vbmi2 {
+    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        formatter.write_str(r#""avx512vbmi2" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512vbmi2 {
+    const FEATURES: &[&str] = &[
+        "avx512vbmi2",
+        "avx",
+        "avx2",
+        "avx512bw",
+        "avx512f",
+        "f16c",
+        "fma",
+        "sse",
+        "sse2",
+        "sse3",
+        "sse4.1",
+        "sse4.2",
+        "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512vbmi2 = self] => "avx512vbmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512vbmi2 {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512vbmi2"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512vbmi2") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512vbmi2")]
+    /// Create a new token for the "avx512vbmi2" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-VBMI2 is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512vbmi2" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::avx::Avx {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::avx::Avx2 {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::avx512::Avx512bw {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::avx512::Avx512f {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::v3::F16c {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::v3::Fma {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::sse::Sse {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::sse::Sse2 {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::sse::Sse3 {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::sse::Sse4_1 {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::sse::Sse4_2 {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() })
+    }
+}
+
+impl From<Avx512vbmi2> for crate::x86::sse::SupplementalSse3 {
+    fn from(value: Avx512vbmi2) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() })
+    }
+}
+
+const _: () = {
+    assert!(
+        core::mem::size_of::<Avx512vbmi2>() == 0,
+        "Target feature tokens should be zero sized."
+    );
+};
diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs
new file mode 100644
index 00000000..983bc3fc
--- /dev/null
+++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs
@@ -0,0 +1,176 @@
+// Copyright 2025 the Fearless_SIMD Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The AVX512-VL target feature.
+
+use crate::{TargetFeatureToken, trampoline};
+
+use core::fmt::Debug;
+
+/// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions
+///
+/// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512
+///
+/// A token indicating that the current CPU has the `avx512vl` target feature.
+///
+/// # Example
+///
+/// This can be used to [`trampoline!`] into functions like:
+///
+/// ```rust
+/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+/// #[target_feature(enable = "avx512vl")]
+/// fn uses_avx512vl() {
+/// // ...
+/// }
+/// ```
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct Avx512vl {
+ // We don't use non_exhaustive because we don't want this struct to be constructible
+ // in different modules in this crate.
+ _private: (),
+}
+
+impl Debug for Avx512vl {
+    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        formatter.write_str(r#""avx512vl" enabled."#)
+    }
+}
+
+// Safety: This token can only be constructed if you have proof that all the requisite
+// target features are enabled.
+unsafe impl TargetFeatureToken for Avx512vl {
+    const FEATURES: &[&str] = &[
+        "avx512vl", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1",
+        "sse4.2", "ssse3",
+    ];
+
+    #[inline(always)]
+    fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
+        // Because we need the safety check to be eagerly evaluated, it uses a constant item.
+        // This means we can't use `Self = self` here, unfortunately.
+        trampoline!([Avx512vl = self] => "avx512vl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
+    }
+}
+
+impl Avx512vl {
+    #[cfg(feature = "std")]
+    /// Create a new token if the `"avx512vl"` target feature is detected as enabled.
+    ///
+    /// This does not do any caching internally, although note that the standard
+    /// library does internally cache the features it detects.
+    // TODO: Consider a manual override feature/env var?
+    pub fn try_new() -> Option<Self> {
+        // Feature flag required to make docs compile.
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::arch::is_x86_feature_detected!("avx512vl") {
+            // Safety: The required CPU feature was detected.
+            unsafe { Some(Self::new()) }
+        } else {
+            None
+        }
+    }
+
+    #[target_feature(enable = "avx512vl")]
+    /// Create a new token for the "avx512vl" target feature.
+    ///
+    /// This method is useful to get a new token if you have an external proof that
+    /// AVX512-VL is available. This could happen if you are in a target feature
+    /// function called by an external library user.
+    ///
+    /// # Safety
+    ///
+    /// No conditions other than those inherited from the target feature attribute,
+    /// i.e. that the "avx512vl" target feature is available.
+    ///
+    /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions
+    pub fn new() -> Self {
+        Self { _private: () }
+    }
+}
+
+impl From<Avx512vl> for crate::x86::avx::Avx {
+    fn from(value: Avx512vl) -> Self {
+        // This also serves as a correctness check of the implicitly enabled features.
+        trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() })
+    }
+}
+
+impl From