diff --git a/.clippy.toml b/.clippy.toml index 4781d68c..bee24f27 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -8,3 +8,5 @@ trivial-copy-size-limit = 16 # END LINEBENDER LINT SET + +doc-valid-idents = ["ShangMi", "SSE4a", ".."] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a6a3d50f..24420569 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,15 +3,15 @@ env: # version like 1.70. Note that we only specify MAJOR.MINOR and not PATCH so that bugfixes still # come automatically. If the version specified here is no longer the latest stable version, # then please feel free to submit a PR that adjusts it along with the potential clippy fixes. - RUST_STABLE_VER: "1.88" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 + RUST_STABLE_VER: "1.91" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 # The purpose of checking with the minimum supported Rust toolchain is to detect its staleness. # If the compilation fails, then the version specified here needs to be bumped up to reality. # Be sure to also update the rust-version property in the workspace Cargo.toml file, # plus all the README.md files of the affected packages. - RUST_MIN_VER: "1.88" + RUST_MIN_VER: "1.91" # List of packages that will be checked with the minimum supported Rust version. # This should be limited to packages that are intended for publishing. - RUST_MIN_VER_PKGS: "-p fearless_simd" + RUST_MIN_VER_PKGS: "-p fearless_simd -p fearless_simd_core" # List of features that depend on the standard library and will be excluded from no_std checks. FEATURES_DEPENDING_ON_STD: "std,default" # List of packages that can not target Wasm. @@ -192,6 +192,13 @@ jobs: - name: run code generator run: cargo run --bin fearless_simd_gen + - name: run core code generator + run: cargo run --bin fearless_simd_core_gen + + - name: Reformat (Fearless SIMD Core) + # The code generator for Fearless SIMD Core does not do this. 
+ run: cargo fmt -p fearless_simd_core + - name: check for uncommitted changes run: git diff --exit-code diff --git a/CHANGELOG.md b/CHANGELOG.md index 14c1fb20..9ad6c367 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ You can find its changes [documented below](#030-2025-10-14). ## [Unreleased] -This release has an [MSRV][] of 1.88. +This release has an [MSRV][] of 1.91. ### Added diff --git a/Cargo.lock b/Cargo.lock index 161950a6..ce10218f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -60,9 +60,9 @@ checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" [[package]] name = "bytemuck" -version = "1.23.1" +version = "1.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422" +checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" [[package]] name = "cc" @@ -133,6 +133,17 @@ dependencies = [ "libm", ] +[[package]] +name = "fearless_simd_core" +version = "0.3.0" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "fearless_simd_core_gen" +version = "0.0.0" + [[package]] name = "fearless_simd_dev_macros" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 81395978..b9e817fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,8 @@ resolver = "2" members = [ "fearless_simd", + "fearless_simd_core", + "fearless_simd_core/gen", "fearless_simd_dev_macros", "fearless_simd_gen", "fearless_simd_tests", @@ -13,7 +15,7 @@ license = "Apache-2.0 OR MIT" repository = "https://github.com/linebender/fearless_simd" # Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files # and with the MSRV in the `Unreleased` section of CHANGELOG.md. 
-rust-version = "1.88" +rust-version = "1.91" [workspace.lints] diff --git a/README.md b/README.md index 7c3d95fe..dcccda46 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ It benefited from conversations with Luca Versari, though he is not responsible ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. diff --git a/fearless_simd/README.md b/fearless_simd/README.md index 160ce842..f1f3ed19 100644 --- a/fearless_simd/README.md +++ b/fearless_simd/README.md @@ -139,7 +139,7 @@ At least one of `std` and `libm` is required; `std` overrides `libm`. ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. 
diff --git a/fearless_simd_core/Cargo.toml b/fearless_simd_core/Cargo.toml new file mode 100644 index 00000000..1342c218 --- /dev/null +++ b/fearless_simd_core/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "fearless_simd_core" +version = "0.3.0" +description = "Safely run custom #[target_feature] functions" +keywords = ["simd", "target_feature"] +categories = ["hardware-support"] +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dev-dependencies] +bytemuck = { version = "1.24.0", features = ["must_cast"] } + +[lints] +workspace = true + +[features] +default = ["std"] +std = [] diff --git a/fearless_simd_core/LICENSE-APACHE b/fearless_simd_core/LICENSE-APACHE new file mode 100644 index 00000000..d9a10c0d --- /dev/null +++ b/fearless_simd_core/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/fearless_simd_core/LICENSE-MIT b/fearless_simd_core/LICENSE-MIT new file mode 100644 index 00000000..f3d84348 --- /dev/null +++ b/fearless_simd_core/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 Raph Levien + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/fearless_simd_core/README.md b/fearless_simd_core/README.md new file mode 100644 index 00000000..142fea2c --- /dev/null +++ b/fearless_simd_core/README.md @@ -0,0 +1,105 @@ +
+ +# Fearless SIMD Core + +**Target Features in Rust's type system** + +[![Latest published version.](https://img.shields.io/crates/v/fearless_simd.svg)](https://crates.io/crates/fearless_simd) +[![Documentation build status.](https://img.shields.io/docsrs/fearless_simd.svg)](https://docs.rs/fearless_simd) +[![Apache 2.0 or MIT license.](https://img.shields.io/badge/license-Apache--2.0_OR_MIT-blue.svg)](#license) +\ +[![Linebender Zulip, #simd channel.](https://img.shields.io/badge/Linebender-%23simd-blue?logo=Zulip)](https://xi.zulipchat.com/#narrow/channel/514230-simd) +[![GitHub Actions CI status.](https://img.shields.io/github/actions/workflow/status/linebender/fearless_simd/ci.yml?logo=github&label=CI)](https://github.com/linebender/fearless_simd/actions) +[![Dependency staleness status.](https://deps.rs/crate/fearless_simd/latest/status.svg)](https://deps.rs/crate/fearless_simd/) + +
+ + + + + + + +An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust. + +This crate introduces the [`trampoline!`] macro, which allows running code in a +statically validated `#[target_feature(enable="some_features")]` environment, based on +externally provided tokens. +This abstraction is designed to be combined with target features 1.1, the recent update +in the Rust compiler to allow calling `#[target_feature]` functions safely from within +other `#[target_feature]` functions. +As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`]. + +This crate also has modules which contain tokens for each Rust target feature. +These allow safely validating that a target feature is available, and obtaining a token. +These are grouped by architecture: + +- [`x86`] contains the tokens for both the x86 and x86-64 targets. + It also contains tokens for each x86-64 microarchitecture level, see [`x86::V1`] for details. + + +# Examples + +At the time of writing, it is not possible to turn scalar values into SIMD vector types safely using +only the standard library. +These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. + + + +Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature. +This is also the case for WASM with `wasm_simd`, but note that this crate +[isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to +call `#[target_feature]` functions on that platform. + +# Crate Feature Flags + + + +# Implementation + +The tokens provided to [`trampoline!`] implement the [`TargetFeatureToken`] trait, +which indicates that a value of that token is only possible to construct if the set +of target features it specifies is enabled. +This means that the macro can use the existence of these token values as +safety proofs that calling a function with those target features is safe.
+ +This safety proof happens entirely in const evaluation, so if there's a mistake with the +proof, it will cause a compilation error. +The code generated by this macro is thus a function containing the provided code, marked +with `#[target_feature]`, and a call to this newly generated function. + +[attributes.codegen.target_feature.wasm]: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.wasm + + + +## Minimum supported Rust Version (MSRV) + +This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later. + +Future versions of Fearless SIMD might increase the Rust version requirement. +It will not be treated as a breaking change and as such can even happen with small patch releases. + +## Community + +[![Linebender Zulip, #simd channel.](https://img.shields.io/badge/Linebender-%23simd-blue?logo=Zulip)](https://xi.zulipchat.com/#narrow/channel/514230-simd) + +Discussion of Fearless SIMD development happens in the [Linebender Zulip](https://xi.zulipchat.com/), specifically in [#simd](https://xi.zulipchat.com/#narrow/channel/514230-simd). +All public content can be read without logging in. + +Contributions are welcome by pull request. +The [Rust code of conduct] applies. + +## License + +Licensed under either of + +- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>) +- MIT license ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>) + +at your option. + +[Rust Code of Conduct]: https://www.rust-lang.org/policies/code-of-conduct diff --git a/fearless_simd_core/gen/Cargo.toml b/fearless_simd_core/gen/Cargo.toml new file mode 100644 index 00000000..65644e6e --- /dev/null +++ b/fearless_simd_core/gen/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "fearless_simd_core_gen" +description = "Internal code generator for the Fearless SIMD Core crate."
+edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +publish = false + +[dependencies] diff --git a/fearless_simd_core/gen/src/data.rs b/fearless_simd_core/gen/src/data.rs new file mode 100644 index 00000000..fb5726bb --- /dev/null +++ b/fearless_simd_core/gen/src/data.rs @@ -0,0 +1,7 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +mod x86; +pub(crate) use x86::{ + X86_FEATURES, X86_LEVEL_TEMPLATE, X86_TEMPLATE, X86_V1, X86_V2, X86_V3, X86_V4, +}; diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs new file mode 100644 index 00000000..841dac60 --- /dev/null +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -0,0 +1,451 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::Feature; + +macro_rules! f { + ($(#[doc = $doc_addition: literal])* + struct $module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*] + fn $example_function_name: ident + ) => { + Feature { + struct_name: stringify!($struct_name), + feature_name: $feature_name, + directly_implicitly_enabled: &[$($implicitly_enabled),*], + extra_docs: concat!($($doc_addition, "\n",)*), + example_function_name: stringify!($example_function_name), + feature_docs_name: $display_name, + module: stringify!($module) + } + } +} + +pub(crate) const X86_TEMPLATE: &str = include_str!("../../templates/x86.rs"); + +// Data adapted from: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 +// (specifically, at https://github.com/rust-lang/reference/blob/1d930e1d5a27e114b4d22a50b0b6cd3771b92e31/src/attributes/codegen.md#x86-or-x86_64) +// TODO: Check set against https://doc.rust-lang.org/stable/std/macro.is_x86_feature_detected.html +// In particular, we seem to be missing lahfsahf (not stable?) 
+pub(crate) const X86_FEATURES: &[Feature] = &[ + f!( + /// [ADX] --- Multi-Precision Add-Carry Instruction Extensions + /// + /// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX + struct adx::Adx("ADX"): "adx" + [] + fn uses_adx + ), + f!( + /// [AES] --- Advanced Encryption Standard + /// + /// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set + struct crypto::Aes("AES"): "aes" + ["sse2"] + fn uses_aes + ), + f!( + /// [AVX] --- Advanced Vector Extensions + /// + /// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions + struct avx::Avx("AVX"): "avx" + ["sse4.2"] + fn uses_avx + ), + f!( + /// [AVX2] --- Advanced Vector Extensions 2 + /// + /// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2 + struct avx::Avx2("AVX2"): "avx2" + ["avx"] + fn uses_avx2 + ), + f!( + /// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions + /// + /// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16 + struct avx512::Avx512bf16("AVX512-BF16"): "avx512bf16" + ["avx512bw"] + fn uses_avx512bf16 + ), + f!( + /// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms + /// + /// + /// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG + struct avx512::Avx512bitalg("AVX512-BITALG"): "avx512bitalg" + ["avx512bw"] + fn uses_avx512bitalg + ), + f!( + /// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions + /// + /// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI + struct avx512::Avx512bw("AVX512-BW"): "avx512bw" + ["avx512f"] + fn uses_avx512bw + ), + f!( + /// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions + /// + /// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection + struct avx512::Avx512cd("AVX512-CD"): "avx512cd" + ["avx512f"] + fn uses_avx512cd + ), + f!( + /// [AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions + /// + /// 
[AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI + struct avx512::Avx512dq("AVX512-DQ"): "avx512dq" + ["avx512f"] + fn uses_avx512dq + ), + f!( + /// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation + /// + /// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512 + struct avx512::Avx512f("AVX512-F"): "avx512f" + ["avx2", "fma", "f16c"] + fn uses_avx512f + ), + f!( + /// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions + /// + /// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16 + struct avx512::Avx512fp16("AVX512-FP16"): "avx512fp16" + ["avx512bw"] + fn uses_avx512fp16 + ), + f!( + /// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add + /// + /// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA + struct avx512::Avx512ifma("AVX512-IFMA"): "avx512ifma" + ["avx512f"] + fn uses_avx512ifma + ), + f!( + /// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions + /// + /// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI + struct avx512::Avx512vbmi("AVX512-VBMI"): "avx512vbmi" + ["avx512bw"] + fn uses_avx512vbmi + ), + f!( + /// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2 + /// + /// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2 + struct avx512::Avx512vbmi2("AVX512-VBMI2"): "avx512vbmi2" + ["avx512bw"] + fn uses_avx512vbmi2 + ), + f!( + /// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions + /// + /// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512 + struct avx512::Avx512vl("AVX512-VL"): "avx512vl" + ["avx512f"] + fn uses_avx512vl + ), + f!( + /// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions + /// + /// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI + struct avx512::Avx512vnni("AVX512-VNNI"): "avx512vnni" + ["avx512f"] + fn uses_avx512vnni + ), + f!( + /// 
[AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers + /// + /// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT + struct avx512::Avx512vp2intersect("AVX512-VP2INTERSECT"): "avx512vp2intersect" + ["avx512f"] + fn uses_avx512vp2intersect + ), + f!( + /// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction + /// + /// [AVX512-VPOPCNTDQ]:https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG + struct avx512::Avx512vpopcntdq("AVX512-VPOPCNTDQ"): "avx512vpopcntdq" + ["avx512f"] + fn uses_avx512vpopcntdq + ), + f!( + /// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add + /// + /// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct avx::Avxifma("AVX-IFMA"): "avxifma" + ["avx2"] + fn uses_avxifma + ), + f!( + /// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions + /// + /// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct avx::Avxneconvert("AVX-NE-CONVERT"): "avxneconvert" + ["avx2"] + fn uses_avxneconvert + ), + f!( + /// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions + /// + /// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct avx::Avxvnni("AVX-VNNI"): "avxvnni" + ["avx2"] + fn uses_avxvnni + ), + f!( + /// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers + /// + /// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct avx::Avxvnniint16("AVX-VNNI-INT16"): "avxvnniint16" + ["avx2"] + fn uses_avxvnniint16 + ), + f!( + /// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers + /// + /// [AVX-VNNI-INT8]: 
https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct avx::Avxvnniint8("AVX-VNNI-INT8"): "avxvnniint8" + ["avx2"] + fn uses_avxvnniint8 + ), + f!( + /// [BMI1] --- Bit Manipulation Instruction Sets + /// + /// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets + struct v3::Bmi1("BMI1"): "bmi1" + [] + fn uses_bmi1 + ), + f!( + /// [BMI2] --- Bit Manipulation Instruction Sets 2 + /// + /// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2 + struct v3::Bmi2("BMI2"): "bmi2" + [] + fn uses_bmi2 + ), + f!( + /// [`cmpxchg16b`] --- Compare and exchange 16 bytes (128 bits) of data atomically + /// + /// [`cmpxchg16b`]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + struct v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + [] + fn uses_cmpxchg16b + ), + f!( + /// [F16C] --- 16-bit floating point conversion instructions + /// + /// [F16C]: https://en.wikipedia.org/wiki/F16C + struct v3::F16c("F16C"): "f16c" + ["avx"] + fn uses_f16c + ), + f!( + /// [FMA3] --- Three-operand fused multiply-add + /// + /// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set + struct v3::Fma("FMA3"): "fma" + ["avx"] + fn uses_fma + ), + f!( + /// [`fxsave`] and [`fxrstor`] --- Save and restore x87 FPU, MMX Technology, and SSE State + /// + /// [`fxsave`]: https://www.felixcloutier.com/x86/fxsave + /// [`fxrstor`]: https://www.felixcloutier.com/x86/fxrstor + struct sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + [] + fn uses_fxsr + ), + f!( + /// [GFNI] --- Galois Field New Instructions + /// + /// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI + struct crypto::Gfni("GFNI"): "gfni" + ["sse2"] + fn uses_gfni + ), + f!( + /// [KEYLOCKER] --- Intel Key Locker Instructions + /// + /// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions + struct crypto::Keylocker("KEYLOCKER"): "kl" + [] + fn uses_keylocker + ), + f!( + /// [`lzcnt`] --- Leading zeros
count + /// + /// [`lzcnt`]: https://www.felixcloutier.com/x86/lzcnt + struct v3::Lzcnt("`lzcnt`"): "lzcnt" + [] + fn uses_lzcnt + ), + f!( + /// [`movbe`] --- Move data after swapping bytes + /// + /// [`movbe`]: https://www.felixcloutier.com/x86/movbe + struct v3::Movbe("`movbe`"): "movbe" + [] + fn uses_movbe + ), + f!( + /// [`pclmulqdq`] --- Packed carry-less multiplication quadword + /// + /// [`pclmulqdq`]: https://www.felixcloutier.com/x86/pclmulqdq + struct crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"] + fn uses_pclmulqdq + ), + f!( + /// [`popcnt`] --- Count of bits set to 1 + /// + /// [`popcnt`]: https://www.felixcloutier.com/x86/popcnt + struct v2::Popcnt("`popcnt`"): "popcnt" + [] + fn uses_popcnt + ), + f!( + /// [`rdrand`] --- Read random number + /// + /// [`rdrand`]: https://en.wikipedia.org/wiki/RdRand + struct crypto::Rdrand("`rdrand`"): "rdrand" + [] + fn uses_rdrand + ), + f!( + /// [`rdseed`] --- Read random seed + /// + /// [`rdseed`]: https://en.wikipedia.org/wiki/RdRand + struct crypto::Rdseed("`rdseed`"): "rdseed" + [] + fn uses_rdseed + ), + f!( + /// [SHA] --- Secure Hash Algorithm + /// + /// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions + struct crypto::Sha("SHA"): "sha" + ["sse2"] + fn uses_sha + ), + f!( + /// [SHA512] --- Secure Hash Algorithm with 512-bit digest + /// + /// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions + struct crypto::Sha512("SHA512"): "sha512" + ["avx2"] + fn uses_sha512 + ), + f!( + /// [SM3] --- ShangMi 3 Hash Algorithm + /// + /// [SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions + struct crypto::Sm3("SM3"): "sm3" + ["avx"] + fn uses_sm3 + ), + f!( + /// [SM4] --- ShangMi 4 Cipher Algorithm + /// + /// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions + struct crypto::Sm4("SM4"): "sm4" + ["avx2"] + fn uses_sm4 + ), + f!( + /// [SSE] --- Streaming SIMD 
Extensions + /// + /// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions + struct sse::Sse("SSE"): "sse" + [] + fn uses_sse + ), + f!( + /// [SSE2] --- Streaming SIMD Extensions 2 + /// + /// [SSE2]: https://en.wikipedia.org/wiki/SSE2 + struct sse::Sse2("SSE2"): "sse2" + ["sse"] + fn uses_sse2 + ), + f!( + /// [SSE3] --- Streaming SIMD Extensions 3 + /// + /// [SSE3]: https://en.wikipedia.org/wiki/SSE3 + struct sse::Sse3("SSE3"): "sse3" + ["sse2"] + fn uses_sse3 + ), + f!( + /// [SSE4.1] --- Streaming SIMD Extensions 4.1 + /// + /// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1 + struct sse::Sse4_1("SSE4.1"): "sse4.1" + ["ssse3"] + fn uses_sse4 + ), + f!( + /// [SSE4.2] --- Streaming SIMD Extensions 4.2 + /// + /// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2 + struct sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"] + fn uses_sse4 + ), + f!( + /// [SSE4a] --- Streaming SIMD Extensions 4a + /// + /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a + struct sse::Sse4a("SSE4a"): "sse4a" + ["sse3"] + fn uses_sse4a + ), + f!( + /// [SSSE3] --- Supplemental Streaming SIMD Extensions 3 + /// + /// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3 + struct sse::SupplementalSse3("SSSE3"): "ssse3" + ["sse3"] + fn uses_ssse3 + ), + f!( + /// [TBM] --- Trailing Bit Manipulation + /// + /// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation) + struct discontinued::Tbm("TBM"): "tbm" + [] + fn uses_tbm + ), + f!( + /// [VAES] --- Vector AES Instructions + /// + /// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES + struct crypto::Vaes("VAES"): "vaes" + ["avx2", "aes"] + fn uses_vaes + ), + f!( + /// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords + /// + /// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ + struct crypto::Vpclmulqdq("VPCLMULQDQ"): "vpclmulqdq" + ["avx", "pclmulqdq"] + fn uses_vpclmulqdq + ), + f!( + /// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions + /// +
/// [KEYLOCKER_WIDE]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions + struct crypto::WideKeylocker("WIDE KEYLOCKER"): "widekl" + ["kl"] + fn uses_wide_keylocker + ), + f!( + /// [`xsave`] --- Save processor extended states + /// + /// [`xsave`]: https://www.felixcloutier.com/x86/xsave + struct xsave::Xsave("`xsave`"): "xsave" + [] + fn uses_xsave + ), + f!( + /// [`xsavec`] --- Save processor extended states with compaction + /// + /// [`xsavec`]: https://www.felixcloutier.com/x86/xsavec + struct xsave::Xsavec("`xsavec`"): "xsavec" + [] + fn uses_xsavec + ), + f!( + /// [`xsaveopt`] --- Save processor extended states optimized + /// + /// [`xsaveopt`]: https://www.felixcloutier.com/x86/xsaveopt + struct xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + [] + fn uses_xsaveopt + ), + f!( + /// [`xsaves`] --- Save processor extended states supervisor + /// + /// [`xsaves`]: https://www.felixcloutier.com/x86/xsaves + struct xsave::Xsaves("`xsaves`"): "xsaves" + [] + fn uses_xsaves + ), +]; + +// All taken from + +pub(crate) const X86_LEVEL_TEMPLATE: &str = include_str!("../../templates/x86_level.rs"); + +/// The target features required in the x86-64-v1 level. +// Rust doesn't have target features for "cmov", "cmpxchg8b", "fpu", "sce", and "mmx". +// The first four are all assumed, and the final is not implemented because +// it's practically impossible to use correctly (and there's no reason to). +pub(crate) const X86_V1: &[&str] = &["fxsr", "sse", "sse2"]; +/// The target features required in the x86-64-v2 level, in addition to those already in [`X86_V1`]. +pub(crate) const X86_V2: &[&str] = &[ + "sse3", + "ssse3", + "sse4.1", + "sse4.2", + "popcnt", + "cmpxchg16b", + // The lahfsahf target feature is currently in Rust beta. + // "lahfsahf", +]; +/// The target features required in the x86-64-v3 level, excluding those already in [`X86_V2`].
+pub(crate) const X86_V3: &[&str] = &[ + "avx", "avx2", "bmi1", "bmi2", "f16c", "fma", "lzcnt", "movbe", "xsave", +]; +/// The target features required in the x86-64-v4 level, excluding those already in [`X86_V3`]. +pub(crate) const X86_V4: &[&str] = &["avx512f", "avx512bw", "avx512cd", "avx512dq", "avx512vl"]; diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs new file mode 100644 index 00000000..35d8c233 --- /dev/null +++ b/fearless_simd_core/gen/src/main.rs @@ -0,0 +1,354 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +mod data; + +use std::collections::HashSet; +use std::fmt::Write; +use std::fs; +use std::hash::RandomState; +use std::{ + cell::RefCell, + collections::HashMap, + fs::create_dir_all, + io, + path::{Path, PathBuf}, +}; + +use crate::data::X86_LEVEL_TEMPLATE; + +fn main() { + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let src_dir = manifest_dir.ancestors().nth(1).unwrap().join("src"); + { + let x86_features = normalize_features(data::X86_FEATURES); + generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, &x86_features).unwrap(); + let mut features: Vec<&'static str> = Vec::new(); + features.extend(data::X86_V1); + generate_x86_level(&src_dir, "v1", &x86_features, &features).unwrap(); + features.extend(data::X86_V2); + generate_x86_level(&src_dir, "v2", &x86_features, &features).unwrap(); + features.extend(data::X86_V3); + generate_x86_level(&src_dir, "v3", &x86_features, &features).unwrap(); + features.extend(data::X86_V4); + generate_x86_level(&src_dir, "v4", &x86_features, &features).unwrap(); + } +} + +fn generate_for_arch( + root_dir: &Path, + arch_module_name: &str, + template: &str, + features: &[NormalizedFeature], +) -> io::Result<()> { + let arch_dir = root_dir.join(arch_module_name); + for feature in features { + let mut new_docs = String::new(); + for line in feature.feature.extra_docs.lines() { + writeln!(&mut new_docs, 
"///{line}").unwrap(); + } + let enabled_feature_str_list = format!( + r#""{}", {}"#, + feature.feature.feature_name, + feature + .children + .iter() + .map(|it| format!(r#""{it}""#)) + .collect::>() + .join(", ") + ); + let mut from_impls = String::new(); + for child in &feature.children { + let from_feature = features + .iter() + .find(|it| it.feature.feature_name == *child) + .unwrap(); + let type_path = format!( + "crate::{arch_module_name}::{}::{}", + from_feature.feature.module, from_feature.feature.struct_name + ); + write!( + from_impls, + "\n\ +impl From for {type_path} {{ + fn from(value: FEATURE_STRUCT_NAME) -> Self {{ + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([FEATURE_STRUCT_NAME = value] => \"{{FEATURE_ID}}\", fn() -> {type_path} {{ {type_path}::new() }}) + }} +}}\n" + ).unwrap(); + } + let mut result = String::from(template); + // We replace the from impls first, as they use template variables from the rest of this. + result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("// {AUTOGEN_COMMENT}\n", AUTOGEN_COMMENT); + result = result.replace("{FEATURE_DOCS_NAME}", feature.feature.feature_docs_name); + result = result.replace("/// {NEW_DOCS}\n", &new_docs); + result = result.replace("{FEATURE_ID}", feature.feature.feature_name); + result = result.replace( + "{EXAMPLE_FUNCTION_NAME}", + feature.feature.example_function_name, + ); + result = result.replace("FEATURE_STRUCT_NAME", feature.feature.struct_name); + result = result.replace( + r#""{ENABLED_FEATURES_STR_LIST}""#, + &enabled_feature_str_list, + ); + + let module_dir = arch_dir.join(feature.feature.module); + create_dir_all(&module_dir)?; + let mut file = module_dir.join(feature.feature.feature_name.replace(".", "_")); + file.set_extension("rs"); + fs::write(file, result)?; + } + Ok(()) +} + +/// Generate the code for an X86 microarchitecture level. 
+fn generate_x86_level( + root_dir: &Path, + level: &'static str, + all_features: &[NormalizedFeature], + required_features: &[&'static str], +) -> io::Result<()> { + // Precalculate the sets of features we need to support. + // Intermediate (unsorted) value for `superset`, which is rebuilt as a sorted list below. + let mut superset = HashSet::new(); + for feature in required_features { + superset.insert(*feature); + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + superset.extend(&normalized.children); + } + + // Every single target feature supported on this level, including those implied. + // (In all likelihood, this is the same as `required_features`, but I'd rather validate that manually) + let mut superset = superset.into_iter().collect::>(); + superset.sort(); + let mut lcd = HashSet::<_, RandomState>::from_iter(superset.iter().copied()); + // We make the assumption that features are a tree, that is, there's no case where `A->B` and `B->A`. + // However, even if that didn't hold, we at least use a consistent ordering here. + // We test from the superset to be safe; this should be equivalent to using `required_features`, though. + for feature in &superset { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + for feature in &normalized.children { + // If the feature is a child of another required feature, we know we don't need it for this version. + // We don't care whether or not it was actually removed. + lcd.remove(*feature); + } + } + // The set of features which are strictly required. + // This is used to create the target feature string, so that it can be as short as possible. + let mut lcd = lcd.into_iter().collect::>(); + lcd.sort(); + // Now that we have lcd and superset, we can preprocess what we need for the actual file. + + let level_struct_name = level.to_uppercase(); + // The target_feature(enable = "...") string. + let lcd_contents = lcd.join(","); + // The fields of the new struct.
+ let lcd_field_definitions = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); + format!( + "/// The contained proof that {} is available.\n\ + pub {feature}: {type_path},\n", + normalized.feature.feature_docs_name + ) + }) + .collect::(); + // The enabled FEATURES. + let superset_list = superset + .iter() + .map(|it| format!(r#""{it}""#)) + .collect::>() + .join(", "); + // First argument to `trampoline!` + let lcd_trampoline = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); + format!("{type_path} = self.{feature}") + }) + .collect::>() + .join(", "); + // The version of the struct initializer in `try_new`. + let struct_initializer_try_new = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); + // We rely on rustfmt to get the tab spacing right. + format!("\t{feature}: {type_path}::try_new()?,\n") + }) + .collect::(); + // The version of the struct initializer in `new`. 
+ let struct_initializer_new = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); + format!("\t{feature}: {type_path}::new(),\n") + }) + .collect::(); + + let mut from_impls = String::new(); + for child in &superset { + let from_feature = all_features + .iter() + .find(|it| it.feature.feature_name == *child) + .unwrap(); + let type_path = format!("crate::x86::{level}::{}", from_feature.feature.struct_name); + write!( + from_impls, + "\n\ +impl From for {type_path} {{ + fn from(value: LEVEL_STRUCT_NAME) -> Self {{ + // This serves as a correctness check of the implicitly enabled features. + trampoline!([LEVEL_STRUCT_NAME = value] => \"{{LEVEL_FEATURE_LCD_CONTENTS}}\", fn() -> {type_path} {{ {type_path}::new() }}) + }} +}}\n" + ).unwrap(); + } + + let mut result = String::from(X86_LEVEL_TEMPLATE); + // We replace the from impls first, as they use template variables from the rest of this. 
+ result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("// {AUTOGEN_COMMENT}", AUTOGEN_COMMENT); + result = result.replace("LEVEL_STRUCT_NAME", &level_struct_name); + result = result.replace("{LEVEL_ID}", level); + result = result.replace("{LEVEL_FEATURE_LCD_CONTENTS}", &lcd_contents); + result = result.replace( + "/*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/", + &lcd_field_definitions, + ); + result = result.replace(r#""{LEVEL_FEATURE_SUPERSET_LIST}""#, &superset_list); + result = result.replace("{LEVEL_FEATURE_LCD_TRAMPOLINE}", &lcd_trampoline); + + result = result.replace( + "/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/", + &struct_initializer_try_new, + ); + result = result.replace( + "/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/", + &struct_initializer_new, + ); + + let arch_dir = root_dir.join("x86"); + let module_dir = arch_dir.join(level); + create_dir_all(&module_dir)?; + let output_path = module_dir.join("level.rs"); + fs::write(output_path, result)?; + Ok(()) +} + +const AUTOGEN_COMMENT: &str = "// This file is automatically generated by `fearless_simd_core_gen`.\n\ + // Its template can be found in `fearless_simd_core/gen/templates`."; + +#[derive(Debug)] +struct Feature { + /// The name of the struct to be generated. + struct_name: &'static str, + /// The Rust name for the feature, e.g. `"sse"`. + feature_name: &'static str, + /// The array of features which are implicitly enabled by this feature. + /// Note that this array does not include transitive enabled features. + directly_implicitly_enabled: &'static [&'static str], + /// Any additional docs which we want to add to the module. + extra_docs: &'static str, + /// The name of the function used in the examples. + /// Ideally, we'd make this optional, but that starts making the templating look more complicated. + example_function_name: &'static str, + /// The "display name" for the feature, used inside the docs. 
+ feature_docs_name: &'static str, + /// The module (if any) this feature will belong to. + /// + /// (Note that imports into the module are checked to exist, but not automatically inserted). + module: &'static str, +} + +/// Implementation detail intermediate struct of `normalize_features`. +struct MaybeNormalizedFeature { + /// The actual feature. + feature: &'static Feature, + /// The fully deduplicated, sorted list of target features enabled by this feature, including with all + /// implicitly enabled features resolved. + /// + /// Note that this *excludes* the parent target feature. + // We use a RefCell here as we know there cannot be loops. + children: RefCell>>, +} + +#[derive(Debug)] +struct NormalizedFeature { + feature: &'static Feature, + children: Vec<&'static str>, +} + +fn normalize_features(features: &'static [Feature]) -> Vec { + let mut state = HashMap::new(); + for feature in features { + state.insert( + feature.feature_name, + MaybeNormalizedFeature { + feature, + children: RefCell::new(None), + }, + ); + } + fn handle_item(state: &HashMap<&str, MaybeNormalizedFeature>, item: &MaybeNormalizedFeature) { + // We borrow for the entire lifetime to avoid infinite loops. 
+ let mut borrowed_children = item.children.borrow_mut(); + if borrowed_children.is_some() { + return; + } + let mut new_children = Vec::new(); + for child in item.feature.directly_implicitly_enabled { + new_children.push(*child); + let child = state + .get(child) + .expect("Every implicitly enabled feature should exist."); + handle_item(state, child); + new_children.extend_from_slice(child.children.borrow().as_ref().unwrap()); + } + new_children.sort(); + new_children.dedup(); + *borrowed_children = Some(new_children); + } + for feature in state.values() { + handle_item(&state, feature); + } + let mut output = Vec::new(); + for (_, feature) in state { + output.push(NormalizedFeature { + feature: feature.feature, + children: feature.children.into_inner().unwrap(), + }); + } + output.sort_by_key(|it| it.feature.feature_name); + output +} diff --git a/fearless_simd_core/gen/templates/aarch64.rs b/fearless_simd_core/gen/templates/aarch64.rs new file mode 100644 index 00000000..e8fbb89a --- /dev/null +++ b/fearless_simd_core/gen/templates/aarch64.rs @@ -0,0 +1,4 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// {AUTOGEN_COMMENT} diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs new file mode 100644 index 00000000..777dcae3 --- /dev/null +++ b/fearless_simd_core/gen/templates/x86.rs @@ -0,0 +1,95 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// {AUTOGEN_COMMENT} + +//! The {FEATURE_DOCS_NAME} target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// {NEW_DOCS} +/// +/// A token indicating that the current CPU has the `{FEATURE_ID}` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "{FEATURE_ID}")] +/// fn {EXAMPLE_FUNCTION_NAME}() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct FEATURE_STRUCT_NAME { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for FEATURE_STRUCT_NAME { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""{FEATURE_ID}" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for FEATURE_STRUCT_NAME { + const FEATURES: &[&str] = &["{ENABLED_FEATURES_STR_LIST}"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([FEATURE_STRUCT_NAME = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl FEATURE_STRUCT_NAME { + #[cfg(feature = "std")] + /// Create a new token if the `"{FEATURE_ID}"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("{FEATURE_ID}") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "{FEATURE_ID}")] + /// Create a new token for the "{FEATURE_ID}" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// {FEATURE_DOCS_NAME} is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "{FEATURE_ID}" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} +/*{FROM_IMPLS}*/ + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs new file mode 100644 index 00000000..51aaa6d9 --- /dev/null +++ b/fearless_simd_core/gen/templates/x86_level.rs @@ -0,0 +1,97 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// {AUTOGEN_COMMENT} + +//! The x86-64-{LEVEL_ID} microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-{LEVEL_ID} microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")] +/// fn uses_x86_64_{LEVEL_ID}() { +/// // ... 
+/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct LEVEL_STRUCT_NAME { + /*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/ + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for LEVEL_STRUCT_NAME { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-{LEVEL_ID} enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for LEVEL_STRUCT_NAME { + const FEATURES: &[&str] = &["{LEVEL_FEATURE_SUPERSET_LIST}"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly mapes to the target feature string. + trampoline!([{LEVEL_FEATURE_LCD_TRAMPOLINE}] => "{LEVEL_FEATURE_LCD_CONTENTS}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl LEVEL_STRUCT_NAME { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-{LEVEL_ID} target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + /*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/ + }) + } + + #[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")] + /// Create a new token for the x86-64-{LEVEL_ID} microarchitecture level. 
+ /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-{LEVEL_ID} is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "{LEVEL_FEATURE_LCD_CONTENTS}" target feature is available. + pub fn new() -> Self { + Self { + /*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/ + } + } +} + +/*{FROM_IMPLS}*/ + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs new file mode 100644 index 00000000..924bbd0b --- /dev/null +++ b/fearless_simd_core/src/lib.rs @@ -0,0 +1,351 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust. +//! +//! This crate introduces the [`trampoline!`] macro, which allows running code in a +//! statically validated `#[target_feature(enable="some_features")]` environment, based on +//! externally provided tokens. +//! This abstraction is designed to be combined with target features 1.1, the recent update +//! in the Rust compiler to allow calling `#[target_feature]` functions safely from within +//! other `#[target_feature]` functions. +//! As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`]. +//! +//! This crate also has modules which contain a token for each Rust target feature. +//! These each have a `try_new` constructor, which validates whether the corresponding +//! target feature is available, then creates a token if it is. +//! These are grouped by architecture: +//! +//! - [`x86`] contains the tokens for both the x86 and x86-64 targets. +//! 
It also contains a token for each x86-64 microarchitecture level, see [`x86::V1`] for details. +//! +//! +//! # Examples +//! +//! At the time of writing, it is not possible to turn scalar values into SIMD +//! vector types safely using only the standard library. +//! These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. +//! +//! Note: These examples are currently pending. +//! +//! +//! Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature. +//! This is also the case for WASM with `wasm_simd`, but note that this crate +//! [isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to +//! call `#[target_feature]` functions on that platform. +//! +//! # Crate Feature Flags +//! +//! +//! +//! # Implementation +//! +//! The tokens provided to [`trampoline!`] implement the [`TargetFeatureToken`] trait, +//! which indicates that a value of that token is only possible to construct if the set +//! of target features it specifies are enabled. +//! This means that the macro can use the existence of these token values as +//! safety proofs that calling a function with those target features is safe. +//! +//! This safety proof happens entirely in const evaluation, so if there's a mistake with the +//! proof, it will cause a compilation error. +//! The code generated by this macro is thus a function containing the provided code, marked +//! with `#[target_feature]`, and a call to this newly generated function. +//! +//! [attributes.codegen.target_feature.wasm]: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.wasm + +// LINEBENDER LINT SET - lib.rs - v4 +// See https://linebender.org/wiki/canonical-lints/ +// These lints shouldn't apply to examples or tests. +#![cfg_attr(not(test), warn(unused_crate_dependencies))] +// These lints shouldn't apply to examples. +#![warn(clippy::print_stdout, clippy::print_stderr)] +// Targeting e.g. 
32-bit means structs containing usize can give false positives for 64-bit. +#![cfg_attr(target_pointer_width = "64", warn(clippy::trivially_copy_pass_by_ref))] +// END LINEBENDER LINT SET +#![cfg_attr(docsrs, feature(doc_cfg))] +#![no_std] + +#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))] +pub mod x86; + +pub mod support; + +#[cfg(feature = "std")] +extern crate std; + +/// Token which proves that a set of target features is available. +/// +/// Note that this trait is only meaningful when there are values of this type. +/// That is, to enable the target features in `FEATURES`, you *must* have a value +/// of this type. +/// +/// Values which implement this trait are used in the first argument to [`trampoline!`], +/// which is a safe abstraction over enabling target features. +/// +/// # Safety +/// +/// To construct a value of a type implementing this trait, you must have proven that each +/// target feature in `FEATURES` is available. +pub unsafe trait TargetFeatureToken: Copy { + /// The set of target features which the current CPU has, if + /// you have a value of this type. + const FEATURES: &[&str]; + + /// Enable the target features in `FEATURES` for a single run of `f`, and run it. + /// + /// `f` must be marked `#[inline(always)]` for this to work. + /// + /// Note that this does *not* enable the target features on the Rust side (i.e. for calling intrinsics safely). + /// To do so, you should instead use [`trampoline!`] directly - this is a convenience wrapper around `trampoline` + /// for cases where either autovectorisation is sufficient, or dispatch to SIMD intrinsics is handled elsewhere. + fn vectorize(self, f: impl FnOnce() -> R) -> R; +} + +/// Run an operation in a context with specific target features enabled, validated with [`TargetFeatureToken`] values. +/// +/// This is effectively a stable implementation of the "Struct Target Features" Rust feature, +/// which at the time of writing is in neither stable nor nightly Rust.
+/// This macro can be used to make both SIMD dispatch and explicit SIMD safe. +/// +/// # Reference +/// +/// These reference examples presume that you have the following. +/// The parts of the examples referring to each prerequisite are provided in the brackets: +/// +/// - An expression (`token`) of a type (`Token`) which implements `TargetFeatureToken` for some target features (`"f1,f2,f3"`); +/// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset +/// of those target features (annotated `#[target_feature(enable = "f1,f2")]`); +/// - Local values of types corresponding to the argument types (`a` of type `[f32; 4]`). +/// +/// ```rust,ignore +/// trampoline!(Token = token => "f1,f2,f3", uses_simd(a: [f32; 4]) -> [f32; 4]); +/// // Or equivalently, as `uses_simd` doesn't require `f3`: +/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]); +/// ``` +/// +/// Multiple tokens are also supported by providing them in a sequence in square brackets. +/// The target feature string must be a subset of the total features made available by the tokens: +/// +/// ```rust,ignore +/// trampoline!([Token = token, Sse = my_sse] => "f1,f2,sse", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// ``` +/// +/// This is fully validated for safety, so the following example would fail to compile: +/// +/// ```rust,ignore,compile_fail +/// // ERROR: call to function `uses_simd` with `#[target_feature]` is unsafe and requires unsafe block +/// // in order for the call to be safe, the context requires the following additional target feature: f2 +/// trampoline!(Token = token => "f1", uses_simd(a: [f32; 4]) -> [f32; 4]); +/// ``` +/// +/// A more advanced syntax is available if you need to use generics. +/// That syntax is explained in comments around the macro's definition.
+/// For reference, the implementation used to implement [`vectorize`](TargetFeatureToken::vectorize) for `"sse"` is: +/// +/// ```rust,ignore +/// trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) +/// ``` +/// +/// There is also support for a where clause, after the return type. +/// +/// # Motivation +/// +/// In Fearless SIMD, this macro has three primary use cases: +/// +/// 1) By end-users, to dispatch to a specialised SIMD implementation of a function using target-specific +/// instructions, which will be more efficient than a generic version written using the portable subset. +/// 2) To implement the portable subset of SIMD operations. +/// 3) To implement the `dispatch!` macro and `Simd::vectorize`, which allow SIMD intrinsics to +/// be correctly inlined when writing portable SIMD code. +/// +/// To expand on use case 1, when using Fearless SIMD you will often be writing functions which are +/// instantiated for multiple different SIMD levels (using generics). +/// However, for certain SIMD levels, there may be specific instructions which solve your problem more +/// efficiently than using the generic implementations (as an example, consider SHA256 hashing, which has +/// built-in instructions on several architectures). +/// However, in such generic implementations, the Rust type system doesn't know which target features are enabled, +/// so it would ordinarily require writing code to: +/// +/// - detect whether a specific target feature is supported. +/// - unsafely, enter a context where the target feature is enabled in a way which makes the type system aware of this. +/// +/// This macro provides a way to do the second safely once you have completed the first. +/// +/// # Example +/// +/// This expands upon the example in the reference, written out completely. +/// +/// ```rust,ignore +/// // Just once, acquire a token.
+/// let token = Token::try_new(); +/// // Later, dispatch based on whether that token is available, potentially multiple times: +/// +/// /// Perform some computation using SIMD. +/// #[target_feature(enable = "f1,f2")] +/// fn uses_simd(val: [f32; 4]) -> [f32; 4] { +/// // ... +/// } +/// +/// let a = [1., 2., 3., 4.]; +/// let Some(token) = token else { return scalar_fallback(a) }; +/// +/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// ``` +/// +// TODO: We could write an example for each of ARM, x86, and conditionally compile it in? +/// Note that our examples are all ignored as there is no target feature which is available on every platform, +/// but we need these docs to compile for users on any platform. +/// +/// # Soundness +/// +/// This macro is designed to be sound, i.e. no input to this macro can lead to undefined behaviour +/// without using the `unsafe` keyword. +/// +/// The operation provided will only ever be immediately called once on the same thread as the macro caller, +/// so safety justifications within the operation can rely on the context of the call site of this macro. +/// The shorthand format does not allow calling unsafe functions. +#[macro_export] +macro_rules! trampoline { + // [Sse = sse] for "sse", <(u32)> fn<(T: Int)>(a: [T; 4]) -> T where (...) {...} + ( + // The token types, with an expression to get a value of that token kind. + [$($token_type: path = $token: expr),+$(,)?] + // The target feature to enable. Must be a string literal. + => $to_enable: literal, + // The generic arguments to instantiate the call to the generated function with. + // Note the inner brackets, needed because we can't write a parser for this in macros. + $(<($($generic_instantiation: tt)+)>)? + // The generic parameters to give the inner generated function. + // Brackets needed as above. + fn$(<($($generic_args: tt)*)>)? + // The arguments to the function, with provided explicit values, plus return type and where clause. 
+ ($($arg_name: ident: $arg_type: ty = $arg_value: expr),*$(,)?) $(-> $ret: ty)? + // The where clause of the generated function. + // Note the inner brackets after `where`, needed as above. + $(where ($($where: tt)*))? + // The operation to run inside the context with the target feature enabled. + $op: block + ) => {{ + #[target_feature(enable = $to_enable)] + #[inline] + // TODO: Do we want any other attributes here? + // Soundness: We wrap the $op in a wrapping block, to ensure that any inner attributes don't apply to the function. + // This ensures that the user can't add `#![target_feature(enable = "xxx")]` to their block. + // Soundness: Either of generic_args and `$where` could be used to exit the function item early, so aren't + // inside an unsafe block. + fn trampoline_impl$(<$($generic_args)*>)?($($arg_name: $arg_type),*) $(-> $ret)? $(where $($where)*)? { $op } + + $( + // We validate that we actually have a token of each claimed type. + let _: $token_type = $token; + )+ + // We use a const item rather than a const block to ensure that the const evaluation happens eagerly, + // ensuring that we don't create functions which look valid but actually will always fail when actually codegenned. + // This does mean that you can't use tokens "generically", but it's hard to think of cases where that + // would be usable anyway. For any case where that is valid, you can always manually create the + // "subsetted" token/tokens beforehand using the `From` impls. + const _: () = { + // And that the claimed types justify enabling the enabled target features. + $crate::support::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+]) + // TODO: Better failure message here (i.e. at least concatting the set of requested features) + .unwrap(); + }; + + $( + // Soundness: We use `arg_value` outside of the macro body to ensure it doesn't + // accidentally gain an unsafe capability. 
+ #[allow(clippy::redundant_locals, reason="Required for consistency/safety.")] + let $arg_name = $arg_value; + )* + // Safety: We have validated that the target features enabled in `trampoline_impl` are enabled, + // because we have values of token types which implement $crate::TargetFeatureToken + // Soundness: `$generic_args` could be used to exit the path expression early. As `<>` are + // not treated as "real" brackets by macros, this isn't practical to detect and avoid statically. + // To try and ensure that this can't turn into unsoundness, the + // `trampoline_impl::<$generic_instantiation>` is evaluated outside of an unsafe block. + // In theory, if a user could make the value of `func` be an `unsafe` fn pointer or + // item type, this would still be unsound. + // However, we haven't found a way for this to compile given the trailing `>`, + // so aren't aware of any actual unsoundness. But note that this hasn't been rigorously proven, + // and new Rust features could open this up wider. + let func = trampoline_impl$(::<$($generic_instantiation)*>)?; + unsafe { func($($arg_name),*) } + }}; + // Sse = sse => "sse", sse_do_x(a: [f32; 4], b: [f32; 4]) -> [f32; 4] + ($token_type: path = $token: expr => $to_enable: literal, $function: ident($($arg_name: ident: $arg_type: ty),*$(,)?) $(-> $ret: ty)?) => { + $crate::trampoline!( + [$token_type = $token] + => $to_enable, + $function($($arg_name: $arg_type),*) $(-> $ret)? + ) + }; + // [Sse = sse] => "sse", sse_do_x(a: [f32; 4], b: [f32; 4]) -> [f32; 4] + ([$($token_type: path = $token: expr),+$(,)?] => $to_enable: literal, $function: ident($($arg_name: ident: $arg_type: ty),*$(,)?) $(-> $ret: ty)?) => { + $crate::trampoline!( + [$($token_type = $token),+] + => $to_enable, + fn($($arg_name: $arg_type = $arg_name),*) $(-> $ret)? 
{ $function($($arg_name),*) } + ) + }; +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(test)] +mod example_expansion { + #[cfg(target_arch = "x86")] + use core::arch::x86::{__m128, _mm_mul_ps}; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::{__m128, _mm_mul_ps}; + + use crate::x86::{self, v1::Sse}; + + #[target_feature(enable = "sse")] + fn sse_mul_f32s(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { + let a: __m128 = bytemuck::must_cast(a); + let b: __m128 = bytemuck::must_cast(b); + bytemuck::must_cast(_mm_mul_ps(a, b)) + } + + #[test] + // This is a test so that it is runnable + fn example_output() { + let Some(sse) = x86::v1::Sse::try_new() else { + panic!("Example code") + }; + let a = [10_f32, 20_f32, 30_f32, 40_f32]; + let b = [4_f32, 5_f32, 6_f32, 7_f32]; + + // Both of these example expansions, the former using the shorthand form: + let res = + trampoline!(Sse = sse => "sse", sse_mul_f32s(a: [f32; 4], b: [f32; 4]) -> [f32; 4]); + assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]); + let res = trampoline!([Sse = sse] => "sse", fn(a: [f32; 4] = a, b: [f32; 4] = b) -> [f32; 4] { sse_mul_f32s(a, b)}); + assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]); + // will expand to: + #[expect(unused_braces, reason = "Required for macro soundness.")] + // Start expansion: + let res = { + #[target_feature(enable = "sse")] + #[inline] + fn trampoline_impl(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { + { sse_mul_f32s(a, b) } + } + let _: Sse = sse; + const _: () = { + crate::support::is_feature_subset( + "sse", + [::FEATURES], + ) + .unwrap(); + }; + #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] + let a = a; + #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] + let b = b; + let func = trampoline_impl; + unsafe { func(a, b) } + }; + // End expansion + assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]); + } +} diff --git a/fearless_simd_core/src/support.rs 
b/fearless_simd_core/src/support.rs new file mode 100644 index 00000000..cf530b3a --- /dev/null +++ b/fearless_simd_core/src/support.rs @@ -0,0 +1,249 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Support for the safety checks in [`trampoline!`](crate::trampoline!). +//! +//! Methods to compute whether each feature in a target feature string (e.g. "sse2,fma") +//! is supported by a set of target features. +//! +//! The [`trampoline`](crate::trampoline!) macro takes both a target feature string, +//! and one (or more) [`TargetFeatureToken`](crate::TargetFeatureToken). +//! It uses the functions in this module to validate that the target feature string is +//! supported by the provided tokens. +//! +//! Because evaluating whether this is safe needs to happen at compile time (for both performance +//! and predictability), the methods in this file are written as `const` functions. +//! This leads to a bit of weirdness, including treating strings as `&[u8]` internally, as that +//! actually allows slicing (i.e. reading individual bytes). As far as I know, that isn't +//! currently possible in const contexts for strings. +//! Note that the code is still written to be UTF-8 compatible, although we believe that +//! all currently supported target features are ASCII anyway. + +/// The result of `is_feature_subset`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[must_use] +pub enum SubsetResult { + /// The required features are a subset of the permitted features. + Yes, + /// The required features are not all available. + No { + /// The feature which was found to be missing (there may be several such features). + failing: &'static str, + }, +} + +impl SubsetResult { + /// A utility method to panic if the target features aren't supported. + // TODO: How much more context would we be able to give if we inlined this? 
+ pub const fn unwrap(self) { + match self { + Self::Yes => (), + // This is const, so we can't actually format out the failing value :( + Self::No { .. } => panic!("Tokens provided are missing a necessary target feature."), + } + } +} + +/// Determine whether the features in the target feature string `required` are a subset of the features in `permitted`. +/// See [the module level docs][self]. +/// +/// We require static lifetimes as this is primarily internal to the macro. +pub const fn is_feature_subset( + required: &'static str, + permitted: [&[&'static str]; N], +) -> SubsetResult { + let mut required_bytes = required.as_bytes(); + let mut finished = false; + 'input_feature: while !finished { + let mut comma_idx = 0; + // Find the first comma in required_bytes, or the end of the string. + while comma_idx < required_bytes.len() && required_bytes[comma_idx] != b',' { + comma_idx += 1; + } + // `comma_idx` is now the index of the comma, e.g. if the string was "sse,", idx would be 3 + // This is the feature we need to validate exists in permitted. + let (to_find, remaining_required) = required_bytes.split_at(comma_idx); + if let [comma, rest @ ..] = remaining_required { + if *comma != b',' { + panic!("Internal failure of expected behaviour."); + } else { + required_bytes = rest; + } + } else { + // Exit out of the loop after this iteration. + // Note that for input of `""` and `"sse,"`, we still need to search + // for the input target feature `""` (i.e. the empty string), to match Rust's behaviour here. + finished = true; + } + + let mut local_permitted = permitted.as_slice(); + while let [to_test, rest @ ..] = local_permitted { + local_permitted = rest; + if str_array_contains(to_test, to_find) { + continue 'input_feature; + } + } + // We tried all of the items, and `to_find` wasn't one of them. 
+ // Therefore, at least one of the features in the requested features wasn't supported + return SubsetResult::No { + failing: match core::str::from_utf8(to_find) { + Ok(x) => x, + Err(_) => panic!( + "We either found a comma or the end of the string, so before then should have been valid UTF-8." + ), + }, + }; + } + // We found all of the required features. + SubsetResult::Yes +} + +const fn str_array_contains(mut haystack: &[&str], needle: &[u8]) -> bool { + while let [to_test, rest @ ..] = haystack { + haystack = rest; + if byte_arrays_eq(to_test.as_bytes(), needle) { + return true; + } + } + false +} + +const fn byte_arrays_eq(lhs: &[u8], rhs: &[u8]) -> bool { + if lhs.len() != rhs.len() { + return false; + } + let mut idx = 0; + while idx < lhs.len() { + if lhs[idx] != rhs[idx] { + return false; + } + idx += 1; + } + true +} + +#[cfg(test)] +mod tests { + use super::{SubsetResult, is_feature_subset}; + + /// Test if each feature in the feature string `required` is an element in `permitted`. + /// + /// Should be equivalent to [`is_feature_subset`], but not written to be const compatible. + fn is_feature_subset_simple( + required: &'static str, + permitted: [&[&'static str]; N], + ) -> SubsetResult { + 'feature: for feature in required.split(',') { + for permitted_group in &permitted { + for permitted_feature in *permitted_group { + if feature == *permitted_feature { + continue 'feature; + } + } + } + // We tried all permitted feature, and this item wasn't present. + return SubsetResult::No { failing: feature }; + } + SubsetResult::Yes + } + + /// Expect `is_feature_subset` to succeed. + #[track_caller] + fn expect_success(required: &'static str, permitted: [&[&'static str]; N]) { + let res1 = is_feature_subset(required, permitted); + assert_eq!(res1, SubsetResult::Yes, "Const version failed."); + // Sanity check against the "trivially correct" version. 
+ let res2 = is_feature_subset_simple(required, permitted); + assert_eq!(res2, SubsetResult::Yes, "Simpler version failed."); + } + + /// Expect `is_feature_subset` to fail (with only a single possible failure). + #[track_caller] + fn expect_failure( + required: &'static str, + permitted: [&[&'static str]; N], + failing: &'static str, + ) { + let res1 = is_feature_subset(required, permitted); + assert_eq!(res1, SubsetResult::No { failing }, "Const version failed."); + // Sanity check against the "trivially correct" version. + let res2 = is_feature_subset_simple(required, permitted); + assert_eq!( + res2, + SubsetResult::No { failing }, + "Simpler version failed." + ); + } + + /// Expect `is_feature_subset` to fail, possibly with multiple potential missing features. + #[track_caller] + fn expect_any_failure(required: &'static str, permitted: [&[&'static str]; N]) { + let res1 = is_feature_subset(required, permitted); + assert!( + matches!(res1, SubsetResult::No { .. }), + "Const version failed." + ); + // Sanity check against the "trivially correct" version. + let res2 = is_feature_subset_simple(required, permitted); + assert!( + matches!(res2, SubsetResult::No { .. }), + "Simpler version failed." 
+ ); + } + + #[test] + fn simple_cases() { + expect_success("a,b,c", [&["a", "b", "c"]]); + expect_failure("a,b,c", [&["a", "b"]], "c"); + expect_success("c,a,b", [&["a", "b", "c"]]); + expect_failure("c,a,b", [&["a", "b"]], "c"); + expect_success("a,b", [&["a", "b", "c"]]); + expect_failure("a,b", [&["a", "c"]], "b"); + expect_success("a,b,a,a", [&["a", "b", "c"]]); + expect_success("a,b,c", [&["c"], &["b"], &["a"]]); + + // Check it correctly catches more than single item failures + expect_success("a1,a2,a3", [&["a1", "a2", "a3"]]); + expect_failure("a1,a2,a3", [&["a1", "a2"]], "a3"); + expect_success("a3,a1,a2", [&["a1", "a2", "a3"]]); + expect_failure("a3,a1,a2", [&["a1", "a2"]], "a3"); + expect_success("a1,a2", [&["a1", "a2", "a3"]]); + expect_failure("a1,a2", [&["a1", "a3"]], "a2"); + + // Check it doesn't have false positives with prefixes + expect_failure("a1,a2,a3", [&["a1", "a2", "a"]], "a3"); + expect_any_failure("a3,a1,a2", [&["a"]]); + expect_success("a1,a2", [&["a1", "a2", "a3"]]); + expect_failure("a1,a2", [&["a1", "a3"]], "a2"); + + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + } + + #[test] + fn incorrect_token() { + // The permitted list here only allows features which are the literal `a1,a2` + // This is completely impossible to pass, but it's worth checking + expect_any_failure("a1,a2", [&["a1,a2"]]); + } + + #[test] + fn empty_feature() { + expect_failure("a,b,", [&["a", "b"]], ""); + expect_failure("", [&["a", "b"]], ""); + + // We succeed if the empty target feature is allowed; any case where this is relevant will always + // be validated away by rustc anyway, as there is no target with the target feature `""`. 
+ // As such, there's no harm in being flexible here. + expect_success("", [&[""]]); + expect_success(",,,,,,", [&[""]]); + } + + #[test] + fn non_ascii_features() { + expect_success("café", [&["café"]]); + expect_failure("café", [&["cafe"]], "café"); + } +} diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs new file mode 100644 index 00000000..339dbbd0 --- /dev/null +++ b/fearless_simd_core/src/x86/adx/adx.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The ADX target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [ADX] --- Multi-Precision Add-Carry Instruction Extensions +/// +/// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX +/// +/// A token indicating that the current CPU has the `adx` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "adx")] +/// fn uses_adx() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Adx { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Adx { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""adx" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Adx { + const FEATURES: &[&str] = &["adx"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. 
+ // This means we can't use `Self = self` here, unfortunately. + trampoline!([Adx = self] => "adx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Adx { + #[cfg(feature = "std")] + /// Create a new token if the `"adx"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("adx") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "adx")] + /// Create a new token for the "adx" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// ADX is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "adx" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/adx/mod.rs b/fearless_simd_core/src/x86/adx/mod.rs new file mode 100644 index 00000000..22a8b231 --- /dev/null +++ b/fearless_simd_core/src/x86/adx/mod.rs @@ -0,0 +1,11 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! The "adx" target feature, used for arbitrary precision integer addition. 
+ +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." +)] +mod adx; +pub use adx::Adx; diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs new file mode 100644 index 00000000..3885ebbc --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avx.rs @@ -0,0 +1,138 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX] --- Advanced Vector Extensions +/// +/// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions +/// +/// A token indicating that the current CPU has the `avx` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx")] +/// fn uses_avx() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx { + const FEATURES: &[&str] = &["avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
+ trampoline!([Avx = self] => "avx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx { + #[cfg(feature = "std")] + /// Create a new token if the `"avx"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx")] + /// Create a new token for the "avx" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs new file mode 100644 index 00000000..9cfdaab6 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avx2.rs @@ -0,0 +1,147 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX2 target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX2] --- Advanced Vector Extensions 2 +/// +/// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2 +/// +/// A token indicating that the current CPU has the `avx2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx2")] +/// fn uses_avx2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx2" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx2 { + const FEATURES: &[&str] = &[ + "avx2", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx2 = self] => "avx2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx2 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx2")] + /// Create a new token for the "avx2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs new file mode 100644 index 00000000..870bb988 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxifma.rs @@ -0,0 +1,154 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX-IFMA target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add +/// +/// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxifma` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxifma")] +/// fn uses_avxifma() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxifma { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxifma { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxifma" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avxifma { + const FEATURES: &[&str] = &[ + "avxifma", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avxifma = self] => "avxifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxifma { + #[cfg(feature = "std")] + /// Create a new token if the `"avxifma"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxifma") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxifma")] + /// Create a new token for the "avxifma" target feature. 
+ /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-IFMA is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxifma" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs new file mode 100644 index 00000000..0009d58f --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs @@ -0,0 +1,162 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX-NE-CONVERT target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions +/// +/// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxneconvert` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxneconvert")] +/// fn uses_avxneconvert() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxneconvert { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxneconvert { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxneconvert" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avxneconvert { + const FEATURES: &[&str] = &[ + "avxneconvert", + "avx", + "avx2", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avxneconvert = self] => "avxneconvert", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxneconvert { + #[cfg(feature = "std")] + /// Create a new token if the `"avxneconvert"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxneconvert") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxneconvert")] + /// Create a new token for the "avxneconvert" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-NE-CONVERT is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxneconvert" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs new file mode 100644 index 00000000..e385386e --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxvnni.rs @@ -0,0 +1,154 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX-VNNI target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions +/// +/// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxvnni` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxvnni")] +/// fn uses_avxvnni() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxvnni { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxvnni { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxvnni" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avxvnni { + const FEATURES: &[&str] = &[ + "avxvnni", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avxvnni = self] => "avxvnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxvnni { + #[cfg(feature = "std")] + /// Create a new token if the `"avxvnni"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxvnni") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxvnni")] + /// Create a new token for the "avxvnni" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-VNNI is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxvnni" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs new file mode 100644 index 00000000..e213c938 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs @@ -0,0 +1,162 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX-VNNI-INT16 target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers +/// +/// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxvnniint16` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxvnniint16")] +/// fn uses_avxvnniint16() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxvnniint16 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxvnniint16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxvnniint16" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avxvnniint16 { + const FEATURES: &[&str] = &[ + "avxvnniint16", + "avx", + "avx2", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avxvnniint16 = self] => "avxvnniint16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxvnniint16 { + #[cfg(feature = "std")] + /// Create a new token if the `"avxvnniint16"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. 
+ // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxvnniint16") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxvnniint16")] + /// Create a new token for the "avxvnniint16" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-VNNI-INT16 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxvnniint16" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs new file mode 100644 index 00000000..7caa7251 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs @@ -0,0 +1,162 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX-VNNI-INT8 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers +/// +/// [AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxvnniint8` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxvnniint8")] +/// fn uses_avxvnniint8() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxvnniint8 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxvnniint8 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxvnniint8" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Avxvnniint8 { + const FEATURES: &[&str] = &[ + "avxvnniint8", + "avx", + "avx2", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avxvnniint8 = self] => "avxvnniint8", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxvnniint8 { + #[cfg(feature = "std")] + /// Create a new token if the `"avxvnniint8"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxvnniint8") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxvnniint8")] + /// Create a new token for the "avxvnniint8" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-VNNI-INT8 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxvnniint8" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/mod.rs b/fearless_simd_core/src/x86/avx/mod.rs new file mode 100644 index 00000000..65fe5757 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/mod.rs @@ -0,0 +1,34 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Target features related to the Advanced Vector Extensions target features (before AVX-512). +//! +//! These are most commonly used through the [x86-64-v3](crate::x86::V3) microarchitecture level. +//! +//! These support SIMD registers of up to 256 bits. +//! For the 512 bit extension, see [`avx512`](crate::x86::avx512). + +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." 
+)] +mod avx; +pub use avx::Avx; + +mod avx2; +pub use avx2::Avx2; + +mod avxifma; +pub use avxifma::Avxifma; + +mod avxneconvert; +pub use avxneconvert::Avxneconvert; + +mod avxvnni; +pub use avxvnni::Avxvnni; + +mod avxvnniint8; +pub use avxvnniint8::Avxvnniint8; + +mod avxvnniint16; +pub use avxvnniint16::Avxvnniint16; diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs new file mode 100644 index 00000000..62bbaa69 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs @@ -0,0 +1,194 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-BF16 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions +/// +/// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16 +/// +/// A token indicating that the current CPU has the `avx512bf16` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bf16")] +/// fn uses_avx512bf16() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512bf16 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512bf16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512bf16" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Avx512bf16 { + const FEATURES: &[&str] = &[ + "avx512bf16", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512bf16 = self] => "avx512bf16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512bf16 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512bf16"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512bf16") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512bf16")] + /// Create a new token for the "avx512bf16" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-BF16 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bf16" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs new file mode 100644 index 00000000..226cffab --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs @@ -0,0 +1,195 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-BITALG target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms +/// +/// +/// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG +/// +/// A token indicating that the current CPU has the `avx512bitalg` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bitalg")] +/// fn uses_avx512bitalg() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512bitalg { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512bitalg { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512bitalg" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Avx512bitalg { + const FEATURES: &[&str] = &[ + "avx512bitalg", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512bitalg = self] => "avx512bitalg", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512bitalg { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512bitalg"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512bitalg") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512bitalg")] + /// Create a new token for the "avx512bitalg" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-BITALG is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bitalg" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs new file mode 100644 index 00000000..b5aab6f6 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs @@ -0,0 +1,176 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-BW target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions +/// +/// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI +/// +/// A token indicating that the current CPU has the `avx512bw` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bw")] +/// fn uses_avx512bw() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512bw { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512bw { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512bw" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Avx512bw { + const FEATURES: &[&str] = &[ + "avx512bw", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512bw = self] => "avx512bw", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512bw { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512bw"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512bw") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512bw")] + /// Create a new token for the "avx512bw" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-BW is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bw" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs new file mode 100644 index 00000000..39c81d5f --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs @@ -0,0 +1,176 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-CD target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions +/// +/// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection +/// +/// A token indicating that the current CPU has the `avx512cd` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512cd")] +/// fn uses_avx512cd() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512cd { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512cd { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512cd" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx512cd { + const FEATURES: &[&str] = &[ + "avx512cd", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. 
+ // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512cd = self] => "avx512cd", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512cd { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512cd"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512cd") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512cd")] + /// Create a new token for the "avx512cd" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-CD is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512cd" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs new file mode 100644 index 00000000..abc3c32a --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs @@ -0,0 +1,176 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-DQ target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions +/// +/// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI +/// +/// A token indicating that the current CPU has the `avx512dq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512dq")] +/// fn uses_avx512dq() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512dq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512dq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512dq" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx512dq { + const FEATURES: &[&str] = &[ + "avx512dq", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512dq = self] => "avx512dq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512dq { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512dq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512dq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512dq")] + /// Create a new token for the "avx512dq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-DQ is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512dq" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs new file mode 100644 index 00000000..a25c9255 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512f.rs @@ -0,0 +1,168 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-F target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation +/// +/// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512 +/// +/// A token indicating that the current CPU has the `avx512f` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512f")] +/// fn uses_avx512f() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512f { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512f { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512f" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx512f { + const FEATURES: &[&str] = &[ + "avx512f", "avx", "avx2", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
+ trampoline!([Avx512f = self] => "avx512f", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512f { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512f"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512f") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512f")] + /// Create a new token for the "avx512f" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-F is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512f" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs new file mode 100644 index 00000000..b76df903 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs @@ -0,0 +1,194 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-FP16 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions +/// +/// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16 +/// +/// A token indicating that the current CPU has the `avx512fp16` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512fp16")] +/// fn uses_avx512fp16() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512fp16 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Avx512fp16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512fp16" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx512fp16 { + const FEATURES: &[&str] = &[ + "avx512fp16", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512fp16 = self] => "avx512fp16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512fp16 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512fp16"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512fp16") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512fp16")] + /// Create a new token for the "avx512fp16" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-FP16 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512fp16" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs new file mode 100644 index 00000000..dd74a8a2 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs @@ -0,0 +1,186 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-IFMA target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add +/// +/// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA +/// +/// A token indicating that the current CPU has the `avx512ifma` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512ifma")] +/// fn uses_avx512ifma() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512ifma { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512ifma { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512ifma" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx512ifma { + const FEATURES: &[&str] = &[ + "avx512ifma", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. 
+ // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512ifma = self] => "avx512ifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512ifma { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512ifma"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512ifma") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512ifma")] + /// Create a new token for the "avx512ifma" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-IFMA is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512ifma" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs new file mode 100644 index 00000000..38eb6e99 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs @@ -0,0 +1,194 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-VBMI target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions +/// +/// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI +/// +/// A token indicating that the current CPU has the `avx512vbmi` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vbmi")] +/// fn uses_avx512vbmi() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vbmi { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vbmi { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vbmi" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx512vbmi { + const FEATURES: &[&str] = &[ + "avx512vbmi", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512vbmi = self] => "avx512vbmi", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vbmi { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vbmi"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vbmi") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vbmi")] + /// Create a new token for the "avx512vbmi" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VBMI is available. 
This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vbmi" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs new file mode 100644 index 00000000..b172416d --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs @@ -0,0 +1,194 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-VBMI2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2 +/// +/// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2 +/// +/// A token indicating that the current CPU has the `avx512vbmi2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vbmi2")] +/// fn uses_avx512vbmi2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vbmi2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Avx512vbmi2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vbmi2" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx512vbmi2 { + const FEATURES: &[&str] = &[ + "avx512vbmi2", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512vbmi2 = self] => "avx512vbmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vbmi2 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vbmi2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vbmi2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vbmi2")] + /// Create a new token for the "avx512vbmi2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VBMI2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. 
that the "avx512vbmi2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs new file mode 100644 index 00000000..983bc3fc --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs @@ -0,0 +1,176 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-VL target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions +/// +/// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512 +/// +/// A token indicating that the current CPU has the `avx512vl` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vl")] +/// fn uses_avx512vl() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vl { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vl { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vl" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx512vl { + const FEATURES: &[&str] = &[ + "avx512vl", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
+ trampoline!([Avx512vl = self] => "avx512vl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vl { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vl"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vl") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vl")] + /// Create a new token for the "avx512vl" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VL is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vl" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vnni.rs b/fearless_simd_core/src/x86/avx512/avx512vnni.rs new file mode 100644 index 00000000..f0037c2f --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vnni.rs @@ -0,0 +1,186 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-VNNI target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions +/// +/// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI +/// +/// A token indicating that the current CPU has the `avx512vnni` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vnni")] +/// fn uses_avx512vnni() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vnni { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vnni { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vnni" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Avx512vnni { + const FEATURES: &[&str] = &[ + "avx512vnni", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512vnni = self] => "avx512vnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vnni { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vnni"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vnni") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vnni")] + /// Create a new token for the "avx512vnni" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VNNI is available. 
This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vnni" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs new file mode 100644 index 00000000..5294dcbe --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs @@ -0,0 +1,186 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-VP2INTERSECT target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers +/// +/// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT +/// +/// A token indicating that the current CPU has the `avx512vp2intersect` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vp2intersect")] +/// fn uses_avx512vp2intersect() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vp2intersect { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vp2intersect { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vp2intersect" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Avx512vp2intersect { + const FEATURES: &[&str] = &[ + "avx512vp2intersect", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512vp2intersect = self] => "avx512vp2intersect", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vp2intersect { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vp2intersect"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vp2intersect") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vp2intersect")] + /// Create a new token for the "avx512vp2intersect" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VP2INTERSECT is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vp2intersect" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs new file mode 100644 index 00000000..f16c735f --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs @@ -0,0 +1,186 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AVX512-VPOPCNTDQ target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction +/// +/// [AVX512-VPOPCNTDQ]:https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG +/// +/// A token indicating that the current CPU has the `avx512vpopcntdq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vpopcntdq")] +/// fn uses_avx512vpopcntdq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vpopcntdq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vpopcntdq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vpopcntdq" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Avx512vpopcntdq { + const FEATURES: &[&str] = &[ + "avx512vpopcntdq", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Avx512vpopcntdq = self] => "avx512vpopcntdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vpopcntdq { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vpopcntdq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vpopcntdq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vpopcntdq")] + /// Create a new token for the "avx512vpopcntdq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VPOPCNTDQ is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vpopcntdq" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/mod.rs b/fearless_simd_core/src/x86/avx512/mod.rs new file mode 100644 index 00000000..5a0ca606 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/mod.rs @@ -0,0 +1,50 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Target features related to the 512-bit extensions to [AVX](crate::x86::avx). +//! +//! Many of these are part of the [x86-64-v4](crate::x86::V4) microarchitecture level. +//! +//! These support SIMD registers of up to 512 bits. + +mod avx512bf16; +pub use avx512bf16::Avx512bf16; + +mod avx512bitalg; +pub use avx512bitalg::Avx512bitalg; + +mod avx512bw; +pub use avx512bw::Avx512bw; + +mod avx512cd; +pub use avx512cd::Avx512cd; + +mod avx512dq; +pub use avx512dq::Avx512dq; + +mod avx512f; +pub use avx512f::Avx512f; + +mod avx512fp16; +pub use avx512fp16::Avx512fp16; + +mod avx512ifma; +pub use avx512ifma::Avx512ifma; + +mod avx512vbmi; +pub use avx512vbmi::Avx512vbmi; + +mod avx512vbmi2; +pub use avx512vbmi2::Avx512vbmi2; + +mod avx512vl; +pub use avx512vl::Avx512vl; + +mod avx512vnni; +pub use avx512vnni::Avx512vnni; + +mod avx512vp2intersect; +pub use avx512vp2intersect::Avx512vp2intersect; + +mod avx512vpopcntdq; +pub use avx512vpopcntdq::Avx512vpopcntdq; diff --git a/fearless_simd_core/src/x86/crypto/aes.rs b/fearless_simd_core/src/x86/crypto/aes.rs new file mode 100644 index 00000000..65a1aeaa --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/aes.rs @@ -0,0 +1,110 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The AES target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AES] --- Advanced Encryption Standard +/// +/// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set +/// +/// A token indicating that the current CPU has the `aes` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "aes")] +/// fn uses_aes() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Aes { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Aes { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""aes" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Aes { + const FEATURES: &[&str] = &["aes", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Aes = self] => "aes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Aes { + #[cfg(feature = "std")] + /// Create a new token if the `"aes"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("aes") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "aes")] + /// Create a new token for the "aes" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AES is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "aes" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Aes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Aes = value] => "aes", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Aes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Aes = value] => "aes", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/gfni.rs b/fearless_simd_core/src/x86/crypto/gfni.rs new file mode 100644 index 00000000..f8b12371 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/gfni.rs @@ -0,0 +1,110 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The GFNI target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [GFNI] --- Galois Field New Instructions +/// +/// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI +/// +/// A token indicating that the current CPU has the `gfni` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "gfni")] +/// fn uses_gfni() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Gfni { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Gfni { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""gfni" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Gfni { + const FEATURES: &[&str] = &["gfni", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Gfni = self] => "gfni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Gfni { + #[cfg(feature = "std")] + /// Create a new token if the `"gfni"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("gfni") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "gfni")] + /// Create a new token for the "gfni" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// GFNI is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "gfni" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Gfni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Gfni = value] => "gfni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Gfni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Gfni = value] => "gfni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/kl.rs b/fearless_simd_core/src/x86/crypto/kl.rs new file mode 100644 index 00000000..45361b81 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/kl.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The KEYLOCKER target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [KEYLOCKER] --- Intel Key Locker Instructions +/// +/// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions +/// +/// A token indicating that the current CPU has the `kl` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "kl")] +/// fn uses_keylocker() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Keylocker { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Keylocker { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""kl" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Keylocker { + const FEATURES: &[&str] = &["kl"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Keylocker = self] => "kl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Keylocker { + #[cfg(feature = "std")] + /// Create a new token if the `"kl"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("kl") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "kl")] + /// Create a new token for the "kl" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// KEYLOCKER is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "kl" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/mod.rs b/fearless_simd_core/src/x86/crypto/mod.rs new file mode 100644 index 00000000..7154cf89 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/mod.rs @@ -0,0 +1,45 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Cryptography-related target features, including hashing, random number generation, and encryption. +//! +//! These are not generally part of the standardised microarchitecture levels. 
+ +mod aes; +pub use aes::Aes; + +mod gfni; +pub use gfni::Gfni; + +mod kl; +pub use kl::Keylocker; + +mod pclmulqdq; +pub use pclmulqdq::Pclmulqdq; + +mod rdrand; +pub use rdrand::Rdrand; + +mod rdseed; +pub use rdseed::Rdseed; + +mod sha; +pub use sha::Sha; + +mod sha512; +pub use sha512::Sha512; + +mod sm3; +pub use sm3::Sm3; + +mod sm4; +pub use sm4::Sm4; + +mod vaes; +pub use vaes::Vaes; + +mod vpclmulqdq; +pub use vpclmulqdq::Vpclmulqdq; + +mod widekl; +pub use widekl::WideKeylocker; diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs new file mode 100644 index 00000000..31d7f60a --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -0,0 +1,110 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `pclmulqdq` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`pclmulqdq`] --- Packed carry-less multiplication quadword +/// +/// [`pclmulqdq`]: https://www.felixcloutier.com/x86/pclmulqdq +/// +/// A token indicating that the current CPU has the `pclmulqdq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "pclmulqdq")] +/// fn uses_pclmulqdq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Pclmulqdq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Pclmulqdq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""pclmulqdq" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Pclmulqdq { + const FEATURES: &[&str] = &["pclmulqdq", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Pclmulqdq = self] => "pclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Pclmulqdq { + #[cfg(feature = "std")] + /// Create a new token if the `"pclmulqdq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("pclmulqdq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "pclmulqdq")] + /// Create a new token for the "pclmulqdq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `pclmulqdq` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "pclmulqdq" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Pclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Pclmulqdq = value] => "pclmulqdq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Pclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Pclmulqdq = value] => "pclmulqdq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs new file mode 100644 index 00000000..c37ab595 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `rdrand` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`rdrand`] --- Read random number +/// +/// [`rdrand`]: https://en.wikipedia.org/wiki/RdRand +/// +/// A token indicating that the current CPU has the `rdrand` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "rdrand")] +/// fn uses_rdrand() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Rdrand { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Rdrand { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""rdrand" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Rdrand { + const FEATURES: &[&str] = &["rdrand"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Rdrand = self] => "rdrand", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Rdrand { + #[cfg(feature = "std")] + /// Create a new token if the `"rdrand"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("rdrand") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "rdrand")] + /// Create a new token for the "rdrand" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `rdrand` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. 
that the "rdrand" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs new file mode 100644 index 00000000..2f052c0b --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `rdseed` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`rdseed`] --- Read random seed +/// +/// [`rdseed`]: https://en.wikipedia.org/wiki/RdRand +/// +/// A token indicating that the current CPU has the `rdseed` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "rdseed")] +/// fn uses_rdseed() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Rdseed { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Rdseed { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""rdseed" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Rdseed { + const FEATURES: &[&str] = &["rdseed"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Rdseed = self] => "rdseed", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Rdseed { + #[cfg(feature = "std")] + /// Create a new token if the `"rdseed"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("rdseed") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "rdseed")] + /// Create a new token for the "rdseed" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `rdseed` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "rdseed" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sha.rs b/fearless_simd_core/src/x86/crypto/sha.rs new file mode 100644 index 00000000..1788683d --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sha.rs @@ -0,0 +1,110 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SHA target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SHA] --- Secure Hash Algorithm +/// +/// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions +/// +/// A token indicating that the current CPU has the `sha` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sha")] +/// fn uses_sha() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sha { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sha { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sha" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Sha { + const FEATURES: &[&str] = &["sha", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
+ trampoline!([Sha = self] => "sha", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sha { + #[cfg(feature = "std")] + /// Create a new token if the `"sha"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sha") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sha")] + /// Create a new token for the "sha" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SHA is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sha" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sha) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha = value] => "sha", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sha) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sha = value] => "sha", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sha512.rs b/fearless_simd_core/src/x86/crypto/sha512.rs new file mode 100644 index 00000000..f1116dc2 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sha512.rs @@ -0,0 +1,154 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SHA512 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SHA512] --- Secure Hash Algorithm with 512-bit digest +/// +/// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions +/// +/// A token indicating that the current CPU has the `sha512` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sha512")] +/// fn uses_sha512() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sha512 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sha512 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sha512" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Sha512 { + const FEATURES: &[&str] = &[ + "sha512", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sha512 = self] => "sha512", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sha512 { + #[cfg(feature = "std")] + /// Create a new token if the `"sha512"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sha512") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sha512")] + /// Create a new token for the "sha512" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SHA512 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sha512" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sm3.rs b/fearless_simd_core/src/x86/crypto/sm3.rs new file mode 100644 index 00000000..663f3467 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sm3.rs @@ -0,0 +1,147 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SM3 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SM3] --- ShangMi 3 Hash Algorithm +/// +/// [SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions +/// +/// A token indicating that the current CPU has the `sm3` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sm3")] +/// fn uses_sm3() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sm3 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sm3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sm3" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Sm3 { + const FEATURES: &[&str] = &[ + "sm3", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sm3 = self] => "sm3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sm3 { + #[cfg(feature = "std")] + /// Create a new token if the `"sm3"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sm3") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sm3")] + /// Create a new token for the "sm3" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SM3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sm3" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sm4.rs b/fearless_simd_core/src/x86/crypto/sm4.rs new file mode 100644 index 00000000..af2a6cfc --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sm4.rs @@ -0,0 +1,154 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SM4 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SM4] --- ShangMi 4 Cipher Algorithm +/// +/// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions +/// +/// A token indicating that the current CPU has the `sm4` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sm4")] +/// fn uses_sm4() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sm4 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sm4 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sm4" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Sm4 { + const FEATURES: &[&str] = &[ + "sm4", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
+ trampoline!([Sm4 = self] => "sm4", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sm4 { + #[cfg(feature = "std")] + /// Create a new token if the `"sm4"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sm4") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sm4")] + /// Create a new token for the "sm4" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SM4 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sm4" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/crypto/vaes.rs b/fearless_simd_core/src/x86/crypto/vaes.rs new file mode 100644 index 00000000..7f47b744 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/vaes.rs @@ -0,0 +1,161 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The VAES target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [VAES] --- Vector AES Instructions +/// +/// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES +/// +/// A token indicating that the current CPU has the `vaes` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "vaes")] +/// fn uses_vaes() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Vaes { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Vaes { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""vaes" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Vaes { + const FEATURES: &[&str] = &[ + "vaes", "aes", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
+ trampoline!([Vaes = self] => "vaes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Vaes { + #[cfg(feature = "std")] + /// Create a new token if the `"vaes"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("vaes") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "vaes")] + /// Create a new token for the "vaes" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// VAES is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "vaes" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::crypto::Aes { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::crypto::Aes { crate::x86::crypto::Aes::new() }) + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs new file mode 100644 index 00000000..8467fe8b --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs @@ -0,0 +1,162 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The VPCLMULQDQ target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords +/// +/// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ +/// +/// A token indicating that the current CPU has the `vpclmulqdq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "vpclmulqdq")] +/// fn uses_vpclmulqdq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Vpclmulqdq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Vpclmulqdq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""vpclmulqdq" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
+unsafe impl TargetFeatureToken for Vpclmulqdq { + const FEATURES: &[&str] = &[ + "vpclmulqdq", + "avx", + "pclmulqdq", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Vpclmulqdq = self] => "vpclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Vpclmulqdq { + #[cfg(feature = "std")] + /// Create a new token if the `"vpclmulqdq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("vpclmulqdq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "vpclmulqdq")] + /// Create a new token for the "vpclmulqdq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// VPCLMULQDQ is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "vpclmulqdq" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::crypto::Pclmulqdq { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::crypto::Pclmulqdq { crate::x86::crypto::Pclmulqdq::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/widekl.rs b/fearless_simd_core/src/x86/crypto/widekl.rs new file mode 100644 index 00000000..5c74f3d9 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/widekl.rs @@ -0,0 +1,103 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The WIDE KEYLOCKER target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions +/// +/// [KEYLOCKER_WIDE]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions +/// +/// A token indicating that the current CPU has the `widekl` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "widekl")] +/// fn uses_wide_keylocker() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct WideKeylocker { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for WideKeylocker { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""widekl" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for WideKeylocker { + const FEATURES: &[&str] = &["widekl", "kl"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([WideKeylocker = self] => "widekl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl WideKeylocker { + #[cfg(feature = "std")] + /// Create a new token if the `"widekl"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("widekl") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "widekl")] + /// Create a new token for the "widekl" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// WIDE KEYLOCKER is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "widekl" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::crypto::Keylocker { + fn from(value: WideKeylocker) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([WideKeylocker = value] => "widekl", fn() -> crate::x86::crypto::Keylocker { crate::x86::crypto::Keylocker::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs new file mode 100644 index 00000000..1072d9ca --- /dev/null +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -0,0 +1,12 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Discontinued x86-64 target features. +//! +//! That is, target features which were present on some CPUs, but which later CPU families +//! from the same vendor did not include. +//! +//! For more information, see [Wikipedia's list of discontinued x86 instructions](https://en.wikipedia.org/wiki/List_of_discontinued_x86_instructions). + +mod tbm; +pub use tbm::Tbm; diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs new file mode 100644 index 00000000..797afca4 --- /dev/null +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The TBM target feature.
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [TBM] --- Trailing Bit Manipulation +/// +/// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation) +/// +/// A token indicating that the current CPU has the `tbm` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "tbm")] +/// fn uses_tbm() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Tbm { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Tbm { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""tbm" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Tbm { + const FEATURES: &[&str] = &["tbm"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Tbm = self] => "tbm", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Tbm { + #[cfg(feature = "std")] + /// Create a new token if the `"tbm"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("tbm") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "tbm")] + /// Create a new token for the "tbm" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// TBM is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "tbm" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs new file mode 100644 index 00000000..929f9c8c --- /dev/null +++ b/fearless_simd_core/src/x86/mod.rs @@ -0,0 +1,48 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Target feature tokens for the x86 and x86-64 CPU families. +//! +//! The general computation [microarchitecture level]s each have a level in this module. +//! These levels are useful for most users of this crate, as they provide useful categories +//! of supported instructions. +//! +//! - [`V1`] for x86-64-v1. +//! - [`V2`] for x86-64-v2. +//! - [`V3`] for x86-64-v3. +//! - [`V4`] for x86-64-v4. +//! +//! We don't yet provide a way to select the best of these for the current CPU, +//! but that is planned. +//! +//! Tokens for individual target features, including those not associated with these levels, +//! 
can be found in the submodules of this module. +//! These are less likely to be directly useful for most users, but are provided for use +//! cases which require them (probably especially those under [`crypto`]). +//! +//! Both the x86 and x86-64 CPU families are supported in this module as their code is entirely identical, +//! including using the same [`std::is_x86_feature_detected`] macro. +//! Note that this is not the case for `std::arch`; for example, [`core::arch::x86_64::_mm_crc32_u64`] is +//! only available on x86-64. +//! +//! Documentation for features is adapted from the Rust reference. +//! +//! [microarchitecture level]: https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels + +pub mod adx; +pub mod avx; +pub mod avx512; +pub mod crypto; +pub mod discontinued; +pub mod sse; +pub mod xsave; + +pub mod v1; +pub mod v2; +pub mod v3; +pub mod v4; + +pub use v1::V1; +pub use v2::V2; +pub use v3::V3; +pub use v4::V4; diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs new file mode 100644 index 00000000..50fa9a79 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -0,0 +1,97 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `fxsave + fxrstor` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`fxsave`] and [`fxrstor`] --- Save and restore x87 FPU, MMX Technology, and SSE State +/// +/// [`fxsave`]: https://www.felixcloutier.com/x86/fxsave +/// [`fxrstor`]: https://www.felixcloutier.com/x86/fxrstor +/// +/// A token indicating that the current CPU has the `fxsr` target feature.
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fxsr")] +/// fn uses_fxsr() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Fxsr { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Fxsr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""fxsr" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Fxsr { + const FEATURES: &[&str] = &["fxsr"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Fxsr = self] => "fxsr", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Fxsr { + #[cfg(feature = "std")] + /// Create a new token if the `"fxsr"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("fxsr") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "fxsr")] + /// Create a new token for the "fxsr" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `fxsave + fxrstor` is available. 
This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fxsr" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs new file mode 100644 index 00000000..d3d6b5e5 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -0,0 +1,39 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Target features related to Streaming SIMD Extensions. +//! +//! These are the predecessors to the [AVX](crate::x86::avx) instructions. +//! +//! These are most commonly used through the [x86-64-v2](crate::x86::V2) microarchitecture level. +//! Some of these features are also included in [x86-64-v1](crate::x86::V1). +//! +//! These support SIMD registers of up to 128 bits. + +mod fxsr; +pub use fxsr::Fxsr; + +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." 
+)] +mod sse; +pub use sse::Sse; + +mod sse2; +pub use sse2::Sse2; + +mod sse3; +pub use sse3::Sse3; + +mod ssse3; +pub use ssse3::SupplementalSse3; + +mod sse4a; +pub use sse4a::Sse4a; + +mod sse4_1; +pub use sse4_1::Sse4_1; + +mod sse4_2; +pub use sse4_2::Sse4_2; diff --git a/fearless_simd_core/src/x86/sse/sse.rs b/fearless_simd_core/src/x86/sse/sse.rs new file mode 100644 index 00000000..5937ece6 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SSE target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE] --- Streaming SIMD Extensions +/// +/// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions +/// +/// A token indicating that the current CPU has the `sse` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse")] +/// fn uses_sse() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Sse { + const FEATURES: &[&str] = &["sse"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. 
+ // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse { + #[cfg(feature = "std")] + /// Create a new token if the `"sse"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse")] + /// Create a new token for the "sse" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse2.rs b/fearless_simd_core/src/x86/sse/sse2.rs new file mode 100644 index 00000000..1d151568 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse2.rs @@ -0,0 +1,103 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SSE2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE2] --- Streaming SIMD Extensions 2 +/// +/// [SSE2]: https://en.wikipedia.org/wiki/SSE2 +/// +/// A token indicating that the current CPU has the `sse2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse2")] +/// fn uses_sse2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse2 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse2" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for Sse2 { + const FEATURES: &[&str] = &["sse2", "sse"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sse2 = self] => "sse2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse2 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse2")] + /// Create a new token for the "sse2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse2 = value] => "sse2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse3.rs b/fearless_simd_core/src/x86/sse/sse3.rs new file mode 100644 index 00000000..c501bc3a --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse3.rs @@ -0,0 +1,110 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SSE3 target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE3] --- Streaming SIMD Extensions 3 +/// +/// [SSE3]: https://en.wikipedia.org/wiki/SSE3 +/// +/// A token indicating that the current CPU has the `sse3` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse3")] +/// fn uses_sse3() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse3 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse3" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for Sse3 { + const FEATURES: &[&str] = &["sse3", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sse3 = self] => "sse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse3 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse3"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse3") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse3")] + /// Create a new token for the "sse3" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse3" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse3 = value] => "sse3", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse3 = value] => "sse3", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse4_1.rs b/fearless_simd_core/src/x86/sse/sse4_1.rs new file mode 100644 index 00000000..451a45d3 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse4_1.rs @@ -0,0 +1,124 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SSE4.1 target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE4.1] --- Streaming SIMD Extensions 4.1 +/// +/// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1 +/// +/// A token indicating that the current CPU has the `sse4.1` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse4.1")] +/// fn uses_sse4() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse4_1 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse4_1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse4.1" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for Sse4_1 { + const FEATURES: &[&str] = &["sse4.1", "sse", "sse2", "sse3", "ssse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sse4_1 = self] => "sse4.1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse4_1 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse4.1"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse4.1") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse4.1")] + /// Create a new token for the "sse4.1" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE4.1 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse4.1" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse4_2.rs b/fearless_simd_core/src/x86/sse/sse4_2.rs new file mode 100644 index 00000000..feba99a8 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse4_2.rs @@ -0,0 +1,131 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SSE4.2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE4.2] --- Streaming SIMD Extensions 4.2 +/// +/// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2 +/// +/// A token indicating that the current CPU has the `sse4.2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse4.2")] +/// fn uses_sse4() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse4_2 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse4_2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse4.2" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target features are enabled. 
+unsafe impl TargetFeatureToken for Sse4_2 { + const FEATURES: &[&str] = &["sse4.2", "sse", "sse2", "sse3", "sse4.1", "ssse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sse4_2 = self] => "sse4.2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse4_2 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse4.2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse4.2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse4.2")] + /// Create a new token for the "sse4.2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE4.2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse4.2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs new file mode 100644 index 00000000..8c8abc80 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -0,0 +1,117 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SSE4a target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE4a] --- Streaming SIMD Extensions 4a +/// +/// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a +/// +/// A token indicating that the current CPU has the `sse4a` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse4a")] +/// fn uses_sse4a() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse4a { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse4a { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse4a" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for Sse4a { + const FEATURES: &[&str] = &["sse4a", "sse", "sse2", "sse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sse4a = self] => "sse4a", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse4a { + #[cfg(feature = "std")] + /// Create a new token if the `"sse4a"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse4a") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse4a")] + /// Create a new token for the "sse4a" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE4a is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse4a" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse4a) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4a = value] => "sse4a", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse4a) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4a = value] => "sse4a", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sse4a) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4a = value] => "sse4a", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/sse/ssse3.rs b/fearless_simd_core/src/x86/sse/ssse3.rs new file mode 100644 index 00000000..22b35582 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/ssse3.rs @@ -0,0 +1,117 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The SSSE3 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSSE3] --- Supplemental Streaming SIMD Extensions 3 +/// +/// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3 +/// +/// A token indicating that the current CPU has the `ssse3` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "ssse3")] +/// fn uses_ssse3() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct SupplementalSse3 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for SupplementalSse3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""ssse3" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for SupplementalSse3 { + const FEATURES: &[&str] = &["ssse3", "sse", "sse2", "sse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. 
+ trampoline!([SupplementalSse3 = self] => "ssse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl SupplementalSse3 { + #[cfg(feature = "std")] + /// Create a new token if the `"ssse3"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("ssse3") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "ssse3")] + /// Create a new token for the "ssse3" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSSE3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "ssse3" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: SupplementalSse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([SupplementalSse3 = value] => "ssse3", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: SupplementalSse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([SupplementalSse3 = value] => "ssse3", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: SupplementalSse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([SupplementalSse3 = value] => "ssse3", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v1/level.rs b/fearless_simd_core/src/x86/v1/level.rs new file mode 100644 index 00000000..e431a73b --- /dev/null +++ b/fearless_simd_core/src/x86/v1/level.rs @@ -0,0 +1,122 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v1 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v1 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// <https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels>. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fxsr,sse2")] +/// fn uses_x86_64_v1() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V1 { + /// The contained proof that `fxsave + fxrstor` is available. 
+ pub fxsr: crate::x86::v1::Fxsr, + /// The contained proof that SSE2 is available. + pub sse2: crate::x86::v1::Sse2, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for V1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v1 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V1 { + const FEATURES: &[&str] = &["fxsr", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string. + trampoline!([crate::x86::v1::Fxsr = self.fxsr, crate::x86::v1::Sse2 = self.sse2] => "fxsr,sse2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V1 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v1 microarchitecture level is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + fxsr: crate::x86::v1::Fxsr::try_new()?, + sse2: crate::x86::v1::Sse2::try_new()?, + }) + } + + #[target_feature(enable = "fxsr,sse2")] + /// Create a new token for the x86-64-v1 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v1 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fxsr,sse2" target features are available. 
+ pub fn new() -> Self { + Self { + fxsr: crate::x86::v1::Fxsr::new(), + sse2: crate::x86::v1::Sse2::new(), + } + } +} + +impl From for crate::x86::v1::Fxsr { + fn from(value: V1) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V1 = value] => "fxsr,sse2", fn() -> crate::x86::v1::Fxsr { crate::x86::v1::Fxsr::new() }) + } +} + +impl From for crate::x86::v1::Sse { + fn from(value: V1) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V1 = value] => "fxsr,sse2", fn() -> crate::x86::v1::Sse { crate::x86::v1::Sse::new() }) + } +} + +impl From for crate::x86::v1::Sse2 { + fn from(value: V1) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V1 = value] => "fxsr,sse2", fn() -> crate::x86::v1::Sse2 { crate::x86::v1::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs new file mode 100644 index 00000000..58d5dcf8 --- /dev/null +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -0,0 +1,18 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Target features enabled in the `x86-64-v1` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This can usually be treated as the baseline for x86-64 support; all of the target features in this module are enabled by +//! default on Rust's x86-64 targets (such as `x86_64-unknown-linux-gnu`). +//! +//! This module also contains [`V1`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. 
+ +pub use crate::x86::sse::Fxsr; +pub use crate::x86::sse::Sse; +pub use crate::x86::sse::Sse2; + +mod level; +pub use level::V1; diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs new file mode 100644 index 00000000..1fd68ceb --- /dev/null +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `cmpxchg16b` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`cmpxchg16b`] --- Compares and exchange 16 bytes (128 bits) of data atomically +/// +/// [`cmpxchg16b`]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b +/// +/// A token indicating that the current CPU has the `cmpxchg16b` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "cmpxchg16b")] +/// fn uses_cmpxchg16b() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Cmpxchg16b { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Cmpxchg16b { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""cmpxchg16b" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Cmpxchg16b { + const FEATURES: &[&str] = &["cmpxchg16b"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. 
+ // This means we can't use `Self = self` here, unfortunately. + trampoline!([Cmpxchg16b = self] => "cmpxchg16b", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Cmpxchg16b { + #[cfg(feature = "std")] + /// Create a new token if the `"cmpxchg16b"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("cmpxchg16b") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "cmpxchg16b")] + /// Create a new token for the "cmpxchg16b" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `cmpxchg16b` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "cmpxchg16b" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v2/level.rs b/fearless_simd_core/src/x86/v2/level.rs new file mode 100644 index 00000000..f3455c4e --- /dev/null +++ b/fearless_simd_core/src/x86/v2/level.rs @@ -0,0 +1,182 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v2 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v2 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "cmpxchg16b,fxsr,popcnt,sse4.2")] +/// fn uses_x86_64_v2() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V2 { + /// The contained proof that `cmpxchg16b` is available. + pub cmpxchg16b: crate::x86::v2::Cmpxchg16b, + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v2::Fxsr, + /// The contained proof that `popcnt` is available. + pub popcnt: crate::x86::v2::Popcnt, + /// The contained proof that SSE4.2 is available. + pub sse4_2: crate::x86::v2::Sse4_2, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. 
+} + +impl Debug for V2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v2 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V2 { + const FEATURES: &[&str] = &[ + "cmpxchg16b", + "fxsr", + "popcnt", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly mapes to the target feature string. + trampoline!([crate::x86::v2::Cmpxchg16b = self.cmpxchg16b, crate::x86::v2::Fxsr = self.fxsr, crate::x86::v2::Popcnt = self.popcnt, crate::x86::v2::Sse4_2 = self.sse4_2] => "cmpxchg16b,fxsr,popcnt,sse4.2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V2 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v2 target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + cmpxchg16b: crate::x86::v2::Cmpxchg16b::try_new()?, + fxsr: crate::x86::v2::Fxsr::try_new()?, + popcnt: crate::x86::v2::Popcnt::try_new()?, + sse4_2: crate::x86::v2::Sse4_2::try_new()?, + }) + } + + #[target_feature(enable = "cmpxchg16b,fxsr,popcnt,sse4.2")] + /// Create a new token for the x86-64-v2 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. 
that the "cmpxchg16b,fxsr,popcnt,sse4.2" target feature is available. + pub fn new() -> Self { + Self { + cmpxchg16b: crate::x86::v2::Cmpxchg16b::new(), + fxsr: crate::x86::v2::Fxsr::new(), + popcnt: crate::x86::v2::Popcnt::new(), + sse4_2: crate::x86::v2::Sse4_2::new(), + } + } +} + +impl From for crate::x86::v2::Cmpxchg16b { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Cmpxchg16b { crate::x86::v2::Cmpxchg16b::new() }) + } +} + +impl From for crate::x86::v2::Fxsr { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Fxsr { crate::x86::v2::Fxsr::new() }) + } +} + +impl From for crate::x86::v2::Popcnt { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Popcnt { crate::x86::v2::Popcnt::new() }) + } +} + +impl From for crate::x86::v2::Sse { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse { crate::x86::v2::Sse::new() }) + } +} + +impl From for crate::x86::v2::Sse2 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse2 { crate::x86::v2::Sse2::new() }) + } +} + +impl From for crate::x86::v2::Sse3 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse3 { crate::x86::v2::Sse3::new() }) + } +} + +impl From for crate::x86::v2::Sse4_1 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse4_1 { crate::x86::v2::Sse4_1::new() }) + } +} + +impl From for crate::x86::v2::Sse4_2 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse4_2 { crate::x86::v2::Sse4_2::new() }) + } +} + +impl From for crate::x86::v2::SupplementalSse3 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::SupplementalSse3 { crate::x86::v2::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v2/mod.rs b/fearless_simd_core/src/x86/v2/mod.rs new file mode 100644 index 00000000..a5032b1b --- /dev/null +++ b/fearless_simd_core/src/x86/v2/mod.rs @@ -0,0 +1,27 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Target features enabled in the `x86-64-v2` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This module also contains [`V2`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. +//! +//! This level also implies that `x86-64-v1` is available. 
+ +pub use crate::x86::sse::Sse3; +pub use crate::x86::sse::Sse4_1; +pub use crate::x86::sse::Sse4_2; +pub use crate::x86::sse::SupplementalSse3; +pub use crate::x86::v1::Fxsr; +pub use crate::x86::v1::Sse; +pub use crate::x86::v1::Sse2; + +mod cmpxchg16b; +pub use cmpxchg16b::Cmpxchg16b; + +mod popcnt; +pub use popcnt::Popcnt; + +mod level; +pub use level::V2; diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs new file mode 100644 index 00000000..29066049 --- /dev/null +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `popcnt` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`popcnt`] --- Count of bits set to 1 +/// +/// [`popcnt`]: https://www.felixcloutier.com/x86/popcnt +/// +/// A token indicating that the current CPU has the `popcnt` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "popcnt")] +/// fn uses_popcnt() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Popcnt { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Popcnt { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""popcnt" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled.
+unsafe impl TargetFeatureToken for Popcnt { + const FEATURES: &[&str] = &["popcnt"]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Popcnt = self] => "popcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Popcnt { + #[cfg(feature = "std")] + /// Create a new token if the `"popcnt"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option<Self> { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("popcnt") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "popcnt")] + /// Create a new token for the "popcnt" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `popcnt` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "popcnt" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::<Popcnt>() == 0, + "Target feature tokens should be zero sized."
+ ); +}; diff --git a/fearless_simd_core/src/x86/v3/bmi1.rs b/fearless_simd_core/src/x86/v3/bmi1.rs new file mode 100644 index 00000000..875a1002 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/bmi1.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The BMI1 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [BMI1] --- Bit Manipulation Instruction Sets +/// +/// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets +/// +/// A token indicating that the current CPU has the `bmi1` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "bmi1")] +/// fn uses_bmi1() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Bmi1 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Bmi1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""bmi1" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Bmi1 { + const FEATURES: &[&str] = &["bmi1"]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Bmi1 = self] => "bmi1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Bmi1 { + #[cfg(feature = "std")] + /// Create a new token if the `"bmi1"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option<Self> { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("bmi1") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "bmi1")] + /// Create a new token for the "bmi1" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// BMI1 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "bmi1" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::<Bmi1>() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/bmi2.rs b/fearless_simd_core/src/x86/v3/bmi2.rs new file mode 100644 index 00000000..b83d5d44 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/bmi2.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`.
+//! The BMI2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [BMI2] --- Bit Manipulation Instruction Sets 2 +/// +/// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2 +/// +/// A token indicating that the current CPU has the `bmi2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "bmi2")] +/// fn uses_bmi2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Bmi2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Bmi2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""bmi2" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Bmi2 { + const FEATURES: &[&str] = &["bmi2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Bmi2 = self] => "bmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Bmi2 { + #[cfg(feature = "std")] + /// Create a new token if the `"bmi2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("bmi2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "bmi2")] + /// Create a new token for the "bmi2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// BMI2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "bmi2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/f16c.rs b/fearless_simd_core/src/x86/v3/f16c.rs new file mode 100644 index 00000000..1733f5ab --- /dev/null +++ b/fearless_simd_core/src/x86/v3/f16c.rs @@ -0,0 +1,147 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The F16C target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [F16C] --- 16-bit floating point conversion instructions +/// +/// [F16C]: https://en.wikipedia.org/wiki/F16C +/// +/// A token indicating that the current CPU has the `f16c` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "f16c")] +/// fn uses_f16c() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct F16c { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for F16c { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""f16c" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for F16c { + const FEATURES: &[&str] = &[ + "f16c", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([F16c = self] => "f16c", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl F16c { + #[cfg(feature = "std")] + /// Create a new token if the `"f16c"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("f16c") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "f16c")] + /// Create a new token for the "f16c" target feature. 
+ /// + /// This method is useful to get a new token if you have an external proof that + /// F16C is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "f16c" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/fma.rs b/fearless_simd_core/src/x86/v3/fma.rs new file mode 100644 index 00000000..09479f33 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/fma.rs @@ -0,0 +1,147 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The FMA3 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [FMA3] --- Three-operand fused multiply-add +/// +/// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set +/// +/// A token indicating that the current CPU has the `fma` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fma")] +/// fn uses_fma() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Fma { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Fma { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""fma" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Fma { + const FEATURES: &[&str] = &[ + "fma", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Fma = self] => "fma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Fma { + #[cfg(feature = "std")] + /// Create a new token if the `"fma"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("fma") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "fma")] + /// Create a new token for the "fma" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// FMA3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fma" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/level.rs b/fearless_simd_core/src/x86/v3/level.rs new file mode 100644 index 00000000..c5d2f8c7 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/level.rs @@ -0,0 +1,282 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v3 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v3 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave")] +/// fn uses_x86_64_v3() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V3 { + /// The contained proof that AVX2 is available. + pub avx2: crate::x86::v3::Avx2, + /// The contained proof that 1 is available. + pub bmi1: crate::x86::v3::Bmi1, + /// The contained proof that BMI2 is available. + pub bmi2: crate::x86::v3::Bmi2, + /// The contained proof that `cmpxchg16b` is available. 
+ pub cmpxchg16b: crate::x86::v3::Cmpxchg16b, + /// The contained proof that F16C is available. + pub f16c: crate::x86::v3::F16c, + /// The contained proof that FMA3 is available. + pub fma: crate::x86::v3::Fma, + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v3::Fxsr, + /// The contained proof that `lzcnt` is available. + pub lzcnt: crate::x86::v3::Lzcnt, + /// The contained proof that `movbe` is available. + pub movbe: crate::x86::v3::Movbe, + /// The contained proof that `popcnt` is available. + pub popcnt: crate::x86::v3::Popcnt, + /// The contained proof that `xsave` is available. + pub xsave: crate::x86::v3::Xsave, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for V3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v3 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V3 { + const FEATURES: &[&str] = &[ + "avx", + "avx2", + "bmi1", + "bmi2", + "cmpxchg16b", + "f16c", + "fma", + "fxsr", + "lzcnt", + "movbe", + "popcnt", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + "xsave", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly mapes to the target feature string. 
+ trampoline!([crate::x86::v3::Avx2 = self.avx2, crate::x86::v3::Bmi1 = self.bmi1, crate::x86::v3::Bmi2 = self.bmi2, crate::x86::v3::Cmpxchg16b = self.cmpxchg16b, crate::x86::v3::F16c = self.f16c, crate::x86::v3::Fma = self.fma, crate::x86::v3::Fxsr = self.fxsr, crate::x86::v3::Lzcnt = self.lzcnt, crate::x86::v3::Movbe = self.movbe, crate::x86::v3::Popcnt = self.popcnt, crate::x86::v3::Xsave = self.xsave] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V3 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v3 target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + avx2: crate::x86::v3::Avx2::try_new()?, + bmi1: crate::x86::v3::Bmi1::try_new()?, + bmi2: crate::x86::v3::Bmi2::try_new()?, + cmpxchg16b: crate::x86::v3::Cmpxchg16b::try_new()?, + f16c: crate::x86::v3::F16c::try_new()?, + fma: crate::x86::v3::Fma::try_new()?, + fxsr: crate::x86::v3::Fxsr::try_new()?, + lzcnt: crate::x86::v3::Lzcnt::try_new()?, + movbe: crate::x86::v3::Movbe::try_new()?, + popcnt: crate::x86::v3::Popcnt::try_new()?, + xsave: crate::x86::v3::Xsave::try_new()?, + }) + } + + #[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave")] + /// Create a new token for the x86-64-v3 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. 
that the "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave" target feature is available. + pub fn new() -> Self { + Self { + avx2: crate::x86::v3::Avx2::new(), + bmi1: crate::x86::v3::Bmi1::new(), + bmi2: crate::x86::v3::Bmi2::new(), + cmpxchg16b: crate::x86::v3::Cmpxchg16b::new(), + f16c: crate::x86::v3::F16c::new(), + fma: crate::x86::v3::Fma::new(), + fxsr: crate::x86::v3::Fxsr::new(), + lzcnt: crate::x86::v3::Lzcnt::new(), + movbe: crate::x86::v3::Movbe::new(), + popcnt: crate::x86::v3::Popcnt::new(), + xsave: crate::x86::v3::Xsave::new(), + } + } +} + +impl From for crate::x86::v3::Avx { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Avx { crate::x86::v3::Avx::new() }) + } +} + +impl From for crate::x86::v3::Avx2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Avx2 { crate::x86::v3::Avx2::new() }) + } +} + +impl From for crate::x86::v3::Bmi1 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Bmi1 { crate::x86::v3::Bmi1::new() }) + } +} + +impl From for crate::x86::v3::Bmi2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Bmi2 { crate::x86::v3::Bmi2::new() }) + } +} + +impl From for crate::x86::v3::Cmpxchg16b { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Cmpxchg16b { crate::x86::v3::Cmpxchg16b::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::v3::Fxsr { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Fxsr { crate::x86::v3::Fxsr::new() }) + } +} + +impl From for crate::x86::v3::Lzcnt { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Lzcnt { crate::x86::v3::Lzcnt::new() }) + } +} + +impl From for crate::x86::v3::Movbe { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Movbe { crate::x86::v3::Movbe::new() }) + } +} + +impl From for crate::x86::v3::Popcnt { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Popcnt { crate::x86::v3::Popcnt::new() }) + } +} + +impl From for crate::x86::v3::Sse { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse { crate::x86::v3::Sse::new() }) + } +} + +impl From for crate::x86::v3::Sse2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse2 { crate::x86::v3::Sse2::new() }) + } +} + +impl From for crate::x86::v3::Sse3 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse3 { crate::x86::v3::Sse3::new() }) + } +} + +impl From for crate::x86::v3::Sse4_1 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse4_1 { crate::x86::v3::Sse4_1::new() }) + } +} + +impl From for crate::x86::v3::Sse4_2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse4_2 { crate::x86::v3::Sse4_2::new() }) + } +} + +impl From for crate::x86::v3::SupplementalSse3 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::SupplementalSse3 { crate::x86::v3::SupplementalSse3::new() }) + } +} + +impl From for crate::x86::v3::Xsave { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Xsave { crate::x86::v3::Xsave::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs new file mode 100644 index 00000000..f81f8df4 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `lzcnt` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`lzcnt`] --- Leading zeros count +/// +/// [`lzcnt`]: https://www.felixcloutier.com/x86/lzcnt +/// +/// A token indicating that the current CPU has the `lzcnt` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "lzcnt")] +/// fn uses_lzcnt() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Lzcnt { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate.
+ _private: (), +} + +impl Debug for Lzcnt { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""lzcnt" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Lzcnt { + const FEATURES: &[&str] = &["lzcnt"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Lzcnt = self] => "lzcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Lzcnt { + #[cfg(feature = "std")] + /// Create a new token if the `"lzcnt"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("lzcnt") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "lzcnt")] + /// Create a new token for the "lzcnt" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `lzcnt` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "lzcnt" target feature is available.
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/mod.rs b/fearless_simd_core/src/x86/v3/mod.rs new file mode 100644 index 00000000..c67583a9 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/mod.rs @@ -0,0 +1,45 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Target features enabled in the `x86-64-v3` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This module also contains [`V3`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. +//! +//! This level also implies that `x86-64-v2` is available. 
+ +pub use crate::x86::v1::Fxsr; +pub use crate::x86::v1::Sse; +pub use crate::x86::v1::Sse2; +pub use crate::x86::v2::Cmpxchg16b; +pub use crate::x86::v2::Popcnt; +pub use crate::x86::v2::Sse3; +pub use crate::x86::v2::Sse4_1; +pub use crate::x86::v2::Sse4_2; +pub use crate::x86::v2::SupplementalSse3; + +pub use crate::x86::avx::Avx; +pub use crate::x86::avx::Avx2; +pub use crate::x86::xsave::Xsave; + +mod bmi1; +pub use bmi1::Bmi1; + +mod bmi2; +pub use bmi2::Bmi2; + +mod f16c; +pub use f16c::F16c; + +mod fma; +pub use fma::Fma; + +mod lzcnt; +pub use lzcnt::Lzcnt; + +mod movbe; +pub use movbe::Movbe; + +mod level; +pub use level::V3; diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs new file mode 100644 index 00000000..37df1e1f --- /dev/null +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `movbe` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`movbe`] --- Move data after swapping bytes +/// +/// [`movbe`]: https://www.felixcloutier.com/x86/movbe +/// +/// A token indicating that the current CPU has the `movbe` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "movbe")] +/// fn uses_movbe() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Movbe { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate.
+ _private: (), +} + +impl Debug for Movbe { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""movbe" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Movbe { + const FEATURES: &[&str] = &["movbe"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Movbe = self] => "movbe", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Movbe { + #[cfg(feature = "std")] + /// Create a new token if the `"movbe"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("movbe") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "movbe")] + /// Create a new token for the "movbe" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `movbe` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "movbe" target feature is available.
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v4/level.rs b/fearless_simd_core/src/x86/v4/level.rs new file mode 100644 index 00000000..db146467 --- /dev/null +++ b/fearless_simd_core/src/x86/v4/level.rs @@ -0,0 +1,328 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v4 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v4 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// <https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels>. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave")] +/// fn uses_x86_64_v4() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V4 { + /// The contained proof that AVX512-BW is available. + pub avx512bw: crate::x86::v4::Avx512bw, + /// The contained proof that AVX512-CD is available.
+ pub avx512cd: crate::x86::v4::Avx512cd, + /// The contained proof that AVX512-DQ is available. + pub avx512dq: crate::x86::v4::Avx512dq, + /// The contained proof that AVX512-VL is available. + pub avx512vl: crate::x86::v4::Avx512vl, + /// The contained proof that BMI1 is available. + pub bmi1: crate::x86::v4::Bmi1, + /// The contained proof that BMI2 is available. + pub bmi2: crate::x86::v4::Bmi2, + /// The contained proof that `cmpxchg16b` is available. + pub cmpxchg16b: crate::x86::v4::Cmpxchg16b, + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v4::Fxsr, + /// The contained proof that `lzcnt` is available. + pub lzcnt: crate::x86::v4::Lzcnt, + /// The contained proof that `movbe` is available. + pub movbe: crate::x86::v4::Movbe, + /// The contained proof that `popcnt` is available. + pub popcnt: crate::x86::v4::Popcnt, + /// The contained proof that `xsave` is available. + pub xsave: crate::x86::v4::Xsave, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for V4 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v4 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V4 { + const FEATURES: &[&str] = &[ + "avx", + "avx2", + "avx512bw", + "avx512cd", + "avx512dq", + "avx512f", + "avx512vl", + "bmi1", + "bmi2", + "cmpxchg16b", + "f16c", + "fma", + "fxsr", + "lzcnt", + "movbe", + "popcnt", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + "xsave", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string.
+ trampoline!([crate::x86::v4::Avx512bw = self.avx512bw, crate::x86::v4::Avx512cd = self.avx512cd, crate::x86::v4::Avx512dq = self.avx512dq, crate::x86::v4::Avx512vl = self.avx512vl, crate::x86::v4::Bmi1 = self.bmi1, crate::x86::v4::Bmi2 = self.bmi2, crate::x86::v4::Cmpxchg16b = self.cmpxchg16b, crate::x86::v4::Fxsr = self.fxsr, crate::x86::v4::Lzcnt = self.lzcnt, crate::x86::v4::Movbe = self.movbe, crate::x86::v4::Popcnt = self.popcnt, crate::x86::v4::Xsave = self.xsave] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V4 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v4 target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + avx512bw: crate::x86::v4::Avx512bw::try_new()?, + avx512cd: crate::x86::v4::Avx512cd::try_new()?, + avx512dq: crate::x86::v4::Avx512dq::try_new()?, + avx512vl: crate::x86::v4::Avx512vl::try_new()?, + bmi1: crate::x86::v4::Bmi1::try_new()?, + bmi2: crate::x86::v4::Bmi2::try_new()?, + cmpxchg16b: crate::x86::v4::Cmpxchg16b::try_new()?, + fxsr: crate::x86::v4::Fxsr::try_new()?, + lzcnt: crate::x86::v4::Lzcnt::try_new()?, + movbe: crate::x86::v4::Movbe::try_new()?, + popcnt: crate::x86::v4::Popcnt::try_new()?, + xsave: crate::x86::v4::Xsave::try_new()?, + }) + } + + #[target_feature( + enable = "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave" + )] + /// Create a new token for the x86-64-v4 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v4 is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave" target feature is available. + pub fn new() -> Self { + Self { + avx512bw: crate::x86::v4::Avx512bw::new(), + avx512cd: crate::x86::v4::Avx512cd::new(), + avx512dq: crate::x86::v4::Avx512dq::new(), + avx512vl: crate::x86::v4::Avx512vl::new(), + bmi1: crate::x86::v4::Bmi1::new(), + bmi2: crate::x86::v4::Bmi2::new(), + cmpxchg16b: crate::x86::v4::Cmpxchg16b::new(), + fxsr: crate::x86::v4::Fxsr::new(), + lzcnt: crate::x86::v4::Lzcnt::new(), + movbe: crate::x86::v4::Movbe::new(), + popcnt: crate::x86::v4::Popcnt::new(), + xsave: crate::x86::v4::Xsave::new(), + } + } +} + +impl From for crate::x86::v4::Avx { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx { crate::x86::v4::Avx::new() }) + } +} + +impl From for crate::x86::v4::Avx2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx2 { crate::x86::v4::Avx2::new() }) + } +} + +impl From for crate::x86::v4::Avx512bw { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512bw { crate::x86::v4::Avx512bw::new() }) + } +} + +impl From for crate::x86::v4::Avx512cd { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512cd { crate::x86::v4::Avx512cd::new() }) + } +} + +impl From for crate::x86::v4::Avx512dq { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512dq { crate::x86::v4::Avx512dq::new() }) + } +} + +impl From for crate::x86::v4::Avx512f { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512f { crate::x86::v4::Avx512f::new() }) + } +} + +impl From for crate::x86::v4::Avx512vl { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512vl { crate::x86::v4::Avx512vl::new() }) + } +} + +impl From for crate::x86::v4::Bmi1 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Bmi1 { crate::x86::v4::Bmi1::new() }) + } +} + +impl From for crate::x86::v4::Bmi2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Bmi2 { crate::x86::v4::Bmi2::new() }) + } +} + +impl From for crate::x86::v4::Cmpxchg16b { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Cmpxchg16b { crate::x86::v4::Cmpxchg16b::new() }) + } +} + +impl From for crate::x86::v4::F16c { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::F16c { crate::x86::v4::F16c::new() }) + } +} + +impl From for crate::x86::v4::Fma { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Fma { crate::x86::v4::Fma::new() }) + } +} + +impl From for crate::x86::v4::Fxsr { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Fxsr { crate::x86::v4::Fxsr::new() }) + } +} + +impl From for crate::x86::v4::Lzcnt { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Lzcnt { crate::x86::v4::Lzcnt::new() }) + } +} + +impl From for crate::x86::v4::Movbe { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Movbe { crate::x86::v4::Movbe::new() }) + } +} + +impl From for crate::x86::v4::Popcnt { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Popcnt { crate::x86::v4::Popcnt::new() }) + } +} + +impl From for crate::x86::v4::Sse { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse { crate::x86::v4::Sse::new() }) + } +} + +impl From for crate::x86::v4::Sse2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse2 { crate::x86::v4::Sse2::new() }) + } +} + +impl From for crate::x86::v4::Sse3 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse3 { crate::x86::v4::Sse3::new() }) + } +} + +impl From for crate::x86::v4::Sse4_1 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse4_1 { crate::x86::v4::Sse4_1::new() }) + } +} + +impl From for crate::x86::v4::Sse4_2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse4_2 { crate::x86::v4::Sse4_2::new() }) + } +} + +impl From for crate::x86::v4::SupplementalSse3 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::SupplementalSse3 { crate::x86::v4::SupplementalSse3::new() }) + } +} + +impl From for crate::x86::v4::Xsave { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Xsave { crate::x86::v4::Xsave::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v4/mod.rs b/fearless_simd_core/src/x86/v4/mod.rs new file mode 100644 index 00000000..7f3cd1ee --- /dev/null +++ b/fearless_simd_core/src/x86/v4/mod.rs @@ -0,0 +1,38 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Target features enabled in the `x86-64-v4` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This module also contains [`V4`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. +//! +//! This level also implies that `x86-64-v3` is available. + +pub use crate::x86::v1::Fxsr; +pub use crate::x86::v1::Sse; +pub use crate::x86::v1::Sse2; +pub use crate::x86::v2::Cmpxchg16b; +pub use crate::x86::v2::Popcnt; +pub use crate::x86::v2::Sse3; +pub use crate::x86::v2::Sse4_1; +pub use crate::x86::v2::Sse4_2; +pub use crate::x86::v2::SupplementalSse3; +pub use crate::x86::v3::Avx; +pub use crate::x86::v3::Avx2; +pub use crate::x86::v3::Bmi1; +pub use crate::x86::v3::Bmi2; +pub use crate::x86::v3::F16c; +pub use crate::x86::v3::Fma; +pub use crate::x86::v3::Lzcnt; +pub use crate::x86::v3::Movbe; +pub use crate::x86::v3::Xsave; + +pub use crate::x86::avx512::Avx512bw; +pub use crate::x86::avx512::Avx512cd; +pub use crate::x86::avx512::Avx512dq; +pub use crate::x86::avx512::Avx512f; +pub use crate::x86::avx512::Avx512vl; + +mod level; +pub use level::V4; diff --git a/fearless_simd_core/src/x86/xsave/mod.rs b/fearless_simd_core/src/x86/xsave/mod.rs new file mode 100644 index 00000000..406dd54c --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/mod.rs @@ -0,0 +1,20 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! 
Target features relating to saving processor state, as used to implement operating systems. + +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." +)] +mod xsave; +pub use xsave::Xsave; + +mod xsavec; +pub use xsavec::Xsavec; + +mod xsaveopt; +pub use xsaveopt::Xsaveopt; + +pub use xsaves::Xsaves; +mod xsaves; diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs new file mode 100644 index 00000000..d24692c2 --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `xsave` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`xsave`] --- Save processor extended states +/// +/// [`xsave`]: https://www.felixcloutier.com/x86/xsave +/// +/// A token indicating that the current CPU has the `xsave` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsave")] +/// fn uses_xsave() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsave { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsave { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsave" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled.
+unsafe impl TargetFeatureToken for Xsave { + const FEATURES: &[&str] = &["xsave"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Xsave = self] => "xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsave { + #[cfg(feature = "std")] + /// Create a new token if the `"xsave"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsave") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsave")] + /// Create a new token for the "xsave" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsave` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsave" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized."
+ ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs new file mode 100644 index 00000000..5b91126b --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `xsavec` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`xsavec`] --- Save processor extended states with compaction +/// +/// [`xsavec`]: https://www.felixcloutier.com/x86/xsavec +/// +/// A token indicating that the current CPU has the `xsavec` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsavec")] +/// fn uses_xsavec() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsavec { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsavec { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsavec" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Xsavec { + const FEATURES: &[&str] = &["xsavec"]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately.
+ trampoline!([Xsavec = self] => "xsavec", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsavec { + #[cfg(feature = "std")] + /// Create a new token if the `"xsavec"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option<Self> { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsavec") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsavec")] + /// Create a new token for the "xsavec" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsavec` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsavec" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::<Xsavec>() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs new file mode 100644 index 00000000..00505619 --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`.
+// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `xsaveopt` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`xsaveopt`] --- Save processor extended states optimized +/// +/// [`xsaveopt`]: https://www.felixcloutier.com/x86/xsaveopt +/// +/// A token indicating that the current CPU has the `xsaveopt` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsaveopt")] +/// fn uses_xsaveopt() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsaveopt { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsaveopt { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsaveopt" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Xsaveopt { + const FEATURES: &[&str] = &["xsaveopt"]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Xsaveopt = self] => "xsaveopt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsaveopt { + #[cfg(feature = "std")] + /// Create a new token if the `"xsaveopt"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option<Self> { + // Feature flag required to make docs compile.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsaveopt") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsaveopt")] + /// Create a new token for the "xsaveopt" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsaveopt` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsaveopt" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::<Xsaveopt>() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs new file mode 100644 index 00000000..000054c7 --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -0,0 +1,96 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. +//! The `xsaves` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`xsaves`] --- Save processor extended states supervisor +/// +/// [`xsaves`]: https://www.felixcloutier.com/x86/xsaves +/// +/// A token indicating that the current CPU has the `xsaves` target feature.
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsaves")] +/// fn uses_xsaves() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsaves { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsaves { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsaves" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Xsaves { + const FEATURES: &[&str] = &["xsaves"]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Xsaves = self] => "xsaves", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsaves { + #[cfg(feature = "std")] + /// Create a new token if the `"xsaves"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option<Self> { + // Feature flag required to make docs compile. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsaves") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsaves")] + /// Create a new token for the "xsaves" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsaves` is available.
This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsaves" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::<Xsaves>() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index b1935285..ee71e374 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -207,29 +207,28 @@ fn simd_impl(ty: &VecType) -> TokenStream { | OpSig::Cvt(_, _) | OpSig::Reinterpret(_, _) | OpSig::Shift - ) { - if let Some(args) = sig.vec_trait_args() { - let ret_ty = sig.ret_ty(ty, TyFlavor::VecImpl); - let call_args = match sig { - OpSig::Unary | OpSig::Cvt(_, _) | OpSig::Reinterpret(_, _) => quote! { self }, - OpSig::Binary | OpSig::Compare | OpSig::Combine => { - quote! { self, rhs.simd_into(self.simd) } - } - OpSig::Shift => { - quote! { self, shift } - } - OpSig::Ternary => { - quote! { self, op1.simd_into(self.simd), op2.simd_into(self.simd) } - } - _ => quote! { todo!() }, - }; - methods.push(quote! { - #[inline(always)] - pub fn #method_name(#args) -> #ret_ty { - self.simd.#trait_method(#call_args) - } - }); - } + ) && let Some(args) = sig.vec_trait_args() + { + let ret_ty = sig.ret_ty(ty, TyFlavor::VecImpl); + let call_args = match sig { + OpSig::Unary | OpSig::Cvt(_, _) | OpSig::Reinterpret(_, _) => quote! { self }, + OpSig::Binary | OpSig::Compare | OpSig::Combine => { + quote! { self, rhs.simd_into(self.simd) } + } + OpSig::Shift => { + quote! { self, shift } + } + OpSig::Ternary => { + quote!
{ self, op1.simd_into(self.simd), op2.simd_into(self.simd) } + } + _ => quote! { todo!() }, + }; + methods.push(quote! { + #[inline(always)] + pub fn #method_name(#args) -> #ret_ty { + self.simd.#trait_method(#call_args) + } + }); } } let vec_impl = simd_vec_impl(ty);