diff --git a/Cargo.lock b/Cargo.lock
index 4d1c6d4c46b23..a1d8d91930f32 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -674,7 +674,7 @@ checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81"
 dependencies = [
  "serde",
  "termcolor",
- "unicode-width 0.1.14",
+ "unicode-width 0.2.1",
 ]
 
 [[package]]
@@ -937,23 +937,24 @@ dependencies = [
 
 [[package]]
 name = "cxx"
-version = "1.0.168"
+version = "1.0.185"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7aa144b12f11741f0dab5b4182896afad46faa0598b6a061f7b9d17a21837ba7"
+checksum = "2f81de88da10862f22b5b3a60f18f6f42bbe7cb8faa24845dd7b1e4e22190e77"
 dependencies = [
  "cc",
+ "cxx-build",
  "cxxbridge-cmd",
  "cxxbridge-flags",
  "cxxbridge-macro",
- "foldhash",
+ "foldhash 0.2.0",
  "link-cplusplus",
 ]
 
 [[package]]
 name = "cxx-build"
-version = "1.0.168"
+version = "1.0.185"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12d3cbb84fb003242941c231b45ca9417e786e66e94baa39584bd99df3a270b6"
+checksum = "5edd58bf75c3fdfc80d79806403af626570662f7b6cc782a7fabe156166bd6d6"
 dependencies = [
  "cc",
  "codespan-reporting",
@@ -966,9 +967,9 @@ dependencies = [
 
 [[package]]
 name = "cxxbridge-cmd"
-version = "1.0.168"
+version = "1.0.185"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3fa36b7b249d43f67a3f54bd65788e35e7afe64bbc671396387a48b3e8aaea94"
+checksum = "fd46bf2b541a4e0c2d5abba76607379ee05d68e714868e3cb406dc8d591ce2d2"
 dependencies = [
  "clap",
  "codespan-reporting",
@@ -980,15 +981,15 @@ dependencies = [
 
 [[package]]
 name = "cxxbridge-flags"
-version = "1.0.168"
+version = "1.0.185"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77707c70f6563edc5429618ca34a07241b75ebab35bd01d46697c75d58f8ddfe"
+checksum = "2c79b68f6a3a8f809d39b38ae8af61305a6113819b19b262643b9c21353b92d9"
 
 [[package]]
 name = "cxxbridge-macro"
-version = "1.0.168"
+version = "1.0.185"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ede6c0fb7e318f0a11799b86ee29dcf17b9be2960bd379a6c38e1a96a6010fff"
+checksum = "862b7fdb048ff9ef0779a0d0a03affd09746c4c875543746b640756be9cff2af"
 dependencies = [
  "indexmap",
  "proc-macro2",
@@ -1388,6 +1389,12 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
+[[package]]
+name = "foldhash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
+
 [[package]]
 name = "form_urlencoded"
 version = "1.2.1"
@@ -1567,7 +1574,7 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 dependencies = [
  "allocator-api2",
  "equivalent",
- "foldhash",
+ "foldhash 0.1.5",
  "serde",
 ]
 
@@ -2160,9 +2167,9 @@ dependencies = [
 
 [[package]]
 name = "link-cplusplus"
-version = "1.0.10"
+version = "1.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a6f6da007f968f9def0d65a05b187e2960183de70c160204ecfccf0ee330212"
+checksum = "7f78c730aaa7d0b9336a299029ea49f9ee53b0ed06e9202e8cb7db9bae7b8c82"
 dependencies = [
  "cc",
 ]
diff --git a/bootstrap.example.toml b/bootstrap.example.toml
index 51529751dd58f..0cd571134ef15 100644
--- a/bootstrap.example.toml
+++ b/bootstrap.example.toml
@@ -325,6 +325,9 @@
 # Defaults to the Python interpreter used to execute x.py.
 #build.python = "python"
 
+# The path to (or name of) the resource compiler executable to use on Windows.
+#build.windows-rc = "rc.exe"
+
 # The path to the REUSE executable to use. Note that REUSE is not required in
 # most cases, as our tooling relies on a cached (and shrunk) copy of the
 # REUSE output present in the git repository and in our source tarballs.
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 8461c8b03d5a4..45c5c9aa5514d 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -217,27 +217,16 @@ impl<'a> IntoIterator for LLVMFeature<'a> {
 /// Rust can also be build with an external precompiled version of LLVM which might lead to failures
 /// if the oldest tested / supported LLVM version doesn't yet support the relevant intrinsics.
 pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFeature<'a>> {
-    let arch = if sess.target.arch == "x86_64" {
-        "x86"
-    } else if sess.target.arch == "arm64ec" {
-        "aarch64"
-    } else if sess.target.arch == "sparc64" {
-        "sparc"
-    } else if sess.target.arch == "powerpc64" {
-        "powerpc"
-    } else {
-        &*sess.target.arch
+    let raw_arch = &*sess.target.arch;
+    let arch = match raw_arch {
+        "x86_64" => "x86",
+        "arm64ec" => "aarch64",
+        "sparc64" => "sparc",
+        "powerpc64" => "powerpc",
+        _ => raw_arch,
     };
+    let (major, _, _) = get_version();
     match (arch, s) {
-        ("x86", "sse4.2") => Some(LLVMFeature::with_dependencies(
-            "sse4.2",
-            smallvec![TargetFeatureFoldStrength::EnableOnly("crc32")],
-        )),
-        ("x86", "pclmulqdq") => Some(LLVMFeature::new("pclmul")),
-        ("x86", "rdrand") => Some(LLVMFeature::new("rdrnd")),
-        ("x86", "bmi1") => Some(LLVMFeature::new("bmi")),
-        ("x86", "cmpxchg16b") => Some(LLVMFeature::new("cx16")),
-        ("x86", "lahfsahf") => Some(LLVMFeature::new("sahf")),
         ("aarch64", "rcpc2") => Some(LLVMFeature::new("rcpc-immo")),
         ("aarch64", "dpb") => Some(LLVMFeature::new("ccpp")),
         ("aarch64", "dpb2") => Some(LLVMFeature::new("ccdp")),
@@ -260,14 +249,23 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("aarch64", "fpmr") => None, // only existed in 18
         ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
         // Filter out features that are not supported by the current LLVM version
-        ("loongarch32" | "loongarch64", "32s") if get_version().0 < 21 => None,
+        ("loongarch32" | "loongarch64", "32s") if major < 21 => None,
+        ("powerpc", "power8-crypto") => Some(LLVMFeature::new("crypto")),
+        ("sparc", "leoncasa") => Some(LLVMFeature::new("hasleoncasa")),
+        ("x86", "sse4.2") => Some(LLVMFeature::with_dependencies(
+            "sse4.2",
+            smallvec![TargetFeatureFoldStrength::EnableOnly("crc32")],
+        )),
+        ("x86", "pclmulqdq") => Some(LLVMFeature::new("pclmul")),
+        ("x86", "rdrand") => Some(LLVMFeature::new("rdrnd")),
+        ("x86", "bmi1") => Some(LLVMFeature::new("bmi")),
+        ("x86", "cmpxchg16b") => Some(LLVMFeature::new("cx16")),
+        ("x86", "lahfsahf") => Some(LLVMFeature::new("sahf")),
         // Enable the evex512 target feature if an avx512 target feature is enabled.
         ("x86", s) if s.starts_with("avx512") => Some(LLVMFeature::with_dependencies(
             s,
             smallvec![TargetFeatureFoldStrength::EnableOnly("evex512")],
         )),
-        ("sparc", "leoncasa") => Some(LLVMFeature::new("hasleoncasa")),
-        ("powerpc", "power8-crypto") => Some(LLVMFeature::new("crypto")),
         ("x86", "avx10.1") => Some(LLVMFeature::new("avx10.1-512")),
         ("x86", "avx10.2") => Some(LLVMFeature::new("avx10.2-512")),
         ("x86", "apxf") => Some(LLVMFeature::with_dependencies(
diff --git a/compiler/rustc_codegen_ssa/messages.ftl b/compiler/rustc_codegen_ssa/messages.ftl
index 1dd65d38a2be7..91c3806df4c34 100644
--- a/compiler/rustc_codegen_ssa/messages.ftl
+++ b/compiler/rustc_codegen_ssa/messages.ftl
@@ -8,8 +8,6 @@ codegen_ssa_aix_strip_not_used = using host's `strip` binary to cross-compile to
 
 codegen_ssa_archive_build_failure = failed to build archive at `{$path}`: {$error}
 
-codegen_ssa_autodiff_without_lto = using the autodiff feature requires using fat-lto
-
 codegen_ssa_bare_instruction_set = `#[instruction_set]` requires an argument
 
 codegen_ssa_binary_output_to_tty = option `-o` or `--emit` is used to write binary output type `{$shorthand}` to stdout, but stdout is a tty
diff --git a/compiler/rustc_codegen_ssa/src/errors.rs b/compiler/rustc_codegen_ssa/src/errors.rs
index fb5a82051405d..d5c30c5c7a6b0 100644
--- a/compiler/rustc_codegen_ssa/src/errors.rs
+++ b/compiler/rustc_codegen_ssa/src/errors.rs
@@ -37,10 +37,6 @@ pub(crate) struct CguNotRecorded<'a> {
     pub cgu_name: &'a str,
 }
 
-#[derive(Diagnostic)]
-#[diag(codegen_ssa_autodiff_without_lto)]
-pub struct AutodiffWithoutLto;
-
 #[derive(Diagnostic)]
 #[diag(codegen_ssa_unknown_reuse_kind)]
 pub(crate) struct UnknownReuseKind {
diff --git a/compiler/rustc_next_trait_solver/src/canonicalizer.rs b/compiler/rustc_next_trait_solver/src/canonical/canonicalizer.rs
similarity index 99%
rename from compiler/rustc_next_trait_solver/src/canonicalizer.rs
rename to compiler/rustc_next_trait_solver/src/canonical/canonicalizer.rs
index 4b4ec4956eb88..b25671d676b9c 100644
--- a/compiler/rustc_next_trait_solver/src/canonicalizer.rs
+++ b/compiler/rustc_next_trait_solver/src/canonical/canonicalizer.rs
@@ -57,7 +57,7 @@ enum CanonicalizeMode {
     },
 }
 
-pub struct Canonicalizer<'a, D: SolverDelegate<Interner = I>, I: Interner> {
+pub(super) struct Canonicalizer<'a, D: SolverDelegate<Interner = I>, I: Interner> {
     delegate: &'a D,
 
     // Immutable field.
@@ -83,7 +83,7 @@ pub struct Canonicalizer<'a, D: SolverDelegate<Interner = I>, I: Interner> {
 }
 
 impl<'a, D: SolverDelegate<Interner = I>, I: Interner> Canonicalizer<'a, D, I> {
-    pub fn canonicalize_response<T: TypeFoldable<I>>(
+    pub(super) fn canonicalize_response<T: TypeFoldable<I>>(
         delegate: &'a D,
         max_input_universe: ty::UniverseIndex,
         variables: &'a mut Vec<I::GenericArg>,
@@ -112,7 +112,6 @@ impl<'a, D: SolverDelegate<Interner = I>, I: Interner> Canonicalizer<'a, D, I> {
         let (max_universe, variables) = canonicalizer.finalize();
         Canonical { max_universe, variables, value }
     }
-
     fn canonicalize_param_env(
         delegate: &'a D,
         variables: &'a mut Vec<I::GenericArg>,
@@ -195,7 +194,7 @@ impl<'a, D: SolverDelegate<Interner = I>, I: Interner> Canonicalizer<'a, D, I> {
     ///
     /// We want to keep the option of canonicalizing `'static` to an existential
     /// variable in the future by changing the way we detect global where-bounds.
-    pub fn canonicalize_input<P: TypeFoldable<I>>(
+    pub(super) fn canonicalize_input<P: TypeFoldable<I>>(
         delegate: &'a D,
         variables: &'a mut Vec<I::GenericArg>,
         input: QueryInput<I, P>,
diff --git a/compiler/rustc_next_trait_solver/src/canonical/mod.rs b/compiler/rustc_next_trait_solver/src/canonical/mod.rs
new file mode 100644
index 0000000000000..e3520e238ed37
--- /dev/null
+++ b/compiler/rustc_next_trait_solver/src/canonical/mod.rs
@@ -0,0 +1,364 @@
+//! Canonicalization is used to separate some goal from its context,
+//! throwing away unnecessary information in the process.
+//!
+//! This is necessary to cache goals containing inference variables
+//! and placeholders without restricting them to the current `InferCtxt`.
+//!
+//! Canonicalization is fairly involved, for more details see the relevant
+//! section of the [rustc-dev-guide][c].
+//!
+//! [c]: https://rustc-dev-guide.rust-lang.org/solve/canonicalization.html
+
+use std::iter;
+
+use canonicalizer::Canonicalizer;
+use rustc_index::IndexVec;
+use rustc_type_ir::inherent::*;
+use rustc_type_ir::relate::solver_relating::RelateExt;
+use rustc_type_ir::{
+    self as ty, Canonical, CanonicalVarKind, CanonicalVarValues, InferCtxtLike, Interner,
+    TypeFoldable,
+};
+use tracing::instrument;
+
+use crate::delegate::SolverDelegate;
+use crate::resolve::eager_resolve_vars;
+use crate::solve::{
+    CanonicalInput, CanonicalResponse, Certainty, ExternalConstraintsData, Goal,
+    NestedNormalizationGoals, PredefinedOpaquesData, QueryInput, Response, inspect,
+};
+
+pub mod canonicalizer;
+
+trait ResponseT<I: Interner> {
+    fn var_values(&self) -> CanonicalVarValues<I>;
+}
+
+impl<I: Interner> ResponseT<I> for Response<I> {
+    fn var_values(&self) -> CanonicalVarValues<I> {
+        self.var_values
+    }
+}
+
+impl<I: Interner, T> ResponseT<I> for inspect::State<I, T> {
+    fn var_values(&self) -> CanonicalVarValues<I> {
+        self.var_values
+    }
+}
+
+/// Canonicalizes the goal remembering the original values
+/// for each bound variable.
+///
+/// This expects `goal` and `opaque_types` to be eager resolved.
+pub(super) fn canonicalize_goal<D, I>(
+    delegate: &D,
+    goal: Goal<I, I::Predicate>,
+    opaque_types: Vec<(ty::OpaqueTypeKey<I>, I::Ty)>,
+) -> (Vec<I::GenericArg>, CanonicalInput<I, I::Predicate>)
+where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+{
+    let mut orig_values = Default::default();
+    let canonical = Canonicalizer::canonicalize_input(
+        delegate,
+        &mut orig_values,
+        QueryInput {
+            goal,
+            predefined_opaques_in_body: delegate
+                .cx()
+                .mk_predefined_opaques_in_body(PredefinedOpaquesData { opaque_types }),
+        },
+    );
+    let query_input = ty::CanonicalQueryInput { canonical, typing_mode: delegate.typing_mode() };
+    (orig_values, query_input)
+}
+
+pub(super) fn canonicalize_response<D, I, T>(
+    delegate: &D,
+    max_input_universe: ty::UniverseIndex,
+    value: T,
+) -> ty::Canonical<I, T>
+where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+    T: TypeFoldable<I>,
+{
+    let mut orig_values = Default::default();
+    let canonical =
+        Canonicalizer::canonicalize_response(delegate, max_input_universe, &mut orig_values, value);
+    canonical
+}
+
+/// After calling a canonical query, we apply the constraints returned
+/// by the query using this function.
+///
+/// This happens in three steps:
+/// - we instantiate the bound variables of the query response
+/// - we unify the `var_values` of the response with the `original_values`
+/// - we apply the `external_constraints` returned by the query, returning
+///   the `normalization_nested_goals`
+pub(super) fn instantiate_and_apply_query_response<D, I>(
+    delegate: &D,
+    param_env: I::ParamEnv,
+    original_values: &[I::GenericArg],
+    response: CanonicalResponse<I>,
+    span: I::Span,
+) -> (NestedNormalizationGoals<I>, Certainty)
+where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+{
+    let instantiation =
+        compute_query_response_instantiation_values(delegate, &original_values, &response, span);
+
+    let Response { var_values, external_constraints, certainty } =
+        delegate.instantiate_canonical(response, instantiation);
+
+    unify_query_var_values(delegate, param_env, &original_values, var_values, span);
+
+    let ExternalConstraintsData { region_constraints, opaque_types, normalization_nested_goals } =
+        &*external_constraints;
+
+    register_region_constraints(delegate, region_constraints, span);
+    register_new_opaque_types(delegate, opaque_types, span);
+
+    (normalization_nested_goals.clone(), certainty)
+}
+
+/// This returns the canonical variable values to instantiate the bound variables of
+/// the canonical response. This depends on the `original_values` for the
+/// bound variables.
+fn compute_query_response_instantiation_values<D, I, T>(
+    delegate: &D,
+    original_values: &[I::GenericArg],
+    response: &Canonical<I, T>,
+    span: I::Span,
+) -> CanonicalVarValues<I>
+where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+    T: ResponseT<I>,
+{
+    // FIXME: Longterm canonical queries should deal with all placeholders
+    // created inside of the query directly instead of returning them to the
+    // caller.
+    let prev_universe = delegate.universe();
+    let universes_created_in_query = response.max_universe.index();
+    for _ in 0..universes_created_in_query {
+        delegate.create_next_universe();
+    }
+
+    let var_values = response.value.var_values();
+    assert_eq!(original_values.len(), var_values.len());
+
+    // If the query did not make progress with constraining inference variables,
+    // we would normally create a new inference variables for bound existential variables
+    // only then unify this new inference variable with the inference variable from
+    // the input.
+    //
+    // We therefore instantiate the existential variable in the canonical response with the
+    // inference variable of the input right away, which is more performant.
+    let mut opt_values = IndexVec::from_elem_n(None, response.variables.len());
+    for (original_value, result_value) in iter::zip(original_values, var_values.var_values.iter()) {
+        match result_value.kind() {
+            ty::GenericArgKind::Type(t) => {
+                // We disable the instantiation guess for inference variables
+                // and only use it for placeholders. We need to handle the
+                // `sub_root` of type inference variables which would make this
+                // more involved. They are also a lot rarer than region variables.
+                if let ty::Bound(debruijn, b) = t.kind()
+                    && !matches!(
+                        response.variables.get(b.var().as_usize()).unwrap(),
+                        CanonicalVarKind::Ty { .. }
+                    )
+                {
+                    assert_eq!(debruijn, ty::INNERMOST);
+                    opt_values[b.var()] = Some(*original_value);
+                }
+            }
+            ty::GenericArgKind::Lifetime(r) => {
+                if let ty::ReBound(debruijn, br) = r.kind() {
+                    assert_eq!(debruijn, ty::INNERMOST);
+                    opt_values[br.var()] = Some(*original_value);
+                }
+            }
+            ty::GenericArgKind::Const(c) => {
+                if let ty::ConstKind::Bound(debruijn, bv) = c.kind() {
+                    assert_eq!(debruijn, ty::INNERMOST);
+                    opt_values[bv.var()] = Some(*original_value);
+                }
+            }
+        }
+    }
+    CanonicalVarValues::instantiate(delegate.cx(), response.variables, |var_values, kind| {
+        if kind.universe() != ty::UniverseIndex::ROOT {
+            // A variable from inside a binder of the query. While ideally these shouldn't
+            // exist at all (see the FIXME at the start of this method), we have to deal with
+            // them for now.
+            delegate.instantiate_canonical_var(kind, span, &var_values, |idx| {
+                prev_universe + idx.index()
+            })
+        } else if kind.is_existential() {
+            // As an optimization we sometimes avoid creating a new inference variable here.
+            //
+            // All new inference variables we create start out in the current universe of the caller.
+            // This is conceptually wrong as these inference variables would be able to name
+            // more placeholders then they should be able to. However the inference variables have
+            // to "come from somewhere", so by equating them with the original values of the caller
+            // later on, we pull them down into their correct universe again.
+            if let Some(v) = opt_values[ty::BoundVar::from_usize(var_values.len())] {
+                v
+            } else {
+                delegate.instantiate_canonical_var(kind, span, &var_values, |_| prev_universe)
+            }
+        } else {
+            // For placeholders which were already part of the input, we simply map this
+            // universal bound variable back the placeholder of the input.
+            original_values[kind.expect_placeholder_index()]
+        }
+    })
+}
+
+/// Unify the `original_values` with the `var_values` returned by the canonical query..
+///
+/// This assumes that this unification will always succeed. This is the case when
+/// applying a query response right away. However, calling a canonical query, doing any
+/// other kind of trait solving, and only then instantiating the result of the query
+/// can cause the instantiation to fail. This is not supported and we ICE in this case.
+///
+/// We always structurally instantiate aliases. Relating aliases needs to be different
+/// depending on whether the alias is *rigid* or not. We're only really able to tell
+/// whether an alias is rigid by using the trait solver. When instantiating a response
+/// from the solver we assume that the solver correctly handled aliases and therefore
+/// always relate them structurally here.
+#[instrument(level = "trace", skip(delegate))]
+fn unify_query_var_values<D, I>(
+    delegate: &D,
+    param_env: I::ParamEnv,
+    original_values: &[I::GenericArg],
+    var_values: CanonicalVarValues<I>,
+    span: I::Span,
+) where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+{
+    assert_eq!(original_values.len(), var_values.len());
+
+    for (&orig, response) in iter::zip(original_values, var_values.var_values.iter()) {
+        let goals =
+            delegate.eq_structurally_relating_aliases(param_env, orig, response, span).unwrap();
+        assert!(goals.is_empty());
+    }
+}
+
+fn register_region_constraints<D, I>(
+    delegate: &D,
+    outlives: &[ty::OutlivesPredicate<I, I::GenericArg>],
+    span: I::Span,
+) where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+{
+    for &ty::OutlivesPredicate(lhs, rhs) in outlives {
+        match lhs.kind() {
+            ty::GenericArgKind::Lifetime(lhs) => delegate.sub_regions(rhs, lhs, span),
+            ty::GenericArgKind::Type(lhs) => delegate.register_ty_outlives(lhs, rhs, span),
+            ty::GenericArgKind::Const(_) => panic!("const outlives: {lhs:?}: {rhs:?}"),
+        }
+    }
+}
+
+fn register_new_opaque_types<D, I>(
+    delegate: &D,
+    opaque_types: &[(ty::OpaqueTypeKey<I>, I::Ty)],
+    span: I::Span,
+) where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+{
+    for &(key, ty) in opaque_types {
+        let prev = delegate.register_hidden_type_in_storage(key, ty, span);
+        // We eagerly resolve inference variables when computing the query response.
+        // This can cause previously distinct opaque type keys to now be structurally equal.
+        //
+        // To handle this, we store any duplicate entries in a separate list to check them
+        // at the end of typeck/borrowck. We could alternatively eagerly equate the hidden
+        // types here. However, doing so is difficult as it may result in nested goals and
+        // any errors may make it harder to track the control flow for diagnostics.
+        if let Some(prev) = prev {
+            delegate.add_duplicate_opaque_type(key, prev, span);
+        }
+    }
+}
+
+/// Used by proof trees to be able to recompute intermediate actions while
+/// evaluating a goal. The `var_values` not only include the bound variables
+/// of the query input, but also contain all unconstrained inference vars
+/// created while evaluating this goal.
+pub fn make_canonical_state<D, I, T>(
+    delegate: &D,
+    var_values: &[I::GenericArg],
+    max_input_universe: ty::UniverseIndex,
+    data: T,
+) -> inspect::CanonicalState<I, T>
+where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+    T: TypeFoldable<I>,
+{
+    let var_values = CanonicalVarValues { var_values: delegate.cx().mk_args(var_values) };
+    let state = inspect::State { var_values, data };
+    let state = eager_resolve_vars(delegate, state);
+    Canonicalizer::canonicalize_response(delegate, max_input_universe, &mut vec![], state)
+}
+
+// FIXME: needs to be pub to be accessed by downstream
+// `rustc_trait_selection::solve::inspect::analyse`.
+pub fn instantiate_canonical_state<D, I, T>(
+    delegate: &D,
+    span: I::Span,
+    param_env: I::ParamEnv,
+    orig_values: &mut Vec<I::GenericArg>,
+    state: inspect::CanonicalState<I, T>,
+) -> T
+where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+    T: TypeFoldable<I>,
+{
+    // In case any fresh inference variables have been created between `state`
+    // and the previous instantiation, extend `orig_values` for it.
+    orig_values.extend(
+        state.value.var_values.var_values.as_slice()[orig_values.len()..]
+            .iter()
+            .map(|&arg| delegate.fresh_var_for_kind_with_span(arg, span)),
+    );
+
+    let instantiation =
+        compute_query_response_instantiation_values(delegate, orig_values, &state, span);
+
+    let inspect::State { var_values, data } = delegate.instantiate_canonical(state, instantiation);
+
+    unify_query_var_values(delegate, param_env, orig_values, var_values, span);
+    data
+}
+
+pub fn response_no_constraints_raw<I: Interner>(
+    cx: I,
+    max_universe: ty::UniverseIndex,
+    variables: I::CanonicalVarKinds,
+    certainty: Certainty,
+) -> CanonicalResponse<I> {
+    ty::Canonical {
+        max_universe,
+        variables,
+        value: Response {
+            var_values: ty::CanonicalVarValues::make_identity(cx, variables),
+            // FIXME: maybe we should store the "no response" version in cx, like
+            // we do for cx.types and stuff.
+            external_constraints: cx.mk_external_constraints(ExternalConstraintsData::default()),
+            certainty,
+        },
+    }
+}
diff --git a/compiler/rustc_next_trait_solver/src/lib.rs b/compiler/rustc_next_trait_solver/src/lib.rs
index d3965e14c68a8..5fa29b7d9f813 100644
--- a/compiler/rustc_next_trait_solver/src/lib.rs
+++ b/compiler/rustc_next_trait_solver/src/lib.rs
@@ -10,7 +10,7 @@
 #![allow(rustc::usage_of_type_ir_traits)]
 // tidy-alphabetical-end
 
-pub mod canonicalizer;
+pub mod canonical;
 pub mod coherence;
 pub mod delegate;
 pub mod placeholder;
diff --git a/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/canonical.rs b/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/canonical.rs
deleted file mode 100644
index 889588afe61ff..0000000000000
--- a/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/canonical.rs
+++ /dev/null
@@ -1,517 +0,0 @@
-//! Canonicalization is used to separate some goal from its context,
-//! throwing away unnecessary information in the process.
-//!
-//! This is necessary to cache goals containing inference variables
-//! and placeholders without restricting them to the current `InferCtxt`.
-//!
-//! Canonicalization is fairly involved, for more details see the relevant
-//! section of the [rustc-dev-guide][c].
-//!
-//! [c]: https://rustc-dev-guide.rust-lang.org/solve/canonicalization.html
-
-use std::iter;
-
-use rustc_index::IndexVec;
-use rustc_type_ir::data_structures::HashSet;
-use rustc_type_ir::inherent::*;
-use rustc_type_ir::relate::solver_relating::RelateExt;
-use rustc_type_ir::solve::OpaqueTypesJank;
-use rustc_type_ir::{
-    self as ty, Canonical, CanonicalVarKind, CanonicalVarValues, InferCtxtLike, Interner,
-    TypeFoldable,
-};
-use tracing::{debug, instrument, trace};
-
-use crate::canonicalizer::Canonicalizer;
-use crate::delegate::SolverDelegate;
-use crate::resolve::eager_resolve_vars;
-use crate::solve::eval_ctxt::CurrentGoalKind;
-use crate::solve::{
-    CanonicalInput, CanonicalResponse, Certainty, EvalCtxt, ExternalConstraintsData, Goal,
-    MaybeCause, NestedNormalizationGoals, NoSolution, PredefinedOpaquesData, QueryInput,
-    QueryResult, Response, inspect, response_no_constraints_raw,
-};
-
-trait ResponseT<I: Interner> {
-    fn var_values(&self) -> CanonicalVarValues<I>;
-}
-
-impl<I: Interner> ResponseT<I> for Response<I> {
-    fn var_values(&self) -> CanonicalVarValues<I> {
-        self.var_values
-    }
-}
-
-impl<I: Interner, T> ResponseT<I> for inspect::State<I, T> {
-    fn var_values(&self) -> CanonicalVarValues<I> {
-        self.var_values
-    }
-}
-
-impl<D, I> EvalCtxt<'_, D>
-where
-    D: SolverDelegate<Interner = I>,
-    I: Interner,
-{
-    /// Canonicalizes the goal remembering the original values
-    /// for each bound variable.
-    ///
-    /// This expects `goal` and `opaque_types` to be eager resolved.
-    pub(super) fn canonicalize_goal(
-        delegate: &D,
-        goal: Goal<I, I::Predicate>,
-        opaque_types: Vec<(ty::OpaqueTypeKey<I>, I::Ty)>,
-    ) -> (Vec<I::GenericArg>, CanonicalInput<I, I::Predicate>) {
-        let mut orig_values = Default::default();
-        let canonical = Canonicalizer::canonicalize_input(
-            delegate,
-            &mut orig_values,
-            QueryInput {
-                goal,
-                predefined_opaques_in_body: delegate
-                    .cx()
-                    .mk_predefined_opaques_in_body(PredefinedOpaquesData { opaque_types }),
-            },
-        );
-        let query_input =
-            ty::CanonicalQueryInput { canonical, typing_mode: delegate.typing_mode() };
-        (orig_values, query_input)
-    }
-
-    /// To return the constraints of a canonical query to the caller, we canonicalize:
-    ///
-    /// - `var_values`: a map from bound variables in the canonical goal to
-    ///   the values inferred while solving the instantiated goal.
-    /// - `external_constraints`: additional constraints which aren't expressible
-    ///   using simple unification of inference variables.
-    ///
-    /// This takes the `shallow_certainty` which represents whether we're confident
-    /// that the final result of the current goal only depends on the nested goals.
-    ///
-    /// In case this is `Certainty::Maybe`, there may still be additional nested goals
-    /// or inference constraints required for this candidate to be hold. The candidate
-    /// always requires all already added constraints and nested goals.
-    #[instrument(level = "trace", skip(self), ret)]
-    pub(in crate::solve) fn evaluate_added_goals_and_make_canonical_response(
-        &mut self,
-        shallow_certainty: Certainty,
-    ) -> QueryResult<I> {
-        self.inspect.make_canonical_response(shallow_certainty);
-
-        let goals_certainty = self.try_evaluate_added_goals()?;
-        assert_eq!(
-            self.tainted,
-            Ok(()),
-            "EvalCtxt is tainted -- nested goals may have been dropped in a \
-            previous call to `try_evaluate_added_goals!`"
-        );
-
-        // We only check for leaks from universes which were entered inside
-        // of the query.
-        self.delegate.leak_check(self.max_input_universe).map_err(|NoSolution| {
-            trace!("failed the leak check");
-            NoSolution
-        })?;
-
-        let (certainty, normalization_nested_goals) =
-            match (self.current_goal_kind, shallow_certainty) {
-                // When normalizing, we've replaced the expected term with an unconstrained
-                // inference variable. This means that we dropped information which could
-                // have been important. We handle this by instead returning the nested goals
-                // to the caller, where they are then handled. We only do so if we do not
-                // need to recompute the `NormalizesTo` goal afterwards to avoid repeatedly
-                // uplifting its nested goals. This is the case if the `shallow_certainty` is
-                // `Certainty::Yes`.
-                (CurrentGoalKind::NormalizesTo, Certainty::Yes) => {
-                    let goals = std::mem::take(&mut self.nested_goals);
-                    // As we return all ambiguous nested goals, we can ignore the certainty
-                    // returned by `self.try_evaluate_added_goals()`.
-                    if goals.is_empty() {
-                        assert!(matches!(goals_certainty, Certainty::Yes));
-                    }
-                    (
-                        Certainty::Yes,
-                        NestedNormalizationGoals(
-                            goals.into_iter().map(|(s, g, _)| (s, g)).collect(),
-                        ),
-                    )
-                }
-                _ => {
-                    let certainty = shallow_certainty.and(goals_certainty);
-                    (certainty, NestedNormalizationGoals::empty())
-                }
-            };
-
-        if let Certainty::Maybe {
-            cause: cause @ MaybeCause::Overflow { keep_constraints: false, .. },
-            opaque_types_jank,
-        } = certainty
-        {
-            // If we have overflow, it's probable that we're substituting a type
-            // into itself infinitely and any partial substitutions in the query
-            // response are probably not useful anyways, so just return an empty
-            // query response.
-            //
-            // This may prevent us from potentially useful inference, e.g.
-            // 2 candidates, one ambiguous and one overflow, which both
-            // have the same inference constraints.
-            //
-            // Changing this to retain some constraints in the future
-            // won't be a breaking change, so this is good enough for now.
-            return Ok(self.make_ambiguous_response_no_constraints(cause, opaque_types_jank));
-        }
-
-        let external_constraints =
-            self.compute_external_query_constraints(certainty, normalization_nested_goals);
-        let (var_values, mut external_constraints) =
-            eager_resolve_vars(self.delegate, (self.var_values, external_constraints));
-
-        // Remove any trivial or duplicated region constraints once we've resolved regions
-        let mut unique = HashSet::default();
-        external_constraints.region_constraints.retain(|outlives| {
-            outlives.0.as_region().is_none_or(|re| re != outlives.1) && unique.insert(*outlives)
-        });
-
-        let canonical = Canonicalizer::canonicalize_response(
-            self.delegate,
-            self.max_input_universe,
-            &mut Default::default(),
-            Response {
-                var_values,
-                certainty,
-                external_constraints: self.cx().mk_external_constraints(external_constraints),
-            },
-        );
-
-        // HACK: We bail with overflow if the response would have too many non-region
-        // inference variables. This tends to only happen if we encounter a lot of
-        // ambiguous alias types which get replaced with fresh inference variables
-        // during generalization. This prevents hangs caused by an exponential blowup,
-        // see tests/ui/traits/next-solver/coherence-alias-hang.rs.
-        match self.current_goal_kind {
-            // We don't do so for `NormalizesTo` goals as we erased the expected term and
-            // bailing with overflow here would prevent us from detecting a type-mismatch,
-            // causing a coherence error in diesel, see #131969. We still bail with overflow
-            // when later returning from the parent AliasRelate goal.
-            CurrentGoalKind::NormalizesTo => {}
-            CurrentGoalKind::Misc | CurrentGoalKind::CoinductiveTrait => {
-                let num_non_region_vars = canonical
-                    .variables
-                    .iter()
-                    .filter(|c| !c.is_region() && c.is_existential())
-                    .count();
-                if num_non_region_vars > self.cx().recursion_limit() {
-                    debug!(?num_non_region_vars, "too many inference variables -> overflow");
-                    return Ok(self.make_ambiguous_response_no_constraints(
-                        MaybeCause::Overflow {
-                            suggest_increasing_limit: true,
-                            keep_constraints: false,
-                        },
-                        OpaqueTypesJank::AllGood,
-                    ));
-                }
-            }
-        }
-
-        Ok(canonical)
-    }
-
-    /// Constructs a totally unconstrained, ambiguous response to a goal.
-    ///
-    /// Take care when using this, since often it's useful to respond with
-    /// ambiguity but return constrained variables to guide inference.
-    pub(in crate::solve) fn make_ambiguous_response_no_constraints(
-        &self,
-        cause: MaybeCause,
-        opaque_types_jank: OpaqueTypesJank,
-    ) -> CanonicalResponse<I> {
-        response_no_constraints_raw(
-            self.cx(),
-            self.max_input_universe,
-            self.variables,
-            Certainty::Maybe { cause, opaque_types_jank },
-        )
-    }
-
-    /// Computes the region constraints and *new* opaque types registered when
-    /// proving a goal.
-    ///
-    /// If an opaque was already constrained before proving this goal, then the
-    /// external constraints do not need to record that opaque, since if it is
-    /// further constrained by inference, that will be passed back in the var
-    /// values.
-    #[instrument(level = "trace", skip(self), ret)]
-    fn compute_external_query_constraints(
-        &self,
-        certainty: Certainty,
-        normalization_nested_goals: NestedNormalizationGoals<I>,
-    ) -> ExternalConstraintsData<I> {
-        // We only return region constraints once the certainty is `Yes`. This
-        // is necessary as we may drop nested goals on ambiguity, which may result
-        // in unconstrained inference variables in the region constraints. It also
-        // prevents us from emitting duplicate region constraints, avoiding some
-        // unnecessary work. This slightly weakens the leak check in case it uses
-        // region constraints from an ambiguous nested goal. This is tested in both
-        // `tests/ui/higher-ranked/leak-check/leak-check-in-selection-5-ambig.rs` and
-        // `tests/ui/higher-ranked/leak-check/leak-check-in-selection-6-ambig-unify.rs`.
-        let region_constraints = if certainty == Certainty::Yes {
-            self.delegate.make_deduplicated_outlives_constraints()
-        } else {
-            Default::default()
-        };
-
-        // We only return *newly defined* opaque types from canonical queries.
-        //
-        // Constraints for any existing opaque types are already tracked by changes
-        // to the `var_values`.
-        let opaque_types = self
-            .delegate
-            .clone_opaque_types_added_since(self.initial_opaque_types_storage_num_entries);
-
-        ExternalConstraintsData { region_constraints, opaque_types, normalization_nested_goals }
-    }
-
-    /// After calling a canonical query, we apply the constraints returned
-    /// by the query using this function.
-    ///
-    /// This happens in three steps:
-    /// - we instantiate the bound variables of the query response
-    /// - we unify the `var_values` of the response with the `original_values`
-    /// - we apply the `external_constraints` returned by the query, returning
-    ///   the `normalization_nested_goals`
-    pub(super) fn instantiate_and_apply_query_response(
-        delegate: &D,
-        param_env: I::ParamEnv,
-        original_values: &[I::GenericArg],
-        response: CanonicalResponse<I>,
-        span: I::Span,
-    ) -> (NestedNormalizationGoals<I>, Certainty) {
-        let instantiation = Self::compute_query_response_instantiation_values(
-            delegate,
-            &original_values,
-            &response,
-            span,
-        );
-
-        let Response { var_values, external_constraints, certainty } =
-            delegate.instantiate_canonical(response, instantiation);
-
-        Self::unify_query_var_values(delegate, param_env, &original_values, var_values, span);
-
-        let ExternalConstraintsData {
-            region_constraints,
-            opaque_types,
-            normalization_nested_goals,
-        } = &*external_constraints;
-
-        Self::register_region_constraints(delegate, region_constraints, span);
-        Self::register_new_opaque_types(delegate, opaque_types, span);
-
-        (normalization_nested_goals.clone(), certainty)
-    }
-
-    /// This returns the canonical variable values to instantiate the bound variables of
-    /// the canonical response. This depends on the `original_values` for the
-    /// bound variables.
-    fn compute_query_response_instantiation_values<T: ResponseT<I>>(
-        delegate: &D,
-        original_values: &[I::GenericArg],
-        response: &Canonical<I, T>,
-        span: I::Span,
-    ) -> CanonicalVarValues<I> {
-        // FIXME: Longterm canonical queries should deal with all placeholders
-        // created inside of the query directly instead of returning them to the
-        // caller.
-        let prev_universe = delegate.universe();
-        let universes_created_in_query = response.max_universe.index();
-        for _ in 0..universes_created_in_query {
-            delegate.create_next_universe();
-        }
-
-        let var_values = response.value.var_values();
-        assert_eq!(original_values.len(), var_values.len());
-
-        // If the query did not make progress with constraining inference variables,
-        // we would normally create a new inference variables for bound existential variables
-        // only then unify this new inference variable with the inference variable from
-        // the input.
-        //
-        // We therefore instantiate the existential variable in the canonical response with the
-        // inference variable of the input right away, which is more performant.
-        let mut opt_values = IndexVec::from_elem_n(None, response.variables.len());
-        for (original_value, result_value) in
-            iter::zip(original_values, var_values.var_values.iter())
-        {
-            match result_value.kind() {
-                ty::GenericArgKind::Type(t) => {
-                    // We disable the instantiation guess for inference variables
-                    // and only use it for placeholders. We need to handle the
-                    // `sub_root` of type inference variables which would make this
-                    // more involved. They are also a lot rarer than region variables.
-                    if let ty::Bound(debruijn, b) = t.kind()
-                        && !matches!(
-                            response.variables.get(b.var().as_usize()).unwrap(),
-                            CanonicalVarKind::Ty { .. }
-                        )
-                    {
-                        assert_eq!(debruijn, ty::INNERMOST);
-                        opt_values[b.var()] = Some(*original_value);
-                    }
-                }
-                ty::GenericArgKind::Lifetime(r) => {
-                    if let ty::ReBound(debruijn, br) = r.kind() {
-                        assert_eq!(debruijn, ty::INNERMOST);
-                        opt_values[br.var()] = Some(*original_value);
-                    }
-                }
-                ty::GenericArgKind::Const(c) => {
-                    if let ty::ConstKind::Bound(debruijn, bv) = c.kind() {
-                        assert_eq!(debruijn, ty::INNERMOST);
-                        opt_values[bv.var()] = Some(*original_value);
-                    }
-                }
-            }
-        }
-        CanonicalVarValues::instantiate(delegate.cx(), response.variables, |var_values, kind| {
-            if kind.universe() != ty::UniverseIndex::ROOT {
-                // A variable from inside a binder of the query. While ideally these shouldn't
-                // exist at all (see the FIXME at the start of this method), we have to deal with
-                // them for now.
-                delegate.instantiate_canonical_var(kind, span, &var_values, |idx| {
-                    prev_universe + idx.index()
-                })
-            } else if kind.is_existential() {
-                // As an optimization we sometimes avoid creating a new inference variable here.
-                //
-                // All new inference variables we create start out in the current universe of the caller.
-                // This is conceptually wrong as these inference variables would be able to name
-                // more placeholders then they should be able to. However the inference variables have
-                // to "come from somewhere", so by equating them with the original values of the caller
-                // later on, we pull them down into their correct universe again.
-                if let Some(v) = opt_values[ty::BoundVar::from_usize(var_values.len())] {
-                    v
-                } else {
-                    delegate.instantiate_canonical_var(kind, span, &var_values, |_| prev_universe)
-                }
-            } else {
-                // For placeholders which were already part of the input, we simply map this
-                // universal bound variable back the placeholder of the input.
-                original_values[kind.expect_placeholder_index()]
-            }
-        })
-    }
-
-    /// Unify the `original_values` with the `var_values` returned by the canonical query..
-    ///
-    /// This assumes that this unification will always succeed. This is the case when
-    /// applying a query response right away. However, calling a canonical query, doing any
-    /// other kind of trait solving, and only then instantiating the result of the query
-    /// can cause the instantiation to fail. This is not supported and we ICE in this case.
-    ///
-    /// We always structurally instantiate aliases. Relating aliases needs to be different
-    /// depending on whether the alias is *rigid* or not. We're only really able to tell
-    /// whether an alias is rigid by using the trait solver. When instantiating a response
-    /// from the solver we assume that the solver correctly handled aliases and therefore
-    /// always relate them structurally here.
-    #[instrument(level = "trace", skip(delegate))]
-    fn unify_query_var_values(
-        delegate: &D,
-        param_env: I::ParamEnv,
-        original_values: &[I::GenericArg],
-        var_values: CanonicalVarValues<I>,
-        span: I::Span,
-    ) {
-        assert_eq!(original_values.len(), var_values.len());
-
-        for (&orig, response) in iter::zip(original_values, var_values.var_values.iter()) {
-            let goals =
-                delegate.eq_structurally_relating_aliases(param_env, orig, response, span).unwrap();
-            assert!(goals.is_empty());
-        }
-    }
-
-    fn register_region_constraints(
-        delegate: &D,
-        outlives: &[ty::OutlivesPredicate<I, I::GenericArg>],
-        span: I::Span,
-    ) {
-        for &ty::OutlivesPredicate(lhs, rhs) in outlives {
-            match lhs.kind() {
-                ty::GenericArgKind::Lifetime(lhs) => delegate.sub_regions(rhs, lhs, span),
-                ty::GenericArgKind::Type(lhs) => delegate.register_ty_outlives(lhs, rhs, span),
-                ty::GenericArgKind::Const(_) => panic!("const outlives: {lhs:?}: {rhs:?}"),
-            }
-        }
-    }
-
-    fn register_new_opaque_types(
-        delegate: &D,
-        opaque_types: &[(ty::OpaqueTypeKey<I>, I::Ty)],
-        span: I::Span,
-    ) {
-        for &(key, ty) in opaque_types {
-            let prev = delegate.register_hidden_type_in_storage(key, ty, span);
-            // We eagerly resolve inference variables when computing the query response.
-            // This can cause previously distinct opaque type keys to now be structurally equal.
-            //
-            // To handle this, we store any duplicate entries in a separate list to check them
-            // at the end of typeck/borrowck. We could alternatively eagerly equate the hidden
-            // types here. However, doing so is difficult as it may result in nested goals and
-            // any errors may make it harder to track the control flow for diagnostics.
-            if let Some(prev) = prev {
-                delegate.add_duplicate_opaque_type(key, prev, span);
-            }
-        }
-    }
-}
-
-/// Used by proof trees to be able to recompute intermediate actions while
-/// evaluating a goal. The `var_values` not only include the bound variables
-/// of the query input, but also contain all unconstrained inference vars
-/// created while evaluating this goal.
-pub(in crate::solve) fn make_canonical_state<D, T, I>(
-    delegate: &D,
-    var_values: &[I::GenericArg],
-    max_input_universe: ty::UniverseIndex,
-    data: T,
-) -> inspect::CanonicalState<I, T>
-where
-    D: SolverDelegate<Interner = I>,
-    I: Interner,
-    T: TypeFoldable<I>,
-{
-    let var_values = CanonicalVarValues { var_values: delegate.cx().mk_args(var_values) };
-    let state = inspect::State { var_values, data };
-    let state = eager_resolve_vars(delegate, state);
-    Canonicalizer::canonicalize_response(delegate, max_input_universe, &mut vec![], state)
-}
-
-// FIXME: needs to be pub to be accessed by downstream
-// `rustc_trait_selection::solve::inspect::analyse`.
-pub fn instantiate_canonical_state<D, I, T: TypeFoldable<I>>(
-    delegate: &D,
-    span: I::Span,
-    param_env: I::ParamEnv,
-    orig_values: &mut Vec<I::GenericArg>,
-    state: inspect::CanonicalState<I, T>,
-) -> T
-where
-    D: SolverDelegate<Interner = I>,
-    I: Interner,
-{
-    // In case any fresh inference variables have been created between `state`
-    // and the previous instantiation, extend `orig_values` for it.
-    orig_values.extend(
-        state.value.var_values.var_values.as_slice()[orig_values.len()..]
-            .iter()
-            .map(|&arg| delegate.fresh_var_for_kind_with_span(arg, span)),
-    );
-
-    let instantiation =
-        EvalCtxt::compute_query_response_instantiation_values(delegate, orig_values, &state, span);
-
-    let inspect::State { var_values, data } = delegate.instantiate_canonical(state, instantiation);
-
-    EvalCtxt::unify_query_var_values(delegate, param_env, orig_values, var_values, span);
-    data
-}
diff --git a/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs b/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs
index 5df7c92d88145..bb86357a85f8a 100644
--- a/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs
@@ -17,6 +17,10 @@ use rustc_type_ir::{
 use tracing::{debug, instrument, trace};
 
 use super::has_only_region_constraints;
+use crate::canonical::{
+    canonicalize_goal, canonicalize_response, instantiate_and_apply_query_response,
+    response_no_constraints_raw,
+};
 use crate::coherence;
 use crate::delegate::SolverDelegate;
 use crate::placeholder::BoundVarReplacer;
@@ -24,12 +28,11 @@ use crate::resolve::eager_resolve_vars;
 use crate::solve::search_graph::SearchGraph;
 use crate::solve::ty::may_use_unstable_feature;
 use crate::solve::{
-    CanonicalInput, Certainty, FIXPOINT_STEP_LIMIT, Goal, GoalEvaluation, GoalSource,
-    GoalStalledOn, HasChanged, NestedNormalizationGoals, NoSolution, QueryInput, QueryResult,
-    inspect,
+    CanonicalInput, CanonicalResponse, Certainty, ExternalConstraintsData, FIXPOINT_STEP_LIMIT,
+    Goal, GoalEvaluation, GoalSource, GoalStalledOn, HasChanged, MaybeCause,
+    NestedNormalizationGoals, NoSolution, QueryInput, QueryResult, Response, inspect,
 };
 
-pub(super) mod canonical;
 mod probe;
 
 /// The kind of goal we're currently proving.
@@ -464,8 +467,7 @@ where
         let opaque_types = self.delegate.clone_opaque_types_lookup_table();
         let (goal, opaque_types) = eager_resolve_vars(self.delegate, (goal, opaque_types));
 
-        let (orig_values, canonical_goal) =
-            Self::canonicalize_goal(self.delegate, goal, opaque_types);
+        let (orig_values, canonical_goal) = canonicalize_goal(self.delegate, goal, opaque_types);
         let canonical_result = self.search_graph.evaluate_goal(
             self.cx(),
             canonical_goal,
@@ -480,7 +482,7 @@ where
         let has_changed =
             if !has_only_region_constraints(response) { HasChanged::Yes } else { HasChanged::No };
 
-        let (normalization_nested_goals, certainty) = Self::instantiate_and_apply_query_response(
+        let (normalization_nested_goals, certainty) = instantiate_and_apply_query_response(
             self.delegate,
             goal.param_env,
             &orig_values,
@@ -1223,6 +1225,198 @@ where
             vec![]
         }
     }
+
+    /// To return the constraints of a canonical query to the caller, we canonicalize:
+    ///
+    /// - `var_values`: a map from bound variables in the canonical goal to
+    ///   the values inferred while solving the instantiated goal.
+    /// - `external_constraints`: additional constraints which aren't expressible
+    ///   using simple unification of inference variables.
+    ///
+    /// This takes the `shallow_certainty` which represents whether we're confident
+    /// that the final result of the current goal only depends on the nested goals.
+    ///
+    /// In case this is `Certainty::Maybe`, there may still be additional nested goals
+    /// or inference constraints required for this candidate to be hold. The candidate
+    /// always requires all already added constraints and nested goals.
+    #[instrument(level = "trace", skip(self), ret)]
+    pub(in crate::solve) fn evaluate_added_goals_and_make_canonical_response(
+        &mut self,
+        shallow_certainty: Certainty,
+    ) -> QueryResult<I> {
+        self.inspect.make_canonical_response(shallow_certainty);
+
+        let goals_certainty = self.try_evaluate_added_goals()?;
+        assert_eq!(
+            self.tainted,
+            Ok(()),
+            "EvalCtxt is tainted -- nested goals may have been dropped in a \
+            previous call to `try_evaluate_added_goals!`"
+        );
+
+        // We only check for leaks from universes which were entered inside
+        // of the query.
+        self.delegate.leak_check(self.max_input_universe).map_err(|NoSolution| {
+            trace!("failed the leak check");
+            NoSolution
+        })?;
+
+        let (certainty, normalization_nested_goals) =
+            match (self.current_goal_kind, shallow_certainty) {
+                // When normalizing, we've replaced the expected term with an unconstrained
+                // inference variable. This means that we dropped information which could
+                // have been important. We handle this by instead returning the nested goals
+                // to the caller, where they are then handled. We only do so if we do not
+                // need to recompute the `NormalizesTo` goal afterwards to avoid repeatedly
+                // uplifting its nested goals. This is the case if the `shallow_certainty` is
+                // `Certainty::Yes`.
+                (CurrentGoalKind::NormalizesTo, Certainty::Yes) => {
+                    let goals = std::mem::take(&mut self.nested_goals);
+                    // As we return all ambiguous nested goals, we can ignore the certainty
+                    // returned by `self.try_evaluate_added_goals()`.
+                    if goals.is_empty() {
+                        assert!(matches!(goals_certainty, Certainty::Yes));
+                    }
+                    (
+                        Certainty::Yes,
+                        NestedNormalizationGoals(
+                            goals.into_iter().map(|(s, g, _)| (s, g)).collect(),
+                        ),
+                    )
+                }
+                _ => {
+                    let certainty = shallow_certainty.and(goals_certainty);
+                    (certainty, NestedNormalizationGoals::empty())
+                }
+            };
+
+        if let Certainty::Maybe {
+            cause: cause @ MaybeCause::Overflow { keep_constraints: false, .. },
+            opaque_types_jank,
+        } = certainty
+        {
+            // If we have overflow, it's probable that we're substituting a type
+            // into itself infinitely and any partial substitutions in the query
+            // response are probably not useful anyways, so just return an empty
+            // query response.
+            //
+            // This may prevent us from potentially useful inference, e.g.
+            // 2 candidates, one ambiguous and one overflow, which both
+            // have the same inference constraints.
+            //
+            // Changing this to retain some constraints in the future
+            // won't be a breaking change, so this is good enough for now.
+            return Ok(self.make_ambiguous_response_no_constraints(cause, opaque_types_jank));
+        }
+
+        let external_constraints =
+            self.compute_external_query_constraints(certainty, normalization_nested_goals);
+        let (var_values, mut external_constraints) =
+            eager_resolve_vars(self.delegate, (self.var_values, external_constraints));
+
+        // Remove any trivial or duplicated region constraints once we've resolved regions
+        let mut unique = HashSet::default();
+        external_constraints.region_constraints.retain(|outlives| {
+            outlives.0.as_region().is_none_or(|re| re != outlives.1) && unique.insert(*outlives)
+        });
+
+        let canonical = canonicalize_response(
+            self.delegate,
+            self.max_input_universe,
+            Response {
+                var_values,
+                certainty,
+                external_constraints: self.cx().mk_external_constraints(external_constraints),
+            },
+        );
+
+        // HACK: We bail with overflow if the response would have too many non-region
+        // inference variables. This tends to only happen if we encounter a lot of
+        // ambiguous alias types which get replaced with fresh inference variables
+        // during generalization. This prevents hangs caused by an exponential blowup,
+        // see tests/ui/traits/next-solver/coherence-alias-hang.rs.
+        match self.current_goal_kind {
+            // We don't do so for `NormalizesTo` goals as we erased the expected term and
+            // bailing with overflow here would prevent us from detecting a type-mismatch,
+            // causing a coherence error in diesel, see #131969. We still bail with overflow
+            // when later returning from the parent AliasRelate goal.
+            CurrentGoalKind::NormalizesTo => {}
+            CurrentGoalKind::Misc | CurrentGoalKind::CoinductiveTrait => {
+                let num_non_region_vars = canonical
+                    .variables
+                    .iter()
+                    .filter(|c| !c.is_region() && c.is_existential())
+                    .count();
+                if num_non_region_vars > self.cx().recursion_limit() {
+                    debug!(?num_non_region_vars, "too many inference variables -> overflow");
+                    return Ok(self.make_ambiguous_response_no_constraints(
+                        MaybeCause::Overflow {
+                            suggest_increasing_limit: true,
+                            keep_constraints: false,
+                        },
+                        OpaqueTypesJank::AllGood,
+                    ));
+                }
+            }
+        }
+
+        Ok(canonical)
+    }
+
+    /// Constructs a totally unconstrained, ambiguous response to a goal.
+    ///
+    /// Take care when using this, since often it's useful to respond with
+    /// ambiguity but return constrained variables to guide inference.
+    pub(in crate::solve) fn make_ambiguous_response_no_constraints(
+        &self,
+        cause: MaybeCause,
+        opaque_types_jank: OpaqueTypesJank,
+    ) -> CanonicalResponse<I> {
+        response_no_constraints_raw(
+            self.cx(),
+            self.max_input_universe,
+            self.variables,
+            Certainty::Maybe { cause, opaque_types_jank },
+        )
+    }
+
+    /// Computes the region constraints and *new* opaque types registered when
+    /// proving a goal.
+    ///
+    /// If an opaque was already constrained before proving this goal, then the
+    /// external constraints do not need to record that opaque, since if it is
+    /// further constrained by inference, that will be passed back in the var
+    /// values.
+    #[instrument(level = "trace", skip(self), ret)]
+    fn compute_external_query_constraints(
+        &self,
+        certainty: Certainty,
+        normalization_nested_goals: NestedNormalizationGoals<I>,
+    ) -> ExternalConstraintsData<I> {
+        // We only return region constraints once the certainty is `Yes`. This
+        // is necessary as we may drop nested goals on ambiguity, which may result
+        // in unconstrained inference variables in the region constraints. It also
+        // prevents us from emitting duplicate region constraints, avoiding some
+        // unnecessary work. This slightly weakens the leak check in case it uses
+        // region constraints from an ambiguous nested goal. This is tested in both
+        // `tests/ui/higher-ranked/leak-check/leak-check-in-selection-5-ambig.rs` and
+        // `tests/ui/higher-ranked/leak-check/leak-check-in-selection-6-ambig-unify.rs`.
+        let region_constraints = if certainty == Certainty::Yes {
+            self.delegate.make_deduplicated_outlives_constraints()
+        } else {
+            Default::default()
+        };
+
+        // We only return *newly defined* opaque types from canonical queries.
+        //
+        // Constraints for any existing opaque types are already tracked by changes
+        // to the `var_values`.
+        let opaque_types = self
+            .delegate
+            .clone_opaque_types_added_since(self.initial_opaque_types_storage_num_entries);
+
+        ExternalConstraintsData { region_constraints, opaque_types, normalization_nested_goals }
+    }
 }
 
 /// Eagerly replace aliases with inference variables, emitting `AliasRelate`
@@ -1363,7 +1557,7 @@ pub(super) fn evaluate_root_goal_for_proof_tree<D: SolverDelegate<Interner = I>,
     let opaque_types = delegate.clone_opaque_types_lookup_table();
     let (goal, opaque_types) = eager_resolve_vars(delegate, (goal, opaque_types));
 
-    let (orig_values, canonical_goal) = EvalCtxt::canonicalize_goal(delegate, goal, opaque_types);
+    let (orig_values, canonical_goal) = canonicalize_goal(delegate, goal, opaque_types);
 
     let (canonical_result, final_revision) =
         delegate.cx().evaluate_root_goal_for_proof_tree_raw(canonical_goal);
@@ -1380,7 +1574,7 @@ pub(super) fn evaluate_root_goal_for_proof_tree<D: SolverDelegate<Interner = I>,
         Ok(response) => response,
     };
 
-    let (normalization_nested_goals, _certainty) = EvalCtxt::instantiate_and_apply_query_response(
+    let (normalization_nested_goals, _certainty) = instantiate_and_apply_query_response(
         delegate,
         goal.param_env,
         &proof_tree.orig_values,
diff --git a/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs b/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs
index 2675ed0d0da65..4369148baf91d 100644
--- a/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs
@@ -10,8 +10,8 @@ use derive_where::derive_where;
 use rustc_type_ir::inherent::*;
 use rustc_type_ir::{self as ty, Interner};
 
+use crate::canonical;
 use crate::delegate::SolverDelegate;
-use crate::solve::eval_ctxt::canonical;
 use crate::solve::{Certainty, Goal, GoalSource, QueryResult, inspect};
 
 /// We need to know whether to build a prove tree while evaluating. We
diff --git a/compiler/rustc_next_trait_solver/src/solve/inspect/mod.rs b/compiler/rustc_next_trait_solver/src/solve/inspect/mod.rs
index 0d8c00601269b..65f32f1947fef 100644
--- a/compiler/rustc_next_trait_solver/src/solve/inspect/mod.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/inspect/mod.rs
@@ -2,5 +2,3 @@ pub use rustc_type_ir::solve::inspect::*;
 
 mod build;
 pub(in crate::solve) use build::*;
-
-pub use crate::solve::eval_ctxt::canonical::instantiate_canonical_state;
diff --git a/compiler/rustc_next_trait_solver/src/solve/mod.rs b/compiler/rustc_next_trait_solver/src/solve/mod.rs
index fb900b592d13b..afb86aaf8ab21 100644
--- a/compiler/rustc_next_trait_solver/src/solve/mod.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/mod.rs
@@ -380,25 +380,6 @@ where
     }
 }
 
-fn response_no_constraints_raw<I: Interner>(
-    cx: I,
-    max_universe: ty::UniverseIndex,
-    variables: I::CanonicalVarKinds,
-    certainty: Certainty,
-) -> CanonicalResponse<I> {
-    ty::Canonical {
-        max_universe,
-        variables,
-        value: Response {
-            var_values: ty::CanonicalVarValues::make_identity(cx, variables),
-            // FIXME: maybe we should store the "no response" version in cx, like
-            // we do for cx.types and stuff.
-            external_constraints: cx.mk_external_constraints(ExternalConstraintsData::default()),
-            certainty,
-        },
-    }
-}
-
 /// The result of evaluating a goal.
 pub struct GoalEvaluation<I: Interner> {
     /// The goal we've evaluated. This is the input goal, but potentially with its
diff --git a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
index 289325d70550c..aa9dfc9a9a265 100644
--- a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
@@ -6,6 +6,7 @@ use rustc_type_ir::search_graph::{self, PathKind};
 use rustc_type_ir::solve::{CanonicalInput, Certainty, NoSolution, QueryResult};
 use rustc_type_ir::{Interner, TypingMode};
 
+use crate::canonical::response_no_constraints_raw;
 use crate::delegate::SolverDelegate;
 use crate::solve::{
     EvalCtxt, FIXPOINT_STEP_LIMIT, has_no_inference_or_external_constraints, inspect,
@@ -127,7 +128,7 @@ fn response_no_constraints<I: Interner>(
     input: CanonicalInput<I>,
     certainty: Certainty,
 ) -> QueryResult<I> {
-    Ok(super::response_no_constraints_raw(
+    Ok(response_no_constraints_raw(
         cx,
         input.canonical.max_universe,
         input.canonical.variables,
diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs
index 297df7c2c9765..795cb2b2cfeba 100644
--- a/compiler/rustc_session/src/config.rs
+++ b/compiler/rustc_session/src/config.rs
@@ -1509,6 +1509,11 @@ impl Options {
     pub fn get_symbol_mangling_version(&self) -> SymbolManglingVersion {
         self.cg.symbol_mangling_version.unwrap_or(SymbolManglingVersion::Legacy)
     }
+
+    #[inline]
+    pub fn autodiff_enabled(&self) -> bool {
+        self.unstable_opts.autodiff.contains(&AutoDiff::Enable)
+    }
 }
 
 impl UnstableOptions {
diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs
index 3525c7c1d1a99..d0dd2cdac0c48 100644
--- a/compiler/rustc_session/src/session.rs
+++ b/compiler/rustc_session/src/session.rs
@@ -600,6 +600,13 @@ impl Session {
 
     /// Calculates the flavor of LTO to use for this compilation.
     pub fn lto(&self) -> config::Lto {
+        // Autodiff currently requires fat-lto to have access to the llvm-ir of all (indirectly) used functions and types.
+        // fat-lto is the easiest solution to this requirement, but quite expensive.
+        // FIXME(autodiff): Make autodiff also work with embed-bc instead of fat-lto.
+        if self.opts.autodiff_enabled() {
+            return config::Lto::Fat;
+        }
+
         // If our target has codegen requirements ignore the command line
         if self.target.requires_lto {
             return config::Lto::Fat;
diff --git a/compiler/rustc_trait_selection/src/solve/inspect/analyse.rs b/compiler/rustc_trait_selection/src/solve/inspect/analyse.rs
index 086a7a44786d6..c010add0fc50f 100644
--- a/compiler/rustc_trait_selection/src/solve/inspect/analyse.rs
+++ b/compiler/rustc_trait_selection/src/solve/inspect/analyse.rs
@@ -18,9 +18,9 @@ use rustc_middle::traits::ObligationCause;
 use rustc_middle::traits::solve::{Certainty, Goal, GoalSource, NoSolution, QueryResult};
 use rustc_middle::ty::{TyCtxt, VisitorResult, try_visit};
 use rustc_middle::{bug, ty};
+use rustc_next_trait_solver::canonical::instantiate_canonical_state;
 use rustc_next_trait_solver::resolve::eager_resolve_vars;
-use rustc_next_trait_solver::solve::inspect::{self, instantiate_canonical_state};
-use rustc_next_trait_solver::solve::{MaybeCause, SolverDelegateEvalExt as _};
+use rustc_next_trait_solver::solve::{MaybeCause, SolverDelegateEvalExt as _, inspect};
 use rustc_span::Span;
 use tracing::instrument;
 
diff --git a/compiler/rustc_windows_rc/src/lib.rs b/compiler/rustc_windows_rc/src/lib.rs
index caa5e5ef27656..5e95557501ea2 100644
--- a/compiler/rustc_windows_rc/src/lib.rs
+++ b/compiler/rustc_windows_rc/src/lib.rs
@@ -35,8 +35,11 @@ pub fn compile_windows_resource_file(
     resources_dir.push("resources");
     fs::create_dir_all(&resources_dir).unwrap();
 
-    let resource_compiler =
-        find_resource_compiler(&env::var("CARGO_CFG_TARGET_ARCH").unwrap()).expect("found rc.exe");
+    let resource_compiler = if let Ok(path) = env::var("RUSTC_WINDOWS_RC") {
+        path.into()
+    } else {
+        find_resource_compiler(&env::var("CARGO_CFG_TARGET_ARCH").unwrap()).expect("found rc.exe")
+    };
 
     let rc_path = resources_dir.join(file_stem.with_extension("rc"));
 
diff --git a/library/std/src/net/socket_addr.rs b/library/std/src/net/socket_addr.rs
index 41e623e79ce27..5b56dd3f74472 100644
--- a/library/std/src/net/socket_addr.rs
+++ b/library/std/src/net/socket_addr.rs
@@ -6,7 +6,6 @@ mod tests;
 pub use core::net::{SocketAddr, SocketAddrV4, SocketAddrV6};
 
 use crate::net::{IpAddr, Ipv4Addr, Ipv6Addr};
-use crate::sys::net::LookupHost;
 use crate::{io, iter, option, slice, vec};
 
 /// A trait for objects which can be converted or resolved to one or more
@@ -188,15 +187,9 @@ impl ToSocketAddrs for (Ipv6Addr, u16) {
     }
 }
 
-fn resolve_socket_addr(lh: LookupHost) -> io::Result<vec::IntoIter<SocketAddr>> {
-    let p = lh.port();
-    let v: Vec<_> = lh
-        .map(|mut a| {
-            a.set_port(p);
-            a
-        })
-        .collect();
-    Ok(v.into_iter())
+fn lookup_host(host: &str, port: u16) -> io::Result<vec::IntoIter<SocketAddr>> {
+    let addrs = crate::sys::net::lookup_host(host, port)?;
+    Ok(Vec::from_iter(addrs).into_iter())
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -205,17 +198,14 @@ impl ToSocketAddrs for (&str, u16) {
     fn to_socket_addrs(&self) -> io::Result<vec::IntoIter<SocketAddr>> {
         let (host, port) = *self;
 
-        // try to parse the host as a regular IP address first
-        if let Ok(addr) = host.parse::<Ipv4Addr>() {
-            let addr = SocketAddrV4::new(addr, port);
-            return Ok(vec![SocketAddr::V4(addr)].into_iter());
-        }
-        if let Ok(addr) = host.parse::<Ipv6Addr>() {
-            let addr = SocketAddrV6::new(addr, port, 0, 0);
-            return Ok(vec![SocketAddr::V6(addr)].into_iter());
+        // Try to parse the host as a regular IP address first
+        if let Ok(addr) = host.parse::<IpAddr>() {
+            let addr = SocketAddr::new(addr, port);
+            return Ok(vec![addr].into_iter());
         }
 
-        resolve_socket_addr((host, port).try_into()?)
+        // Otherwise, make the system look it up.
+        lookup_host(host, port)
     }
 }
 
@@ -232,12 +222,21 @@ impl ToSocketAddrs for (String, u16) {
 impl ToSocketAddrs for str {
     type Iter = vec::IntoIter<SocketAddr>;
     fn to_socket_addrs(&self) -> io::Result<vec::IntoIter<SocketAddr>> {
-        // try to parse as a regular SocketAddr first
+        // Try to parse as a regular SocketAddr first
         if let Ok(addr) = self.parse() {
             return Ok(vec![addr].into_iter());
         }
 
-        resolve_socket_addr(self.try_into()?)
+        // Otherwise, split the string by ':' and convert the second part to u16...
+        let Some((host, port_str)) = self.rsplit_once(':') else {
+            return Err(io::const_error!(io::ErrorKind::InvalidInput, "invalid socket address"));
+        };
+        let Ok(port) = port_str.parse::<u16>() else {
+            return Err(io::const_error!(io::ErrorKind::InvalidInput, "invalid port value"));
+        };
+
+        // ... and make the system look up the host.
+        lookup_host(host, port)
     }
 }
 
diff --git a/library/std/src/sys/fs/wasi.rs b/library/std/src/sys/fs/wasi.rs
index b65d86de12a3d..0b65b9cb389df 100644
--- a/library/std/src/sys/fs/wasi.rs
+++ b/library/std/src/sys/fs/wasi.rs
@@ -848,7 +848,14 @@ fn remove_dir_all_recursive(parent: &WasiFd, path: &Path) -> io::Result<()> {
 
     // Iterate over all the entries in this directory, and travel recursively if
     // necessary
-    for entry in ReadDir::new(fd, dummy_root) {
+    //
+    // Note that all directory entries for this directory are read first before
+    // any removal is done. This works around the fact that the WASIp1 API for
+    // reading directories is not well-designed for handling mutations between
+    // invocations of reading a directory. By reading all the entries at once
+    // this ensures that, at least without concurrent modifications, it should
+    // be possible to delete everything.
+    for entry in ReadDir::new(fd, dummy_root).collect::<Vec<_>>() {
         let entry = entry?;
         let path = crate::str::from_utf8(&entry.name).map_err(|_| {
             io::const_error!(io::ErrorKind::Uncategorized, "invalid utf-8 file name found")
diff --git a/library/std/src/sys/net/connection/sgx.rs b/library/std/src/sys/net/connection/sgx.rs
index 9b54571997d0b..8c9c17d3f1714 100644
--- a/library/std/src/sys/net/connection/sgx.rs
+++ b/library/std/src/sys/net/connection/sgx.rs
@@ -499,16 +499,6 @@ impl fmt::Display for NonIpSockAddr {
 
 pub struct LookupHost(!);
 
-impl LookupHost {
-    fn new(host: String) -> io::Result<LookupHost> {
-        Err(io::Error::new(io::ErrorKind::Uncategorized, NonIpSockAddr { host }))
-    }
-
-    pub fn port(&self) -> u16 {
-        self.0
-    }
-}
-
 impl Iterator for LookupHost {
     type Item = SocketAddr;
     fn next(&mut self) -> Option<SocketAddr> {
@@ -516,18 +506,9 @@ impl Iterator for LookupHost {
     }
 }
 
-impl TryFrom<&str> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(v: &str) -> io::Result<LookupHost> {
-        LookupHost::new(v.to_owned())
-    }
-}
-
-impl<'a> TryFrom<(&'a str, u16)> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from((host, port): (&'a str, u16)) -> io::Result<LookupHost> {
-        LookupHost::new(format!("{host}:{port}"))
-    }
+pub fn lookup_host(host: &str, port: u16) -> io::Result<LookupHost> {
+    Err(io::Error::new(
+        io::ErrorKind::Uncategorized,
+        NonIpSockAddr { host: format!("{host}:{port}") },
+    ))
 }
diff --git a/library/std/src/sys/net/connection/socket/mod.rs b/library/std/src/sys/net/connection/socket/mod.rs
index 564f2e3a01f3b..1dd06e97bbabd 100644
--- a/library/std/src/sys/net/connection/socket/mod.rs
+++ b/library/std/src/sys/net/connection/socket/mod.rs
@@ -258,7 +258,7 @@ fn to_ipv6mr_interface(value: u32) -> crate::ffi::c_uint {
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-// get_host_addresses
+// lookup_host
 ////////////////////////////////////////////////////////////////////////////////
 
 pub struct LookupHost {
@@ -267,12 +267,6 @@ pub struct LookupHost {
     port: u16,
 }
 
-impl LookupHost {
-    pub fn port(&self) -> u16 {
-        self.port
-    }
-}
-
 impl Iterator for LookupHost {
     type Item = SocketAddr;
     fn next(&mut self) -> Option<SocketAddr> {
@@ -281,7 +275,10 @@ impl Iterator for LookupHost {
                 let cur = self.cur.as_ref()?;
                 self.cur = cur.ai_next;
                 match socket_addr_from_c(cur.ai_addr.cast(), cur.ai_addrlen as usize) {
-                    Ok(addr) => return Some(addr),
+                    Ok(mut addr) => {
+                        addr.set_port(self.port);
+                        return Some(addr);
+                    }
                     Err(_) => continue,
                 }
             }
@@ -298,42 +295,17 @@ impl Drop for LookupHost {
     }
 }
 
-impl TryFrom<&str> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(s: &str) -> io::Result<LookupHost> {
-        macro_rules! try_opt {
-            ($e:expr, $msg:expr) => {
-                match $e {
-                    Some(r) => r,
-                    None => return Err(io::const_error!(io::ErrorKind::InvalidInput, $msg)),
-                }
-            };
+pub fn lookup_host(host: &str, port: u16) -> io::Result<LookupHost> {
+    init();
+    run_with_cstr(host.as_bytes(), &|c_host| {
+        let mut hints: c::addrinfo = unsafe { mem::zeroed() };
+        hints.ai_socktype = c::SOCK_STREAM;
+        let mut res = ptr::null_mut();
+        unsafe {
+            cvt_gai(c::getaddrinfo(c_host.as_ptr(), ptr::null(), &hints, &mut res))
+                .map(|_| LookupHost { original: res, cur: res, port })
         }
-
-        // split the string by ':' and convert the second part to u16
-        let (host, port_str) = try_opt!(s.rsplit_once(':'), "invalid socket address");
-        let port: u16 = try_opt!(port_str.parse().ok(), "invalid port value");
-        (host, port).try_into()
-    }
-}
-
-impl<'a> TryFrom<(&'a str, u16)> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from((host, port): (&'a str, u16)) -> io::Result<LookupHost> {
-        init();
-
-        run_with_cstr(host.as_bytes(), &|c_host| {
-            let mut hints: c::addrinfo = unsafe { mem::zeroed() };
-            hints.ai_socktype = c::SOCK_STREAM;
-            let mut res = ptr::null_mut();
-            unsafe {
-                cvt_gai(c::getaddrinfo(c_host.as_ptr(), ptr::null(), &hints, &mut res))
-                    .map(|_| LookupHost { original: res, cur: res, port })
-            }
-        })
-    }
+    })
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/library/std/src/sys/net/connection/socket/tests.rs b/library/std/src/sys/net/connection/socket/tests.rs
index fc236b8027b67..049355afca7ac 100644
--- a/library/std/src/sys/net/connection/socket/tests.rs
+++ b/library/std/src/sys/net/connection/socket/tests.rs
@@ -4,7 +4,7 @@ use crate::collections::HashMap;
 #[test]
 fn no_lookup_host_duplicates() {
     let mut addrs = HashMap::new();
-    let lh = match LookupHost::try_from(("localhost", 0)) {
+    let lh = match lookup_host("localhost", 0) {
         Ok(lh) => lh,
         Err(e) => panic!("couldn't resolve `localhost`: {e}"),
     };
diff --git a/library/std/src/sys/net/connection/uefi/mod.rs b/library/std/src/sys/net/connection/uefi/mod.rs
index 0036804287352..004f6d413a1f3 100644
--- a/library/std/src/sys/net/connection/uefi/mod.rs
+++ b/library/std/src/sys/net/connection/uefi/mod.rs
@@ -333,12 +333,6 @@ impl fmt::Debug for UdpSocket {
 
 pub struct LookupHost(!);
 
-impl LookupHost {
-    pub fn port(&self) -> u16 {
-        self.0
-    }
-}
-
 impl Iterator for LookupHost {
     type Item = SocketAddr;
     fn next(&mut self) -> Option<SocketAddr> {
@@ -346,18 +340,6 @@ impl Iterator for LookupHost {
     }
 }
 
-impl TryFrom<&str> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(_v: &str) -> io::Result<LookupHost> {
-        unsupported()
-    }
-}
-
-impl<'a> TryFrom<(&'a str, u16)> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(_v: (&'a str, u16)) -> io::Result<LookupHost> {
-        unsupported()
-    }
+pub fn lookup_host(_host: &str, _port: u16) -> io::Result<LookupHost> {
+    unsupported()
 }
diff --git a/library/std/src/sys/net/connection/unsupported.rs b/library/std/src/sys/net/connection/unsupported.rs
index fbc86343272fc..fb18e8dec557c 100644
--- a/library/std/src/sys/net/connection/unsupported.rs
+++ b/library/std/src/sys/net/connection/unsupported.rs
@@ -304,12 +304,6 @@ impl fmt::Debug for UdpSocket {
 
 pub struct LookupHost(!);
 
-impl LookupHost {
-    pub fn port(&self) -> u16 {
-        self.0
-    }
-}
-
 impl Iterator for LookupHost {
     type Item = SocketAddr;
     fn next(&mut self) -> Option<SocketAddr> {
@@ -317,18 +311,6 @@ impl Iterator for LookupHost {
     }
 }
 
-impl TryFrom<&str> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(_v: &str) -> io::Result<LookupHost> {
-        unsupported()
-    }
-}
-
-impl<'a> TryFrom<(&'a str, u16)> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(_v: (&'a str, u16)) -> io::Result<LookupHost> {
-        unsupported()
-    }
+pub fn lookup_host(_host: &str, _port: u16) -> io::Result<LookupHost> {
+    unsupported()
 }
diff --git a/library/std/src/sys/net/connection/wasip1.rs b/library/std/src/sys/net/connection/wasip1.rs
index cdfa25c8a4407..048dafdcd7f7c 100644
--- a/library/std/src/sys/net/connection/wasip1.rs
+++ b/library/std/src/sys/net/connection/wasip1.rs
@@ -477,12 +477,6 @@ impl fmt::Debug for UdpSocket {
 
 pub struct LookupHost(!);
 
-impl LookupHost {
-    pub fn port(&self) -> u16 {
-        self.0
-    }
-}
-
 impl Iterator for LookupHost {
     type Item = SocketAddr;
     fn next(&mut self) -> Option<SocketAddr> {
@@ -490,18 +484,6 @@ impl Iterator for LookupHost {
     }
 }
 
-impl<'a> TryFrom<&'a str> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(_v: &'a str) -> io::Result<LookupHost> {
-        unsupported()
-    }
-}
-
-impl<'a> TryFrom<(&'a str, u16)> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(_v: (&'a str, u16)) -> io::Result<LookupHost> {
-        unsupported()
-    }
+pub fn lookup_host(_host: &str, _port: u16) -> io::Result<LookupHost> {
+    unsupported()
 }
diff --git a/library/std/src/sys/net/connection/xous/dns.rs b/library/std/src/sys/net/connection/xous/dns.rs
index bb29d211fad56..b139376f59768 100644
--- a/library/std/src/sys/net/connection/xous/dns.rs
+++ b/library/std/src/sys/net/connection/xous/dns.rs
@@ -1,15 +1,8 @@
-use core::convert::{TryFrom, TryInto};
-
 use crate::io;
 use crate::net::{Ipv4Addr, SocketAddr, SocketAddrV4, SocketAddrV6};
 use crate::os::xous::ffi::lend_mut;
 use crate::os::xous::services::{DnsLendMut, dns_server};
 
-pub struct DnsError {
-    #[allow(dead_code)]
-    pub code: u8,
-}
-
 #[repr(C, align(4096))]
 struct LookupHostQuery([u8; 4096]);
 
@@ -20,12 +13,6 @@ pub struct LookupHost {
     count: usize,
 }
 
-impl LookupHost {
-    pub fn port(&self) -> u16 {
-        self.port
-    }
-}
-
 impl Iterator for LookupHost {
     type Item = SocketAddr;
     fn next(&mut self) -> Option<SocketAddr> {
@@ -72,7 +59,7 @@ impl Iterator for LookupHost {
     }
 }
 
-pub fn lookup(query: &str, port: u16) -> Result<LookupHost, DnsError> {
+pub fn lookup_host(query: &str, port: u16) -> io::Result<LookupHost> {
     let mut result = LookupHost { data: LookupHostQuery([0u8; 4096]), offset: 0, count: 0, port };
 
     // Copy the query into the message that gets sent to the DNS server
@@ -89,7 +76,7 @@ pub fn lookup(query: &str, port: u16) -> Result<LookupHost, DnsError> {
     )
     .unwrap();
     if result.data.0[0] != 0 {
-        return Err(DnsError { code: result.data.0[1] });
+        return Err(io::const_error!(io::ErrorKind::InvalidInput, "DNS failure"));
     }
     assert_eq!(result.offset, 0);
     result.count = result.data.0[1] as usize;
@@ -98,31 +85,3 @@ pub fn lookup(query: &str, port: u16) -> Result<LookupHost, DnsError> {
     result.offset = 2;
     Ok(result)
 }
-
-impl TryFrom<&str> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(s: &str) -> io::Result<LookupHost> {
-        macro_rules! try_opt {
-            ($e:expr, $msg:expr) => {
-                match $e {
-                    Some(r) => r,
-                    None => return Err(io::const_error!(io::ErrorKind::InvalidInput, &$msg)),
-                }
-            };
-        }
-
-        // split the string by ':' and convert the second part to u16
-        let (host, port_str) = try_opt!(s.rsplit_once(':'), "invalid socket address");
-        let port: u16 = try_opt!(port_str.parse().ok(), "invalid port value");
-        (host, port).try_into()
-    }
-}
-
-impl TryFrom<(&str, u16)> for LookupHost {
-    type Error = io::Error;
-
-    fn try_from(v: (&str, u16)) -> io::Result<LookupHost> {
-        lookup(v.0, v.1).map_err(|_e| io::const_error!(io::ErrorKind::InvalidInput, "DNS failure"))
-    }
-}
diff --git a/library/std/src/sys/net/connection/xous/mod.rs b/library/std/src/sys/net/connection/xous/mod.rs
index e44a375b9e3c5..0f77be5c3fac8 100644
--- a/library/std/src/sys/net/connection/xous/mod.rs
+++ b/library/std/src/sys/net/connection/xous/mod.rs
@@ -45,4 +45,4 @@ pub struct GetAddress {
     raw: [u8; 4096],
 }
 
-pub use dns::LookupHost;
+pub use dns::lookup_host;
diff --git a/library/stdarch/.cirrus.yml b/library/stdarch/.cirrus.yml
deleted file mode 100644
index a0ecc03b953fd..0000000000000
--- a/library/stdarch/.cirrus.yml
+++ /dev/null
@@ -1,16 +0,0 @@
-task:
-  name: x86_64-unknown-freebsd
-  freebsd_instance:
-    image_family: freebsd-13-4
-  env:
-    # FIXME(freebsd): FreeBSD has a segfault when `RUST_BACKTRACE` is set
-    # https://github.com/rust-lang/rust/issues/132185
-    RUST_BACKTRACE: "0"
-  setup_script:
-    - curl https://sh.rustup.rs -sSf --output rustup.sh
-    - sh rustup.sh --default-toolchain nightly -y
-    - . $HOME/.cargo/env
-    - rustup default nightly
-  test_script:
-    - . $HOME/.cargo/env
-    - cargo build --all
diff --git a/library/stdarch/.github/workflows/main.yml b/library/stdarch/.github/workflows/main.yml
index 048ce9864604e..b0d476f0e2ea5 100644
--- a/library/stdarch/.github/workflows/main.yml
+++ b/library/stdarch/.github/workflows/main.yml
@@ -117,6 +117,8 @@ jobs:
           os: windows-2025
         - tuple: aarch64-pc-windows-msvc
           os: windows-11-arm
+        - tuple: arm64ec-pc-windows-msvc
+          os: windows-11-arm
         - tuple: x86_64-pc-windows-gnu
           os: windows-2025
         # - tuple: i686-pc-windows-gnu
@@ -207,14 +209,6 @@ jobs:
         rustup update nightly --no-self-update
         rustup default nightly
       shell: bash
-      if: matrix.target.os != 'windows-11-arm'
-    - name: Install Rust for `windows-11-arm` runners
-      # The arm runners don't have Rust pre-installed (https://github.com/actions/partner-runner-images/issues/77)
-      run: |
-        curl https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly
-        echo "$HOME/.cargo/bin" >> $GITHUB_PATH
-      shell: bash
-      if: matrix.target.os == 'windows-11-arm'
 
     - run: rustup target add ${{ matrix.target.tuple }}
       shell: bash
diff --git a/library/stdarch/Cargo.lock b/library/stdarch/Cargo.lock
index 9df0791b86523..a10a456acce1d 100644
--- a/library/stdarch/Cargo.lock
+++ b/library/stdarch/Cargo.lock
@@ -13,9 +13,9 @@ dependencies = [
 
 [[package]]
 name = "anstream"
-version = "0.6.19"
+version = "0.6.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
+checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -43,29 +43,29 @@ dependencies = [
 
 [[package]]
 name = "anstyle-query"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
+checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
 name = "anstyle-wincon"
-version = "3.0.9"
+version = "3.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
+checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a"
 dependencies = [
  "anstyle",
  "once_cell_polyfill",
- "windows-sys",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
 name = "anyhow"
-version = "1.0.98"
+version = "1.0.99"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
+checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
 
 [[package]]
 name = "assert-instr-macro"
@@ -84,30 +84,31 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
 [[package]]
 name = "bitflags"
-version = "2.9.1"
+version = "2.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
+checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
 
 [[package]]
 name = "cc"
-version = "1.2.31"
+version = "1.2.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2"
+checksum = "5252b3d2648e5eedbc1a6f501e3c795e07025c1e93bbf8bbdd6eef7f447a6d54"
 dependencies = [
+ "find-msvc-tools",
  "shlex",
 ]
 
 [[package]]
 name = "cfg-if"
-version = "1.0.1"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
+checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
 
 [[package]]
 name = "clap"
-version = "4.5.42"
+version = "4.5.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
+checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -115,9 +116,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.42"
+version = "4.5.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
+checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6"
 dependencies = [
  "anstream",
  "anstyle",
@@ -127,9 +128,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.41"
+version = "4.5.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
+checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c"
 dependencies = [
  "heck",
  "proc-macro2",
@@ -258,6 +259,12 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d"
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -283,9 +290,9 @@ checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 
 [[package]]
 name = "hashbrown"
-version = "0.15.4"
+version = "0.15.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 
 [[package]]
 name = "heck"
@@ -323,12 +330,12 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "2.10.0"
+version = "2.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
+checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9"
 dependencies = [
  "equivalent",
- "hashbrown 0.15.4",
+ "hashbrown 0.15.5",
 ]
 
 [[package]]
@@ -353,7 +360,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
 dependencies = [
  "hermit-abi",
  "libc",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -379,9 +386,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
 
 [[package]]
 name = "libc"
-version = "0.2.174"
+version = "0.2.175"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
+checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
 
 [[package]]
 name = "linked-hash-map"
@@ -391,9 +398,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
 
 [[package]]
 name = "log"
-version = "0.4.27"
+version = "0.4.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
+checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
 
 [[package]]
 name = "memchr"
@@ -428,9 +435,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.95"
+version = "1.0.101"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
+checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
 dependencies = [
  "unicode-ident",
 ]
@@ -497,9 +504,9 @@ dependencies = [
 
 [[package]]
 name = "rayon"
-version = "1.10.0"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
 dependencies = [
  "either",
  "rayon-core",
@@ -507,9 +514,9 @@ dependencies = [
 
 [[package]]
 name = "rayon-core"
-version = "1.12.1"
+version = "1.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
 dependencies = [
  "crossbeam-deque",
  "crossbeam-utils",
@@ -517,9 +524,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.11.1"
+version = "1.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -529,9 +536,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.4.9"
+version = "0.4.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
+checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -540,9 +547,9 @@ dependencies = [
 
 [[package]]
 name = "regex-syntax"
-version = "0.8.5"
+version = "0.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
+checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
 
 [[package]]
 name = "rustc-demangle"
@@ -593,9 +600,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.142"
+version = "1.0.143"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7"
+checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
 dependencies = [
  "itoa",
  "memchr",
@@ -715,9 +722,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
 [[package]]
 name = "syn"
-version = "2.0.104"
+version = "2.0.106"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
+checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -774,7 +781,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "161296c618fa2d63f6ed5fffd1112937e803cb9ec71b32b01a76321555660917"
 dependencies = [
  "bitflags",
- "indexmap 2.10.0",
+ "indexmap 2.11.0",
  "semver",
 ]
 
@@ -791,20 +798,35 @@ dependencies = [
 
 [[package]]
 name = "winapi-util"
-version = "0.1.9"
+version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
+checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.60.2",
 ]
 
+[[package]]
+name = "windows-link"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
+
 [[package]]
 name = "windows-sys"
 version = "0.59.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
 dependencies = [
- "windows-targets",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.3",
 ]
 
 [[package]]
@@ -813,14 +835,31 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
 dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_gnullvm",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm 0.52.6",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
+dependencies = [
+ "windows-link",
+ "windows_aarch64_gnullvm 0.53.0",
+ "windows_aarch64_msvc 0.53.0",
+ "windows_i686_gnu 0.53.0",
+ "windows_i686_gnullvm 0.53.0",
+ "windows_i686_msvc 0.53.0",
+ "windows_x86_64_gnu 0.53.0",
+ "windows_x86_64_gnullvm 0.53.0",
+ "windows_x86_64_msvc 0.53.0",
 ]
 
 [[package]]
@@ -829,48 +868,96 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
 
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
+
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
 
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
+
 [[package]]
 name = "windows_i686_gnu"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
 
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
+
 [[package]]
 name = "windows_i686_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
 
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
+
 [[package]]
 name = "windows_i686_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
 
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
+
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
 
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
+
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
 
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
+
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
+
 [[package]]
 name = "yaml-rust"
 version = "0.4.5"
@@ -882,18 +969,18 @@ dependencies = [
 
 [[package]]
 name = "zerocopy"
-version = "0.8.26"
+version = "0.8.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
+checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
 dependencies = [
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.8.26"
+version = "0.8.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
+checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/library/stdarch/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
index b5c6874ca52ed..e803dae1006a0 100644
--- a/library/stdarch/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
@@ -2,11 +2,11 @@ FROM ubuntu:25.10
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    gcc libc6-dev qemu-user-static ca-certificates \
+    gcc libc6-dev qemu-user ca-certificates \
     gcc-loongarch64-linux-gnu libc6-dev-loong64-cross
 
 
 ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc \
-    CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-loongarch64-static -cpu max -L /usr/loongarch64-linux-gnu" \
+    CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-loongarch64 -cpu max -L /usr/loongarch64-linux-gnu" \
     OBJDUMP=loongarch64-linux-gnu-objdump \
     STDARCH_TEST_SKIP_FEATURE=frecipe
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index bc4c438038dc5..855261aaecfd0 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -188,6 +188,7 @@ pub fn vabds_f32(a: f32, b: f32) -> f32 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fabd))]
 pub fn vabdh_f16(a: f16, b: f16) -> f16 {
     unsafe { simd_extract!(vabd_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
@@ -947,6 +948,7 @@ pub fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcadd))]
 pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -964,6 +966,7 @@ pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcadd))]
 pub fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -1029,6 +1032,7 @@ pub fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcadd))]
 pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -1046,6 +1050,7 @@ pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcadd))]
 pub fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -1175,6 +1180,7 @@ pub fn vcages_f32(a: f32, b: f32) -> u32 {
 #[cfg_attr(test, assert_instr(facge))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcageh_f16(a: f16, b: f16) -> u16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -1255,6 +1261,7 @@ pub fn vcagts_f32(a: f32, b: f32) -> u32 {
 #[cfg_attr(test, assert_instr(facgt))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcagth_f16(a: f16, b: f16) -> u16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -1307,6 +1314,7 @@ pub fn vcales_f32(a: f32, b: f32) -> u32 {
 #[cfg_attr(test, assert_instr(facge))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcaleh_f16(a: f16, b: f16) -> u16 {
     vcageh_f16(b, a)
 }
@@ -1352,6 +1360,7 @@ pub fn vcalts_f32(a: f32, b: f32) -> u32 {
 #[cfg_attr(test, assert_instr(facgt))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcalth_f16(a: f16, b: f16) -> u16 {
     vcagth_f16(b, a)
 }
@@ -1469,6 +1478,7 @@ pub fn vceqd_u64(a: u64, b: u64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqh_f16(a: f16, b: f16) -> u16 {
     unsafe { simd_extract!(vceq_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
 }
@@ -1478,6 +1488,7 @@ pub fn vceqh_f16(a: f16, b: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcmeq))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqz_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
     unsafe { simd_eq(a, transmute(b)) }
@@ -1488,6 +1499,7 @@ pub fn vceqz_f16(a: float16x4_t) -> uint16x4_t {
 #[cfg_attr(test, assert_instr(fcmeq))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqzq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
     unsafe { simd_eq(a, transmute(b)) }
@@ -1756,6 +1768,7 @@ pub fn vceqzd_u64(a: u64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqzh_f16(a: f16) -> u16 {
     unsafe { simd_extract!(vceqz_f16(vdup_n_f16(a)), 0) }
 }
@@ -1873,6 +1886,7 @@ pub fn vcged_u64(a: u64, b: u64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgeh_f16(a: f16, b: f16) -> u16 {
     unsafe { simd_extract!(vcge_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
 }
@@ -2029,6 +2043,7 @@ pub fn vcgezd_s64(a: i64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgezh_f16(a: f16) -> u16 {
     unsafe { simd_extract!(vcgez_f16(vdup_n_f16(a)), 0) }
 }
@@ -2128,6 +2143,7 @@ pub fn vcgtd_u64(a: u64, b: u64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgth_f16(a: f16, b: f16) -> u16 {
     unsafe { simd_extract!(vcgt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
 }
@@ -2284,6 +2300,7 @@ pub fn vcgtzd_s64(a: i64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgtzh_f16(a: f16) -> u16 {
     unsafe { simd_extract!(vcgtz_f16(vdup_n_f16(a)), 0) }
 }
@@ -2383,6 +2400,7 @@ pub fn vcled_s64(a: i64, b: i64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcleh_f16(a: f16, b: f16) -> u16 {
     unsafe { simd_extract!(vcle_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
 }
@@ -2539,6 +2557,7 @@ pub fn vclezd_s64(a: i64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vclezh_f16(a: f16) -> u16 {
     unsafe { simd_extract!(vclez_f16(vdup_n_f16(a)), 0) }
 }
@@ -2620,6 +2639,7 @@ pub fn vcltd_s64(a: i64, b: i64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vclth_f16(a: f16, b: f16) -> u16 {
     unsafe { simd_extract!(vclt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
 }
@@ -2794,6 +2814,7 @@ pub fn vcltzd_s64(a: i64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcltzh_f16(a: f16) -> u16 {
     unsafe { simd_extract!(vcltz_f16(vdup_n_f16(a)), 0) }
 }
@@ -2803,6 +2824,7 @@ pub fn vcltzh_f16(a: f16) -> u16 {
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -2820,6 +2842,7 @@ pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -2887,6 +2910,7 @@ pub fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_lane_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -2915,6 +2939,7 @@ pub fn vcmla_lane_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_lane_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -2992,6 +3017,7 @@ pub fn vcmlaq_lane_f32<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_laneq_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -3020,6 +3046,7 @@ pub fn vcmla_laneq_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_laneq_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -3095,6 +3122,7 @@ pub fn vcmlaq_laneq_f32<const LANE: i32>(
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -3112,6 +3140,7 @@ pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -3179,6 +3208,7 @@ pub fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot180_lane_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -3207,6 +3237,7 @@ pub fn vcmla_rot180_lane_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot180_lane_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -3284,6 +3315,7 @@ pub fn vcmlaq_rot180_lane_f32<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot180_laneq_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -3312,6 +3344,7 @@ pub fn vcmla_rot180_laneq_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot180_laneq_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -3387,6 +3420,7 @@ pub fn vcmlaq_rot180_laneq_f32<const LANE: i32>(
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -3404,6 +3438,7 @@ pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -3471,6 +3506,7 @@ pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot270_lane_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -3499,6 +3535,7 @@ pub fn vcmla_rot270_lane_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot270_lane_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -3576,6 +3613,7 @@ pub fn vcmlaq_rot270_lane_f32<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot270_laneq_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -3604,6 +3642,7 @@ pub fn vcmla_rot270_laneq_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot270_laneq_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -3679,6 +3718,7 @@ pub fn vcmlaq_rot270_laneq_f32<const LANE: i32>(
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -3696,6 +3736,7 @@ pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float1
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -3763,6 +3804,7 @@ pub fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot90_lane_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -3791,6 +3833,7 @@ pub fn vcmla_rot90_lane_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot90_lane_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -3868,6 +3911,7 @@ pub fn vcmlaq_rot90_lane_f32<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot90_laneq_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -3896,6 +3940,7 @@ pub fn vcmla_rot90_laneq_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot90_laneq_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -7161,6 +7206,7 @@ pub fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(fcvtn2))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t {
     vcombine_f16(a, vcvt_f16_f32(b))
 }
@@ -7170,6 +7216,7 @@ pub fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t {
 #[cfg_attr(test, assert_instr(fcvtl2))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t {
     vcvt_f32_f16(vget_high_f16(a))
 }
@@ -7408,6 +7455,7 @@ pub fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvta_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -7424,6 +7472,7 @@ pub fn vcvta_s16_f16(a: float16x4_t) -> int16x4_t {
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtaq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -7504,6 +7553,7 @@ pub fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t {
 #[cfg_attr(test, assert_instr(fcvtau))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvta_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -7520,6 +7570,7 @@ pub fn vcvta_u16_f16(a: float16x4_t) -> uint16x4_t {
 #[cfg_attr(test, assert_instr(fcvtau))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtaq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -7600,6 +7651,7 @@ pub fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_s16_f16(a: f16) -> i16 {
     vcvtah_s32_f16(a) as i16
 }
@@ -7609,6 +7661,7 @@ pub fn vcvtah_s16_f16(a: f16) -> i16 {
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_s32_f16(a: f16) -> i32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -7625,6 +7678,7 @@ pub fn vcvtah_s32_f16(a: f16) -> i32 {
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_s64_f16(a: f16) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -7641,6 +7695,7 @@ pub fn vcvtah_s64_f16(a: f16) -> i64 {
 #[cfg_attr(test, assert_instr(fcvtau))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_u16_f16(a: f16) -> u16 {
     vcvtah_u32_f16(a) as u16
 }
@@ -7650,6 +7705,7 @@ pub fn vcvtah_u16_f16(a: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcvtau))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_u32_f16(a: f16) -> u32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -7666,6 +7722,7 @@ pub fn vcvtah_u32_f16(a: f16) -> u32 {
 #[cfg_attr(test, assert_instr(fcvtau))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_u64_f16(a: f16) -> u64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -7764,6 +7821,7 @@ pub fn vcvts_f32_s32(a: i32) -> f32 {
 #[cfg_attr(test, assert_instr(scvtf))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_f16_s16(a: i16) -> f16 {
     a as f16
 }
@@ -7773,6 +7831,7 @@ pub fn vcvth_f16_s16(a: i16) -> f16 {
 #[cfg_attr(test, assert_instr(scvtf))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_f16_s32(a: i32) -> f16 {
     a as f16
 }
@@ -7782,6 +7841,7 @@ pub fn vcvth_f16_s32(a: i32) -> f16 {
 #[cfg_attr(test, assert_instr(scvtf))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_f16_s64(a: i64) -> f16 {
     a as f16
 }
@@ -7791,6 +7851,7 @@ pub fn vcvth_f16_s64(a: i64) -> f16 {
 #[cfg_attr(test, assert_instr(ucvtf))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_f16_u16(a: u16) -> f16 {
     a as f16
 }
@@ -7800,6 +7861,7 @@ pub fn vcvth_f16_u16(a: u16) -> f16 {
 #[cfg_attr(test, assert_instr(ucvtf))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_f16_u32(a: u32) -> f16 {
     a as f16
 }
@@ -7809,6 +7871,7 @@ pub fn vcvth_f16_u32(a: u32) -> f16 {
 #[cfg_attr(test, assert_instr(ucvtf))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_f16_u64(a: u64) -> f16 {
     a as f16
 }
@@ -7819,6 +7882,7 @@ pub fn vcvth_f16_u64(a: u64) -> f16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_f16_s16<const N: i32>(a: i16) -> f16 {
     static_assert!(N >= 1 && N <= 16);
     vcvth_n_f16_s32::<N>(a as i32)
@@ -7830,6 +7894,7 @@ pub fn vcvth_n_f16_s16<const N: i32>(a: i16) -> f16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_f16_s32<const N: i32>(a: i32) -> f16 {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -7848,6 +7913,7 @@ pub fn vcvth_n_f16_s32<const N: i32>(a: i32) -> f16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_f16_s64<const N: i32>(a: i64) -> f16 {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -7866,6 +7932,7 @@ pub fn vcvth_n_f16_s64<const N: i32>(a: i64) -> f16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_f16_u16<const N: i32>(a: u16) -> f16 {
     static_assert!(N >= 1 && N <= 16);
     vcvth_n_f16_u32::<N>(a as u32)
@@ -7877,6 +7944,7 @@ pub fn vcvth_n_f16_u16<const N: i32>(a: u16) -> f16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_f16_u32<const N: i32>(a: u32) -> f16 {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -7895,6 +7963,7 @@ pub fn vcvth_n_f16_u32<const N: i32>(a: u32) -> f16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_f16_u64<const N: i32>(a: u64) -> f16 {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -7913,6 +7982,7 @@ pub fn vcvth_n_f16_u64<const N: i32>(a: u64) -> f16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_s16_f16<const N: i32>(a: f16) -> i16 {
     static_assert!(N >= 1 && N <= 16);
     vcvth_n_s32_f16::<N>(a) as i16
@@ -7924,6 +7994,7 @@ pub fn vcvth_n_s16_f16<const N: i32>(a: f16) -> i16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_s32_f16<const N: i32>(a: f16) -> i32 {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -7942,6 +8013,7 @@ pub fn vcvth_n_s32_f16<const N: i32>(a: f16) -> i32 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_s64_f16<const N: i32>(a: f16) -> i64 {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -7960,6 +8032,7 @@ pub fn vcvth_n_s64_f16<const N: i32>(a: f16) -> i64 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_u16_f16<const N: i32>(a: f16) -> u16 {
     static_assert!(N >= 1 && N <= 16);
     vcvth_n_u32_f16::<N>(a) as u16
@@ -7971,6 +8044,7 @@ pub fn vcvth_n_u16_f16<const N: i32>(a: f16) -> u16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_u32_f16<const N: i32>(a: f16) -> u32 {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -7989,6 +8063,7 @@ pub fn vcvth_n_u32_f16<const N: i32>(a: f16) -> u32 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_n_u64_f16<const N: i32>(a: f16) -> u64 {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -8006,6 +8081,7 @@ pub fn vcvth_n_u64_f16<const N: i32>(a: f16) -> u64 {
 #[cfg_attr(test, assert_instr(fcvtzs))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_s16_f16(a: f16) -> i16 {
     a as i16
 }
@@ -8015,6 +8091,7 @@ pub fn vcvth_s16_f16(a: f16) -> i16 {
 #[cfg_attr(test, assert_instr(fcvtzs))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_s32_f16(a: f16) -> i32 {
     a as i32
 }
@@ -8024,6 +8101,7 @@ pub fn vcvth_s32_f16(a: f16) -> i32 {
 #[cfg_attr(test, assert_instr(fcvtzs))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_s64_f16(a: f16) -> i64 {
     a as i64
 }
@@ -8033,6 +8111,7 @@ pub fn vcvth_s64_f16(a: f16) -> i64 {
 #[cfg_attr(test, assert_instr(fcvtzu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_u16_f16(a: f16) -> u16 {
     a as u16
 }
@@ -8042,6 +8121,7 @@ pub fn vcvth_u16_f16(a: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcvtzu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_u32_f16(a: f16) -> u32 {
     a as u32
 }
@@ -8051,6 +8131,7 @@ pub fn vcvth_u32_f16(a: f16) -> u32 {
 #[cfg_attr(test, assert_instr(fcvtzu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvth_u64_f16(a: f16) -> u64 {
     a as u64
 }
@@ -8060,6 +8141,7 @@ pub fn vcvth_u64_f16(a: f16) -> u64 {
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtm_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8076,6 +8158,7 @@ pub fn vcvtm_s16_f16(a: float16x4_t) -> int16x4_t {
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8156,6 +8239,7 @@ pub fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t {
 #[cfg_attr(test, assert_instr(fcvtmu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtm_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8172,6 +8256,7 @@ pub fn vcvtm_u16_f16(a: float16x4_t) -> uint16x4_t {
 #[cfg_attr(test, assert_instr(fcvtmu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8252,6 +8337,7 @@ pub fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_s16_f16(a: f16) -> i16 {
     vcvtmh_s32_f16(a) as i16
 }
@@ -8261,6 +8347,7 @@ pub fn vcvtmh_s16_f16(a: f16) -> i16 {
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_s32_f16(a: f16) -> i32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8277,6 +8364,7 @@ pub fn vcvtmh_s32_f16(a: f16) -> i32 {
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_s64_f16(a: f16) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8293,6 +8381,7 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
 #[cfg_attr(test, assert_instr(fcvtmu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_u16_f16(a: f16) -> u16 {
     vcvtmh_u32_f16(a) as u16
 }
@@ -8302,6 +8391,7 @@ pub fn vcvtmh_u16_f16(a: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcvtmu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_u32_f16(a: f16) -> u32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8318,6 +8408,7 @@ pub fn vcvtmh_u32_f16(a: f16) -> u32 {
 #[cfg_attr(test, assert_instr(fcvtmu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_u64_f16(a: f16) -> u64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8398,6 +8489,7 @@ pub fn vcvtmd_u64_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcvtns))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtn_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8414,6 +8506,7 @@ pub fn vcvtn_s16_f16(a: float16x4_t) -> int16x4_t {
 #[cfg_attr(test, assert_instr(fcvtns))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8494,6 +8587,7 @@ pub fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t {
 #[cfg_attr(test, assert_instr(fcvtnu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtn_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8510,6 +8604,7 @@ pub fn vcvtn_u16_f16(a: float16x4_t) -> uint16x4_t {
 #[cfg_attr(test, assert_instr(fcvtnu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8590,6 +8685,7 @@ pub fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcvtns))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_s16_f16(a: f16) -> i16 {
     vcvtnh_s32_f16(a) as i16
 }
@@ -8599,6 +8695,7 @@ pub fn vcvtnh_s16_f16(a: f16) -> i16 {
 #[cfg_attr(test, assert_instr(fcvtns))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_s32_f16(a: f16) -> i32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8615,6 +8712,7 @@ pub fn vcvtnh_s32_f16(a: f16) -> i32 {
 #[cfg_attr(test, assert_instr(fcvtns))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_s64_f16(a: f16) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8631,6 +8729,7 @@ pub fn vcvtnh_s64_f16(a: f16) -> i64 {
 #[cfg_attr(test, assert_instr(fcvtnu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_u16_f16(a: f16) -> u16 {
     vcvtnh_u32_f16(a) as u16
 }
@@ -8640,6 +8739,7 @@ pub fn vcvtnh_u16_f16(a: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcvtnu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_u32_f16(a: f16) -> u32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8656,6 +8756,7 @@ pub fn vcvtnh_u32_f16(a: f16) -> u32 {
 #[cfg_attr(test, assert_instr(fcvtnu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_u64_f16(a: f16) -> u64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8736,6 +8837,7 @@ pub fn vcvtnd_u64_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcvtps))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtp_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8752,6 +8854,7 @@ pub fn vcvtp_s16_f16(a: float16x4_t) -> int16x4_t {
 #[cfg_attr(test, assert_instr(fcvtps))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtpq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8832,6 +8935,7 @@ pub fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t {
 #[cfg_attr(test, assert_instr(fcvtpu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtp_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8848,6 +8952,7 @@ pub fn vcvtp_u16_f16(a: float16x4_t) -> uint16x4_t {
 #[cfg_attr(test, assert_instr(fcvtpu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtpq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8928,6 +9033,7 @@ pub fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcvtps))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_s16_f16(a: f16) -> i16 {
     vcvtph_s32_f16(a) as i16
 }
@@ -8937,6 +9043,7 @@ pub fn vcvtph_s16_f16(a: f16) -> i16 {
 #[cfg_attr(test, assert_instr(fcvtps))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_s32_f16(a: f16) -> i32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8953,6 +9060,7 @@ pub fn vcvtph_s32_f16(a: f16) -> i32 {
 #[cfg_attr(test, assert_instr(fcvtps))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_s64_f16(a: f16) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8969,6 +9077,7 @@ pub fn vcvtph_s64_f16(a: f16) -> i64 {
 #[cfg_attr(test, assert_instr(fcvtpu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_u16_f16(a: f16) -> u16 {
     vcvtph_u32_f16(a) as u16
 }
@@ -8978,6 +9087,7 @@ pub fn vcvtph_u16_f16(a: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcvtpu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_u32_f16(a: f16) -> u32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -8994,6 +9104,7 @@ pub fn vcvtph_u32_f16(a: f16) -> u32 {
 #[cfg_attr(test, assert_instr(fcvtpu))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_u64_f16(a: f16) -> u64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -9305,6 +9416,7 @@ pub fn vcvtxd_f32_f64(a: f64) -> f32 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fdiv))]
 pub fn vdiv_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_div(a, b) }
@@ -9314,6 +9426,7 @@ pub fn vdiv_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fdiv))]
 pub fn vdivq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_div(a, b) }
@@ -9359,6 +9472,7 @@ pub fn vdivq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vdivh_f16(a: f16, b: f16) -> f16 {
     a / b
@@ -9608,6 +9722,7 @@ pub fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vduph_lane_f16<const N: i32>(a: float16x4_t) -> f16 {
     static_assert_uimm_bits!(N, 2);
     unsafe { simd_extract!(a, N as u32) }
@@ -9619,6 +9734,7 @@ pub fn vduph_lane_f16<const N: i32>(a: float16x4_t) -> f16 {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vduph_laneq_f16<const N: i32>(a: float16x8_t) -> f16 {
     static_assert_uimm_bits!(N, 4);
     unsafe { simd_extract!(a, N as u32) }
@@ -9977,6 +10093,7 @@ pub fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfma_lane_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -9992,6 +10109,7 @@ pub fn vfma_lane_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfma_laneq_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -10007,6 +10125,7 @@ pub fn vfma_laneq_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmaq_lane_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -10022,6 +10141,7 @@ pub fn vfmaq_lane_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmaq_laneq_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -10140,6 +10260,7 @@ pub fn vfma_laneq_f64<const LANE: i32>(
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmla))]
 pub fn vfma_n_f16(a: float16x4_t, b: float16x4_t, c: f16) -> float16x4_t {
     vfma_f16(a, b, vdup_n_f16(c))
@@ -10149,6 +10270,7 @@ pub fn vfma_n_f16(a: float16x4_t, b: float16x4_t, c: f16) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmla))]
 pub fn vfmaq_n_f16(a: float16x8_t, b: float16x8_t, c: f16) -> float16x8_t {
     vfmaq_f16(a, b, vdupq_n_f16(c))
@@ -10182,6 +10304,7 @@ pub fn vfmad_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
 #[cfg_attr(test, assert_instr(fmadd))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmah_f16(a: f16, b: f16, c: f16) -> f16 {
     unsafe { fmaf16(b, c, a) }
 }
@@ -10192,6 +10315,7 @@ pub fn vfmah_f16(a: f16, b: f16, c: f16) -> f16 {
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmah_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
@@ -10206,6 +10330,7 @@ pub fn vfmah_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmah_laneq_f16<const LANE: i32>(a: f16, b: f16, v: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
@@ -10294,6 +10419,7 @@ pub fn vfmad_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlal2))]
 pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
     unsafe extern "unadjusted" {
@@ -10311,6 +10437,7 @@ pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlal2))]
 pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
     unsafe extern "unadjusted" {
@@ -10330,6 +10457,7 @@ pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlal_lane_high_f16<const LANE: i32>(
     r: float32x2_t,
     a: float16x4_t,
@@ -10346,6 +10474,7 @@ pub fn vfmlal_lane_high_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlal_laneq_high_f16<const LANE: i32>(
     r: float32x2_t,
     a: float16x4_t,
@@ -10362,6 +10491,7 @@ pub fn vfmlal_laneq_high_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlalq_lane_high_f16<const LANE: i32>(
     r: float32x4_t,
     a: float16x8_t,
@@ -10378,6 +10508,7 @@ pub fn vfmlalq_lane_high_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlalq_laneq_high_f16<const LANE: i32>(
     r: float32x4_t,
     a: float16x8_t,
@@ -10394,6 +10525,7 @@ pub fn vfmlalq_laneq_high_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlal_lane_low_f16<const LANE: i32>(
     r: float32x2_t,
     a: float16x4_t,
@@ -10410,6 +10542,7 @@ pub fn vfmlal_lane_low_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlal_laneq_low_f16<const LANE: i32>(
     r: float32x2_t,
     a: float16x4_t,
@@ -10426,6 +10559,7 @@ pub fn vfmlal_laneq_low_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlalq_lane_low_f16<const LANE: i32>(
     r: float32x4_t,
     a: float16x8_t,
@@ -10442,6 +10576,7 @@ pub fn vfmlalq_lane_low_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlalq_laneq_low_f16<const LANE: i32>(
     r: float32x4_t,
     a: float16x8_t,
@@ -10456,6 +10591,7 @@ pub fn vfmlalq_laneq_low_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlal))]
 pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
     unsafe extern "unadjusted" {
@@ -10473,6 +10609,7 @@ pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlal))]
 pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
     unsafe extern "unadjusted" {
@@ -10490,6 +10627,7 @@ pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float3
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlsl2))]
 pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
     unsafe extern "unadjusted" {
@@ -10507,6 +10645,7 @@ pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlsl2))]
 pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
     unsafe extern "unadjusted" {
@@ -10526,6 +10665,7 @@ pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlsl_lane_high_f16<const LANE: i32>(
     r: float32x2_t,
     a: float16x4_t,
@@ -10542,6 +10682,7 @@ pub fn vfmlsl_lane_high_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlsl_laneq_high_f16<const LANE: i32>(
     r: float32x2_t,
     a: float16x4_t,
@@ -10558,6 +10699,7 @@ pub fn vfmlsl_laneq_high_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlslq_lane_high_f16<const LANE: i32>(
     r: float32x4_t,
     a: float16x8_t,
@@ -10574,6 +10716,7 @@ pub fn vfmlslq_lane_high_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlslq_laneq_high_f16<const LANE: i32>(
     r: float32x4_t,
     a: float16x8_t,
@@ -10590,6 +10733,7 @@ pub fn vfmlslq_laneq_high_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlsl_lane_low_f16<const LANE: i32>(
     r: float32x2_t,
     a: float16x4_t,
@@ -10606,6 +10750,7 @@ pub fn vfmlsl_lane_low_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlsl_laneq_low_f16<const LANE: i32>(
     r: float32x2_t,
     a: float16x4_t,
@@ -10622,6 +10767,7 @@ pub fn vfmlsl_laneq_low_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlslq_lane_low_f16<const LANE: i32>(
     r: float32x4_t,
     a: float16x8_t,
@@ -10638,6 +10784,7 @@ pub fn vfmlslq_lane_low_f16<const LANE: i32>(
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlslq_laneq_low_f16<const LANE: i32>(
     r: float32x4_t,
     a: float16x8_t,
@@ -10652,6 +10799,7 @@ pub fn vfmlslq_laneq_low_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlsl))]
 pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
     unsafe extern "unadjusted" {
@@ -10669,6 +10817,7 @@ pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlsl))]
 pub fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
     unsafe extern "unadjusted" {
@@ -10699,6 +10848,7 @@ pub fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfms_lane_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -10714,6 +10864,7 @@ pub fn vfms_lane_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfms_laneq_f16<const LANE: i32>(
     a: float16x4_t,
     b: float16x4_t,
@@ -10729,6 +10880,7 @@ pub fn vfms_laneq_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsq_lane_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -10744,6 +10896,7 @@ pub fn vfmsq_lane_f16<const LANE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsq_laneq_f16<const LANE: i32>(
     a: float16x8_t,
     b: float16x8_t,
@@ -10862,6 +11015,7 @@ pub fn vfms_laneq_f64<const LANE: i32>(
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmls))]
 pub fn vfms_n_f16(a: float16x4_t, b: float16x4_t, c: f16) -> float16x4_t {
     vfms_f16(a, b, vdup_n_f16(c))
@@ -10871,6 +11025,7 @@ pub fn vfms_n_f16(a: float16x4_t, b: float16x4_t, c: f16) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmls))]
 pub fn vfmsq_n_f16(a: float16x8_t, b: float16x8_t, c: f16) -> float16x8_t {
     vfmsq_f16(a, b, vdupq_n_f16(c))
@@ -10890,6 +11045,7 @@ pub fn vfms_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
 #[cfg_attr(test, assert_instr(fmsub))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsh_f16(a: f16, b: f16, c: f16) -> f16 {
     vfmah_f16(a, -b, c)
 }
@@ -10900,6 +11056,7 @@ pub fn vfmsh_f16(a: f16, b: f16, c: f16) -> f16 {
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsh_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
@@ -10914,6 +11071,7 @@ pub fn vfmsh_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsh_laneq_f16<const LANE: i32>(a: f16, b: f16, v: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
@@ -11005,6 +11163,7 @@ pub fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(test, assert_instr(ldr))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
     crate::ptr::read_unaligned(ptr.cast())
 }
@@ -11016,6 +11175,7 @@ pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(test, assert_instr(ldr))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
     crate::ptr::read_unaligned(ptr.cast())
 }
@@ -13107,6 +13267,7 @@ pub fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmax))]
 pub fn vmaxh_f16(a: f16, b: f16) -> f16 {
     unsafe extern "unadjusted" {
@@ -13141,6 +13302,7 @@ pub fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnm))]
 pub fn vmaxnmh_f16(a: f16, b: f16) -> f16 {
     f16::max(a, b)
@@ -13150,6 +13312,7 @@ pub fn vmaxnmh_f16(a: f16, b: f16) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmv))]
 pub fn vmaxnmv_f16(a: float16x4_t) -> f16 {
     unsafe { simd_reduce_max(a) }
@@ -13159,6 +13322,7 @@ pub fn vmaxnmv_f16(a: float16x4_t) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmv))]
 pub fn vmaxnmvq_f16(a: float16x8_t) -> f16 {
     unsafe { simd_reduce_max(a) }
@@ -13195,6 +13359,7 @@ pub fn vmaxnmvq_f32(a: float32x4_t) -> f32 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxv))]
 pub fn vmaxv_f16(a: float16x4_t) -> f16 {
     unsafe extern "unadjusted" {
@@ -13211,6 +13376,7 @@ pub fn vmaxv_f16(a: float16x4_t) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxv))]
 pub fn vmaxvq_f16(a: float16x8_t) -> f16 {
     unsafe extern "unadjusted" {
@@ -13415,6 +13581,7 @@ pub fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmin))]
 pub fn vminh_f16(a: f16, b: f16) -> f16 {
     unsafe extern "unadjusted" {
@@ -13449,6 +13616,7 @@ pub fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnm))]
 pub fn vminnmh_f16(a: f16, b: f16) -> f16 {
     f16::min(a, b)
@@ -13458,6 +13626,7 @@ pub fn vminnmh_f16(a: f16, b: f16) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmv))]
 pub fn vminnmv_f16(a: float16x4_t) -> f16 {
     unsafe { simd_reduce_min(a) }
@@ -13467,6 +13636,7 @@ pub fn vminnmv_f16(a: float16x4_t) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmv))]
 pub fn vminnmvq_f16(a: float16x8_t) -> f16 {
     unsafe { simd_reduce_min(a) }
@@ -13503,6 +13673,7 @@ pub fn vminnmvq_f32(a: float32x4_t) -> f32 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminv))]
 pub fn vminv_f16(a: float16x4_t) -> f16 {
     unsafe extern "unadjusted" {
@@ -13519,6 +13690,7 @@ pub fn vminv_f16(a: float16x4_t) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminv))]
 pub fn vminvq_f16(a: float16x8_t) -> f16 {
     unsafe extern "unadjusted" {
@@ -14554,6 +14726,7 @@ pub fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
@@ -14570,6 +14743,7 @@ pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float1
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
@@ -14640,6 +14814,7 @@ pub fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vmulh_f16(a: f16, b: f16) -> f16 {
     a * b
@@ -14651,6 +14826,7 @@ pub fn vmulh_f16(a: f16, b: f16) -> f16 {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
@@ -14665,6 +14841,7 @@ pub fn vmulh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
@@ -15073,6 +15250,7 @@ pub fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmulx))]
 pub fn vmulx_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -15089,6 +15267,7 @@ pub fn vmulx_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmulx))]
 pub fn vmulxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -15171,6 +15350,7 @@ pub fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulx_lane_f16<const LANE: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
@@ -15187,6 +15367,7 @@ pub fn vmulx_lane_f16<const LANE: i32>(a: float16x4_t, b: float16x4_t) -> float1
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulx_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
@@ -15203,6 +15384,7 @@ pub fn vmulx_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxq_lane_f16<const LANE: i32>(a: float16x8_t, b: float16x4_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
@@ -15232,6 +15414,7 @@ pub fn vmulxq_lane_f16<const LANE: i32>(a: float16x8_t, b: float16x4_t) -> float
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
@@ -15347,6 +15530,7 @@ pub fn vmulx_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float
 #[cfg_attr(test, assert_instr(fmulx))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulx_n_f16(a: float16x4_t, b: f16) -> float16x4_t {
     vmulx_f16(a, vdup_n_f16(b))
 }
@@ -15356,6 +15540,7 @@ pub fn vmulx_n_f16(a: float16x4_t, b: f16) -> float16x4_t {
 #[cfg_attr(test, assert_instr(fmulx))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxq_n_f16(a: float16x8_t, b: f16) -> float16x8_t {
     vmulxq_f16(a, vdupq_n_f16(b))
 }
@@ -15440,6 +15625,7 @@ pub fn vmulxs_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmulx))]
 pub fn vmulxh_f16(a: f16, b: f16) -> f16 {
     unsafe extern "unadjusted" {
@@ -15458,6 +15644,7 @@ pub fn vmulxh_f16(a: f16, b: f16) -> f16 {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
     unsafe { vmulxh_f16(a, simd_extract!(b, LANE as u32)) }
@@ -15469,6 +15656,7 @@ pub fn vmulxh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
     unsafe { vmulxh_f16(a, simd_extract!(b, LANE as u32)) }
@@ -15534,6 +15722,7 @@ pub fn vnegd_s64(a: i64) -> i64 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fneg))]
 pub fn vnegh_f16(a: f16) -> f16 {
     -a
@@ -15587,6 +15776,7 @@ pub fn vpaddd_u64(a: uint64x2_t) -> u64 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(faddp))]
 pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -15805,6 +15995,7 @@ pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxp))]
 pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -15821,6 +16012,7 @@ pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxp))]
 pub fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -15837,6 +16029,7 @@ pub fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -15853,6 +16046,7 @@ pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -16109,6 +16303,7 @@ pub fn vpmaxs_f32(a: float32x2_t) -> f32 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminp))]
 pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -16125,6 +16320,7 @@ pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminp))]
 pub fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -16141,6 +16337,7 @@ pub fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmp))]
 pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -16157,6 +16354,7 @@ pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmp))]
 pub fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -21135,6 +21333,7 @@ pub fn vrecpes_f32(a: f32) -> f32 {
 #[cfg_attr(test, assert_instr(frecpe))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrecpeh_f16(a: f16) -> f16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -21215,6 +21414,7 @@ pub fn vrecpss_f32(a: f32, b: f32) -> f32 {
 #[cfg_attr(test, assert_instr(frecps))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrecpsh_f16(a: f16, b: f16) -> f16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -21263,6 +21463,7 @@ pub fn vrecpxs_f32(a: f32) -> f32 {
 #[cfg_attr(test, assert_instr(frecpx))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrecpxh_f16(a: f16) -> f16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -21276,164 +21477,73 @@ pub fn vrecpxh_f16(a: f16) -> f16 {
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21441,23 +21551,8 @@ pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21465,23 +21560,8 @@ pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21489,22 +21569,8 @@ pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21512,22 +21578,8 @@ pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21535,22 +21587,8 @@ pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21558,19 +21596,6 @@ pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -21582,7 +21607,6 @@ pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21590,22 +21614,8 @@ pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21613,22 +21623,8 @@ pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21636,19 +21632,6 @@ pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -21660,7 +21643,6 @@ pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21668,22 +21650,8 @@ pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21691,19 +21659,6 @@ pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -21715,7 +21670,6 @@ pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21723,120 +21677,44 @@ pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
+pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -21844,712 +21722,292 @@ pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
+pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
+pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
+pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
+pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
+pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
+pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
+pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
+pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
+pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
+pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
+pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
+pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
+pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
+pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
+pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
+pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
+pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"]
@@ -22581,7 +22039,6 @@ pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -22589,23 +22046,8 @@ pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -22613,23 +22055,8 @@ pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -22637,43 +22064,14 @@ pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
 #[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"]
 #[inline]
@@ -22935,6 +22333,7 @@ pub fn vrnd64z_f64(a: float64x1_t) -> float64x1_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintz))]
 pub fn vrnd_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_trunc(a) }
@@ -22944,6 +22343,7 @@ pub fn vrnd_f16(a: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintz))]
 pub fn vrndq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_trunc(a) }
@@ -22989,6 +22389,7 @@ pub fn vrndq_f64(a: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinta))]
 pub fn vrnda_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_round(a) }
@@ -22998,6 +22399,7 @@ pub fn vrnda_f16(a: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinta))]
 pub fn vrndaq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_round(a) }
@@ -23043,6 +22445,7 @@ pub fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinta))]
 pub fn vrndah_f16(a: f16) -> f16 {
     unsafe { roundf16(a) }
@@ -23052,6 +22455,7 @@ pub fn vrndah_f16(a: f16) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintz))]
 pub fn vrndh_f16(a: f16) -> f16 {
     unsafe { truncf16(a) }
@@ -23061,6 +22465,7 @@ pub fn vrndh_f16(a: f16) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinti))]
 pub fn vrndi_f16(a: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -23077,6 +22482,7 @@ pub fn vrndi_f16(a: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinti))]
 pub fn vrndiq_f16(a: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -23157,6 +22563,7 @@ pub fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinti))]
 pub fn vrndih_f16(a: f16) -> f16 {
     unsafe extern "unadjusted" {
@@ -23173,6 +22580,7 @@ pub fn vrndih_f16(a: f16) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintm))]
 pub fn vrndm_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_floor(a) }
@@ -23182,6 +22590,7 @@ pub fn vrndm_f16(a: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintm))]
 pub fn vrndmq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_floor(a) }
@@ -23227,6 +22636,7 @@ pub fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintm))]
 pub fn vrndmh_f16(a: f16) -> f16 {
     unsafe { floorf16(a) }
@@ -23268,6 +22678,7 @@ pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintn))]
 pub fn vrndnh_f16(a: f16) -> f16 {
     unsafe extern "unadjusted" {
@@ -23300,6 +22711,7 @@ pub fn vrndns_f32(a: f32) -> f32 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintp))]
 pub fn vrndp_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_ceil(a) }
@@ -23309,6 +22721,7 @@ pub fn vrndp_f16(a: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintp))]
 pub fn vrndpq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_ceil(a) }
@@ -23354,6 +22767,7 @@ pub fn vrndpq_f64(a: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintp))]
 pub fn vrndph_f16(a: f16) -> f16 {
     unsafe { ceilf16(a) }
@@ -23363,6 +22777,7 @@ pub fn vrndph_f16(a: f16) -> f16 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_round_ties_even(a) }
@@ -23372,6 +22787,7 @@ pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_round_ties_even(a) }
@@ -23417,6 +22833,7 @@ pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndxh_f16(a: f16) -> f16 {
     round_ties_even_f16(a)
@@ -23623,6 +23040,7 @@ pub fn vrsqrtes_f32(a: f32) -> f32 {
 #[cfg_attr(test, assert_instr(frsqrte))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrteh_f16(a: f16) -> f16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -23703,6 +23121,7 @@ pub fn vrsqrtss_f32(a: f32, b: f32) -> f32 {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(test, assert_instr(frsqrts))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrtsh_f16(a: f16, b: f16) -> f16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -24791,6 +24210,7 @@ pub fn vsqadds_u32(a: u32, b: i32) -> u32 {
 #[cfg_attr(test, assert_instr(fsqrt))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vsqrt_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_fsqrt(a) }
 }
@@ -24800,6 +24220,7 @@ pub fn vsqrt_f16(a: float16x4_t) -> float16x4_t {
 #[cfg_attr(test, assert_instr(fsqrt))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vsqrtq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_fsqrt(a) }
 }
@@ -24844,6 +24265,7 @@ pub fn vsqrtq_f64(a: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fsqrt))]
 pub fn vsqrth_f16(a: f16) -> f16 {
     unsafe { sqrtf16(a) }
@@ -25177,6 +24599,7 @@ pub fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
 #[cfg_attr(test, assert_instr(str))]
 #[allow(clippy::cast_ptr_alignment)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) {
     crate::ptr::write_unaligned(ptr.cast(), a)
 }
@@ -25189,6 +24612,7 @@ pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) {
 #[cfg_attr(test, assert_instr(str))]
 #[allow(clippy::cast_ptr_alignment)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) {
     crate::ptr::write_unaligned(ptr.cast(), a)
 }
@@ -26488,6 +25912,7 @@ pub fn vsubd_u64(a: u64, b: u64) -> u64 {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vsubh_f16(a: f16, b: f16) -> f16 {
     a - b
@@ -27283,6 +26708,7 @@ pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
 pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
@@ -27292,6 +26718,7 @@ pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
 pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
@@ -27517,6 +26944,7 @@ pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
@@ -27526,6 +26954,7 @@ pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
@@ -28056,6 +27485,7 @@ pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x1
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
 pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
@@ -28065,6 +27495,7 @@ pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
 pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
@@ -28290,6 +27721,7 @@ pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
 pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
@@ -28299,6 +27731,7 @@ pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
 pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
@@ -28542,6 +27975,7 @@ pub fn vxarq_u64<const IMM6: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
@@ -28551,6 +27985,7 @@ pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
@@ -28776,6 +28211,7 @@ pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
@@ -28785,6 +28221,7 @@ pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 32531c7da1356..e4e4e040f468d 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -820,6 +820,7 @@ pub fn vabaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4f16")]
@@ -842,6 +843,7 @@ pub fn vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vabdq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8f16")]
@@ -1405,6 +1407,7 @@ pub fn vabdl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vabs_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_fabs(a) }
 }
@@ -1419,6 +1422,7 @@ pub fn vabs_f16(a: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vabsq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_fabs(a) }
 }
@@ -1625,6 +1629,7 @@ pub fn vabsq_s32(a: int32x4_t) -> int32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vabsh_f16(a: f16) -> f16 {
     unsafe { simd_extract!(vabs_f16(vdup_n_f16(a)), 0) }
 }
@@ -1639,6 +1644,7 @@ pub fn vabsh_f16(a: f16) -> f16 {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_add(a, b) }
 }
@@ -1653,6 +1659,7 @@ pub fn vadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_add(a, b) }
 }
@@ -2129,6 +2136,7 @@ pub fn vaddq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vaddh_f16(a: f16, b: f16) -> f16 {
     a + b
 }
@@ -3828,6 +3836,7 @@ pub fn vbicq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     assert_instr(bsl)
 )]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vbsl_f16(a: uint16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     let not = int16x4_t::splat(-1);
     unsafe {
@@ -3848,6 +3857,7 @@ pub fn vbsl_f16(a: uint16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     assert_instr(bsl)
 )]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vbslq_f16(a: uint16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     let not = int16x8_t::splat(-1);
     unsafe {
@@ -4462,6 +4472,7 @@ pub fn vbslq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcage_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i16.v4f16")]
@@ -4484,6 +4495,7 @@ pub fn vcage_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcageq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v8i16.v8f16")]
@@ -4564,6 +4576,7 @@ pub fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcagt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i16.v4f16")]
@@ -4586,6 +4599,7 @@ pub fn vcagt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcagtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v8i16.v8f16")]
@@ -4666,6 +4680,7 @@ pub fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcale_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     vcage_f16(b, a)
 }
@@ -4680,6 +4695,7 @@ pub fn vcale_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcaleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     vcageq_f16(b, a)
 }
@@ -4736,6 +4752,7 @@ pub fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcalt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     vcagt_f16(b, a)
 }
@@ -4750,6 +4767,7 @@ pub fn vcalt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcaltq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     vcagtq_f16(b, a)
 }
@@ -4806,6 +4824,7 @@ pub fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vceq_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_eq(a, b) }
 }
@@ -4820,6 +4839,7 @@ pub fn vceq_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_eq(a, b) }
 }
@@ -5170,6 +5190,7 @@ pub fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcge_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_ge(a, b) }
 }
@@ -5184,6 +5205,7 @@ pub fn vcge_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgeq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_ge(a, b) }
 }
@@ -5492,6 +5514,7 @@ pub fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgez_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
     unsafe { simd_ge(a, transmute(b)) }
@@ -5507,6 +5530,7 @@ pub fn vcgez_f16(a: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgezq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
     unsafe { simd_ge(a, transmute(b)) }
@@ -5522,6 +5546,7 @@ pub fn vcgezq_f16(a: float16x8_t) -> uint16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_gt(a, b) }
 }
@@ -5536,6 +5561,7 @@ pub fn vcgt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_gt(a, b) }
 }
@@ -5844,6 +5870,7 @@ pub fn vcgtq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgtz_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
     unsafe { simd_gt(a, transmute(b)) }
@@ -5859,6 +5886,7 @@ pub fn vcgtz_f16(a: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgtzq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
     unsafe { simd_gt(a, transmute(b)) }
@@ -5874,6 +5902,7 @@ pub fn vcgtzq_f16(a: float16x8_t) -> uint16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcle_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_le(a, b) }
 }
@@ -5888,6 +5917,7 @@ pub fn vcle_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_le(a, b) }
 }
@@ -6196,6 +6226,7 @@ pub fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vclez_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
     unsafe { simd_le(a, transmute(b)) }
@@ -6211,6 +6242,7 @@ pub fn vclez_f16(a: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vclezq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
     unsafe { simd_le(a, transmute(b)) }
@@ -6526,6 +6558,7 @@ pub fn vclsq_u32(a: uint32x4_t) -> int32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vclt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_lt(a, b) }
 }
@@ -6540,6 +6573,7 @@ pub fn vclt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcltq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_lt(a, b) }
 }
@@ -6848,6 +6882,7 @@ pub fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcltz_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
     unsafe { simd_lt(a, transmute(b)) }
@@ -6863,6 +6898,7 @@ pub fn vcltz_f16(a: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcltzq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
     unsafe { simd_lt(a, transmute(b)) }
@@ -7536,6 +7572,7 @@ pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) }
@@ -7756,6 +7793,7 @@ pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcreate_f16(a: u64) -> float16x4_t {
     unsafe { transmute(a) }
 }
@@ -7771,6 +7809,7 @@ pub fn vcreate_f16(a: u64) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcreate_f16(a: u64) -> float16x4_t {
     unsafe {
         let ret_val: float16x4_t = transmute(a);
@@ -8274,6 +8313,7 @@ pub fn vcreate_p64(a: u64) -> poly64x1_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_f16_f32(a: float32x4_t) -> float16x4_t {
     unsafe { simd_cast(a) }
 }
@@ -8288,6 +8328,7 @@ pub fn vcvt_f16_f32(a: float32x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_f16_s16(a: int16x4_t) -> float16x4_t {
     unsafe { simd_cast(a) }
 }
@@ -8302,6 +8343,7 @@ pub fn vcvt_f16_s16(a: int16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_f16_s16(a: int16x8_t) -> float16x8_t {
     unsafe { simd_cast(a) }
 }
@@ -8316,6 +8358,7 @@ pub fn vcvtq_f16_s16(a: int16x8_t) -> float16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_f16_u16(a: uint16x4_t) -> float16x4_t {
     unsafe { simd_cast(a) }
 }
@@ -8330,6 +8373,7 @@ pub fn vcvt_f16_u16(a: uint16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_f16_u16(a: uint16x8_t) -> float16x8_t {
     unsafe { simd_cast(a) }
 }
@@ -8344,6 +8388,7 @@ pub fn vcvtq_f16_u16(a: uint16x8_t) -> float16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_f32_f16(a: float16x4_t) -> float32x4_t {
     unsafe { simd_cast(a) }
 }
@@ -8443,6 +8488,7 @@ pub fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_n_f16_s16<const N: i32>(a: int16x4_t) -> float16x4_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -8470,6 +8516,7 @@ pub fn vcvt_n_f16_s16<const N: i32>(a: int16x4_t) -> float16x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_n_f16_s16<const N: i32>(a: int16x8_t) -> float16x8_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -8497,6 +8544,7 @@ pub fn vcvtq_n_f16_s16<const N: i32>(a: int16x8_t) -> float16x8_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_n_f16_u16<const N: i32>(a: uint16x4_t) -> float16x4_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -8524,6 +8572,7 @@ pub fn vcvt_n_f16_u16<const N: i32>(a: uint16x4_t) -> float16x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_n_f16_u16<const N: i32>(a: uint16x8_t) -> float16x8_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -8703,6 +8752,7 @@ pub fn vcvtq_n_f32_u32<const N: i32>(a: uint32x4_t) -> float32x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_n_s16_f16<const N: i32>(a: float16x4_t) -> int16x4_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -8730,6 +8780,7 @@ pub fn vcvt_n_s16_f16<const N: i32>(a: float16x4_t) -> int16x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_n_s16_f16<const N: i32>(a: float16x8_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -8833,6 +8884,7 @@ pub fn vcvtq_n_s32_f32<const N: i32>(a: float32x4_t) -> int32x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_n_u16_f16<const N: i32>(a: float16x4_t) -> uint16x4_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -8860,6 +8912,7 @@ pub fn vcvt_n_u16_f16<const N: i32>(a: float16x4_t) -> uint16x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_n_u16_f16<const N: i32>(a: float16x8_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
@@ -8962,6 +9015,7 @@ pub fn vcvtq_n_u32_f32<const N: i32>(a: float32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe { simd_cast(a) }
 }
@@ -8976,6 +9030,7 @@ pub fn vcvt_s16_f16(a: float16x4_t) -> int16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe { simd_cast(a) }
 }
@@ -9048,6 +9103,7 @@ pub fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe { simd_cast(a) }
 }
@@ -9062,6 +9118,7 @@ pub fn vcvt_u16_f16(a: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe { simd_cast(a) }
 }
@@ -9361,6 +9418,7 @@ pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vdup_lane_f16<const N: i32>(a: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 2);
     unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
@@ -9377,6 +9435,7 @@ pub fn vdup_lane_f16<const N: i32>(a: float16x4_t) -> float16x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vdupq_lane_f16<const N: i32>(a: float16x4_t) -> float16x8_t {
     static_assert_uimm_bits!(N, 2);
     unsafe {
@@ -9922,6 +9981,7 @@ pub fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vdup_laneq_f16<const N: i32>(a: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 3);
     unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
@@ -9938,6 +9998,7 @@ pub fn vdup_laneq_f16<const N: i32>(a: float16x8_t) -> float16x4_t {
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vdupq_laneq_f16<const N: i32>(a: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
@@ -10482,6 +10543,7 @@ pub fn vdup_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x1_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vdup_n_f16(a: f16) -> float16x4_t {
     float16x4_t::splat(a)
 }
@@ -10496,6 +10558,7 @@ pub fn vdup_n_f16(a: f16) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vdupq_n_f16(a: f16) -> float16x8_t {
     float16x8_t::splat(a)
 }
@@ -11443,6 +11506,7 @@ pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vext_f16<const N: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 2);
     unsafe {
@@ -11814,6 +11878,7 @@ pub fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vextq_f16<const N: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
@@ -12394,6 +12459,7 @@ pub fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe { simd_fma(b, c, a) }
 }
@@ -12408,6 +12474,7 @@ pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe { simd_fma(b, c, a) }
 }
@@ -12507,6 +12574,7 @@ pub fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe {
         let b: float16x4_t = simd_neg(b);
@@ -12525,6 +12593,7 @@ pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe {
         let b: float16x8_t = simd_neg(b);
@@ -12627,6 +12696,7 @@ pub fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vget_high_f16(a: float16x8_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) }
@@ -12637,6 +12707,7 @@ pub fn vget_high_f16(a: float16x8_t) -> float16x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vget_low_f16(a: float16x8_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
@@ -12884,6 +12955,7 @@ pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
 )]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vget_lane_f16<const LANE: i32>(a: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
     unsafe { simd_extract!(a, LANE as u32) }
@@ -12900,6 +12972,7 @@ pub fn vget_lane_f16<const LANE: i32>(a: float16x4_t) -> f16 {
 )]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vgetq_lane_f16<const LANE: i32>(a: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
     unsafe { simd_extract!(a, LANE as u32) }
@@ -14256,6 +14329,7 @@ pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
     let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0)));
     simd_shuffle!(x, x, [0, 0, 0, 0])
@@ -14273,6 +14347,7 @@ pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t {
     let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0)));
     simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
@@ -14843,6 +14918,7 @@ pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t {
 #[target_feature(enable = "neon,v7")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
     transmute(vld1_v4f16(
@@ -14860,6 +14936,7 @@ pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
 #[target_feature(enable = "neon,v7")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
     let ret_val: float16x4_t = transmute(vld1_v4f16(
@@ -14878,6 +14955,7 @@ pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
 #[target_feature(enable = "neon,v7")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
     transmute(vld1q_v8f16(
@@ -14895,6 +14973,7 @@ pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
 #[target_feature(enable = "neon,v7")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
     let ret_val: float16x8_t = transmute(vld1q_v8f16(
@@ -14916,6 +14995,7 @@ pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -14940,6 +15020,7 @@ pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -14964,6 +15045,7 @@ pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -14988,6 +15070,7 @@ pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -15012,6 +15095,7 @@ pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -15036,6 +15120,7 @@ pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -15732,6 +15817,7 @@ pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
     simd_insert!(src, LANE as u32, *ptr)
@@ -15750,6 +15836,7 @@ pub unsafe fn vld1_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x4_t)
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
     simd_insert!(src, LANE as u32, *ptr)
@@ -19490,6 +19577,7 @@ unsafe fn vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")]
@@ -19507,6 +19595,7 @@ unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")]
@@ -19548,6 +19637,7 @@ pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")]
@@ -19565,6 +19655,7 @@ pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0")]
@@ -19584,6 +19675,7 @@ pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -19606,6 +19698,7 @@ pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -20521,6 +20614,7 @@ pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")]
@@ -20538,6 +20632,7 @@ pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")]
@@ -20557,6 +20652,7 @@ pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -20579,6 +20675,7 @@ pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -20880,6 +20977,7 @@ pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -20905,6 +21003,7 @@ pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) ->
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -20932,6 +21031,7 @@ pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -20957,6 +21057,7 @@ pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) ->
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -22109,6 +22210,7 @@ pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")]
@@ -22126,6 +22228,7 @@ pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")]
@@ -22145,6 +22248,7 @@ pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -22167,6 +22271,7 @@ pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -23104,6 +23209,7 @@ pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")]
@@ -23121,6 +23227,7 @@ pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")]
@@ -23140,6 +23247,7 @@ pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -23162,6 +23270,7 @@ pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -23463,6 +23572,7 @@ pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -23489,6 +23599,7 @@ pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) ->
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -23517,6 +23628,7 @@ pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -23547,6 +23659,7 @@ pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) ->
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -24775,6 +24888,7 @@ pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")]
@@ -24792,6 +24906,7 @@ pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")]
@@ -24811,6 +24926,7 @@ pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -24833,6 +24949,7 @@ pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -25792,6 +25909,7 @@ pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")]
@@ -25809,6 +25927,7 @@ pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")]
@@ -25828,6 +25947,7 @@ pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -25850,6 +25970,7 @@ pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -26151,6 +26272,7 @@ pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -26178,6 +26300,7 @@ pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) ->
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -26207,6 +26330,7 @@ pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -26238,6 +26362,7 @@ pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) ->
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -27527,6 +27652,7 @@ pub unsafe fn vldrq_p128(a: *const p128) -> p128 {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")]
@@ -27549,6 +27675,7 @@ pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")]
@@ -27917,6 +28044,7 @@ pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_fmax(a, b) }
 }
@@ -27931,6 +28059,7 @@ pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_fmax(a, b) }
 }
@@ -27987,6 +28116,7 @@ pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")]
@@ -28009,6 +28139,7 @@ pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")]
@@ -28377,6 +28508,7 @@ pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_fmin(a, b) }
 }
@@ -28391,6 +28523,7 @@ pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_fmin(a, b) }
 }
@@ -31573,6 +31706,7 @@ pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmov_n_f16(a: f16) -> float16x4_t {
     vdup_n_f16(a)
 }
@@ -31587,6 +31721,7 @@ pub fn vmov_n_f16(a: f16) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmovq_n_f16(a: f16) -> float16x8_t {
     vdupq_n_f16(a)
 }
@@ -32315,6 +32450,7 @@ pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_mul(a, b) }
 }
@@ -32329,6 +32465,7 @@ pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_mul(a, b) }
 }
@@ -32386,6 +32523,7 @@ pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
@@ -32407,6 +32545,7 @@ pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
@@ -33022,6 +33161,7 @@ pub fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t {
     unsafe { simd_mul(a, vdup_n_f16(b)) }
 }
@@ -33036,6 +33176,7 @@ pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t {
     unsafe { simd_mul(a, vdupq_n_f16(b)) }
 }
@@ -34369,6 +34510,7 @@ pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vneg_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_neg(a) }
 }
@@ -34383,6 +34525,7 @@ pub fn vneg_f16(a: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vnegq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_neg(a) }
 }
@@ -35613,6 +35756,7 @@ pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")]
@@ -36834,15 +36978,7 @@ pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")]
-        fn _vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vqadd_s8(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"]
@@ -36863,15 +36999,7 @@ pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")]
-        fn _vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    unsafe { _vqaddq_s8(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"]
@@ -36892,15 +37020,7 @@ pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")]
-        fn _vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vqadd_s16(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"]
@@ -36921,15 +37041,7 @@ pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")]
-        fn _vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vqaddq_s16(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"]
@@ -36950,15 +37062,7 @@ pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")]
-        fn _vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vqadd_s32(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"]
@@ -36979,15 +37083,7 @@ pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")]
-        fn _vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vqaddq_s32(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"]
@@ -37008,15 +37104,7 @@ pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v1i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")]
-        fn _vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-    unsafe { _vqadd_s64(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"]
@@ -37037,15 +37125,7 @@ pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")]
-        fn _vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    unsafe { _vqaddq_s64(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"]
@@ -37066,15 +37146,7 @@ pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")]
-        fn _vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqadd_u8(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"]
@@ -37095,15 +37167,7 @@ pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")]
-        fn _vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vqaddq_u8(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"]
@@ -37124,15 +37188,7 @@ pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")]
-        fn _vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqadd_u16(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"]
@@ -37153,15 +37209,7 @@ pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")]
-        fn _vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vqaddq_u16(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"]
@@ -37182,15 +37230,7 @@ pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")]
-        fn _vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqadd_u32(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"]
@@ -37211,15 +37251,7 @@ pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")]
-        fn _vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vqaddq_u32(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"]
@@ -37240,15 +37272,7 @@ pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v1i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")]
-        fn _vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
-    }
-    unsafe { _vqadd_u64(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"]
@@ -37269,15 +37293,7 @@ pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")]
-        fn _vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vqaddq_u64(a, b) }
+    unsafe { simd_saturating_add(a, b) }
 }
 #[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"]
@@ -41115,15 +41131,7 @@ pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")]
-        fn _vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vqsub_s8(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"]
@@ -41144,15 +41152,7 @@ pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")]
-        fn _vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    unsafe { _vqsubq_s8(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"]
@@ -41173,15 +41173,7 @@ pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i16")]
-        fn _vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vqsub_s16(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"]
@@ -41202,15 +41194,7 @@ pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i16")]
-        fn _vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vqsubq_s16(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"]
@@ -41231,15 +41215,7 @@ pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i32")]
-        fn _vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vqsub_s32(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"]
@@ -41260,15 +41236,7 @@ pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")]
-        fn _vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vqsubq_s32(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"]
@@ -41289,15 +41257,7 @@ pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v1i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v1i64")]
-        fn _vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-    unsafe { _vqsub_s64(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"]
@@ -41318,15 +41278,7 @@ pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")]
-        fn _vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    unsafe { _vqsubq_s64(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"]
@@ -41347,15 +41299,7 @@ pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")]
-        fn _vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqsub_u8(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"]
@@ -41376,15 +41320,7 @@ pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")]
-        fn _vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vqsubq_u8(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"]
@@ -41405,15 +41341,7 @@ pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i16")]
-        fn _vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqsub_u16(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"]
@@ -41434,15 +41362,7 @@ pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i16")]
-        fn _vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vqsubq_u16(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"]
@@ -41463,15 +41383,7 @@ pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i32")]
-        fn _vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqsub_u32(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"]
@@ -41492,15 +41404,7 @@ pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i32")]
-        fn _vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vqsubq_u32(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"]
@@ -41521,15 +41425,7 @@ pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v1i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v1i64")]
-        fn _vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
-    }
-    unsafe { _vqsub_u64(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"]
@@ -41550,15 +41446,7 @@ pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i64")]
-        fn _vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vqsubq_u64(a, b) }
+    unsafe { simd_saturating_sub(a, b) }
 }
 #[doc = "Rounding Add returning High Narrow (high half)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"]
@@ -41943,6 +41831,7 @@ pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")]
@@ -41965,6 +41854,7 @@ pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")]
@@ -42103,6 +41993,7 @@ pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")]
@@ -42125,6 +42016,7 @@ pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")]
@@ -42197,7 +42089,6 @@ pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42206,13 +42097,13 @@ pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42221,17 +42112,13 @@ pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42240,13 +42127,13 @@ pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42255,17 +42142,13 @@ pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42274,13 +42157,13 @@ pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42289,17 +42172,13 @@ pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42308,13 +42187,13 @@ pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42323,17 +42202,13 @@ pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42342,13 +42217,13 @@ pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42357,14 +42232,13 @@ pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42373,13 +42247,13 @@ pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42388,17 +42262,13 @@ pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42407,13 +42277,13 @@ pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42422,17 +42292,13 @@ pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42441,13 +42307,13 @@ pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42456,17 +42322,13 @@ pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42475,13 +42337,13 @@ pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42490,14 +42352,13 @@ pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42506,13 +42367,13 @@ pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42521,17 +42382,13 @@ pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42540,13 +42397,13 @@ pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42555,17 +42412,13 @@ pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42574,13 +42427,13 @@ pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42589,17 +42442,13 @@ pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42608,13 +42457,13 @@ pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42623,21 +42472,13 @@ pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42646,13 +42487,13 @@ pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42661,17 +42502,13 @@ pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42680,13 +42517,13 @@ pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42695,17 +42532,13 @@ pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42714,13 +42547,13 @@ pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42729,17 +42562,13 @@ pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42748,13 +42577,13 @@ pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42763,21 +42592,13 @@ pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42786,13 +42607,13 @@ pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42801,17 +42622,13 @@ pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42820,13 +42637,13 @@ pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42835,17 +42652,13 @@ pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42854,13 +42667,13 @@ pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42869,17 +42682,13 @@ pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42888,13 +42697,13 @@ pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42903,21 +42712,13 @@ pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42926,13 +42727,13 @@ pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
@@ -42941,18 +42742,14 @@ pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -42960,14 +42757,14 @@ pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -42975,18 +42772,14 @@ pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -42994,14 +42787,14 @@ pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43009,7338 +42802,43 @@ pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50356,39 +42854,12 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
+pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50404,39 +42875,12 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
+pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50452,39 +42896,12 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
+pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50500,43 +42917,12 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
+pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50552,39 +42938,12 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
+pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50600,39 +42959,12 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50648,43 +42980,12 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
+pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50700,13 +43001,12 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50722,17 +43022,12 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50748,13 +43043,12 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
+pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50770,17 +43064,12 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50796,13 +43085,12 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
+pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50818,17 +43106,12 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50844,13 +43127,12 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
+pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50866,17 +43148,12 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50892,13 +43169,12 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
+pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50914,17 +43190,12 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50940,13 +43211,12 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
+pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50962,14 +43232,12 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -50985,13 +43253,12 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51007,17 +43274,12 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51033,13 +43295,12 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
+pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51055,17 +43316,12 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51081,13 +43337,12 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51103,14 +43358,12 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51126,13 +43379,12 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
+pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51148,17 +43400,12 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51174,13 +43421,12 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51196,17 +43442,12 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51222,13 +43463,12 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
+pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51244,17 +43484,12 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51270,13 +43505,12 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51292,21 +43526,12 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51322,13 +43547,12 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51344,17 +43568,12 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51370,13 +43589,12 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51392,17 +43610,12 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51418,13 +43631,12 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
+pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51440,17 +43652,12 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51466,13 +43673,12 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51488,21 +43694,12 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51518,13 +43715,12 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51540,17 +43736,12 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51566,13 +43757,12 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51588,17 +43778,12 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51614,13 +43799,12 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51636,21 +43820,12 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51666,13 +43841,12 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51688,17 +43862,12 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51714,13 +43883,12 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51736,16 +43904,12 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51761,13 +43925,12 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51783,16 +43946,12 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51808,13 +43967,12 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51830,16 +43988,12 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51855,13 +44009,12 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51877,14 +44030,11 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -51901,13 +44051,12 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
+pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51923,13 +44072,12 @@ pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51945,16 +44093,12 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51970,13 +44114,12 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51992,16 +44135,12 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52017,13 +44156,12 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52039,16 +44177,12 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52064,13 +44198,12 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52086,16 +44219,12 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52111,13 +44240,12 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52133,16 +44261,12 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52158,13 +44282,12 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52180,17 +44303,12 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52206,13 +44324,12 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52228,21 +44345,12 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52258,13 +44366,12 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52280,17 +44387,12 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52306,13 +44408,12 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52328,17 +44429,12 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52354,13 +44450,12 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52376,17 +44471,12 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52402,13 +44492,12 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52424,21 +44513,12 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52454,13 +44534,12 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52476,17 +44555,12 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52502,13 +44576,12 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52524,17 +44597,12 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52550,13 +44618,12 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
+pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52572,21 +44639,12 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52602,13 +44660,12 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52624,17 +44681,12 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52650,13 +44702,12 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52672,17 +44723,12 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52698,13 +44744,12 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
+pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52720,17 +44765,12 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52746,13 +44786,12 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
+pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52768,17 +44807,12 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52794,13 +44828,12 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52816,17 +44849,12 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52842,13 +44870,12 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
+pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52864,14 +44891,12 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52887,13 +44912,12 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52909,17 +44933,12 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52935,13 +44954,12 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
+pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52957,17 +44975,12 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -52983,13 +44996,12 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53005,17 +45017,12 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53031,13 +45038,12 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
+pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53053,14 +45059,12 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53076,13 +45080,12 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
+pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53098,17 +45101,12 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53124,13 +45122,12 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
+pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53146,18 +45143,12 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53173,13 +45164,12 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
+pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53195,22 +45185,12 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53226,13 +45206,12 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
+pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53248,18 +45227,12 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53275,13 +45248,12 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53297,18 +45269,12 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53324,13 +45290,12 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
+pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53346,18 +45311,12 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53373,13 +45332,12 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53395,22 +45353,12 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53426,13 +45374,12 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53448,18 +45395,12 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53475,13 +45416,12 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53497,18 +45437,12 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53524,13 +45458,12 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
+pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53546,18 +45479,12 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53573,13 +45500,12 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53595,18 +45521,12 @@ pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53622,13 +45542,12 @@ pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53644,17 +45563,12 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53670,13 +45584,12 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
+pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53692,17 +45605,12 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53718,13 +45626,12 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
+pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53740,17 +45647,12 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53766,13 +45668,12 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
+pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53788,17 +45689,12 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53814,13 +45710,12 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
+pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53836,14 +45731,12 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53859,13 +45752,12 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
+pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53881,17 +45773,12 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53907,13 +45794,12 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53929,17 +45815,12 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53955,13 +45836,12 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
+pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -53977,17 +45857,12 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54003,13 +45878,12 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
+pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54025,14 +45899,12 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54048,13 +45920,12 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
+pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54070,17 +45941,12 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54096,13 +45962,12 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54118,17 +45983,12 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54144,13 +46004,12 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
+pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54166,21 +46025,12 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54196,13 +46046,12 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
+pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54218,17 +46067,12 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54244,13 +46088,12 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54266,17 +46109,12 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54292,13 +46130,12 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
+pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54314,17 +46151,12 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54340,13 +46172,12 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54362,21 +46193,12 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54392,13 +46214,12 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54414,17 +46235,12 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54440,13 +46256,12 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
+pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54462,17 +46277,12 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54488,13 +46298,12 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
+pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54510,17 +46319,12 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54536,13 +46340,12 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
+pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54558,23 +46361,14 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54588,15 +46382,14 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54610,22 +46403,14 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54639,15 +46424,14 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54661,18 +46445,14 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54686,15 +46466,14 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
+pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54708,18 +46487,14 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54733,15 +46508,14 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54755,18 +46529,14 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54780,15 +46550,14 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
+pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54802,22 +46571,14 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54831,15 +46592,14 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54853,18 +46613,14 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54878,15 +46634,14 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
+pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54900,18 +46655,14 @@ pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54925,15 +46676,14 @@ pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54947,18 +46697,14 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54972,15 +46718,14 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
+pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54994,22 +46739,14 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55023,15 +46760,14 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55045,18 +46781,14 @@ pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55070,15 +46802,14 @@ pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
+pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55092,18 +46823,14 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55117,15 +46844,14 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
+pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55139,16 +46865,14 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55162,15 +46886,14 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55184,17 +46907,14 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55208,15 +46928,14 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
+pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55230,20 +46949,14 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55257,15 +46970,14 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
+pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55279,16 +46991,14 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55302,15 +47012,14 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
+pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55324,16 +47033,14 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55347,15 +47054,14 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55369,19 +47075,14 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55395,15 +47096,14 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
+pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55417,16 +47117,14 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55440,15 +47138,14 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
+pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55462,16 +47159,14 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55485,15 +47180,14 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
+pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55507,19 +47201,14 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55533,15 +47222,14 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
+pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55555,16 +47243,14 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55578,15 +47264,14 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
+pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55600,16 +47285,14 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55623,15 +47306,14 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
+pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55645,17 +47327,14 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55669,15 +47348,14 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
+pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55691,20 +47369,14 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55718,15 +47390,14 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
+pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55740,16 +47411,14 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55763,15 +47432,14 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55785,16 +47453,14 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55808,15 +47474,14 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
+pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55830,17 +47495,12 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -55856,13 +47516,12 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
+pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -55878,14 +47537,12 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -55901,13 +47558,12 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
+pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -55923,14 +47579,12 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -55946,13 +47600,12 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
+pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -55968,17 +47621,12 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -55994,13 +47642,12 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
+pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56016,14 +47663,12 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56039,13 +47684,12 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
+pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56061,14 +47705,12 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56084,13 +47726,12 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
+pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56106,15 +47747,12 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56130,13 +47768,12 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56152,18 +47789,12 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56179,13 +47810,12 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
+pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56201,14 +47831,12 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56224,13 +47852,12 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56246,14 +47873,12 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56269,13 +47894,12 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
+pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56291,17 +47915,12 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56317,13 +47936,12 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
+pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56339,16 +47957,12 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56364,13 +47978,12 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
+pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56386,16 +47999,12 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56411,13 +48020,12 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
+pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56433,16 +48041,12 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56458,13 +48062,12 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
+pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56480,16 +48083,12 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56505,13 +48104,12 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56527,16 +48125,12 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56552,13 +48146,12 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
+pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56574,16 +48167,12 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56599,13 +48188,12 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
+pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56621,16 +48209,12 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56646,13 +48230,12 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
+pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56668,16 +48251,12 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56693,13 +48272,12 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
+pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56715,14 +48293,12 @@ pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56738,13 +48314,12 @@ pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
+pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56760,21 +48335,12 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56790,13 +48356,12 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
+pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56812,17 +48377,12 @@ pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56838,13 +48398,12 @@ pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
+pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56860,17 +48419,12 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56886,13 +48440,12 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
+pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56908,21 +48461,12 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56938,13 +48482,12 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56960,17 +48503,12 @@ pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -56986,13 +48524,12 @@ pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
+pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -57008,17 +48545,12 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -57034,13 +48566,12 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
+pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -57056,21 +48587,12 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -57086,13 +48608,12 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
+pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -57109,11 +48630,7 @@ pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+    unsafe { transmute(a) }
 }
 #[doc = "Reversing vector elements (swap endianness)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"]
@@ -57882,6 +49399,7 @@ pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrev64_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
 }
@@ -57896,6 +49414,7 @@ pub fn vrev64_f16(a: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
 }
@@ -58258,13 +49777,13 @@ pub fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrndn_f16(a: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
             link_name = "llvm.roundeven.v4f16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f16")]
         fn _vrndn_f16(a: float16x4_t) -> float16x4_t;
     }
     unsafe { _vrndn_f16(a) }
@@ -58280,13 +49799,13 @@ pub fn vrndn_f16(a: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
             link_name = "llvm.roundeven.v8f16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v8f16")]
         fn _vrndnq_f16(a: float16x8_t) -> float16x8_t;
     }
     unsafe { _vrndnq_f16(a) }
@@ -58312,10 +49831,9 @@ pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t {
 pub fn vrndn_f32(a: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
             link_name = "llvm.roundeven.v2f32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
         fn _vrndn_f32(a: float32x2_t) -> float32x2_t;
     }
     unsafe { _vrndn_f32(a) }
@@ -58341,10 +49859,9 @@ pub fn vrndn_f32(a: float32x2_t) -> float32x2_t {
 pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
             link_name = "llvm.roundeven.v4f32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
         fn _vrndnq_f32(a: float32x4_t) -> float32x4_t;
     }
     unsafe { _vrndnq_f32(a) }
@@ -59366,6 +50883,7 @@ pub fn vrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
     assert_instr(frsqrte)
 )]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f16")]
@@ -59388,6 +50906,7 @@ pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t {
     assert_instr(frsqrte)
 )]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")]
@@ -59526,6 +51045,7 @@ pub fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t {
     assert_instr(frsqrts)
 )]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")]
@@ -59548,6 +51068,7 @@ pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     assert_instr(frsqrts)
 )]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")]
@@ -60231,6 +51752,7 @@ pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vset_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe { simd_insert!(b, LANE as u32, a) }
@@ -60247,6 +51769,7 @@ pub fn vset_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> float16x4_t {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vsetq_lane_f16<const LANE: i32>(a: f16, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe { simd_insert!(b, LANE as u32, a) }
@@ -63699,6 +55222,7 @@ pub fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) {
     vst1_v4f16(
@@ -63716,6 +55240,7 @@ pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) {
     vst1q_v8f16(
@@ -63734,6 +55259,7 @@ pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v4f16")]
@@ -63751,6 +55277,7 @@ pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v8f16")]
@@ -63767,6 +55294,7 @@ pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
 #[cfg_attr(test, assert_instr(st1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -63786,6 +55314,7 @@ pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
 #[cfg_attr(test, assert_instr(st1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -63806,6 +55335,7 @@ pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4f16")]
@@ -63823,6 +55353,7 @@ pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8f16")]
@@ -63839,6 +55370,7 @@ pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
 #[cfg_attr(test, assert_instr(st1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -63858,6 +55390,7 @@ pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
 #[cfg_attr(test, assert_instr(st1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -63877,6 +55410,7 @@ pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
     unsafe extern "unadjusted" {
@@ -63900,6 +55434,7 @@ pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
     unsafe extern "unadjusted" {
@@ -63923,6 +55458,7 @@ pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
 #[cfg_attr(test, assert_instr(st1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -63948,6 +55484,7 @@ pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
 #[cfg_attr(test, assert_instr(st1))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -64556,6 +56093,7 @@ pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4_t) {
     static_assert_uimm_bits!(LANE, 2);
     *a = simd_extract!(b, LANE as u32);
@@ -64574,6 +56112,7 @@ pub unsafe fn vst1_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4_t) {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8_t) {
     static_assert_uimm_bits!(LANE, 3);
     *a = simd_extract!(b, LANE as u32);
@@ -67138,6 +58677,7 @@ unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) {
     unsafe extern "unadjusted" {
@@ -67155,6 +58695,7 @@ unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) {
     unsafe extern "unadjusted" {
@@ -67196,6 +58737,7 @@ pub unsafe fn vst1q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2_t) {
 #[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
     unsafe extern "unadjusted" {
@@ -67215,6 +58757,7 @@ pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
 #[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
     unsafe extern "unadjusted" {
@@ -67235,6 +58778,7 @@ pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
     unsafe extern "unadjusted" {
@@ -67252,6 +58796,7 @@ pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
     unsafe extern "unadjusted" {
@@ -67550,6 +59095,7 @@ pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -67571,6 +59117,7 @@ pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -67593,6 +59140,7 @@ pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -67612,6 +59160,7 @@ pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
     static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
@@ -68413,6 +59962,7 @@ pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
     unsafe extern "unadjusted" {
@@ -68430,6 +59980,7 @@ pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
     unsafe extern "unadjusted" {
@@ -68446,6 +59997,7 @@ pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
 #[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
     unsafe extern "unadjusted" {
@@ -68465,6 +60017,7 @@ pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
 #[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
     unsafe extern "unadjusted" {
@@ -68767,6 +60320,7 @@ pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -68793,6 +60347,7 @@ pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -68818,6 +60373,7 @@ pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
 #[cfg_attr(test, assert_instr(st3, LANE = 0))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -68839,6 +60395,7 @@ pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
 #[cfg_attr(test, assert_instr(st3, LANE = 0))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -69685,6 +61242,7 @@ pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst4))]
 pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
     unsafe extern "unadjusted" {
@@ -69709,6 +61267,7 @@ pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst4))]
 pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) {
     unsafe extern "unadjusted" {
@@ -69732,6 +61291,7 @@ pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) {
 #[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
     unsafe extern "unadjusted" {
@@ -69751,6 +61311,7 @@ pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
 #[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) {
     unsafe extern "unadjusted" {
@@ -70102,6 +61663,7 @@ pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -70129,6 +61691,7 @@ pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -70155,6 +61718,7 @@ pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
 #[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
@@ -70183,6 +61747,7 @@ pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
 #[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
@@ -71124,6 +62689,7 @@ pub unsafe fn vstrq_p128(a: *mut p128, b: p128) {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_sub(a, b) }
 }
@@ -71138,6 +62704,7 @@ pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_sub(a, b) }
 }
@@ -73002,6 +64569,7 @@ pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
         let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
@@ -73024,6 +64592,7 @@ pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
         let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
@@ -74134,6 +65703,7 @@ pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
         let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
@@ -74156,6 +65726,7 @@ pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
         let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
@@ -74724,6 +66295,7 @@ pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
         let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
@@ -74746,6 +66318,7 @@ pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
 pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
         let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
index 60c9daef68c42..fbd1967c544ad 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
@@ -5503,8 +5503,12 @@ mod tests {
     test_vcombine!(test_vcombine_s16 => vcombine_s16([3_i16, -4, 5, -6], [13_i16, -14, 15, -16]));
     test_vcombine!(test_vcombine_u16 => vcombine_u16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
     test_vcombine!(test_vcombine_p16 => vcombine_p16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
-    test_vcombine!(test_vcombine_f16 => vcombine_f16([3_f16, 4., 5., 6.],
-    [13_f16, 14., 15., 16.]));
+    #[cfg(not(target_arch = "arm64ec"))]
+    mod fp16 {
+        use super::*;
+        test_vcombine!(test_vcombine_f16 => vcombine_f16([3_f16, 4., 5., 6.],
+        [13_f16, 14., 15., 16.]));
+    }
 
     test_vcombine!(test_vcombine_s32 => vcombine_s32([3_i32, -4], [13_i32, -14]));
     test_vcombine!(test_vcombine_u32 => vcombine_u32([3_u32, 4], [13_u32, 14]));
diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs
index 7d3cbd55ad85c..26a9cb5899183 100644
--- a/library/stdarch/crates/core_arch/src/lib.rs
+++ b/library/stdarch/crates/core_arch/src/lib.rs
@@ -33,7 +33,8 @@
     x86_amx_intrinsics,
     f16,
     aarch64_unstable_target_feature,
-    bigint_helper_methods
+    bigint_helper_methods,
+    funnel_shifts
 )]
 #![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
 #![deny(clippy::missing_inline_in_public_items)]
diff --git a/library/stdarch/crates/core_arch/src/loongarch32/mod.rs b/library/stdarch/crates/core_arch/src/loongarch32/mod.rs
index fb05450373c28..4e3f3d27182e4 100644
--- a/library/stdarch/crates/core_arch/src/loongarch32/mod.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch32/mod.rs
@@ -17,9 +17,10 @@ unsafe extern "unadjusted" {
 /// Generates the cache operation instruction
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn cacop<const IMM12: i32>(a: i32, b: i32) {
-    static_assert_simm_bits!(IMM12, 12);
-    __cacop(a, b, IMM12);
+pub unsafe fn cacop<const IMM5: i32, const IMM_S12: i32>(b: i32) {
+    static_assert_uimm_bits!(IMM5, 5);
+    static_assert_simm_bits!(IMM_S12, 12);
+    __cacop(IMM5, b, IMM_S12);
 }
 
 /// Reads the CSR
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/mod.rs b/library/stdarch/crates/core_arch/src/loongarch64/mod.rs
index e8249805eae26..ab968aff20bbe 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/mod.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/mod.rs
@@ -64,9 +64,10 @@ pub fn crcc_w_d_w(a: i64, b: i32) -> i32 {
 /// Generates the cache operation instruction
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn cacop<const IMM12: i64>(a: i64, b: i64) {
-    static_assert_simm_bits!(IMM12, 12);
-    __cacop(a, b, IMM12);
+pub unsafe fn cacop<const IMM5: i64, const IMM_S12: i64>(b: i64) {
+    static_assert_uimm_bits!(IMM5, 5);
+    static_assert_simm_bits!(IMM_S12, 12);
+    __cacop(IMM5, b, IMM_S12);
 }
 
 /// Reads the CSR
@@ -125,14 +126,16 @@ pub unsafe fn asrtgt(a: i64, b: i64) {
 #[inline]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn lddir<const B: i64>(a: i64) -> i64 {
-    __lddir(a, B)
+pub unsafe fn lddir<const IMM8: i64>(a: i64) -> i64 {
+    static_assert_uimm_bits!(IMM8, 8);
+    __lddir(a, IMM8)
 }
 
 /// Loads the page table entry
 #[inline]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn ldpte<const B: i64>(a: i64) {
-    __ldpte(a, B)
+pub unsafe fn ldpte<const IMM8: i64>(a: i64) {
+    static_assert_uimm_bits!(IMM8, 8);
+    __ldpte(a, IMM8)
 }
diff --git a/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs b/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs
index 710b926f8df4f..8991fe857682b 100644
--- a/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs
@@ -131,17 +131,17 @@ pub fn ibar<const IMM15: i32>() {
 /// Moves data from a GPR to the FCSR
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
-    static_assert_uimm_bits!(IMM5, 5);
-    __movgr2fcsr(IMM5, a);
+pub unsafe fn movgr2fcsr<const IMM2: i32>(a: i32) {
+    static_assert_uimm_bits!(IMM2, 2);
+    __movgr2fcsr(IMM2, a);
 }
 
 /// Moves data from a FCSR to the GPR
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn movfcsr2gr<const IMM5: i32>() -> i32 {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { __movfcsr2gr(IMM5) }
+pub fn movfcsr2gr<const IMM2: i32>() -> i32 {
+    static_assert_uimm_bits!(IMM2, 2);
+    unsafe { __movfcsr2gr(IMM2) }
 }
 
 /// Reads the 8-bit IO-CSR
diff --git a/library/stdarch/crates/core_arch/src/riscv64/mod.rs b/library/stdarch/crates/core_arch/src/riscv64/mod.rs
index 0e860f6f2ad2f..a7efc0c7f58a1 100644
--- a/library/stdarch/crates/core_arch/src/riscv64/mod.rs
+++ b/library/stdarch/crates/core_arch/src/riscv64/mod.rs
@@ -20,7 +20,12 @@ pub use zk::*;
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hlv_wu(src: *const u32) -> u32 {
     let value: u32;
-    asm!(".insn i 0x73, 0x4, {}, {}, 0x681", out(reg) value, in(reg) src, options(readonly, nostack));
+    asm!(
+        ".insn i 0x73, 0x4, {}, {}, 0x681",
+        lateout(reg) value,
+        in(reg) src,
+        options(readonly, nostack, preserves_flags)
+    );
     value
 }
 
@@ -38,7 +43,12 @@ pub unsafe fn hlv_wu(src: *const u32) -> u32 {
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hlv_d(src: *const i64) -> i64 {
     let value: i64;
-    asm!(".insn i 0x73, 0x4, {}, {}, 0x6C0", out(reg) value, in(reg) src, options(readonly, nostack));
+    asm!(
+        ".insn i 0x73, 0x4, {}, {}, 0x6C0",
+        lateout(reg) value,
+        in(reg) src,
+        options(readonly, nostack, preserves_flags)
+    );
     value
 }
 
@@ -53,5 +63,10 @@ pub unsafe fn hlv_d(src: *const i64) -> i64 {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hsv_d(dst: *mut i64, src: i64) {
-    asm!(".insn r 0x73, 0x4, 0x37, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
+    asm!(
+        ".insn r 0x73, 0x4, 0x37, x0, {}, {}",
+        in(reg) dst,
+        in(reg) src,
+        options(nostack, preserves_flags)
+    );
 }
diff --git a/library/stdarch/crates/core_arch/src/riscv64/zk.rs b/library/stdarch/crates/core_arch/src/riscv64/zk.rs
index c6af750bbc570..a30653cbe0885 100644
--- a/library/stdarch/crates/core_arch/src/riscv64/zk.rs
+++ b/library/stdarch/crates/core_arch/src/riscv64/zk.rs
@@ -176,7 +176,7 @@ pub fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
 /// Version: v1.0.1
 ///
 /// Section: 3.9
-#[target_feature(enable = "zkne", enable = "zknd")]
+#[target_feature(enable = "zknd")]
 #[cfg_attr(test, assert_instr(aes64im))]
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs b/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs
index 3ce24324de2e7..1bd147a64808c 100644
--- a/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs
+++ b/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs
@@ -44,7 +44,12 @@ use crate::arch::asm;
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub fn pause() {
-    unsafe { asm!(".insn i 0x0F, 0, x0, x0, 0x010", options(nomem, nostack)) }
+    unsafe {
+        asm!(
+            ".insn i 0x0F, 0, x0, x0, 0x010",
+            options(nomem, nostack, preserves_flags)
+        );
+    }
 }
 
 /// Generates the `NOP` instruction
@@ -54,7 +59,9 @@ pub fn pause() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub fn nop() {
-    unsafe { asm!("nop", options(nomem, nostack)) }
+    unsafe {
+        asm!("nop", options(nomem, nostack, preserves_flags));
+    }
 }
 
 /// Generates the `WFI` instruction
@@ -65,7 +72,7 @@ pub fn nop() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn wfi() {
-    asm!("wfi", options(nomem, nostack))
+    asm!("wfi", options(nomem, nostack, preserves_flags));
 }
 
 /// Generates the `FENCE.I` instruction
@@ -78,7 +85,7 @@ pub unsafe fn wfi() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn fence_i() {
-    asm!("fence.i", options(nostack))
+    asm!("fence.i", options(nostack, preserves_flags));
 }
 
 /// Supervisor memory management fence for given virtual address and address space
@@ -92,7 +99,7 @@ pub unsafe fn fence_i() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sfence_vma(vaddr: usize, asid: usize) {
-    asm!("sfence.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
+    asm!("sfence.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack, preserves_flags));
 }
 
 /// Supervisor memory management fence for given virtual address
@@ -104,7 +111,7 @@ pub unsafe fn sfence_vma(vaddr: usize, asid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sfence_vma_vaddr(vaddr: usize) {
-    asm!("sfence.vma {}, x0", in(reg) vaddr, options(nostack))
+    asm!("sfence.vma {}, x0", in(reg) vaddr, options(nostack, preserves_flags));
 }
 
 /// Supervisor memory management fence for given address space
@@ -118,7 +125,7 @@ pub unsafe fn sfence_vma_vaddr(vaddr: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sfence_vma_asid(asid: usize) {
-    asm!("sfence.vma x0, {}", in(reg) asid, options(nostack))
+    asm!("sfence.vma x0, {}", in(reg) asid, options(nostack, preserves_flags));
 }
 
 /// Supervisor memory management fence for all address spaces and virtual addresses
@@ -129,7 +136,7 @@ pub unsafe fn sfence_vma_asid(asid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sfence_vma_all() {
-    asm!("sfence.vma", options(nostack))
+    asm!("sfence.vma", options(nostack, preserves_flags));
 }
 
 /// Invalidate supervisor translation cache for given virtual address and address space
@@ -139,8 +146,13 @@ pub unsafe fn sfence_vma_all() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sinval_vma(vaddr: usize, asid: usize) {
-    // asm!("sinval.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
-    asm!(".insn r 0x73, 0, 0x0B, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
+    // asm!("sinval.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack, preserves_flags));
+    asm!(
+        ".insn r 0x73, 0, 0x0B, x0, {}, {}",
+        in(reg) vaddr,
+        in(reg) asid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate supervisor translation cache for given virtual address
@@ -150,7 +162,11 @@ pub unsafe fn sinval_vma(vaddr: usize, asid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sinval_vma_vaddr(vaddr: usize) {
-    asm!(".insn r 0x73, 0, 0x0B, x0, {}, x0", in(reg) vaddr, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x0B, x0, {}, x0",
+        in(reg) vaddr,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate supervisor translation cache for given address space
@@ -160,7 +176,11 @@ pub unsafe fn sinval_vma_vaddr(vaddr: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sinval_vma_asid(asid: usize) {
-    asm!(".insn r 0x73, 0, 0x0B, x0, x0, {}", in(reg) asid, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x0B, x0, x0, {}",
+        in(reg) asid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate supervisor translation cache for all address spaces and virtual addresses
@@ -170,7 +190,10 @@ pub unsafe fn sinval_vma_asid(asid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sinval_vma_all() {
-    asm!(".insn r 0x73, 0, 0x0B, x0, x0, x0", options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x0B, x0, x0, x0",
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Generates the `SFENCE.W.INVAL` instruction
@@ -180,8 +203,11 @@ pub unsafe fn sinval_vma_all() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sfence_w_inval() {
-    // asm!("sfence.w.inval", options(nostack))
-    asm!(".insn i 0x73, 0, x0, x0, 0x180", options(nostack))
+    // asm!("sfence.w.inval", options(nostack, preserves_flags));
+    asm!(
+        ".insn i 0x73, 0, x0, x0, 0x180",
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Generates the `SFENCE.INVAL.IR` instruction
@@ -191,8 +217,11 @@ pub unsafe fn sfence_w_inval() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn sfence_inval_ir() {
-    // asm!("sfence.inval.ir", options(nostack))
-    asm!(".insn i 0x73, 0, x0, x0, 0x181", options(nostack))
+    // asm!("sfence.inval.ir", options(nostack, preserves_flags));
+    asm!(
+        ".insn i 0x73, 0, x0, x0, 0x181",
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Loads virtual machine memory by signed byte integer
@@ -207,7 +236,12 @@ pub unsafe fn sfence_inval_ir() {
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hlv_b(src: *const i8) -> i8 {
     let value: i8;
-    asm!(".insn i 0x73, 0x4, {}, {}, 0x600", out(reg) value, in(reg) src, options(readonly, nostack));
+    asm!(
+        ".insn i 0x73, 0x4, {}, {}, 0x600",
+        lateout(reg) value,
+        in(reg) src,
+        options(readonly, nostack, preserves_flags)
+    );
     value
 }
 
@@ -223,7 +257,12 @@ pub unsafe fn hlv_b(src: *const i8) -> i8 {
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hlv_bu(src: *const u8) -> u8 {
     let value: u8;
-    asm!(".insn i 0x73, 0x4, {}, {}, 0x601", out(reg) value, in(reg) src, options(readonly, nostack));
+    asm!(
+        ".insn i 0x73, 0x4, {}, {}, 0x601",
+        lateout(reg) value,
+        in(reg) src,
+        options(readonly, nostack, preserves_flags)
+    );
     value
 }
 
@@ -239,7 +278,12 @@ pub unsafe fn hlv_bu(src: *const u8) -> u8 {
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hlv_h(src: *const i16) -> i16 {
     let value: i16;
-    asm!(".insn i 0x73, 0x4, {}, {}, 0x640", out(reg) value, in(reg) src, options(readonly, nostack));
+    asm!(
+        ".insn i 0x73, 0x4, {}, {}, 0x640",
+        lateout(reg) value,
+        in(reg) src,
+        options(readonly, nostack, preserves_flags)
+    );
     value
 }
 
@@ -255,7 +299,12 @@ pub unsafe fn hlv_h(src: *const i16) -> i16 {
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hlv_hu(src: *const u16) -> u16 {
     let value: u16;
-    asm!(".insn i 0x73, 0x4, {}, {}, 0x641", out(reg) value, in(reg) src, options(readonly, nostack));
+    asm!(
+        ".insn i 0x73, 0x4, {}, {}, 0x641",
+        lateout(reg) value,
+        in(reg) src,
+        options(readonly, nostack, preserves_flags)
+    );
     value
 }
 
@@ -271,7 +320,12 @@ pub unsafe fn hlv_hu(src: *const u16) -> u16 {
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hlvx_hu(src: *const u16) -> u16 {
     let insn: u16;
-    asm!(".insn i 0x73, 0x4, {}, {}, 0x643", out(reg) insn, in(reg) src, options(readonly, nostack));
+    asm!(
+        ".insn i 0x73, 0x4, {}, {}, 0x643",
+        lateout(reg) insn,
+        in(reg) src,
+        options(readonly, nostack, preserves_flags)
+    );
     insn
 }
 
@@ -287,7 +341,12 @@ pub unsafe fn hlvx_hu(src: *const u16) -> u16 {
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hlv_w(src: *const i32) -> i32 {
     let value: i32;
-    asm!(".insn i 0x73, 0x4, {}, {}, 0x680", out(reg) value, in(reg) src, options(readonly, nostack));
+    asm!(
+        ".insn i 0x73, 0x4, {}, {}, 0x680",
+        lateout(reg) value,
+        in(reg) src,
+        options(readonly, nostack, preserves_flags)
+    );
     value
 }
 
@@ -303,7 +362,12 @@ pub unsafe fn hlv_w(src: *const i32) -> i32 {
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hlvx_wu(src: *const u32) -> u32 {
     let insn: u32;
-    asm!(".insn i 0x73, 0x4, {}, {}, 0x683", out(reg) insn, in(reg) src, options(readonly, nostack));
+    asm!(
+        ".insn i 0x73, 0x4, {}, {}, 0x683",
+        lateout(reg) insn,
+        in(reg) src,
+        options(readonly, nostack, preserves_flags)
+    );
     insn
 }
 
@@ -318,7 +382,12 @@ pub unsafe fn hlvx_wu(src: *const u32) -> u32 {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hsv_b(dst: *mut i8, src: i8) {
-    asm!(".insn r 0x73, 0x4, 0x31, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
+    asm!(
+        ".insn r 0x73, 0x4, 0x31, x0, {}, {}",
+        in(reg) dst,
+        in(reg) src,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Stores virtual machine memory by half integer
@@ -332,7 +401,12 @@ pub unsafe fn hsv_b(dst: *mut i8, src: i8) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hsv_h(dst: *mut i16, src: i16) {
-    asm!(".insn r 0x73, 0x4, 0x33, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
+    asm!(
+        ".insn r 0x73, 0x4, 0x33, x0, {}, {}",
+        in(reg) dst,
+        in(reg) src,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Stores virtual machine memory by word integer
@@ -346,7 +420,12 @@ pub unsafe fn hsv_h(dst: *mut i16, src: i16) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hsv_w(dst: *mut i32, src: i32) {
-    asm!(".insn r 0x73, 0x4, 0x35, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
+    asm!(
+        ".insn r 0x73, 0x4, 0x35, x0, {}, {}",
+        in(reg) dst,
+        in(reg) src,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Hypervisor memory management fence for given guest virtual address and guest address space
@@ -360,8 +439,13 @@ pub unsafe fn hsv_w(dst: *mut i32, src: i32) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hfence_vvma(vaddr: usize, asid: usize) {
-    // asm!("hfence.vvma {}, {}", in(reg) vaddr, in(reg) asid)
-    asm!(".insn r 0x73, 0, 0x11, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
+    // asm!("hfence.vvma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack, preserves_flags));
+    asm!(
+        ".insn r 0x73, 0, 0x11, x0, {}, {}",
+        in(reg) vaddr,
+        in(reg) asid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Hypervisor memory management fence for given guest virtual address
@@ -375,7 +459,11 @@ pub unsafe fn hfence_vvma(vaddr: usize, asid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hfence_vvma_vaddr(vaddr: usize) {
-    asm!(".insn r 0x73, 0, 0x11, x0, {}, x0", in(reg) vaddr, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x11, x0, {}, x0",
+        in(reg) vaddr,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Hypervisor memory management fence for given guest address space
@@ -389,7 +477,11 @@ pub unsafe fn hfence_vvma_vaddr(vaddr: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hfence_vvma_asid(asid: usize) {
-    asm!(".insn r 0x73, 0, 0x11, x0, x0, {}", in(reg) asid, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x11, x0, x0, {}",
+        in(reg) asid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Hypervisor memory management fence for all guest address spaces and guest virtual addresses
@@ -403,7 +495,10 @@ pub unsafe fn hfence_vvma_asid(asid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hfence_vvma_all() {
-    asm!(".insn r 0x73, 0, 0x11, x0, x0, x0", options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x11, x0, x0, x0",
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Hypervisor memory management fence for guest physical address and virtual machine
@@ -416,8 +511,13 @@ pub unsafe fn hfence_vvma_all() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hfence_gvma(gaddr: usize, vmid: usize) {
-    // asm!("hfence.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
-    asm!(".insn r 0x73, 0, 0x31, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
+    // asm!("hfence.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack, preserves_flags));
+    asm!(
+        ".insn r 0x73, 0, 0x31, x0, {}, {}",
+        in(reg) gaddr,
+        in(reg) vmid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Hypervisor memory management fence for guest physical address
@@ -429,7 +529,11 @@ pub unsafe fn hfence_gvma(gaddr: usize, vmid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hfence_gvma_gaddr(gaddr: usize) {
-    asm!(".insn r 0x73, 0, 0x31, x0, {}, x0", in(reg) gaddr, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x31, x0, {}, x0",
+        in(reg) gaddr,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Hypervisor memory management fence for given virtual machine
@@ -441,7 +545,11 @@ pub unsafe fn hfence_gvma_gaddr(gaddr: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hfence_gvma_vmid(vmid: usize) {
-    asm!(".insn r 0x73, 0, 0x31, x0, x0, {}", in(reg) vmid, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x31, x0, x0, {}",
+        in(reg) vmid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Hypervisor memory management fence for all virtual machines and guest physical addresses
@@ -453,7 +561,10 @@ pub unsafe fn hfence_gvma_vmid(vmid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hfence_gvma_all() {
-    asm!(".insn r 0x73, 0, 0x31, x0, x0, x0", options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x31, x0, x0, x0",
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate hypervisor translation cache for given guest virtual address and guest address space
@@ -465,8 +576,13 @@ pub unsafe fn hfence_gvma_all() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hinval_vvma(vaddr: usize, asid: usize) {
-    // asm!("hinval.vvma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
-    asm!(".insn r 0x73, 0, 0x13, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
+    // asm!("hinval.vvma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack, preserves_flags));
+    asm!(
+        ".insn r 0x73, 0, 0x13, x0, {}, {}",
+        in(reg) vaddr,
+        in(reg) asid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate hypervisor translation cache for given guest virtual address
@@ -478,7 +594,11 @@ pub unsafe fn hinval_vvma(vaddr: usize, asid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hinval_vvma_vaddr(vaddr: usize) {
-    asm!(".insn r 0x73, 0, 0x13, x0, {}, x0", in(reg) vaddr, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x13, x0, {}, x0",
+        in(reg) vaddr,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate hypervisor translation cache for given guest address space
@@ -490,7 +610,11 @@ pub unsafe fn hinval_vvma_vaddr(vaddr: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hinval_vvma_asid(asid: usize) {
-    asm!(".insn r 0x73, 0, 0x13, x0, x0, {}", in(reg) asid, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x13, x0, x0, {}",
+        in(reg) asid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate hypervisor translation cache for all guest address spaces and guest virtual addresses
@@ -502,7 +626,10 @@ pub unsafe fn hinval_vvma_asid(asid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hinval_vvma_all() {
-    asm!(".insn r 0x73, 0, 0x13, x0, x0, x0", options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x13, x0, x0, x0",
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate hypervisor translation cache for guest physical address and virtual machine
@@ -515,8 +642,13 @@ pub unsafe fn hinval_vvma_all() {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hinval_gvma(gaddr: usize, vmid: usize) {
-    // asm!("hinval.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
-    asm!(".insn r 0x73, 0, 0x33, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
+    // asm!("hinval.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack, preserves_flags));
+    asm!(
+        ".insn r 0x73, 0, 0x33, x0, {}, {}",
+        in(reg) gaddr,
+        in(reg) vmid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate hypervisor translation cache for guest physical address
@@ -528,7 +660,11 @@ pub unsafe fn hinval_gvma(gaddr: usize, vmid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hinval_gvma_gaddr(gaddr: usize) {
-    asm!(".insn r 0x73, 0, 0x33, x0, {}, x0", in(reg) gaddr, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x33, x0, {}, x0",
+        in(reg) gaddr,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate hypervisor translation cache for given virtual machine
@@ -540,7 +676,11 @@ pub unsafe fn hinval_gvma_gaddr(gaddr: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hinval_gvma_vmid(vmid: usize) {
-    asm!(".insn r 0x73, 0, 0x33, x0, x0, {}", in(reg) vmid, options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x33, x0, x0, {}",
+        in(reg) vmid,
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Invalidate hypervisor translation cache for all virtual machines and guest physical addresses
@@ -552,7 +692,10 @@ pub unsafe fn hinval_gvma_vmid(vmid: usize) {
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub unsafe fn hinval_gvma_all() {
-    asm!(".insn r 0x73, 0, 0x33, x0, x0, x0", options(nostack))
+    asm!(
+        ".insn r 0x73, 0, 0x33, x0, x0, x0",
+        options(nostack, preserves_flags)
+    );
 }
 
 /// Reads the floating-point rounding mode register `frm`
@@ -574,6 +717,12 @@ pub unsafe fn hinval_gvma_all() {
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub fn frrm() -> u32 {
     let value: u32;
-    unsafe { asm!("frrm {}", out(reg) value, options(nomem, nostack)) };
+    unsafe {
+        asm!(
+            "frrm {}",
+            out(reg) value,
+            options(nomem, nostack, preserves_flags)
+        );
+    }
     value
 }
diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/zb.rs b/library/stdarch/crates/core_arch/src/riscv_shared/zb.rs
index 9472e3c8be9f6..514afd9080920 100644
--- a/library/stdarch/crates/core_arch/src/riscv_shared/zb.rs
+++ b/library/stdarch/crates/core_arch/src/riscv_shared/zb.rs
@@ -68,7 +68,7 @@ pub fn orc_b(rs: usize) -> usize {
 ///
 /// Section: 2.11
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
-#[target_feature(enable = "zbc")]
+#[target_feature(enable = "zbkc")]
 #[cfg_attr(test, assert_instr(clmul))]
 #[inline]
 pub fn clmul(rs1: usize, rs2: usize) -> usize {
@@ -93,7 +93,7 @@ pub fn clmul(rs1: usize, rs2: usize) -> usize {
 ///
 /// Section: 2.12
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
-#[target_feature(enable = "zbc")]
+#[target_feature(enable = "zbkc")]
 #[cfg_attr(test, assert_instr(clmulh))]
 #[inline]
 pub fn clmulh(rs1: usize, rs2: usize) -> usize {
diff --git a/library/stdarch/crates/core_arch/src/s390x/vector.rs b/library/stdarch/crates/core_arch/src/s390x/vector.rs
index 0ce720a9244cd..f018344ead12d 100644
--- a/library/stdarch/crates/core_arch/src/s390x/vector.rs
+++ b/library/stdarch/crates/core_arch/src/s390x/vector.rs
@@ -94,8 +94,6 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vsrlb"] fn vsrlb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
     #[link_name = "llvm.s390.vslb"] fn vslb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
 
-    #[link_name = "llvm.s390.vsldb"] fn vsldb(a: i8x16, b: i8x16, c: u32) -> i8x16;
-    #[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16;
     #[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16;
 
     #[link_name = "llvm.s390.verimb"] fn verimb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char, d: i32) -> vector_signed_char;
@@ -114,6 +112,9 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vsumqf"] fn vsumqf(a: vector_unsigned_int, b: vector_unsigned_int) -> u128;
     #[link_name = "llvm.s390.vsumqg"] fn vsumqg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> u128;
 
+    #[link_name = "llvm.s390.vaccq"] fn vaccq(a: u128, b: u128) -> u128;
+    #[link_name = "llvm.s390.vacccq"] fn vacccq(a: u128, b: u128, c: u128) -> u128;
+
     #[link_name = "llvm.s390.vscbiq"] fn vscbiq(a: u128, b: u128) -> u128;
     #[link_name = "llvm.s390.vsbiq"] fn vsbiq(a: u128, b: u128, c: u128) -> u128;
     #[link_name = "llvm.s390.vsbcbiq"] fn vsbcbiq(a: u128, b: u128, c: u128) -> u128;
@@ -166,13 +167,6 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple<vector_unsigned_short, i32>;
     #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple<vector_unsigned_int, i32>;
 
-    #[link_name = "llvm.s390.vuplb"] fn vuplb (a: vector_signed_char) -> vector_signed_short;
-    #[link_name = "llvm.s390.vuplhw"] fn vuplhw (a: vector_signed_short) -> vector_signed_int;
-    #[link_name = "llvm.s390.vuplf"] fn vuplf (a: vector_signed_int) -> vector_signed_long_long;
-    #[link_name = "llvm.s390.vupllb"] fn vupllb (a: vector_unsigned_char) -> vector_unsigned_short;
-    #[link_name = "llvm.s390.vupllh"] fn vupllh (a: vector_unsigned_short) -> vector_unsigned_int;
-    #[link_name = "llvm.s390.vupllf"] fn vupllf (a: vector_unsigned_int) -> vector_unsigned_long_long;
-
     #[link_name = "llvm.s390.vavgb"] fn vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
     #[link_name = "llvm.s390.vavgh"] fn vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short;
     #[link_name = "llvm.s390.vavgf"] fn vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
@@ -185,22 +179,6 @@ unsafe extern "unadjusted" {
 
     #[link_name = "llvm.s390.vcksm"] fn vcksm(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
 
-    #[link_name = "llvm.s390.vmeb"] fn vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short;
-    #[link_name = "llvm.s390.vmeh"] fn vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int;
-    #[link_name = "llvm.s390.vmef"] fn vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long;
-
-    #[link_name = "llvm.s390.vmleb"] fn vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
-    #[link_name = "llvm.s390.vmleh"] fn vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
-    #[link_name = "llvm.s390.vmlef"] fn vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
-
-    #[link_name = "llvm.s390.vmob"] fn vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short;
-    #[link_name = "llvm.s390.vmoh"] fn vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int;
-    #[link_name = "llvm.s390.vmof"] fn vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long;
-
-    #[link_name = "llvm.s390.vmlob"] fn vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
-    #[link_name = "llvm.s390.vmloh"] fn vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
-    #[link_name = "llvm.s390.vmlof"] fn vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
-
     #[link_name = "llvm.s390.vmhb"] fn vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
     #[link_name = "llvm.s390.vmhh"] fn vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short;
     #[link_name = "llvm.s390.vmhf"] fn vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
@@ -376,7 +354,20 @@ impl<const N: usize> ShuffleMask<N> {
         ShuffleMask(mask)
     }
 
-    const fn pack() -> Self {
+    const fn even() -> Self {
+        let mut mask = [0; N];
+        let mut i = 0;
+        let mut index = 0;
+        while index < N {
+            mask[index] = i as u32;
+
+            i += 2;
+            index += 1;
+        }
+        ShuffleMask(mask)
+    }
+
+    const fn odd() -> Self {
         let mut mask = [0; N];
         let mut i = 1;
         let mut index = 0;
@@ -389,6 +380,10 @@ impl<const N: usize> ShuffleMask<N> {
         ShuffleMask(mask)
     }
 
+    const fn pack() -> Self {
+        Self::odd()
+    }
+
     const fn unpack_low() -> Self {
         let mut mask = [0; N];
         let mut i = 0;
@@ -1198,10 +1193,8 @@ mod sealed {
     test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32,  "vector-enhancements-1" vfisb] }
     test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] }
 
-    // FIXME(llvm) llvm trunk already lowers roundeven to vfidb, but rust does not use it yet
-    // use https://godbolt.org/z/cWq95fexe to check, and enable the instruction test when it works
-    test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
-    test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }
+    test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, "vector-enhancements-1" vfisb] }
+    test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, vfidb] }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorRoundc {
@@ -2359,6 +2352,9 @@ mod sealed {
         unsafe fn vec_packs(self, b: Other) -> Self::Result;
     }
 
+    // FIXME(llvm): https://github.com/llvm/llvm-project/issues/153655
+    // Other targets can use a min/max for the saturation + a truncation.
+
     impl_vec_trait! { [VectorPacks vec_packs] vpksh (vector_signed_short, vector_signed_short) -> vector_signed_char }
     impl_vec_trait! { [VectorPacks vec_packs] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char }
     impl_vec_trait! { [VectorPacks vec_packs] vpksf (vector_signed_int, vector_signed_int) -> vector_signed_short }
@@ -2580,8 +2576,14 @@ mod sealed {
         unsafe fn vec_unpackl(self) -> Self::Result;
     }
 
-    // FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like
-    // unpachk above. Tracked in https://github.com/llvm/llvm-project/issues/129576.
+    // NOTE: `vuplh` is used for "unpack logical high", hence `vuplhw`.
+    impl_vec_unpack!(unpack_low vuplb vector_signed_char i8x8 vector_signed_short 8);
+    impl_vec_unpack!(unpack_low vuplhw vector_signed_short i16x4 vector_signed_int 4);
+    impl_vec_unpack!(unpack_low vuplf vector_signed_int i32x2 vector_signed_long_long 2);
+
+    impl_vec_unpack!(unpack_low vupllb vector_unsigned_char u8x8 vector_unsigned_short 8);
+    impl_vec_unpack!(unpack_low vupllh vector_unsigned_short u16x4 vector_unsigned_int 4);
+    impl_vec_unpack!(unpack_low vupllf vector_unsigned_int u32x2 vector_unsigned_long_long 2);
 
     impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplb (vector_signed_char) -> vector_signed_short}
     impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int}
@@ -2642,61 +2644,65 @@ mod sealed {
         unsafe fn vec_mule(self, b: Self) -> Result;
     }
 
-    // FIXME(llvm) sadly this does not yet work https://github.com/llvm/llvm-project/issues/129705
-    //    #[target_feature(enable = "vector")]
-    //    #[cfg_attr(test, assert_instr(vmleh))]
-    //    unsafe fn vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int {
-    //        let even_a: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>(
-    //            a,
-    //            a,
-    //            const { ShuffleMask([0, 2, 4, 6]) },
-    //        ));
-    //
-    //        let even_b: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>(
-    //            b,
-    //            b,
-    //            const { ShuffleMask([0, 2, 4, 6]) },
-    //        ));
-    //
-    //        simd_mul(even_a, even_b)
-    //    }
+    macro_rules! impl_vec_mul_even_odd {
+        ($mask:ident $instr:ident $src:ident $shuffled:ident $dst:ident $width:literal) => {
+            #[inline]
+            #[target_feature(enable = "vector")]
+            #[cfg_attr(test, assert_instr($instr))]
+            unsafe fn $instr(a: $src, b: $src) -> $dst {
+                let elems_a: $dst = simd_as(simd_shuffle::<_, _, $shuffled>(
+                    a,
+                    a, // this argument is ignored entirely.
+                    const { ShuffleMask::<$width>::$mask() },
+                ));
 
-    test_impl! { vec_vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmeb, vmeb ] }
-    test_impl! { vec_vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmeh, vmeh ] }
-    test_impl! { vec_vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmef, vmef ] }
+                let elems_b: $dst = simd_as(simd_shuffle::<_, _, $shuffled>(
+                    b,
+                    b, // this argument is ignored entirely.
+                    const { ShuffleMask::<$width>::$mask() },
+                ));
 
-    test_impl! { vec_vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmleb, vmleb ] }
-    test_impl! { vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmleh, vmleh ] }
-    test_impl! { vec_vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlef, vmlef ] }
+                simd_mul(elems_a, elems_b)
+            }
+        };
+    }
+
+    impl_vec_mul_even_odd! { even vmeb vector_signed_char i8x8 vector_signed_short 8 }
+    impl_vec_mul_even_odd! { even vmeh vector_signed_short i16x4 vector_signed_int 4 }
+    impl_vec_mul_even_odd! { even vmef vector_signed_int i32x2 vector_signed_long_long 2 }
 
-    impl_mul!([VectorMule vec_mule] vec_vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short );
-    impl_mul!([VectorMule vec_mule] vec_vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int);
-    impl_mul!([VectorMule vec_mule] vec_vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
+    impl_vec_mul_even_odd! { even vmleb vector_unsigned_char u8x8 vector_unsigned_short 8 }
+    impl_vec_mul_even_odd! { even vmleh vector_unsigned_short u16x4 vector_unsigned_int 4 }
+    impl_vec_mul_even_odd! { even vmlef vector_unsigned_int u32x2 vector_unsigned_long_long 2 }
 
-    impl_mul!([VectorMule vec_mule] vec_vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
-    impl_mul!([VectorMule vec_mule] vec_vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
-    impl_mul!([VectorMule vec_mule] vec_vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
+    impl_mul!([VectorMule vec_mule] vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short );
+    impl_mul!([VectorMule vec_mule] vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int);
+    impl_mul!([VectorMule vec_mule] vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
+
+    impl_mul!([VectorMule vec_mule] vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
+    impl_mul!([VectorMule vec_mule] vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
+    impl_mul!([VectorMule vec_mule] vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorMulo<Result> {
         unsafe fn vec_mulo(self, b: Self) -> Result;
     }
 
-    test_impl! { vec_vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmob, vmob ] }
-    test_impl! { vec_vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmoh, vmoh ] }
-    test_impl! { vec_vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmof, vmof ] }
+    impl_vec_mul_even_odd! { odd vmob vector_signed_char i8x8 vector_signed_short 8 }
+    impl_vec_mul_even_odd! { odd vmoh vector_signed_short i16x4 vector_signed_int 4 }
+    impl_vec_mul_even_odd! { odd vmof vector_signed_int i32x2 vector_signed_long_long 2 }
 
-    test_impl! { vec_vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmlob, vmlob ] }
-    test_impl! { vec_vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmloh, vmloh ] }
-    test_impl! { vec_vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlof, vmlof ] }
+    impl_vec_mul_even_odd! { odd vmlob vector_unsigned_char u8x8 vector_unsigned_short 8 }
+    impl_vec_mul_even_odd! { odd vmloh vector_unsigned_short u16x4 vector_unsigned_int 4 }
+    impl_vec_mul_even_odd! { odd vmlof vector_unsigned_int u32x2 vector_unsigned_long_long 2 }
 
-    impl_mul!([VectorMulo vec_mulo] vec_vmob (vector_signed_char, vector_signed_char) -> vector_signed_short );
-    impl_mul!([VectorMulo vec_mulo] vec_vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int);
-    impl_mul!([VectorMulo vec_mulo] vec_vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
+    impl_mul!([VectorMulo vec_mulo] vmob (vector_signed_char, vector_signed_char) -> vector_signed_short );
+    impl_mul!([VectorMulo vec_mulo] vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int);
+    impl_mul!([VectorMulo vec_mulo] vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
 
-    impl_mul!([VectorMulo vec_mulo] vec_vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
-    impl_mul!([VectorMulo vec_mulo] vec_vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
-    impl_mul!([VectorMulo vec_mulo] vec_vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
+    impl_mul!([VectorMulo vec_mulo] vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
+    impl_mul!([VectorMulo vec_mulo] vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
+    impl_mul!([VectorMulo vec_mulo] vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorMulh<Result> {
@@ -3319,8 +3325,7 @@ mod sealed {
 
     #[inline]
     #[target_feature(enable = "vector")]
-    // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899
-    // #[cfg_attr(test, assert_instr(vsegb))]
+    #[cfg_attr(test, assert_instr(vsegb))]
     pub unsafe fn vsegb(a: vector_signed_char) -> vector_signed_long_long {
         simd_as(simd_shuffle::<_, _, i8x2>(
             a,
@@ -3331,8 +3336,7 @@ mod sealed {
 
     #[inline]
     #[target_feature(enable = "vector")]
-    // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899
-    // #[cfg_attr(test, assert_instr(vsegh))]
+    #[cfg_attr(test, assert_instr(vsegh))]
     pub unsafe fn vsegh(a: vector_signed_short) -> vector_signed_long_long {
         simd_as(simd_shuffle::<_, _, i16x2>(
             a,
@@ -3343,8 +3347,7 @@ mod sealed {
 
     #[inline]
     #[target_feature(enable = "vector")]
-    // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899
-    // #[cfg_attr(test, assert_instr(vsegf))]
+    #[cfg_attr(test, assert_instr(vsegf))]
     pub unsafe fn vsegf(a: vector_signed_int) -> vector_signed_long_long {
         simd_as(simd_shuffle::<_, _, i32x2>(
             a,
@@ -3482,10 +3485,33 @@ mod sealed {
         unsafe fn vec_sldb<const C: u32>(self, b: Self) -> Self;
     }
 
-    // FIXME(llvm) https://github.com/llvm/llvm-project/issues/129955
-    // ideally we could implement this in terms of llvm.fshl.i128
-    // #[link_name = "llvm.fshl.i128"] fn fshl_i128(a: u128, b: u128, c: u128) -> u128;
-    // transmute(fshl_i128(transmute(a), transmute(b), const { C * 8 } ))
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vsldb))]
+    unsafe fn test_vec_sld(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
+        a.vec_sld::<13>(b)
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vsldb))]
+    unsafe fn test_vec_sldw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
+        a.vec_sldw::<3>(b)
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector-enhancements-2")]
+    #[cfg_attr(test, assert_instr(vsld))]
+    unsafe fn test_vec_sldb(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
+        a.vec_sldb::<7>(b)
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector-enhancements-2")]
+    #[cfg_attr(test, assert_instr(vsrd))]
+    unsafe fn test_vec_srdb(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
+        a.vec_srdb::<7>(b)
+    }
 
     macro_rules! impl_vec_sld {
         ($($ty:ident)*) => {
@@ -3496,21 +3522,21 @@ mod sealed {
                     #[target_feature(enable = "vector")]
                     unsafe fn vec_sld<const C: u32>(self, b: Self) -> Self {
                         static_assert_uimm_bits!(C, 4);
-                        transmute(vsldb(transmute(self), transmute(b), C))
+                        transmute(u128::funnel_shl(transmute(self), transmute(b), C  * 8))
                     }
 
                     #[inline]
                     #[target_feature(enable = "vector")]
                     unsafe fn vec_sldw<const C: u32>(self, b: Self) -> Self {
                         static_assert_uimm_bits!(C, 2);
-                        transmute(vsldb(transmute(self), transmute(b), const { 4 * C }))
+                        transmute(u128::funnel_shl(transmute(self), transmute(b), C * 4 * 8))
                     }
 
                     #[inline]
                     #[target_feature(enable = "vector-enhancements-2")]
                     unsafe fn vec_sldb<const C: u32>(self, b: Self) -> Self {
                         static_assert_uimm_bits!(C, 3);
-                        transmute(vsld(transmute(self), transmute(b), C))
+                        transmute(u128::funnel_shl(transmute(self), transmute(b), C))
                     }
                 }
 
@@ -3521,6 +3547,11 @@ mod sealed {
                     unsafe fn vec_srdb<const C: u32>(self, b: Self) -> Self {
                         static_assert_uimm_bits!(C, 3);
                         transmute(vsrd(transmute(self), transmute(b), C))
+                        // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129955#issuecomment-3207488190
+                        // LLVM currently rewrites `fshr` to `fshl`, and the logic in the s390x
+                        // backend cannot deal with that yet.
+                        // #[link_name = "llvm.fshr.i128"] fn fshr_i128(a: u128, b: u128, c: u128) -> u128;
+                        // transmute(fshr_i128(transmute(self), transmute(b), const { C as u128 }))
                     }
                 }
             )*
@@ -4676,11 +4707,9 @@ pub unsafe fn vec_subc_u128(
     a: vector_unsigned_char,
     b: vector_unsigned_char,
 ) -> vector_unsigned_char {
-    // FIXME(llvm) sadly this does not work https://github.com/llvm/llvm-project/issues/129608
-    // let a: u128 = transmute(a);
-    // let b: u128 = transmute(b);
-    // transmute(!a.overflowing_sub(b).1 as u128)
-    transmute(vscbiq(transmute(a), transmute(b)))
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    transmute(!a.overflowing_sub(b).1 as u128)
 }
 
 /// Vector Add Compute Carryout unsigned 128-bits
@@ -4694,7 +4723,9 @@ pub unsafe fn vec_addc_u128(
 ) -> vector_unsigned_char {
     let a: u128 = transmute(a);
     let b: u128 = transmute(b);
-    transmute(a.overflowing_add(b).1 as u128)
+    // FIXME(llvm) https://github.com/llvm/llvm-project/pull/153557
+    // transmute(a.overflowing_add(b).1 as u128)
+    transmute(vaccq(a, b))
 }
 
 /// Vector Add With Carry unsigned 128-bits
@@ -4710,7 +4741,7 @@ pub unsafe fn vec_adde_u128(
     let a: u128 = transmute(a);
     let b: u128 = transmute(b);
     let c: u128 = transmute(c);
-    // FIXME(llvm) sadly this does not work
+    // FIXME(llvm) https://github.com/llvm/llvm-project/pull/153557
     //     let (d, _carry) = a.carrying_add(b, c & 1 != 0);
     //     transmute(d)
     transmute(vacq(a, b, c))
@@ -4729,8 +4760,10 @@ pub unsafe fn vec_addec_u128(
     let a: u128 = transmute(a);
     let b: u128 = transmute(b);
     let c: u128 = transmute(c);
-    let (_d, carry) = a.carrying_add(b, c & 1 != 0);
-    transmute(carry as u128)
+    // FIXME(llvm) https://github.com/llvm/llvm-project/pull/153557
+    // let (_d, carry) = a.carrying_add(b, c & 1 != 0);
+    // transmute(carry as u128)
+    transmute(vacccq(a, b, c))
 }
 
 /// Vector Subtract with Carryout
diff --git a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs
index 108bc3125c5f3..c864d6a516e08 100644
--- a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs
+++ b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs
@@ -141,16 +141,6 @@ unsafe extern "unadjusted" {
     fn llvm_f64x2_max(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2;
 }
 
-#[repr(C, packed)]
-#[derive(Copy)]
-struct Unaligned<T>(T);
-
-impl<T: Copy> Clone for Unaligned<T> {
-    fn clone(&self) -> Unaligned<T> {
-        *self
-    }
-}
-
 /// Loads a `v128` vector from the given heap address.
 ///
 /// This intrinsic will emit a load with an alignment of 1. While this is
@@ -179,7 +169,7 @@ impl<T: Copy> Clone for Unaligned<T> {
 #[doc(alias("v128.load"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub unsafe fn v128_load(m: *const v128) -> v128 {
-    (*(m as *const Unaligned<v128>)).0
+    m.read_unaligned()
 }
 
 /// Load eight 8-bit integers and sign extend each one to a 16-bit lane
@@ -196,8 +186,8 @@ pub unsafe fn v128_load(m: *const v128) -> v128 {
 #[doc(alias("v128.load8x8_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub unsafe fn i16x8_load_extend_i8x8(m: *const i8) -> v128 {
-    let m = *(m as *const Unaligned<simd::i8x8>);
-    simd_cast::<_, simd::i16x8>(m.0).v128()
+    let m = m.cast::<simd::i8x8>().read_unaligned();
+    simd_cast::<_, simd::i16x8>(m).v128()
 }
 
 /// Load eight 8-bit integers and zero extend each one to a 16-bit lane
@@ -214,8 +204,8 @@ pub unsafe fn i16x8_load_extend_i8x8(m: *const i8) -> v128 {
 #[doc(alias("v128.load8x8_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub unsafe fn i16x8_load_extend_u8x8(m: *const u8) -> v128 {
-    let m = *(m as *const Unaligned<simd::u8x8>);
-    simd_cast::<_, simd::u16x8>(m.0).v128()
+    let m = m.cast::<simd::u8x8>().read_unaligned();
+    simd_cast::<_, simd::u16x8>(m).v128()
 }
 
 #[stable(feature = "wasm_simd", since = "1.54.0")]
@@ -235,8 +225,8 @@ pub use i16x8_load_extend_u8x8 as u16x8_load_extend_u8x8;
 #[doc(alias("v128.load16x4_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub unsafe fn i32x4_load_extend_i16x4(m: *const i16) -> v128 {
-    let m = *(m as *const Unaligned<simd::i16x4>);
-    simd_cast::<_, simd::i32x4>(m.0).v128()
+    let m = m.cast::<simd::i16x4>().read_unaligned();
+    simd_cast::<_, simd::i32x4>(m).v128()
 }
 
 /// Load four 16-bit integers and zero extend each one to a 32-bit lane
@@ -253,8 +243,8 @@ pub unsafe fn i32x4_load_extend_i16x4(m: *const i16) -> v128 {
 #[doc(alias("v128.load16x4_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub unsafe fn i32x4_load_extend_u16x4(m: *const u16) -> v128 {
-    let m = *(m as *const Unaligned<simd::u16x4>);
-    simd_cast::<_, simd::u32x4>(m.0).v128()
+    let m = m.cast::<simd::u16x4>().read_unaligned();
+    simd_cast::<_, simd::u32x4>(m).v128()
 }
 
 #[stable(feature = "wasm_simd", since = "1.54.0")]
@@ -274,8 +264,8 @@ pub use i32x4_load_extend_u16x4 as u32x4_load_extend_u16x4;
 #[doc(alias("v128.load32x2_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub unsafe fn i64x2_load_extend_i32x2(m: *const i32) -> v128 {
-    let m = *(m as *const Unaligned<simd::i32x2>);
-    simd_cast::<_, simd::i64x2>(m.0).v128()
+    let m = m.cast::<simd::i32x2>().read_unaligned();
+    simd_cast::<_, simd::i64x2>(m).v128()
 }
 
 /// Load two 32-bit integers and zero extend each one to a 64-bit lane
@@ -292,8 +282,8 @@ pub unsafe fn i64x2_load_extend_i32x2(m: *const i32) -> v128 {
 #[doc(alias("v128.load32x2_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub unsafe fn i64x2_load_extend_u32x2(m: *const u32) -> v128 {
-    let m = *(m as *const Unaligned<simd::u32x2>);
-    simd_cast::<_, simd::u64x2>(m.0).v128()
+    let m = m.cast::<simd::u32x2>().read_unaligned();
+    simd_cast::<_, simd::u64x2>(m).v128()
 }
 
 #[stable(feature = "wasm_simd", since = "1.54.0")]
@@ -453,7 +443,7 @@ pub unsafe fn v128_load64_zero(m: *const u64) -> v128 {
 #[doc(alias("v128.store"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub unsafe fn v128_store(m: *mut v128, a: v128) {
-    *(m as *mut Unaligned<v128>) = Unaligned(a);
+    m.write_unaligned(a)
 }
 
 /// Loads an 8-bit value from `m` and sets lane `L` of `v` to that value.
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
index d53f83c0a10bc..52c6a11a43f0e 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
@@ -5823,7 +5823,7 @@ pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> _
 #[inline]
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
+#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
 #[rustc_legacy_const_generics(1)]
 pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
     unsafe {
@@ -5897,7 +5897,7 @@ pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> _
 #[inline]
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
+#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
 #[rustc_legacy_const_generics(1)]
 pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
     unsafe {
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs b/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs
index 8c914803c665d..a86fc7199b83c 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs
@@ -17282,14 +17282,14 @@ mod tests {
         assert_eq_m512h(a, b);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_load_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_load_sh(addr_of!(a).cast());
         assert_eq_m128h(a, b);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_load_sh() {
         let a = _mm_set_sh(1.0);
         let src = _mm_set_sh(2.);
@@ -17299,7 +17299,7 @@ mod tests {
         assert_eq_m128h(src, b);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_load_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
@@ -17344,7 +17344,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_move_sh() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_sh(9.0);
@@ -17353,7 +17353,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_move_sh() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_sh(9.0);
@@ -17363,7 +17363,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_move_sh() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_sh(9.0);
@@ -17402,7 +17402,7 @@ mod tests {
         assert_eq_m512h(a, b);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_store_sh() {
         let a = _mm_set_sh(1.0);
         let mut b = _mm_setzero_ph();
@@ -17410,7 +17410,7 @@ mod tests {
         assert_eq_m128h(a, b);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_store_sh() {
         let a = _mm_set_sh(1.0);
         let mut b = _mm_setzero_ph();
@@ -17655,7 +17655,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_add_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17664,7 +17664,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_add_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17681,7 +17681,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_add_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17695,7 +17695,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_add_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17704,7 +17704,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_add_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17717,7 +17717,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_add_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17945,7 +17945,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_sub_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17954,7 +17954,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_sub_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17971,7 +17971,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_sub_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17985,7 +17985,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_sub_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -17994,7 +17994,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_sub_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18007,7 +18007,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_sub_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18235,7 +18235,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mul_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18244,7 +18244,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_mul_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18261,7 +18261,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_mul_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18275,7 +18275,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mul_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18284,7 +18284,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_mul_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18297,7 +18297,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_mul_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18457,7 +18457,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_div_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18466,7 +18466,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_div_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18483,7 +18483,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_div_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18497,7 +18497,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_div_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18506,7 +18506,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_div_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18519,7 +18519,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_div_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
@@ -18680,7 +18680,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18689,7 +18689,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_mul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18701,7 +18701,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_mul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18711,7 +18711,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18720,7 +18720,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_mul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18730,7 +18730,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_mul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18888,7 +18888,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18897,7 +18897,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18909,7 +18909,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18919,7 +18919,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18928,7 +18928,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -18938,7 +18938,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -19096,7 +19096,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19105,7 +19105,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_cmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19115,7 +19115,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_cmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19124,7 +19124,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19133,7 +19133,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_cmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19145,7 +19145,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_cmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19304,7 +19304,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fcmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19313,7 +19313,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fcmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19323,7 +19323,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fcmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19332,7 +19332,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fcmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19341,7 +19341,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fcmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19353,7 +19353,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fcmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
@@ -19692,7 +19692,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -19702,7 +19702,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -19715,7 +19715,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -19728,7 +19728,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -19741,7 +19741,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -19751,7 +19751,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -19768,7 +19768,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -19785,7 +19785,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -20002,7 +20002,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fcmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -20012,7 +20012,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fcmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -20025,7 +20025,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fcmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -20038,7 +20038,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fcmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -20051,7 +20051,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fcmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -20061,7 +20061,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fcmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -20078,7 +20078,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fcmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -20095,7 +20095,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fcmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
@@ -20311,7 +20311,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20321,7 +20321,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20334,7 +20334,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20347,7 +20347,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20360,7 +20360,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20370,7 +20370,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20387,7 +20387,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20404,7 +20404,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20620,7 +20620,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20630,7 +20630,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20643,7 +20643,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20656,7 +20656,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20669,7 +20669,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20930,7 +20930,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fnmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20940,7 +20940,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fnmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20953,7 +20953,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fnmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20966,7 +20966,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fnmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20979,7 +20979,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fnmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -20989,7 +20989,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fnmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21006,7 +21006,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fnmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21023,7 +21023,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fnmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21240,7 +21240,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fnmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21250,7 +21250,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fnmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21263,7 +21263,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fnmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21276,7 +21276,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fnmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21289,7 +21289,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_fnmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21299,7 +21299,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_fnmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21316,7 +21316,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask3_fnmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21333,7 +21333,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_fnmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -21851,7 +21851,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_rcp_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -21860,7 +21860,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_rcp_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -21873,7 +21873,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_rcp_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -21970,7 +21970,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_rsqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
@@ -21979,7 +21979,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_rsqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
@@ -21992,7 +21992,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_rsqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
@@ -22127,7 +22127,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_sqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
@@ -22136,7 +22136,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_sqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
@@ -22149,7 +22149,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_sqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
@@ -22161,7 +22161,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_sqrt_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
@@ -22170,7 +22170,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_sqrt_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
@@ -22187,7 +22187,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_sqrt_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
@@ -22338,7 +22338,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_max_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22347,7 +22347,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_max_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22360,7 +22360,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_max_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22372,7 +22372,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_max_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22381,7 +22381,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_max_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22398,7 +22398,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_max_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22549,7 +22549,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_min_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22558,7 +22558,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_min_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22571,7 +22571,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_min_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22583,7 +22583,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_min_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22592,7 +22592,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_min_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22609,7 +22609,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_min_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
@@ -22746,7 +22746,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_getexp_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -22755,7 +22755,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_getexp_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -22768,7 +22768,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_getexp_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -22780,7 +22780,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_getexp_round_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -22789,7 +22789,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_getexp_round_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -22802,7 +22802,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_getexp_round_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -22958,7 +22958,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_getmant_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -22967,7 +22967,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_getmant_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -22980,7 +22980,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_getmant_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -22992,7 +22992,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_getmant_round_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -23003,7 +23003,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_getmant_round_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -23024,7 +23024,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_getmant_round_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -23167,7 +23167,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_roundscale_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
@@ -23176,7 +23176,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_roundscale_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
@@ -23189,7 +23189,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_roundscale_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
@@ -23201,7 +23201,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_roundscale_round_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
@@ -23210,7 +23210,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_roundscale_round_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
@@ -23223,7 +23223,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_roundscale_round_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
@@ -23372,7 +23372,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_scalef_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -23381,7 +23381,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_scalef_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -23394,7 +23394,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_scalef_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -23406,7 +23406,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_scalef_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -23415,7 +23415,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_scalef_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -23432,7 +23432,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_scalef_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
@@ -23576,7 +23576,7 @@ mod tests {
         assert_eq_m512h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_reduce_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
@@ -23585,7 +23585,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_reduce_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
@@ -23598,7 +23598,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_reduce_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
@@ -23610,7 +23610,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_reduce_round_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
@@ -23619,7 +23619,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_reduce_round_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
@@ -23636,7 +23636,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_reduce_round_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
@@ -24505,7 +24505,7 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cvti32_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvti32_sh(a, 10);
@@ -24513,7 +24513,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cvt_roundi32_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
@@ -24645,7 +24645,7 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cvtu32_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtu32_sh(a, 10);
@@ -24653,7 +24653,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cvt_roundu32_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
@@ -24711,7 +24711,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_cvtepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvtepi64_ph(a);
@@ -24719,7 +24719,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_mask_cvtepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
@@ -24728,7 +24728,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_maskz_cvtepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvtepi64_ph(0b01010101, a);
@@ -24736,7 +24736,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_cvt_roundepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
@@ -24755,7 +24755,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_maskz_cvt_roundepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -24815,7 +24815,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_cvtepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvtepu64_ph(a);
@@ -24823,7 +24823,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_mask_cvtepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
@@ -24832,7 +24832,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_maskz_cvtepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvtepu64_ph(0b01010101, a);
@@ -24840,7 +24840,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_cvt_roundepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
@@ -24848,7 +24848,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_mask_cvt_roundepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
@@ -24859,7 +24859,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_maskz_cvt_roundepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -25005,7 +25005,7 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cvtss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
@@ -25014,7 +25014,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_cvtss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
@@ -25027,7 +25027,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_cvtss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
@@ -25039,7 +25039,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cvt_roundss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
@@ -25048,7 +25048,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_cvt_roundss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
@@ -25065,7 +25065,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_cvt_roundss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
@@ -25129,7 +25129,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_cvtpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvtpd_ph(a);
@@ -25137,7 +25137,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_mask_cvtpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
@@ -25146,7 +25146,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_maskz_cvtpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvtpd_ph(0b01010101, a);
@@ -25154,7 +25154,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_cvt_roundpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
@@ -25162,7 +25162,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_mask_cvt_roundpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
@@ -25173,7 +25173,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm512_maskz_cvt_roundpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -25183,7 +25183,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cvtsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
@@ -25192,7 +25192,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_cvtsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
@@ -25205,7 +25205,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_cvtsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
@@ -25217,7 +25217,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_cvt_roundsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
@@ -25226,7 +25226,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_mask_cvt_roundsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
@@ -25243,7 +25243,7 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
+    #[simd_test(enable = "avx512fp16,avx512vl")]
     unsafe fn test_mm_maskz_cvt_roundsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/config.rs b/library/stdarch/crates/intrinsic-test/src/arm/config.rs
index 9a7b37253d1cb..72e997de154ab 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/config.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/config.rs
@@ -1,14 +1,17 @@
-pub fn build_notices(line_prefix: &str) -> String {
-    format!(
-        "\
-{line_prefix}This is a transient test file, not intended for distribution. Some aspects of the
-{line_prefix}test are derived from a JSON specification, published under the same license as the
-{line_prefix}`intrinsic-test` crate.\n
-"
-    )
-}
+pub const NOTICE: &str = "\
+// This is a transient test file, not intended for distribution. Some aspects of the
+// test are derived from a JSON specification, published under the same license as the
+// `intrinsic-test` crate.\n";
+
+pub const POLY128_OSTREAM_DECL: &str = r#"
+#ifdef __aarch64__
+std::ostream& operator<<(std::ostream& os, poly128_t value);
+#endif
+"#;
 
-pub const POLY128_OSTREAM_DEF: &str = r#"std::ostream& operator<<(std::ostream& os, poly128_t value) {
+pub const POLY128_OSTREAM_DEF: &str = r#"
+#ifdef __aarch64__
+std::ostream& operator<<(std::ostream& os, poly128_t value) {
     std::stringstream temp;
     do {
       int n = value % 10;
@@ -19,7 +22,9 @@ pub const POLY128_OSTREAM_DEF: &str = r#"std::ostream& operator<<(std::ostream&
     std::string res(tempstr.rbegin(), tempstr.rend());
     os << res;
     return os;
-}"#;
+}
+#endif
+"#;
 
 // Format f16 values (and vectors containing them) in a way that is consistent with C.
 pub const F16_FORMATTING_DEF: &str = r#"
@@ -118,4 +123,10 @@ pub const AARCH_CONFIGURATIONS: &str = r#"
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
 #![feature(fmt_helpers_for_derive)]
 #![feature(stdarch_neon_f16)]
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+use core::arch::aarch64::*;
+
+#[cfg(target_arch = "arm")]
+use core::arch::arm::*;
 "#;
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs
index fd93eff76e09c..29343bee4c300 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs
@@ -1,7 +1,4 @@
-use crate::common::argument::ArgumentList;
-use crate::common::indentation::Indentation;
-use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition};
-use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
+use crate::common::intrinsic_helpers::IntrinsicType;
 use std::ops::{Deref, DerefMut};
 
 #[derive(Debug, Clone, PartialEq)]
@@ -23,83 +20,3 @@ impl DerefMut for ArmIntrinsicType {
         &mut self.data
     }
 }
-
-impl IntrinsicDefinition<ArmIntrinsicType> for Intrinsic<ArmIntrinsicType> {
-    fn arguments(&self) -> ArgumentList<ArmIntrinsicType> {
-        self.arguments.clone()
-    }
-
-    fn results(&self) -> ArmIntrinsicType {
-        self.results.clone()
-    }
-
-    fn name(&self) -> String {
-        self.name.clone()
-    }
-
-    /// Generates a std::cout for the intrinsics results that will match the
-    /// rust debug output format for the return type. The generated line assumes
-    /// there is an int i in scope which is the current pass number.
-    fn print_result_c(&self, indentation: Indentation, additional: &str) -> String {
-        let lanes = if self.results().num_vectors() > 1 {
-            (0..self.results().num_vectors())
-                .map(|vector| {
-                    format!(
-                        r#""{ty}(" << {lanes} << ")""#,
-                        ty = self.results().c_single_vector_type(),
-                        lanes = (0..self.results().num_lanes())
-                            .map(move |idx| -> std::string::String {
-                                format!(
-                                    "{cast}{lane_fn}(__return_value.val[{vector}], {lane})",
-                                    cast = self.results().c_promotion(),
-                                    lane_fn = self.results().get_lane_function(),
-                                    lane = idx,
-                                    vector = vector,
-                                )
-                            })
-                            .collect::<Vec<_>>()
-                            .join(r#" << ", " << "#)
-                    )
-                })
-                .collect::<Vec<_>>()
-                .join(r#" << ", " << "#)
-        } else if self.results().num_lanes() > 1 {
-            (0..self.results().num_lanes())
-                .map(|idx| -> std::string::String {
-                    format!(
-                        "{cast}{lane_fn}(__return_value, {lane})",
-                        cast = self.results().c_promotion(),
-                        lane_fn = self.results().get_lane_function(),
-                        lane = idx
-                    )
-                })
-                .collect::<Vec<_>>()
-                .join(r#" << ", " << "#)
-        } else {
-            format!(
-                "{promote}cast<{cast}>(__return_value)",
-                cast = match self.results.kind() {
-                    TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(),
-                    TypeKind::Float if self.results().inner_size() == 32 => "float".to_string(),
-                    TypeKind::Float if self.results().inner_size() == 64 => "double".to_string(),
-                    TypeKind::Int(Sign::Signed) => format!("int{}_t", self.results().inner_size()),
-                    TypeKind::Int(Sign::Unsigned) =>
-                        format!("uint{}_t", self.results().inner_size()),
-                    TypeKind::Poly => format!("poly{}_t", self.results().inner_size()),
-                    ty => todo!("print_result_c - Unknown type: {:#?}", ty),
-                },
-                promote = self.results().c_promotion(),
-            )
-        };
-
-        format!(
-            r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) <<  {lanes} << "{close}" << std::endl;"#,
-            ty = if self.results().is_simd() {
-                format!("{}(", self.results().c_type())
-            } else {
-                String::from("")
-            },
-            close = if self.results.is_simd() { ")" } else { "" },
-        )
-    }
-}
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs b/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs
index b019abab21346..65c179ef0d083 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs
@@ -113,7 +113,7 @@ fn json_to_intrinsic(
     Ok(Intrinsic {
         name,
         arguments,
-        results: results,
+        results,
         arch_tags: intr.architectures,
     })
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
index 51f5ac4283783..08dc2d38702cd 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
@@ -5,20 +5,11 @@ mod intrinsic;
 mod json_parser;
 mod types;
 
-use std::fs::{self, File};
-
-use rayon::prelude::*;
-
+use crate::common::SupportedArchitectureTest;
 use crate::common::cli::ProcessedCli;
-use crate::common::compare::compare_outputs;
-use crate::common::gen_c::{write_main_cpp, write_mod_cpp};
-use crate::common::gen_rust::{
-    compile_rust_programs, write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs, write_main_rs,
-};
+use crate::common::compile_c::CppCompilation;
 use crate::common::intrinsic::Intrinsic;
 use crate::common::intrinsic_helpers::TypeKind;
-use crate::common::{SupportedArchitectureTest, chunk_info};
-use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, POLY128_OSTREAM_DEF, build_notices};
 use intrinsic::ArmIntrinsicType;
 use json_parser::get_neon_intrinsics;
 
@@ -28,7 +19,30 @@ pub struct ArmArchitectureTest {
 }
 
 impl SupportedArchitectureTest for ArmArchitectureTest {
-    fn create(cli_options: ProcessedCli) -> Box<Self> {
+    type IntrinsicImpl = ArmIntrinsicType;
+
+    fn cli_options(&self) -> &ProcessedCli {
+        &self.cli_options
+    }
+
+    fn intrinsics(&self) -> &[Intrinsic<ArmIntrinsicType>] {
+        &self.intrinsics
+    }
+
+    const NOTICE: &str = config::NOTICE;
+
+    const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"];
+    const PLATFORM_C_DEFINITIONS: &str = config::POLY128_OSTREAM_DEF;
+    const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::POLY128_OSTREAM_DECL;
+
+    const PLATFORM_RUST_DEFINITIONS: &str = config::F16_FORMATTING_DEF;
+    const PLATFORM_RUST_CFGS: &str = config::AARCH_CONFIGURATIONS;
+
+    fn cpp_compilation(&self) -> Option<CppCompilation> {
+        compile::build_cpp_compilation(&self.cli_options)
+    }
+
+    fn create(cli_options: ProcessedCli) -> Self {
         let a32 = cli_options.target.contains("v7");
         let mut intrinsics = get_neon_intrinsics(&cli_options.filename, &cli_options.target)
             .expect("Error parsing input file");
@@ -50,149 +64,9 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
             .collect::<Vec<_>>();
         intrinsics.dedup();
 
-        Box::new(Self {
+        Self {
             intrinsics,
             cli_options,
-        })
-    }
-
-    fn build_c_file(&self) -> bool {
-        let c_target = "aarch64";
-        let platform_headers = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"];
-
-        let (chunk_size, chunk_count) = chunk_info(self.intrinsics.len());
-
-        let cpp_compiler_wrapped = compile::build_cpp_compilation(&self.cli_options);
-
-        let notice = &build_notices("// ");
-        fs::create_dir_all("c_programs").unwrap();
-        self.intrinsics
-            .par_chunks(chunk_size)
-            .enumerate()
-            .map(|(i, chunk)| {
-                let c_filename = format!("c_programs/mod_{i}.cpp");
-                let mut file = File::create(&c_filename).unwrap();
-                write_mod_cpp(&mut file, notice, c_target, platform_headers, chunk).unwrap();
-
-                // compile this cpp file into a .o file.
-                //
-                // This is done because `cpp_compiler_wrapped` is None when
-                // the --generate-only flag is passed
-                if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
-                    let output = cpp_compiler
-                        .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?;
-                    assert!(output.status.success(), "{output:?}");
-                }
-
-                Ok(())
-            })
-            .collect::<Result<(), std::io::Error>>()
-            .unwrap();
-
-        let mut file = File::create("c_programs/main.cpp").unwrap();
-        write_main_cpp(
-            &mut file,
-            c_target,
-            POLY128_OSTREAM_DEF,
-            self.intrinsics.iter().map(|i| i.name.as_str()),
-        )
-        .unwrap();
-
-        // This is done because `cpp_compiler_wrapped` is None when
-        // the --generate-only flag is passed
-        if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
-            // compile this cpp file into a .o file
-            info!("compiling main.cpp");
-            let output = cpp_compiler
-                .compile_object_file("main.cpp", "intrinsic-test-programs.o")
-                .unwrap();
-            assert!(output.status.success(), "{output:?}");
-
-            let object_files = (0..chunk_count)
-                .map(|i| format!("mod_{i}.o"))
-                .chain(["intrinsic-test-programs.o".to_owned()]);
-
-            let output = cpp_compiler
-                .link_executable(object_files, "intrinsic-test-programs")
-                .unwrap();
-            assert!(output.status.success(), "{output:?}");
-        }
-
-        true
-    }
-
-    fn build_rust_file(&self) -> bool {
-        std::fs::create_dir_all("rust_programs/src").unwrap();
-
-        let architecture = if self.cli_options.target.contains("v7") {
-            "arm"
-        } else {
-            "aarch64"
-        };
-
-        let (chunk_size, chunk_count) = chunk_info(self.intrinsics.len());
-
-        let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
-        write_bin_cargo_toml(&mut cargo, chunk_count).unwrap();
-
-        let mut main_rs = File::create("rust_programs/src/main.rs").unwrap();
-        write_main_rs(
-            &mut main_rs,
-            chunk_count,
-            AARCH_CONFIGURATIONS,
-            "",
-            self.intrinsics.iter().map(|i| i.name.as_str()),
-        )
-        .unwrap();
-
-        let target = &self.cli_options.target;
-        let toolchain = self.cli_options.toolchain.as_deref();
-        let linker = self.cli_options.linker.as_deref();
-
-        let notice = &build_notices("// ");
-        self.intrinsics
-            .par_chunks(chunk_size)
-            .enumerate()
-            .map(|(i, chunk)| {
-                std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?;
-
-                let rust_filename = format!("rust_programs/mod_{i}/src/lib.rs");
-                trace!("generating `{rust_filename}`");
-                let mut file = File::create(rust_filename)?;
-
-                let cfg = AARCH_CONFIGURATIONS;
-                let definitions = F16_FORMATTING_DEF;
-                write_lib_rs(&mut file, architecture, notice, cfg, definitions, chunk)?;
-
-                let toml_filename = format!("rust_programs/mod_{i}/Cargo.toml");
-                trace!("generating `{toml_filename}`");
-                let mut file = File::create(toml_filename).unwrap();
-
-                write_lib_cargo_toml(&mut file, &format!("mod_{i}"))?;
-
-                Ok(())
-            })
-            .collect::<Result<(), std::io::Error>>()
-            .unwrap();
-
-        compile_rust_programs(toolchain, target, linker)
-    }
-
-    fn compare_outputs(&self) -> bool {
-        if self.cli_options.toolchain.is_some() {
-            let intrinsics_name_list = self
-                .intrinsics
-                .iter()
-                .map(|i| i.name.clone())
-                .collect::<Vec<_>>();
-
-            compare_outputs(
-                &intrinsics_name_list,
-                &self.cli_options.runner,
-                &self.cli_options.target,
-            )
-        } else {
-            true
         }
     }
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/types.rs b/library/stdarch/crates/intrinsic-test/src/arm/types.rs
index 32f8f106ce26d..e86a2c5189f0b 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/types.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/types.rs
@@ -1,5 +1,6 @@
 use super::intrinsic::ArmIntrinsicType;
 use crate::common::cli::Language;
+use crate::common::indentation::Indentation;
 use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
 
 impl IntrinsicTypeDefinition for ArmIntrinsicType {
@@ -98,6 +99,71 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
             todo!("get_lane_function IntrinsicType: {:#?}", self)
         }
     }
+
+    /// Generates a std::cout for the intrinsics results that will match the
+    /// rust debug output format for the return type. The generated line assumes
+    /// there is an int i in scope which is the current pass number.
+    fn print_result_c(&self, indentation: Indentation, additional: &str) -> String {
+        let lanes = if self.num_vectors() > 1 {
+            (0..self.num_vectors())
+                .map(|vector| {
+                    format!(
+                        r#""{ty}(" << {lanes} << ")""#,
+                        ty = self.c_single_vector_type(),
+                        lanes = (0..self.num_lanes())
+                            .map(move |idx| -> std::string::String {
+                                format!(
+                                    "{cast}{lane_fn}(__return_value.val[{vector}], {lane})",
+                                    cast = self.c_promotion(),
+                                    lane_fn = self.get_lane_function(),
+                                    lane = idx,
+                                    vector = vector,
+                                )
+                            })
+                            .collect::<Vec<_>>()
+                            .join(r#" << ", " << "#)
+                    )
+                })
+                .collect::<Vec<_>>()
+                .join(r#" << ", " << "#)
+        } else if self.num_lanes() > 1 {
+            (0..self.num_lanes())
+                .map(|idx| -> std::string::String {
+                    format!(
+                        "{cast}{lane_fn}(__return_value, {lane})",
+                        cast = self.c_promotion(),
+                        lane_fn = self.get_lane_function(),
+                        lane = idx
+                    )
+                })
+                .collect::<Vec<_>>()
+                .join(r#" << ", " << "#)
+        } else {
+            format!(
+                "{promote}cast<{cast}>(__return_value)",
+                cast = match self.kind() {
+                    TypeKind::Float if self.inner_size() == 16 => "float16_t".to_string(),
+                    TypeKind::Float if self.inner_size() == 32 => "float".to_string(),
+                    TypeKind::Float if self.inner_size() == 64 => "double".to_string(),
+                    TypeKind::Int(Sign::Signed) => format!("int{}_t", self.inner_size()),
+                    TypeKind::Int(Sign::Unsigned) => format!("uint{}_t", self.inner_size()),
+                    TypeKind::Poly => format!("poly{}_t", self.inner_size()),
+                    ty => todo!("print_result_c - Unknown type: {:#?}", ty),
+                },
+                promote = self.c_promotion(),
+            )
+        };
+
+        format!(
+            r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) <<  {lanes} << "{close}" << std::endl;"#,
+            ty = if self.is_simd() {
+                format!("{}(", self.c_type())
+            } else {
+                String::from("")
+            },
+            close = if self.is_simd() { ")" } else { "" },
+        )
+    }
 }
 
 impl ArmIntrinsicType {
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
index 84755ce525053..28902b3dfe981 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@@ -1,6 +1,7 @@
+use crate::common::intrinsic::Intrinsic;
+
 use super::argument::Argument;
 use super::indentation::Indentation;
-use super::intrinsic::IntrinsicDefinition;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
 // The number of times each intrinsic will be called.
@@ -8,7 +9,7 @@ const PASSES: u32 = 20;
 
 pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
     w: &mut impl std::io::Write,
-    intrinsic: &dyn IntrinsicDefinition<T>,
+    intrinsic: &Intrinsic<T>,
     indentation: Indentation,
     additional: &str,
     passes: u32,
@@ -21,16 +22,18 @@ pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
             {body_indentation}auto __return_value = {intrinsic_call}({args});\n\
             {print_result}\n\
         {indentation}}}",
-        loaded_args = intrinsic.arguments().load_values_c(body_indentation),
-        intrinsic_call = intrinsic.name(),
-        args = intrinsic.arguments().as_call_param_c(),
-        print_result = intrinsic.print_result_c(body_indentation, additional)
+        loaded_args = intrinsic.arguments.load_values_c(body_indentation),
+        intrinsic_call = intrinsic.name,
+        args = intrinsic.arguments.as_call_param_c(),
+        print_result = intrinsic
+            .results
+            .print_result_c(body_indentation, additional)
     )
 }
 
 pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
     w: &mut impl std::io::Write,
-    intrinsic: &dyn IntrinsicDefinition<T>,
+    intrinsic: &Intrinsic<T>,
     indentation: Indentation,
     constraints: &mut (impl Iterator<Item = &'a Argument<T>> + Clone),
     name: String,
@@ -63,14 +66,14 @@ pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
 // Compiles C test programs using specified compiler
 pub fn create_c_test_function<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
-    intrinsic: &dyn IntrinsicDefinition<T>,
+    intrinsic: &Intrinsic<T>,
 ) -> std::io::Result<()> {
     let indentation = Indentation::default();
 
-    writeln!(w, "int run_{}() {{", intrinsic.name())?;
+    writeln!(w, "int run_{}() {{", intrinsic.name)?;
 
     // Define the arrays of arguments.
-    let arguments = intrinsic.arguments();
+    let arguments = &intrinsic.arguments;
     arguments.gen_arglists_c(w, indentation.nested(), PASSES)?;
 
     generate_c_constraint_blocks(
@@ -90,9 +93,9 @@ pub fn create_c_test_function<T: IntrinsicTypeDefinition>(
 pub fn write_mod_cpp<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
     notice: &str,
-    architecture: &str,
     platform_headers: &[&str],
-    intrinsics: &[impl IntrinsicDefinition<T>],
+    forward_declarations: &str,
+    intrinsics: &[Intrinsic<T>],
 ) -> std::io::Result<()> {
     write!(w, "{notice}")?;
 
@@ -122,12 +125,7 @@ std::ostream& operator<<(std::ostream& os, float16_t value);
 "#
     )?;
 
-    writeln!(w, "#ifdef __{architecture}__")?;
-    writeln!(
-        w,
-        "std::ostream& operator<<(std::ostream& os, poly128_t value);"
-    )?;
-    writeln!(w, "#endif")?;
+    writeln!(w, "{}", forward_declarations)?;
 
     for intrinsic in intrinsics {
         create_c_test_function(w, intrinsic)?;
@@ -138,7 +136,6 @@ std::ostream& operator<<(std::ostream& os, float16_t value);
 
 pub fn write_main_cpp<'a>(
     w: &mut impl std::io::Write,
-    architecture: &str,
     arch_specific_definitions: &str,
     intrinsics: impl Iterator<Item = &'a str> + Clone,
 ) -> std::io::Result<()> {
@@ -167,9 +164,8 @@ std::ostream& operator<<(std::ostream& os, float16_t value) {{
 "#
     )?;
 
-    writeln!(w, "#ifdef __{architecture}__")?;
+    // NOTE: It's assumed that this value contains the required `ifdef`s.
     writeln!(w, "{arch_specific_definitions }")?;
-    writeln!(w, "#endif")?;
 
     for intrinsic in intrinsics.clone() {
         writeln!(w, "extern int run_{intrinsic}(void);")?;
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
index 2a02b8fdff1df..c6b964a9ce4e4 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@@ -1,8 +1,10 @@
 use itertools::Itertools;
 use std::process::Command;
 
+use crate::common::intrinsic::Intrinsic;
+
 use super::indentation::Indentation;
-use super::intrinsic::{IntrinsicDefinition, format_f16_return_value};
+use super::intrinsic::format_f16_return_value;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
 // The number of times each intrinsic will be called.
@@ -96,11 +98,10 @@ pub fn write_main_rs<'a>(
 
 pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
-    architecture: &str,
     notice: &str,
     cfg: &str,
     definitions: &str,
-    intrinsics: &[impl IntrinsicDefinition<T>],
+    intrinsics: &[Intrinsic<T>],
 ) -> std::io::Result<()> {
     write!(w, "{notice}")?;
 
@@ -115,8 +116,6 @@ pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
 
     writeln!(w, "{cfg}")?;
 
-    writeln!(w, "use core_arch::arch::{architecture}::*;")?;
-
     writeln!(w, "{definitions}")?;
 
     for intrinsic in intrinsics {
@@ -189,16 +188,16 @@ pub fn compile_rust_programs(toolchain: Option<&str>, target: &str, linker: Opti
 
 pub fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
-    intrinsic: &dyn IntrinsicDefinition<T>,
+    intrinsic: &Intrinsic<T>,
     indentation: Indentation,
     specializations: &[Vec<u8>],
     passes: u32,
 ) -> std::io::Result<()> {
-    let intrinsic_name = intrinsic.name();
+    let intrinsic_name = &intrinsic.name;
 
     // Each function (and each specialization) has its own type. Erase that type with a cast.
     let mut coerce = String::from("unsafe fn(");
-    for _ in intrinsic.arguments().iter().filter(|a| !a.has_constraint()) {
+    for _ in intrinsic.arguments.iter().filter(|a| !a.has_constraint()) {
         coerce += "_, ";
     }
     coerce += ") -> _";
@@ -248,13 +247,13 @@ pub fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
                     }}\n\
                 }}\n\
             }}",
-        loaded_args = intrinsic.arguments().load_values_rust(indentation3),
-        args = intrinsic.arguments().as_call_param_rust(),
+        loaded_args = intrinsic.arguments.load_values_rust(indentation3),
+        args = intrinsic.arguments.as_call_param_rust(),
     )
 }
 
 /// Generate the specializations (unique sequences of const-generic arguments) for this intrinsic.
-fn generate_rust_specializations<'a>(
+fn generate_rust_specializations(
     constraints: &mut impl Iterator<Item = impl Iterator<Item = i64>>,
 ) -> Vec<Vec<u8>> {
     let mut specializations = vec![vec![]];
@@ -277,15 +276,15 @@ fn generate_rust_specializations<'a>(
 // Top-level function to create complete test program
 pub fn create_rust_test_module<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
-    intrinsic: &dyn IntrinsicDefinition<T>,
+    intrinsic: &Intrinsic<T>,
 ) -> std::io::Result<()> {
-    trace!("generating `{}`", intrinsic.name());
+    trace!("generating `{}`", intrinsic.name);
     let indentation = Indentation::default();
 
-    writeln!(w, "pub fn run_{}() {{", intrinsic.name())?;
+    writeln!(w, "pub fn run_{}() {{", intrinsic.name)?;
 
     // Define the arrays of arguments.
-    let arguments = intrinsic.arguments();
+    let arguments = &intrinsic.arguments;
     arguments.gen_arglists_rust(w, indentation.nested(), PASSES)?;
 
     // Define any const generics as `const` items, then generate the actual test loop.
diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs
index bc46ccfbac40c..95276d19b72f9 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs
@@ -1,5 +1,4 @@
 use super::argument::ArgumentList;
-use super::indentation::Indentation;
 use super::intrinsic_helpers::{IntrinsicTypeDefinition, TypeKind};
 
 /// An intrinsic
@@ -18,32 +17,14 @@ pub struct Intrinsic<T: IntrinsicTypeDefinition> {
     pub arch_tags: Vec<String>,
 }
 
-pub trait IntrinsicDefinition<T>
-where
-    T: IntrinsicTypeDefinition,
-{
-    fn arguments(&self) -> ArgumentList<T>;
-
-    fn results(&self) -> T;
-
-    fn name(&self) -> String;
-
-    /// Generates a std::cout for the intrinsics results that will match the
-    /// rust debug output format for the return type. The generated line assumes
-    /// there is an int i in scope which is the current pass number.
-    fn print_result_c(&self, _indentation: Indentation, _additional: &str) -> String;
-}
-
-pub fn format_f16_return_value<T: IntrinsicTypeDefinition>(
-    intrinsic: &dyn IntrinsicDefinition<T>,
-) -> String {
+pub fn format_f16_return_value<T: IntrinsicTypeDefinition>(intrinsic: &Intrinsic<T>) -> String {
     // the `intrinsic-test` crate compares the output of C and Rust intrinsics. Currently, It uses
     // a string representation of the output value to compare. In C, f16 values are currently printed
     // as hexadecimal integers. Since https://github.com/rust-lang/rust/pull/127013, rust does print
     // them as decimal floating point values. To keep the intrinsics tests working, for now, format
     // vectors containing f16 values like C prints them.
-    let return_value = match intrinsic.results().kind() {
-        TypeKind::Float if intrinsic.results().inner_size() == 16 => "debug_f16(__return_value)",
+    let return_value = match intrinsic.results.kind() {
+        TypeKind::Float if intrinsic.results.inner_size() == 16 => "debug_f16(__return_value)",
         _ => "format_args!(\"{__return_value:.150?}\")",
     };
 
diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
index f5e84ca97af21..7bc1015a387c1 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
@@ -325,4 +325,9 @@ pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {
 
     /// can be directly defined in `impl` blocks
     fn c_single_vector_type(&self) -> String;
+
+    /// Generates a std::cout for the intrinsics results that will match the
+    /// rust debug output format for the return type. The generated line assumes
+    /// there is an int i in scope which is the current pass number.
+    fn print_result_c(&self, indentation: Indentation, additional: &str) -> String;
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/common/mod.rs b/library/stdarch/crates/intrinsic-test/src/common/mod.rs
index 5a57c8027db9b..666b3885c147b 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/mod.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/mod.rs
@@ -1,5 +1,20 @@
+use std::fs::File;
+
+use rayon::prelude::*;
+
 use cli::ProcessedCli;
 
+use crate::common::{
+    compile_c::CppCompilation,
+    gen_c::{write_main_cpp, write_mod_cpp},
+    gen_rust::{
+        compile_rust_programs, write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs,
+        write_main_rs,
+    },
+    intrinsic::Intrinsic,
+    intrinsic_helpers::IntrinsicTypeDefinition,
+};
+
 pub mod argument;
 pub mod cli;
 pub mod compare;
@@ -15,12 +30,162 @@ pub mod values;
 /// Architectures must support this trait
 /// to be successfully tested.
 pub trait SupportedArchitectureTest {
-    fn create(cli_options: ProcessedCli) -> Box<Self>
-    where
-        Self: Sized;
-    fn build_c_file(&self) -> bool;
-    fn build_rust_file(&self) -> bool;
-    fn compare_outputs(&self) -> bool;
+    type IntrinsicImpl: IntrinsicTypeDefinition + Sync;
+
+    fn cli_options(&self) -> &ProcessedCli;
+    fn intrinsics(&self) -> &[Intrinsic<Self::IntrinsicImpl>];
+
+    fn create(cli_options: ProcessedCli) -> Self;
+
+    const NOTICE: &str;
+
+    const PLATFORM_C_HEADERS: &[&str];
+    const PLATFORM_C_DEFINITIONS: &str;
+    const PLATFORM_C_FORWARD_DECLARATIONS: &str;
+
+    const PLATFORM_RUST_CFGS: &str;
+    const PLATFORM_RUST_DEFINITIONS: &str;
+
+    fn cpp_compilation(&self) -> Option<CppCompilation>;
+
+    fn build_c_file(&self) -> bool {
+        let (chunk_size, chunk_count) = chunk_info(self.intrinsics().len());
+
+        let cpp_compiler_wrapped = self.cpp_compilation();
+
+        std::fs::create_dir_all("c_programs").unwrap();
+        self.intrinsics()
+            .par_chunks(chunk_size)
+            .enumerate()
+            .map(|(i, chunk)| {
+                let c_filename = format!("c_programs/mod_{i}.cpp");
+                let mut file = File::create(&c_filename).unwrap();
+                write_mod_cpp(
+                    &mut file,
+                    Self::NOTICE,
+                    Self::PLATFORM_C_HEADERS,
+                    Self::PLATFORM_C_FORWARD_DECLARATIONS,
+                    chunk,
+                )
+                .unwrap();
+
+                // compile this cpp file into a .o file.
+                //
+                // This is done because `cpp_compiler_wrapped` is None when
+                // the --generate-only flag is passed
+                if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
+                    let output = cpp_compiler
+                        .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?;
+                    assert!(output.status.success(), "{output:?}");
+                }
+
+                Ok(())
+            })
+            .collect::<Result<(), std::io::Error>>()
+            .unwrap();
+
+        let mut file = File::create("c_programs/main.cpp").unwrap();
+        write_main_cpp(
+            &mut file,
+            Self::PLATFORM_C_DEFINITIONS,
+            self.intrinsics().iter().map(|i| i.name.as_str()),
+        )
+        .unwrap();
+
+        // This is done because `cpp_compiler_wrapped` is None when
+        // the --generate-only flag is passed
+        if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
+            // compile this cpp file into a .o file
+            info!("compiling main.cpp");
+            let output = cpp_compiler
+                .compile_object_file("main.cpp", "intrinsic-test-programs.o")
+                .unwrap();
+            assert!(output.status.success(), "{output:?}");
+
+            let object_files = (0..chunk_count)
+                .map(|i| format!("mod_{i}.o"))
+                .chain(["intrinsic-test-programs.o".to_owned()]);
+
+            let output = cpp_compiler
+                .link_executable(object_files, "intrinsic-test-programs")
+                .unwrap();
+            assert!(output.status.success(), "{output:?}");
+        }
+
+        true
+    }
+
+    fn build_rust_file(&self) -> bool {
+        std::fs::create_dir_all("rust_programs/src").unwrap();
+
+        let (chunk_size, chunk_count) = chunk_info(self.intrinsics().len());
+
+        let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
+        write_bin_cargo_toml(&mut cargo, chunk_count).unwrap();
+
+        let mut main_rs = File::create("rust_programs/src/main.rs").unwrap();
+        write_main_rs(
+            &mut main_rs,
+            chunk_count,
+            Self::PLATFORM_RUST_CFGS,
+            "",
+            self.intrinsics().iter().map(|i| i.name.as_str()),
+        )
+        .unwrap();
+
+        let target = &self.cli_options().target;
+        let toolchain = self.cli_options().toolchain.as_deref();
+        let linker = self.cli_options().linker.as_deref();
+
+        self.intrinsics()
+            .par_chunks(chunk_size)
+            .enumerate()
+            .map(|(i, chunk)| {
+                std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?;
+
+                let rust_filename = format!("rust_programs/mod_{i}/src/lib.rs");
+                trace!("generating `{rust_filename}`");
+                let mut file = File::create(rust_filename)?;
+
+                write_lib_rs(
+                    &mut file,
+                    Self::NOTICE,
+                    Self::PLATFORM_RUST_CFGS,
+                    Self::PLATFORM_RUST_DEFINITIONS,
+                    chunk,
+                )?;
+
+                let toml_filename = format!("rust_programs/mod_{i}/Cargo.toml");
+                trace!("generating `{toml_filename}`");
+                let mut file = File::create(toml_filename).unwrap();
+
+                write_lib_cargo_toml(&mut file, &format!("mod_{i}"))?;
+
+                Ok(())
+            })
+            .collect::<Result<(), std::io::Error>>()
+            .unwrap();
+
+        compile_rust_programs(toolchain, target, linker)
+    }
+
+    fn compare_outputs(&self) -> bool {
+        if self.cli_options().toolchain.is_some() {
+            let intrinsics_name_list = self
+                .intrinsics()
+                .iter()
+                .map(|i| i.name.clone())
+                .collect::<Vec<_>>();
+
+            compare::compare_outputs(
+                &intrinsics_name_list,
+                &self.cli_options().runner,
+                &self.cli_options().target,
+            )
+        } else {
+            true
+        }
+    }
 }
 
 pub fn chunk_info(intrinsic_count: usize) -> (usize, usize) {
diff --git a/library/stdarch/crates/intrinsic-test/src/main.rs b/library/stdarch/crates/intrinsic-test/src/main.rs
index 538f317a2978b..44d7aafd827fe 100644
--- a/library/stdarch/crates/intrinsic-test/src/main.rs
+++ b/library/stdarch/crates/intrinsic-test/src/main.rs
@@ -13,23 +13,16 @@ fn main() {
     let args: Cli = clap::Parser::parse();
     let processed_cli_options = ProcessedCli::new(args);
 
-    let test_environment_result: Option<Box<dyn SupportedArchitectureTest>> =
-        match processed_cli_options.target.as_str() {
-            "aarch64-unknown-linux-gnu"
-            | "armv7-unknown-linux-gnueabihf"
-            | "aarch64_be-unknown-linux-gnu" => {
-                Some(ArmArchitectureTest::create(processed_cli_options))
-            }
+    match processed_cli_options.target.as_str() {
+        "aarch64-unknown-linux-gnu"
+        | "armv7-unknown-linux-gnueabihf"
+        | "aarch64_be-unknown-linux-gnu" => run(ArmArchitectureTest::create(processed_cli_options)),
 
-            _ => None,
-        };
-
-    if test_environment_result.is_none() {
-        std::process::exit(0);
+        _ => std::process::exit(0),
     }
+}
 
-    let test_environment = test_environment_result.unwrap();
-
+fn run(test_environment: impl SupportedArchitectureTest) {
     info!("building C binaries");
     if !test_environment.build_c_file() {
         std::process::exit(2);
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index a31613e6b1aef..ccdcea980e1b2 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -17,6 +17,10 @@ neon-stable: &neon-stable
 target-not-arm: &target-not-arm
   FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm"']]}]]
 
+# #[cfg(not(target_arch = "arm64ec"))]
+target-not-arm64ec: &target-not-arm64ec
+  FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm64ec"']]}]]
+
 # #[cfg_attr(all(test, not(target_env = "msvc"))]
 msvc-disabled: &msvc-disabled
   FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]
@@ -169,6 +173,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fabd]
     safety: safe
     types:
@@ -368,6 +373,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -559,6 +565,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -642,6 +649,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -777,6 +785,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -859,6 +868,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -931,6 +941,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16", i32]
@@ -988,6 +999,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16", i32]
@@ -1036,6 +1048,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -1078,6 +1091,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -1175,6 +1189,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1202,6 +1217,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1219,6 +1235,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1246,6 +1263,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1264,6 +1282,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1386,6 +1405,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["s16", "f16", "h_f16_s16", i16]
@@ -1402,6 +1422,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "s16", "h", i16, 'a as i16']
@@ -1418,6 +1439,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u16", "h", 'a as u16']
@@ -1435,6 +1457,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["u16", "f16", "h_f16_u16"]
@@ -1486,6 +1509,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x8_t, float16x4_t, float32x4_t]
@@ -1503,6 +1527,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float32x4_t, float16x8_t]
@@ -1650,6 +1675,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1675,6 +1701,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1693,6 +1720,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1783,6 +1811,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, int16x4_t]
@@ -1821,6 +1850,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u32", 'h_u32_f16']
@@ -1842,6 +1872,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "i32", 'h_s32_f16']
@@ -1863,6 +1894,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "i16", 'h_s16_f16', 's32']
@@ -1877,6 +1909,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u16", 'h_u16_f16', 'u32']
@@ -1948,6 +1981,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, int16x4_t]
@@ -1968,6 +2002,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -1987,6 +2022,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "i32", 'h']
@@ -2007,6 +2043,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "i16", 'h', 'i32']
@@ -2022,6 +2059,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u32", 'h']
@@ -2042,6 +2080,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u16", 'h', 'u32']
@@ -2077,6 +2116,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, int16x4_t]
@@ -2097,6 +2137,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -2291,6 +2332,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, int16x4_t]
@@ -2311,6 +2353,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -2331,6 +2374,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "i32", 'h']
@@ -2351,6 +2395,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "i16", 'h', 'i32']
@@ -2365,6 +2410,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u32", 'h']
@@ -2385,6 +2431,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u16", 'h', 'u32']
@@ -2531,6 +2578,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2549,6 +2597,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2770,6 +2819,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fneg]
     safety: safe
     types:
@@ -2986,6 +3036,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frintx]
     safety: safe
     types:
@@ -3002,6 +3053,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frintx]
     safety: safe
     types:
@@ -3033,6 +3085,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frinta]
     safety: safe
     types:
@@ -3049,6 +3102,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frinta]
     safety: safe
     types:
@@ -3096,6 +3150,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frintn]
     safety: safe
     types:
@@ -3130,6 +3185,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frintm]
     safety: safe
     types:
@@ -3146,6 +3202,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frintm]
     safety: safe
     types:
@@ -3178,6 +3235,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frintp]
     safety: safe
     types:
@@ -3193,6 +3251,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frintp]
     safety: safe
     types:
@@ -3222,6 +3281,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frintz]
     safety: safe
     types:
@@ -3238,6 +3298,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frintz]
     safety: safe
     types:
@@ -3273,6 +3334,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [frinti]
     safety: safe
     types:
@@ -3293,6 +3355,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     # TODO: double check me
     assert_instr: [frinti]
     safety: safe
@@ -5205,6 +5268,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmulx]
     safety: safe
     types:
@@ -5243,6 +5307,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmulx]
     safety: safe
     types:
@@ -5385,6 +5450,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -5438,6 +5504,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -5462,6 +5529,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, "f16"]
@@ -5546,6 +5614,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmla]
     safety: safe
     types:
@@ -5582,6 +5651,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fdiv]
     safety: safe
     types:
@@ -5597,6 +5667,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
     types:
@@ -5637,6 +5708,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
     types:
@@ -5928,6 +6000,7 @@ intrinsics:
       - *neon-fp16
       - *enable-fcma
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fcadd]
     safety: safe
     types:
@@ -5948,6 +6021,7 @@ intrinsics:
       - *neon-fp16
       - *enable-fcma
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fcadd]
     safety: safe
     types:
@@ -5988,6 +6062,7 @@ intrinsics:
       - FnCall: [target_feature, ['enable = "neon,fcma"']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
     types:
@@ -6028,6 +6103,7 @@ intrinsics:
       - FnCall: [target_feature, ['enable = "neon,fcma"']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
     types:
@@ -6069,6 +6145,7 @@ intrinsics:
       - FnCall: [target_feature, ['enable = "neon,fcma"']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
     types:
@@ -6113,6 +6190,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -6158,6 +6236,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -6203,6 +6282,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -6245,6 +6325,7 @@ intrinsics:
       - FnCall: [target_feature, ['enable = "neon,fcma"']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
     types:
@@ -6290,6 +6371,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -6337,6 +6419,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -6384,6 +6467,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -6430,6 +6514,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -6473,6 +6558,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -6568,6 +6654,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmax]
     safety: safe
     types:
@@ -6601,6 +6688,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmaxnm]
     safety: safe
     types:
@@ -6616,6 +6704,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fminnm]
     safety: safe
     types:
@@ -6657,6 +6746,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmaxnmv]
     safety: safe
     types:
@@ -6673,6 +6763,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fminnmv]
     safety: safe
     types:
@@ -6689,6 +6780,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmaxv]
     safety: safe
     types:
@@ -6708,6 +6800,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fminv]
     safety: safe
     types:
@@ -6762,6 +6855,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmin]
     safety: safe
     types:
@@ -6875,6 +6969,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [faddp]
     safety: safe
     types:
@@ -6894,6 +6989,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmaxp]
     safety: safe
     types:
@@ -6914,6 +7010,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmaxnmp]
     safety: safe
     types:
@@ -6934,6 +7031,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fminp]
     safety: safe
     types:
@@ -6954,6 +7052,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fminnmp]
     safety: safe
     types:
@@ -8379,6 +8478,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -8393,6 +8493,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fsqrt]
     safety: safe
     types:
@@ -8445,6 +8546,7 @@ intrinsics:
       - *neon-fp16
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [h_f16, "f16"]
@@ -8501,6 +8603,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [h_f16, "f16"]
@@ -8557,6 +8660,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [h_f16, "f16"]
@@ -8595,6 +8699,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [h_f16, "f16"]
@@ -8676,6 +8781,7 @@ intrinsics:
       - [float64x1_t, float32x2_t]
       - [float32x4_t, float64x2_t]
       - [float64x2_t, float32x4_t]
+    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]
 
@@ -8687,6 +8793,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
     types:
@@ -8695,6 +8802,7 @@ intrinsics:
       # q
       - [float64x2_t, float16x8_t]
       - [float16x8_t, float64x2_t]
+    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]
 
@@ -9586,6 +9694,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]]
     safety: safe
     types:
@@ -9647,6 +9756,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]]
     safety: safe
     types:
@@ -9715,6 +9825,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
     types:
@@ -9765,6 +9876,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
     types:
@@ -9826,6 +9938,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]]
     safety: safe
     types:
@@ -9891,6 +10004,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]]
     safety: safe
     types:
@@ -10180,6 +10294,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10206,6 +10321,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10230,6 +10346,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "h_f16"]
@@ -10342,6 +10459,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "h_f16"]
@@ -10358,6 +10476,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10377,6 +10496,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10541,6 +10661,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t, 'f16x4', 'f16x4::new(0.0, 0.0, 0.0, 0.0)']
@@ -10576,6 +10697,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u16", "h_f16"]
@@ -10700,6 +10822,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -10842,6 +10965,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -10972,6 +11096,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16", "u16"]
@@ -11126,6 +11251,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11146,6 +11272,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
     types:
@@ -11183,6 +11310,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11328,6 +11456,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u16", 'h_f16']
@@ -11399,6 +11528,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmls]
     safety: safe
     types:
@@ -11651,6 +11781,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["h_f16", "f16"]
@@ -11734,6 +11865,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -11772,6 +11904,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "i32", 'h']
@@ -11792,6 +11925,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "i16", 'h', 'i32']
@@ -11807,6 +11941,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u32", 'h']
@@ -11827,6 +11962,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["f16", "u16", 'h', 'u32']
@@ -12857,6 +12993,7 @@ intrinsics:
       - FnCall: [target_feature, ['enable = "{type[2]}"']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -12924,6 +13061,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]]
       - FnCall: [allow, ['clippy::cast_ptr_alignment']]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -13637,6 +13775,7 @@ intrinsics:
       - *neon-fp16
       - *enable-fhm
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmlal2]
     safety: safe
     types:
@@ -13660,6 +13799,7 @@ intrinsics:
       - *enable-fhm
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -13684,6 +13824,7 @@ intrinsics:
       - *neon-fp16
       - *enable-fhm
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmlal]
     safety: safe
     types:
@@ -13707,6 +13848,7 @@ intrinsics:
       - *enable-fhm
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -13731,6 +13873,7 @@ intrinsics:
       - *neon-fp16
       - *enable-fhm
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmlsl2]
     safety: safe
     types:
@@ -13753,6 +13896,7 @@ intrinsics:
       - *enable-fhm
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -13777,6 +13921,7 @@ intrinsics:
       - *neon-fp16
       - *enable-fhm
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [fmlsl]
     safety: safe
     types:
@@ -13799,6 +13944,7 @@ intrinsics:
       - *enable-fhm
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index c96c6e2a0c0b5..61a3a5853632c 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -37,6 +37,10 @@ target-is-arm: &target-is-arm
 target-not-arm: &target-not-arm
   FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm"']]}]]
 
+# #[cfg(not(target_arch = "arm64ec"))]
+target-not-arm64ec: &target-not-arm64ec
+  FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm64ec"']]}]]
+
 not-arm: &not-arm
   FnCall: [not, ['target_arch = "arm"']]
 
@@ -278,6 +282,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabd]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -396,6 +401,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmeq]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -457,6 +463,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabs]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -474,6 +481,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabs]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ['h_f16', 'f16']
@@ -555,6 +563,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -573,6 +582,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)']
@@ -651,6 +661,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -668,6 +679,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmle]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)']
@@ -849,6 +861,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facgt]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -895,6 +908,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facge]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -935,6 +949,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facgt]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -970,6 +985,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facge]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -1004,6 +1020,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [scvtf]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [int16x4_t, float16x4_t]
@@ -1038,6 +1055,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ucvtf]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [uint16x4_t, float16x4_t]
@@ -1109,6 +1127,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1140,6 +1159,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1171,6 +1191,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1229,6 +1250,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1482,6 +1504,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1501,6 +1524,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, f16, 'float16x4', '_n_']
@@ -1519,6 +1543,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1740,6 +1765,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1758,6 +1784,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2207,6 +2234,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fneg]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, 'f16']
@@ -2262,13 +2290,10 @@ intrinsics:
       - [uint64x1_t, u64, i64]
       - [uint64x2_t, u64, i64]
     compose:
-      - LLVMLink:
-          name: "uqsub.{neon_type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.uqsub.v{neon_type[0].lane}{type[2]}"
-              arch: aarch64,arm64ec
-            - link: "llvm.usub.sat.v{neon_type[0].lane}{type[2]}"
-              arch: arm
+      - FnCall:
+        - simd_saturating_sub
+        - - a
+          - b
 
   - name: "vqsub{neon_type[0].no}"
     doc: Saturating subtract
@@ -2291,13 +2316,10 @@ intrinsics:
       - [int64x1_t, s64, i64]
       - [int64x2_t, s64, i64]
     compose:
-      - LLVMLink:
-          name: "sqsub.{neon_type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.sqsub.v{neon_type[0].lane}{type[2]}"
-              arch: aarch64,arm64ec
-            - link: "llvm.ssub.sat.v{neon_type[0].lane}{type[2]}"
-              arch: arm
+      - FnCall:
+        - simd_saturating_sub
+        - - a
+          - b
 
   - name: "vhadd{neon_type.no}"
     doc: Halving add
@@ -2464,9 +2486,7 @@ intrinsics:
           name: "llvm.frinn.{neon_type}"
           links:
             - link: "llvm.roundeven.{neon_type}"
-              arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vrintn.{neon_type}"
-              arch: arm
+              arch: aarch64,arm64ec,arm
 
   - name: "vrndn{neon_type.no}"
     doc: "Floating-point round to integral, to nearest with ties to even"
@@ -2478,6 +2498,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frintn]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -2487,9 +2508,7 @@ intrinsics:
           name: "llvm.frinn.{neon_type}"
           links:
             - link: "llvm.roundeven.{neon_type}"
-              arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vrintn.{neon_type}"
-              arch: arm
+              arch: aarch64,arm64ec,arm
 
   - name: "vqadd{neon_type.no}"
     doc: Saturating add
@@ -2512,13 +2531,10 @@ intrinsics:
       - uint64x1_t
       - uint64x2_t
     compose:
-      - LLVMLink:
-          name: "uqadd.{neon_type}"
-          links:
-            - link: "llvm.aarch64.neon.uqadd.{neon_type}"
-              arch: aarch64,arm64ec
-            - link: "llvm.uadd.sat.{neon_type}"
-              arch: arm
+      - FnCall:
+        - simd_saturating_add
+        - - a
+          - b
 
   - name: "vqadd{neon_type.no}"
     doc: Saturating add
@@ -2541,13 +2557,10 @@ intrinsics:
       - int64x1_t
       - int64x2_t
     compose:
-      - LLVMLink:
-          name: "sqadd.{neon_type}"
-          links:
-            - link: "llvm.aarch64.neon.sqadd.{neon_type}"
-              arch: aarch64,arm64ec
-            - link: "llvm.sadd.sat.{neon_type}"
-              arch: arm
+      - FnCall:
+        - simd_saturating_add
+        - - a
+          - b
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers"
@@ -2743,6 +2756,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -2773,6 +2787,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ["2"]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety:
       unsafe: [neon]
@@ -2793,6 +2808,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1r]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -3385,6 +3401,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -3413,6 +3430,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -3440,6 +3458,7 @@ intrinsics:
       - *neon-fp16
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -3469,6 +3488,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -3498,6 +3518,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ["2"]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs:
       - "const LANE: i32"
     safety:
@@ -3540,6 +3561,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ["2"]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs:
       - "const LANE: i32"
     safety:
@@ -3580,6 +3602,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -3608,6 +3631,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -3635,6 +3659,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -3664,6 +3689,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -3693,6 +3719,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ["2"]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs:
       - "const LANE: i32"
     safety:
@@ -3737,6 +3764,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ["2"]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs:
       - "const LANE: i32"
     safety:
@@ -4718,6 +4746,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ["2"]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     types:
       - ['*mut f16', float16x4_t, '2']
       - ['*mut f16', float16x8_t, '3']
@@ -4955,6 +4984,7 @@ intrinsics:
       - *neon-v7
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [vst1]
     types:
       - [f16, float16x4x4_t, float16x4_t]
@@ -5098,6 +5128,7 @@ intrinsics:
       - *target-not-arm
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [st2]
     safety:
       unsafe: [neon]
@@ -5188,6 +5219,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety:
       unsafe: [neon]
@@ -5279,6 +5311,7 @@ intrinsics:
       - *neon-v7
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [vst2]
     safety:
       unsafe: [neon]
@@ -5345,6 +5378,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety:
       unsafe: [neon]
@@ -5555,6 +5589,7 @@ intrinsics:
       - *neon-v7
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [vst3]
     safety:
       unsafe: [neon]
@@ -5623,6 +5658,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety:
       unsafe: [neon]
@@ -5683,6 +5719,7 @@ intrinsics:
       - *target-not-arm
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [st3]
     safety:
       unsafe: [neon]
@@ -5747,6 +5784,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety:
       unsafe: [neon]
@@ -5960,6 +5998,7 @@ intrinsics:
       - *neon-v7
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [vst4]
     safety:
       unsafe: [neon]
@@ -6029,6 +6068,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety:
       unsafe: [neon]
@@ -6091,6 +6131,7 @@ intrinsics:
       - *target-not-arm
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [st4]
     safety:
       unsafe: [neon]
@@ -6157,6 +6198,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety:
       unsafe: [neon]
@@ -6317,6 +6359,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [f16, float16x4_t]
@@ -6366,6 +6409,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
@@ -6569,6 +6613,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmla]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -6678,6 +6723,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fsub]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ['f16', float16x4_t]
@@ -6696,6 +6742,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fadd]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -6716,6 +6763,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fadd]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ['h_f16', 'f16']
@@ -7194,6 +7242,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmax]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -7236,6 +7285,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmaxnm]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -7254,6 +7304,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fminnm]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -7340,6 +7391,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmin]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -7404,6 +7456,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [faddp]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -8247,6 +8300,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsqrts]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frsqrts]]}]]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -8295,6 +8349,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frecpe]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -8343,6 +8398,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frecps]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -8424,6 +8480,7 @@ intrinsics:
       - [poly16x8_t, p128]
       - [int8x16_t, p128]
       - [uint8x16_t, p128]
+    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]
 
@@ -8661,6 +8718,7 @@ intrinsics:
       - [poly8x16_t, float32x4_t]
       - [poly16x8_t, float32x4_t]
       - [p128, float32x4_t]
+    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]
 
@@ -8675,6 +8733,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       # non-q
@@ -8723,6 +8782,7 @@ intrinsics:
       - [float16x8_t, uint16x8_t]
       - [float16x8_t, uint32x4_t]
       - [float16x8_t, uint64x2_t]
+    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]
 
@@ -8737,6 +8797,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [poly64x1_t, float16x4_t]
@@ -8746,6 +8807,7 @@ intrinsics:
       - [poly128_t, float16x8_t]
       - [float16x8_t, poly128_t]
       - [float16x8_t, poly64x2_t]
+    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]
 
@@ -8759,6 +8821,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [rev64]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, "[3, 2, 1, 0]"]
@@ -9074,6 +9137,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ["u64", float16x4_t]
@@ -9147,6 +9211,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -9578,6 +9643,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn2]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, float16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]']
@@ -9733,6 +9799,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, float16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]']
@@ -9803,6 +9870,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp2]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, float16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]']
@@ -10050,6 +10118,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -10077,6 +10146,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -10103,6 +10173,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -10179,6 +10250,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -10233,6 +10305,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -10376,6 +10449,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -10394,6 +10468,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)']
@@ -10633,6 +10708,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzu]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -10652,6 +10728,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtn]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float32x4_t, float16x4_t]
@@ -10668,6 +10745,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtl]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, float32x4_t]
@@ -11029,6 +11107,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, "f16"]
@@ -11141,6 +11220,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t]
@@ -11159,6 +11239,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmlt]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)']
@@ -11271,6 +11352,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmls]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -11386,6 +11468,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsqrte]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frsqrte]]}]]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
@@ -11499,6 +11582,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzs]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, int16x4_t]
@@ -11748,6 +11832,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -11834,6 +11919,7 @@ intrinsics:
       - FnCall: [target_feature, ['enable = "{type[3]}"']]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
     types:
       - ['*const f16', float16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<f16>() as i32', '_v4f16']
@@ -12117,6 +12203,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -12145,6 +12232,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -12172,6 +12260,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -12201,6 +12290,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4r]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
@@ -12230,6 +12320,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ["2"]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs:
       - "const LANE: i32"
     safety:
@@ -12276,6 +12367,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ["2"]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs:
       - "const LANE: i32"
     safety:
@@ -13674,6 +13766,7 @@ intrinsics:
       - *neon-v7
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]]
     types:
       - ['_v4f16', '* const i8', float16x4_t, i32, '16']
@@ -13738,6 +13831,7 @@ intrinsics:
       - *neon-v7
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[2]}"']]}]]
     types:
       - ['*mut f16', float16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::<f16>() as i32', '_v4f16']
@@ -13918,6 +14012,7 @@ intrinsics:
       - *neon-v7
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
     types:
@@ -13933,6 +14028,7 @@ intrinsics:
       - *neon-v7
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
     types:
@@ -13952,6 +14048,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ["1"]]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -13971,6 +14068,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup]]}]]
       - *neon-fp16
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - [float16x4_t, f16]
@@ -14585,6 +14683,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vbsl']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['bsl']]}]]
       - *neon-unstable-f16
+      - *target-not-arm64ec
     safety: safe
     types:
       - ['vbslq_f16', 'uint16x8_t', 'float16x8_t', 'int16x8_t::splat(-1)']
diff --git a/library/stdarch/crates/stdarch-gen-arm/src/load_store_tests.rs b/library/stdarch/crates/stdarch-gen-arm/src/load_store_tests.rs
index 3f3bfed132c76..0f4de83dacb4a 100644
--- a/library/stdarch/crates/stdarch-gen-arm/src/load_store_tests.rs
+++ b/library/stdarch/crates/stdarch-gen-arm/src/load_store_tests.rs
@@ -85,7 +85,7 @@ pub fn generate_load_store_tests(
         TokenStream::from_str(&PREAMBLE).map_err(|e| format!("Preamble is invalid: {e}"))?;
     // Only output manual tests for the SVE set
     let manual_tests = match &load_intrinsics[0].target_features[..] {
-        [s] if s == "sve" => TokenStream::from_str(&MANUAL_TESTS)
+        [s] if s == "sve" => TokenStream::from_str(MANUAL_TESTS)
             .map_err(|e| format!("Manual tests are invalid: {e}"))?,
         _ => quote!(),
     };
diff --git a/library/stdarch/crates/stdarch-test/src/disassembly.rs b/library/stdarch/crates/stdarch-test/src/disassembly.rs
index f5167ea8d8ef3..4c136cff02ae6 100644
--- a/library/stdarch/crates/stdarch-test/src/disassembly.rs
+++ b/library/stdarch/crates/stdarch-test/src/disassembly.rs
@@ -27,9 +27,9 @@ fn normalize(mut symbol: &str) -> String {
         symbol = symbol[last_colon + 1..].to_string();
     }
 
-    // Normalize to no leading underscore to handle platforms that may
+    // Normalize to no leading mangling chars to handle platforms that may
     // inject extra ones in symbol names.
-    while symbol.starts_with('_') || symbol.starts_with('.') {
+    while symbol.starts_with('_') || symbol.starts_with('.') || symbol.starts_with('#') {
         symbol.remove(0);
     }
     // Windows/x86 has a suffix such as @@4.
@@ -49,6 +49,8 @@ pub(crate) fn disassemble_myself() -> HashSet<Function> {
         "i686-pc-windows-msvc"
     } else if cfg!(target_arch = "aarch64") {
         "aarch64-pc-windows-msvc"
+    } else if cfg!(target_arch = "arm64ec") {
+        "arm64ec-pc-windows-msvc"
     } else {
         panic!("disassembly unimplemented")
     };
diff --git a/library/stdarch/examples/hex.rs b/library/stdarch/examples/hex.rs
index 95ffbaf666cea..621f55bc0951f 100644
--- a/library/stdarch/examples/hex.rs
+++ b/library/stdarch/examples/hex.rs
@@ -22,7 +22,6 @@
 #![allow(
     clippy::unwrap_used,
     clippy::print_stdout,
-    clippy::unwrap_used,
     clippy::shadow_reuse,
     clippy::cast_possible_wrap,
     clippy::cast_ptr_alignment,
diff --git a/src/bootstrap/src/core/builder/cargo.rs b/src/bootstrap/src/core/builder/cargo.rs
index 6121bf7d9cd62..ee2bb710674c0 100644
--- a/src/bootstrap/src/core/builder/cargo.rs
+++ b/src/bootstrap/src/core/builder/cargo.rs
@@ -1227,6 +1227,11 @@ impl Builder<'_> {
             rustflags.arg("-Zehcont-guard");
         }
 
+        // Optionally override the rc.exe when compiling rustc on Windows.
+        if let Some(windows_rc) = &self.config.windows_rc {
+            cargo.env("RUSTC_WINDOWS_RC", windows_rc);
+        }
+
         // For `cargo doc` invocations, make rustdoc print the Rust version into the docs
         // This replaces spaces with tabs because RUSTDOCFLAGS does not
         // support arguments with regular spaces. Hopefully someday Cargo will
diff --git a/src/bootstrap/src/core/config/config.rs b/src/bootstrap/src/core/config/config.rs
index efb7ad9169971..a97399b3d4ae9 100644
--- a/src/bootstrap/src/core/config/config.rs
+++ b/src/bootstrap/src/core/config/config.rs
@@ -273,6 +273,7 @@ pub struct Config {
     pub gdb: Option<PathBuf>,
     pub lldb: Option<PathBuf>,
     pub python: Option<PathBuf>,
+    pub windows_rc: Option<PathBuf>,
     pub reuse: Option<PathBuf>,
     pub cargo_native_static: bool,
     pub configure_args: Vec<String>,
@@ -450,6 +451,7 @@ impl Config {
             nodejs: build_nodejs,
             npm: build_npm,
             python: build_python,
+            windows_rc: build_windows_rc,
             reuse: build_reuse,
             locked_deps: build_locked_deps,
             vendor: build_vendor,
@@ -1342,6 +1344,7 @@ impl Config {
                 .unwrap_or(rust_debug == Some(true)),
             vendor,
             verbose_tests,
+            windows_rc: build_windows_rc.map(PathBuf::from),
             // tidy-alphabetical-end
         }
     }
diff --git a/src/bootstrap/src/core/config/toml/build.rs b/src/bootstrap/src/core/config/toml/build.rs
index 25c19f1070a39..a9d4d3961c9b7 100644
--- a/src/bootstrap/src/core/config/toml/build.rs
+++ b/src/bootstrap/src/core/config/toml/build.rs
@@ -37,6 +37,7 @@ define_config! {
         nodejs: Option<String> = "nodejs",
         npm: Option<String> = "npm",
         python: Option<String> = "python",
+        windows_rc: Option<String> = "windows-rc",
         reuse: Option<String> = "reuse",
         locked_deps: Option<bool> = "locked-deps",
         vendor: Option<bool> = "vendor",
diff --git a/src/bootstrap/src/utils/change_tracker.rs b/src/bootstrap/src/utils/change_tracker.rs
index 2c48cebd2df05..6b187578c31ec 100644
--- a/src/bootstrap/src/utils/change_tracker.rs
+++ b/src/bootstrap/src/utils/change_tracker.rs
@@ -551,4 +551,9 @@ pub const CONFIG_CHANGE_HISTORY: &[ChangeInfo] = &[
         severity: ChangeSeverity::Info,
         summary: "There is now a bootstrap option called `rust.parallel-frontend-threads`, which can be used to set the number of threads for the compiler frontend used during compilation of Rust code.",
     },
+    ChangeInfo {
+        change_id: 146663,
+        severity: ChangeSeverity::Info,
+        summary: "New option `build.windows-rc` that will override which resource compiler on Windows will be used to compile Rust.",
+    },
 ];
diff --git a/src/librustdoc/html/static/js/rustdoc.d.ts b/src/librustdoc/html/static/js/rustdoc.d.ts
index 938ccc7d2c32e..951eb2291b89e 100644
--- a/src/librustdoc/html/static/js/rustdoc.d.ts
+++ b/src/librustdoc/html/static/js/rustdoc.d.ts
@@ -348,7 +348,7 @@ declare namespace rustdoc {
         returned: rustdoc.QueryElement[],
         is_alias: boolean,
         alias?: string,
-        original?: rustdoc.Rlow,
+        item: rustdoc.Row,
     }
 
     /**
@@ -533,4 +533,27 @@ declare namespace rustdoc {
      * Generated by `render_call_locations` in `render/mod.rs`.
      */
     type ScrapedLoc = [[number, number], string, string]
+
+    /**
+     * Each of these identifiers are used specially by
+     * type-driven search. Most of them are lang items
+     * in the compiler.
+     */
+    type TypeNameIds = {
+        "typeNameIdOfOutput": number,
+        "typeNameIdOfFnPtr": number,
+        "typeNameIdOfFn": number,
+        "typeNameIdOfFnMut": number,
+        "typeNameIdOfFnOnce": number,
+        "typeNameIdOfArray": number,
+        "typeNameIdOfSlice": number,
+        "typeNameIdOfArrayOrSlice": number,
+        "typeNameIdOfTuple": number,
+        "typeNameIdOfUnit": number,
+        "typeNameIdOfTupleOrUnit": number,
+        "typeNameIdOfReference": number,
+        "typeNameIdOfPointer": number,
+        "typeNameIdOfHof": number,
+        "typeNameIdOfNever": number,
+    };
 }
diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js
index 3b84ae2bed060..482134933a61e 100644
--- a/src/librustdoc/html/static/js/search.js
+++ b/src/librustdoc/html/static/js/search.js
@@ -1190,17 +1190,6 @@ class DocSearch {
         this.rootPath = rootPath;
         this.database = database;
 
-        this.typeNameIdOfOutput = -1;
-        this.typeNameIdOfArray = -1;
-        this.typeNameIdOfSlice = -1;
-        this.typeNameIdOfArrayOrSlice = -1;
-        this.typeNameIdOfTuple = -1;
-        this.typeNameIdOfUnit = -1;
-        this.typeNameIdOfTupleOrUnit = -1;
-        this.typeNameIdOfReference = -1;
-        this.typeNameIdOfPointer = -1;
-        this.typeNameIdOfHof = -1;
-
         this.utf8decoder = new TextDecoder();
 
         /** @type {Map<number|null, rustdoc.FunctionType>} */
@@ -1208,14 +1197,49 @@ class DocSearch {
     }
 
     /**
-     * Load search index. If you do not call this function, `execQuery`
-     * will never fulfill.
+     * Load type name ID set.
+     *
+     * Each of these identifiers are used specially by
+     * type-driven search. Most of them are lang items
+     * in the compiler.
+     *
+     * Use this function, which caches the result, and not
+     * getTypeNameIdsAsync, which is an internal implementation
+     * detail for this.
+     *
+     * @return {Promise<rustdoc.TypeNameIds>|rustdoc.TypeNameIds}
      */
-    async buildIndex() {
+    getTypeNameIds() {
+        if (this.typeNameIds) {
+            return this.typeNameIds;
+        }
         const nn = this.database.getData("normalizedName");
         if (!nn) {
-            return;
+            return {
+                typeNameIdOfOutput: -1,
+                typeNameIdOfFnPtr: -1,
+                typeNameIdOfFn: -1,
+                typeNameIdOfFnMut: -1,
+                typeNameIdOfFnOnce: -1,
+                typeNameIdOfArray: -1,
+                typeNameIdOfSlice: -1,
+                typeNameIdOfArrayOrSlice: -1,
+                typeNameIdOfTuple: -1,
+                typeNameIdOfUnit: -1,
+                typeNameIdOfTupleOrUnit: -1,
+                typeNameIdOfReference: -1,
+                typeNameIdOfPointer: -1,
+                typeNameIdOfHof: -1,
+                typeNameIdOfNever: -1,
+            };
         }
+        return this.getTypeNameIdsAsync(nn);
+    }
+    /**
+     * @param {stringdex.DataColumn} nn
+     * @returns {Promise<rustdoc.TypeNameIds>}
+     */
+    async getTypeNameIdsAsync(nn) {
         // Each of these identifiers are used specially by
         // type-driven search.
         const [
@@ -1274,21 +1298,39 @@ class DocSearch {
             }
             return -1;
         };
-        this.typeNameIdOfOutput = await first(output, TY_ASSOCTYPE, "");
-        this.typeNameIdOfFnPtr = await first(fn, TY_PRIMITIVE, "");
-        this.typeNameIdOfFn = await first(fn, TY_TRAIT, "core::ops");
-        this.typeNameIdOfFnMut = await first(fnMut, TY_TRAIT, "core::ops");
-        this.typeNameIdOfFnOnce = await first(fnOnce, TY_TRAIT, "core::ops");
-        this.typeNameIdOfArray = await first(array, TY_PRIMITIVE, "");
-        this.typeNameIdOfSlice = await first(slice, TY_PRIMITIVE, "");
-        this.typeNameIdOfArrayOrSlice = await first(arrayOrSlice, TY_PRIMITIVE, "");
-        this.typeNameIdOfTuple = await first(tuple, TY_PRIMITIVE, "");
-        this.typeNameIdOfUnit = await first(unit, TY_PRIMITIVE, "");
-        this.typeNameIdOfTupleOrUnit = await first(tupleOrUnit, TY_PRIMITIVE, "");
-        this.typeNameIdOfReference = await first(reference, TY_PRIMITIVE, "");
-        this.typeNameIdOfPointer = await first(pointer, TY_PRIMITIVE, "");
-        this.typeNameIdOfHof = await first(hof, TY_PRIMITIVE, "");
-        this.typeNameIdOfNever = await first(never, TY_PRIMITIVE, "");
+        const typeNameIdOfOutput = await first(output, TY_ASSOCTYPE, "");
+        const typeNameIdOfFnPtr = await first(fn, TY_PRIMITIVE, "");
+        const typeNameIdOfFn = await first(fn, TY_TRAIT, "core::ops");
+        const typeNameIdOfFnMut = await first(fnMut, TY_TRAIT, "core::ops");
+        const typeNameIdOfFnOnce = await first(fnOnce, TY_TRAIT, "core::ops");
+        const typeNameIdOfArray = await first(array, TY_PRIMITIVE, "");
+        const typeNameIdOfSlice = await first(slice, TY_PRIMITIVE, "");
+        const typeNameIdOfArrayOrSlice = await first(arrayOrSlice, TY_PRIMITIVE, "");
+        const typeNameIdOfTuple = await first(tuple, TY_PRIMITIVE, "");
+        const typeNameIdOfUnit = await first(unit, TY_PRIMITIVE, "");
+        const typeNameIdOfTupleOrUnit = await first(tupleOrUnit, TY_PRIMITIVE, "");
+        const typeNameIdOfReference = await first(reference, TY_PRIMITIVE, "");
+        const typeNameIdOfPointer = await first(pointer, TY_PRIMITIVE, "");
+        const typeNameIdOfHof = await first(hof, TY_PRIMITIVE, "");
+        const typeNameIdOfNever = await first(never, TY_PRIMITIVE, "");
+        this.typeNameIds = {
+            typeNameIdOfOutput,
+            typeNameIdOfFnPtr,
+            typeNameIdOfFn,
+            typeNameIdOfFnMut,
+            typeNameIdOfFnOnce,
+            typeNameIdOfArray,
+            typeNameIdOfSlice,
+            typeNameIdOfArrayOrSlice,
+            typeNameIdOfTuple,
+            typeNameIdOfUnit,
+            typeNameIdOfTupleOrUnit,
+            typeNameIdOfReference,
+            typeNameIdOfPointer,
+            typeNameIdOfHof,
+            typeNameIdOfNever,
+        };
+        return this.typeNameIds;
     }
 
     /**
@@ -1758,12 +1800,8 @@ class DocSearch {
         const l = crateNames.length;
         const names = [];
         for (let i = 0; i < l; ++i) {
-            names.push(crateNames.at(i).then(name => {
-                if (name === undefined) {
-                    return "";
-                }
-                return this.utf8decoder.decode(name);
-            }));
+            const name = await crateNames.at(i);
+            names.push(name === undefined ? "" : this.utf8decoder.decode(name));
         }
         return Promise.all(names);
     }
@@ -1820,6 +1858,9 @@ class DocSearch {
             modulePathData,
             exactModuleName,
             exactModulePathData,
+            parentName,
+            parentPath,
+            crate,
         ] = await Promise.all([
             entry && entry.modulePath !== null ? this.getName(entry.modulePath) : null,
             entry && entry.modulePath !== null ? this.getPathData(entry.modulePath) : null,
@@ -1829,6 +1870,13 @@ class DocSearch {
             entry && entry.exactModulePath !== null ?
                 this.getPathData(entry.exactModulePath) :
                 null,
+            entry && entry.parent !== null ?
+                this.getName(entry.parent) :
+                null,
+            entry && entry.parent !== null ?
+                this.getPathData(entry.parent) :
+                null,
+            entry ? nonnull(await this.getName(entry.krate)) : "",
         ]);
         const name = name_ === null ? "" : name_;
         const normalizedName = (name.indexOf("_") === -1 ?
@@ -1838,12 +1886,9 @@ class DocSearch {
             (modulePathData.modulePath === "" ?
                 moduleName :
                 `${modulePathData.modulePath}::${moduleName}`);
-        const [parentName, parentPath] = entry !== null && entry.parent !== null ?
-            await Promise.all([this.getName(entry.parent), this.getPathData(entry.parent)]) :
-            [null, null];
         return {
             id,
-            crate: entry ? nonnull(await this.getName(entry.krate)) : "",
+            crate,
             ty: entry ? entry.ty : nonnull(path).ty,
             name,
             normalizedName,
@@ -2148,6 +2193,7 @@ class DocSearch {
          * @returns {Promise<rustdoc.DisplayTypeSignature>}
          */
         const formatDisplayTypeSignature = async(obj, typeInfo, elems, returned) => {
+            const typeNameIds = await this.getTypeNameIds();
             const objType = obj.type;
             if (!objType) {
                 return {type: [], mappedNames: new Map(), whereClause: new Map()};
@@ -2173,10 +2219,12 @@ class DocSearch {
                                 return true;
                             },
                             0,
+                            typeNameIds,
                         );
                         return !!fnOutput;
                     },
                     0,
+                    typeNameIds,
                 );
             } else {
                 const highlighted = unifyFunctionTypes(
@@ -2189,6 +2237,7 @@ class DocSearch {
                         return true;
                     },
                     0,
+                    typeNameIds,
                 );
                 if (typeInfo === "elems") {
                     fnInputs = highlighted;
@@ -2268,7 +2317,7 @@ class DocSearch {
              * @returns {Promise<void>}
              */
             const writeHof = async(fnType, result) => {
-                const hofOutput = fnType.bindings.get(this.typeNameIdOfOutput) || [];
+                const hofOutput = fnType.bindings.get(typeNameIds.typeNameIdOfOutput) || [];
                 const hofInputs = fnType.generics;
                 pushText(fnType, result);
                 pushText({name: " (", highlighted: false}, result);
@@ -2309,11 +2358,14 @@ class DocSearch {
              * @returns {Promise<boolean>}
              */
             const writeSpecialPrimitive = async(fnType, result) => {
-                if (fnType.id === this.typeNameIdOfArray || fnType.id === this.typeNameIdOfSlice ||
-                    fnType.id === this.typeNameIdOfTuple || fnType.id === this.typeNameIdOfUnit) {
+                if (fnType.id === typeNameIds.typeNameIdOfArray ||
+                    fnType.id === typeNameIds.typeNameIdOfSlice ||
+                    fnType.id === typeNameIds.typeNameIdOfTuple ||
+                    fnType.id === typeNameIds.typeNameIdOfUnit
+                ) {
                     const [ob, sb] =
-                        fnType.id === this.typeNameIdOfArray ||
-                            fnType.id === this.typeNameIdOfSlice ?
+                        fnType.id === typeNameIds.typeNameIdOfArray ||
+                            fnType.id === typeNameIds.typeNameIdOfSlice ?
                         ["[", "]"] :
                         ["(", ")"];
                     pushText({ name: ob, highlighted: fnType.highlighted }, result);
@@ -2325,7 +2377,7 @@ class DocSearch {
                     );
                     pushText({ name: sb, highlighted: fnType.highlighted }, result);
                     return true;
-                } else if (fnType.id === this.typeNameIdOfReference) {
+                } else if (fnType.id === typeNameIds.typeNameIdOfReference) {
                     pushText({ name: "&", highlighted: fnType.highlighted }, result);
                     let prevHighlighted = false;
                     await onEachBtwnAsync(
@@ -2341,7 +2393,7 @@ class DocSearch {
                         }, result),
                     );
                     return true;
-                } else if (fnType.id === this.typeNameIdOfPointer) {
+                } else if (fnType.id === typeNameIds.typeNameIdOfPointer) {
                     pushText({ name: "*", highlighted: fnType.highlighted }, result);
                     if (fnType.generics.length < 2) {
                         pushText({ name: "const ", highlighted: fnType.highlighted }, result);
@@ -2361,14 +2413,14 @@ class DocSearch {
                     );
                     return true;
                 } else if (
-                    fnType.id === this.typeNameIdOfFn ||
-                    fnType.id === this.typeNameIdOfFnMut ||
-                    fnType.id === this.typeNameIdOfFnOnce ||
-                    fnType.id === this.typeNameIdOfFnPtr
+                    fnType.id === typeNameIds.typeNameIdOfFn ||
+                    fnType.id === typeNameIds.typeNameIdOfFnMut ||
+                    fnType.id === typeNameIds.typeNameIdOfFnOnce ||
+                    fnType.id === typeNameIds.typeNameIdOfFnPtr
                 ) {
                     await writeHof(fnType, result);
                     return true;
-                } else if (fnType.id === this.typeNameIdOfNever) {
+                } else if (fnType.id === typeNameIds.typeNameIdOfNever) {
                     pushText({ name: "!", highlighted: fnType.highlighted }, result);
                     return true;
                 }
@@ -2426,10 +2478,10 @@ class DocSearch {
                             return;
                         }
                     } else if (fnType.ty === TY_TRAIT && (
-                        fnType.id === this.typeNameIdOfFn ||
-                        fnType.id === this.typeNameIdOfFnMut ||
-                        fnType.id === this.typeNameIdOfFnOnce ||
-                        fnType.id === this.typeNameIdOfFnPtr
+                        fnType.id === typeNameIds.typeNameIdOfFn ||
+                        fnType.id === typeNameIds.typeNameIdOfFnMut ||
+                        fnType.id === typeNameIds.typeNameIdOfFnOnce ||
+                        fnType.id === typeNameIds.typeNameIdOfFnPtr
                     )) {
                         await writeHof(fnType, result);
                         return;
@@ -2540,7 +2592,7 @@ class DocSearch {
          * Add extra data to result objects, and filter items that have been
          * marked for removal.
          *
-         * @param {[rustdoc.PlainResultObject, rustdoc.Row][]} results
+         * @param {rustdoc.PlainResultObject[]} results
          * @param {"sig"|"elems"|"returned"|null} typeInfo
          * @param {Set<string>} duplicates
          * @returns {rustdoc.ResultObject[]}
@@ -2548,7 +2600,8 @@ class DocSearch {
         const transformResults = (results, typeInfo, duplicates) => {
             const out = [];
 
-            for (const [result, item] of results) {
+            for (const result of results) {
+                const item = result.item;
                 if (item.id !== -1) {
                     const res = buildHrefAndPath(item);
                     // many of these properties don't strictly need to be
@@ -2633,7 +2686,7 @@ class DocSearch {
                 const normalizedUserQuery = parsedQuery.userQuery.toLowerCase();
                 const isMixedCase = normalizedUserQuery !== userQuery;
                 /**
-                 * @type {[rustdoc.PlainResultObject, rustdoc.Row][]}
+                 * @type {rustdoc.PlainResultObject[]}
                  */
                 const result_list = [];
                 for (const result of results.values()) {
@@ -2641,23 +2694,22 @@ class DocSearch {
                         continue;
                     }
                     /**
-                     * @type {rustdoc.Row?}
+                     * @type {rustdoc.Row}
                      */
-                    const item = await this.getRow(result.id, typeInfo !== null);
-                    if (!item) {
-                        continue;
-                    }
+                    const item = result.item;
                     if (filterCrates !== null && item.crate !== filterCrates) {
                         continue;
                     }
                     if (item) {
-                        result_list.push([result, item]);
+                        result_list.push(result);
                     } else {
                         continue;
                     }
                 }
 
-                result_list.sort(([aaa, aai], [bbb, bbi]) => {
+                result_list.sort((aaa, bbb) => {
+                    const aai = aaa.item;
+                    const bbi = bbb.item;
                     /** @type {number} */
                     let a;
                     /** @type {number} */
@@ -2804,6 +2856,7 @@ class DocSearch {
          * @param {number} unboxingDepth
          *     - Limit checks that Ty matches Vec<Ty>,
          *       but not Vec<ParamEnvAnd<WithInfcx<ConstTy<Interner<Ty=Ty>>>>>
+         * @param {rustdoc.TypeNameIds} typeNameIds
          *
          * @return {rustdoc.HighlightedFunctionType[]|null}
          *     - Returns highlighted results if a match, null otherwise.
@@ -2815,6 +2868,7 @@ class DocSearch {
             mgensIn,
             solutionCb,
             unboxingDepth,
+            typeNameIds,
         ) {
             if (unboxingDepth >= UNBOXING_LIMIT) {
                 return null;
@@ -2837,7 +2891,12 @@ class DocSearch {
                 && queryElems[0].bindings.size === 0) {
                 const queryElem = queryElems[0];
                 for (const [i, fnType] of fnTypesIn.entries()) {
-                    if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, mgens)) {
+                    if (!unifyFunctionTypeIsMatchCandidate(
+                        fnType,
+                        queryElem,
+                        mgens,
+                        typeNameIds,
+                    )) {
                         continue;
                     }
                     if (fnType.id !== null &&
@@ -2873,6 +2932,7 @@ class DocSearch {
                                 mgens ? new Map(mgens) : null,
                                 solutionCb,
                                 unboxingDepth,
+                                typeNameIds,
                             ) || fnType.generics,
                         });
                         return highlighted;
@@ -2885,6 +2945,7 @@ class DocSearch {
                         whereClause,
                         mgens,
                         unboxingDepth + 1,
+                        typeNameIds,
                     )) {
                         continue;
                     }
@@ -2898,6 +2959,7 @@ class DocSearch {
                             mgens,
                             solutionCb,
                             unboxingDepth + 1,
+                            typeNameIds,
                         );
                         if (highlightedGenerics) {
                             const highlighted = [...fnTypesIn];
@@ -2916,6 +2978,7 @@ class DocSearch {
                             mgens ? new Map(mgens) : null,
                             solutionCb,
                             unboxingDepth + 1,
+                            typeNameIds,
                         );
                         if (highlightedGenerics) {
                             const highlighted = [...fnTypesIn];
@@ -2960,7 +3023,12 @@ class DocSearch {
             let queryElemsTmp = null;
             for (let i = flast; i >= 0; i -= 1) {
                 const fnType = fnTypes[i];
-                if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, mgens)) {
+                if (!unifyFunctionTypeIsMatchCandidate(
+                    fnType,
+                    queryElem,
+                    mgens,
+                    typeNameIds,
+                )) {
                     continue;
                 }
                 let mgensScratch;
@@ -3004,6 +3072,7 @@ class DocSearch {
                             whereClause,
                             mgensScratch,
                             unboxingDepth,
+                            typeNameIds,
                         );
                         if (!solution) {
                             return false;
@@ -3017,6 +3086,7 @@ class DocSearch {
                                 simplifiedMgens,
                                 solutionCb,
                                 unboxingDepth,
+                                typeNameIds,
                             );
                             if (unifiedGenerics !== null) {
                                 unifiedGenericsMgens = simplifiedMgens;
@@ -3026,6 +3096,7 @@ class DocSearch {
                         return false;
                     },
                     unboxingDepth,
+                    typeNameIds,
                 );
                 if (passesUnification) {
                     passesUnification.length = fl;
@@ -3042,6 +3113,7 @@ class DocSearch {
                                 unifiedGenericsMgens,
                                 solutionCb,
                                 unboxingDepth,
+                                typeNameIds,
                             // @ts-expect-error
                             ) : unifiedGenerics.splice(0, v.length)];
                         })),
@@ -3061,6 +3133,7 @@ class DocSearch {
                     whereClause,
                     mgens,
                     unboxingDepth + 1,
+                    typeNameIds,
                 )) {
                     continue;
                 }
@@ -3077,6 +3150,7 @@ class DocSearch {
                     mgens,
                     solutionCb,
                     unboxingDepth + 1,
+                    typeNameIds,
                 );
                 if (passesUnification) {
                     const highlightedGenerics = passesUnification.slice(
@@ -3117,6 +3191,7 @@ class DocSearch {
          * @param {number} unboxingDepth
          *     - Limit checks that Ty matches Vec<Ty>,
          *       but not Vec<ParamEnvAnd<WithInfcx<ConstTy<Interner<Ty=Ty>>>>>
+         * @param {rustdoc.TypeNameIds} typeNameIds
          *
          * @return {rustdoc.HighlightedFunctionType[]|null}
          *     - Returns highlighted results if a match, null otherwise.
@@ -3128,6 +3203,7 @@ class DocSearch {
             mgensIn,
             solutionCb,
             unboxingDepth,
+            typeNameIds,
         ) {
             if (unboxingDepth >= UNBOXING_LIMIT) {
                 return null;
@@ -3145,7 +3221,12 @@ class DocSearch {
             }
             const fnType = fnTypesIn[0];
             const queryElem = queryElems[0];
-            if (unifyFunctionTypeIsMatchCandidate(fnType, queryElem, mgens)) {
+            if (unifyFunctionTypeIsMatchCandidate(
+                fnType,
+                queryElem,
+                mgens,
+                typeNameIds,
+            )) {
                 if (fnType.id !== null &&
                     fnType.id < 0 &&
                     queryElem.id !== null &&
@@ -3163,6 +3244,7 @@ class DocSearch {
                             mgensScratch,
                             solutionCb,
                             unboxingDepth,
+                            typeNameIds,
                         );
                         if (fnTypesRemaining) {
                             const highlighted = [fnType, ...fnTypesRemaining];
@@ -3189,6 +3271,7 @@ class DocSearch {
                                 whereClause,
                                 mgensScratch,
                                 unboxingDepth,
+                                typeNameIds,
                             );
                             if (!solution) {
                                 return false;
@@ -3202,6 +3285,7 @@ class DocSearch {
                                     simplifiedMgens,
                                     solutionCb,
                                     unboxingDepth,
+                                    typeNameIds,
                                 );
                                 if (unifiedGenerics !== null) {
                                     return true;
@@ -3209,6 +3293,7 @@ class DocSearch {
                             }
                         },
                         unboxingDepth,
+                        typeNameIds,
                     );
                     if (fnTypesRemaining) {
                         const highlighted = [fnType, ...fnTypesRemaining];
@@ -3227,6 +3312,7 @@ class DocSearch {
                 whereClause,
                 mgens,
                 unboxingDepth + 1,
+                typeNameIds,
             )) {
                 let highlightedRemaining;
                 if (fnType.id !== null && fnType.id < 0) {
@@ -3248,6 +3334,7 @@ class DocSearch {
                                 mgensScratch,
                                 solutionCb,
                                 unboxingDepth,
+                                typeNameIds,
                             );
                             if (hl) {
                                 highlightedRemaining = hl;
@@ -3255,6 +3342,7 @@ class DocSearch {
                             return hl;
                         },
                         unboxingDepth + 1,
+                        typeNameIds,
                     );
                     if (highlightedGenerics) {
                         return [Object.assign({
@@ -3282,6 +3370,7 @@ class DocSearch {
                                 mgensScratch,
                                 solutionCb,
                                 unboxingDepth,
+                                typeNameIds,
                             );
                             if (hl) {
                                 highlightedRemaining = hl;
@@ -3289,6 +3378,7 @@ class DocSearch {
                             return hl;
                         },
                         unboxingDepth + 1,
+                        typeNameIds,
                     );
                     if (highlightedGenerics) {
                         return [Object.assign({}, fnType, {
@@ -3314,10 +3404,11 @@ class DocSearch {
          *
          * @param {rustdoc.FunctionType} fnType
          * @param {rustdoc.QueryElement} queryElem
+         * @param {rustdoc.TypeNameIds} typeNameIds
          * @param {Map<number,number>|null} mgensIn - Map query generics to function generics.
          * @returns {boolean}
          */
-        const unifyFunctionTypeIsMatchCandidate = (fnType, queryElem, mgensIn) => {
+        const unifyFunctionTypeIsMatchCandidate = (fnType, queryElem, mgensIn, typeNameIds) => {
             // type filters look like `trait:Read` or `enum:Result`
             if (!typePassesFilter(queryElem.typeFilter, fnType.ty)) {
                 return false;
@@ -3336,21 +3427,23 @@ class DocSearch {
             } else {
                 // For these special cases, matching code need added to the inverted index.
                 // search_index.rs -> convert_render_type does this
-                if (queryElem.id === this.typeNameIdOfArrayOrSlice &&
-                    (fnType.id === this.typeNameIdOfSlice || fnType.id === this.typeNameIdOfArray)
+                if (queryElem.id === typeNameIds.typeNameIdOfArrayOrSlice &&
+                    (fnType.id === typeNameIds.typeNameIdOfSlice ||
+                        fnType.id === typeNameIds.typeNameIdOfArray)
                 ) {
                     // [] matches primitive:array or primitive:slice
                     // if it matches, then we're fine, and this is an appropriate match candidate
-                } else if (queryElem.id === this.typeNameIdOfTupleOrUnit &&
-                    (fnType.id === this.typeNameIdOfTuple || fnType.id === this.typeNameIdOfUnit)
+                } else if (queryElem.id === typeNameIds.typeNameIdOfTupleOrUnit &&
+                    (fnType.id === typeNameIds.typeNameIdOfTuple ||
+                        fnType.id === typeNameIds.typeNameIdOfUnit)
                 ) {
                     // () matches primitive:tuple or primitive:unit
                     // if it matches, then we're fine, and this is an appropriate match candidate
-                } else if (queryElem.id === this.typeNameIdOfHof && (
-                    fnType.id === this.typeNameIdOfFn ||
-                    fnType.id === this.typeNameIdOfFnMut ||
-                    fnType.id === this.typeNameIdOfFnOnce ||
-                    fnType.id === this.typeNameIdOfFnPtr
+                } else if (queryElem.id === typeNameIds.typeNameIdOfHof && (
+                    fnType.id === typeNameIds.typeNameIdOfFn ||
+                    fnType.id === typeNameIds.typeNameIdOfFnMut ||
+                    fnType.id === typeNameIds.typeNameIdOfFnOnce ||
+                    fnType.id === typeNameIds.typeNameIdOfFnPtr
                 )) {
                     // -> matches fn, fnonce, and fnmut
                     // if it matches, then we're fine, and this is an appropriate match candidate
@@ -3414,6 +3507,7 @@ class DocSearch {
          * @param {Map<number,number>|null} mgensIn - Map query generics to function generics.
          *                                            Never modified.
          * @param {number} unboxingDepth
+         * @param {rustdoc.TypeNameIds} typeNameIds
          * @returns {false|{
          *     mgens: [Map<number,number>|null], simplifiedGenerics: rustdoc.FunctionType[]
          * }}
@@ -3424,6 +3518,7 @@ class DocSearch {
             whereClause,
             mgensIn,
             unboxingDepth,
+            typeNameIds,
         ) {
             if (fnType.bindings.size < queryElem.bindings.size) {
                 return false;
@@ -3455,6 +3550,7 @@ class DocSearch {
                                 return false;
                             },
                             unboxingDepth,
+                            typeNameIds,
                         );
                         return newSolutions;
                     });
@@ -3486,6 +3582,7 @@ class DocSearch {
          * @param {rustdoc.FunctionType[][]} whereClause - Trait bounds for generic items.
          * @param {Map<number,number>|null} mgens - Map query generics to function generics.
          * @param {number} unboxingDepth
+         * @param {rustdoc.TypeNameIds} typeNameIds
          * @returns {boolean}
          */
         function unifyFunctionTypeIsUnboxCandidate(
@@ -3494,6 +3591,7 @@ class DocSearch {
             whereClause,
             mgens,
             unboxingDepth,
+            typeNameIds,
         ) {
             if (unboxingDepth >= UNBOXING_LIMIT) {
                 return false;
@@ -3512,6 +3610,7 @@ class DocSearch {
                     whereClause,
                     mgens,
                     unboxingDepth,
+                    typeNameIds,
                 );
             } else if (fnType.unboxFlag &&
                 (fnType.generics.length > 0 || fnType.bindings.size > 0)) {
@@ -3525,6 +3624,7 @@ class DocSearch {
                     whereClause,
                     mgens,
                     unboxingDepth,
+                    typeNameIds,
                 );
             }
             return false;
@@ -3537,14 +3637,15 @@ class DocSearch {
          * @param {rustdoc.QueryElement[]} elems
          * @param {rustdoc.FunctionType[]} list    - A list of function types.
          * @param {rustdoc.FunctionType[][]} where_clause - Trait bounds for generic items.
+         * @param {rustdoc.TypeNameIds} typeNameIds
          */
-        function containsTypeFromQuery(elems, list, where_clause) {
+        function containsTypeFromQuery(elems, list, where_clause, typeNameIds) {
             if (!list) return false;
             for (const ty of elems) {
                 if (ty.id !== null && ty.id < 0) {
                     continue;
                 }
-                if (checkIfInList(list, ty, where_clause, null, 0)) {
+                if (checkIfInList(list, ty, where_clause, null, 0, typeNameIds)) {
                     return true;
                 }
             }
@@ -3560,12 +3661,13 @@ class DocSearch {
          * @param {rustdoc.FunctionType[][]} whereClause - Trait bounds for generic items.
          * @param {Map<number,number>|null} mgens - Map functions generics to query generics.
          * @param {number} unboxingDepth
+         * @param {rustdoc.TypeNameIds} typeNameIds
          *
          * @return {boolean} - Returns true if found, false otherwise.
          */
-        function checkIfInList(list, elem, whereClause, mgens, unboxingDepth) {
+        function checkIfInList(list, elem, whereClause, mgens, unboxingDepth, typeNameIds) {
             for (const entry of list) {
-                if (checkType(entry, elem, whereClause, mgens, unboxingDepth)) {
+                if (checkType(entry, elem, whereClause, mgens, unboxingDepth, typeNameIds)) {
                     return true;
                 }
             }
@@ -3580,11 +3682,12 @@ class DocSearch {
          * @param {rustdoc.QueryElement} elem          - The element from the parsed query.
          * @param {rustdoc.FunctionType[][]} whereClause - Trait bounds for generic items.
          * @param {Map<number,number>|null} mgens - Map query generics to function generics.
+         * @param {number} unboxingDepth
+         * @param {rustdoc.TypeNameIds} typeNameIds
          *
          * @return {boolean} - Returns true if the type matches, false otherwise.
          */
-        // @ts-expect-error
-        const checkType = (row, elem, whereClause, mgens, unboxingDepth) => {
+        const checkType = (row, elem, whereClause, mgens, unboxingDepth, typeNameIds) => {
             if (unboxingDepth >= UNBOXING_LIMIT) {
                 return false;
             }
@@ -3593,9 +3696,9 @@ class DocSearch {
                 row.generics.length === 0 && elem.generics.length === 0 &&
                 row.bindings.size === 0 && elem.bindings.size === 0 &&
                 // special case
-                elem.id !== this.typeNameIdOfArrayOrSlice &&
-                elem.id !== this.typeNameIdOfHof &&
-                elem.id !== this.typeNameIdOfTupleOrUnit
+                elem.id !== typeNameIds.typeNameIdOfArrayOrSlice &&
+                elem.id !== typeNameIds.typeNameIdOfHof &&
+                elem.id !== typeNameIds.typeNameIdOfTupleOrUnit
             ) {
                 return row.id === elem.id && typePassesFilter(elem.typeFilter, row.ty);
             } else {
@@ -3607,6 +3710,7 @@ class DocSearch {
                     mgens,
                     () => true,
                     unboxingDepth,
+                    typeNameIds,
                 );
             }
         };
@@ -3673,7 +3777,10 @@ class DocSearch {
         /**
          * Compute an "edit distance" that ignores missing path elements.
          * @param {string[]} contains search query path
-         * @param {rustdoc.Row} row indexed item
+         * @param {{
+         *     modulePath: string,
+         *     parent: { path: rustdoc.PathData, name: string}?,
+         * }} row indexed item
          * @returns {null|number} edit distance
          */
         function checkRowPath(contains, row) {
@@ -3752,7 +3859,7 @@ class DocSearch {
                         is_alias: true,
                         elems: [], // only used in type-based queries
                         returned: [], // only used in type-based queries
-                        original: await this.getRow(alias, false),
+                        item: nonnull(await this.getRow(alias, false)),
                     };
                 };
                 /**
@@ -3834,6 +3941,7 @@ class DocSearch {
                             elems: [], // only used in type-based queries
                             returned: [], // only used in type-based queries
                             is_alias: false,
+                            item: row,
                         } : null;
                     } else {
                         return {
@@ -3848,6 +3956,7 @@ class DocSearch {
                             elems: [], // only used in type-based queries
                             returned: [], // only used in type-based queries
                             is_alias: false,
+                            item: row,
                         };
                     }
                 };
@@ -4359,9 +4468,10 @@ class DocSearch {
                 };
 
                 // finally, we can do the actual unification loop
-                const [allInputs, allOutput] = await Promise.all([
+                const [allInputs, allOutput, typeNameIds] = await Promise.all([
                     unpackPostingsListAll(inputs, "invertedFunctionInputsIndex"),
                     unpackPostingsListAll(output, "invertedFunctionOutputIndex"),
+                    this.getTypeNameIds(),
                 ]);
                 let checkCounter = 0;
                 /**
@@ -4430,12 +4540,18 @@ class DocSearch {
                                             mgens,
                                             checkTypeMgensForConflict,
                                             0, // unboxing depth
+                                            typeNameIds,
                                         );
                                     },
                                     0, // unboxing depth
+                                    typeNameIds,
                                 )) {
                                     return null;
                                 }
+                                const item = await this.getRow(id, true);
+                                if (!item) {
+                                    return null;
+                                }
                                 const result = {
                                     id,
                                     dist: fnData.elemCount,
@@ -4444,8 +4560,9 @@ class DocSearch {
                                     elems: inputs,
                                     returned: output,
                                     is_alias: false,
+                                    item,
                                 };
-                                const entry = await this.getEntryData(id);
+                                const entry = item.entry;
                                 if ((entry && !isFnLikeTy(entry.ty)) ||
                                     (isReturnTypeQuery &&
                                         functionSignature &&
@@ -4453,6 +4570,7 @@ class DocSearch {
                                             output,
                                             functionSignature.inputs,
                                             functionSignature.where_clause,
+                                            typeNameIds,
                                         )
                                     )
                                 ) {
@@ -5273,7 +5391,6 @@ if (ROOT_PATH === null) {
 const database = await Stringdex.loadDatabase(hooks);
 if (typeof window !== "undefined") {
     docSearch = new DocSearch(ROOT_PATH, database);
-    await docSearch.buildIndex();
     onEachLazy(document.querySelectorAll(
         ".search-form.loading",
     ), form => {
@@ -5286,7 +5403,6 @@ if (typeof window !== "undefined") {
     }
 } else if (typeof exports !== "undefined") {
     docSearch = new DocSearch(ROOT_PATH, database);
-    await docSearch.buildIndex();
     return { docSearch, DocSearch };
 }
 };
diff --git a/src/librustdoc/html/static/js/stringdex.d.ts b/src/librustdoc/html/static/js/stringdex.d.ts
index 2eb1fdf95d845..71c6bfdf48145 100644
--- a/src/librustdoc/html/static/js/stringdex.d.ts
+++ b/src/librustdoc/html/static/js/stringdex.d.ts
@@ -29,7 +29,7 @@ declare namespace stringdex {
      */
     interface DataColumn {
         isEmpty(id: number): boolean;
-        at(id: number): Promise<Uint8Array|undefined>;
+        at(id: number): Promise<Uint8Array>|Uint8Array|undefined;
         search(name: Uint8Array|string): Promise<Trie?>;
         searchLev(name: Uint8Array|string): AsyncGenerator<Trie>;
         length: number,
diff --git a/src/librustdoc/html/static/js/stringdex.js b/src/librustdoc/html/static/js/stringdex.js
index b7f605a10351d..5bdc81e330efa 100644
--- a/src/librustdoc/html/static/js/stringdex.js
+++ b/src/librustdoc/html/static/js/stringdex.js
@@ -2261,7 +2261,7 @@ function loadDatabase(hooks) {
             this.hashes = hashes;
             this.emptyset = emptyset;
             this.name = name;
-            /** @type {{"hash": Uint8Array, "data": Promise<Uint8Array[]>?, "end": number}[]} */
+            /** @type {{"hash": Uint8Array, "data": Uint8Array[]?, "end": number}[]} */
             this.buckets = [];
             this.bucket_keys = [];
             const l = counts.length;
@@ -2295,64 +2295,75 @@ function loadDatabase(hooks) {
         /**
          * Look up a cell by row ID.
          * @param {number} id
-         * @returns {Promise<Uint8Array|undefined>}
+         * @returns {Promise<Uint8Array>|Uint8Array|undefined}
          */
-        async at(id) {
+        at(id) {
             if (this.emptyset.contains(id)) {
-                return Promise.resolve(EMPTY_UINT8);
+                return EMPTY_UINT8;
             } else {
                 let idx = -1;
                 while (this.bucket_keys[idx + 1] <= id) {
                     idx += 1;
                 }
                 if (idx === -1 || idx >= this.bucket_keys.length) {
-                    return Promise.resolve(undefined);
+                    return undefined;
                 } else {
                     const start = this.bucket_keys[idx];
-                    const {hash, end} = this.buckets[idx];
-                    let data = this.buckets[idx].data;
+                    const bucket = this.buckets[idx];
+                    const data = this.buckets[idx].data;
                     if (data === null) {
-                        const dataSansEmptysetOrig = await registry.dataLoadByNameAndHash(
-                            this.name,
-                            hash,
-                        );
-                        // After the `await` resolves, another task might fill
-                        // in the data. If so, we should use that.
-                        data = this.buckets[idx].data;
-                        if (data !== null) {
-                            return (await data)[id - start];
-                        }
-                        const dataSansEmptyset = [...dataSansEmptysetOrig];
-                        /** @type {(Uint8Array[])|null} */
-                        let dataWithEmptyset = null;
-                        let pos = start;
-                        let insertCount = 0;
-                        while (pos < end) {
-                            if (this.emptyset.contains(pos)) {
-                                if (dataWithEmptyset === null) {
-                                    dataWithEmptyset = dataSansEmptyset.splice(0, insertCount);
-                                } else if (insertCount !== 0) {
-                                    dataWithEmptyset.push(
-                                        ...dataSansEmptyset.splice(0, insertCount),
-                                    );
-                                }
-                                insertCount = 0;
-                                dataWithEmptyset.push(EMPTY_UINT8);
-                            } else {
-                                insertCount += 1;
-                            }
-                            pos += 1;
-                        }
-                        data = Promise.resolve(
-                            dataWithEmptyset === null ?
-                                dataSansEmptyset :
-                                dataWithEmptyset.concat(dataSansEmptyset),
+                        return this.atAsyncFetch(id, start, bucket);
+                    } else {
+                        return data[id - start];
+                    }
+                }
+            }
+        }
+        /**
+         * Look up a cell by row ID.
+         * @param {number} id
+         * @param {number} start
+         * @param {{hash: Uint8Array, data: Uint8Array[] | null, end: number}} bucket
+         * @returns {Promise<Uint8Array>}
+         */
+        async atAsyncFetch(id, start, bucket) {
+            const {hash, end} = bucket;
+            const dataSansEmptysetOrig = await registry.dataLoadByNameAndHash(
+                this.name,
+                hash,
+            );
+            // After the `await` resolves, another task might fill
+            // in the data. If so, we should use that.
+            let data = bucket.data;
+            if (data !== null) {
+                return data[id - start];
+            }
+            const dataSansEmptyset = [...dataSansEmptysetOrig];
+            /** @type {(Uint8Array[])|null} */
+            let dataWithEmptyset = null;
+            let pos = start;
+            let insertCount = 0;
+            while (pos < end) {
+                if (this.emptyset.contains(pos)) {
+                    if (dataWithEmptyset === null) {
+                        dataWithEmptyset = dataSansEmptyset.splice(0, insertCount);
+                    } else if (insertCount !== 0) {
+                        dataWithEmptyset.push(
+                            ...dataSansEmptyset.splice(0, insertCount),
                         );
-                        this.buckets[idx].data = data;
                     }
-                    return (await data)[id - start];
+                    insertCount = 0;
+                    dataWithEmptyset.push(EMPTY_UINT8);
+                } else {
+                    insertCount += 1;
                 }
+                pos += 1;
             }
+            data = dataWithEmptyset === null ?
+                dataSansEmptyset :
+                dataWithEmptyset.concat(dataSansEmptyset);
+            bucket.data = data;
+            return data[id - start];
         }
         /**
          * Search by exact substring
diff --git a/src/tools/miri/.github/workflows/ci.yml b/src/tools/miri/.github/workflows/ci.yml
index c0fed96d4e6e3..e1a948c92faa4 100644
--- a/src/tools/miri/.github/workflows/ci.yml
+++ b/src/tools/miri/.github/workflows/ci.yml
@@ -31,21 +31,22 @@ jobs:
             os: ubuntu-24.04-arm
             multiarch: armhf
             gcc_cross: arm-linux-gnueabihf
-          - host_target: riscv64gc-unknown-linux-gnu
-            os: ubuntu-latest
-            multiarch: riscv64
-            gcc_cross: riscv64-linux-gnu
-            qemu: true
-          - host_target: s390x-unknown-linux-gnu
-            os: ubuntu-latest
-            multiarch: s390x
-            gcc_cross: s390x-linux-gnu
-            qemu: true
-          - host_target: powerpc64le-unknown-linux-gnu
-            os: ubuntu-latest
-            multiarch: ppc64el
-            gcc_cross: powerpc64le-linux-gnu
-            qemu: true
+          # Disabled due to Ubuntu repo trouble
+          # - host_target: riscv64gc-unknown-linux-gnu
+          #   os: ubuntu-latest
+          #   multiarch: riscv64
+          #   gcc_cross: riscv64-linux-gnu
+          #   qemu: true
+          # - host_target: s390x-unknown-linux-gnu
+          #   os: ubuntu-latest
+          #   multiarch: s390x
+          #   gcc_cross: s390x-linux-gnu
+          #   qemu: true
+          # - host_target: powerpc64le-unknown-linux-gnu
+          #   os: ubuntu-latest
+          #   multiarch: ppc64el
+          #   gcc_cross: powerpc64le-linux-gnu
+          #   qemu: true
           - host_target: aarch64-apple-darwin
             os: macos-latest
           - host_target: i686-pc-windows-msvc
@@ -67,7 +68,7 @@ jobs:
       - name: install multiarch
         if: ${{ matrix.multiarch != '' }}
         run: |
-          # s390x, ppc64el need Ubuntu Ports to be in the mirror list
+          # s390x, ppc64el, riscv64 need Ubuntu Ports to be in the mirror list
           sudo bash -c "echo 'https://ports.ubuntu.com/	priority:4' >> /etc/apt/apt-mirrors.txt"
           # Add architecture
           sudo dpkg --add-architecture ${{ matrix.multiarch }}
diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock
index 4df17c83c7e48..8792d90ac5366 100644
--- a/src/tools/miri/Cargo.lock
+++ b/src/tools/miri/Cargo.lock
@@ -363,9 +363,9 @@ dependencies = [
 
 [[package]]
 name = "cxx"
-version = "1.0.161"
+version = "1.0.173"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3523cc02ad831111491dd64b27ad999f1ae189986728e477604e61b81f828df"
+checksum = "6c64ed3da1c337cbaae7223cdcff8b4dddf698d188cd3eaddd1116f6b0295950"
 dependencies = [
  "cc",
  "cxxbridge-cmd",
@@ -377,9 +377,9 @@ dependencies = [
 
 [[package]]
 name = "cxx-build"
-version = "1.0.161"
+version = "1.0.173"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "212b754247a6f07b10fa626628c157593f0abf640a3dd04cce2760eca970f909"
+checksum = "ae0a26a75a05551f5ae3d75b3557543d06682284eaa7419113162d602cb45766"
 dependencies = [
  "cc",
  "codespan-reporting",
@@ -392,9 +392,9 @@ dependencies = [
 
 [[package]]
 name = "cxxbridge-cmd"
-version = "1.0.161"
+version = "1.0.173"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f426a20413ec2e742520ba6837c9324b55ffac24ead47491a6e29f933c5b135a"
+checksum = "952d408b6002b7db4b36da07c682a9cbb34f2db0efa03e976ae50a388414e16c"
 dependencies = [
  "clap",
  "codespan-reporting",
@@ -406,15 +406,15 @@ dependencies = [
 
 [[package]]
 name = "cxxbridge-flags"
-version = "1.0.161"
+version = "1.0.173"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a258b6069020b4e5da6415df94a50ee4f586a6c38b037a180e940a43d06a070d"
+checksum = "ccbd201b471c75c6abb6321cace706d1982d270e308b891c11a3262d320f5265"
 
 [[package]]
 name = "cxxbridge-macro"
-version = "1.0.161"
+version = "1.0.173"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8dec184b52be5008d6eaf7e62fc1802caf1ad1227d11b3b7df2c409c7ffc3f4"
+checksum = "2bea8b915bbc4cb4288f242aa7ca18b23ecc6965e4d6e7c1b07905e3fe2e0c41"
 dependencies = [
  "indexmap",
  "proc-macro2",
@@ -501,9 +501,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
 
 [[package]]
 name = "foldhash"
-version = "0.1.5"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
 
 [[package]]
 name = "form_urlencoded"
diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml
index 924dfed2bcab4..12123c260da0a 100644
--- a/src/tools/miri/Cargo.toml
+++ b/src/tools/miri/Cargo.toml
@@ -70,7 +70,7 @@ harness = false
 
 [features]
 default = ["stack-cache", "native-lib"]
-genmc = ["dep:genmc-sys"] # this enables a GPL dependency!
+genmc = ["dep:genmc-sys"]
 stack-cache = []
 stack-cache-consistency-check = ["stack-cache"]
 tracing = ["serde_json"]
diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md
index d433ee8daaa8d..a5214e213b3cf 100644
--- a/src/tools/miri/README.md
+++ b/src/tools/miri/README.md
@@ -220,7 +220,7 @@ degree documented below):
   - `solaris` / `illumos`: maintained by @devnexen. Supports the entire test suite.
   - `freebsd`: maintained by @YohDeadfall and @LorrensP-2158466. Supports the entire test suite.
   - `android`: **maintainer wanted**. Support very incomplete, but a basic "hello world" works.
-  - `wasi`: **maintainer wanted**. Support very incomplete, not even standard output works, but an empty `main` function works.
+  - `wasi`: **maintainer wanted**. Support very incomplete, but a basic "hello world" works.
 - For targets on other operating systems, Miri might fail before even reaching the `main` function.
 
 However, even for targets that we do support, the degree of support for accessing platform APIs
diff --git a/src/tools/miri/ci/ci.sh b/src/tools/miri/ci/ci.sh
index b66530e77b8a8..bcc110f648b9b 100755
--- a/src/tools/miri/ci/ci.sh
+++ b/src/tools/miri/ci/ci.sh
@@ -137,6 +137,7 @@ function run_tests_minimal {
 
 # In particular, fully cover all tier 1 targets.
 # We also want to run the many-seeds tests on all tier 1 targets.
+# We run GC_STRESS only once for each tier 1 OS.
 case $HOST_TARGET in
   x86_64-unknown-linux-gnu)
     # Host
@@ -147,29 +148,31 @@ case $HOST_TARGET in
     ;;
   i686-unknown-linux-gnu)
     # Host
-    # Without GC_STRESS as this is a slow runner.
     MIR_OPT=1 MANY_SEEDS=64 TEST_BENCH=1 CARGO_MIRI_ENV=1 run_tests
     # Partially supported targets (tier 2)
     BASIC="empty_main integer heap_alloc libc-mem vec string btreemap" # ensures we have the basics: pre-main code, system allocator
     UNIX="hello panic/panic panic/unwind concurrency/simple atomic libc-mem libc-misc libc-random env num_cpus" # the things that are very similar across all Unixes, and hence easily supported there
     TEST_TARGET=aarch64-linux-android  run_tests_minimal $BASIC $UNIX time hashmap random thread sync concurrency epoll eventfd
-    TEST_TARGET=wasm32-wasip2          run_tests_minimal $BASIC wasm
+    TEST_TARGET=wasm32-wasip2          run_tests_minimal $BASIC hello wasm
     TEST_TARGET=wasm32-unknown-unknown run_tests_minimal no_std empty_main wasm # this target doesn't really have std
     TEST_TARGET=thumbv7em-none-eabihf  run_tests_minimal no_std
     ;;
   aarch64-unknown-linux-gnu)
     # Host
-    GC_STRESS=1 MIR_OPT=1 MANY_SEEDS=64 TEST_BENCH=1 CARGO_MIRI_ENV=1 run_tests
+    MIR_OPT=1 MANY_SEEDS=64 TEST_BENCH=1 CARGO_MIRI_ENV=1 run_tests
     # Extra tier 2
     MANY_SEEDS=16 TEST_TARGET=arm-unknown-linux-gnueabi run_tests # 32bit ARM
     MANY_SEEDS=16 TEST_TARGET=aarch64-pc-windows-gnullvm run_tests # gnullvm ABI
     MANY_SEEDS=16 TEST_TARGET=s390x-unknown-linux-gnu run_tests # big-endian architecture of choice
-    # Custom target JSON file
-    TEST_TARGET=tests/x86_64-unknown-kernel.json MIRI_NO_STD=1 run_tests_minimal no_std
+    # Not officially supported tier 2
+    MANY_SEEDS=16 TEST_TARGET=x86_64-unknown-freebsd run_tests
+    MANY_SEEDS=16 TEST_TARGET=i686-unknown-freebsd run_tests
     ;;
   armv7-unknown-linux-gnueabihf)
     # Host
-    GC_STRESS=1 MIR_OPT=1 MANY_SEEDS=64 TEST_BENCH=1 CARGO_MIRI_ENV=1 run_tests
+    MIR_OPT=1 MANY_SEEDS=64 TEST_BENCH=1 CARGO_MIRI_ENV=1 run_tests
+    # Custom target JSON file
+    TEST_TARGET=tests/x86_64-unknown-kernel.json MIRI_NO_STD=1 run_tests_minimal no_std
     ;;
   aarch64-apple-darwin)
     # Host
@@ -181,12 +184,9 @@ case $HOST_TARGET in
     MANY_SEEDS=16 TEST_TARGET=mips-unknown-linux-gnu run_tests # a 32bit big-endian target, and also a target without 64bit atomics
     MANY_SEEDS=16 TEST_TARGET=x86_64-unknown-illumos run_tests
     MANY_SEEDS=16 TEST_TARGET=x86_64-pc-solaris run_tests
-    MANY_SEEDS=16 TEST_TARGET=x86_64-unknown-freebsd run_tests
-    MANY_SEEDS=16 TEST_TARGET=i686-unknown-freebsd run_tests
     ;;
   i686-pc-windows-msvc)
     # Host
-    # Without GC_STRESS as this is a very slow runner.
     MIR_OPT=1 MANY_SEEDS=64 TEST_BENCH=1 run_tests
     # Extra tier 1
     # We really want to ensure a Linux target works on a Windows host,
@@ -195,8 +195,7 @@ case $HOST_TARGET in
     ;;
   aarch64-pc-windows-msvc)
     # Host
-    # Without GC_STRESS as this is a very slow runner.
-    MIR_OPT=1 MANY_SEEDS=64 TEST_BENCH=1 CARGO_MIRI_ENV=1 run_tests
+    GC_STRESS=1 MIR_OPT=1 MANY_SEEDS=64 TEST_BENCH=1 CARGO_MIRI_ENV=1 run_tests
     # Extra tier 1
     MANY_SEEDS=64 TEST_TARGET=i686-unknown-linux-gnu run_tests
     ;;
diff --git a/src/tools/miri/doc/genmc.md b/src/tools/miri/doc/genmc.md
index 5aabe90b5dab2..44e11dcbec44c 100644
--- a/src/tools/miri/doc/genmc.md
+++ b/src/tools/miri/doc/genmc.md
@@ -9,9 +9,7 @@ Miri-GenMC integrates that model checker into Miri.
 
 ## Usage
 
-**IMPORTANT: The license of GenMC and thus the `genmc-sys` crate in the Miri repo is currently "GPL-3.0-or-later", so a binary produced with the `genmc` feature is subject to the requirements of the GPL. As long as that remains the case, the `genmc` feature of Miri is OFF-BY-DEFAULT and must be OFF for all Miri releases.**
-
-For testing/developing Miri-GenMC (while keeping in mind the licensing issues):
+For testing/developing Miri-GenMC:
 - clone the Miri repo.
 - build Miri-GenMC with `./miri build --features=genmc`.
 - OR: install Miri-GenMC in the current system with `./miri install --features=genmc`
@@ -21,7 +19,30 @@ Basic usage:
 MIRIFLAGS="-Zmiri-genmc" cargo miri run
 ```
 
-<!-- FIXME(genmc): explain options. -->
+Note that `cargo miri test` in GenMC mode is currently not supported.
+
+### Supported Parameters
+
+- `-Zmiri-genmc`: Enable GenMC mode (not required if any other GenMC options are used).
+- `-Zmiri-genmc-estimate`: This enables estimation of the concurrent execution space and verification time, before running the full verification. This should help users detect when their program is too complex to fully verify in a reasonable time. This will explore enough executions to make a good estimation, but at least 10 and at most `estimation-max` executions.
+- `-Zmiri-genmc-estimation-max={MAX_ITERATIONS}`: Set the maximum number of executions that will be explored during estimation (default: 1000).
+- `-Zmiri-genmc-print-exec-graphs={none,explored,blocked,all}`: Make GenMC print the execution graph of the program after every explored, every blocked, or after every execution (default: None).
+- `-Zmiri-genmc-print-exec-graphs`: Shorthand for suffix `=explored`.
+- `-Zmiri-genmc-print-genmc-output`: Print the output that GenMC provides. NOTE: this output is quite verbose and the events in the printed execution graph are hard to map back to the Rust code location they originate from.
+- `-Zmiri-genmc-log=LOG_LEVEL`: Change the log level for GenMC. Default: `warning`.
+  - `quiet`:    Disable logging.
+  - `error`:    Print errors.
+  - `warning`:  Print errors and warnings.
+  - `tip`:      Print errors, warnings and tips.
+  - If Miri is built with debug assertions, there are additional log levels available (downgraded to `tip` without debug assertions):
+    - `debug1`:   Print revisits considered by GenMC.
+    - `debug2`:   Print the execution graph after every memory access.
+    - `debug3`:   Print reads-from values considered by GenMC.
+- `-Zmiri-genmc-verbose`: Show more information, such as estimated number of executions, and time taken for verification.
+
+#### Regular Miri parameters useful for GenMC mode
+
+- `-Zmiri-disable-weak-memory-emulation`: Disable any weak memory effects (effectively upgrading all atomic orderings in the program to `SeqCst`). This option may reduce the number of explored program executions, but any bugs related to weak memory effects will be missed. This option can help determine if an error is caused by weak memory effects (i.e., if it disappears with this option enabled).
 
 <!-- FIXME(genmc): explain Miri-GenMC specific functions. -->
 
@@ -57,6 +78,15 @@ The process for obtaining them is as follows:
   If you place this directory inside the Miri folder, it is recommended to call it `genmc-src` as that tells `./miri fmt` to avoid
   formatting the Rust files inside that folder.
 
+### Formatting the C++ code
+
+For formatting the C++ code we provide a `.clang-format` file in the `genmc-sys` directory.
+With `clang-format` installed, run this command to format the c++ files (replace the `-i` with `--dry-run` to just see the changes.):
+```
+find ./genmc-sys/cpp/ -name "*.cpp" -o -name "*.hpp" | xargs clang-format --style=file:"./genmc-sys/.clang-format" -i
+```
+NOTE: this is currently not done automatically on pull requests to Miri.
+
 <!-- FIXME(genmc): explain how submitting code to GenMC should be handled. -->
 
 <!-- FIXME(genmc): explain development. -->
diff --git a/src/tools/miri/genmc-sys/.clang-format b/src/tools/miri/genmc-sys/.clang-format
new file mode 100644
index 0000000000000..669d76ea6c9bc
--- /dev/null
+++ b/src/tools/miri/genmc-sys/.clang-format
@@ -0,0 +1,52 @@
+# .clang-format
+
+BasedOnStyle: LLVM
+Standard: c++20
+
+ColumnLimit: 100
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AllowShortBlocksOnASingleLine: Empty
+BreakBeforeBraces: Attach
+
+BinPackArguments: false
+BinPackParameters: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AlwaysBreakAfterReturnType: None
+
+# Force parameters to break and align
+AlignAfterOpenBracket: BlockIndent
+AllowAllArgumentsOnNextLine: false
+
+# Spacing around braces and parentheses
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceInEmptyBlock: false
+SpacesInContainerLiterals: true
+SpacesInParensOptions:
+  InCStyleCasts: false
+  InConditionalStatements: false
+  InEmptyParentheses: false
+  Other: false
+SpacesInSquareBrackets: false
+
+# Brace spacing for initializers
+Cpp11BracedListStyle: false
+SpaceBeforeCpp11BracedList: true
+
+# Import grouping: group standard, external, and project includes.
+IncludeBlocks: Regroup
+SortIncludes: true
+
+# Granularity: sort includes per module/file.
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+
+# Miscellaneous
+SpaceAfterCStyleCast: true
+SpaceBeforeParens: ControlStatements
+PointerAlignment: Left
+IndentCaseLabels: true
+IndentWidth: 4
+TabWidth: 4
+UseTab: Never
\ No newline at end of file
diff --git a/src/tools/miri/genmc-sys/Cargo.toml b/src/tools/miri/genmc-sys/Cargo.toml
index 737ab9073bfb8..6443ecd969df4 100644
--- a/src/tools/miri/genmc-sys/Cargo.toml
+++ b/src/tools/miri/genmc-sys/Cargo.toml
@@ -1,17 +1,15 @@
 [package]
 authors = ["Miri Team"]
-# The parts in this repo are MIT OR Apache-2.0, but we are linking in
-# code from https://github.com/MPI-SWS/genmc which is GPL-3.0-or-later.
-license = "(MIT OR Apache-2.0) AND GPL-3.0-or-later"
+license = "MIT OR Apache-2.0"
 name = "genmc-sys"
 version = "0.1.0"
 edition = "2024"
 
 [dependencies]
-cxx = { version = "1.0.160", features = ["c++20"] }
+cxx = { version = "1.0.173", features = ["c++20"] }
 
 [build-dependencies]
 cc = "1.2.16"
 cmake = "0.1.54"
 git2 = { version = "0.20.2", default-features = false, features = ["https"] }
-cxx-build = { version = "1.0.160", features = ["parallel"] }
+cxx-build = { version = "1.0.173", features = ["parallel"] }
diff --git a/src/tools/miri/genmc-sys/build.rs b/src/tools/miri/genmc-sys/build.rs
index 479a3bd7186b7..8d437c20a0926 100644
--- a/src/tools/miri/genmc-sys/build.rs
+++ b/src/tools/miri/genmc-sys/build.rs
@@ -26,17 +26,27 @@ mod downloading {
     use super::GENMC_DOWNLOAD_PATH;
 
     /// The GenMC repository the we get our commit from.
-    pub(crate) const GENMC_GITHUB_URL: &str = "https://github.com/MPI-SWS/genmc.git";
+    pub(crate) const GENMC_GITHUB_URL: &str = "https://gitlab.inf.ethz.ch/public-plf/genmc.git";
     /// The GenMC commit we depend on. It must be available on the specified GenMC repository.
-    pub(crate) const GENMC_COMMIT: &str = "3438dd2c1202cd4a47ed7881d099abf23e4167ab";
+    pub(crate) const GENMC_COMMIT: &str = "af9cc9ccd5d412b16defc35dbf36571c63a19c76";
 
-    pub(crate) fn download_genmc() -> PathBuf {
+    /// Ensure that a local GenMC repo is present and set to the correct commit.
+    /// Return the path of the GenMC repo and whether the checked out commit was changed.
+    pub(crate) fn download_genmc() -> (PathBuf, bool) {
         let Ok(genmc_download_path) = PathBuf::from_str(GENMC_DOWNLOAD_PATH);
         let commit_oid = Oid::from_str(GENMC_COMMIT).expect("Commit should be valid.");
 
         match Repository::open(&genmc_download_path) {
             Ok(repo) => {
                 assert_repo_unmodified(&repo);
+                if let Ok(head) = repo.head()
+                    && let Ok(head_commit) = head.peel_to_commit()
+                    && head_commit.id() == commit_oid
+                {
+                    // Fast path: The expected commit is already checked out.
+                    return (genmc_download_path, false);
+                }
+                // Check if the local repository already contains the commit we need, download it otherwise.
                 let commit = update_local_repo(&repo, commit_oid);
                 checkout_commit(&repo, &commit);
             }
@@ -51,7 +61,7 @@ mod downloading {
             }
         };
 
-        genmc_download_path
+        (genmc_download_path, true)
     }
 
     fn get_remote(repo: &Repository) -> Remote<'_> {
@@ -71,7 +81,8 @@ mod downloading {
 
         // Update remote URL.
         println!(
-            "cargo::warning=GenMC repository remote URL has changed from '{remote_url:?}' to '{GENMC_GITHUB_URL}'"
+            "cargo::warning=GenMC repository remote URL has changed from '{}' to '{GENMC_GITHUB_URL}'",
+            remote_url.unwrap_or_default()
         );
         repo.remote_set_url("origin", GENMC_GITHUB_URL)
             .expect("cannot rename url of remote 'origin'");
@@ -175,19 +186,25 @@ fn link_to_llvm(config_file: &Path) -> (String, String) {
 }
 
 /// Build the GenMC model checker library and the Rust-C++ interop library with cxx.rs
-fn compile_cpp_dependencies(genmc_path: &Path) {
+fn compile_cpp_dependencies(genmc_path: &Path, always_configure: bool) {
+    // Give each step a separate build directory to prevent interference.
+    let out_dir = PathBuf::from(std::env::var("OUT_DIR").as_deref().unwrap());
+    let genmc_build_dir = out_dir.join("genmc");
+    let interface_build_dir = out_dir.join("miri_genmc");
+
     // Part 1:
     // Compile the GenMC library using cmake.
 
-    let cmakelists_path = genmc_path.join("CMakeLists.txt");
-
     // FIXME(genmc,cargo): Switch to using `CARGO_CFG_DEBUG_ASSERTIONS` once https://github.com/rust-lang/cargo/issues/15760 is completed.
     // Enable/disable additional debug checks, prints and options for GenMC, based on the Rust profile (debug/release)
     let enable_genmc_debug = matches!(std::env::var("PROFILE").as_deref().unwrap(), "debug");
 
-    let mut config = cmake::Config::new(cmakelists_path);
-    config.profile(GENMC_CMAKE_PROFILE);
-    config.define("GENMC_DEBUG", if enable_genmc_debug { "ON" } else { "OFF" });
+    let mut config = cmake::Config::new(genmc_path);
+    config
+        .always_configure(always_configure) // We force running the configure step when the GenMC commit changed.
+        .out_dir(genmc_build_dir)
+        .profile(GENMC_CMAKE_PROFILE)
+        .define("GENMC_DEBUG", if enable_genmc_debug { "ON" } else { "OFF" });
 
     // The actual compilation happens here:
     let genmc_install_dir = config.build();
@@ -210,6 +227,13 @@ fn compile_cpp_dependencies(genmc_path: &Path) {
     // These definitions are parsed into a cmake list and then printed to the config.h file, so they are ';' separated.
     let definitions = llvm_definitions.split(";");
 
+    let cpp_files = [
+        "./cpp/src/MiriInterface/EventHandling.cpp",
+        "./cpp/src/MiriInterface/Exploration.cpp",
+        "./cpp/src/MiriInterface/Setup.cpp",
+        "./cpp/src/MiriInterface/ThreadManagement.cpp",
+    ];
+
     let mut bridge = cxx_build::bridge("src/lib.rs");
     // FIXME(genmc,cmake): Remove once the GenMC debug setting is available in the config.h file.
     if enable_genmc_debug {
@@ -225,9 +249,9 @@ fn compile_cpp_dependencies(genmc_path: &Path) {
         .std("c++23")
         .include(genmc_include_dir)
         .include(llvm_include_dirs)
-        .include("./src_cpp")
-        .file("./src_cpp/MiriInterface.hpp")
-        .file("./src_cpp/MiriInterface.cpp")
+        .include("./cpp/include")
+        .files(&cpp_files)
+        .out_dir(interface_build_dir)
         .compile("genmc_interop");
 
     // Link the Rust-C++ interface library generated by cxx_build:
@@ -235,15 +259,9 @@ fn compile_cpp_dependencies(genmc_path: &Path) {
 }
 
 fn main() {
-    // Make sure we don't accidentally distribute a binary with GPL code.
-    if option_env!("RUSTC_STAGE").is_some() {
-        panic!(
-            "genmc should not be enabled in the rustc workspace since it includes a GPL dependency"
-        );
-    }
-
     // Select which path to use for the GenMC repo:
-    let genmc_path = if let Ok(genmc_src_path) = std::env::var("GENMC_SRC_PATH") {
+    let (genmc_path, always_configure) = if let Some(genmc_src_path) = option_env!("GENMC_SRC_PATH")
+    {
         let genmc_src_path =
             PathBuf::from_str(&genmc_src_path).expect("GENMC_SRC_PATH should contain a valid path");
         assert!(
@@ -251,13 +269,20 @@ fn main() {
             "GENMC_SRC_PATH={} does not exist!",
             genmc_src_path.display()
         );
-        genmc_src_path
+        // Rebuild files in the given path change.
+        println!("cargo::rerun-if-changed={}", genmc_src_path.display());
+        // We disable `always_configure` when working with a local repository,
+        // since it increases compile times when working on `genmc-sys`.
+        (genmc_src_path, false)
     } else {
+        // Download GenMC if required and ensure that the correct commit is checked out.
+        // If anything changed in the downloaded repository (e.g., the commit),
+        // we set `always_configure` to ensure there are no weird configs from previous builds.
         downloading::download_genmc()
     };
 
     // Build all required components:
-    compile_cpp_dependencies(&genmc_path);
+    compile_cpp_dependencies(&genmc_path, always_configure);
 
     // Only rebuild if anything changes:
     // Note that we don't add the downloaded GenMC repo, since that should never be modified
@@ -265,5 +290,5 @@ fn main() {
     // cloned (since cargo detects that as a file modification).
     println!("cargo::rerun-if-changed={RUST_CXX_BRIDGE_FILE_PATH}");
     println!("cargo::rerun-if-changed=./src");
-    println!("cargo::rerun-if-changed=./src_cpp");
+    println!("cargo::rerun-if-changed=./cpp");
 }
diff --git a/src/tools/miri/genmc-sys/cpp/include/MiriInterface.hpp b/src/tools/miri/genmc-sys/cpp/include/MiriInterface.hpp
new file mode 100644
index 0000000000000..444c9375319af
--- /dev/null
+++ b/src/tools/miri/genmc-sys/cpp/include/MiriInterface.hpp
@@ -0,0 +1,345 @@
+#ifndef GENMC_MIRI_INTERFACE_HPP
+#define GENMC_MIRI_INTERFACE_HPP
+
+// CXX.rs generated headers:
+#include "rust/cxx.h"
+
+// GenMC generated headers:
+#include "config.h"
+
+// Miri `genmc-sys/src_cpp` headers:
+#include "ResultHandling.hpp"
+
+// GenMC headers:
+#include "ExecutionGraph/EventLabel.hpp"
+#include "Static/ModuleID.hpp"
+#include "Support/MemOrdering.hpp"
+#include "Support/RMWOps.hpp"
+#include "Verification/Config.hpp"
+#include "Verification/GenMCDriver.hpp"
+
+// C++ headers:
+#include <cstdint>
+#include <format>
+#include <iomanip>
+#include <memory>
+
+/**** Types available to both Rust and C++ ****/
+
+struct GenmcParams;
+enum class LogLevel : std::uint8_t;
+
+struct GenmcScalar;
+struct SchedulingResult;
+struct EstimationResult;
+struct LoadResult;
+struct StoreResult;
+struct ReadModifyWriteResult;
+struct CompareExchangeResult;
+
+// GenMC uses `int` for its thread IDs.
+using ThreadId = int;
+
+/// Set the log level for GenMC.
+///
+/// # Safety
+///
+/// This function is not thread safe, since it writes to the global, mutable, non-atomic `logLevel`
+/// variable. Any GenMC function may read from `logLevel` unsynchronized.
+/// The safest way to use this function is to set the log level exactly once before first calling
+/// `create_handle`.
+/// Never calling this function is safe, GenMC will fall back to its default log level.
+/* unsafe */ void set_log_level_raw(LogLevel log_level);
+
+struct MiriGenmcShim : private GenMCDriver {
+
+  public:
+    MiriGenmcShim(std::shared_ptr<const Config> conf, Mode mode /* = VerificationMode{} */)
+        : GenMCDriver(std::move(conf), nullptr, mode) {}
+
+    /// Create a new `MiriGenmcShim`, which wraps a `GenMCDriver`.
+    ///
+    /// # Safety
+    ///
+    /// This function is marked as unsafe since the `logLevel` global variable is non-atomic.
+    /// This function should not be called in an unsynchronized way with `set_log_level_raw`,
+    /// since this function and any methods on the returned `MiriGenmcShim` may read the
+    /// `logLevel`, causing a data race. The safest way to use these functions is to call
+    /// `set_log_level_raw` once, and only then start creating handles. There should not be any
+    /// other (safe) way to create a `MiriGenmcShim`.
+    /* unsafe */ static auto create_handle(const GenmcParams& params, bool estimation_mode)
+        -> std::unique_ptr<MiriGenmcShim>;
+
+    virtual ~MiriGenmcShim() {}
+
+    /**** Execution start/end handling ****/
+
+    // This function must be called at the start of any execution, before any events are
+    // reported to GenMC.
+    void handle_execution_start();
+    // This function must be called at the end of any execution, even if an error was found
+    // during the execution.
+    // Returns `null`, or a string containing an error message if an error occured.
+    std::unique_ptr<std::string> handle_execution_end();
+
+    /***** Functions for handling events encountered during program execution. *****/
+
+    /**** Memory access handling ****/
+
+    [[nodiscard]] LoadResult handle_load(
+        ThreadId thread_id,
+        uint64_t address,
+        uint64_t size,
+        MemOrdering ord,
+        GenmcScalar old_val
+    );
+    [[nodiscard]] ReadModifyWriteResult handle_read_modify_write(
+        ThreadId thread_id,
+        uint64_t address,
+        uint64_t size,
+        RMWBinOp rmw_op,
+        MemOrdering ordering,
+        GenmcScalar rhs_value,
+        GenmcScalar old_val
+    );
+    [[nodiscard]] CompareExchangeResult handle_compare_exchange(
+        ThreadId thread_id,
+        uint64_t address,
+        uint64_t size,
+        GenmcScalar expected_value,
+        GenmcScalar new_value,
+        GenmcScalar old_val,
+        MemOrdering success_ordering,
+        MemOrdering fail_load_ordering,
+        bool can_fail_spuriously
+    );
+    [[nodiscard]] StoreResult handle_store(
+        ThreadId thread_id,
+        uint64_t address,
+        uint64_t size,
+        GenmcScalar value,
+        GenmcScalar old_val,
+        MemOrdering ord
+    );
+
+    void handle_fence(ThreadId thread_id, MemOrdering ord);
+
+    /**** Memory (de)allocation ****/
+    auto handle_malloc(ThreadId thread_id, uint64_t size, uint64_t alignment) -> uint64_t;
+    void handle_free(ThreadId thread_id, uint64_t address);
+
+    /**** Thread management ****/
+    void handle_thread_create(ThreadId thread_id, ThreadId parent_id);
+    void handle_thread_join(ThreadId thread_id, ThreadId child_id);
+    void handle_thread_finish(ThreadId thread_id, uint64_t ret_val);
+    void handle_thread_kill(ThreadId thread_id);
+
+    /***** Exploration related functionality *****/
+
+    /** Ask the GenMC scheduler for a new thread to schedule and return whether the execution is
+     * finished, blocked, or can continue.
+     * Updates the next instruction kind for the given thread id. */
+    auto schedule_next(const int curr_thread_id, const ActionKind curr_thread_next_instr_kind)
+        -> SchedulingResult;
+
+    /**
+     * Check whether there are more executions to explore.
+     * If there are more executions, this method prepares for the next execution and returns
+     * `true`. Returns true if there are no more executions to explore. */
+    auto is_exploration_done() -> bool {
+        return GenMCDriver::done();
+    }
+
+    /**** Result querying functionality. ****/
+
+    // NOTE: We don't want to share the `VerificationResult` type with the Rust side, since it
+    // is very large, uses features that CXX.rs doesn't support and may change as GenMC changes.
+    // Instead, we only use the result on the C++ side, and only expose these getter function to
+    // the Rust side.
+
+    // Note that CXX.rs doesn't support returning a C++ string to Rust by value,
+    // it must be behind an indirection like a `unique_ptr` (tested with CXX 1.0.170).
+
+    /// Get the number of blocked executions encountered by GenMC (cast into a fixed with
+    /// integer)
+    auto get_blocked_execution_count() const -> uint64_t {
+        return static_cast<uint64_t>(getResult().exploredBlocked);
+    }
+
+    /// Get the number of executions explored by GenMC (cast into a fixed with integer)
+    auto get_explored_execution_count() const -> uint64_t {
+        return static_cast<uint64_t>(getResult().explored);
+    }
+
+    /// Get all messages that GenMC produced (errors, warnings), combined into one string.
+    auto get_result_message() const -> std::unique_ptr<std::string> {
+        return std::make_unique<std::string>(getResult().message);
+    }
+
+    /// If an error occurred, return a string describing the error, otherwise, return `nullptr`.
+    auto get_error_string() const -> std::unique_ptr<std::string> {
+        const auto& result = GenMCDriver::getResult();
+        if (result.status.has_value())
+            return format_error(result.status.value());
+        return nullptr;
+    }
+
+    /**** Printing and estimation mode functionality. ****/
+
+    /// Get the results of a run in estimation mode.
+    auto get_estimation_results() const -> EstimationResult;
+
+  private:
+    /** Increment the event index in the given thread by 1 and return the new event. */
+    [[nodiscard]] inline auto inc_pos(ThreadId tid) -> Event {
+        ERROR_ON(tid >= threads_action_.size(), "ThreadId out of bounds");
+        return ++threads_action_[tid].event;
+    }
+    /** Decrement the event index in the given thread by 1 and return the new event. */
+    inline auto dec_pos(ThreadId tid) -> Event {
+        ERROR_ON(tid >= threads_action_.size(), "ThreadId out of bounds");
+        return --threads_action_[tid].event;
+    }
+
+    /**
+     * Helper function for loads that need to reset the event counter when no value is returned.
+     * Same syntax as `GenMCDriver::handleLoad`, but this takes a thread id instead of an Event.
+     * Automatically calls `inc_pos` and `dec_pos` where needed for the given thread.
+     */
+    template <EventLabel::EventLabelKind k, typename... Ts>
+    auto handle_load_reset_if_none(ThreadId tid, Ts&&... params) -> HandleResult<SVal> {
+        const auto pos = inc_pos(tid);
+        const auto ret = GenMCDriver::handleLoad<k>(pos, std::forward<Ts>(params)...);
+        // If we didn't get a value, we have to reset the index of the current thread.
+        if (!std::holds_alternative<SVal>(ret)) {
+            dec_pos(tid);
+        }
+        return ret;
+    }
+
+    /**
+     * GenMC uses the term `Action` to refer to a struct of:
+     * - `ActionKind`, storing whether the next instruction in a thread may be a load
+     * - `Event`, storing the most recent event index added for a thread
+     *
+     * Here we store the "action" for each thread. In particular we use this to assign event
+     * indices, since GenMC expects us to do that.
+     */
+    std::vector<Action> threads_action_;
+};
+
+/// Get the bit mask that GenMC expects for global memory allocations.
+/// FIXME(genmc): currently we use `get_global_alloc_static_mask()` to ensure the
+/// `SAddr::staticMask` constant is consistent between Miri and GenMC, but if
+/// https://github.com/dtolnay/cxx/issues/1051 is fixed we could share the constant
+///   directly.
+constexpr auto get_global_alloc_static_mask() -> uint64_t {
+    return SAddr::staticMask;
+}
+
+// CXX.rs generated headers:
+// NOTE: this must be included *after* `MiriGenmcShim` and all the other types we use are defined,
+// otherwise there will be compilation errors due to missing definitions.
+#include "genmc-sys/src/lib.rs.h"
+
+/**** Result handling ****/
+// NOTE: these must come after the cxx_bridge generated code, since that contains the actual
+// definitions of these types.
+
+namespace GenmcScalarExt {
+inline GenmcScalar uninit() {
+    return GenmcScalar {
+        .value = 0,
+        .is_init = false,
+    };
+}
+
+inline GenmcScalar from_sval(SVal sval) {
+    return GenmcScalar {
+        .value = sval.get(),
+        .is_init = true,
+    };
+}
+
+inline SVal to_sval(GenmcScalar scalar) {
+    ERROR_ON(!scalar.is_init, "Cannot convert an uninitialized `GenmcScalar` into an `SVal`\n");
+    return SVal(scalar.value);
+}
+} // namespace GenmcScalarExt
+
+namespace LoadResultExt {
+inline LoadResult no_value() {
+    return LoadResult {
+        .error = std::unique_ptr<std::string>(nullptr),
+        .has_value = false,
+        .read_value = GenmcScalarExt::uninit(),
+    };
+}
+
+inline LoadResult from_value(SVal read_value) {
+    return LoadResult { .error = std::unique_ptr<std::string>(nullptr),
+                        .has_value = true,
+                        .read_value = GenmcScalarExt::from_sval(read_value) };
+}
+
+inline LoadResult from_error(std::unique_ptr<std::string> error) {
+    return LoadResult { .error = std::move(error),
+                        .has_value = false,
+                        .read_value = GenmcScalarExt::uninit() };
+}
+} // namespace LoadResultExt
+
+namespace StoreResultExt {
+inline StoreResult ok(bool is_coherence_order_maximal_write) {
+    return StoreResult { /* error: */ std::unique_ptr<std::string>(nullptr),
+                         is_coherence_order_maximal_write };
+}
+
+inline StoreResult from_error(std::unique_ptr<std::string> error) {
+    return StoreResult { .error = std::move(error), .is_coherence_order_maximal_write = false };
+}
+} // namespace StoreResultExt
+
+namespace ReadModifyWriteResultExt {
+inline ReadModifyWriteResult
+ok(SVal old_value, SVal new_value, bool is_coherence_order_maximal_write) {
+    return ReadModifyWriteResult { .error = std::unique_ptr<std::string>(nullptr),
+                                   .old_value = GenmcScalarExt::from_sval(old_value),
+                                   .new_value = GenmcScalarExt::from_sval(new_value),
+                                   .is_coherence_order_maximal_write =
+                                       is_coherence_order_maximal_write };
+}
+
+inline ReadModifyWriteResult from_error(std::unique_ptr<std::string> error) {
+    return ReadModifyWriteResult { .error = std::move(error),
+                                   .old_value = GenmcScalarExt::uninit(),
+                                   .new_value = GenmcScalarExt::uninit(),
+                                   .is_coherence_order_maximal_write = false };
+}
+} // namespace ReadModifyWriteResultExt
+
+namespace CompareExchangeResultExt {
+inline CompareExchangeResult success(SVal old_value, bool is_coherence_order_maximal_write) {
+    return CompareExchangeResult { .error = nullptr,
+                                   .old_value = GenmcScalarExt::from_sval(old_value),
+                                   .is_success = true,
+                                   .is_coherence_order_maximal_write =
+                                       is_coherence_order_maximal_write };
+}
+
+inline CompareExchangeResult failure(SVal old_value) {
+    return CompareExchangeResult { .error = nullptr,
+                                   .old_value = GenmcScalarExt::from_sval(old_value),
+                                   .is_success = false,
+                                   .is_coherence_order_maximal_write = false };
+}
+
+inline CompareExchangeResult from_error(std::unique_ptr<std::string> error) {
+    return CompareExchangeResult { .error = std::move(error),
+                                   .old_value = GenmcScalarExt::uninit(),
+                                   .is_success = false,
+                                   .is_coherence_order_maximal_write = false };
+}
+} // namespace CompareExchangeResultExt
+
+#endif /* GENMC_MIRI_INTERFACE_HPP */
diff --git a/src/tools/miri/genmc-sys/cpp/include/ResultHandling.hpp b/src/tools/miri/genmc-sys/cpp/include/ResultHandling.hpp
new file mode 100644
index 0000000000000..189f32e6f513d
--- /dev/null
+++ b/src/tools/miri/genmc-sys/cpp/include/ResultHandling.hpp
@@ -0,0 +1,21 @@
+#ifndef GENMC_RESULT_HANDLING_HPP
+#define GENMC_RESULT_HANDLING_HPP
+
+// CXX.rs generated headers:
+#include "rust/cxx.h"
+
+// GenMC headers:
+#include "Verification/VerificationError.hpp"
+
+#include <string>
+
+/** Information about an error, formatted as a string to avoid having to share an error enum and
+ * printing functionality with the Rust side. */
+static auto format_error(VerificationError err) -> std::unique_ptr<std::string> {
+    auto buf = std::string();
+    auto s = llvm::raw_string_ostream(buf);
+    s << err;
+    return std::make_unique<std::string>(s.str());
+}
+
+#endif /* GENMC_RESULT_HANDLING_HPP */
diff --git a/src/tools/miri/genmc-sys/cpp/src/MiriInterface/EventHandling.cpp b/src/tools/miri/genmc-sys/cpp/src/MiriInterface/EventHandling.cpp
new file mode 100644
index 0000000000000..05c82641df948
--- /dev/null
+++ b/src/tools/miri/genmc-sys/cpp/src/MiriInterface/EventHandling.cpp
@@ -0,0 +1,251 @@
+/** This file contains functionality related to handling events encountered
+ * during an execution, such as loads, stores or memory (de)allocation. */
+
+#include "MiriInterface.hpp"
+
+// CXX.rs generated headers:
+#include "genmc-sys/src/lib.rs.h"
+
+// GenMC headers:
+#include "ADT/value_ptr.hpp"
+#include "ExecutionGraph/EventLabel.hpp"
+#include "ExecutionGraph/LoadAnnotation.hpp"
+#include "Runtime/InterpreterEnumAPI.hpp"
+#include "Static/ModuleID.hpp"
+#include "Support/ASize.hpp"
+#include "Support/Error.hpp"
+#include "Support/Logger.hpp"
+#include "Support/MemAccess.hpp"
+#include "Support/RMWOps.hpp"
+#include "Support/SAddr.hpp"
+#include "Support/SVal.hpp"
+#include "Support/ThreadInfo.hpp"
+#include "Support/Verbosity.hpp"
+#include "Verification/GenMCDriver.hpp"
+#include "Verification/MemoryModel.hpp"
+
+// C++ headers:
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+/**** Memory access handling ****/
+
+[[nodiscard]] auto MiriGenmcShim::handle_load(
+    ThreadId thread_id,
+    uint64_t address,
+    uint64_t size,
+    MemOrdering ord,
+    GenmcScalar old_val
+) -> LoadResult {
+    // `type` is only used for printing.
+    const auto type = AType::Unsigned;
+    const auto ret = handle_load_reset_if_none<EventLabel::EventLabelKind::Read>(
+        thread_id,
+        ord,
+        SAddr(address),
+        ASize(size),
+        type
+    );
+
+    if (const auto* err = std::get_if<VerificationError>(&ret))
+        return LoadResultExt::from_error(format_error(*err));
+    const auto* ret_val = std::get_if<SVal>(&ret);
+    if (ret_val == nullptr)
+        ERROR("Unimplemented: load returned unexpected result.");
+    return LoadResultExt::from_value(*ret_val);
+}
+
+[[nodiscard]] auto MiriGenmcShim::handle_store(
+    ThreadId thread_id,
+    uint64_t address,
+    uint64_t size,
+    GenmcScalar value,
+    GenmcScalar old_val,
+    MemOrdering ord
+) -> StoreResult {
+    const auto pos = inc_pos(thread_id);
+    const auto ret = GenMCDriver::handleStore<EventLabel::EventLabelKind::Write>(
+        pos,
+        ord,
+        SAddr(address),
+        ASize(size),
+        /* type */ AType::Unsigned, // `type` is only used for printing.
+        GenmcScalarExt::to_sval(value),
+        EventDeps()
+    );
+
+    if (const auto* err = std::get_if<VerificationError>(&ret))
+        return StoreResultExt::from_error(format_error(*err));
+    if (!std::holds_alternative<std::monostate>(ret))
+        ERROR("store returned unexpected result");
+
+    // FIXME(genmc,mixed-accesses): Use the value that GenMC returns from handleStore (once
+    // available).
+    const auto& g = getExec().getGraph();
+    return StoreResultExt::ok(
+        /* is_coherence_order_maximal_write */ g.co_max(SAddr(address))->getPos() == pos
+    );
+}
+
+void MiriGenmcShim::handle_fence(ThreadId thread_id, MemOrdering ord) {
+    const auto pos = inc_pos(thread_id);
+    GenMCDriver::handleFence(pos, ord, EventDeps());
+}
+
+[[nodiscard]] auto MiriGenmcShim::handle_read_modify_write(
+    ThreadId thread_id,
+    uint64_t address,
+    uint64_t size,
+    RMWBinOp rmw_op,
+    MemOrdering ordering,
+    GenmcScalar rhs_value,
+    GenmcScalar old_val
+) -> ReadModifyWriteResult {
+    // NOTE: Both the store and load events should get the same `ordering`, it should not be split
+    // into a load and a store component. This means we can have for example `AcqRel` loads and
+    // stores, but this is intended for RMW operations.
+
+    // Somewhat confusingly, the GenMC term for RMW read/write labels is
+    // `FaiRead` and `FaiWrite`.
+    const auto load_ret = handle_load_reset_if_none<EventLabel::EventLabelKind::FaiRead>(
+        thread_id,
+        ordering,
+        SAddr(address),
+        ASize(size),
+        AType::Unsigned, // The type is only used for printing.
+        rmw_op,
+        GenmcScalarExt::to_sval(rhs_value),
+        EventDeps()
+    );
+    if (const auto* err = std::get_if<VerificationError>(&load_ret))
+        return ReadModifyWriteResultExt::from_error(format_error(*err));
+
+    const auto* ret_val = std::get_if<SVal>(&load_ret);
+    if (nullptr == ret_val) {
+        ERROR("Unimplemented: read-modify-write returned unexpected result.");
+    }
+    const auto read_old_val = *ret_val;
+    const auto new_value =
+        executeRMWBinOp(read_old_val, GenmcScalarExt::to_sval(rhs_value), size, rmw_op);
+
+    const auto storePos = inc_pos(thread_id);
+    const auto store_ret = GenMCDriver::handleStore<EventLabel::EventLabelKind::FaiWrite>(
+        storePos,
+        ordering,
+        SAddr(address),
+        ASize(size),
+        AType::Unsigned, // The type is only used for printing.
+        new_value
+    );
+    if (const auto* err = std::get_if<VerificationError>(&store_ret))
+        return ReadModifyWriteResultExt::from_error(format_error(*err));
+
+    const auto* store_ret_val = std::get_if<std::monostate>(&store_ret);
+    ERROR_ON(nullptr == store_ret_val, "Unimplemented: RMW store returned unexpected result.");
+
+    // FIXME(genmc,mixed-accesses): Use the value that GenMC returns from handleStore (once
+    // available).
+    const auto& g = getExec().getGraph();
+    return ReadModifyWriteResultExt::ok(
+        /* old_value: */ read_old_val,
+        new_value,
+        /* is_coherence_order_maximal_write */ g.co_max(SAddr(address))->getPos() == storePos
+    );
+}
+
+[[nodiscard]] auto MiriGenmcShim::handle_compare_exchange(
+    ThreadId thread_id,
+    uint64_t address,
+    uint64_t size,
+    GenmcScalar expected_value,
+    GenmcScalar new_value,
+    GenmcScalar old_val,
+    MemOrdering success_ordering,
+    MemOrdering fail_load_ordering,
+    bool can_fail_spuriously
+) -> CompareExchangeResult {
+    // NOTE: Both the store and load events should get the same `ordering`, it should not be split
+    // into a load and a store component. This means we can have for example `AcqRel` loads and
+    // stores, but this is intended for CAS operations.
+
+    // FIXME(GenMC): properly handle failure memory ordering.
+
+    auto expectedVal = GenmcScalarExt::to_sval(expected_value);
+    auto new_val = GenmcScalarExt::to_sval(new_value);
+
+    const auto load_ret = handle_load_reset_if_none<EventLabel::EventLabelKind::CasRead>(
+        thread_id,
+        success_ordering,
+        SAddr(address),
+        ASize(size),
+        AType::Unsigned, // The type is only used for printing.
+        expectedVal,
+        new_val
+    );
+    if (const auto* err = std::get_if<VerificationError>(&load_ret))
+        return CompareExchangeResultExt::from_error(format_error(*err));
+    const auto* ret_val = std::get_if<SVal>(&load_ret);
+    ERROR_ON(nullptr == ret_val, "Unimplemented: load returned unexpected result.");
+    const auto read_old_val = *ret_val;
+    if (read_old_val != expectedVal)
+        return CompareExchangeResultExt::failure(read_old_val);
+
+    // FIXME(GenMC): Add support for modelling spurious failures.
+
+    const auto storePos = inc_pos(thread_id);
+    const auto store_ret = GenMCDriver::handleStore<EventLabel::EventLabelKind::CasWrite>(
+        storePos,
+        success_ordering,
+        SAddr(address),
+        ASize(size),
+        AType::Unsigned, // The type is only used for printing.
+        new_val
+    );
+    if (const auto* err = std::get_if<VerificationError>(&store_ret))
+        return CompareExchangeResultExt::from_error(format_error(*err));
+    const auto* store_ret_val = std::get_if<std::monostate>(&store_ret);
+    ERROR_ON(
+        nullptr == store_ret_val,
+        "Unimplemented: compare-exchange store returned unexpected result."
+    );
+
+    // FIXME(genmc,mixed-accesses): Use the value that GenMC returns from handleStore (once
+    // available).
+    const auto& g = getExec().getGraph();
+    return CompareExchangeResultExt::success(
+        read_old_val,
+        /* is_coherence_order_maximal_write */ g.co_max(SAddr(address))->getPos() == storePos
+    );
+}
+
+/**** Memory (de)allocation ****/
+
+auto MiriGenmcShim::handle_malloc(ThreadId thread_id, uint64_t size, uint64_t alignment)
+    -> uint64_t {
+    const auto pos = inc_pos(thread_id);
+
+    // These are only used for printing and features Miri-GenMC doesn't support (yet).
+    const auto storage_duration = StorageDuration::SD_Heap;
+    // Volatile, as opposed to "persistent" (i.e., non-volatile memory that persists over reboots)
+    const auto storage_type = StorageType::ST_Volatile;
+    const auto address_space = AddressSpace::AS_User;
+
+    const SVal ret_val = GenMCDriver::handleMalloc(
+        pos,
+        size,
+        alignment,
+        storage_duration,
+        storage_type,
+        address_space,
+        EventDeps()
+    );
+    return ret_val.get();
+}
+
+void MiriGenmcShim::handle_free(ThreadId thread_id, uint64_t address) {
+    const auto pos = inc_pos(thread_id);
+    GenMCDriver::handleFree(pos, SAddr(address), EventDeps());
+    // FIXME(genmc): add error handling once GenMC returns errors from `handleFree`
+}
diff --git a/src/tools/miri/genmc-sys/cpp/src/MiriInterface/Exploration.cpp b/src/tools/miri/genmc-sys/cpp/src/MiriInterface/Exploration.cpp
new file mode 100644
index 0000000000000..5e7188f17e0d2
--- /dev/null
+++ b/src/tools/miri/genmc-sys/cpp/src/MiriInterface/Exploration.cpp
@@ -0,0 +1,56 @@
+/** This file contains functionality related to exploration, such as scheduling.  */
+
+#include "MiriInterface.hpp"
+
+// CXX.rs generated headers:
+#include "genmc-sys/src/lib.rs.h"
+
+// GenMC headers:
+#include "Support/Error.hpp"
+#include "Support/Verbosity.hpp"
+
+// C++ headers:
+#include <cmath>
+#include <cstdint>
+#include <limits>
+
+auto MiriGenmcShim::schedule_next(
+    const int curr_thread_id,
+    const ActionKind curr_thread_next_instr_kind
+) -> SchedulingResult {
+    // The current thread is the only one where the `kind` could have changed since we last made
+    // a scheduling decision.
+    threads_action_[curr_thread_id].kind = curr_thread_next_instr_kind;
+
+    if (const auto result = GenMCDriver::scheduleNext(threads_action_))
+        return SchedulingResult { ExecutionState::Ok, static_cast<int32_t>(result.value()) };
+    if (GenMCDriver::isExecutionBlocked())
+        return SchedulingResult { ExecutionState::Blocked, 0 };
+    return SchedulingResult { ExecutionState::Finished, 0 };
+}
+
+/**** Execution start/end handling ****/
+
+void MiriGenmcShim::handle_execution_start() {
+    threads_action_.clear();
+    threads_action_.push_back(Action(ActionKind::Load, Event::getInit()));
+    GenMCDriver::handleExecutionStart();
+}
+
+auto MiriGenmcShim::handle_execution_end() -> std::unique_ptr<std::string> {
+    // FIXME(genmc): add error handling once GenMC returns an error here.
+    GenMCDriver::handleExecutionEnd();
+    return {};
+}
+
+/**** Estimation mode result ****/
+
+auto MiriGenmcShim::get_estimation_results() const -> EstimationResult {
+    const auto& res = getResult();
+    return EstimationResult {
+        .mean = static_cast<double>(res.estimationMean),
+        .sd = static_cast<double>(std::sqrt(res.estimationVariance)),
+        .explored_execs = static_cast<uint64_t>(res.explored),
+        .blocked_execs = static_cast<uint64_t>(res.exploredBlocked),
+    };
+}
diff --git a/src/tools/miri/genmc-sys/cpp/src/MiriInterface/Setup.cpp b/src/tools/miri/genmc-sys/cpp/src/MiriInterface/Setup.cpp
new file mode 100644
index 0000000000000..af13f0d07746e
--- /dev/null
+++ b/src/tools/miri/genmc-sys/cpp/src/MiriInterface/Setup.cpp
@@ -0,0 +1,197 @@
+/** This file contains functionality related to creation of the GenMCDriver,
+ * including translating settings set by Miri.  */
+
+#include "MiriInterface.hpp"
+
+// CXX.rs generated headers:
+#include "genmc-sys/src/lib.rs.h"
+
+// GenMC headers:
+#include "Support/Error.hpp"
+#include "Support/Verbosity.hpp"
+#include "Verification/InterpreterCallbacks.hpp"
+
+// C++ headers:
+#include <cstdint>
+
+/**
+ * Translate the Miri-GenMC `LogLevel` to the GenMC `VerbosityLevel`.
+ * Downgrade any debug options to `Tip` if `ENABLE_GENMC_DEBUG` is not enabled.
+ */
+static auto to_genmc_verbosity_level(const LogLevel log_level) -> VerbosityLevel {
+    switch (log_level) {
+        case LogLevel::Quiet:
+            return VerbosityLevel::Quiet;
+        case LogLevel::Error:
+            return VerbosityLevel::Error;
+        case LogLevel::Warning:
+            return VerbosityLevel::Warning;
+        case LogLevel::Tip:
+            return VerbosityLevel::Tip;
+#ifdef ENABLE_GENMC_DEBUG
+        case LogLevel::Debug1Revisits:
+            return VerbosityLevel::Debug1;
+        case LogLevel::Debug2MemoryAccesses:
+            return VerbosityLevel::Debug2;
+        case LogLevel::Debug3ReadsFrom:
+            return VerbosityLevel::Debug3;
+#else
+        // Downgrade to `Tip` if the debug levels are not available.
+        case LogLevel::Debug1Revisits:
+        case LogLevel::Debug2MemoryAccesses:
+        case LogLevel::Debug3ReadsFrom:
+            return VerbosityLevel::Tip;
+#endif
+        default:
+            WARN_ONCE(
+                "unknown-log-level",
+                "Unknown `LogLevel`, defaulting to `VerbosityLevel::Tip`."
+            );
+            return VerbosityLevel::Tip;
+    }
+}
+
+/* unsafe */ void set_log_level_raw(LogLevel log_level) {
+    // The `logLevel` is a static, non-atomic variable.
+    // It should never be changed if `MiriGenmcShim` still exists, since any of its methods may read
+    // the `logLevel`, otherwise it may cause data races.
+    logLevel = to_genmc_verbosity_level(log_level);
+}
+
+/* unsafe */ auto MiriGenmcShim::create_handle(const GenmcParams& params, bool estimation_mode)
+    -> std::unique_ptr<MiriGenmcShim> {
+    auto conf = std::make_shared<Config>();
+
+    // Set whether GenMC should print execution graphs after every explored/blocked execution.
+    conf->printExecGraphs =
+        (params.print_execution_graphs == ExecutiongraphPrinting::Explored ||
+         params.print_execution_graphs == ExecutiongraphPrinting::ExploredAndBlocked);
+    conf->printBlockedExecs =
+        (params.print_execution_graphs == ExecutiongraphPrinting::Blocked ||
+         params.print_execution_graphs == ExecutiongraphPrinting::ExploredAndBlocked);
+
+    // `1024` is the default value that GenMC uses.
+    // If any thread has at least this many events, a warning/tip will be printed.
+    //
+    // Miri produces a lot more events than GenMC, so the graph size warning triggers on almost
+    // all programs. The current value is large enough so the warning is not be triggered by any
+    // reasonable programs.
+    // FIXME(genmc): The emitted warning mentions features not supported by Miri ('--unroll'
+    // parameter).
+    // FIXME(genmc): A more appropriate limit should be chosen once the warning is useful for
+    // Miri.
+    conf->warnOnGraphSize = 1024 * 1024;
+
+    // We only support the `RC11` memory model for Rust, and `SC` when weak memory emulation is
+    // disabled.
+    conf->model = params.disable_weak_memory_emulation ? ModelType::SC : ModelType::RC11;
+
+    // This prints the seed that GenMC picks for randomized scheduling during estimation mode.
+    conf->printRandomScheduleSeed = params.print_random_schedule_seed;
+
+    // FIXME(genmc): supporting IPR requires annotations for `assume` and `spinloops`.
+    conf->ipr = false;
+    // FIXME(genmc): supporting BAM requires `Barrier` support + detecting whether return value
+    // of barriers are used.
+    conf->disableBAM = true;
+
+    // Instruction caching could help speed up verification by filling the graph from cache, if
+    // the list of values read by all load events in a thread have been seen before. Combined
+    // with not replaying completed threads, this can also reducing the amount of Mir
+    // interpretation required by Miri. With the current setup, this would be incorrect, since
+    // Miri doesn't give GenMC the actual values read by non-atomic reads.
+    conf->instructionCaching = false;
+    // Many of Miri's checks work under the assumption that threads are only executed in an
+    // order that could actually happen during a normal execution. Formally, this means that
+    // replaying an execution needs to respect the po-rf-relation of the executiongraph (po ==
+    // program-order, rf == reads-from). This means, any event in the graph, when replayed, must
+    // happen after any events that happen before it in the same graph according to the program
+    // code, and all (non-atomic) reads must happen after the write event they read from.
+    //
+    // Not replaying completed threads means any read event from that thread never happens in
+    // Miri's memory, so this would only work if there are never any non-atomic reads from any
+    // value written by the skipped thread.
+    conf->replayCompletedThreads = true;
+
+    // FIXME(genmc): implement symmetry reduction.
+    ERROR_ON(
+        params.do_symmetry_reduction,
+        "Symmetry reduction is currently unsupported in GenMC mode."
+    );
+    conf->symmetryReduction = params.do_symmetry_reduction;
+
+    // Set the scheduling policy. GenMC uses `WFR` for estimation mode.
+    // For normal verification, `WF` has the best performance and is the GenMC default.
+    // Other scheduling policies are used by GenMC for testing and for modes currently
+    // unsupported with Miri such as bounding, which uses LTR.
+    conf->schedulePolicy = estimation_mode ? SchedulePolicy::WFR : SchedulePolicy::WF;
+
+    // Set the min and max number of executions tested in estimation mode.
+    conf->estimationMin = 10; // default taken from GenMC
+    conf->estimationMax = params.estimation_max;
+    // Deviation threshold % under which estimation is deemed good enough.
+    conf->sdThreshold = 10; // default taken from GenMC
+    // Set the mode used for this driver, either estimation or verification.
+    const auto mode = estimation_mode ? GenMCDriver::Mode(GenMCDriver::EstimationMode {})
+                                      : GenMCDriver::Mode(GenMCDriver::VerificationMode {});
+
+    // Running Miri-GenMC without race detection is not supported.
+    // Disabling this option also changes the behavior of the replay scheduler to only schedule
+    // at atomic operations, which is required with Miri. This happens because Miri can generate
+    // multiple GenMC events for a single MIR terminator. Without this option, the scheduler
+    // might incorrectly schedule an atomic MIR terminator because the first event it creates is
+    // a non-atomic (e.g., `StorageLive`).
+    conf->disableRaceDetection = false;
+
+    // Miri can already check for unfreed memory. Also, GenMC cannot distinguish between memory
+    // that is allowed to leak and memory that is not.
+    conf->warnUnfreedMemory = false;
+
+    // FIXME(genmc,error handling): This function currently exits on error, but will return an
+    // error value in the future. The return value should be checked once this change is made.
+    checkConfig(*conf);
+
+    // Create the actual driver and Miri-GenMC communication shim.
+    auto driver = std::make_unique<MiriGenmcShim>(std::move(conf), mode);
+
+    // FIXME(genmc,HACK): Until a proper solution is implemented in GenMC, these callbacks will
+    // allow Miri to return information about global allocations and override uninitialized memory
+    // checks for non-atomic loads (Miri handles those without GenMC, so the error would be wrong).
+    auto interpreter_callbacks = InterpreterCallbacks {
+        // Miri already ensures that memory accesses are valid, so this check doesn't matter.
+        // We check that the address is static, but skip checking if it is part of an actual
+        // allocation.
+        .isStaticallyAllocated = [](SAddr addr) { return addr.isStatic(); },
+        // FIXME(genmc,error reporting): Once a proper a proper API for passing such information is
+        // implemented in GenMC, Miri should use it to improve the produced error messages.
+        .getStaticName = [](SAddr addr) { return "[UNKNOWN STATIC]"; },
+        // This function is called to get the initial value stored at the given address.
+        //
+        // From a Miri perspective, this API doesn't work very well: most memory starts out
+        // "uninitialized";
+        // only statics have an initial value. And their initial value is just a sequence of bytes,
+        // but GenMC
+        // expect this to be already split into separate atomic variables. So we return a dummy
+        // value.
+        // This value should never be visible to the interpreted program.
+        // GenMC does not understand uninitialized memory the same way Miri does, which may cause
+        // this function to be called. The returned value can be visible to Miri or the user:
+        // - Printing the execution graph may contain this value in place of uninitialized values.
+        //   FIXME(genmc): NOTE: printing the execution graph is not yet implemented.
+        // - Non-atomic loads may return this value, but Miri ignores values of non-atomic loads.
+        // - Atomic loads will *not* see this value once mixed atomic-non-atomic support is added.
+        //   Currently, atomic loads can see this value, unless initialized by an *atomic* store.
+        //   FIXME(genmc): update this comment once mixed atomic-non-atomic support is added.
+        //
+        // FIXME(genmc): implement proper support for uninitialized memory in GenMC. Ideally, the
+        // initial value getter would return an `optional<SVal>`, since the memory location may be
+        // uninitialized.
+        .initValGetter = [](const AAccess& a) { return SVal(0xDEAD); },
+        // Miri serves non-atomic loads from its own memory and these GenMC checks are wrong in
+        // that case. This should no longer be required with proper mixed-size access support.
+        .skipUninitLoadChecks = [](MemOrdering ord) { return ord == MemOrdering::NotAtomic; },
+    };
+    driver->setInterpCallbacks(std::move(interpreter_callbacks));
+
+    return driver;
+}
diff --git a/src/tools/miri/genmc-sys/cpp/src/MiriInterface/ThreadManagement.cpp b/src/tools/miri/genmc-sys/cpp/src/MiriInterface/ThreadManagement.cpp
new file mode 100644
index 0000000000000..352d27adc3e8b
--- /dev/null
+++ b/src/tools/miri/genmc-sys/cpp/src/MiriInterface/ThreadManagement.cpp
@@ -0,0 +1,54 @@
+
+/** This file contains functionality related thread management (creation, finishing, join, etc.)  */
+
+#include "MiriInterface.hpp"
+
+// CXX.rs generated headers:
+#include "genmc-sys/src/lib.rs.h"
+
+// GenMC headers:
+#include "Support/Error.hpp"
+#include "Support/Verbosity.hpp"
+
+// C++ headers:
+#include <cstdint>
+
+void MiriGenmcShim::handle_thread_create(ThreadId thread_id, ThreadId parent_id) {
+    // NOTE: The threadCreate event happens in the parent:
+    const auto pos = inc_pos(parent_id);
+    // FIXME(genmc): for supporting symmetry reduction, these will need to be properly set:
+    const unsigned fun_id = 0;
+    const SVal arg = SVal(0);
+    const ThreadInfo child_info = ThreadInfo { thread_id, parent_id, fun_id, arg };
+
+    // NOTE: Default memory ordering (`Release`) used here.
+    const auto child_tid = GenMCDriver::handleThreadCreate(pos, child_info, EventDeps());
+    // Sanity check the thread id, which is the index in the `threads_action_` array.
+    BUG_ON(child_tid != thread_id || child_tid <= 0 || child_tid != threads_action_.size());
+    threads_action_.push_back(Action(ActionKind::Load, Event(child_tid, 0)));
+}
+
+void MiriGenmcShim::handle_thread_join(ThreadId thread_id, ThreadId child_id) {
+    // The thread join event happens in the parent.
+    const auto pos = inc_pos(thread_id);
+
+    // NOTE: Default memory ordering (`Acquire`) used here.
+    const auto ret = GenMCDriver::handleThreadJoin(pos, child_id, EventDeps());
+    // If the join failed, decrease the event index again:
+    if (!std::holds_alternative<SVal>(ret)) {
+        dec_pos(thread_id);
+    }
+
+    // NOTE: Thread return value is ignored, since Miri doesn't need it.
+}
+
+void MiriGenmcShim::handle_thread_finish(ThreadId thread_id, uint64_t ret_val) {
+    const auto pos = inc_pos(thread_id);
+    // NOTE: Default memory ordering (`Release`) used here.
+    GenMCDriver::handleThreadFinish(pos, SVal(ret_val));
+}
+
+void MiriGenmcShim::handle_thread_kill(ThreadId thread_id) {
+    const auto pos = inc_pos(thread_id);
+    GenMCDriver::handleThreadKill(pos);
+}
diff --git a/src/tools/miri/genmc-sys/src/lib.rs b/src/tools/miri/genmc-sys/src/lib.rs
index ab46d729ea167..733b3d780b187 100644
--- a/src/tools/miri/genmc-sys/src/lib.rs
+++ b/src/tools/miri/genmc-sys/src/lib.rs
@@ -1,30 +1,456 @@
+use std::str::FromStr;
+use std::sync::OnceLock;
+
+pub use cxx::UniquePtr;
+
 pub use self::ffi::*;
 
+/// Defined in "genmc/src/Support/SAddr.hpp".
+/// The first bit of all global addresses must be set to `1`.
+/// This means the mask, interpreted as an address, is the lower bound of where the global address space starts.
+///
+/// FIXME(genmc): rework this if non-64bit support is added to GenMC (the current allocation scheme only allows for 64bit addresses).
+/// FIXME(genmc): currently we use `get_global_alloc_static_mask()` to ensure the constant is consistent between Miri and GenMC,
+///   but if https://github.com/dtolnay/cxx/issues/1051 is fixed we could share the constant directly.
+pub const GENMC_GLOBAL_ADDRESSES_MASK: u64 = 1 << 63;
+
+/// GenMC thread ids are C++ type `int`, which is equivalent to Rust's `i32` on most platforms.
+/// The main thread always has thread id 0.
+pub const GENMC_MAIN_THREAD_ID: i32 = 0;
+
+/// Changing GenMC's log level is not thread safe, so we limit it to only be set once to prevent any data races.
+/// This value will be initialized when the first `MiriGenmcShim` is created.
+static GENMC_LOG_LEVEL: OnceLock<LogLevel> = OnceLock::new();
+
+// Create a new handle to the GenMC model checker.
+// The first call to this function determines the log level of GenMC, any future call with a different log level will panic.
+pub fn create_genmc_driver_handle(
+    params: &GenmcParams,
+    genmc_log_level: LogLevel,
+    do_estimation: bool,
+) -> UniquePtr<MiriGenmcShim> {
+    // SAFETY: Only setting the GenMC log level once is guaranteed by the `OnceLock`.
+    // No other place calls `set_log_level_raw`, so the `logLevel` value in GenMC will not change once we initialize it once.
+    // All functions that use GenMC's `logLevel` can only be accessed in safe Rust through a `MiriGenmcShim`.
+    // There is no way to get `MiriGenmcShim` other than through `create_handle`, and we only call it *after* setting the log level, preventing any possible data races.
+    assert_eq!(
+        &genmc_log_level,
+        GENMC_LOG_LEVEL.get_or_init(|| {
+            unsafe { set_log_level_raw(genmc_log_level) };
+            genmc_log_level
+        }),
+        "Attempt to change the GenMC log level after it was already set"
+    );
+    unsafe { MiriGenmcShim::create_handle(params, do_estimation) }
+}
+
+impl GenmcScalar {
+    pub const UNINIT: Self = Self { value: 0, is_init: false };
+
+    pub const fn from_u64(value: u64) -> Self {
+        Self { value, is_init: true }
+    }
+}
+
 impl Default for GenmcParams {
     fn default() -> Self {
         Self {
+            estimation_max: 1000, // default taken from GenMC
             print_random_schedule_seed: false,
             do_symmetry_reduction: false,
-            // FIXME(GenMC): Add defaults for remaining parameters
+            // GenMC graphs can be quite large since Miri produces a lot of (non-atomic) events.
+            print_execution_graphs: ExecutiongraphPrinting::None,
+            disable_weak_memory_emulation: false,
         }
     }
 }
 
+impl Default for LogLevel {
+    fn default() -> Self {
+        // FIXME(genmc): set `Tip` by default once the GenMC tips are relevant to Miri.
+        Self::Warning
+    }
+}
+
+impl FromStr for SchedulePolicy {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "wf" => SchedulePolicy::WF,
+            "wfr" => SchedulePolicy::WFR,
+            "arbitrary" | "random" => SchedulePolicy::Arbitrary,
+            "ltr" => SchedulePolicy::LTR,
+            _ => return Err("invalid scheduling policy"),
+        })
+    }
+}
+
+impl FromStr for LogLevel {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "quiet" => LogLevel::Quiet,
+            "error" => LogLevel::Error,
+            "warning" => LogLevel::Warning,
+            "tip" => LogLevel::Tip,
+            "debug1" => LogLevel::Debug1Revisits,
+            "debug2" => LogLevel::Debug2MemoryAccesses,
+            "debug3" => LogLevel::Debug3ReadsFrom,
+            _ => return Err("invalid log level"),
+        })
+    }
+}
+
 #[cxx::bridge]
 mod ffi {
+    /**** Types shared between Miri/Rust and Miri/C++ through cxx_bridge: ****/
+
     /// Parameters that will be given to GenMC for setting up the model checker.
-    /// (The fields of this struct are visible to both Rust and C++)
+    /// The fields of this struct are visible to both Rust and C++.
+    /// Note that this struct is #[repr(C)], so the order of fields matters.
     #[derive(Clone, Debug)]
     struct GenmcParams {
+        /// Maximum number of executions explored in estimation mode.
+        pub estimation_max: u32,
         pub print_random_schedule_seed: bool,
         pub do_symmetry_reduction: bool,
-        // FIXME(GenMC): Add remaining parameters.
+        pub print_execution_graphs: ExecutiongraphPrinting,
+        /// Enabling this will set the memory model used by GenMC to "Sequential Consistency" (SC).
+        /// This will disable any weak memory effects, which reduces the number of program executions that will be explored.
+        pub disable_weak_memory_emulation: bool,
+    }
+
+    /// This is mostly equivalent to GenMC `VerbosityLevel`, but the debug log levels are always present (not conditionally compiled based on `ENABLE_GENMC_DEBUG`).
+    /// We add this intermediate type to prevent changes to the GenMC log-level from breaking the Miri
+    /// build, and to have a stable type for the C++-Rust interface, independent of `ENABLE_GENMC_DEBUG`.
+    #[derive(Debug)]
+    enum LogLevel {
+        /// Disable *all* logging (including error messages on a crash).
+        Quiet,
+        /// Log errors.
+        Error,
+        /// Log errors and warnings.
+        Warning,
+        /// Log errors, warnings and tips.
+        Tip,
+        /// Debug print considered revisits.
+        /// Downgraded to `Tip` if `GENMC_DEBUG` is not enabled.
+        Debug1Revisits,
+        /// Print the execution graph after every memory access.
+        /// Also includes the previous debug log level.
+        /// Downgraded to `Tip` if `GENMC_DEBUG` is not enabled.
+        Debug2MemoryAccesses,
+        /// Print reads-from values considered by GenMC.
+        /// Also includes the previous debug log level.
+        /// Downgraded to `Tip` if `GENMC_DEBUG` is not enabled.
+        Debug3ReadsFrom,
     }
+
+    #[derive(Debug)]
+    /// Setting to control which execution graphs GenMC prints after every execution.
+    enum ExecutiongraphPrinting {
+        /// Print no graphs.
+        None,
+        /// Print graphs of all fully explored executions.
+        Explored,
+        /// Print graphs of all blocked executions.
+        Blocked,
+        /// Print graphs of all executions.
+        ExploredAndBlocked,
+    }
+
+    /// This type corresponds to `Option<SVal>` (or `std::optional<SVal>`), where `SVal` is the type that GenMC uses for storing values.
+    /// CXX doesn't support `std::optional` currently, so we need to use an extra `bool` to define whether this value is initialized or not.
+    #[derive(Debug, Clone, Copy)]
+    struct GenmcScalar {
+        value: u64,
+        is_init: bool,
+    }
+
+    #[must_use]
+    #[derive(Debug, Clone, Copy)]
+    enum ExecutionState {
+        Ok,
+        Blocked,
+        Finished,
+    }
+
+    #[must_use]
+    #[derive(Debug)]
+    struct SchedulingResult {
+        exec_state: ExecutionState,
+        next_thread: i32,
+    }
+
+    #[must_use]
+    #[derive(Debug)]
+    struct EstimationResult {
+        /// Expected number of total executions.
+        mean: f64,
+        /// Standard deviation of the total executions estimate.
+        sd: f64,
+        /// Number of explored executions during the estimation.
+        explored_execs: u64,
+        /// Number of encounteded blocked executions during the estimation.
+        blocked_execs: u64,
+    }
+
+    #[must_use]
+    #[derive(Debug)]
+    struct LoadResult {
+        /// If not null, contains the error encountered during the handling of the load.
+        error: UniquePtr<CxxString>,
+        /// Indicates whether a value was read or not.
+        has_value: bool,
+        /// The value that was read. Should not be used if `has_value` is `false`.
+        read_value: GenmcScalar,
+    }
+
+    #[must_use]
+    #[derive(Debug)]
+    struct StoreResult {
+        /// If not null, contains the error encountered during the handling of the store.
+        error: UniquePtr<CxxString>,
+        /// `true` if the write should also be reflected in Miri's memory representation.
+        is_coherence_order_maximal_write: bool,
+    }
+
+    #[must_use]
+    #[derive(Debug)]
+    struct ReadModifyWriteResult {
+        /// If there was an error, it will be stored in `error`, otherwise it is `None`.
+        error: UniquePtr<CxxString>,
+        /// The value that was read by the RMW operation as the left operand.
+        old_value: GenmcScalar,
+        /// The value that was produced by the RMW operation.
+        new_value: GenmcScalar,
+        /// `true` if the write should also be reflected in Miri's memory representation.
+        is_coherence_order_maximal_write: bool,
+    }
+
+    #[must_use]
+    #[derive(Debug)]
+    struct CompareExchangeResult {
+        /// If there was an error, it will be stored in `error`, otherwise it is `None`.
+        error: UniquePtr<CxxString>,
+        /// The value that was read by the compare-exchange.
+        old_value: GenmcScalar,
+        /// `true` if compare_exchange op was successful.
+        is_success: bool,
+        /// `true` if the write should also be reflected in Miri's memory representation.
+        is_coherence_order_maximal_write: bool,
+    }
+
+    /**** These are GenMC types that we have to copy-paste here since cxx does not support
+    "importing" externally defined C++ types. ****/
+
+    #[derive(Clone, Copy, Debug)]
+    enum SchedulePolicy {
+        LTR,
+        WF,
+        WFR,
+        Arbitrary,
+    }
+
+    #[derive(Debug)]
+    /// Corresponds to GenMC's type with the same name.
+    /// Should only be modified if changed by GenMC.
+    enum ActionKind {
+        /// Any Mir terminator that's atomic and has load semantics.
+        Load,
+        /// Anything that's not a `Load`.
+        NonLoad,
+    }
+
+    #[derive(Debug)]
+    /// Corresponds to GenMC's type with the same name.
+    /// Should only be modified if changed by GenMC.
+    enum MemOrdering {
+        NotAtomic = 0,
+        Relaxed = 1,
+        // We skip 2 in case we support consume.
+        Acquire = 3,
+        Release = 4,
+        AcquireRelease = 5,
+        SequentiallyConsistent = 6,
+    }
+
+    #[derive(Debug)]
+    enum RMWBinOp {
+        Xchg = 0,
+        Add = 1,
+        Sub = 2,
+        And = 3,
+        Nand = 4,
+        Or = 5,
+        Xor = 6,
+        Max = 7,
+        Min = 8,
+        UMax = 9,
+        UMin = 10,
+    }
+
+    // # Safety
+    //
+    // This block is unsafe to allow defining safe methods inside.
+    //
+    // `get_global_alloc_static_mask` is safe since it just returns a constant.
+    // All methods on `MiriGenmcShim` are safe by the correct usage of the two unsafe functions
+    // `set_log_level_raw` and `MiriGenmcShim::create_handle`.
+    // See the doc comment on those two functions for their safety requirements.
     unsafe extern "C++" {
         include!("MiriInterface.hpp");
 
-        type MiriGenMCShim;
+        /**** Types shared between Miri/Rust and Miri/C++: ****/
+        type MiriGenmcShim;
+
+        /**** Types shared between Miri/Rust and GenMC/C++:
+        (This tells cxx that the enums defined above are already defined on the C++ side;
+        it will emit assertions to ensure that the two definitions agree.) ****/
+        type ActionKind;
+        type MemOrdering;
+        type RMWBinOp;
+        type SchedulePolicy;
+
+        /// Set the log level for GenMC.
+        ///
+        /// # Safety
+        ///
+        /// This function is not thread safe, since it writes to the global, mutable, non-atomic `logLevel` variable.
+        /// Any GenMC function may read from `logLevel` unsynchronized.
+        /// The safest way to use this function is to set the log level exactly once before first calling `create_handle`.
+        /// Never calling this function is safe, GenMC will fall back to its default log level.
+        unsafe fn set_log_level_raw(log_level: LogLevel);
+
+        /// Create a new `MiriGenmcShim`, which wraps a `GenMCDriver`.
+        ///
+        /// # Safety
+        ///
+        /// This function is marked as unsafe since the `logLevel` global variable is non-atomic.
+        /// This function should not be called in an unsynchronized way with `set_log_level_raw`, since
+        /// this function and any methods on the returned `MiriGenmcShim` may read the `logLevel`,
+        /// causing a data race.
+        /// The safest way to use these functions is to call `set_log_level_raw` once, and only then
+        /// start creating handles.
+        /// There should not be any other (safe) way to create a `MiriGenmcShim`.
+        #[Self = "MiriGenmcShim"]
+        unsafe fn create_handle(
+            params: &GenmcParams,
+            estimation_mode: bool,
+        ) -> UniquePtr<MiriGenmcShim>;
+        /// Get the bit mask that GenMC expects for global memory allocations.
+        fn get_global_alloc_static_mask() -> u64;
+
+        /// This function must be called at the start of any execution, before any events are reported to GenMC.
+        fn handle_execution_start(self: Pin<&mut MiriGenmcShim>);
+        /// This function must be called at the end of any execution, even if an error was found during the execution.
+        /// Returns `null`, or a string containing an error message if an error occured.
+        fn handle_execution_end(self: Pin<&mut MiriGenmcShim>) -> UniquePtr<CxxString>;
+
+        /***** Functions for handling events encountered during program execution. *****/
+
+        /**** Memory access handling ****/
+        fn handle_load(
+            self: Pin<&mut MiriGenmcShim>,
+            thread_id: i32,
+            address: u64,
+            size: u64,
+            memory_ordering: MemOrdering,
+            old_value: GenmcScalar,
+        ) -> LoadResult;
+        fn handle_read_modify_write(
+            self: Pin<&mut MiriGenmcShim>,
+            thread_id: i32,
+            address: u64,
+            size: u64,
+            rmw_op: RMWBinOp,
+            ordering: MemOrdering,
+            rhs_value: GenmcScalar,
+            old_value: GenmcScalar,
+        ) -> ReadModifyWriteResult;
+        fn handle_compare_exchange(
+            self: Pin<&mut MiriGenmcShim>,
+            thread_id: i32,
+            address: u64,
+            size: u64,
+            expected_value: GenmcScalar,
+            new_value: GenmcScalar,
+            old_value: GenmcScalar,
+            success_ordering: MemOrdering,
+            fail_load_ordering: MemOrdering,
+            can_fail_spuriously: bool,
+        ) -> CompareExchangeResult;
+        fn handle_store(
+            self: Pin<&mut MiriGenmcShim>,
+            thread_id: i32,
+            address: u64,
+            size: u64,
+            value: GenmcScalar,
+            old_value: GenmcScalar,
+            memory_ordering: MemOrdering,
+        ) -> StoreResult;
+        fn handle_fence(
+            self: Pin<&mut MiriGenmcShim>,
+            thread_id: i32,
+            memory_ordering: MemOrdering,
+        );
+
+        /**** Memory (de)allocation ****/
+        fn handle_malloc(
+            self: Pin<&mut MiriGenmcShim>,
+            thread_id: i32,
+            size: u64,
+            alignment: u64,
+        ) -> u64;
+        fn handle_free(self: Pin<&mut MiriGenmcShim>, thread_id: i32, address: u64);
+
+        /**** Thread management ****/
+        fn handle_thread_create(self: Pin<&mut MiriGenmcShim>, thread_id: i32, parent_id: i32);
+        fn handle_thread_join(self: Pin<&mut MiriGenmcShim>, thread_id: i32, child_id: i32);
+        fn handle_thread_finish(self: Pin<&mut MiriGenmcShim>, thread_id: i32, ret_val: u64);
+        fn handle_thread_kill(self: Pin<&mut MiriGenmcShim>, thread_id: i32);
+
+        /***** Exploration related functionality *****/
+
+        /// Ask the GenMC scheduler for a new thread to schedule and
+        /// return whether the execution is finished, blocked, or can continue.
+        /// Updates the next instruction kind for the given thread id.
+        fn schedule_next(
+            self: Pin<&mut MiriGenmcShim>,
+            curr_thread_id: i32,
+            curr_thread_next_instr_kind: ActionKind,
+        ) -> SchedulingResult;
+
+        /// Check whether there are more executions to explore.
+        /// If there are more executions, this method prepares for the next execution and returns `true`.
+        fn is_exploration_done(self: Pin<&mut MiriGenmcShim>) -> bool;
+
+        /**** Result querying functionality. ****/
+
+        // NOTE: We don't want to share the `VerificationResult` type with the Rust side, since it
+        // is very large, uses features that CXX.rs doesn't support and may change as GenMC changes.
+        // Instead, we only use the result on the C++ side, and only expose these getter function to
+        // the Rust side.
+        // Each `GenMCDriver` contains one `VerificationResult`, and each `MiriGenmcShim` contains on `GenMCDriver`.
+        // GenMC builds up the content of the `struct VerificationResult` over the course of an exploration,
+        // but it's safe to look at it at any point, since it is only accessible through exactly one `MiriGenmcShim`.
+        // All these functions for querying the result can be safely called repeatedly and at any time,
+        // though the results may be incomplete if called before `handle_execution_end`.
+
+        /// Get the number of blocked executions encountered by GenMC (cast into a fixed with integer)
+        fn get_blocked_execution_count(self: &MiriGenmcShim) -> u64;
+        /// Get the number of executions explored by GenMC (cast into a fixed with integer)
+        fn get_explored_execution_count(self: &MiriGenmcShim) -> u64;
+        /// Get all messages that GenMC produced (errors, warnings), combined into one string.
+        fn get_result_message(self: &MiriGenmcShim) -> UniquePtr<CxxString>;
+        /// If an error occurred, return a string describing the error, otherwise, return `nullptr`.
+        fn get_error_string(self: &MiriGenmcShim) -> UniquePtr<CxxString>;
+
+        /**** Printing functionality. ****/
 
-        fn createGenmcHandle(config: &GenmcParams) -> UniquePtr<MiriGenMCShim>;
+        /// Get the results of a run in estimation mode.
+        fn get_estimation_results(self: &MiriGenmcShim) -> EstimationResult;
     }
 }
diff --git a/src/tools/miri/genmc-sys/src_cpp/MiriInterface.cpp b/src/tools/miri/genmc-sys/src_cpp/MiriInterface.cpp
deleted file mode 100644
index 0827bb3d40746..0000000000000
--- a/src/tools/miri/genmc-sys/src_cpp/MiriInterface.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-#include "MiriInterface.hpp"
-
-#include "genmc-sys/src/lib.rs.h"
-
-auto MiriGenMCShim::createHandle(const GenmcParams &config)
-	-> std::unique_ptr<MiriGenMCShim>
-{
-	auto conf = std::make_shared<Config>();
-
-	// Miri needs all threads to be replayed, even fully completed ones.
-	conf->replayCompletedThreads = true;
-
-	// We only support the RC11 memory model for Rust.
-	conf->model = ModelType::RC11;
-
-	conf->printRandomScheduleSeed = config.print_random_schedule_seed;
-
-	// FIXME(genmc): disable any options we don't support currently:
-	conf->ipr = false;
-	conf->disableBAM = true;
-	conf->instructionCaching = false;
-
-	ERROR_ON(config.do_symmetry_reduction, "Symmetry reduction is currently unsupported in GenMC mode.");
-	conf->symmetryReduction = config.do_symmetry_reduction;
-
-	// FIXME(genmc): Should there be a way to change this option from Miri?
-	conf->schedulePolicy = SchedulePolicy::WF;
-
-	// FIXME(genmc): implement estimation mode:
-	conf->estimate = false;
-	conf->estimationMax = 1000;
-	const auto mode = conf->estimate ? GenMCDriver::Mode(GenMCDriver::EstimationMode{})
-									  : GenMCDriver::Mode(GenMCDriver::VerificationMode{});
-
-	// Running Miri-GenMC without race detection is not supported.
-	// Disabling this option also changes the behavior of the replay scheduler to only schedule at atomic operations, which is required with Miri.
-	// This happens because Miri can generate multiple GenMC events for a single MIR terminator. Without this option,
-	// the scheduler might incorrectly schedule an atomic MIR terminator because the first event it creates is a non-atomic (e.g., `StorageLive`).
-	conf->disableRaceDetection = false;
-
-	// Miri can already check for unfreed memory. Also, GenMC cannot distinguish between memory
-	// that is allowed to leak and memory that is not.
-	conf->warnUnfreedMemory = false;
-
-	// FIXME(genmc): check config:
-	// checkConfigOptions(*conf);
-
-	auto driver = std::make_unique<MiriGenMCShim>(std::move(conf), mode);
-	return driver;
-}
diff --git a/src/tools/miri/genmc-sys/src_cpp/MiriInterface.hpp b/src/tools/miri/genmc-sys/src_cpp/MiriInterface.hpp
deleted file mode 100644
index e55522ef41897..0000000000000
--- a/src/tools/miri/genmc-sys/src_cpp/MiriInterface.hpp
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef GENMC_MIRI_INTERFACE_HPP
-#define GENMC_MIRI_INTERFACE_HPP
-
-#include "rust/cxx.h"
-
-#include "config.h"
-
-#include "Config/Config.hpp"
-#include "Verification/GenMCDriver.hpp"
-
-#include <iostream>
-
-/**** Types available to Miri ****/
-
-// Config struct defined on the Rust side and translated to C++ by cxx.rs:
-struct GenmcParams;
-
-struct MiriGenMCShim : private GenMCDriver
-{
-
-public:
-	MiriGenMCShim(std::shared_ptr<const Config> conf, Mode mode /* = VerificationMode{} */)
-		: GenMCDriver(std::move(conf), nullptr, mode)
-	{
-		std::cerr << "C++: GenMC handle created!" << std::endl;
-	}
-
-	virtual ~MiriGenMCShim()
-	{
-		std::cerr << "C++: GenMC handle destroyed!" << std::endl;
-	}
-
-	static std::unique_ptr<MiriGenMCShim> createHandle(const GenmcParams &config);
-};
-
-/**** Functions available to Miri ****/
-
-// NOTE: CXX doesn't support exposing static methods to Rust currently, so we expose this function instead.
-static inline auto createGenmcHandle(const GenmcParams &config) -> std::unique_ptr<MiriGenMCShim>
-{
-	return MiriGenMCShim::createHandle(config);
-}
-
-#endif /* GENMC_MIRI_INTERFACE_HPP */
diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index d44488399f8b9..0eb16a943d6ab 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-51ff895062ba60a7cba53f57af928c3fb7b0f2f4
+3f1552a273e43e15f6ed240d00e1efdd6a53e65e
diff --git a/src/tools/miri/src/alloc_addresses/address_generator.rs b/src/tools/miri/src/alloc_addresses/address_generator.rs
new file mode 100644
index 0000000000000..3764f5dcb8244
--- /dev/null
+++ b/src/tools/miri/src/alloc_addresses/address_generator.rs
@@ -0,0 +1,78 @@
+use std::ops::Range;
+
+use rand::Rng;
+use rustc_abi::{Align, Size};
+use rustc_const_eval::interpret::{InterpResult, interp_ok};
+use rustc_middle::{err_exhaust, throw_exhaust};
+
+/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
+/// of `align` that is larger or equal to `addr`
+fn align_addr(addr: u64, align: u64) -> u64 {
+    match addr % align {
+        0 => addr,
+        rem => addr.strict_add(align) - rem,
+    }
+}
+
+/// This provides the logic to generate addresses for memory allocations in a given address range.
+#[derive(Debug)]
+pub struct AddressGenerator {
+    /// This is used as a memory address when a new pointer is casted to an integer. It
+    /// is always larger than any address that was previously made part of a block.
+    next_base_addr: u64,
+    /// This is the last address that can be allocated.
+    end: u64,
+}
+
+impl AddressGenerator {
+    pub fn new(addr_range: Range<u64>) -> Self {
+        Self { next_base_addr: addr_range.start, end: addr_range.end }
+    }
+
+    /// Get the remaining range where this `AddressGenerator` can still allocate addresses.
+    pub fn get_remaining(&self) -> Range<u64> {
+        self.next_base_addr..self.end
+    }
+
+    /// Generate a new address with the specified size and alignment, using the given Rng to add some randomness.
+    /// The returned allocation is guaranteed not to overlap with any address ranges given out by the generator before.
+    /// Returns an error if the allocation request cannot be fulfilled.
+    pub fn generate<'tcx, R: Rng>(
+        &mut self,
+        size: Size,
+        align: Align,
+        rng: &mut R,
+    ) -> InterpResult<'tcx, u64> {
+        // Leave some space to the previous allocation, to give it some chance to be less aligned.
+        // We ensure that `(self.next_base_addr + slack) % 16` is uniformly distributed.
+        let slack = rng.random_range(0..16);
+        // From next_base_addr + slack, round up to adjust for alignment.
+        let base_addr =
+            self.next_base_addr.checked_add(slack).ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+        let base_addr = align_addr(base_addr, align.bytes());
+
+        // Remember next base address.  If this allocation is zero-sized, leave a gap of at
+        // least 1 to avoid two allocations having the same base address. (The logic in
+        // `alloc_id_from_addr` assumes unique addresses, and different function/vtable pointers
+        // need to be distinguishable!)
+        self.next_base_addr = base_addr
+            .checked_add(size.bytes().max(1))
+            .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+        // Even if `Size` didn't overflow, we might still have filled up the address space.
+        if self.next_base_addr > self.end {
+            throw_exhaust!(AddressSpaceFull);
+        }
+        interp_ok(base_addr)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_align_addr() {
+        assert_eq!(align_addr(37, 4), 40);
+        assert_eq!(align_addr(44, 4), 44);
+    }
+}
diff --git a/src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/alloc_addresses/mod.rs
index 334503d2994b1..f011ee717850d 100644
--- a/src/tools/miri/src/alloc_addresses/mod.rs
+++ b/src/tools/miri/src/alloc_addresses/mod.rs
@@ -1,15 +1,16 @@
 //! This module is responsible for managing the absolute addresses that allocations are located at,
 //! and for casting between pointers and integers based on those addresses.
 
+mod address_generator;
 mod reuse_pool;
 
 use std::cell::RefCell;
-use std::cmp::max;
 
-use rand::Rng;
 use rustc_abi::{Align, Size};
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
+use rustc_middle::ty::TyCtxt;
 
+pub use self::address_generator::AddressGenerator;
 use self::reuse_pool::ReusePool;
 use crate::concurrency::VClock;
 use crate::*;
@@ -33,6 +34,8 @@ pub struct GlobalStateInner {
     /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
     /// from the base address, and we need to find the `AllocId` it belongs to. This is not the
     /// *full* inverse of `base_addr`; dead allocations have been removed.
+    /// Note that in GenMC mode, dead allocations are *not* removed -- and also, addresses are never
+    /// reused. This lets us use the address as a cross-execution-stable identifier for an allocation.
     int_to_ptr_map: Vec<(u64, AllocId)>,
     /// The base address for each allocation.  We cannot put that into
     /// `AllocExtra` because function pointers also have a base address, and
@@ -49,9 +52,8 @@ pub struct GlobalStateInner {
     /// Whether an allocation has been exposed or not. This cannot be put
     /// into `AllocExtra` for the same reason as `base_addr`.
     exposed: FxHashSet<AllocId>,
-    /// This is used as a memory address when a new pointer is casted to an integer. It
-    /// is always larger than any address that was previously made part of a block.
-    next_base_addr: u64,
+    /// The generator for new addresses in a given range.
+    address_generator: AddressGenerator,
     /// The provenance to use for int2ptr casts
     provenance_mode: ProvenanceMode,
 }
@@ -64,7 +66,7 @@ impl VisitProvenance for GlobalStateInner {
             prepared_alloc_bytes: _,
             reuse: _,
             exposed: _,
-            next_base_addr: _,
+            address_generator: _,
             provenance_mode: _,
         } = self;
         // Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them.
@@ -77,14 +79,14 @@ impl VisitProvenance for GlobalStateInner {
 }
 
 impl GlobalStateInner {
-    pub fn new(config: &MiriConfig, stack_addr: u64) -> Self {
+    pub fn new<'tcx>(config: &MiriConfig, stack_addr: u64, tcx: TyCtxt<'tcx>) -> Self {
         GlobalStateInner {
             int_to_ptr_map: Vec::default(),
             base_addr: FxHashMap::default(),
             prepared_alloc_bytes: FxHashMap::default(),
             reuse: ReusePool::new(config),
             exposed: FxHashSet::default(),
-            next_base_addr: stack_addr,
+            address_generator: AddressGenerator::new(stack_addr..tcx.target_usize_max()),
             provenance_mode: config.provenance_mode,
         }
     }
@@ -96,15 +98,6 @@ impl GlobalStateInner {
     }
 }
 
-/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
-/// of `align` that is larger or equal to `addr`
-fn align_addr(addr: u64, align: u64) -> u64 {
-    match addr % align {
-        0 => addr,
-        rem => addr.strict_add(align) - rem,
-    }
-}
-
 impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
 trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
     fn addr_from_alloc_id_uncached(
@@ -132,7 +125,8 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
         // Miri's address assignment leaks state across thread boundaries, which is incompatible
         // with GenMC execution. So we instead let GenMC assign addresses to allocations.
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
-            let addr = genmc_ctx.handle_alloc(&this.machine, info.size, info.align, memory_kind)?;
+            let addr =
+                genmc_ctx.handle_alloc(this, alloc_id, info.size, info.align, memory_kind)?;
             return interp_ok(addr);
         }
 
@@ -189,39 +183,22 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
             this.active_thread(),
         ) {
             if let Some(clock) = clock {
-                this.acquire_clock(&clock);
+                this.acquire_clock(&clock)?;
             }
             interp_ok(reuse_addr)
         } else {
             // We have to pick a fresh address.
-            // Leave some space to the previous allocation, to give it some chance to be less aligned.
-            // We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
-            let slack = rng.random_range(0..16);
-            // From next_base_addr + slack, round up to adjust for alignment.
-            let base_addr = global_state
-                .next_base_addr
-                .checked_add(slack)
-                .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-            let base_addr = align_addr(base_addr, info.align.bytes());
-
-            // Remember next base address.  If this allocation is zero-sized, leave a gap of at
-            // least 1 to avoid two allocations having the same base address. (The logic in
-            // `alloc_id_from_addr` assumes unique addresses, and different function/vtable pointers
-            // need to be distinguishable!)
-            global_state.next_base_addr = base_addr
-                .checked_add(max(info.size.bytes(), 1))
-                .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-            // Even if `Size` didn't overflow, we might still have filled up the address space.
-            if global_state.next_base_addr > this.target_usize_max() {
-                throw_exhaust!(AddressSpaceFull);
-            }
+            let new_addr =
+                global_state.address_generator.generate(info.size, info.align, &mut rng)?;
+
             // If we filled up more than half the address space, start aggressively reusing
             // addresses to avoid running out.
-            if global_state.next_base_addr > u64::try_from(this.target_isize_max()).unwrap() {
+            let remaining_range = global_state.address_generator.get_remaining();
+            if remaining_range.start > remaining_range.end / 2 {
                 global_state.reuse.address_space_shortage();
             }
 
-            interp_ok(base_addr)
+            interp_ok(new_addr)
         }
     }
 }
@@ -268,7 +245,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         // We only use this provenance if it has been exposed, or if the caller requested also non-exposed allocations
         if !only_exposed_allocations || global_state.exposed.contains(&alloc_id) {
             // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
-            debug_assert!(this.is_alloc_live(alloc_id));
+            // In GenMC mode, we keep all allocations, so this check doesn't apply there.
+            if this.machine.data_race.as_genmc_ref().is_none() {
+                debug_assert!(this.is_alloc_live(alloc_id));
+            }
             Some(alloc_id)
         } else {
             None
@@ -485,6 +465,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
 impl<'tcx> MiriMachine<'tcx> {
     pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
+        // In GenMC mode, we can't remove dead allocation info since such pointers can
+        // still be stored in atomics and we need this info to convert GenMC pointers to Miri pointers.
+        // `global_state.reuse` is also unused so we can just skip this entire function.
+        if self.data_race.as_genmc_ref().is_some() {
+            return;
+        }
+
         let global_state = self.alloc_addresses.get_mut();
         let rng = self.rng.get_mut();
 
@@ -511,6 +498,8 @@ impl<'tcx> MiriMachine<'tcx> {
         // Also remember this address for future reuse.
         let thread = self.threads.active_thread();
         global_state.reuse.add_addr(rng, addr, size, align, kind, thread, || {
+            // We already excluded GenMC above. We cannot use `self.release_clock` as
+            // `self.alloc_addresses` is borrowed.
             if let Some(data_race) = self.data_race.as_vclocks_ref() {
                 data_race.release_clock(&self.threads, |clock| clock.clone())
             } else {
@@ -519,14 +508,3 @@ impl<'tcx> MiriMachine<'tcx> {
         })
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_align_addr() {
-        assert_eq!(align_addr(37, 4), 40);
-        assert_eq!(align_addr(44, 4), 44);
-    }
-}
diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs
index 9b0bee72aeff6..8b15a7863476e 100644
--- a/src/tools/miri/src/bin/miri.rs
+++ b/src/tools/miri/src/bin/miri.rs
@@ -39,7 +39,7 @@ use std::sync::atomic::{AtomicI32, AtomicU32, Ordering};
 
 use miri::{
     BacktraceStyle, BorrowTrackerMethod, GenmcConfig, GenmcCtx, MiriConfig, MiriEntryFnType,
-    ProvenanceMode, RetagFields, TreeBorrowsParams, ValidationMode,
+    ProvenanceMode, RetagFields, TreeBorrowsParams, ValidationMode, run_genmc_mode,
 };
 use rustc_abi::ExternAbi;
 use rustc_data_structures::sync;
@@ -186,10 +186,18 @@ impl rustc_driver::Callbacks for MiriCompilerCalls {
                     optimizations is usually marginal at best.");
         }
 
-        if let Some(_genmc_config) = &config.genmc_config {
-            let _genmc_ctx = Rc::new(GenmcCtx::new(&config));
-
-            todo!("GenMC mode not yet implemented");
+        // Run in GenMC mode if enabled.
+        if config.genmc_config.is_some() {
+            // This is the entry point used in GenMC mode.
+            // This closure will be called multiple times to explore the concurrent execution space of the program.
+            let eval_entry_once = |genmc_ctx: Rc<GenmcCtx>| {
+                miri::eval_entry(tcx, entry_def_id, entry_type, &config, Some(genmc_ctx))
+            };
+            let return_code = run_genmc_mode(&config, eval_entry_once, tcx).unwrap_or_else(|| {
+                tcx.dcx().abort_if_errors();
+                rustc_driver::EXIT_FAILURE
+            });
+            exit(return_code);
         };
 
         if let Some(many_seeds) = self.many_seeds.take() {
@@ -737,19 +745,11 @@ fn main() {
         many_seeds.map(|seeds| ManySeedsConfig { seeds, keep_going: many_seeds_keep_going });
 
     // Validate settings for data race detection and GenMC mode.
-    if miri_config.genmc_config.is_some() {
-        if !miri_config.data_race_detector {
-            fatal_error!("Cannot disable data race detection in GenMC mode (currently)");
-        } else if !miri_config.weak_memory_emulation {
-            fatal_error!("Cannot disable weak memory emulation in GenMC mode");
-        }
-        if miri_config.borrow_tracker.is_some() {
-            eprintln!(
-                "warning: borrow tracking has been disabled, it is not (yet) supported in GenMC mode."
-            );
-            miri_config.borrow_tracker = None;
-        }
-    } else if miri_config.weak_memory_emulation && !miri_config.data_race_detector {
+    if let Err(err) = GenmcConfig::validate_genmc_mode_settings(&mut miri_config) {
+        fatal_error!("Invalid settings: {err}");
+    }
+
+    if miri_config.weak_memory_emulation && !miri_config.data_race_detector {
         fatal_error!(
             "Weak memory emulation cannot be enabled when the data race detector is disabled"
         );
diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
index 2977efaae04ac..5fe00ab02c4b9 100644
--- a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
@@ -740,6 +740,7 @@ trait EvalContextPrivExt<'tcx, 'ecx>: crate::MiriInterpCxExt<'tcx> {
                 if let Some(access) = access {
                     assert_eq!(access, AccessKind::Write);
                     // Make sure the data race model also knows about this.
+                    // FIXME(genmc): Ensure this is still done in GenMC mode. Check for other places where GenMC may need to be informed.
                     if let Some(data_race) = alloc_extra.data_race.as_vclocks_mut() {
                         data_race.write(
                             alloc_id,
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
index 1f29bcfc2b035..22bd63bd6b6f4 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
@@ -756,6 +756,8 @@ impl<'tcx> Tree {
                             // Don't check for protector if it is a Cell (see `unsafe_cell_deallocate` in `interior_mutability.rs`).
                             // Related to https://github.com/rust-lang/rust/issues/55005.
                             && !perm.permission().is_cell()
+                            // Only trigger UB if the accessed bit is set, i.e. if the protector is actually protecting this offset. See #4579.
+                            && perm.is_accessed()
                         {
                             Err(TransitionError::ProtectedDealloc)
                         } else {
diff --git a/src/tools/miri/src/clock.rs b/src/tools/miri/src/clock.rs
index 34465e9cac60d..dbbe741a0714d 100644
--- a/src/tools/miri/src/clock.rs
+++ b/src/tools/miri/src/clock.rs
@@ -46,14 +46,21 @@ impl Instant {
                 InstantKind::Virtual { nanoseconds: earlier },
             ) => {
                 let duration = nanoseconds.saturating_sub(earlier);
-                // `Duration` does not provide a nice constructor from a `u128` of nanoseconds,
-                // so we have to implement this ourselves.
-                // It is possible for second to overflow because u64::MAX < (u128::MAX / 1e9).
-                // It will be saturated to u64::MAX seconds if the value after division exceeds u64::MAX.
-                let seconds = u64::try_from(duration / 1_000_000_000).unwrap_or(u64::MAX);
-                // It is impossible for nanosecond to overflow because u32::MAX > 1e9.
-                let nanosecond = u32::try_from(duration.wrapping_rem(1_000_000_000)).unwrap();
-                Duration::new(seconds, nanosecond)
+                cfg_select! {
+                    bootstrap => {
+                         // `Duration` does not provide a nice constructor from a `u128` of nanoseconds,
+                        // so we have to implement this ourselves.
+                        // It is possible for second to overflow because u64::MAX < (u128::MAX / 1e9).
+                        // It will be saturated to u64::MAX seconds if the value after division exceeds u64::MAX.
+                        let seconds = u64::try_from(duration / 1_000_000_000).unwrap_or(u64::MAX);
+                        // It is impossible for nanosecond to overflow because u32::MAX > 1e9.
+                        let nanosecond = u32::try_from(duration.wrapping_rem(1_000_000_000)).unwrap();
+                        Duration::new(seconds, nanosecond)
+                    }
+                    _ => {
+                        Duration::from_nanos_u128(duration)
+                    }
+                }
             }
             _ => panic!("all `Instant` must be of the same kind"),
         }
diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs
index 38d76f5cf73b7..1ad9ace1b5d1a 100644
--- a/src/tools/miri/src/concurrency/data_race.rs
+++ b/src/tools/miri/src/concurrency/data_race.rs
@@ -56,6 +56,7 @@ use super::vector_clock::{VClock, VTimestamp, VectorIdx};
 use super::weak_memory::EvalContextExt as _;
 use crate::concurrency::GlobalDataRaceHandler;
 use crate::diagnostics::RacingOp;
+use crate::intrinsics::AtomicRmwOp;
 use crate::*;
 
 pub type AllocState = VClockAlloc;
@@ -182,6 +183,9 @@ struct AtomicMemoryCellClocks {
     /// contains the vector of timestamps that will
     /// happen-before a thread if an acquire-load is
     /// performed on the data.
+    ///
+    /// With weak memory emulation, this is the clock of the most recent write. It is then only used
+    /// for release sequences, to integrate the most recent clock into the next one for RMWs.
     sync_vector: VClock,
 
     /// The size of accesses to this atomic location.
@@ -275,7 +279,7 @@ struct MemoryCellClocks {
     /// zero on each write operation.
     read: VClock,
 
-    /// Atomic access, acquire, release sequence tracking clocks.
+    /// Atomic access tracking clocks.
     /// For non-atomic memory this value is set to None.
     /// For atomic memory, each byte carries this information.
     atomic_ops: Option<Box<AtomicMemoryCellClocks>>,
@@ -503,10 +507,11 @@ impl MemoryCellClocks {
         thread_clocks: &mut ThreadClockSet,
         index: VectorIdx,
         access_size: Size,
+        sync_clock: Option<&VClock>,
     ) -> Result<(), DataRace> {
         self.atomic_read_detect(thread_clocks, index, access_size)?;
-        if let Some(atomic) = self.atomic() {
-            thread_clocks.clock.join(&atomic.sync_vector);
+        if let Some(sync_clock) = sync_clock.or_else(|| self.atomic().map(|a| &a.sync_vector)) {
+            thread_clocks.clock.join(sync_clock);
         }
         Ok(())
     }
@@ -519,10 +524,11 @@ impl MemoryCellClocks {
         thread_clocks: &mut ThreadClockSet,
         index: VectorIdx,
         access_size: Size,
+        sync_clock: Option<&VClock>,
     ) -> Result<(), DataRace> {
         self.atomic_read_detect(thread_clocks, index, access_size)?;
-        if let Some(atomic) = self.atomic() {
-            thread_clocks.fence_acquire.join(&atomic.sync_vector);
+        if let Some(sync_clock) = sync_clock.or_else(|| self.atomic().map(|a| &a.sync_vector)) {
+            thread_clocks.fence_acquire.join(sync_clock);
         }
         Ok(())
     }
@@ -554,7 +560,8 @@ impl MemoryCellClocks {
         // The handling of release sequences was changed in C++20 and so
         // the code here is different to the paper since now all relaxed
         // stores block release sequences. The exception for same-thread
-        // relaxed stores has been removed.
+        // relaxed stores has been removed. We always overwrite the `sync_vector`,
+        // meaning the previous release sequence is broken.
         let atomic = self.atomic_mut_unwrap();
         atomic.sync_vector.clone_from(&thread_clocks.fence_release);
         Ok(())
@@ -570,6 +577,8 @@ impl MemoryCellClocks {
     ) -> Result<(), DataRace> {
         self.atomic_write_detect(thread_clocks, index, access_size)?;
         let atomic = self.atomic_mut_unwrap();
+        // This *joining* of `sync_vector` implements release sequences: future
+        // reads of this location will acquire our clock *and* what was here before.
         atomic.sync_vector.join(&thread_clocks.clock);
         Ok(())
     }
@@ -584,6 +593,8 @@ impl MemoryCellClocks {
     ) -> Result<(), DataRace> {
         self.atomic_write_detect(thread_clocks, index, access_size)?;
         let atomic = self.atomic_mut_unwrap();
+        // This *joining* of `sync_vector` implements release sequences: future
+        // reads of this location will acquire our fence clock *and* what was here before.
         atomic.sync_vector.join(&thread_clocks.fence_release);
         Ok(())
     }
@@ -719,8 +730,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         // Only metadata on the location itself is used.
 
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
-            // FIXME(GenMC): Inform GenMC what a non-atomic read here would return, to support mixed atomics/non-atomics
-            let old_val = None;
+            let old_val = this.run_for_validation_ref(|this| this.read_scalar(place)).discard_err();
             return genmc_ctx.atomic_load(
                 this,
                 place.ptr().addr(),
@@ -731,8 +741,8 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         }
 
         let scalar = this.allow_data_races_ref(move |this| this.read_scalar(place))?;
-        let buffered_scalar = this.buffered_atomic_read(place, atomic, scalar, || {
-            this.validate_atomic_load(place, atomic)
+        let buffered_scalar = this.buffered_atomic_read(place, atomic, scalar, |sync_clock| {
+            this.validate_atomic_load(place, atomic, sync_clock)
         })?;
         interp_ok(buffered_scalar.ok_or_else(|| err_ub!(InvalidUninitBytes(None)))?)
     }
@@ -751,11 +761,22 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         // The program didn't actually do a read, so suppress the memory access hooks.
         // This is also a very special exception where we just ignore an error -- if this read
         // was UB e.g. because the memory is uninitialized, we don't want to know!
-        let old_val = this.run_for_validation_mut(|this| this.read_scalar(dest)).discard_err();
+        let old_val = this.run_for_validation_ref(|this| this.read_scalar(dest)).discard_err();
+
         // Inform GenMC about the atomic store.
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
-            // FIXME(GenMC): Inform GenMC what a non-atomic read here would return, to support mixed atomics/non-atomics
-            genmc_ctx.atomic_store(this, dest.ptr().addr(), dest.layout.size, val, atomic)?;
+            if genmc_ctx.atomic_store(
+                this,
+                dest.ptr().addr(),
+                dest.layout.size,
+                val,
+                old_val,
+                atomic,
+            )? {
+                // The store might be the latest store in coherence order (determined by GenMC).
+                // If it is, we need to update the value in Miri's memory:
+                this.allow_data_races_mut(|this| this.write_scalar(val, dest))?;
+            }
             return interp_ok(());
         }
         this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
@@ -768,9 +789,8 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         &mut self,
         place: &MPlaceTy<'tcx>,
         rhs: &ImmTy<'tcx>,
-        op: mir::BinOp,
-        not: bool,
-        atomic: AtomicRwOrd,
+        atomic_op: AtomicRmwOp,
+        ord: AtomicRwOrd,
     ) -> InterpResult<'tcx, ImmTy<'tcx>> {
         let this = self.eval_context_mut();
         this.atomic_access_check(place, AtomicAccessType::Rmw)?;
@@ -779,26 +799,42 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
 
         // Inform GenMC about the atomic rmw operation.
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
-            // FIXME(GenMC): Inform GenMC what a non-atomic read here would return, to support mixed atomics/non-atomics
             let (old_val, new_val) = genmc_ctx.atomic_rmw_op(
                 this,
                 place.ptr().addr(),
                 place.layout.size,
-                atomic,
-                (op, not),
+                atomic_op,
+                place.layout.backend_repr.is_signed(),
+                ord,
                 rhs.to_scalar(),
+                old.to_scalar(),
             )?;
-            this.allow_data_races_mut(|this| this.write_scalar(new_val, place))?;
+            if let Some(new_val) = new_val {
+                this.allow_data_races_mut(|this| this.write_scalar(new_val, place))?;
+            }
             return interp_ok(ImmTy::from_scalar(old_val, old.layout));
         }
 
-        let val = this.binary_op(op, &old, rhs)?;
-        let val = if not { this.unary_op(mir::UnOp::Not, &val)? } else { val };
+        let val = match atomic_op {
+            AtomicRmwOp::MirOp { op, neg } => {
+                let val = this.binary_op(op, &old, rhs)?;
+                if neg { this.unary_op(mir::UnOp::Not, &val)? } else { val }
+            }
+            AtomicRmwOp::Max => {
+                let lt = this.binary_op(mir::BinOp::Lt, &old, rhs)?.to_scalar().to_bool()?;
+                if lt { rhs } else { &old }.clone()
+            }
+            AtomicRmwOp::Min => {
+                let lt = this.binary_op(mir::BinOp::Lt, &old, rhs)?.to_scalar().to_bool()?;
+                if lt { &old } else { rhs }.clone()
+            }
+        };
+
         this.allow_data_races_mut(|this| this.write_immediate(*val, place))?;
 
-        this.validate_atomic_rmw(place, atomic)?;
+        this.validate_atomic_rmw(place, ord)?;
 
-        this.buffered_atomic_rmw(val.to_scalar(), place, atomic, old.to_scalar())?;
+        this.buffered_atomic_rmw(val.to_scalar(), place, ord, old.to_scalar())?;
         interp_ok(old)
     }
 
@@ -818,14 +854,19 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
 
         // Inform GenMC about the atomic atomic exchange.
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
-            // FIXME(GenMC): Inform GenMC what a non-atomic read here would return, to support mixed atomics/non-atomics
-            let (old_val, _is_success) = genmc_ctx.atomic_exchange(
+            let (old_val, new_val) = genmc_ctx.atomic_exchange(
                 this,
                 place.ptr().addr(),
                 place.layout.size,
                 new,
                 atomic,
+                old,
             )?;
+            // The store might be the latest store in coherence order (determined by GenMC).
+            // If it is, we need to update the value in Miri's memory:
+            if let Some(new_val) = new_val {
+                this.allow_data_races_mut(|this| this.write_scalar(new_val, place))?;
+            }
             return interp_ok(old_val);
         }
 
@@ -835,55 +876,6 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         interp_ok(old)
     }
 
-    /// Perform an conditional atomic exchange with a memory place and a new
-    /// scalar value, the old value is returned.
-    fn atomic_min_max_scalar(
-        &mut self,
-        place: &MPlaceTy<'tcx>,
-        rhs: ImmTy<'tcx>,
-        min: bool,
-        atomic: AtomicRwOrd,
-    ) -> InterpResult<'tcx, ImmTy<'tcx>> {
-        let this = self.eval_context_mut();
-        this.atomic_access_check(place, AtomicAccessType::Rmw)?;
-
-        let old = this.allow_data_races_mut(|this| this.read_immediate(place))?;
-
-        // Inform GenMC about the atomic min/max operation.
-        if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
-            // FIXME(GenMC): Inform GenMC what a non-atomic read here would return, to support mixed atomics/non-atomics
-            let (old_val, new_val) = genmc_ctx.atomic_min_max_op(
-                this,
-                place.ptr().addr(),
-                place.layout.size,
-                atomic,
-                min,
-                old.layout.backend_repr.is_signed(),
-                rhs.to_scalar(),
-            )?;
-            this.allow_data_races_mut(|this| this.write_scalar(new_val, place))?;
-            return interp_ok(ImmTy::from_scalar(old_val, old.layout));
-        }
-
-        let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar().to_bool()?;
-
-        #[rustfmt::skip] // rustfmt makes this unreadable
-        let new_val = if min {
-            if lt { &old } else { &rhs }
-        } else {
-            if lt { &rhs } else { &old }
-        };
-
-        this.allow_data_races_mut(|this| this.write_immediate(**new_val, place))?;
-
-        this.validate_atomic_rmw(place, atomic)?;
-
-        this.buffered_atomic_rmw(new_val.to_scalar(), place, atomic, old.to_scalar())?;
-
-        // Return the old value.
-        interp_ok(old)
-    }
-
     /// Perform an atomic compare and exchange at a given memory location.
     /// On success an atomic RMW operation is performed and on failure
     /// only an atomic read occurs. If `can_fail_spuriously` is true,
@@ -903,15 +895,12 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         let this = self.eval_context_mut();
         this.atomic_access_check(place, AtomicAccessType::Rmw)?;
 
-        // Failure ordering cannot be stronger than success ordering, therefore first attempt
-        // to read with the failure ordering and if successful then try again with the success
-        // read ordering and write in the success case.
         // Read as immediate for the sake of `binary_op()`
         let old = this.allow_data_races_mut(|this| this.read_immediate(place))?;
 
         // Inform GenMC about the atomic atomic compare exchange.
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
-            let (old, cmpxchg_success) = genmc_ctx.atomic_compare_exchange(
+            let (old_value, new_value, cmpxchg_success) = genmc_ctx.atomic_compare_exchange(
                 this,
                 place.ptr().addr(),
                 place.layout.size,
@@ -920,11 +909,14 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
                 success,
                 fail,
                 can_fail_spuriously,
+                old.to_scalar(),
             )?;
-            if cmpxchg_success {
-                this.allow_data_races_mut(|this| this.write_scalar(new, place))?;
+            // The store might be the latest store in coherence order (determined by GenMC).
+            // If it is, we need to update the value in Miri's memory:
+            if let Some(new_value) = new_value {
+                this.allow_data_races_mut(|this| this.write_scalar(new_value, place))?;
             }
-            return interp_ok(Immediate::ScalarPair(old, Scalar::from_bool(cmpxchg_success)));
+            return interp_ok(Immediate::ScalarPair(old_value, Scalar::from_bool(cmpxchg_success)));
         }
 
         // `binary_op` will bail if either of them is not a scalar.
@@ -948,7 +940,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
             this.validate_atomic_rmw(place, success)?;
             this.buffered_atomic_rmw(new, place, success, old.to_scalar())?;
         } else {
-            this.validate_atomic_load(place, fail)?;
+            this.validate_atomic_load(place, fail, /* can use latest sync clock */ None)?;
             // A failed compare exchange is equivalent to a load, reading from the latest store
             // in the modification order.
             // Since `old` is only a value and not the store element, we need to separately
@@ -976,20 +968,36 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
     /// with this program point.
     ///
     /// The closure will only be invoked if data race handling is on.
-    fn release_clock<R>(&self, callback: impl FnOnce(&VClock) -> R) -> Option<R> {
+    fn release_clock<R>(
+        &self,
+        callback: impl FnOnce(&VClock) -> R,
+    ) -> InterpResult<'tcx, Option<R>> {
         let this = self.eval_context_ref();
-        Some(
-            this.machine.data_race.as_vclocks_ref()?.release_clock(&this.machine.threads, callback),
-        )
+        interp_ok(match &this.machine.data_race {
+            GlobalDataRaceHandler::None => None,
+            GlobalDataRaceHandler::Genmc(_genmc_ctx) =>
+                throw_unsup_format!(
+                    "this operation performs synchronization that is not supported in GenMC mode"
+                ),
+            GlobalDataRaceHandler::Vclocks(data_race) =>
+                Some(data_race.release_clock(&this.machine.threads, callback)),
+        })
     }
 
     /// Acquire the given clock into the current thread, establishing synchronization with
     /// the moment when that clock snapshot was taken via `release_clock`.
-    fn acquire_clock(&self, clock: &VClock) {
+    fn acquire_clock(&self, clock: &VClock) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
-        if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-            data_race.acquire_clock(clock, &this.machine.threads);
+        match &this.machine.data_race {
+            GlobalDataRaceHandler::None => {}
+            GlobalDataRaceHandler::Genmc(_genmc_ctx) =>
+                throw_unsup_format!(
+                    "this operation performs synchronization that is not supported in GenMC mode"
+                ),
+            GlobalDataRaceHandler::Vclocks(data_race) =>
+                data_race.acquire_clock(clock, &this.machine.threads),
         }
+        interp_ok(())
     }
 }
 
@@ -1174,6 +1182,18 @@ impl VClockAlloc {
         }))?
     }
 
+    /// Return the release/acquire synchronization clock for the given memory range.
+    pub(super) fn sync_clock(&self, access_range: AllocRange) -> VClock {
+        let alloc_ranges = self.alloc_ranges.borrow();
+        let mut clock = VClock::default();
+        for (_, mem_clocks) in alloc_ranges.iter(access_range.start, access_range.size) {
+            if let Some(atomic) = mem_clocks.atomic() {
+                clock.join(&atomic.sync_vector);
+            }
+        }
+        clock
+    }
+
     /// Detect data-races for an unsynchronized read operation. It will not perform
     /// data-race detection if `race_detecting()` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
@@ -1450,6 +1470,7 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
         &self,
         place: &MPlaceTy<'tcx>,
         atomic: AtomicReadOrd,
+        sync_clock: Option<&VClock>,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
         this.validate_atomic_op(
@@ -1458,9 +1479,9 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
             AccessType::AtomicLoad,
             move |memory, clocks, index, atomic| {
                 if atomic == AtomicReadOrd::Relaxed {
-                    memory.load_relaxed(&mut *clocks, index, place.layout.size)
+                    memory.load_relaxed(&mut *clocks, index, place.layout.size, sync_clock)
                 } else {
-                    memory.load_acquire(&mut *clocks, index, place.layout.size)
+                    memory.load_acquire(&mut *clocks, index, place.layout.size, sync_clock)
                 }
             },
         )
@@ -1505,9 +1526,9 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
             AccessType::AtomicRmw,
             move |memory, clocks, index, _| {
                 if acquire {
-                    memory.load_acquire(clocks, index, place.layout.size)?;
+                    memory.load_acquire(clocks, index, place.layout.size, None)?;
                 } else {
-                    memory.load_relaxed(clocks, index, place.layout.size)?;
+                    memory.load_relaxed(clocks, index, place.layout.size, None)?;
                 }
                 if release {
                     memory.rmw_release(clocks, index, place.layout.size)
diff --git a/src/tools/miri/src/concurrency/genmc/config.rs b/src/tools/miri/src/concurrency/genmc/config.rs
index c56adab90fe2b..c7cfa6012b8dc 100644
--- a/src/tools/miri/src/concurrency/genmc/config.rs
+++ b/src/tools/miri/src/concurrency/genmc/config.rs
@@ -1,13 +1,24 @@
+use genmc_sys::LogLevel;
+
 use super::GenmcParams;
+use crate::{IsolatedOp, MiriConfig, RejectOpWith};
 
 /// Configuration for GenMC mode.
 /// The `params` field is shared with the C++ side.
 /// The remaining options are kept on the Rust side.
 #[derive(Debug, Default, Clone)]
 pub struct GenmcConfig {
+    /// Parameters sent to the C++ side to create a new handle to the GenMC model checker.
     pub(super) params: GenmcParams,
-    do_estimation: bool,
-    // FIXME(GenMC): add remaining options.
+    pub(super) do_estimation: bool,
+    /// Print the output message that GenMC generates when an error occurs.
+    /// This error message is currently hard to use, since there is no clear mapping between the events that GenMC sees and the Rust code location where this event was produced.
+    pub(super) print_genmc_output: bool,
+    /// The log level for GenMC.
+    pub(super) log_level: LogLevel,
+    /// Enable more verbose output, such as number of executions estimate
+    /// and time to completion of verification step.
+    pub(super) verbose_output: bool,
 }
 
 impl GenmcConfig {
@@ -29,7 +40,80 @@ impl GenmcConfig {
         if trimmed_arg.is_empty() {
             return Ok(()); // this corresponds to "-Zmiri-genmc"
         }
-        // FIXME(GenMC): implement remaining parameters.
-        todo!();
+        let genmc_config = genmc_config.as_mut().unwrap();
+        let Some(trimmed_arg) = trimmed_arg.strip_prefix("-") else {
+            return Err(format!("Invalid GenMC argument \"-Zmiri-genmc{trimmed_arg}\""));
+        };
+        if let Some(log_level) = trimmed_arg.strip_prefix("log=") {
+            genmc_config.log_level = log_level.parse()?;
+        } else if let Some(trimmed_arg) = trimmed_arg.strip_prefix("print-exec-graphs") {
+            use genmc_sys::ExecutiongraphPrinting;
+            genmc_config.params.print_execution_graphs = match trimmed_arg {
+                "=none" => ExecutiongraphPrinting::None,
+                // Make GenMC print explored executions.
+                "" | "=explored" => ExecutiongraphPrinting::Explored,
+                // Make GenMC print blocked executions.
+                "=blocked" => ExecutiongraphPrinting::Blocked,
+                // Make GenMC print all executions.
+                "=all" => ExecutiongraphPrinting::ExploredAndBlocked,
+                _ =>
+                    return Err(format!(
+                        "Invalid suffix to GenMC argument '-Zmiri-genmc-print-exec-graphs', expected '', '=none', '=explored', '=blocked' or '=all'"
+                    )),
+            }
+        } else if trimmed_arg == "estimate" {
+            // FIXME(genmc): should this be on by default (like for GenMC)?
+            // Enable estimating the execution space and require time before running the actual verification.
+            genmc_config.do_estimation = true;
+        } else if let Some(estimation_max_str) = trimmed_arg.strip_prefix("estimation-max=") {
+            // Set the maximum number of executions to explore during estimation.
+            genmc_config.params.estimation_max = estimation_max_str.parse().ok().filter(|estimation_max| *estimation_max > 0).ok_or_else(|| {
+                format!(
+                    "'-Zmiri-genmc-estimation-max=...' expects a positive integer argument, but got '{estimation_max_str}'"
+                )
+            })?;
+        } else if trimmed_arg == "print-genmc-output" {
+            genmc_config.print_genmc_output = true;
+        } else if trimmed_arg == "verbose" {
+            genmc_config.verbose_output = true;
+        } else {
+            return Err(format!("Invalid GenMC argument: \"-Zmiri-genmc-{trimmed_arg}\""));
+        }
+        Ok(())
+    }
+
+    /// Validate settings for GenMC mode (NOP if GenMC mode disabled).
+    ///
+    /// Unsupported configurations return an error.
+    /// Adjusts Miri settings where required, printing a warnings if the change might be unexpected for the user.
+    pub fn validate_genmc_mode_settings(miri_config: &mut MiriConfig) -> Result<(), &'static str> {
+        let Some(genmc_config) = miri_config.genmc_config.as_mut() else {
+            return Ok(());
+        };
+
+        // Check for disallowed configurations.
+        if !miri_config.data_race_detector {
+            return Err("Cannot disable data race detection in GenMC mode");
+        } else if !miri_config.native_lib.is_empty() {
+            return Err("native-lib not supported in GenMC mode.");
+        } else if miri_config.isolated_op != IsolatedOp::Reject(RejectOpWith::Abort) {
+            return Err("Cannot disable isolation in GenMC mode");
+        }
+
+        // Adjust settings where needed.
+        if !miri_config.weak_memory_emulation {
+            genmc_config.params.disable_weak_memory_emulation = true;
+        }
+        if miri_config.borrow_tracker.is_some() {
+            eprintln!(
+                "warning: borrow tracking has been disabled, it is not (yet) supported in GenMC mode."
+            );
+            miri_config.borrow_tracker = None;
+        }
+        // We enable fixed scheduling so Miri doesn't randomly yield before a terminator, which anyway
+        // would be a NOP in GenMC mode.
+        miri_config.fixed_scheduling = true;
+
+        Ok(())
     }
 }
diff --git a/src/tools/miri/src/concurrency/genmc/dummy.rs b/src/tools/miri/src/concurrency/genmc/dummy.rs
index 79d27c4be1591..c28984cef35ad 100644
--- a/src/tools/miri/src/concurrency/genmc/dummy.rs
+++ b/src/tools/miri/src/concurrency/genmc/dummy.rs
@@ -1,49 +1,45 @@
-#![allow(unused)]
-
 use rustc_abi::{Align, Size};
-use rustc_const_eval::interpret::{InterpCx, InterpResult};
-use rustc_middle::mir;
+use rustc_const_eval::interpret::{AllocId, InterpCx, InterpResult};
 
+pub use self::run::run_genmc_mode;
+use crate::intrinsics::AtomicRmwOp;
 use crate::{
-    AtomicFenceOrd, AtomicReadOrd, AtomicRwOrd, AtomicWriteOrd, MemoryKind, MiriConfig,
-    MiriMachine, Scalar, ThreadId, ThreadManager, VisitProvenance, VisitWith,
+    AtomicFenceOrd, AtomicReadOrd, AtomicRwOrd, AtomicWriteOrd, MemoryKind, MiriMachine, Scalar,
+    ThreadId, ThreadManager, VisitProvenance, VisitWith,
 };
 
+#[derive(Clone, Copy, Debug)]
+pub enum ExitType {
+    MainThreadFinish,
+    ExitCalled,
+}
+
 #[derive(Debug)]
 pub struct GenmcCtx {}
 
 #[derive(Debug, Default, Clone)]
 pub struct GenmcConfig {}
 
-impl GenmcCtx {
-    pub fn new(_miri_config: &MiriConfig) -> Self {
-        unreachable!()
-    }
+mod run {
+    use std::rc::Rc;
 
-    pub fn get_stuck_execution_count(&self) -> usize {
-        unreachable!()
-    }
+    use rustc_middle::ty::TyCtxt;
 
-    pub fn print_genmc_graph(&self) {
-        unreachable!()
-    }
+    use crate::{GenmcCtx, MiriConfig};
 
-    pub fn is_exploration_done(&self) -> bool {
-        unreachable!()
+    pub fn run_genmc_mode<'tcx>(
+        _config: &MiriConfig,
+        _eval_entry: impl Fn(Rc<GenmcCtx>) -> Option<i32>,
+        _tcx: TyCtxt<'tcx>,
+    ) -> Option<i32> {
+        unreachable!();
     }
+}
 
-    /**** Memory access handling ****/
-
-    pub(crate) fn handle_execution_start(&self) {
-        unreachable!()
-    }
+impl GenmcCtx {
+    // We don't provide the `new` function in the dummy module.
 
-    pub(crate) fn handle_execution_end<'tcx>(
-        &self,
-        _ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
-    ) -> Result<(), String> {
-        unreachable!()
-    }
+    /**** Memory access handling ****/
 
     pub(super) fn set_ongoing_action_data_race_free(&self, _enable: bool) {
         unreachable!()
@@ -67,8 +63,9 @@ impl GenmcCtx {
         _address: Size,
         _size: Size,
         _value: Scalar,
+        _old_value: Option<Scalar>,
         _ordering: AtomicWriteOrd,
-    ) -> InterpResult<'tcx, ()> {
+    ) -> InterpResult<'tcx, bool> {
         unreachable!()
     }
 
@@ -76,7 +73,7 @@ impl GenmcCtx {
         &self,
         _machine: &MiriMachine<'tcx>,
         _ordering: AtomicFenceOrd,
-    ) -> InterpResult<'tcx, ()> {
+    ) -> InterpResult<'tcx> {
         unreachable!()
     }
 
@@ -85,23 +82,12 @@ impl GenmcCtx {
         _ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
         _address: Size,
         _size: Size,
+        _atomic_op: AtomicRmwOp,
+        _is_signed: bool,
         _ordering: AtomicRwOrd,
-        (rmw_op, not): (mir::BinOp, bool),
         _rhs_scalar: Scalar,
-    ) -> InterpResult<'tcx, (Scalar, Scalar)> {
-        unreachable!()
-    }
-
-    pub(crate) fn atomic_min_max_op<'tcx>(
-        &self,
-        ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
-        address: Size,
-        size: Size,
-        ordering: AtomicRwOrd,
-        min: bool,
-        is_signed: bool,
-        rhs_scalar: Scalar,
-    ) -> InterpResult<'tcx, (Scalar, Scalar)> {
+        _old_value: Scalar,
+    ) -> InterpResult<'tcx, (Scalar, Option<Scalar>)> {
         unreachable!()
     }
 
@@ -112,7 +98,8 @@ impl GenmcCtx {
         _size: Size,
         _rhs_scalar: Scalar,
         _ordering: AtomicRwOrd,
-    ) -> InterpResult<'tcx, (Scalar, bool)> {
+        _old_value: Scalar,
+    ) -> InterpResult<'tcx, (Scalar, Option<Scalar>)> {
         unreachable!()
     }
 
@@ -126,7 +113,8 @@ impl GenmcCtx {
         _success: AtomicRwOrd,
         _fail: AtomicReadOrd,
         _can_fail_spuriously: bool,
-    ) -> InterpResult<'tcx, (Scalar, bool)> {
+        _old_value: Scalar,
+    ) -> InterpResult<'tcx, (Scalar, Option<Scalar>, bool)> {
         unreachable!()
     }
 
@@ -135,7 +123,7 @@ impl GenmcCtx {
         _machine: &MiriMachine<'tcx>,
         _address: Size,
         _size: Size,
-    ) -> InterpResult<'tcx, ()> {
+    ) -> InterpResult<'tcx> {
         unreachable!()
     }
 
@@ -144,7 +132,7 @@ impl GenmcCtx {
         _machine: &MiriMachine<'tcx>,
         _address: Size,
         _size: Size,
-    ) -> InterpResult<'tcx, ()> {
+    ) -> InterpResult<'tcx> {
         unreachable!()
     }
 
@@ -152,7 +140,8 @@ impl GenmcCtx {
 
     pub(crate) fn handle_alloc<'tcx>(
         &self,
-        _machine: &MiriMachine<'tcx>,
+        _ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
+        _alloc_id: AllocId,
         _size: Size,
         _alignment: Align,
         _memory_kind: MemoryKind,
@@ -163,11 +152,10 @@ impl GenmcCtx {
     pub(crate) fn handle_dealloc<'tcx>(
         &self,
         _machine: &MiriMachine<'tcx>,
+        _alloc_id: AllocId,
         _address: Size,
-        _size: Size,
-        _align: Align,
         _kind: MemoryKind,
-    ) -> InterpResult<'tcx, ()> {
+    ) -> InterpResult<'tcx> {
         unreachable!()
     }
 
@@ -176,8 +164,10 @@ impl GenmcCtx {
     pub(crate) fn handle_thread_create<'tcx>(
         &self,
         _threads: &ThreadManager<'tcx>,
+        _start_routine: crate::Pointer,
+        _func_arg: &crate::ImmTy<'tcx>,
         _new_thread_id: ThreadId,
-    ) -> InterpResult<'tcx, ()> {
+    ) -> InterpResult<'tcx> {
         unreachable!()
     }
 
@@ -185,24 +175,26 @@ impl GenmcCtx {
         &self,
         _active_thread_id: ThreadId,
         _child_thread_id: ThreadId,
-    ) -> InterpResult<'tcx, ()> {
+    ) -> InterpResult<'tcx> {
         unreachable!()
     }
 
-    pub(crate) fn handle_thread_stack_empty(&self, _thread_id: ThreadId) {
+    pub(crate) fn handle_thread_finish<'tcx>(&self, _threads: &ThreadManager<'tcx>) {
         unreachable!()
     }
 
-    pub(crate) fn handle_thread_finish<'tcx>(
+    pub(crate) fn handle_exit<'tcx>(
         &self,
-        _threads: &ThreadManager<'tcx>,
-    ) -> InterpResult<'tcx, ()> {
+        _thread: ThreadId,
+        _exit_code: i32,
+        _exit_type: ExitType,
+    ) -> InterpResult<'tcx> {
         unreachable!()
     }
 
     /**** Scheduling functionality ****/
 
-    pub(crate) fn schedule_thread<'tcx>(
+    pub fn schedule_thread<'tcx>(
         &self,
         _ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
     ) -> InterpResult<'tcx, ThreadId> {
@@ -211,6 +203,7 @@ impl GenmcCtx {
 
     /**** Blocking instructions ****/
 
+    #[allow(unused)]
     pub(crate) fn handle_verifier_assume<'tcx>(
         &self,
         _machine: &MiriMachine<'tcx>,
@@ -229,7 +222,7 @@ impl VisitProvenance for GenmcCtx {
 impl GenmcConfig {
     pub fn parse_arg(
         _genmc_config: &mut Option<GenmcConfig>,
-        trimmed_arg: &str,
+        _trimmed_arg: &str,
     ) -> Result<(), String> {
         if cfg!(feature = "genmc") {
             Err(format!("GenMC is disabled in this build of Miri"))
@@ -238,7 +231,9 @@ impl GenmcConfig {
         }
     }
 
-    pub fn should_print_graph(&self, _rep: usize) -> bool {
-        unreachable!()
+    pub fn validate_genmc_mode_settings(
+        _miri_config: &mut crate::MiriConfig,
+    ) -> Result<(), &'static str> {
+        Ok(())
     }
 }
diff --git a/src/tools/miri/src/concurrency/genmc/global_allocations.rs b/src/tools/miri/src/concurrency/genmc/global_allocations.rs
new file mode 100644
index 0000000000000..272f8d2484038
--- /dev/null
+++ b/src/tools/miri/src/concurrency/genmc/global_allocations.rs
@@ -0,0 +1,118 @@
+use std::collections::hash_map::Entry;
+use std::sync::RwLock;
+
+use genmc_sys::{GENMC_GLOBAL_ADDRESSES_MASK, get_global_alloc_static_mask};
+use rand::SeedableRng;
+use rand::rngs::StdRng;
+use rustc_const_eval::interpret::{AllocId, AllocInfo, InterpResult, interp_ok};
+use rustc_data_structures::fx::FxHashMap;
+use tracing::debug;
+
+use crate::alloc_addresses::AddressGenerator;
+
+#[derive(Debug)]
+struct GlobalStateInner {
+    /// The base address for each *global* allocation.
+    base_addr: FxHashMap<AllocId, u64>,
+    /// We use the same address generator that Miri uses in normal operation.
+    address_generator: AddressGenerator,
+    /// The address generator needs an Rng to randomize the offsets between allocations.
+    /// We don't use the `MiriMachine` Rng since this is global, cross-machine state.
+    rng: StdRng,
+}
+
+/// Allocator for global memory in GenMC mode.
+/// Miri doesn't discover all global allocations statically like LLI does for GenMC.
+/// The existing global memory allocator in GenMC doesn't support this, so we take over these allocations.
+/// Global allocations need to be in a specific address range, with the lower limit given by the `GENMC_GLOBAL_ADDRESSES_MASK` constant.
+///
+/// Every global allocation must have the same addresses across all executions of a single program.
+/// Therefore there is only 1 global allocator, and it syncs new globals across executions, even if they are explored in parallel.
+#[derive(Debug)]
+pub struct GlobalAllocationHandler(RwLock<GlobalStateInner>);
+
+impl GlobalAllocationHandler {
+    /// Create a new global address generator with a given max address `last_addr`
+    /// (corresponding to the highest address available on the target platform, unless another limit exists).
+    /// No addresses higher than this will be allocated.
+    /// Will panic if the given address limit is too small to allocate any addresses.
+    pub fn new(last_addr: u64) -> GlobalAllocationHandler {
+        assert_eq!(GENMC_GLOBAL_ADDRESSES_MASK, get_global_alloc_static_mask());
+        assert_ne!(GENMC_GLOBAL_ADDRESSES_MASK, 0);
+        // FIXME(genmc): Remove if non-64bit targets are supported.
+        assert!(
+            GENMC_GLOBAL_ADDRESSES_MASK < last_addr,
+            "only 64bit platforms are currently supported (highest address {last_addr:#x} <= minimum global address {GENMC_GLOBAL_ADDRESSES_MASK:#x})."
+        );
+        Self(RwLock::new(GlobalStateInner {
+            base_addr: FxHashMap::default(),
+            address_generator: AddressGenerator::new(GENMC_GLOBAL_ADDRESSES_MASK..last_addr),
+            // FIXME(genmc): We could provide a way to changes this seed, to allow for different global addresses.
+            rng: StdRng::seed_from_u64(0),
+        }))
+    }
+}
+
+impl GlobalStateInner {
+    fn global_allocate_addr<'tcx>(
+        &mut self,
+        alloc_id: AllocId,
+        info: AllocInfo,
+    ) -> InterpResult<'tcx, u64> {
+        let entry = match self.base_addr.entry(alloc_id) {
+            Entry::Occupied(occupied_entry) => {
+                // Looks like some other thread allocated this for us
+                // between when we released the read lock and aquired the write lock,
+                // so we just return that value.
+                return interp_ok(*occupied_entry.get());
+            }
+            Entry::Vacant(vacant_entry) => vacant_entry,
+        };
+
+        // This allocation does not have a base address yet, pick or reuse one.
+        // We are not in native lib mode (incompatible with GenMC mode), so we control the addresses ourselves.
+        let new_addr = self.address_generator.generate(info.size, info.align, &mut self.rng)?;
+
+        // Cache the address for future use.
+        entry.insert(new_addr);
+
+        interp_ok(new_addr)
+    }
+}
+
+impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
+pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
+    /// Allocate a new address for the given alloc id, or return the cached address.
+    /// Each alloc id is assigned one unique allocation which will not change if this function is called again with the same alloc id.
+    fn get_global_allocation_address(
+        &self,
+        global_allocation_handler: &GlobalAllocationHandler,
+        alloc_id: AllocId,
+    ) -> InterpResult<'tcx, u64> {
+        let this = self.eval_context_ref();
+        let info = this.get_alloc_info(alloc_id);
+
+        let global_state = global_allocation_handler.0.read().unwrap();
+        if let Some(base_addr) = global_state.base_addr.get(&alloc_id) {
+            debug!(
+                "GenMC: address for global with alloc id {alloc_id:?} was cached: {base_addr} == {base_addr:#x}"
+            );
+            return interp_ok(*base_addr);
+        }
+
+        // We need to upgrade to a write lock. `std::sync::RwLock` doesn't support this, so we drop the guard and lock again
+        // Note that another thread might allocate the address while the `RwLock` is unlocked, but we handle this case in the allocation function.
+        drop(global_state);
+        let mut global_state = global_allocation_handler.0.write().unwrap();
+        // With the write lock, we can safely allocate an address only once per `alloc_id`.
+        let new_addr = global_state.global_allocate_addr(alloc_id, info)?;
+        debug!("GenMC: global with alloc id {alloc_id:?} got address: {new_addr} == {new_addr:#x}");
+        assert_eq!(
+            GENMC_GLOBAL_ADDRESSES_MASK,
+            new_addr & GENMC_GLOBAL_ADDRESSES_MASK,
+            "Global address allocated outside global address space."
+        );
+
+        interp_ok(new_addr)
+    }
+}
diff --git a/src/tools/miri/src/concurrency/genmc/helper.rs b/src/tools/miri/src/concurrency/genmc/helper.rs
new file mode 100644
index 0000000000000..48a5ec8bb2608
--- /dev/null
+++ b/src/tools/miri/src/concurrency/genmc/helper.rs
@@ -0,0 +1,225 @@
+use std::sync::RwLock;
+
+use genmc_sys::{MemOrdering, RMWBinOp};
+use rustc_abi::Size;
+use rustc_const_eval::interpret::{InterpResult, interp_ok};
+use rustc_data_structures::fx::FxHashSet;
+use rustc_middle::mir;
+use rustc_middle::ty::ScalarInt;
+use rustc_span::Span;
+use tracing::debug;
+
+use super::GenmcScalar;
+use crate::diagnostics::EvalContextExt;
+use crate::intrinsics::AtomicRmwOp;
+use crate::{
+    AtomicFenceOrd, AtomicReadOrd, AtomicRwOrd, AtomicWriteOrd, InterpCx, MiriInterpCx,
+    MiriMachine, NonHaltingDiagnostic, Scalar, throw_unsup_format,
+};
+
+/// Maximum size memory access in bytes that GenMC supports.
+pub(super) const MAX_ACCESS_SIZE: u64 = 8;
+
+/// Type for storing spans for already emitted warnings.
+pub(super) type WarningCache = RwLock<FxHashSet<Span>>;
+
+#[derive(Default)]
+pub(super) struct Warnings {
+    pub(super) compare_exchange_failure_ordering: WarningCache,
+    pub(super) compare_exchange_weak: WarningCache,
+}
+
+/// Emit a warning if it hasn't already been reported for current span.
+pub(super) fn emit_warning<'tcx>(
+    ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
+    cache: &WarningCache,
+    diagnostic: impl FnOnce() -> NonHaltingDiagnostic,
+) {
+    let span = ecx.machine.current_span();
+    if cache.read().unwrap().contains(&span) {
+        return;
+    }
+    // This span has not yet been reported, so we insert it into the cache and report it.
+    let mut cache = cache.write().unwrap();
+    if cache.insert(span) {
+        // Some other thread may have added this span while we didn't hold the lock, so we only emit it if the insertions succeeded.
+        ecx.emit_diagnostic(diagnostic());
+    }
+}
+
+/// This function is used to split up a large memory access into aligned, non-overlapping chunks of a limited size.
+/// Returns an iterator over the chunks, yielding `(base address, size)` of each chunk, ordered by address.
+pub fn split_access(address: Size, size: Size) -> impl Iterator<Item = (u64, u64)> {
+    let start_address = address.bytes();
+    let end_address = start_address + size.bytes();
+
+    let start_address_aligned = start_address.next_multiple_of(MAX_ACCESS_SIZE);
+    let end_address_aligned = (end_address / MAX_ACCESS_SIZE) * MAX_ACCESS_SIZE; // prev_multiple_of
+
+    debug!(
+        "GenMC: splitting NA memory access into {MAX_ACCESS_SIZE} byte chunks: {}B + {} * {MAX_ACCESS_SIZE}B + {}B = {size:?}",
+        start_address_aligned - start_address,
+        (end_address_aligned - start_address_aligned) / MAX_ACCESS_SIZE,
+        end_address - end_address_aligned,
+    );
+
+    // FIXME(genmc): could make remaining accesses powers-of-2, instead of 1 byte.
+    let start_chunks = (start_address..start_address_aligned).map(|address| (address, 1));
+    let aligned_chunks = (start_address_aligned..end_address_aligned)
+        .step_by(MAX_ACCESS_SIZE.try_into().unwrap())
+        .map(|address| (address, MAX_ACCESS_SIZE));
+    let end_chunks = (end_address_aligned..end_address).map(|address| (address, 1));
+
+    start_chunks.chain(aligned_chunks).chain(end_chunks)
+}
+
+/// Inverse function to `scalar_to_genmc_scalar`.
+///
+/// Convert a Miri `Scalar` to a `GenmcScalar`.
+/// To be able to restore pointer provenance from a `GenmcScalar`, the base address of the allocation of the pointer is also stored in the `GenmcScalar`.
+/// We cannot use the `AllocId` instead of the base address, since Miri has no control over the `AllocId`, and it may change across executions.
+/// Pointers with `Wildcard` provenance are not supported.
+pub fn scalar_to_genmc_scalar<'tcx>(
+    _ecx: &MiriInterpCx<'tcx>,
+    scalar: Scalar,
+) -> InterpResult<'tcx, GenmcScalar> {
+    interp_ok(match scalar {
+        rustc_const_eval::interpret::Scalar::Int(scalar_int) => {
+            // FIXME(genmc): Add u128 support once GenMC supports it.
+            let value: u64 = scalar_int.to_uint(scalar_int.size()).try_into().unwrap();
+            GenmcScalar { value, is_init: true }
+        }
+        rustc_const_eval::interpret::Scalar::Ptr(_pointer, _size) =>
+            throw_unsup_format!(
+                "FIXME(genmc): Implement sending pointers (with provenance) to GenMC."
+            ),
+    })
+}
+
+/// Inverse function to `scalar_to_genmc_scalar`.
+///
+/// Convert a `GenmcScalar` back into a Miri `Scalar`.
+/// For pointers, attempt to convert the stored base address of their allocation back into an `AllocId`.
+pub fn genmc_scalar_to_scalar<'tcx>(
+    _ecx: &MiriInterpCx<'tcx>,
+    scalar: GenmcScalar,
+    size: Size,
+) -> InterpResult<'tcx, Scalar> {
+    // FIXME(genmc): Add GenmcScalar to Miri Pointer conversion.
+
+    // NOTE: GenMC always returns 64 bit values, and the upper bits are not yet truncated.
+    // FIXME(genmc): GenMC should be doing the truncation, not Miri.
+    let (value_scalar_int, _got_truncated) = ScalarInt::truncate_from_uint(scalar.value, size);
+    interp_ok(Scalar::Int(value_scalar_int))
+}
+
+impl AtomicReadOrd {
+    pub(super) fn to_genmc(self) -> MemOrdering {
+        match self {
+            AtomicReadOrd::Relaxed => MemOrdering::Relaxed,
+            AtomicReadOrd::Acquire => MemOrdering::Acquire,
+            AtomicReadOrd::SeqCst => MemOrdering::SequentiallyConsistent,
+        }
+    }
+}
+
+impl AtomicWriteOrd {
+    pub(super) fn to_genmc(self) -> MemOrdering {
+        match self {
+            AtomicWriteOrd::Relaxed => MemOrdering::Relaxed,
+            AtomicWriteOrd::Release => MemOrdering::Release,
+            AtomicWriteOrd::SeqCst => MemOrdering::SequentiallyConsistent,
+        }
+    }
+}
+
+impl AtomicFenceOrd {
+    pub(super) fn to_genmc(self) -> MemOrdering {
+        match self {
+            AtomicFenceOrd::Acquire => MemOrdering::Acquire,
+            AtomicFenceOrd::Release => MemOrdering::Release,
+            AtomicFenceOrd::AcqRel => MemOrdering::AcquireRelease,
+            AtomicFenceOrd::SeqCst => MemOrdering::SequentiallyConsistent,
+        }
+    }
+}
+
+/// Since GenMC ignores the failure memory ordering and Miri should not detect bugs that don't actually exist, we upgrade the success ordering if required.
+/// This means that Miri running in GenMC mode will not explore all possible executions allowed under the RC11 memory model.
+/// FIXME(genmc): remove this once GenMC properly supports the failure memory ordering.
+pub(super) fn maybe_upgrade_compare_exchange_success_orderings(
+    success: AtomicRwOrd,
+    failure: AtomicReadOrd,
+) -> AtomicRwOrd {
+    use AtomicReadOrd::*;
+    let (success_read, success_write) = success.split_memory_orderings();
+    let upgraded_success_read = match (success_read, failure) {
+        (_, SeqCst) | (SeqCst, _) => SeqCst,
+        (Acquire, _) | (_, Acquire) => Acquire,
+        (Relaxed, Relaxed) => Relaxed,
+    };
+    AtomicRwOrd::from_split_memory_orderings(upgraded_success_read, success_write)
+}
+
+impl AtomicRwOrd {
+    /// Split up an atomic read-write memory ordering into a separate read and write ordering.
+    pub(super) fn split_memory_orderings(self) -> (AtomicReadOrd, AtomicWriteOrd) {
+        match self {
+            AtomicRwOrd::Relaxed => (AtomicReadOrd::Relaxed, AtomicWriteOrd::Relaxed),
+            AtomicRwOrd::Acquire => (AtomicReadOrd::Acquire, AtomicWriteOrd::Relaxed),
+            AtomicRwOrd::Release => (AtomicReadOrd::Relaxed, AtomicWriteOrd::Release),
+            AtomicRwOrd::AcqRel => (AtomicReadOrd::Acquire, AtomicWriteOrd::Release),
+            AtomicRwOrd::SeqCst => (AtomicReadOrd::SeqCst, AtomicWriteOrd::SeqCst),
+        }
+    }
+
+    /// Split up an atomic read-write memory ordering into a separate read and write ordering.
+    fn from_split_memory_orderings(
+        read_ordering: AtomicReadOrd,
+        write_ordering: AtomicWriteOrd,
+    ) -> Self {
+        match (read_ordering, write_ordering) {
+            (AtomicReadOrd::Relaxed, AtomicWriteOrd::Relaxed) => AtomicRwOrd::Relaxed,
+            (AtomicReadOrd::Acquire, AtomicWriteOrd::Relaxed) => AtomicRwOrd::Acquire,
+            (AtomicReadOrd::Relaxed, AtomicWriteOrd::Release) => AtomicRwOrd::Release,
+            (AtomicReadOrd::Acquire, AtomicWriteOrd::Release) => AtomicRwOrd::AcqRel,
+            (AtomicReadOrd::SeqCst, AtomicWriteOrd::SeqCst) => AtomicRwOrd::SeqCst,
+            _ =>
+                panic!(
+                    "Unsupported memory ordering combination ({read_ordering:?}, {write_ordering:?})"
+                ),
+        }
+    }
+
+    pub(super) fn to_genmc(self) -> MemOrdering {
+        match self {
+            AtomicRwOrd::Relaxed => MemOrdering::Relaxed,
+            AtomicRwOrd::Acquire => MemOrdering::Acquire,
+            AtomicRwOrd::Release => MemOrdering::Release,
+            AtomicRwOrd::AcqRel => MemOrdering::AcquireRelease,
+            AtomicRwOrd::SeqCst => MemOrdering::SequentiallyConsistent,
+        }
+    }
+}
+
+/// Convert an atomic binary operation to its GenMC counterpart.
+pub(super) fn to_genmc_rmw_op(atomic_op: AtomicRmwOp, is_signed: bool) -> RMWBinOp {
+    match (atomic_op, is_signed) {
+        (AtomicRmwOp::Min, true) => RMWBinOp::Min,
+        (AtomicRmwOp::Max, true) => RMWBinOp::Max,
+        (AtomicRmwOp::Min, false) => RMWBinOp::UMin,
+        (AtomicRmwOp::Max, false) => RMWBinOp::UMax,
+        (AtomicRmwOp::MirOp { op, neg }, _is_signed) =>
+            match (op, neg) {
+                (mir::BinOp::Add, false) => RMWBinOp::Add,
+                (mir::BinOp::Sub, false) => RMWBinOp::Sub,
+                (mir::BinOp::BitXor, false) => RMWBinOp::Xor,
+                (mir::BinOp::BitAnd, false) => RMWBinOp::And,
+                (mir::BinOp::BitAnd, true) => RMWBinOp::Nand,
+                (mir::BinOp::BitOr, false) => RMWBinOp::Or,
+                _ => {
+                    panic!("unsupported atomic operation: bin_op: {op:?}, negate: {neg}");
+                }
+            },
+    }
+}
diff --git a/src/tools/miri/src/concurrency/genmc/mod.rs b/src/tools/miri/src/concurrency/genmc/mod.rs
index 3617775e27eaf..0086d3f2bf0bc 100644
--- a/src/tools/miri/src/concurrency/genmc/mod.rs
+++ b/src/tools/miri/src/concurrency/genmc/mod.rs
@@ -1,77 +1,188 @@
-#![allow(unused)] // FIXME(GenMC): remove this
+use std::cell::{Cell, RefCell};
+use std::sync::Arc;
 
-use std::cell::Cell;
-
-use genmc_sys::{GenmcParams, createGenmcHandle};
+use genmc_sys::{
+    EstimationResult, GENMC_GLOBAL_ADDRESSES_MASK, GenmcScalar, MemOrdering, MiriGenmcShim,
+    RMWBinOp, UniquePtr, create_genmc_driver_handle,
+};
 use rustc_abi::{Align, Size};
-use rustc_const_eval::interpret::{InterpCx, InterpResult, interp_ok};
-use rustc_middle::mir;
-
+use rustc_const_eval::interpret::{AllocId, InterpCx, InterpResult, interp_ok};
+use rustc_middle::{throw_machine_stop, throw_ub_format, throw_unsup_format};
+// FIXME(genmc,tracing): Implement some work-around for enabling debug/trace level logging (currently disabled statically in rustc).
+use tracing::{debug, info};
+
+use self::global_allocations::{EvalContextExt as _, GlobalAllocationHandler};
+use self::helper::{
+    MAX_ACCESS_SIZE, Warnings, emit_warning, genmc_scalar_to_scalar,
+    maybe_upgrade_compare_exchange_success_orderings, scalar_to_genmc_scalar, to_genmc_rmw_op,
+};
+use self::run::GenmcMode;
+use self::thread_id_map::ThreadIdMap;
+use crate::concurrency::genmc::helper::split_access;
+use crate::intrinsics::AtomicRmwOp;
 use crate::{
     AtomicFenceOrd, AtomicReadOrd, AtomicRwOrd, AtomicWriteOrd, MemoryKind, MiriConfig,
-    MiriMachine, Scalar, ThreadId, ThreadManager, VisitProvenance, VisitWith,
+    MiriMachine, MiriMemoryKind, NonHaltingDiagnostic, Scalar, TerminationInfo, ThreadId,
+    ThreadManager, VisitProvenance, VisitWith,
 };
 
 mod config;
+mod global_allocations;
+mod helper;
+mod run;
+pub(crate) mod scheduling;
+mod thread_id_map;
+
+pub use genmc_sys::GenmcParams;
 
 pub use self::config::GenmcConfig;
+pub use self::run::run_genmc_mode;
+
+#[derive(Debug)]
+pub enum ExecutionEndResult {
+    /// An error occurred at the end of the execution.
+    Error(String),
+    /// No errors occurred, and there are more executions to explore.
+    Continue,
+    /// No errors occurred and we are finished.
+    Stop,
+}
 
-// FIXME(GenMC): add fields
-pub struct GenmcCtx {
-    /// Some actions Miri does are allowed to cause data races.
-    /// GenMC will not be informed about certain actions (e.g. non-atomic loads) when this flag is set.
-    allow_data_races: Cell<bool>,
+#[derive(Clone, Copy, Debug)]
+pub enum ExitType {
+    MainThreadFinish,
+    ExitCalled,
 }
 
-impl GenmcCtx {
-    /// Create a new `GenmcCtx` from a given config.
-    pub fn new(miri_config: &MiriConfig) -> Self {
-        let genmc_config = miri_config.genmc_config.as_ref().unwrap();
+/// The exit status of a program.
+/// GenMC must store this if a thread exits while any others can still run.
+/// The other threads must also be explored before the program is terminated.
+#[derive(Clone, Copy, Debug)]
+struct ExitStatus {
+    exit_code: i32,
+    exit_type: ExitType,
+}
 
-        let handle = createGenmcHandle(&genmc_config.params);
-        assert!(!handle.is_null());
+impl ExitStatus {
+    fn do_leak_check(self) -> bool {
+        matches!(self.exit_type, ExitType::MainThreadFinish)
+    }
+}
 
-        eprintln!("Miri: GenMC handle creation successful!");
+/// State that is reset at the start of every execution.
+#[derive(Debug, Default)]
+struct PerExecutionState {
+    /// Thread id management, such as mapping between Miri `ThreadId` and GenMC's thread ids, or selecting GenMC thread ids.
+    thread_id_manager: RefCell<ThreadIdMap>,
 
-        drop(handle);
-        eprintln!("Miri: Dropping GenMC handle successful!");
+    /// A flag to indicate that we should not forward non-atomic accesses to genmc, e.g. because we
+    /// are executing an atomic operation.
+    allow_data_races: Cell<bool>,
 
-        // FIXME(GenMC): implement
-        std::process::exit(0);
+    /// The exit status of the program. We keep running other threads even after `exit` to ensure
+    /// we cover all possible executions.
+    /// `None` if no thread has called `exit` and the main thread isn't finished yet.
+    exit_status: Cell<Option<ExitStatus>>,
+}
+
+impl PerExecutionState {
+    fn reset(&self) {
+        self.allow_data_races.replace(false);
+        self.thread_id_manager.borrow_mut().reset();
+        self.exit_status.set(None);
     }
+}
 
-    pub fn get_stuck_execution_count(&self) -> usize {
-        todo!()
+struct GlobalState {
+    /// Keep track of global allocations, to ensure they keep the same address across different executions, even if the order of allocations changes.
+    /// The `AllocId` for globals is stable across executions, so we can use it as an identifier.
+    global_allocations: GlobalAllocationHandler,
+
+    /// Cache for which warnings have already been shown to the user.
+    /// `None` if warnings are disabled.
+    warning_cache: Option<Warnings>,
+}
+
+impl GlobalState {
+    fn new(target_usize_max: u64, print_warnings: bool) -> Self {
+        Self {
+            global_allocations: GlobalAllocationHandler::new(target_usize_max),
+            warning_cache: print_warnings.then(Default::default),
+        }
     }
+}
 
-    pub fn print_genmc_graph(&self) {
-        todo!()
+/// The main interface with GenMC.
+/// Each `GenmcCtx` owns one `MiriGenmcShim`, which owns one `GenMCDriver` (the GenMC model checker).
+/// For each GenMC run (estimation or verification), one or more `GenmcCtx` can be created (one per Miri thread).
+/// However, for now, we only ever have one `GenmcCtx` per run.
+///
+/// In multithreading, each worker thread has its own `GenmcCtx`, which will have their results combined in the end.
+/// FIXME(genmc): implement multithreading.
+///
+/// Some data is shared across all `GenmcCtx` in the same run, namely data for global allocation handling.
+/// Globals must be allocated in a consistent manner, i.e., each global allocation must have the same address in each execution.
+///
+/// Some state is reset between each execution in the same run.
+pub struct GenmcCtx {
+    /// Handle to the GenMC model checker.
+    handle: RefCell<UniquePtr<MiriGenmcShim>>,
+
+    /// State that is reset at the start of every execution.
+    exec_state: PerExecutionState,
+
+    /// State that persists across executions.
+    /// All `GenmcCtx` in one verification step share this state.
+    global_state: Arc<GlobalState>,
+}
+
+/// GenMC Context creation and administrative / query actions
+impl GenmcCtx {
+    /// Create a new `GenmcCtx` from a given config.
+    fn new(miri_config: &MiriConfig, global_state: Arc<GlobalState>, mode: GenmcMode) -> Self {
+        let genmc_config = miri_config.genmc_config.as_ref().unwrap();
+        let handle = RefCell::new(create_genmc_driver_handle(
+            &genmc_config.params,
+            genmc_config.log_level,
+            /* do_estimation: */ mode == GenmcMode::Estimation,
+        ));
+        Self { handle, exec_state: Default::default(), global_state }
     }
 
-    /// This function determines if we should continue exploring executions or if we are done.
-    ///
-    /// In GenMC mode, the input program should be repeatedly executed until this function returns `true` or an error is found.
-    pub fn is_exploration_done(&self) -> bool {
-        todo!()
+    fn get_estimation_results(&self) -> EstimationResult {
+        self.handle.borrow().get_estimation_results()
     }
 
-    /// Inform GenMC that a new program execution has started.
-    /// This function should be called at the start of every execution.
-    pub(crate) fn handle_execution_start(&self) {
-        todo!()
+    /// Get the number of blocked executions encountered by GenMC.
+    fn get_blocked_execution_count(&self) -> u64 {
+        self.handle.borrow().get_blocked_execution_count()
     }
 
-    /// Inform GenMC that the program's execution has ended.
-    ///
-    /// This function must be called even when the execution got stuck (i.e., it returned a `InterpErrorKind::MachineStop` with error kind `TerminationInfo::GenmcStuckExecution`).
-    pub(crate) fn handle_execution_end<'tcx>(
-        &self,
-        ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
-    ) -> Result<(), String> {
-        todo!()
+    /// Get the number of explored executions encountered by GenMC.
+    fn get_explored_execution_count(&self) -> u64 {
+        self.handle.borrow().get_explored_execution_count()
     }
 
-    /**** Memory access handling ****/
+    /// Check if GenMC encountered an error that wasn't immediately returned during execution.
+    /// Returns a string representation of the error if one occurred.
+    fn try_get_error(&self) -> Option<String> {
+        self.handle
+            .borrow()
+            .get_error_string()
+            .as_ref()
+            .map(|error| error.to_string_lossy().to_string())
+    }
+
+    /// Check if GenMC encountered an error that wasn't immediately returned during execution.
+    /// Returns a string representation of the error if one occurred.
+    fn get_result_message(&self) -> String {
+        self.handle
+            .borrow()
+            .get_result_message()
+            .as_ref()
+            .map(|error| error.to_string_lossy().to_string())
+            .expect("there should always be a message")
+    }
 
     /// Select whether data race free actions should be allowed. This function should be used carefully!
     ///
@@ -83,10 +194,68 @@ impl GenmcCtx {
     /// # Panics
     /// If data race free is attempted to be set more than once (i.e., no nesting allowed).
     pub(super) fn set_ongoing_action_data_race_free(&self, enable: bool) {
-        let old = self.allow_data_races.replace(enable);
+        debug!("GenMC: set_ongoing_action_data_race_free ({enable})");
+        let old = self.exec_state.allow_data_races.replace(enable);
         assert_ne!(old, enable, "cannot nest allow_data_races");
     }
 
+    /// Check whether data races are currently allowed (e.g., for loading values for validation which are not actually loaded by the program).
+    fn get_alloc_data_races(&self) -> bool {
+        self.exec_state.allow_data_races.get()
+    }
+}
+
+/// GenMC event handling. These methods are used to inform GenMC about events happening in the program, and to handle scheduling decisions.
+impl GenmcCtx {
+    /// Prepare for the next execution and inform GenMC about it.
+    /// Must be called before at the start of every execution.
+    fn prepare_next_execution(&self) {
+        // Reset per-execution state.
+        self.exec_state.reset();
+        // Inform GenMC about the new execution.
+        self.handle.borrow_mut().pin_mut().handle_execution_start();
+    }
+
+    /// Inform GenMC that the program's execution has ended.
+    ///
+    /// This function must be called even when the execution is blocked
+    /// (i.e., it returned a `InterpErrorKind::MachineStop` with error kind `TerminationInfo::GenmcBlockedExecution`).
+    /// Don't call this function if an error was found.
+    ///
+    /// GenMC detects certain errors only when the execution ends.
+    /// If an error occured, a string containing a short error description is returned.
+    ///
+    /// GenMC currently doesn't return an error in all cases immediately when one happens.
+    /// This function will also check for those, and return their error description.
+    ///
+    /// To get the all messages (warnings, errors) that GenMC produces, use the `get_result_message` method.
+    fn handle_execution_end(&self) -> ExecutionEndResult {
+        let result = self.handle.borrow_mut().pin_mut().handle_execution_end();
+        if let Some(error) = result.as_ref() {
+            return ExecutionEndResult::Error(error.to_string_lossy().to_string());
+        }
+
+        // GenMC decides if there is more to explore:
+        let exploration_done = self.handle.borrow_mut().pin_mut().is_exploration_done();
+
+        // GenMC currently does not return an error value immediately in all cases.
+        // Both `handle_execution_end` and `is_exploration_done` can produce such errors.
+        // We manually query for any errors here to ensure we don't miss any.
+        if let Some(error) = self.try_get_error() {
+            ExecutionEndResult::Error(error)
+        } else if exploration_done {
+            ExecutionEndResult::Stop
+        } else {
+            ExecutionEndResult::Continue
+        }
+    }
+
+    /**** Memory access handling ****/
+
+    /// Inform GenMC about an atomic load.
+    /// Returns that value that the load should read.
+    ///
+    /// `old_value` is the value that a non-atomic load would read here, or `None` if the memory is uninitalized.
     pub(crate) fn atomic_load<'tcx>(
         &self,
         ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
@@ -95,64 +264,93 @@ impl GenmcCtx {
         ordering: AtomicReadOrd,
         old_val: Option<Scalar>,
     ) -> InterpResult<'tcx, Scalar> {
-        assert!(!self.allow_data_races.get());
-        todo!()
+        assert!(!self.get_alloc_data_races(), "atomic load with data race checking disabled.");
+        let genmc_old_value = if let Some(scalar) = old_val {
+            scalar_to_genmc_scalar(ecx, scalar)?
+        } else {
+            GenmcScalar::UNINIT
+        };
+        let read_value =
+            self.handle_load(&ecx.machine, address, size, ordering.to_genmc(), genmc_old_value)?;
+        genmc_scalar_to_scalar(ecx, read_value, size)
     }
 
+    /// Inform GenMC about an atomic store.
+    /// Returns `true` if the stored value should be reflected in Miri's memory.
+    ///
+    /// `old_value` is the value that a non-atomic load would read here, or `None` if the memory is uninitalized.
     pub(crate) fn atomic_store<'tcx>(
         &self,
         ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
         address: Size,
         size: Size,
         value: Scalar,
+        old_value: Option<Scalar>,
         ordering: AtomicWriteOrd,
-    ) -> InterpResult<'tcx, ()> {
-        assert!(!self.allow_data_races.get());
-        todo!()
+    ) -> InterpResult<'tcx, bool> {
+        assert!(!self.get_alloc_data_races(), "atomic store with data race checking disabled.");
+        let genmc_value = scalar_to_genmc_scalar(ecx, value)?;
+        let genmc_old_value = if let Some(scalar) = old_value {
+            scalar_to_genmc_scalar(ecx, scalar)?
+        } else {
+            GenmcScalar::UNINIT
+        };
+        self.handle_store(
+            &ecx.machine,
+            address,
+            size,
+            genmc_value,
+            genmc_old_value,
+            ordering.to_genmc(),
+        )
     }
 
+    /// Inform GenMC about an atomic fence.
     pub(crate) fn atomic_fence<'tcx>(
         &self,
         machine: &MiriMachine<'tcx>,
         ordering: AtomicFenceOrd,
-    ) -> InterpResult<'tcx, ()> {
-        assert!(!self.allow_data_races.get());
-        todo!()
+    ) -> InterpResult<'tcx> {
+        assert!(!self.get_alloc_data_races(), "atomic fence with data race checking disabled.");
+
+        let thread_infos = self.exec_state.thread_id_manager.borrow();
+        let curr_thread = machine.threads.active_thread();
+        let genmc_tid = thread_infos.get_genmc_tid(curr_thread);
+
+        self.handle.borrow_mut().pin_mut().handle_fence(genmc_tid, ordering.to_genmc());
+        interp_ok(())
     }
 
     /// Inform GenMC about an atomic read-modify-write operation.
     ///
-    /// Returns `(old_val, new_val)`.
+    /// Returns `(old_val, Option<new_val>)`. `new_val` might not be the latest write in coherence order, which is indicated by `None`.
+    ///
+    /// `old_value` is the value that a non-atomic load would read here, or `None` if the memory is uninitalized.
     pub(crate) fn atomic_rmw_op<'tcx>(
         &self,
         ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
         address: Size,
         size: Size,
+        atomic_op: AtomicRmwOp,
+        is_signed: bool,
         ordering: AtomicRwOrd,
-        (rmw_op, not): (mir::BinOp, bool),
         rhs_scalar: Scalar,
-    ) -> InterpResult<'tcx, (Scalar, Scalar)> {
-        assert!(!self.allow_data_races.get());
-        todo!()
+        old_value: Scalar,
+    ) -> InterpResult<'tcx, (Scalar, Option<Scalar>)> {
+        self.handle_atomic_rmw_op(
+            ecx,
+            address,
+            size,
+            ordering,
+            to_genmc_rmw_op(atomic_op, is_signed),
+            scalar_to_genmc_scalar(ecx, rhs_scalar)?,
+            scalar_to_genmc_scalar(ecx, old_value)?,
+        )
     }
 
-    /// Inform GenMC about an atomic `min` or `max` operation.
+    /// Returns `(old_val, Option<new_val>)`. `new_val` might not be the latest write in coherence order, which is indicated by `None`.
     ///
-    /// Returns `(old_val, new_val)`.
-    pub(crate) fn atomic_min_max_op<'tcx>(
-        &self,
-        ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
-        address: Size,
-        size: Size,
-        ordering: AtomicRwOrd,
-        min: bool,
-        is_signed: bool,
-        rhs_scalar: Scalar,
-    ) -> InterpResult<'tcx, (Scalar, Scalar)> {
-        assert!(!self.allow_data_races.get());
-        todo!()
-    }
-
+    /// `old_value` is the value that a non-atomic load would read here, or `None` if the memory is uninitalized.
     pub(crate) fn atomic_exchange<'tcx>(
         &self,
         ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
@@ -160,11 +358,24 @@ impl GenmcCtx {
         size: Size,
         rhs_scalar: Scalar,
         ordering: AtomicRwOrd,
-    ) -> InterpResult<'tcx, (Scalar, bool)> {
-        assert!(!self.allow_data_races.get());
-        todo!()
+        old_value: Scalar,
+    ) -> InterpResult<'tcx, (Scalar, Option<Scalar>)> {
+        self.handle_atomic_rmw_op(
+            ecx,
+            address,
+            size,
+            ordering,
+            /* genmc_rmw_op */ RMWBinOp::Xchg,
+            scalar_to_genmc_scalar(ecx, rhs_scalar)?,
+            scalar_to_genmc_scalar(ecx, old_value)?,
+        )
     }
 
+    /// Inform GenMC about an atomic compare-exchange operation.
+    ///
+    /// Returns the old value read by the compare exchange, optionally the value that Miri should write back to its memory, and whether the compare-exchange was a success or not.
+    ///
+    /// `old_value` is the value that a non-atomic load would read here, or `None` if the memory is uninitalized.
     pub(crate) fn atomic_compare_exchange<'tcx>(
         &self,
         ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
@@ -175,9 +386,83 @@ impl GenmcCtx {
         success: AtomicRwOrd,
         fail: AtomicReadOrd,
         can_fail_spuriously: bool,
-    ) -> InterpResult<'tcx, (Scalar, bool)> {
-        assert!(!self.allow_data_races.get());
-        todo!()
+        old_value: Scalar,
+    ) -> InterpResult<'tcx, (Scalar, Option<Scalar>, bool)> {
+        assert!(
+            !self.get_alloc_data_races(),
+            "atomic compare-exchange with data race checking disabled."
+        );
+        assert_ne!(0, size.bytes());
+        assert!(
+            size.bytes() <= MAX_ACCESS_SIZE,
+            "GenMC currently does not support atomic accesses larger than {} bytes (got {} bytes)",
+            MAX_ACCESS_SIZE,
+            size.bytes()
+        );
+
+        // Upgrade the success memory ordering to equal the failure ordering, since GenMC currently ignores the failure ordering.
+        // FIXME(genmc): remove this once GenMC properly supports the failure memory ordering.
+        let upgraded_success_ordering =
+            maybe_upgrade_compare_exchange_success_orderings(success, fail);
+
+        if let Some(warning_cache) = &self.global_state.warning_cache {
+            // FIXME(genmc): remove once GenMC supports failure memory ordering in `compare_exchange`.
+            let (effective_failure_ordering, _) =
+                upgraded_success_ordering.split_memory_orderings();
+            // Return a warning if the actual orderings don't match the upgraded ones.
+            if success != upgraded_success_ordering || effective_failure_ordering != fail {
+                emit_warning(ecx, &warning_cache.compare_exchange_failure_ordering, || {
+                    NonHaltingDiagnostic::GenmcCompareExchangeOrderingMismatch {
+                        success_ordering: success,
+                        upgraded_success_ordering,
+                        failure_ordering: fail,
+                        effective_failure_ordering,
+                    }
+                });
+            }
+            // FIXME(genmc): remove once GenMC implements spurious failures for `compare_exchange_weak`.
+            if can_fail_spuriously {
+                emit_warning(ecx, &warning_cache.compare_exchange_weak, || {
+                    NonHaltingDiagnostic::GenmcCompareExchangeWeak
+                });
+            }
+        }
+
+        debug!(
+            "GenMC: atomic_compare_exchange, address: {address:?}, size: {size:?} (expect: {expected_old_value:?}, new: {new_value:?}, old_value: {old_value:?}, {success:?}, orderings: {fail:?}), can fail spuriously: {can_fail_spuriously}"
+        );
+
+        let thread_infos = self.exec_state.thread_id_manager.borrow();
+        let genmc_tid = thread_infos.get_genmc_tid(ecx.machine.threads.active_thread());
+
+        let cas_result = self.handle.borrow_mut().pin_mut().handle_compare_exchange(
+            genmc_tid,
+            address.bytes(),
+            size.bytes(),
+            scalar_to_genmc_scalar(ecx, expected_old_value)?,
+            scalar_to_genmc_scalar(ecx, new_value)?,
+            scalar_to_genmc_scalar(ecx, old_value)?,
+            upgraded_success_ordering.to_genmc(),
+            fail.to_genmc(),
+            can_fail_spuriously,
+        );
+
+        if let Some(error) = cas_result.error.as_ref() {
+            // FIXME(genmc): error handling
+            throw_ub_format!("{}", error.to_string_lossy());
+        }
+
+        let return_scalar = genmc_scalar_to_scalar(ecx, cas_result.old_value, size)?;
+        debug!(
+            "GenMC: atomic_compare_exchange: result: {cas_result:?}, returning scalar: {return_scalar:?}"
+        );
+        // The write can only be a co-maximal write if the CAS succeeded.
+        assert!(cas_result.is_success || !cas_result.is_coherence_order_maximal_write);
+        interp_ok((
+            return_scalar,
+            cas_result.is_coherence_order_maximal_write.then_some(new_value),
+            cas_result.is_success,
+        ))
     }
 
     /// Inform GenMC about a non-atomic memory load
@@ -188,40 +473,178 @@ impl GenmcCtx {
         machine: &MiriMachine<'tcx>,
         address: Size,
         size: Size,
-    ) -> InterpResult<'tcx, ()> {
-        todo!()
+    ) -> InterpResult<'tcx> {
+        debug!(
+            "GenMC: received memory_load (non-atomic): address: {:#x}, size: {}",
+            address.bytes(),
+            size.bytes()
+        );
+        if self.get_alloc_data_races() {
+            debug!("GenMC: data race checking disabled, ignoring non-atomic load.");
+            return interp_ok(());
+        }
+        // GenMC doesn't like ZSTs, and they can't have any data races, so we skip them
+        if size.bytes() == 0 {
+            return interp_ok(());
+        }
+
+        let handle_load = |address, size| {
+            // NOTE: Values loaded non-atomically are still handled by Miri, so we discard whatever we get from GenMC
+            let _read_value = self.handle_load(
+                machine,
+                address,
+                size,
+                MemOrdering::NotAtomic,
+                // This value is used to update the co-maximal store event to the same location.
+                // We don't need to update that store, since if it is ever read by any atomic loads, the value will be updated then.
+                // We use uninit for lack of a better value, since we don't know whether the location we currently load from is initialized or not.
+                GenmcScalar::UNINIT,
+            )?;
+            interp_ok(())
+        };
+
+        // This load is small enough so GenMC can handle it.
+        if size.bytes() <= MAX_ACCESS_SIZE {
+            return handle_load(address, size);
+        }
+
+        // This load is too big to be a single GenMC access, we have to split it.
+        // FIXME(genmc): This will misbehave if there are non-64bit-atomics in there.
+        // Needs proper support on the GenMC side for large and mixed atomic accesses.
+        for (address, size) in split_access(address, size) {
+            handle_load(Size::from_bytes(address), Size::from_bytes(size))?;
+        }
+        interp_ok(())
     }
 
+    /// Inform GenMC about a non-atomic memory store
+    ///
+    /// NOTE: Unlike for *atomic* stores, we don't provide the actual stored values to GenMC here.
     pub(crate) fn memory_store<'tcx>(
         &self,
         machine: &MiriMachine<'tcx>,
         address: Size,
         size: Size,
-    ) -> InterpResult<'tcx, ()> {
-        todo!()
+    ) -> InterpResult<'tcx> {
+        debug!(
+            "GenMC: received memory_store (non-atomic): address: {:#x}, size: {}",
+            address.bytes(),
+            size.bytes()
+        );
+        if self.get_alloc_data_races() {
+            debug!("GenMC: data race checking disabled, ignoring non-atomic store.");
+            return interp_ok(());
+        }
+        // GenMC doesn't like ZSTs, and they can't have any data races, so we skip them
+        if size.bytes() == 0 {
+            return interp_ok(());
+        }
+
+        let handle_store = |address, size| {
+            // We always write the the stored values to Miri's memory, whether GenMC says the write is co-maximal or not.
+            // The GenMC scheduler ensures that replaying an execution happens in porf-respecting order (po := program order, rf: reads-from order).
+            // This means that for any non-atomic read Miri performs, the corresponding write has already been replayed.
+            let _is_co_max_write = self.handle_store(
+                machine,
+                address,
+                size,
+                // We don't know the value that this store will write, but GenMC expects that we give it an actual value.
+                // Unfortunately, there are situations where this value can actually become visible
+                // to the program: when there is an atomic load reading from a non-atomic store.
+                // FIXME(genmc): update once mixed atomic-non-atomic support is added. Afterwards, this value should never be readable.
+                GenmcScalar::from_u64(0xDEADBEEF),
+                // This value is used to update the co-maximal store event to the same location.
+                // This old value cannot be read anymore by any future loads, since we are doing another non-atomic store to the same location.
+                // Any future load will either see the store we are adding now, or we have a data race (there can only be one possible non-atomic value to read from at any time).
+                // We use uninit for lack of a better value, since we don't know whether the location we currently write to is initialized or not.
+                GenmcScalar::UNINIT,
+                MemOrdering::NotAtomic,
+            )?;
+            interp_ok(())
+        };
+
+        // This store is small enough so GenMC can handle it.
+        if size.bytes() <= MAX_ACCESS_SIZE {
+            return handle_store(address, size);
+        }
+
+        // This store is too big to be a single GenMC access, we have to split it.
+        // FIXME(genmc): This will misbehave if there are non-64bit-atomics in there.
+        // Needs proper support on the GenMC side for large and mixed atomic accesses.
+        for (address, size) in split_access(address, size) {
+            handle_store(Size::from_bytes(address), Size::from_bytes(size))?;
+        }
+        interp_ok(())
     }
 
     /**** Memory (de)allocation ****/
 
+    /// This is also responsible for determining the address of the new allocation.
     pub(crate) fn handle_alloc<'tcx>(
         &self,
-        machine: &MiriMachine<'tcx>,
+        ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
+        alloc_id: AllocId,
         size: Size,
         alignment: Align,
         memory_kind: MemoryKind,
     ) -> InterpResult<'tcx, u64> {
-        todo!()
+        assert!(
+            !self.get_alloc_data_races(),
+            "memory allocation with data race checking disabled."
+        );
+        let machine = &ecx.machine;
+        if memory_kind == MiriMemoryKind::Global.into() {
+            return ecx
+                .get_global_allocation_address(&self.global_state.global_allocations, alloc_id);
+        }
+        let thread_infos = self.exec_state.thread_id_manager.borrow();
+        let curr_thread = machine.threads.active_thread();
+        let genmc_tid = thread_infos.get_genmc_tid(curr_thread);
+        // GenMC doesn't support ZSTs, so we set the minimum size to 1 byte
+        let genmc_size = size.bytes().max(1);
+
+        let chosen_address = self.handle.borrow_mut().pin_mut().handle_malloc(
+            genmc_tid,
+            genmc_size,
+            alignment.bytes(),
+        );
+
+        // Non-global addresses should not be in the global address space or null.
+        assert_ne!(0, chosen_address, "GenMC malloc returned nullptr.");
+        assert_eq!(0, chosen_address & GENMC_GLOBAL_ADDRESSES_MASK);
+        // Sanity check the address alignment:
+        assert!(
+            chosen_address.is_multiple_of(alignment.bytes()),
+            "GenMC returned address {chosen_address:#x} with lower alignment than requested ({}).",
+            alignment.bytes()
+        );
+
+        interp_ok(chosen_address)
     }
 
     pub(crate) fn handle_dealloc<'tcx>(
         &self,
         machine: &MiriMachine<'tcx>,
+        alloc_id: AllocId,
         address: Size,
-        size: Size,
-        align: Align,
         kind: MemoryKind,
-    ) -> InterpResult<'tcx, ()> {
-        todo!()
+    ) -> InterpResult<'tcx> {
+        assert_ne!(
+            kind,
+            MiriMemoryKind::Global.into(),
+            "we probably shouldn't try to deallocate global allocations (alloc_id: {alloc_id:?})"
+        );
+        assert!(
+            !self.get_alloc_data_races(),
+            "memory deallocation with data race checking disabled."
+        );
+        let thread_infos = self.exec_state.thread_id_manager.borrow();
+        let curr_thread = machine.threads.active_thread();
+        let genmc_tid = thread_infos.get_genmc_tid(curr_thread);
+
+        self.handle.borrow_mut().pin_mut().handle_free(genmc_tid, address.bytes());
+
+        interp_ok(())
     }
 
     /**** Thread management ****/
@@ -229,50 +652,90 @@ impl GenmcCtx {
     pub(crate) fn handle_thread_create<'tcx>(
         &self,
         threads: &ThreadManager<'tcx>,
+        // FIXME(genmc,symmetry reduction): pass info to GenMC
+        _start_routine: crate::Pointer,
+        _func_arg: &crate::ImmTy<'tcx>,
         new_thread_id: ThreadId,
-    ) -> InterpResult<'tcx, ()> {
-        assert!(!self.allow_data_races.get());
-        todo!()
+    ) -> InterpResult<'tcx> {
+        assert!(!self.get_alloc_data_races(), "thread creation with data race checking disabled.");
+        let mut thread_infos = self.exec_state.thread_id_manager.borrow_mut();
+
+        let curr_thread_id = threads.active_thread();
+        let genmc_parent_tid = thread_infos.get_genmc_tid(curr_thread_id);
+        let genmc_new_tid = thread_infos.add_thread(new_thread_id);
+
+        self.handle.borrow_mut().pin_mut().handle_thread_create(genmc_new_tid, genmc_parent_tid);
+        interp_ok(())
     }
 
     pub(crate) fn handle_thread_join<'tcx>(
         &self,
         active_thread_id: ThreadId,
         child_thread_id: ThreadId,
-    ) -> InterpResult<'tcx, ()> {
-        assert!(!self.allow_data_races.get());
-        todo!()
-    }
+    ) -> InterpResult<'tcx> {
+        assert!(!self.get_alloc_data_races(), "thread join with data race checking disabled.");
+        let thread_infos = self.exec_state.thread_id_manager.borrow();
 
-    pub(crate) fn handle_thread_stack_empty(&self, thread_id: ThreadId) {
-        todo!()
-    }
+        let genmc_curr_tid = thread_infos.get_genmc_tid(active_thread_id);
+        let genmc_child_tid = thread_infos.get_genmc_tid(child_thread_id);
 
-    pub(crate) fn handle_thread_finish<'tcx>(
-        &self,
-        threads: &ThreadManager<'tcx>,
-    ) -> InterpResult<'tcx, ()> {
-        assert!(!self.allow_data_races.get());
-        todo!()
+        self.handle.borrow_mut().pin_mut().handle_thread_join(genmc_curr_tid, genmc_child_tid);
+
+        interp_ok(())
     }
 
-    /**** Scheduling functionality ****/
+    pub(crate) fn handle_thread_finish<'tcx>(&self, threads: &ThreadManager<'tcx>) {
+        assert!(!self.get_alloc_data_races(), "thread finish with data race checking disabled.");
+        let curr_thread_id = threads.active_thread();
 
-    /// Ask for a scheduling decision. This should be called before every MIR instruction.
-    ///
-    /// GenMC may realize that the execution got stuck, then this function will return a `InterpErrorKind::MachineStop` with error kind `TerminationInfo::GenmcStuckExecution`).
-    ///
-    /// This is **not** an error by iself! Treat this as if the program ended normally: `handle_execution_end` should be called next, which will determine if were are any actual errors.
-    pub(crate) fn schedule_thread<'tcx>(
+        let thread_infos = self.exec_state.thread_id_manager.borrow();
+        let genmc_tid = thread_infos.get_genmc_tid(curr_thread_id);
+
+        debug!("GenMC: thread {curr_thread_id:?} ({genmc_tid:?}) finished.");
+        // NOTE: Miri doesn't support return values for threads, but GenMC expects one, so we return 0
+        self.handle.borrow_mut().pin_mut().handle_thread_finish(genmc_tid, /* ret_val */ 0);
+    }
+
+    /// Handle a call to `libc::exit` or the exit of the main thread.
+    /// Unless an error is returned, the program should continue executing (in a different thread, chosen by the next scheduling call).
+    pub(crate) fn handle_exit<'tcx>(
         &self,
-        ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
-    ) -> InterpResult<'tcx, ThreadId> {
-        assert!(!self.allow_data_races.get());
-        todo!()
+        thread: ThreadId,
+        exit_code: i32,
+        exit_type: ExitType,
+    ) -> InterpResult<'tcx> {
+        // Calling `libc::exit` doesn't do cleanup, so we skip the leak check in that case.
+        let exit_status = ExitStatus { exit_code, exit_type };
+
+        if let Some(old_exit_status) = self.exec_state.exit_status.get() {
+            throw_ub_format!(
+                "`exit` called twice, first with status {old_exit_status:?}, now with status {exit_status:?}",
+            );
+        }
+
+        // FIXME(genmc): Add a flag to continue exploration even when the program exits with a non-zero exit code.
+        if exit_code != 0 {
+            info!("GenMC: 'exit' called with non-zero argument, aborting execution.");
+            let leak_check = exit_status.do_leak_check();
+            throw_machine_stop!(TerminationInfo::Exit { code: exit_code, leak_check });
+        }
+
+        if matches!(exit_type, ExitType::ExitCalled) {
+            let thread_infos = self.exec_state.thread_id_manager.borrow();
+            let genmc_tid = thread_infos.get_genmc_tid(thread);
+
+            self.handle.borrow_mut().pin_mut().handle_thread_kill(genmc_tid);
+        } else {
+            assert_eq!(thread, ThreadId::MAIN_THREAD);
+        }
+        // We continue executing now, so we store the exit status.
+        self.exec_state.exit_status.set(Some(exit_status));
+        interp_ok(())
     }
 
     /**** Blocking instructions ****/
 
+    #[allow(unused)]
     pub(crate) fn handle_verifier_assume<'tcx>(
         &self,
         machine: &MiriMachine<'tcx>,
@@ -282,14 +745,171 @@ impl GenmcCtx {
     }
 }
 
-impl VisitProvenance for GenmcCtx {
-    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
-        // We don't have any tags.
+impl GenmcCtx {
+    /// Inform GenMC about a load (atomic or non-atomic).
+    /// Returns the value that GenMC wants this load to read.
+    fn handle_load<'tcx>(
+        &self,
+        machine: &MiriMachine<'tcx>,
+        address: Size,
+        size: Size,
+        memory_ordering: MemOrdering,
+        genmc_old_value: GenmcScalar,
+    ) -> InterpResult<'tcx, GenmcScalar> {
+        assert!(
+            size.bytes() != 0
+                && (memory_ordering == MemOrdering::NotAtomic || size.bytes().is_power_of_two())
+        );
+        if size.bytes() > MAX_ACCESS_SIZE {
+            throw_unsup_format!(
+                "GenMC mode currently does not support atomics larger than {MAX_ACCESS_SIZE} bytes.",
+            );
+        }
+        let thread_infos = self.exec_state.thread_id_manager.borrow();
+        let curr_thread_id = machine.threads.active_thread();
+        let genmc_tid = thread_infos.get_genmc_tid(curr_thread_id);
+
+        debug!(
+            "GenMC: load, thread: {curr_thread_id:?} ({genmc_tid:?}), address: {addr} == {addr:#x}, size: {size:?}, ordering: {memory_ordering:?}, old_value: {genmc_old_value:x?}",
+            addr = address.bytes()
+        );
+
+        let load_result = self.handle.borrow_mut().pin_mut().handle_load(
+            genmc_tid,
+            address.bytes(),
+            size.bytes(),
+            memory_ordering,
+            genmc_old_value,
+        );
+
+        if let Some(error) = load_result.error.as_ref() {
+            // FIXME(genmc): error handling
+            throw_ub_format!("{}", error.to_string_lossy());
+        }
+
+        if !load_result.has_value {
+            // FIXME(GenMC): Implementing certain GenMC optimizations will lead to this.
+            unimplemented!("GenMC: load returned no value.");
+        }
+
+        debug!("GenMC: load returned value: {:?}", load_result.read_value);
+        interp_ok(load_result.read_value)
     }
-}
 
-impl GenmcCtx {
-    fn handle_user_block<'tcx>(&self, machine: &MiriMachine<'tcx>) -> InterpResult<'tcx, ()> {
+    /// Inform GenMC about a store (atomic or non-atomic).
+    /// Returns true if the store is co-maximal, i.e., it should be written to Miri's memory too.
+    fn handle_store<'tcx>(
+        &self,
+        machine: &MiriMachine<'tcx>,
+        address: Size,
+        size: Size,
+        genmc_value: GenmcScalar,
+        genmc_old_value: GenmcScalar,
+        memory_ordering: MemOrdering,
+    ) -> InterpResult<'tcx, bool> {
+        assert!(
+            size.bytes() != 0
+                && (memory_ordering == MemOrdering::NotAtomic || size.bytes().is_power_of_two())
+        );
+        if size.bytes() > MAX_ACCESS_SIZE {
+            throw_unsup_format!(
+                "GenMC mode currently does not support atomics larger than {MAX_ACCESS_SIZE} bytes."
+            );
+        }
+        let thread_infos = self.exec_state.thread_id_manager.borrow();
+        let curr_thread_id = machine.threads.active_thread();
+        let genmc_tid = thread_infos.get_genmc_tid(curr_thread_id);
+
+        debug!(
+            "GenMC: store, thread: {curr_thread_id:?} ({genmc_tid:?}), address: {addr} = {addr:#x}, size: {size:?}, ordering {memory_ordering:?}, value: {genmc_value:?}",
+            addr = address.bytes()
+        );
+
+        let store_result = self.handle.borrow_mut().pin_mut().handle_store(
+            genmc_tid,
+            address.bytes(),
+            size.bytes(),
+            genmc_value,
+            genmc_old_value,
+            memory_ordering,
+        );
+
+        if let Some(error) = store_result.error.as_ref() {
+            // FIXME(genmc): error handling
+            throw_ub_format!("{}", error.to_string_lossy());
+        }
+
+        interp_ok(store_result.is_coherence_order_maximal_write)
+    }
+
+    /// Inform GenMC about an atomic read-modify-write operation.
+    /// This includes atomic swap (also often called "exchange"), but does *not*
+    /// include compare-exchange (see `RMWBinOp` for full list of operations).
+    /// Returns the previous value at that memory location, and optionally the value that should be written back to Miri's memory.
+    fn handle_atomic_rmw_op<'tcx>(
+        &self,
+        ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
+        address: Size,
+        size: Size,
+        ordering: AtomicRwOrd,
+        genmc_rmw_op: RMWBinOp,
+        genmc_rhs_scalar: GenmcScalar,
+        genmc_old_value: GenmcScalar,
+    ) -> InterpResult<'tcx, (Scalar, Option<Scalar>)> {
+        assert!(
+            !self.get_alloc_data_races(),
+            "atomic read-modify-write operation with data race checking disabled."
+        );
+        assert_ne!(0, size.bytes());
+        assert!(
+            size.bytes() <= MAX_ACCESS_SIZE,
+            "GenMC currently does not support atomic accesses larger than {} bytes (got {} bytes)",
+            MAX_ACCESS_SIZE,
+            size.bytes()
+        );
+
+        let curr_thread_id = ecx.machine.threads.active_thread();
+        let genmc_tid = self.exec_state.thread_id_manager.borrow().get_genmc_tid(curr_thread_id);
+        debug!(
+            "GenMC: atomic_rmw_op, thread: {curr_thread_id:?} ({genmc_tid:?}) (op: {genmc_rmw_op:?}, rhs value: {genmc_rhs_scalar:?}), address: {address:?}, size: {size:?}, ordering: {ordering:?}",
+        );
+        let rmw_result = self.handle.borrow_mut().pin_mut().handle_read_modify_write(
+            genmc_tid,
+            address.bytes(),
+            size.bytes(),
+            genmc_rmw_op,
+            ordering.to_genmc(),
+            genmc_rhs_scalar,
+            genmc_old_value,
+        );
+
+        if let Some(error) = rmw_result.error.as_ref() {
+            // FIXME(genmc): error handling
+            throw_ub_format!("{}", error.to_string_lossy());
+        }
+
+        let old_value_scalar = genmc_scalar_to_scalar(ecx, rmw_result.old_value, size)?;
+
+        let new_value_scalar = if rmw_result.is_coherence_order_maximal_write {
+            Some(genmc_scalar_to_scalar(ecx, rmw_result.new_value, size)?)
+        } else {
+            None
+        };
+        interp_ok((old_value_scalar, new_value_scalar))
+    }
+
+    /**** Blocking functionality ****/
+
+    /// Handle a user thread getting blocked.
+    /// This may happen due to an manual `assume` statement added by a user
+    /// or added by some automated program transformation, e.g., for spinloops.
+    fn handle_user_block<'tcx>(&self, _machine: &MiriMachine<'tcx>) -> InterpResult<'tcx> {
         todo!()
     }
 }
+
+impl VisitProvenance for GenmcCtx {
+    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
+        // We don't have any tags.
+    }
+}
diff --git a/src/tools/miri/src/concurrency/genmc/run.rs b/src/tools/miri/src/concurrency/genmc/run.rs
new file mode 100644
index 0000000000000..33c5b6b0a005f
--- /dev/null
+++ b/src/tools/miri/src/concurrency/genmc/run.rs
@@ -0,0 +1,166 @@
+use std::rc::Rc;
+use std::sync::Arc;
+use std::time::Instant;
+
+use genmc_sys::EstimationResult;
+use rustc_middle::ty::TyCtxt;
+
+use super::GlobalState;
+use crate::concurrency::genmc::ExecutionEndResult;
+use crate::rustc_const_eval::interpret::PointerArithmetic;
+use crate::{GenmcConfig, GenmcCtx, MiriConfig};
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub(super) enum GenmcMode {
+    Estimation,
+    Verification,
+}
+
+impl GenmcMode {
+    /// Return whether warnings on unsupported features should be printed in this mode.
+    fn print_unsupported_warnings(self) -> bool {
+        self == GenmcMode::Verification
+    }
+}
+
+/// Do a complete run of the program in GenMC mode.
+/// This will call `eval_entry` multiple times, until either:
+/// - An error is detected (indicated by a `None` return value)
+/// - All possible executions are explored.
+///
+/// Returns `None` is an error is detected, or `Some(return_value)` with the return value of the last run of the program.
+pub fn run_genmc_mode<'tcx>(
+    config: &MiriConfig,
+    eval_entry: impl Fn(Rc<GenmcCtx>) -> Option<i32>,
+    tcx: TyCtxt<'tcx>,
+) -> Option<i32> {
+    let genmc_config = config.genmc_config.as_ref().unwrap();
+    // Run in Estimation mode if requested.
+    if genmc_config.do_estimation {
+        eprintln!("Estimating GenMC verification time...");
+        run_genmc_mode_impl(config, &eval_entry, tcx, GenmcMode::Estimation)?;
+    }
+    // Run in Verification mode.
+    eprintln!("Running GenMC Verification...");
+    run_genmc_mode_impl(config, &eval_entry, tcx, GenmcMode::Verification)
+}
+
+fn run_genmc_mode_impl<'tcx>(
+    config: &MiriConfig,
+    eval_entry: &impl Fn(Rc<GenmcCtx>) -> Option<i32>,
+    tcx: TyCtxt<'tcx>,
+    mode: GenmcMode,
+) -> Option<i32> {
+    let time_start = Instant::now();
+    let genmc_config = config.genmc_config.as_ref().unwrap();
+
+    // There exists only one `global_state` per full run in GenMC mode.
+    // It is shared by all `GenmcCtx` in this run.
+    // FIXME(genmc): implement multithreading once GenMC supports it.
+    let global_state =
+        Arc::new(GlobalState::new(tcx.target_usize_max(), mode.print_unsupported_warnings()));
+    let genmc_ctx = Rc::new(GenmcCtx::new(config, global_state, mode));
+
+    // `rep` is used to report the progress, Miri will panic on wrap-around.
+    for rep in 0u64.. {
+        tracing::info!("Miri-GenMC loop {}", rep + 1);
+
+        // Prepare for the next execution and inform GenMC about it.
+        genmc_ctx.prepare_next_execution();
+
+        // Execute the program until completion to get the return value, or return if an error happens:
+        let Some(return_code) = eval_entry(genmc_ctx.clone()) else {
+            genmc_ctx.print_genmc_output(genmc_config, tcx);
+            return None;
+        };
+
+        // We inform GenMC that the execution is complete.
+        // If there was an error, we print it.
+        match genmc_ctx.handle_execution_end() {
+            ExecutionEndResult::Continue => continue,
+            ExecutionEndResult::Stop => {
+                let elapsed_time_sec = Instant::now().duration_since(time_start).as_secs_f64();
+                // Print the output for the current mode.
+                if mode == GenmcMode::Estimation {
+                    genmc_ctx.print_estimation_output(genmc_config, elapsed_time_sec);
+                } else {
+                    genmc_ctx.print_verification_output(genmc_config, elapsed_time_sec);
+                }
+                // Return the return code of the last execution.
+                return Some(return_code);
+            }
+            ExecutionEndResult::Error(error) => {
+                // This can be reached for errors that affect the entire execution, not just a specific event.
+                // For instance, linearizability checking and liveness checking report their errors this way.
+                // Neither are supported by Miri-GenMC at the moment though. However, GenMC also
+                // treats races on deallocation as global errors, so this code path is still reachable.
+                // Since we don't have any span information for the error at this point,
+                // we just print GenMC's error string, and the full GenMC output if requested.
+                eprintln!("(GenMC) Error detected: {error}");
+                genmc_ctx.print_genmc_output(genmc_config, tcx);
+                return None;
+            }
+        }
+    }
+    unreachable!()
+}
+
+impl GenmcCtx {
+    /// Print the full output message produced by GenMC if requested, or a hint on how to enable it.
+    ///
+    /// This message can be very verbose and is likely not useful for the average user.
+    /// This function should be called *after* Miri has printed all of its output.
+    fn print_genmc_output(&self, genmc_config: &GenmcConfig, tcx: TyCtxt<'_>) {
+        if genmc_config.print_genmc_output {
+            eprintln!("GenMC error report:");
+            eprintln!("{}", self.get_result_message());
+        } else {
+            tcx.dcx().note(
+                "add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report"
+            );
+        }
+    }
+
+    /// Given the time taken for the estimation mode run, print the expected time range for verification.
+    /// Verbose output also includes information about the expected number of executions and how many estimation rounds were explored or got blocked.
+    fn print_estimation_output(&self, genmc_config: &GenmcConfig, elapsed_time_sec: f64) {
+        let EstimationResult { mean, sd, explored_execs, blocked_execs } =
+            self.get_estimation_results();
+        #[allow(clippy::as_conversions)]
+        let time_per_exec_sec = elapsed_time_sec / (explored_execs as f64 + blocked_execs as f64);
+        let estimated_mean_sec = time_per_exec_sec * mean;
+        let estimated_sd_sec = time_per_exec_sec * sd;
+
+        if genmc_config.verbose_output {
+            eprintln!("Finished estimation in {elapsed_time_sec:.2?}s");
+            if blocked_execs != 0 {
+                eprintln!("  Explored executions: {explored_execs}");
+                eprintln!("  Blocked  executions: {blocked_execs}");
+            }
+            eprintln!("Expected number of executions: {mean:.0} ± {sd:.0}");
+        }
+        // The estimation can be out-of-bounds of an `f64`.
+        if !(mean.is_finite() && mean >= 0.0 && sd.is_finite() && sd >= 0.0) {
+            eprintln!("WARNING: Estimation gave weird results, there may have been an overflow.");
+        }
+        eprintln!("Expected verification time: {estimated_mean_sec:.2}s ± {estimated_sd_sec:.2}s");
+    }
+
+    /// Given the time taken for the verification mode run, print the expected time range for verification.
+    /// Verbose output also includes information about the expected number of executions and how many estimation rounds were explored or got blocked.
+    fn print_verification_output(&self, genmc_config: &GenmcConfig, elapsed_time_sec: f64) {
+        let explored_execution_count = self.get_explored_execution_count();
+        let blocked_execution_count = self.get_blocked_execution_count();
+        eprintln!(
+            "Verification complete with {} executions. No errors found.",
+            explored_execution_count + blocked_execution_count
+        );
+        if genmc_config.verbose_output {
+            if blocked_execution_count > 0 {
+                eprintln!("Number of complete executions explored: {explored_execution_count}");
+                eprintln!("Number of blocked executions seen: {blocked_execution_count}");
+            }
+            eprintln!("Verification took {elapsed_time_sec:.2?}s.");
+        }
+    }
+}
diff --git a/src/tools/miri/src/concurrency/genmc/scheduling.rs b/src/tools/miri/src/concurrency/genmc/scheduling.rs
new file mode 100644
index 0000000000000..b5c23f2d0845f
--- /dev/null
+++ b/src/tools/miri/src/concurrency/genmc/scheduling.rs
@@ -0,0 +1,69 @@
+use genmc_sys::{ActionKind, ExecutionState};
+
+use super::GenmcCtx;
+use crate::{
+    InterpCx, InterpResult, MiriMachine, TerminationInfo, ThreadId, interp_ok, throw_machine_stop,
+};
+
+impl GenmcCtx {
+    pub(crate) fn schedule_thread<'tcx>(
+        &self,
+        ecx: &InterpCx<'tcx, MiriMachine<'tcx>>,
+    ) -> InterpResult<'tcx, ThreadId> {
+        let thread_manager = &ecx.machine.threads;
+        let active_thread_id = thread_manager.active_thread();
+
+        // Determine whether the next instruction in the current thread might be a load.
+        // This is used for the "writes-first" scheduling in GenMC.
+        // Scheduling writes before reads can be beneficial for verification performance.
+        // `Load` is a safe default for the next instruction type if we cannot guarantee that it isn't a load.
+        let curr_thread_next_instr_kind = if !thread_manager.active_thread_ref().is_enabled() {
+            // The current thread can get blocked (e.g., due to a thread join, `Mutex::lock`, assume statement, ...), then we need to ask GenMC for another thread to schedule.
+            // Most to all blocking operations have load semantics, since they wait on something to change in another thread,
+            // e.g., a thread join waiting on another thread to finish (join loads the return value(s) of the other thread),
+            // or a thread waiting for another thread to unlock a `Mutex`, which loads the mutex state (Locked, Unlocked).
+            ActionKind::Load
+        } else {
+            // This thread is still enabled. If it executes a terminator next, we consider yielding,
+            // but in all other cases we just keep running this thread since it never makes sense
+            // to yield before a non-atomic operation.
+            let Some(frame) = thread_manager.active_thread_stack().last() else {
+                return interp_ok(active_thread_id);
+            };
+            let either::Either::Left(loc) = frame.current_loc() else {
+                // We are unwinding, so the next step is definitely not atomic.
+                return interp_ok(active_thread_id);
+            };
+            let basic_block = &frame.body().basic_blocks[loc.block];
+            if let Some(_statement) = basic_block.statements.get(loc.statement_index) {
+                // Statements can't be atomic.
+                return interp_ok(active_thread_id);
+            }
+
+            // FIXME(genmc): determine terminator kind.
+            ActionKind::Load
+        };
+
+        let thread_infos = self.exec_state.thread_id_manager.borrow();
+        let genmc_tid = thread_infos.get_genmc_tid(active_thread_id);
+
+        let mut mc = self.handle.borrow_mut();
+        let pinned_mc = mc.as_mut().unwrap();
+        let result = pinned_mc.schedule_next(genmc_tid, curr_thread_next_instr_kind);
+        // Depending on the exec_state, we either schedule the given thread, or we are finished with this execution.
+        match result.exec_state {
+            ExecutionState::Ok => interp_ok(thread_infos.get_miri_tid(result.next_thread)),
+            ExecutionState::Blocked => throw_machine_stop!(TerminationInfo::GenmcBlockedExecution),
+            ExecutionState::Finished => {
+                let exit_status = self.exec_state.exit_status.get().expect(
+                    "If the execution is finished, we should have a return value from the program.",
+                );
+                throw_machine_stop!(TerminationInfo::Exit {
+                    code: exit_status.exit_code,
+                    leak_check: exit_status.do_leak_check()
+                });
+            }
+            _ => unreachable!(),
+        }
+    }
+}
diff --git a/src/tools/miri/src/concurrency/genmc/thread_id_map.rs b/src/tools/miri/src/concurrency/genmc/thread_id_map.rs
new file mode 100644
index 0000000000000..9676496fe7fea
--- /dev/null
+++ b/src/tools/miri/src/concurrency/genmc/thread_id_map.rs
@@ -0,0 +1,63 @@
+use genmc_sys::GENMC_MAIN_THREAD_ID;
+use rustc_data_structures::fx::FxHashMap;
+
+use crate::ThreadId;
+
+#[derive(Debug)]
+pub struct ThreadIdMap {
+    /// Map from Miri thread IDs to GenMC thread IDs.
+    /// We assume as little as possible about Miri thread IDs, so we use a map.
+    miri_to_genmc: FxHashMap<ThreadId, i32>,
+    /// Map from GenMC thread IDs to Miri thread IDs.
+    /// We control which thread IDs are used, so we choose them in as an incrementing counter.
+    genmc_to_miri: Vec<ThreadId>, // FIXME(genmc): check if this assumption is (and will stay) correct.
+}
+
+impl Default for ThreadIdMap {
+    fn default() -> Self {
+        let miri_to_genmc = [(ThreadId::MAIN_THREAD, GENMC_MAIN_THREAD_ID)].into_iter().collect();
+        let genmc_to_miri = vec![ThreadId::MAIN_THREAD];
+        Self { miri_to_genmc, genmc_to_miri }
+    }
+}
+
+impl ThreadIdMap {
+    pub fn reset(&mut self) {
+        self.miri_to_genmc.clear();
+        self.miri_to_genmc.insert(ThreadId::MAIN_THREAD, GENMC_MAIN_THREAD_ID);
+        self.genmc_to_miri.clear();
+        self.genmc_to_miri.push(ThreadId::MAIN_THREAD);
+    }
+
+    #[must_use]
+    /// Add a new Miri thread to the mapping and dispense a new thread ID for GenMC to use.
+    pub fn add_thread(&mut self, thread_id: ThreadId) -> i32 {
+        // NOTE: We select the new thread ids as integers incremented by one (we use the length as the counter).
+        let next_thread_id = self.genmc_to_miri.len();
+        let genmc_tid = next_thread_id.try_into().unwrap();
+        // If there is already an entry, we override it.
+        // This could happen if Miri were to reuse `ThreadId`s, but we assume that if this happens, the previous thread with that id doesn't exist anymore.
+        self.miri_to_genmc.insert(thread_id, genmc_tid);
+        self.genmc_to_miri.push(thread_id);
+
+        genmc_tid
+    }
+
+    #[must_use]
+    /// Try to get the GenMC thread ID corresponding to a given Miri `ThreadId`.
+    /// Panics if there is no mapping for the given `ThreadId`.
+    pub fn get_genmc_tid(&self, thread_id: ThreadId) -> i32 {
+        *self.miri_to_genmc.get(&thread_id).unwrap()
+    }
+
+    #[must_use]
+    /// Get the Miri `ThreadId` corresponding to a given GenMC thread id.
+    /// Panics if the given thread id isn't valid.
+    pub fn get_miri_tid(&self, genmc_tid: i32) -> ThreadId {
+        let index: usize = genmc_tid.try_into().unwrap();
+        self.genmc_to_miri
+            .get(index)
+            .copied()
+            .expect("A thread id returned from GenMC should exist.")
+    }
+}
diff --git a/src/tools/miri/src/concurrency/init_once.rs b/src/tools/miri/src/concurrency/init_once.rs
index 165215f9270ce..daea20b3779b2 100644
--- a/src/tools/miri/src/concurrency/init_once.rs
+++ b/src/tools/miri/src/concurrency/init_once.rs
@@ -96,10 +96,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         init_once.status = InitOnceStatus::Complete;
 
         // Each complete happens-before the end of the wait
-        if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-            data_race
-                .release_clock(&this.machine.threads, |clock| init_once.clock.clone_from(clock));
-        }
+        this.release_clock(|clock| init_once.clock.clone_from(clock))?;
 
         // Wake up everyone.
         // need to take the queue to avoid having `this` be borrowed multiple times
@@ -125,10 +122,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         init_once.status = InitOnceStatus::Uninitialized;
 
         // Each complete happens-before the end of the wait
-        if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-            data_race
-                .release_clock(&this.machine.threads, |clock| init_once.clock.clone_from(clock));
-        }
+        this.release_clock(|clock| init_once.clock.clone_from(clock))?;
 
         // Wake up one waiting thread, so they can go ahead and try to init this.
         if let Some(waiter) = init_once.waiters.pop_front() {
@@ -142,7 +136,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     /// Synchronize with the previous completion of an InitOnce.
     /// Must only be called after checking that it is complete.
     #[inline]
-    fn init_once_observe_completed(&mut self, init_once_ref: &InitOnceRef) {
+    fn init_once_observe_completed(&mut self, init_once_ref: &InitOnceRef) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
         let init_once = init_once_ref.0.borrow();
 
@@ -152,6 +146,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             "observing the completion of incomplete init once"
         );
 
-        this.acquire_clock(&init_once.clock);
+        this.acquire_clock(&init_once.clock)
     }
 }
diff --git a/src/tools/miri/src/concurrency/mod.rs b/src/tools/miri/src/concurrency/mod.rs
index 435615efd9fa7..9dae858592f1f 100644
--- a/src/tools/miri/src/concurrency/mod.rs
+++ b/src/tools/miri/src/concurrency/mod.rs
@@ -22,5 +22,5 @@ pub mod weak_memory;
 mod genmc;
 
 pub use self::data_race_handler::{AllocDataRaceHandler, GlobalDataRaceHandler};
-pub use self::genmc::{GenmcConfig, GenmcCtx};
+pub use self::genmc::{ExitType, GenmcConfig, GenmcCtx, run_genmc_mode};
 pub use self::vector_clock::VClock;
diff --git a/src/tools/miri/src/concurrency/sync.rs b/src/tools/miri/src/concurrency/sync.rs
index 179094db0febd..15d486c27e36a 100644
--- a/src/tools/miri/src/concurrency/sync.rs
+++ b/src/tools/miri/src/concurrency/sync.rs
@@ -204,7 +204,7 @@ pub(super) trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
             this.mutex_enqueue_and_block(mutex_ref, Some((retval, dest)));
         } else {
             // We can have it right now!
-            this.mutex_lock(&mutex_ref);
+            this.mutex_lock(&mutex_ref)?;
             // Don't forget to write the return value.
             this.write_scalar(retval, &dest)?;
         }
@@ -338,7 +338,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     }
 
     /// Lock by setting the mutex owner and increasing the lock count.
-    fn mutex_lock(&mut self, mutex_ref: &MutexRef) {
+    fn mutex_lock(&mut self, mutex_ref: &MutexRef) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
         let thread = this.active_thread();
         let mut mutex = mutex_ref.0.borrow_mut();
@@ -352,9 +352,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             mutex.owner = Some(thread);
         }
         mutex.lock_count = mutex.lock_count.strict_add(1);
-        if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-            data_race.acquire_clock(&mutex.clock, &this.machine.threads);
-        }
+        this.acquire_clock(&mutex.clock)?;
+        interp_ok(())
     }
 
     /// Try unlocking by decreasing the lock count and returning the old lock
@@ -377,11 +376,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 // The mutex is completely unlocked. Try transferring ownership
                 // to another thread.
 
-                if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-                    data_race.release_clock(&this.machine.threads, |clock| {
-                        mutex.clock.clone_from(clock)
-                    });
-                }
+                this.release_clock(|clock| mutex.clock.clone_from(clock))?;
                 let thread_id = mutex.queue.pop_front();
                 // We need to drop our mutex borrow before unblock_thread
                 // because it will be borrowed again in the unblock callback.
@@ -425,7 +420,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     assert_eq!(unblock, UnblockKind::Ready);
 
                     assert!(mutex_ref.owner().is_none());
-                    this.mutex_lock(&mutex_ref);
+                    this.mutex_lock(&mutex_ref)?;
 
                     if let Some((retval, dest)) = retval_dest {
                         this.write_scalar(retval, &dest)?;
@@ -439,7 +434,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
     /// Read-lock the lock by adding the `reader` the list of threads that own
     /// this lock.
-    fn rwlock_reader_lock(&mut self, rwlock_ref: &RwLockRef) {
+    fn rwlock_reader_lock(&mut self, rwlock_ref: &RwLockRef) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
         let thread = this.active_thread();
         trace!("rwlock_reader_lock: now also held (one more time) by {:?}", thread);
@@ -447,9 +442,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         assert!(!rwlock.is_write_locked(), "the lock is write locked");
         let count = rwlock.readers.entry(thread).or_insert(0);
         *count = count.strict_add(1);
-        if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-            data_race.acquire_clock(&rwlock.clock_unlocked, &this.machine.threads);
-        }
+        this.acquire_clock(&rwlock.clock_unlocked)?;
+        interp_ok(())
     }
 
     /// Try read-unlock the lock for the current threads and potentially give the lock to a new owner.
@@ -472,12 +466,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             }
             Entry::Vacant(_) => return interp_ok(false), // we did not even own this lock
         }
-        if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-            // Add this to the shared-release clock of all concurrent readers.
-            data_race.release_clock(&this.machine.threads, |clock| {
-                rwlock.clock_current_readers.join(clock)
-            });
-        }
+        // Add this to the shared-release clock of all concurrent readers.
+        this.release_clock(|clock| rwlock.clock_current_readers.join(clock))?;
 
         // The thread was a reader. If the lock is not held any more, give it to a writer.
         if rwlock.is_locked().not() {
@@ -521,7 +511,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 }
                 |this, unblock: UnblockKind| {
                     assert_eq!(unblock, UnblockKind::Ready);
-                    this.rwlock_reader_lock(&rwlock_ref);
+                    this.rwlock_reader_lock(&rwlock_ref)?;
                     this.write_scalar(retval, &dest)?;
                     interp_ok(())
                 }
@@ -531,7 +521,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
     /// Lock by setting the writer that owns the lock.
     #[inline]
-    fn rwlock_writer_lock(&mut self, rwlock_ref: &RwLockRef) {
+    fn rwlock_writer_lock(&mut self, rwlock_ref: &RwLockRef) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
         let thread = this.active_thread();
         trace!("rwlock_writer_lock: now held by {:?}", thread);
@@ -539,9 +529,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let mut rwlock = rwlock_ref.0.borrow_mut();
         assert!(!rwlock.is_locked(), "the rwlock is already locked");
         rwlock.writer = Some(thread);
-        if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-            data_race.acquire_clock(&rwlock.clock_unlocked, &this.machine.threads);
-        }
+        this.acquire_clock(&rwlock.clock_unlocked)?;
+        interp_ok(())
     }
 
     /// Try to unlock an rwlock held by the current thread.
@@ -559,11 +548,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             rwlock.writer = None;
             trace!("rwlock_writer_unlock: unlocked by {:?}", thread);
             // Record release clock for next lock holder.
-            if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-                data_race.release_clock(&this.machine.threads, |clock| {
-                    rwlock.clock_unlocked.clone_from(clock)
-                });
-            }
+            this.release_clock(|clock| rwlock.clock_unlocked.clone_from(clock))?;
 
             // The thread was a writer.
             //
@@ -613,7 +598,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 }
                 |this, unblock: UnblockKind| {
                     assert_eq!(unblock, UnblockKind::Ready);
-                    this.rwlock_writer_lock(&rwlock_ref);
+                    this.rwlock_writer_lock(&rwlock_ref)?;
                     this.write_scalar(retval, &dest)?;
                     interp_ok(())
                 }
@@ -663,12 +648,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     match unblock {
                         UnblockKind::Ready => {
                             // The condvar was signaled. Make sure we get the clock for that.
-                            if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-                                data_race.acquire_clock(
+                            this.acquire_clock(
                                     &condvar_ref.0.borrow().clock,
-                                    &this.machine.threads,
-                                );
-                            }
+                                )?;
                             // Try to acquire the mutex.
                             // The timeout only applies to the first wait (until the signal), not for mutex acquisition.
                             this.condvar_reacquire_mutex(mutex_ref, retval_succ, dest)
@@ -695,9 +677,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let mut condvar = condvar_ref.0.borrow_mut();
 
         // Each condvar signal happens-before the end of the condvar wake
-        if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-            data_race.release_clock(&this.machine.threads, |clock| condvar.clock.clone_from(clock));
-        }
+        this.release_clock(|clock| condvar.clock.clone_from(clock))?;
         let Some(waiter) = condvar.waiters.pop_front() else {
             return interp_ok(false);
         };
@@ -736,9 +716,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                         UnblockKind::Ready => {
                             let futex = futex_ref.0.borrow();
                             // Acquire the clock of the futex.
-                            if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-                                data_race.acquire_clock(&futex.clock, &this.machine.threads);
-                            }
+                            this.acquire_clock(&futex.clock)?;
                         },
                         UnblockKind::TimedOut => {
                             // Remove the waiter from the futex.
@@ -766,9 +744,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let mut futex = futex_ref.0.borrow_mut();
 
         // Each futex-wake happens-before the end of the futex wait
-        if let Some(data_race) = this.machine.data_race.as_vclocks_ref() {
-            data_race.release_clock(&this.machine.threads, |clock| futex.clock.clone_from(clock));
-        }
+        this.release_clock(|clock| futex.clock.clone_from(clock))?;
 
         // Remove `count` of the threads in the queue that match any of the bits in the bitset.
         // We collect all of them before unblocking because the unblock callback may access the
diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs
index fe1ef86ccd31e..00c5e337b1e9e 100644
--- a/src/tools/miri/src/concurrency/thread.rs
+++ b/src/tools/miri/src/concurrency/thread.rs
@@ -209,6 +209,11 @@ impl<'tcx> Thread<'tcx> {
         self.thread_name.as_deref()
     }
 
+    /// Return whether this thread is enabled or not.
+    pub fn is_enabled(&self) -> bool {
+        self.state.is_enabled()
+    }
+
     /// Get the name of the current thread for display purposes; will include thread ID if not set.
     fn thread_display_name(&self, id: ThreadId) -> String {
         if let Some(ref thread_name) = self.thread_name {
@@ -404,6 +409,7 @@ pub struct ThreadManager<'tcx> {
     /// A mapping from a thread-local static to the thread specific allocation.
     thread_local_allocs: FxHashMap<(DefId, ThreadId), StrictPointer>,
     /// A flag that indicates that we should change the active thread.
+    /// Completely ignored in GenMC mode.
     yield_active_thread: bool,
     /// A flag that indicates that we should do round robin scheduling of threads else randomized scheduling is used.
     fixed_scheduling: bool,
@@ -676,13 +682,6 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
     #[inline]
     fn run_on_stack_empty(&mut self) -> InterpResult<'tcx, Poll<()>> {
         let this = self.eval_context_mut();
-        // Inform GenMC that a thread has finished all user code. GenMC needs to know this for scheduling.
-        // FIXME(GenMC): Thread-local destructors *are* user code, so this is odd. Also now that we
-        // support pre-main constructors, it can get called there as well.
-        if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
-            let thread_id = this.active_thread();
-            genmc_ctx.handle_thread_stack_empty(thread_id);
-        }
         let mut callback = this
             .active_thread_mut()
             .on_stack_empty
@@ -703,19 +702,19 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
     /// If GenMC mode is active, the scheduling is instead handled by GenMC.
     fn schedule(&mut self) -> InterpResult<'tcx, SchedulingAction> {
         let this = self.eval_context_mut();
-        // In GenMC mode, we let GenMC do the scheduling
+
+        // In GenMC mode, we let GenMC do the scheduling.
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
             let next_thread_id = genmc_ctx.schedule_thread(this)?;
 
             let thread_manager = &mut this.machine.threads;
             thread_manager.active_thread = next_thread_id;
-            thread_manager.yield_active_thread = false;
 
             assert!(thread_manager.threads[thread_manager.active_thread].state.is_enabled());
             return interp_ok(SchedulingAction::ExecuteStep);
         }
 
-        // We are not in GenMC mode, so we control the schedule
+        // We are not in GenMC mode, so we control the scheduling.
         let thread_manager = &mut this.machine.threads;
         let clock = &this.machine.monotonic_clock;
         let rng = this.machine.rng.get_mut();
@@ -863,7 +862,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             GlobalDataRaceHandler::Vclocks(data_race) =>
                 data_race.thread_created(&this.machine.threads, new_thread_id, current_span),
             GlobalDataRaceHandler::Genmc(genmc_ctx) =>
-                genmc_ctx.handle_thread_create(&this.machine.threads, new_thread_id)?,
+                genmc_ctx.handle_thread_create(
+                    &this.machine.threads,
+                    start_routine,
+                    &func_arg,
+                    new_thread_id,
+                )?,
         }
         // Write the current thread-id, switch to the next thread later
         // to treat this write operation as occurring on the current thread.
@@ -916,13 +920,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let thread = this.active_thread_mut();
         assert!(thread.stack.is_empty(), "only threads with an empty stack can be terminated");
         thread.state = ThreadState::Terminated;
-        match &mut this.machine.data_race {
-            GlobalDataRaceHandler::None => {}
-            GlobalDataRaceHandler::Vclocks(data_race) =>
-                data_race.thread_terminated(&this.machine.threads),
-            GlobalDataRaceHandler::Genmc(genmc_ctx) =>
-                genmc_ctx.handle_thread_finish(&this.machine.threads)?,
-        }
+
         // Deallocate TLS.
         let gone_thread = this.active_thread();
         {
@@ -953,6 +951,18 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 }
             }
         }
+
+        match &mut this.machine.data_race {
+            GlobalDataRaceHandler::None => {}
+            GlobalDataRaceHandler::Vclocks(data_race) =>
+                data_race.thread_terminated(&this.machine.threads),
+            GlobalDataRaceHandler::Genmc(genmc_ctx) => {
+                // Inform GenMC that the thread finished.
+                // This needs to happen once all accesses to the thread are done, including freeing any TLS statics.
+                genmc_ctx.handle_thread_finish(&this.machine.threads)
+            }
+        }
+
         // Unblock joining threads.
         let unblock_reason = BlockReason::Join(gone_thread);
         let threads = &this.machine.threads.threads;
@@ -978,6 +988,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         callback: DynUnblockCallback<'tcx>,
     ) {
         let this = self.eval_context_mut();
+        if timeout.is_some() && this.machine.data_race.as_genmc_ref().is_some() {
+            panic!("Unimplemented: Timeouts not yet supported in GenMC mode.");
+        }
         let timeout = timeout.map(|(clock, anchor, duration)| {
             let anchor = match clock {
                 TimeoutClock::RealTime => {
@@ -1076,6 +1089,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 "{:?} blocked on {:?} when trying to join",
                 thread_mgr.active_thread, joined_thread_id
             );
+            if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
+                genmc_ctx.handle_thread_join(thread_mgr.active_thread, joined_thread_id)?;
+            }
+
             // The joined thread is still running, we need to wait for it.
             // Once we get unblocked, perform the appropriate synchronization and write the return value.
             let dest = return_dest.clone();
diff --git a/src/tools/miri/src/concurrency/weak_memory.rs b/src/tools/miri/src/concurrency/weak_memory.rs
index a752ef7e45480..6ebade6a568a2 100644
--- a/src/tools/miri/src/concurrency/weak_memory.rs
+++ b/src/tools/miri/src/concurrency/weak_memory.rs
@@ -62,8 +62,11 @@
 //! You can refer to test cases in weak_memory/extra_cpp.rs and weak_memory/extra_cpp_unsafe.rs for examples of these operations.
 
 // Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3:
-// 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri),
-// but this is not used anywhere so it's omitted here.
+// 1. In the operational semantics, loads acquire the vector clock of the atomic location
+// irrespective of which store buffer element is loaded. That's incorrect; the synchronization clock
+// needs to be tracked per-store-buffer-element. (The paper has a field "clocks" for that purpose,
+// but it is not actuallt used.) tsan11 does this correctly
+// (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L305).
 //
 // 2. In the operational semantics, each store element keeps the timestamp of a thread when it loads from the store.
 // If the same thread loads from the same store element multiple times, then the timestamps at all loads are saved in a list of load elements.
@@ -138,16 +141,17 @@ enum LoadRecency {
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 struct StoreElement {
-    /// The identifier of the vector index, corresponding to a thread
-    /// that performed the store.
-    store_index: VectorIdx,
+    /// The thread that performed the store.
+    store_thread: VectorIdx,
+    /// The timestamp of the storing thread when it performed the store
+    store_timestamp: VTimestamp,
+
+    /// The vector clock that can be acquired by loading this store.
+    sync_clock: VClock,
 
     /// Whether this store is SC.
     is_seqcst: bool,
 
-    /// The timestamp of the storing thread when it performed the store
-    timestamp: VTimestamp,
-
     /// The value of this store. `None` means uninitialized.
     // FIXME: Currently, we cannot represent partial initialization.
     val: Option<Scalar>,
@@ -159,9 +163,12 @@ struct StoreElement {
 
 #[derive(Debug, Clone, PartialEq, Eq, Default)]
 struct LoadInfo {
-    /// Timestamp of first loads from this store element by each thread
+    /// Timestamp of first loads from this store element by each thread.
     timestamps: FxHashMap<VectorIdx, VTimestamp>,
-    /// Whether this store element has been read by an SC load
+    /// Whether this store element has been read by an SC load.
+    /// This is crucial to ensure we respect coherence-ordered-before. Concretely we use
+    /// this to ensure that if a store element is seen by an SC load, then all later SC loads
+    /// cannot see `mo`-earlier store elements.
     sc_loaded: bool,
 }
 
@@ -243,8 +250,10 @@ impl<'tcx> StoreBuffer {
         let store_elem = StoreElement {
             // The thread index and timestamp of the initialisation write
             // are never meaningfully used, so it's fine to leave them as 0
-            store_index: VectorIdx::from(0),
-            timestamp: VTimestamp::ZERO,
+            store_thread: VectorIdx::from(0),
+            store_timestamp: VTimestamp::ZERO,
+            // The initialization write is non-atomic so nothing can be acquired.
+            sync_clock: VClock::default(),
             val: init,
             is_seqcst: false,
             load_info: RefCell::new(LoadInfo::default()),
@@ -273,7 +282,7 @@ impl<'tcx> StoreBuffer {
         thread_mgr: &ThreadManager<'_>,
         is_seqcst: bool,
         rng: &mut (impl rand::Rng + ?Sized),
-        validate: impl FnOnce() -> InterpResult<'tcx>,
+        validate: impl FnOnce(Option<&VClock>) -> InterpResult<'tcx>,
     ) -> InterpResult<'tcx, (Option<Scalar>, LoadRecency)> {
         // Having a live borrow to store_buffer while calling validate_atomic_load is fine
         // because the race detector doesn't touch store_buffer
@@ -290,7 +299,7 @@ impl<'tcx> StoreBuffer {
         // after we've picked a store element from the store buffer, as presented
         // in ATOMIC LOAD rule of the paper. This is because fetch_store
         // requires access to ThreadClockSet.clock, which is updated by the race detector
-        validate()?;
+        validate(Some(&store_elem.sync_clock))?;
 
         let (index, clocks) = global.active_thread_state(thread_mgr);
         let loaded = store_elem.load_impl(index, &clocks, is_seqcst);
@@ -303,10 +312,11 @@ impl<'tcx> StoreBuffer {
         global: &DataRaceState,
         thread_mgr: &ThreadManager<'_>,
         is_seqcst: bool,
+        sync_clock: VClock,
     ) -> InterpResult<'tcx> {
         let (index, clocks) = global.active_thread_state(thread_mgr);
 
-        self.store_impl(val, index, &clocks.clock, is_seqcst);
+        self.store_impl(val, index, &clocks.clock, is_seqcst, sync_clock);
         interp_ok(())
     }
 
@@ -333,7 +343,9 @@ impl<'tcx> StoreBuffer {
                     return false;
                 }
 
-                keep_searching = if store_elem.timestamp <= clocks.clock[store_elem.store_index] {
+                keep_searching = if store_elem.store_timestamp
+                    <= clocks.clock[store_elem.store_thread]
+                {
                     // CoWR: if a store happens-before the current load,
                     // then we can't read-from anything earlier in modification order.
                     // C++20 §6.9.2.2 [intro.races] paragraph 18
@@ -345,7 +357,7 @@ impl<'tcx> StoreBuffer {
                     // then we cannot read-from anything earlier in modification order.
                     // C++20 §6.9.2.2 [intro.races] paragraph 16
                     false
-                } else if store_elem.timestamp <= clocks.write_seqcst[store_elem.store_index]
+                } else if store_elem.store_timestamp <= clocks.write_seqcst[store_elem.store_thread]
                     && store_elem.is_seqcst
                 {
                     // The current non-SC load, which may be sequenced-after an SC fence,
@@ -353,7 +365,7 @@ impl<'tcx> StoreBuffer {
                     // C++17 §32.4 [atomics.order] paragraph 4
                     false
                 } else if is_seqcst
-                    && store_elem.timestamp <= clocks.read_seqcst[store_elem.store_index]
+                    && store_elem.store_timestamp <= clocks.read_seqcst[store_elem.store_thread]
                 {
                     // The current SC load cannot read-before the last store sequenced-before
                     // the last SC fence.
@@ -391,17 +403,19 @@ impl<'tcx> StoreBuffer {
         }
     }
 
-    /// ATOMIC STORE IMPL in the paper (except we don't need the location's vector clock)
+    /// ATOMIC STORE IMPL in the paper
     fn store_impl(
         &mut self,
         val: Scalar,
         index: VectorIdx,
         thread_clock: &VClock,
         is_seqcst: bool,
+        sync_clock: VClock,
     ) {
         let store_elem = StoreElement {
-            store_index: index,
-            timestamp: thread_clock[index],
+            store_thread: index,
+            store_timestamp: thread_clock[index],
+            sync_clock,
             // In the language provided in the paper, an atomic store takes the value from a
             // non-atomic memory location.
             // But we already have the immediate value here so we don't need to do the memory
@@ -419,7 +433,7 @@ impl<'tcx> StoreBuffer {
             // so that in a later SC load, only the last SC store (i.e. this one) or stores that
             // aren't ordered by hb with the last SC is picked.
             self.buffer.iter_mut().rev().for_each(|elem| {
-                if elem.timestamp <= thread_clock[elem.store_index] {
+                if elem.store_timestamp <= thread_clock[elem.store_thread] {
                     elem.is_seqcst = true;
                 }
             })
@@ -462,7 +476,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?;
         if let (
             crate::AllocExtra {
-                data_race: AllocDataRaceHandler::Vclocks(_, Some(alloc_buffers)),
+                data_race: AllocDataRaceHandler::Vclocks(data_race_clocks, Some(alloc_buffers)),
                 ..
             },
             crate::MiriMachine {
@@ -475,19 +489,29 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 global.sc_write(threads);
             }
             let range = alloc_range(base_offset, place.layout.size);
+            let sync_clock = data_race_clocks.sync_clock(range);
             let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Some(init))?;
+            // The RMW always reads from the most recent store.
             buffer.read_from_last_store(global, threads, atomic == AtomicRwOrd::SeqCst);
-            buffer.buffered_write(new_val, global, threads, atomic == AtomicRwOrd::SeqCst)?;
+            buffer.buffered_write(
+                new_val,
+                global,
+                threads,
+                atomic == AtomicRwOrd::SeqCst,
+                sync_clock,
+            )?;
         }
         interp_ok(())
     }
 
+    /// The argument to `validate` is the synchronization clock of the memory that is being read,
+    /// if we are reading from a store buffer element.
     fn buffered_atomic_read(
         &self,
         place: &MPlaceTy<'tcx>,
         atomic: AtomicReadOrd,
         latest_in_mo: Scalar,
-        validate: impl FnOnce() -> InterpResult<'tcx>,
+        validate: impl FnOnce(Option<&VClock>) -> InterpResult<'tcx>,
     ) -> InterpResult<'tcx, Option<Scalar>> {
         let this = self.eval_context_ref();
         'fallback: {
@@ -525,7 +549,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         }
 
         // Race detector or weak memory disabled, simply read the latest value
-        validate()?;
+        validate(None)?;
         interp_ok(Some(latest_in_mo))
     }
 
@@ -533,6 +557,8 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     ///
     /// `init` says with which value to initialize the store buffer in case there wasn't a store
     /// buffer for this memory range before.
+    ///
+    /// Must be called *after* `validate_atomic_store` to ensure that `sync_clock` is up-to-date.
     fn buffered_atomic_write(
         &mut self,
         val: Scalar,
@@ -544,7 +570,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr(), 0)?;
         if let (
             crate::AllocExtra {
-                data_race: AllocDataRaceHandler::Vclocks(_, Some(alloc_buffers)),
+                data_race: AllocDataRaceHandler::Vclocks(data_race_clocks, Some(alloc_buffers)),
                 ..
             },
             crate::MiriMachine {
@@ -556,9 +582,18 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 global.sc_write(threads);
             }
 
-            let buffer = alloc_buffers
-                .get_or_create_store_buffer_mut(alloc_range(base_offset, dest.layout.size), init)?;
-            buffer.buffered_write(val, global, threads, atomic == AtomicWriteOrd::SeqCst)?;
+            let range = alloc_range(base_offset, dest.layout.size);
+            // It's a bit annoying that we have to go back to the data race part to get the clock...
+            // but it does make things a lot simpler.
+            let sync_clock = data_race_clocks.sync_clock(range);
+            let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, init)?;
+            buffer.buffered_write(
+                val,
+                global,
+                threads,
+                atomic == AtomicWriteOrd::SeqCst,
+                sync_clock,
+            )?;
         }
 
         // Caller should've written to dest with the vanilla scalar write, we do nothing here
diff --git a/src/tools/miri/src/diagnostics.rs b/src/tools/miri/src/diagnostics.rs
index b5ca9601547eb..15f7ccbabca6a 100644
--- a/src/tools/miri/src/diagnostics.rs
+++ b/src/tools/miri/src/diagnostics.rs
@@ -31,8 +31,9 @@ pub enum TerminationInfo {
     },
     Int2PtrWithStrictProvenance,
     Deadlock,
-    /// In GenMC mode, an execution can get stuck in certain cases. This is not an error.
-    GenmcStuckExecution,
+    /// In GenMC mode, executions can get blocked, which stops the current execution without running any cleanup.
+    /// No leak checks should be performed if this happens, since they would give false positives.
+    GenmcBlockedExecution,
     MultipleSymbolDefinitions {
         link_name: Symbol,
         first: SpanData,
@@ -77,7 +78,8 @@ impl fmt::Display for TerminationInfo {
             StackedBorrowsUb { msg, .. } => write!(f, "{msg}"),
             TreeBorrowsUb { title, .. } => write!(f, "{title}"),
             Deadlock => write!(f, "the evaluated program deadlocked"),
-            GenmcStuckExecution => write!(f, "GenMC determined that the execution got stuck"),
+            GenmcBlockedExecution =>
+                write!(f, "GenMC determined that the execution got blocked (this is not an error)"),
             MultipleSymbolDefinitions { link_name, .. } =>
                 write!(f, "multiple definitions of symbol `{link_name}`"),
             SymbolShimClashing { link_name, .. } =>
@@ -139,6 +141,13 @@ pub enum NonHaltingDiagnostic {
         ptr: Pointer,
     },
     ExternTypeReborrow,
+    GenmcCompareExchangeWeak,
+    GenmcCompareExchangeOrderingMismatch {
+        success_ordering: AtomicRwOrd,
+        upgraded_success_ordering: AtomicRwOrd,
+        failure_ordering: AtomicReadOrd,
+        effective_failure_ordering: AtomicReadOrd,
+    },
 }
 
 /// Level of Miri specific diagnostics
@@ -243,11 +252,12 @@ pub fn report_error<'tcx>(
                 labels.push(format!("this thread got stuck here"));
                 None
             }
-            GenmcStuckExecution => {
-                // This case should only happen in GenMC mode. We treat it like a normal program exit.
+            GenmcBlockedExecution => {
+                // This case should only happen in GenMC mode.
                 assert!(ecx.machine.data_race.as_genmc_ref().is_some());
-                tracing::info!("GenMC: found stuck execution");
-                return Some((0, true));
+                // The program got blocked by GenMC without finishing the execution.
+                // No cleanup code was executed, so we don't do any leak checks.
+                return Some((0, false));
             }
             MultipleSymbolDefinitions { .. } | SymbolShimClashing { .. } => None,
         };
@@ -634,6 +644,8 @@ impl<'tcx> MiriMachine<'tcx> {
                 ("sharing memory with a native function".to_string(), DiagLevel::Warning),
             ExternTypeReborrow =>
                 ("reborrow of reference to `extern type`".to_string(), DiagLevel::Warning),
+            GenmcCompareExchangeWeak | GenmcCompareExchangeOrderingMismatch { .. } =>
+                ("GenMC might miss possible behaviors of this code".to_string(), DiagLevel::Warning),
             CreatedPointerTag(..)
             | PoppedPointerTag(..)
             | CreatedAlloc(..)
@@ -672,6 +684,23 @@ impl<'tcx> MiriMachine<'tcx> {
                 format!("weak memory emulation: outdated value returned from load at {ptr}"),
             ExternTypeReborrow =>
                 format!("reborrow of a reference to `extern type` is not properly supported"),
+            GenmcCompareExchangeWeak =>
+                "GenMC currently does not model spurious failures of `compare_exchange_weak`. Miri with GenMC might miss bugs related to spurious failures."
+                    .to_string(),
+            GenmcCompareExchangeOrderingMismatch {
+                success_ordering,
+                upgraded_success_ordering,
+                failure_ordering,
+                effective_failure_ordering,
+            } => {
+                let was_upgraded_msg = if success_ordering != upgraded_success_ordering {
+                    format!("Success ordering '{success_ordering:?}' was upgraded to '{upgraded_success_ordering:?}' to match failure ordering '{failure_ordering:?}'")
+                } else {
+                    assert_ne!(failure_ordering, effective_failure_ordering);
+                    format!("Due to success ordering '{success_ordering:?}', the failure ordering '{failure_ordering:?}' is treated like '{effective_failure_ordering:?}'")
+                };
+                format!("GenMC currently does not model the failure ordering for `compare_exchange`. {was_upgraded_msg}. Miri with GenMC might miss bugs related to this memory access.")
+            }
         };
 
         let notes = match &e {
diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs
index caed98f04f32e..82ca38c3752bc 100644
--- a/src/tools/miri/src/eval.rs
+++ b/src/tools/miri/src/eval.rs
@@ -247,8 +247,21 @@ impl<'tcx> MainThreadState<'tcx> {
                 // to be like a global `static`, so that all memory reached by it is considered to "not leak".
                 this.terminate_active_thread(TlsAllocAction::Leak)?;
 
-                // Stop interpreter loop.
-                throw_machine_stop!(TerminationInfo::Exit { code: exit_code, leak_check: true });
+                // In GenMC mode, we do not immediately stop execution on main thread exit.
+                if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
+                    // If there's no error, execution will continue (on another thread).
+                    genmc_ctx.handle_exit(
+                        ThreadId::MAIN_THREAD,
+                        exit_code,
+                        crate::concurrency::ExitType::MainThreadFinish,
+                    )?;
+                } else {
+                    // Stop interpreter loop.
+                    throw_machine_stop!(TerminationInfo::Exit {
+                        code: exit_code,
+                        leak_check: true
+                    });
+                }
             }
         }
         interp_ok(Poll::Pending)
@@ -449,10 +462,6 @@ pub fn eval_entry<'tcx>(
     // Copy setting before we move `config`.
     let ignore_leaks = config.ignore_leaks;
 
-    if let Some(genmc_ctx) = &genmc_ctx {
-        genmc_ctx.handle_execution_start();
-    }
-
     let mut ecx = match create_ecx(tcx, entry_id, entry_type, config, genmc_ctx).report_err() {
         Ok(v) => v,
         Err(err) => {
@@ -476,15 +485,6 @@ pub fn eval_entry<'tcx>(
     // Show diagnostic, if any.
     let (return_code, leak_check) = report_error(&ecx, err)?;
 
-    // We inform GenMC that the execution is complete.
-    if let Some(genmc_ctx) = ecx.machine.data_race.as_genmc_ref()
-        && let Err(error) = genmc_ctx.handle_execution_end(&ecx)
-    {
-        // FIXME(GenMC): Improve error reporting.
-        tcx.dcx().err(format!("GenMC returned an error: \"{error}\""));
-        return None;
-    }
-
     // If we get here there was no fatal error.
 
     // Possibly check for memory leaks.
diff --git a/src/tools/miri/src/intrinsics/atomic.rs b/src/tools/miri/src/intrinsics/atomic.rs
index e634125292754..9bb0ab70de236 100644
--- a/src/tools/miri/src/intrinsics/atomic.rs
+++ b/src/tools/miri/src/intrinsics/atomic.rs
@@ -5,10 +5,13 @@ use rustc_middle::{mir, ty};
 use super::check_intrinsic_arg_count;
 use crate::*;
 
-pub enum AtomicOp {
-    /// The `bool` indicates whether the result of the operation should be negated (`UnOp::Not`,
-    /// must be a boolean-typed operation).
-    MirOp(mir::BinOp, bool),
+pub enum AtomicRmwOp {
+    MirOp {
+        op: mir::BinOp,
+        /// Indicates whether the result of the operation should be negated (`UnOp::Not`, must be a
+        /// boolean-typed operation).
+        neg: bool,
+    },
     Max,
     Min,
 }
@@ -106,55 +109,85 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
             "or" => {
                 let ord = get_ord_at(2);
-                this.atomic_rmw_op(args, dest, AtomicOp::MirOp(BinOp::BitOr, false), rw_ord(ord))?;
+                this.atomic_rmw_op(
+                    args,
+                    dest,
+                    AtomicRmwOp::MirOp { op: BinOp::BitOr, neg: false },
+                    rw_ord(ord),
+                )?;
             }
             "xor" => {
                 let ord = get_ord_at(2);
-                this.atomic_rmw_op(args, dest, AtomicOp::MirOp(BinOp::BitXor, false), rw_ord(ord))?;
+                this.atomic_rmw_op(
+                    args,
+                    dest,
+                    AtomicRmwOp::MirOp { op: BinOp::BitXor, neg: false },
+                    rw_ord(ord),
+                )?;
             }
             "and" => {
                 let ord = get_ord_at(2);
-                this.atomic_rmw_op(args, dest, AtomicOp::MirOp(BinOp::BitAnd, false), rw_ord(ord))?;
+                this.atomic_rmw_op(
+                    args,
+                    dest,
+                    AtomicRmwOp::MirOp { op: BinOp::BitAnd, neg: false },
+                    rw_ord(ord),
+                )?;
             }
             "nand" => {
                 let ord = get_ord_at(2);
-                this.atomic_rmw_op(args, dest, AtomicOp::MirOp(BinOp::BitAnd, true), rw_ord(ord))?;
+                this.atomic_rmw_op(
+                    args,
+                    dest,
+                    AtomicRmwOp::MirOp { op: BinOp::BitAnd, neg: true },
+                    rw_ord(ord),
+                )?;
             }
             "xadd" => {
                 let ord = get_ord_at(2);
-                this.atomic_rmw_op(args, dest, AtomicOp::MirOp(BinOp::Add, false), rw_ord(ord))?;
+                this.atomic_rmw_op(
+                    args,
+                    dest,
+                    AtomicRmwOp::MirOp { op: BinOp::Add, neg: false },
+                    rw_ord(ord),
+                )?;
             }
             "xsub" => {
                 let ord = get_ord_at(2);
-                this.atomic_rmw_op(args, dest, AtomicOp::MirOp(BinOp::Sub, false), rw_ord(ord))?;
+                this.atomic_rmw_op(
+                    args,
+                    dest,
+                    AtomicRmwOp::MirOp { op: BinOp::Sub, neg: false },
+                    rw_ord(ord),
+                )?;
             }
             "min" => {
                 let ord = get_ord_at(1);
                 // Later we will use the type to indicate signed vs unsigned,
                 // so make sure it matches the intrinsic name.
                 assert!(matches!(args[1].layout.ty.kind(), ty::Int(_)));
-                this.atomic_rmw_op(args, dest, AtomicOp::Min, rw_ord(ord))?;
+                this.atomic_rmw_op(args, dest, AtomicRmwOp::Min, rw_ord(ord))?;
             }
             "umin" => {
                 let ord = get_ord_at(1);
                 // Later we will use the type to indicate signed vs unsigned,
                 // so make sure it matches the intrinsic name.
                 assert!(matches!(args[1].layout.ty.kind(), ty::Uint(_)));
-                this.atomic_rmw_op(args, dest, AtomicOp::Min, rw_ord(ord))?;
+                this.atomic_rmw_op(args, dest, AtomicRmwOp::Min, rw_ord(ord))?;
             }
             "max" => {
                 let ord = get_ord_at(1);
                 // Later we will use the type to indicate signed vs unsigned,
                 // so make sure it matches the intrinsic name.
                 assert!(matches!(args[1].layout.ty.kind(), ty::Int(_)));
-                this.atomic_rmw_op(args, dest, AtomicOp::Max, rw_ord(ord))?;
+                this.atomic_rmw_op(args, dest, AtomicRmwOp::Max, rw_ord(ord))?;
             }
             "umax" => {
                 let ord = get_ord_at(1);
                 // Later we will use the type to indicate signed vs unsigned,
                 // so make sure it matches the intrinsic name.
                 assert!(matches!(args[1].layout.ty.kind(), ty::Uint(_)));
-                this.atomic_rmw_op(args, dest, AtomicOp::Max, rw_ord(ord))?;
+                this.atomic_rmw_op(args, dest, AtomicRmwOp::Max, rw_ord(ord))?;
             }
 
             _ => return interp_ok(EmulateItemResult::NotSupported),
@@ -222,8 +255,8 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
         &mut self,
         args: &[OpTy<'tcx>],
         dest: &MPlaceTy<'tcx>,
-        atomic_op: AtomicOp,
-        atomic: AtomicRwOrd,
+        atomic_op: AtomicRmwOp,
+        ord: AtomicRwOrd,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
 
@@ -231,8 +264,9 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
         let place = this.deref_pointer(place)?;
         let rhs = this.read_immediate(rhs)?;
 
+        // The LHS can be a pointer, the RHS must be an integer.
         if !(place.layout.ty.is_integral() || place.layout.ty.is_raw_ptr())
-            || !(rhs.layout.ty.is_integral() || rhs.layout.ty.is_raw_ptr())
+            || !rhs.layout.ty.is_integral()
         {
             span_bug!(
                 this.cur_span(),
@@ -240,14 +274,7 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
             );
         }
 
-        let old = match atomic_op {
-            AtomicOp::Min =>
-                this.atomic_min_max_scalar(&place, rhs, /* min */ true, atomic)?,
-            AtomicOp::Max =>
-                this.atomic_min_max_scalar(&place, rhs, /* min */ false, atomic)?,
-            AtomicOp::MirOp(op, not) =>
-                this.atomic_rmw_op_immediate(&place, &rhs, op, not, atomic)?,
-        };
+        let old = this.atomic_rmw_op_immediate(&place, &rhs, atomic_op, ord)?;
         this.write_immediate(*old, dest)?; // old value is returned
         interp_ok(())
     }
diff --git a/src/tools/miri/src/intrinsics/math.rs b/src/tools/miri/src/intrinsics/math.rs
new file mode 100644
index 0000000000000..b9c99f2859468
--- /dev/null
+++ b/src/tools/miri/src/intrinsics/math.rs
@@ -0,0 +1,313 @@
+use rand::Rng;
+use rustc_apfloat::{self, Float, FloatConvert, Round};
+use rustc_middle::mir;
+use rustc_middle::ty::{self, FloatTy};
+
+use self::helpers::{ToHost, ToSoft};
+use super::check_intrinsic_arg_count;
+use crate::*;
+
+fn sqrt<'tcx, F: Float + FloatConvert<F> + Into<Scalar>>(
+    this: &mut MiriInterpCx<'tcx>,
+    args: &[OpTy<'tcx>],
+    dest: &MPlaceTy<'tcx>,
+) -> InterpResult<'tcx> {
+    let [f] = check_intrinsic_arg_count(args)?;
+    let f = this.read_scalar(f)?;
+    let f: F = f.to_float()?;
+    // Sqrt is specified to be fully precise.
+    let res = math::sqrt(f);
+    let res = this.adjust_nan(res, &[f]);
+    this.write_scalar(res, dest)
+}
+
+impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
+pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
+    fn emulate_math_intrinsic(
+        &mut self,
+        intrinsic_name: &str,
+        _generic_args: ty::GenericArgsRef<'tcx>,
+        args: &[OpTy<'tcx>],
+        dest: &MPlaceTy<'tcx>,
+    ) -> InterpResult<'tcx, EmulateItemResult> {
+        let this = self.eval_context_mut();
+
+        match intrinsic_name {
+            // Operations we can do with soft-floats.
+            "sqrtf16" => sqrt::<rustc_apfloat::ieee::Half>(this, args, dest)?,
+            "sqrtf32" => sqrt::<rustc_apfloat::ieee::Single>(this, args, dest)?,
+            "sqrtf64" => sqrt::<rustc_apfloat::ieee::Double>(this, args, dest)?,
+            "sqrtf128" => sqrt::<rustc_apfloat::ieee::Quad>(this, args, dest)?,
+
+            "fmaf32" => {
+                let [a, b, c] = check_intrinsic_arg_count(args)?;
+                let a = this.read_scalar(a)?.to_f32()?;
+                let b = this.read_scalar(b)?.to_f32()?;
+                let c = this.read_scalar(c)?.to_f32()?;
+                let res = a.mul_add(b, c).value;
+                let res = this.adjust_nan(res, &[a, b, c]);
+                this.write_scalar(res, dest)?;
+            }
+            "fmaf64" => {
+                let [a, b, c] = check_intrinsic_arg_count(args)?;
+                let a = this.read_scalar(a)?.to_f64()?;
+                let b = this.read_scalar(b)?.to_f64()?;
+                let c = this.read_scalar(c)?.to_f64()?;
+                let res = a.mul_add(b, c).value;
+                let res = this.adjust_nan(res, &[a, b, c]);
+                this.write_scalar(res, dest)?;
+            }
+
+            "fmuladdf32" => {
+                let [a, b, c] = check_intrinsic_arg_count(args)?;
+                let a = this.read_scalar(a)?.to_f32()?;
+                let b = this.read_scalar(b)?.to_f32()?;
+                let c = this.read_scalar(c)?.to_f32()?;
+                let fuse: bool = this.machine.float_nondet && this.machine.rng.get_mut().random();
+                let res = if fuse { a.mul_add(b, c).value } else { ((a * b).value + c).value };
+                let res = this.adjust_nan(res, &[a, b, c]);
+                this.write_scalar(res, dest)?;
+            }
+            "fmuladdf64" => {
+                let [a, b, c] = check_intrinsic_arg_count(args)?;
+                let a = this.read_scalar(a)?.to_f64()?;
+                let b = this.read_scalar(b)?.to_f64()?;
+                let c = this.read_scalar(c)?.to_f64()?;
+                let fuse: bool = this.machine.float_nondet && this.machine.rng.get_mut().random();
+                let res = if fuse { a.mul_add(b, c).value } else { ((a * b).value + c).value };
+                let res = this.adjust_nan(res, &[a, b, c]);
+                this.write_scalar(res, dest)?;
+            }
+
+            #[rustfmt::skip]
+            | "fadd_fast"
+            | "fsub_fast"
+            | "fmul_fast"
+            | "fdiv_fast"
+            | "frem_fast"
+            => {
+                let [a, b] = check_intrinsic_arg_count(args)?;
+                let a = this.read_immediate(a)?;
+                let b = this.read_immediate(b)?;
+                let op = match intrinsic_name {
+                    "fadd_fast" => mir::BinOp::Add,
+                    "fsub_fast" => mir::BinOp::Sub,
+                    "fmul_fast" => mir::BinOp::Mul,
+                    "fdiv_fast" => mir::BinOp::Div,
+                    "frem_fast" => mir::BinOp::Rem,
+                    _ => bug!(),
+                };
+                let float_finite = |x: &ImmTy<'tcx>| -> InterpResult<'tcx, bool> {
+                    let ty::Float(fty) = x.layout.ty.kind() else {
+                        bug!("float_finite: non-float input type {}", x.layout.ty)
+                    };
+                    interp_ok(match fty {
+                        FloatTy::F16 => x.to_scalar().to_f16()?.is_finite(),
+                        FloatTy::F32 => x.to_scalar().to_f32()?.is_finite(),
+                        FloatTy::F64 => x.to_scalar().to_f64()?.is_finite(),
+                        FloatTy::F128 => x.to_scalar().to_f128()?.is_finite(),
+                    })
+                };
+                match (float_finite(&a)?, float_finite(&b)?) {
+                    (false, false) => throw_ub_format!(
+                        "`{intrinsic_name}` intrinsic called with non-finite value as both parameters",
+                    ),
+                    (false, _) => throw_ub_format!(
+                        "`{intrinsic_name}` intrinsic called with non-finite value as first parameter",
+                    ),
+                    (_, false) => throw_ub_format!(
+                        "`{intrinsic_name}` intrinsic called with non-finite value as second parameter",
+                    ),
+                    _ => {}
+                }
+                let res = this.binary_op(op, &a, &b)?;
+                // This cannot be a NaN so we also don't have to apply any non-determinism.
+                // (Also, `binary_op` already called `generate_nan` if needed.)
+                if !float_finite(&res)? {
+                    throw_ub_format!("`{intrinsic_name}` intrinsic produced non-finite value as result");
+                }
+                // Apply a relative error of 4ULP to simulate non-deterministic precision loss
+                // due to optimizations.
+                let res = math::apply_random_float_error_to_imm(this, res, 4)?;
+                this.write_immediate(*res, dest)?;
+            }
+
+            "float_to_int_unchecked" => {
+                let [val] = check_intrinsic_arg_count(args)?;
+                let val = this.read_immediate(val)?;
+
+                let res = this
+                    .float_to_int_checked(&val, dest.layout, Round::TowardZero)?
+                    .ok_or_else(|| {
+                        err_ub_format!(
+                            "`float_to_int_unchecked` intrinsic called on {val} which cannot be represented in target type `{:?}`",
+                            dest.layout.ty
+                        )
+                    })?;
+
+                this.write_immediate(*res, dest)?;
+            }
+
+            // Operations that need host floats.
+            #[rustfmt::skip]
+            | "sinf32"
+            | "cosf32"
+            | "expf32"
+            | "exp2f32"
+            | "logf32"
+            | "log10f32"
+            | "log2f32"
+            => {
+                let [f] = check_intrinsic_arg_count(args)?;
+                let f = this.read_scalar(f)?.to_f32()?;
+
+                let res = math::fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| {
+                    // Using host floats (but it's fine, these operations do not have
+                    // guaranteed precision).
+                    let host = f.to_host();
+                    let res = match intrinsic_name {
+                        "sinf32" => host.sin(),
+                        "cosf32" => host.cos(),
+                        "expf32" => host.exp(),
+                        "exp2f32" => host.exp2(),
+                        "logf32" => host.ln(),
+                        "log10f32" => host.log10(),
+                        "log2f32" => host.log2(),
+                        _ => bug!(),
+                    };
+                    let res = res.to_soft();
+
+                    // Apply a relative error of 4ULP to introduce some non-determinism
+                    // simulating imprecise implementations and optimizations.
+                    let res = math::apply_random_float_error_ulp(
+                        this,
+                        res,
+                        4,
+                    );
+
+                    // Clamp the result to the guaranteed range of this function according to the C standard,
+                    // if any.
+                    math::clamp_float_value(intrinsic_name, res)
+                });
+                let res = this.adjust_nan(res, &[f]);
+                this.write_scalar(res, dest)?;
+            }
+
+            #[rustfmt::skip]
+            | "sinf64"
+            | "cosf64"
+            | "expf64"
+            | "exp2f64"
+            | "logf64"
+            | "log10f64"
+            | "log2f64"
+            => {
+                let [f] = check_intrinsic_arg_count(args)?;
+                let f = this.read_scalar(f)?.to_f64()?;
+
+                let res = math::fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| {
+                    // Using host floats (but it's fine, these operations do not have
+                    // guaranteed precision).
+                    let host = f.to_host();
+                    let res = match intrinsic_name {
+                        "sinf64" => host.sin(),
+                        "cosf64" => host.cos(),
+                        "expf64" => host.exp(),
+                        "exp2f64" => host.exp2(),
+                        "logf64" => host.ln(),
+                        "log10f64" => host.log10(),
+                        "log2f64" => host.log2(),
+                        _ => bug!(),
+                    };
+                    let res = res.to_soft();
+
+                    // Apply a relative error of 4ULP to introduce some non-determinism
+                    // simulating imprecise implementations and optimizations.
+                    let res = math::apply_random_float_error_ulp(
+                        this,
+                        res,
+                        4,
+                    );
+
+                    // Clamp the result to the guaranteed range of this function according to the C standard,
+                    // if any.
+                    math::clamp_float_value(intrinsic_name, res)
+                });
+                let res = this.adjust_nan(res, &[f]);
+                this.write_scalar(res, dest)?;
+            }
+
+            "powf32" => {
+                let [f1, f2] = check_intrinsic_arg_count(args)?;
+                let f1 = this.read_scalar(f1)?.to_f32()?;
+                let f2 = this.read_scalar(f2)?.to_f32()?;
+
+                let res =
+                    math::fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| {
+                        // Using host floats (but it's fine, this operation does not have guaranteed precision).
+                        let res = f1.to_host().powf(f2.to_host()).to_soft();
+
+                        // Apply a relative error of 4ULP to introduce some non-determinism
+                        // simulating imprecise implementations and optimizations.
+                        math::apply_random_float_error_ulp(this, res, 4)
+                    });
+                let res = this.adjust_nan(res, &[f1, f2]);
+                this.write_scalar(res, dest)?;
+            }
+            "powf64" => {
+                let [f1, f2] = check_intrinsic_arg_count(args)?;
+                let f1 = this.read_scalar(f1)?.to_f64()?;
+                let f2 = this.read_scalar(f2)?.to_f64()?;
+
+                let res =
+                    math::fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| {
+                        // Using host floats (but it's fine, this operation does not have guaranteed precision).
+                        let res = f1.to_host().powf(f2.to_host()).to_soft();
+
+                        // Apply a relative error of 4ULP to introduce some non-determinism
+                        // simulating imprecise implementations and optimizations.
+                        math::apply_random_float_error_ulp(this, res, 4)
+                    });
+                let res = this.adjust_nan(res, &[f1, f2]);
+                this.write_scalar(res, dest)?;
+            }
+
+            "powif32" => {
+                let [f, i] = check_intrinsic_arg_count(args)?;
+                let f = this.read_scalar(f)?.to_f32()?;
+                let i = this.read_scalar(i)?.to_i32()?;
+
+                let res = math::fixed_powi_value(this, f, i).unwrap_or_else(|| {
+                    // Using host floats (but it's fine, this operation does not have guaranteed precision).
+                    let res = f.to_host().powi(i).to_soft();
+
+                    // Apply a relative error of 4ULP to introduce some non-determinism
+                    // simulating imprecise implementations and optimizations.
+                    math::apply_random_float_error_ulp(this, res, 4)
+                });
+                let res = this.adjust_nan(res, &[f]);
+                this.write_scalar(res, dest)?;
+            }
+            "powif64" => {
+                let [f, i] = check_intrinsic_arg_count(args)?;
+                let f = this.read_scalar(f)?.to_f64()?;
+                let i = this.read_scalar(i)?.to_i32()?;
+
+                let res = math::fixed_powi_value(this, f, i).unwrap_or_else(|| {
+                    // Using host floats (but it's fine, this operation does not have guaranteed precision).
+                    let res = f.to_host().powi(i).to_soft();
+
+                    // Apply a relative error of 4ULP to introduce some non-determinism
+                    // simulating imprecise implementations and optimizations.
+                    math::apply_random_float_error_ulp(this, res, 4)
+                });
+                let res = this.adjust_nan(res, &[f]);
+                this.write_scalar(res, dest)?;
+            }
+
+            _ => return interp_ok(EmulateItemResult::NotSupported),
+        }
+
+        interp_ok(EmulateItemResult::NeedsReturn)
+    }
+}
diff --git a/src/tools/miri/src/intrinsics/mod.rs b/src/tools/miri/src/intrinsics/mod.rs
index 4628c30e2dfbe..a80b939d84ea9 100644
--- a/src/tools/miri/src/intrinsics/mod.rs
+++ b/src/tools/miri/src/intrinsics/mod.rs
@@ -1,17 +1,19 @@
 #![warn(clippy::arithmetic_side_effects)]
 
 mod atomic;
+mod math;
 mod simd;
 
+pub use self::atomic::AtomicRmwOp;
+
+#[rustfmt::skip] // prevent `use` reordering
 use rand::Rng;
 use rustc_abi::Size;
-use rustc_apfloat::{self, Float, Round};
-use rustc_middle::mir;
-use rustc_middle::ty::{self, FloatTy};
+use rustc_middle::{mir, ty};
 use rustc_span::{Symbol, sym};
 
 use self::atomic::EvalContextExt as _;
-use self::helpers::{ToHost, ToSoft};
+use self::math::EvalContextExt as _;
 use self::simd::EvalContextExt as _;
 use crate::*;
 
@@ -176,288 +178,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 this.write_scalar(Scalar::from_bool(branch), dest)?;
             }
 
-            "sqrtf32" => {
-                let [f] = check_intrinsic_arg_count(args)?;
-                let f = this.read_scalar(f)?.to_f32()?;
-                // Sqrt is specified to be fully precise.
-                let res = math::sqrt(f);
-                let res = this.adjust_nan(res, &[f]);
-                this.write_scalar(res, dest)?;
-            }
-            "sqrtf64" => {
-                let [f] = check_intrinsic_arg_count(args)?;
-                let f = this.read_scalar(f)?.to_f64()?;
-                // Sqrt is specified to be fully precise.
-                let res = math::sqrt(f);
-                let res = this.adjust_nan(res, &[f]);
-                this.write_scalar(res, dest)?;
-            }
-
-            #[rustfmt::skip]
-            | "sinf32"
-            | "cosf32"
-            | "expf32"
-            | "exp2f32"
-            | "logf32"
-            | "log10f32"
-            | "log2f32"
-            => {
-                let [f] = check_intrinsic_arg_count(args)?;
-                let f = this.read_scalar(f)?.to_f32()?;
-
-                let res = math::fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| {
-                    // Using host floats (but it's fine, these operations do not have
-                    // guaranteed precision).
-                    let host = f.to_host();
-                    let res = match intrinsic_name {
-                        "sinf32" => host.sin(),
-                        "cosf32" => host.cos(),
-                        "expf32" => host.exp(),
-                        "exp2f32" => host.exp2(),
-                        "logf32" => host.ln(),
-                        "log10f32" => host.log10(),
-                        "log2f32" => host.log2(),
-                        _ => bug!(),
-                    };
-                    let res = res.to_soft();
-
-                    // Apply a relative error of 4ULP to introduce some non-determinism
-                    // simulating imprecise implementations and optimizations.
-                    let res = math::apply_random_float_error_ulp(
-                        this,
-                        res,
-                        4,
-                    );
-
-                    // Clamp the result to the guaranteed range of this function according to the C standard,
-                    // if any.
-                    math::clamp_float_value(intrinsic_name, res)
-                });
-                let res = this.adjust_nan(res, &[f]);
-                this.write_scalar(res, dest)?;
-            }
-
-            #[rustfmt::skip]
-            | "sinf64"
-            | "cosf64"
-            | "expf64"
-            | "exp2f64"
-            | "logf64"
-            | "log10f64"
-            | "log2f64"
-            => {
-                let [f] = check_intrinsic_arg_count(args)?;
-                let f = this.read_scalar(f)?.to_f64()?;
-
-                let res = math::fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| {
-                    // Using host floats (but it's fine, these operations do not have
-                    // guaranteed precision).
-                    let host = f.to_host();
-                    let res = match intrinsic_name {
-                        "sinf64" => host.sin(),
-                        "cosf64" => host.cos(),
-                        "expf64" => host.exp(),
-                        "exp2f64" => host.exp2(),
-                        "logf64" => host.ln(),
-                        "log10f64" => host.log10(),
-                        "log2f64" => host.log2(),
-                        _ => bug!(),
-                    };
-                    let res = res.to_soft();
-
-                    // Apply a relative error of 4ULP to introduce some non-determinism
-                    // simulating imprecise implementations and optimizations.
-                    let res = math::apply_random_float_error_ulp(
-                        this,
-                        res,
-                        4,
-                    );
-
-                    // Clamp the result to the guaranteed range of this function according to the C standard,
-                    // if any.
-                    math::clamp_float_value(intrinsic_name, res)
-                });
-                let res = this.adjust_nan(res, &[f]);
-                this.write_scalar(res, dest)?;
-            }
-
-            "fmaf32" => {
-                let [a, b, c] = check_intrinsic_arg_count(args)?;
-                let a = this.read_scalar(a)?.to_f32()?;
-                let b = this.read_scalar(b)?.to_f32()?;
-                let c = this.read_scalar(c)?.to_f32()?;
-                let res = a.mul_add(b, c).value;
-                let res = this.adjust_nan(res, &[a, b, c]);
-                this.write_scalar(res, dest)?;
-            }
-            "fmaf64" => {
-                let [a, b, c] = check_intrinsic_arg_count(args)?;
-                let a = this.read_scalar(a)?.to_f64()?;
-                let b = this.read_scalar(b)?.to_f64()?;
-                let c = this.read_scalar(c)?.to_f64()?;
-                let res = a.mul_add(b, c).value;
-                let res = this.adjust_nan(res, &[a, b, c]);
-                this.write_scalar(res, dest)?;
-            }
-
-            "fmuladdf32" => {
-                let [a, b, c] = check_intrinsic_arg_count(args)?;
-                let a = this.read_scalar(a)?.to_f32()?;
-                let b = this.read_scalar(b)?.to_f32()?;
-                let c = this.read_scalar(c)?.to_f32()?;
-                let fuse: bool = this.machine.float_nondet && this.machine.rng.get_mut().random();
-                let res = if fuse { a.mul_add(b, c).value } else { ((a * b).value + c).value };
-                let res = this.adjust_nan(res, &[a, b, c]);
-                this.write_scalar(res, dest)?;
-            }
-            "fmuladdf64" => {
-                let [a, b, c] = check_intrinsic_arg_count(args)?;
-                let a = this.read_scalar(a)?.to_f64()?;
-                let b = this.read_scalar(b)?.to_f64()?;
-                let c = this.read_scalar(c)?.to_f64()?;
-                let fuse: bool = this.machine.float_nondet && this.machine.rng.get_mut().random();
-                let res = if fuse { a.mul_add(b, c).value } else { ((a * b).value + c).value };
-                let res = this.adjust_nan(res, &[a, b, c]);
-                this.write_scalar(res, dest)?;
-            }
-
-            "powf32" => {
-                let [f1, f2] = check_intrinsic_arg_count(args)?;
-                let f1 = this.read_scalar(f1)?.to_f32()?;
-                let f2 = this.read_scalar(f2)?.to_f32()?;
-
-                let res =
-                    math::fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| {
-                        // Using host floats (but it's fine, this operation does not have guaranteed precision).
-                        let res = f1.to_host().powf(f2.to_host()).to_soft();
-
-                        // Apply a relative error of 4ULP to introduce some non-determinism
-                        // simulating imprecise implementations and optimizations.
-                        math::apply_random_float_error_ulp(this, res, 4)
-                    });
-                let res = this.adjust_nan(res, &[f1, f2]);
-                this.write_scalar(res, dest)?;
-            }
-            "powf64" => {
-                let [f1, f2] = check_intrinsic_arg_count(args)?;
-                let f1 = this.read_scalar(f1)?.to_f64()?;
-                let f2 = this.read_scalar(f2)?.to_f64()?;
-
-                let res =
-                    math::fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| {
-                        // Using host floats (but it's fine, this operation does not have guaranteed precision).
-                        let res = f1.to_host().powf(f2.to_host()).to_soft();
-
-                        // Apply a relative error of 4ULP to introduce some non-determinism
-                        // simulating imprecise implementations and optimizations.
-                        math::apply_random_float_error_ulp(this, res, 4)
-                    });
-                let res = this.adjust_nan(res, &[f1, f2]);
-                this.write_scalar(res, dest)?;
-            }
-
-            "powif32" => {
-                let [f, i] = check_intrinsic_arg_count(args)?;
-                let f = this.read_scalar(f)?.to_f32()?;
-                let i = this.read_scalar(i)?.to_i32()?;
-
-                let res = math::fixed_powi_value(this, f, i).unwrap_or_else(|| {
-                    // Using host floats (but it's fine, this operation does not have guaranteed precision).
-                    let res = f.to_host().powi(i).to_soft();
-
-                    // Apply a relative error of 4ULP to introduce some non-determinism
-                    // simulating imprecise implementations and optimizations.
-                    math::apply_random_float_error_ulp(this, res, 4)
-                });
-                let res = this.adjust_nan(res, &[f]);
-                this.write_scalar(res, dest)?;
-            }
-            "powif64" => {
-                let [f, i] = check_intrinsic_arg_count(args)?;
-                let f = this.read_scalar(f)?.to_f64()?;
-                let i = this.read_scalar(i)?.to_i32()?;
-
-                let res = math::fixed_powi_value(this, f, i).unwrap_or_else(|| {
-                    // Using host floats (but it's fine, this operation does not have guaranteed precision).
-                    let res = f.to_host().powi(i).to_soft();
-
-                    // Apply a relative error of 4ULP to introduce some non-determinism
-                    // simulating imprecise implementations and optimizations.
-                    math::apply_random_float_error_ulp(this, res, 4)
-                });
-                let res = this.adjust_nan(res, &[f]);
-                this.write_scalar(res, dest)?;
-            }
-
-            #[rustfmt::skip]
-            | "fadd_fast"
-            | "fsub_fast"
-            | "fmul_fast"
-            | "fdiv_fast"
-            | "frem_fast"
-            => {
-                let [a, b] = check_intrinsic_arg_count(args)?;
-                let a = this.read_immediate(a)?;
-                let b = this.read_immediate(b)?;
-                let op = match intrinsic_name {
-                    "fadd_fast" => mir::BinOp::Add,
-                    "fsub_fast" => mir::BinOp::Sub,
-                    "fmul_fast" => mir::BinOp::Mul,
-                    "fdiv_fast" => mir::BinOp::Div,
-                    "frem_fast" => mir::BinOp::Rem,
-                    _ => bug!(),
-                };
-                let float_finite = |x: &ImmTy<'tcx>| -> InterpResult<'tcx, bool> {
-                    let ty::Float(fty) = x.layout.ty.kind() else {
-                        bug!("float_finite: non-float input type {}", x.layout.ty)
-                    };
-                    interp_ok(match fty {
-                        FloatTy::F16 => x.to_scalar().to_f16()?.is_finite(),
-                        FloatTy::F32 => x.to_scalar().to_f32()?.is_finite(),
-                        FloatTy::F64 => x.to_scalar().to_f64()?.is_finite(),
-                        FloatTy::F128 => x.to_scalar().to_f128()?.is_finite(),
-                    })
-                };
-                match (float_finite(&a)?, float_finite(&b)?) {
-                    (false, false) => throw_ub_format!(
-                        "`{intrinsic_name}` intrinsic called with non-finite value as both parameters",
-                    ),
-                    (false, _) => throw_ub_format!(
-                        "`{intrinsic_name}` intrinsic called with non-finite value as first parameter",
-                    ),
-                    (_, false) => throw_ub_format!(
-                        "`{intrinsic_name}` intrinsic called with non-finite value as second parameter",
-                    ),
-                    _ => {}
-                }
-                let res = this.binary_op(op, &a, &b)?;
-                // This cannot be a NaN so we also don't have to apply any non-determinism.
-                // (Also, `binary_op` already called `generate_nan` if needed.)
-                if !float_finite(&res)? {
-                    throw_ub_format!("`{intrinsic_name}` intrinsic produced non-finite value as result");
-                }
-                // Apply a relative error of 4ULP to simulate non-deterministic precision loss
-                // due to optimizations.
-                let res = math::apply_random_float_error_to_imm(this, res, 4)?;
-                this.write_immediate(*res, dest)?;
-            }
-
-            "float_to_int_unchecked" => {
-                let [val] = check_intrinsic_arg_count(args)?;
-                let val = this.read_immediate(val)?;
-
-                let res = this
-                    .float_to_int_checked(&val, dest.layout, Round::TowardZero)?
-                    .ok_or_else(|| {
-                        err_ub_format!(
-                            "`float_to_int_unchecked` intrinsic called on {val} which cannot be represented in target type `{:?}`",
-                            dest.layout.ty
-                        )
-                    })?;
-
-                this.write_immediate(*res, dest)?;
-            }
-
             // Other
             "breakpoint" => {
                 let [] = check_intrinsic_arg_count(args)?;
@@ -469,7 +189,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 // Make these a NOP, so we get the better Miri-native error messages.
             }
 
-            _ => return interp_ok(EmulateItemResult::NotSupported),
+            _ => return this.emulate_math_intrinsic(intrinsic_name, generic_args, args, dest),
         }
 
         interp_ok(EmulateItemResult::NeedsReturn)
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index 0856411b8e836..7f5f25b9f66b2 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -18,6 +18,7 @@
 #![feature(derive_coerce_pointee)]
 #![feature(arbitrary_self_types)]
 #![feature(iter_advance_by)]
+#![cfg_attr(not(bootstrap), feature(duration_from_nanos_u128))]
 // Configure clippy and other lints
 #![allow(
     clippy::collapsible_else_if,
@@ -132,7 +133,7 @@ pub use crate::concurrency::thread::{
     BlockReason, DynUnblockCallback, EvalContextExt as _, StackEmptyCallback, ThreadId,
     ThreadManager, TimeoutAnchor, TimeoutClock, UnblockKind,
 };
-pub use crate::concurrency::{GenmcConfig, GenmcCtx};
+pub use crate::concurrency::{GenmcConfig, GenmcCtx, run_genmc_mode};
 pub use crate::data_structures::dedup_range_map::DedupRangeMap;
 pub use crate::data_structures::mono_hash_map::MonoHashMap;
 pub use crate::diagnostics::{
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index e4540fb9bc58b..04c8bee72c038 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -747,11 +747,13 @@ impl<'tcx> MiriMachine<'tcx> {
             thread_cpu_affinity
                 .insert(threads.active_thread(), CpuAffinityMask::new(&layout_cx, config.num_cpus));
         }
+        let alloc_addresses =
+            RefCell::new(alloc_addresses::GlobalStateInner::new(config, stack_addr, tcx));
         MiriMachine {
             tcx,
             borrow_tracker,
             data_race,
-            alloc_addresses: RefCell::new(alloc_addresses::GlobalStateInner::new(config, stack_addr)),
+            alloc_addresses,
             // `env_vars` depends on a full interpreter so we cannot properly initialize it yet.
             env_vars: EnvVars::default(),
             main_fn_ret_place: None,
@@ -1504,9 +1506,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
         }
         match &machine.data_race {
             GlobalDataRaceHandler::None => {}
-            GlobalDataRaceHandler::Genmc(genmc_ctx) => {
-                genmc_ctx.memory_store(machine, ptr.addr(), range.size)?;
-            }
+            GlobalDataRaceHandler::Genmc(genmc_ctx) =>
+                genmc_ctx.memory_store(machine, ptr.addr(), range.size)?,
             GlobalDataRaceHandler::Vclocks(_global_state) => {
                 let _trace = enter_trace_span!(data_race::before_memory_write);
                 let AllocDataRaceHandler::Vclocks(data_race, weak_memory) =
@@ -1543,7 +1544,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
         match &machine.data_race {
             GlobalDataRaceHandler::None => {}
             GlobalDataRaceHandler::Genmc(genmc_ctx) =>
-                genmc_ctx.handle_dealloc(machine, ptr.addr(), size, align, kind)?,
+                genmc_ctx.handle_dealloc(machine, alloc_id, ptr.addr(), kind)?,
             GlobalDataRaceHandler::Vclocks(_global_state) => {
                 let _trace = enter_trace_span!(data_race::before_memory_deallocation);
                 let data_race = alloc_extra.data_race.as_vclocks_mut().unwrap();
diff --git a/src/tools/miri/src/math.rs b/src/tools/miri/src/math.rs
index 7d2f1c083687c..50472ed3638e9 100644
--- a/src/tools/miri/src/math.rs
+++ b/src/tools/miri/src/math.rs
@@ -2,7 +2,7 @@ use std::ops::Neg;
 use std::{f32, f64};
 
 use rand::Rng as _;
-use rustc_apfloat::Float as _;
+use rustc_apfloat::Float;
 use rustc_apfloat::ieee::{DoubleS, IeeeFloat, Semantics, SingleS};
 use rustc_middle::ty::{self, FloatTy, ScalarInt};
 
@@ -210,61 +210,66 @@ where
     let pi_over_2 = (pi / two).value;
     let pi_over_4 = (pi_over_2 / two).value;
 
-    Some(match (intrinsic_name, args) {
+    // Remove `f32`/`f64` suffix, if any.
+    let name = intrinsic_name
+        .strip_suffix("f32")
+        .or_else(|| intrinsic_name.strip_suffix("f64"))
+        .unwrap_or(intrinsic_name);
+    // Also strip trailing `f` (indicates "float"), with an exception for "erf" to avoid
+    // removing that `f`.
+    let name = if name == "erf" { name } else { name.strip_suffix("f").unwrap_or(name) };
+    Some(match (name, args) {
         // cos(±0) and cosh(±0)= 1
-        ("cosf32" | "cosf64" | "coshf" | "cosh", [input]) if input.is_zero() => one,
+        ("cos" | "cosh", [input]) if input.is_zero() => one,
 
         // e^0 = 1
-        ("expf32" | "expf64" | "exp2f32" | "exp2f64", [input]) if input.is_zero() => one,
+        ("exp" | "exp2", [input]) if input.is_zero() => one,
 
         // tanh(±INF) = ±1
-        ("tanhf" | "tanh", [input]) if input.is_infinite() => one.copy_sign(*input),
+        ("tanh", [input]) if input.is_infinite() => one.copy_sign(*input),
 
         // atan(±INF) = ±π/2
-        ("atanf" | "atan", [input]) if input.is_infinite() => pi_over_2.copy_sign(*input),
+        ("atan", [input]) if input.is_infinite() => pi_over_2.copy_sign(*input),
 
         // erf(±INF) = ±1
-        ("erff" | "erf", [input]) if input.is_infinite() => one.copy_sign(*input),
+        ("erf", [input]) if input.is_infinite() => one.copy_sign(*input),
 
         // erfc(-INF) = 2
-        ("erfcf" | "erfc", [input]) if input.is_neg_infinity() => (one + one).value,
+        ("erfc", [input]) if input.is_neg_infinity() => (one + one).value,
 
         // hypot(x, ±0) = abs(x), if x is not a NaN.
-        ("_hypotf" | "hypotf" | "_hypot" | "hypot", [x, y]) if !x.is_nan() && y.is_zero() =>
-            x.abs(),
+        // `_hypot` is the Windows name for this.
+        ("_hypot" | "hypot", [x, y]) if !x.is_nan() && y.is_zero() => x.abs(),
 
         // atan2(±0,−0) = ±π.
         // atan2(±0, y) = ±π for y < 0.
         // Must check for non NaN because `y.is_negative()` also applies to NaN.
-        ("atan2f" | "atan2", [x, y]) if (x.is_zero() && (y.is_negative() && !y.is_nan())) =>
-            pi.copy_sign(*x),
+        ("atan2", [x, y]) if (x.is_zero() && (y.is_negative() && !y.is_nan())) => pi.copy_sign(*x),
 
         // atan2(±x,−∞) = ±π for finite x > 0.
-        ("atan2f" | "atan2", [x, y])
-            if (!x.is_zero() && !x.is_infinite()) && y.is_neg_infinity() =>
+        ("atan2", [x, y]) if (!x.is_zero() && !x.is_infinite()) && y.is_neg_infinity() =>
             pi.copy_sign(*x),
 
         // atan2(x, ±0) = −π/2 for x < 0.
         // atan2(x, ±0) =  π/2 for x > 0.
-        ("atan2f" | "atan2", [x, y]) if !x.is_zero() && y.is_zero() => pi_over_2.copy_sign(*x),
+        ("atan2", [x, y]) if !x.is_zero() && y.is_zero() => pi_over_2.copy_sign(*x),
 
         //atan2(±∞, −∞) = ±3π/4
-        ("atan2f" | "atan2", [x, y]) if x.is_infinite() && y.is_neg_infinity() =>
+        ("atan2", [x, y]) if x.is_infinite() && y.is_neg_infinity() =>
             (pi_over_4 * three).value.copy_sign(*x),
 
         //atan2(±∞, +∞) = ±π/4
-        ("atan2f" | "atan2", [x, y]) if x.is_infinite() && y.is_pos_infinity() =>
-            pi_over_4.copy_sign(*x),
+        ("atan2", [x, y]) if x.is_infinite() && y.is_pos_infinity() => pi_over_4.copy_sign(*x),
 
         // atan2(±∞, y) returns ±π/2 for finite y.
-        ("atan2f" | "atan2", [x, y]) if x.is_infinite() && (!y.is_infinite() && !y.is_nan()) =>
+        ("atan2", [x, y]) if x.is_infinite() && (!y.is_infinite() && !y.is_nan()) =>
             pi_over_2.copy_sign(*x),
 
         // (-1)^(±INF) = 1
-        ("powf32" | "powf64", [base, exp]) if *base == -one && exp.is_infinite() => one,
+        ("pow", [base, exp]) if *base == -one && exp.is_infinite() => one,
 
         // 1^y = 1 for any y, even a NaN
-        ("powf32" | "powf64", [base, exp]) if *base == one => {
+        ("pow", [base, exp]) if *base == one => {
             let rng = this.machine.rng.get_mut();
             // SNaN exponents get special treatment: they might return 1, or a NaN.
             let return_nan = exp.is_signaling() && this.machine.float_nondet && rng.random();
@@ -273,7 +278,7 @@ where
         }
 
         // x^(±0) = 1 for any x, even a NaN
-        ("powf32" | "powf64", [base, exp]) if exp.is_zero() => {
+        ("pow", [base, exp]) if exp.is_zero() => {
             let rng = this.machine.rng.get_mut();
             // SNaN bases get special treatment: they might return 1, or a NaN.
             let return_nan = base.is_signaling() && this.machine.float_nondet && rng.random();
@@ -308,23 +313,23 @@ where
             Some(if return_nan { ecx.generate_nan(&[base]) } else { one })
         }
 
-        _ => return None,
+        _ => None,
     }
 }
 
-pub(crate) fn sqrt<S: rustc_apfloat::ieee::Semantics>(x: IeeeFloat<S>) -> IeeeFloat<S> {
+pub(crate) fn sqrt<F: Float>(x: F) -> F {
     match x.category() {
         // preserve zero sign
         rustc_apfloat::Category::Zero => x,
         // propagate NaN
         rustc_apfloat::Category::NaN => x,
         // sqrt of negative number is NaN
-        _ if x.is_negative() => IeeeFloat::NAN,
+        _ if x.is_negative() => F::NAN,
         // sqrt(∞) = ∞
-        rustc_apfloat::Category::Infinity => IeeeFloat::INFINITY,
+        rustc_apfloat::Category::Infinity => F::INFINITY,
         rustc_apfloat::Category::Normal => {
             // Floating point precision, excluding the integer bit
-            let prec = i32::try_from(S::PRECISION).unwrap() - 1;
+            let prec = i32::try_from(F::PRECISION).unwrap() - 1;
 
             // x = 2^(exp - prec) * mant
             // where mant is an integer with prec+1 bits
@@ -389,7 +394,7 @@ pub(crate) fn sqrt<S: rustc_apfloat::ieee::Semantics>(x: IeeeFloat<S>) -> IeeeFl
             res = (res + 1) >> 1;
 
             // Build resulting value with res as mantissa and exp/2 as exponent
-            IeeeFloat::from_u128(res).value.scalbn(exp / 2 - prec)
+            F::from_u128(res).value.scalbn(exp / 2 - prec)
         }
     }
 }
diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs
index 187423472ab8c..eca8cccf5efc4 100644
--- a/src/tools/miri/src/shims/foreign_items.rs
+++ b/src/tools/miri/src/shims/foreign_items.rs
@@ -3,7 +3,6 @@ use std::io::Write;
 use std::path::Path;
 
 use rustc_abi::{Align, AlignFromBytesError, CanonAbi, Size};
-use rustc_apfloat::Float;
 use rustc_ast::expand::allocator::alloc_error_handler_name;
 use rustc_hir::attrs::Linkage;
 use rustc_hir::def::DefKind;
@@ -15,7 +14,6 @@ use rustc_middle::{mir, ty};
 use rustc_span::Symbol;
 use rustc_target::callconv::FnAbi;
 
-use self::helpers::{ToHost, ToSoft};
 use super::alloc::EvalContextExt as _;
 use super::backtrace::EvalContextExt as _;
 use crate::helpers::EvalContextExt as _;
@@ -491,13 +489,22 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
             "exit" => {
                 let [code] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
                 let code = this.read_scalar(code)?.to_i32()?;
+                if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
+                    // If there is no error, execution should continue (on a different thread).
+                    genmc_ctx.handle_exit(
+                        this.machine.threads.active_thread(),
+                        code,
+                        crate::concurrency::ExitType::ExitCalled,
+                    )?;
+                    todo!(); // FIXME(genmc): Add a way to return here that is allowed to not do progress (can't use existing EmulateItemResult variants).
+                }
                 throw_machine_stop!(TerminationInfo::Exit { code, leak_check: false });
             }
             "abort" => {
                 let [] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
                 throw_machine_stop!(TerminationInfo::Abort(
                     "the program aborted execution".to_owned()
-                ))
+                ));
             }
 
             // Standard C allocation
@@ -809,225 +816,6 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 this.write_pointer(ptr_dest, dest)?;
             }
 
-            // math functions (note that there are also intrinsics for some other functions)
-            #[rustfmt::skip]
-            | "cbrtf"
-            | "coshf"
-            | "sinhf"
-            | "tanf"
-            | "tanhf"
-            | "acosf"
-            | "asinf"
-            | "atanf"
-            | "log1pf"
-            | "expm1f"
-            | "tgammaf"
-            | "erff"
-            | "erfcf"
-            => {
-                let [f] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
-                let f = this.read_scalar(f)?.to_f32()?;
-
-                let res = math::fixed_float_value(this, link_name.as_str(), &[f]).unwrap_or_else(|| {
-                    // Using host floats (but it's fine, these operations do not have
-                    // guaranteed precision).
-                    let f_host = f.to_host();
-                    let res = match link_name.as_str() {
-                        "cbrtf" => f_host.cbrt(),
-                        "coshf" => f_host.cosh(),
-                        "sinhf" => f_host.sinh(),
-                        "tanf" => f_host.tan(),
-                        "tanhf" => f_host.tanh(),
-                        "acosf" => f_host.acos(),
-                        "asinf" => f_host.asin(),
-                        "atanf" => f_host.atan(),
-                        "log1pf" => f_host.ln_1p(),
-                        "expm1f" => f_host.exp_m1(),
-                        "tgammaf" => f_host.gamma(),
-                        "erff" => f_host.erf(),
-                        "erfcf" => f_host.erfc(),
-                        _ => bug!(),
-                    };
-                    let res = res.to_soft();
-                    // Apply a relative error of 4ULP to introduce some non-determinism
-                    // simulating imprecise implementations and optimizations.
-                    let res = math::apply_random_float_error_ulp(this, res, 4);
-
-                    // Clamp the result to the guaranteed range of this function according to the C standard,
-                    // if any.
-                    math::clamp_float_value(link_name.as_str(), res)
-                });
-                let res = this.adjust_nan(res, &[f]);
-                this.write_scalar(res, dest)?;
-            }
-            #[rustfmt::skip]
-            | "_hypotf"
-            | "hypotf"
-            | "atan2f"
-            | "fdimf"
-            => {
-                let [f1, f2] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
-                let f1 = this.read_scalar(f1)?.to_f32()?;
-                let f2 = this.read_scalar(f2)?.to_f32()?;
-
-                let res = math::fixed_float_value(this, link_name.as_str(), &[f1, f2])
-                    .unwrap_or_else(|| {
-                        let res = match link_name.as_str() {
-                            // underscore case for windows, here and below
-                            // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019)
-                            // Using host floats (but it's fine, these operations do not have guaranteed precision).
-                            "_hypotf" | "hypotf" => f1.to_host().hypot(f2.to_host()).to_soft(),
-                            "atan2f" => f1.to_host().atan2(f2.to_host()).to_soft(),
-                            #[allow(deprecated)]
-                            "fdimf" => f1.to_host().abs_sub(f2.to_host()).to_soft(),
-                            _ => bug!(),
-                        };
-                        // Apply a relative error of 4ULP to introduce some non-determinism
-                        // simulating imprecise implementations and optimizations.
-                        let res = math::apply_random_float_error_ulp(this, res, 4);
-
-                        // Clamp the result to the guaranteed range of this function according to the C standard,
-                        // if any.
-                        math::clamp_float_value(link_name.as_str(), res)
-                    });
-                let res = this.adjust_nan(res, &[f1, f2]);
-                this.write_scalar(res, dest)?;
-            }
-            #[rustfmt::skip]
-            | "cbrt"
-            | "cosh"
-            | "sinh"
-            | "tan"
-            | "tanh"
-            | "acos"
-            | "asin"
-            | "atan"
-            | "log1p"
-            | "expm1"
-            | "tgamma"
-            | "erf"
-            | "erfc"
-            => {
-                let [f] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
-                let f = this.read_scalar(f)?.to_f64()?;
-
-                let res = math::fixed_float_value(this, link_name.as_str(), &[f]).unwrap_or_else(|| {
-                    // Using host floats (but it's fine, these operations do not have
-                    // guaranteed precision).
-                    let f_host = f.to_host();
-                    let res = match link_name.as_str() {
-                        "cbrt" => f_host.cbrt(),
-                        "cosh" => f_host.cosh(),
-                        "sinh" => f_host.sinh(),
-                        "tan" => f_host.tan(),
-                        "tanh" => f_host.tanh(),
-                        "acos" => f_host.acos(),
-                        "asin" => f_host.asin(),
-                        "atan" => f_host.atan(),
-                        "log1p" => f_host.ln_1p(),
-                        "expm1" => f_host.exp_m1(),
-                        "tgamma" => f_host.gamma(),
-                        "erf" => f_host.erf(),
-                        "erfc" => f_host.erfc(),
-                        _ => bug!(),
-                    };
-                    let res = res.to_soft();
-                    // Apply a relative error of 4ULP to introduce some non-determinism
-                    // simulating imprecise implementations and optimizations.
-                    let res = math::apply_random_float_error_ulp(this, res, 4);
-
-                    // Clamp the result to the guaranteed range of this function according to the C standard,
-                    // if any.
-                    math::clamp_float_value(link_name.as_str(), res)
-                });
-                let res = this.adjust_nan(res, &[f]);
-                this.write_scalar(res, dest)?;
-            }
-            #[rustfmt::skip]
-            | "_hypot"
-            | "hypot"
-            | "atan2"
-            | "fdim"
-            => {
-                let [f1, f2] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
-                let f1 = this.read_scalar(f1)?.to_f64()?;
-                let f2 = this.read_scalar(f2)?.to_f64()?;
-
-                let res = math::fixed_float_value(this, link_name.as_str(), &[f1, f2]).unwrap_or_else(|| {
-                    let res = match link_name.as_str() {
-                        // underscore case for windows, here and below
-                        // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019)
-                        // Using host floats (but it's fine, these operations do not have guaranteed precision).
-                        "_hypot" | "hypot" => f1.to_host().hypot(f2.to_host()).to_soft(),
-                        "atan2" => f1.to_host().atan2(f2.to_host()).to_soft(),
-                        #[allow(deprecated)]
-                        "fdim" => f1.to_host().abs_sub(f2.to_host()).to_soft(),
-                        _ => bug!(),
-                    };
-                    // Apply a relative error of 4ULP to introduce some non-determinism
-                    // simulating imprecise implementations and optimizations.
-                    let res = math::apply_random_float_error_ulp(this, res, 4);
-
-                    // Clamp the result to the guaranteed range of this function according to the C standard,
-                    // if any.
-                    math::clamp_float_value(link_name.as_str(), res)
-                });
-                let res = this.adjust_nan(res, &[f1, f2]);
-                this.write_scalar(res, dest)?;
-            }
-            #[rustfmt::skip]
-            | "_ldexp"
-            | "ldexp"
-            | "scalbn"
-            => {
-                let [x, exp] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
-                // For radix-2 (binary) systems, `ldexp` and `scalbn` are the same.
-                let x = this.read_scalar(x)?.to_f64()?;
-                let exp = this.read_scalar(exp)?.to_i32()?;
-
-                let res = x.scalbn(exp);
-                let res = this.adjust_nan(res, &[x]);
-                this.write_scalar(res, dest)?;
-            }
-            "lgammaf_r" => {
-                let [x, signp] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-                let x = this.read_scalar(x)?.to_f32()?;
-                let signp = this.deref_pointer_as(signp, this.machine.layouts.i32)?;
-
-                // Using host floats (but it's fine, these operations do not have guaranteed precision).
-                let (res, sign) = x.to_host().ln_gamma();
-                this.write_int(sign, &signp)?;
-
-                let res = res.to_soft();
-                // Apply a relative error of 4ULP to introduce some non-determinism
-                // simulating imprecise implementations and optimizations.
-                let res = math::apply_random_float_error_ulp(this, res, 4);
-                // Clamp the result to the guaranteed range of this function according to the C standard,
-                // if any.
-                let res = math::clamp_float_value(link_name.as_str(), res);
-                let res = this.adjust_nan(res, &[x]);
-                this.write_scalar(res, dest)?;
-            }
-            "lgamma_r" => {
-                let [x, signp] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-                let x = this.read_scalar(x)?.to_f64()?;
-                let signp = this.deref_pointer_as(signp, this.machine.layouts.i32)?;
-
-                // Using host floats (but it's fine, these operations do not have guaranteed precision).
-                let (res, sign) = x.to_host().ln_gamma();
-                this.write_int(sign, &signp)?;
-
-                let res = res.to_soft();
-                // Apply a relative error of 4ULP to introduce some non-determinism
-                // simulating imprecise implementations and optimizations.
-                let res = math::apply_random_float_error_ulp(this, res, 4);
-                // Clamp the result to the guaranteed range of this function according to the C standard,
-                // if any.
-                let res = math::clamp_float_value(link_name.as_str(), res);
-                let res = this.adjust_nan(res, &[x]);
-                this.write_scalar(res, dest)?;
-            }
-
             // LLVM intrinsics
             "llvm.prefetch" => {
                 let [p, rw, loc, ty] =
@@ -1109,8 +897,18 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 }
             }
 
-            // Platform-specific shims
-            _ =>
+            // Fallback to shims in submodules.
+            _ => {
+                // Math shims
+                #[expect(irrefutable_let_patterns)]
+                if let res = shims::math::EvalContextExt::emulate_foreign_item_inner(
+                    this, link_name, abi, args, dest,
+                )? && !matches!(res, EmulateItemResult::NotSupported)
+                {
+                    return interp_ok(res);
+                }
+
+                // Platform-specific shims
                 return match this.tcx.sess.target.os.as_ref() {
                     _ if this.target_os_is_unix() =>
                         shims::unix::foreign_items::EvalContextExt::emulate_foreign_item_inner(
@@ -1125,7 +923,8 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
                             this, link_name, abi, args, dest,
                         ),
                     _ => interp_ok(EmulateItemResult::NotSupported),
-                },
+                };
+            }
         };
         // We only fall through to here if we did *not* hit the `_` arm above,
         // i.e., if we actually emulated the function with one of the shims.
diff --git a/src/tools/miri/src/shims/math.rs b/src/tools/miri/src/shims/math.rs
new file mode 100644
index 0000000000000..576e76494bcb7
--- /dev/null
+++ b/src/tools/miri/src/shims/math.rs
@@ -0,0 +1,247 @@
+use rustc_abi::CanonAbi;
+use rustc_apfloat::Float;
+use rustc_middle::ty::Ty;
+use rustc_span::Symbol;
+use rustc_target::callconv::FnAbi;
+
+use self::helpers::{ToHost, ToSoft};
+use crate::*;
+
+impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
+pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
+    fn emulate_foreign_item_inner(
+        &mut self,
+        link_name: Symbol,
+        abi: &FnAbi<'tcx, Ty<'tcx>>,
+        args: &[OpTy<'tcx>],
+        dest: &MPlaceTy<'tcx>,
+    ) -> InterpResult<'tcx, EmulateItemResult> {
+        let this = self.eval_context_mut();
+
+        // math functions (note that there are also intrinsics for some other functions)
+        match link_name.as_str() {
+            // math functions (note that there are also intrinsics for some other functions)
+            #[rustfmt::skip]
+            | "cbrtf"
+            | "coshf"
+            | "sinhf"
+            | "tanf"
+            | "tanhf"
+            | "acosf"
+            | "asinf"
+            | "atanf"
+            | "log1pf"
+            | "expm1f"
+            | "tgammaf"
+            | "erff"
+            | "erfcf"
+            => {
+                let [f] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
+                let f = this.read_scalar(f)?.to_f32()?;
+
+                let res = math::fixed_float_value(this, link_name.as_str(), &[f]).unwrap_or_else(|| {
+                    // Using host floats (but it's fine, these operations do not have
+                    // guaranteed precision).
+                    let f_host = f.to_host();
+                    let res = match link_name.as_str() {
+                        "cbrtf" => f_host.cbrt(),
+                        "coshf" => f_host.cosh(),
+                        "sinhf" => f_host.sinh(),
+                        "tanf" => f_host.tan(),
+                        "tanhf" => f_host.tanh(),
+                        "acosf" => f_host.acos(),
+                        "asinf" => f_host.asin(),
+                        "atanf" => f_host.atan(),
+                        "log1pf" => f_host.ln_1p(),
+                        "expm1f" => f_host.exp_m1(),
+                        "tgammaf" => f_host.gamma(),
+                        "erff" => f_host.erf(),
+                        "erfcf" => f_host.erfc(),
+                        _ => bug!(),
+                    };
+                    let res = res.to_soft();
+                    // Apply a relative error of 4ULP to introduce some non-determinism
+                    // simulating imprecise implementations and optimizations.
+                    let res = math::apply_random_float_error_ulp(this, res, 4);
+
+                    // Clamp the result to the guaranteed range of this function according to the C standard,
+                    // if any.
+                    math::clamp_float_value(link_name.as_str(), res)
+                });
+                let res = this.adjust_nan(res, &[f]);
+                this.write_scalar(res, dest)?;
+            }
+            #[rustfmt::skip]
+            | "_hypotf"
+            | "hypotf"
+            | "atan2f"
+            | "fdimf"
+            => {
+                let [f1, f2] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
+                let f1 = this.read_scalar(f1)?.to_f32()?;
+                let f2 = this.read_scalar(f2)?.to_f32()?;
+
+                let res = math::fixed_float_value(this, link_name.as_str(), &[f1, f2])
+                    .unwrap_or_else(|| {
+                        let res = match link_name.as_str() {
+                            // underscore case for windows, here and below
+                            // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019)
+                            // Using host floats (but it's fine, these operations do not have guaranteed precision).
+                            "_hypotf" | "hypotf" => f1.to_host().hypot(f2.to_host()).to_soft(),
+                            "atan2f" => f1.to_host().atan2(f2.to_host()).to_soft(),
+                            #[allow(deprecated)]
+                            "fdimf" => f1.to_host().abs_sub(f2.to_host()).to_soft(),
+                            _ => bug!(),
+                        };
+                        // Apply a relative error of 4ULP to introduce some non-determinism
+                        // simulating imprecise implementations and optimizations.
+                        let res = math::apply_random_float_error_ulp(this, res, 4);
+
+                        // Clamp the result to the guaranteed range of this function according to the C standard,
+                        // if any.
+                        math::clamp_float_value(link_name.as_str(), res)
+                    });
+                let res = this.adjust_nan(res, &[f1, f2]);
+                this.write_scalar(res, dest)?;
+            }
+            #[rustfmt::skip]
+            | "cbrt"
+            | "cosh"
+            | "sinh"
+            | "tan"
+            | "tanh"
+            | "acos"
+            | "asin"
+            | "atan"
+            | "log1p"
+            | "expm1"
+            | "tgamma"
+            | "erf"
+            | "erfc"
+            => {
+                let [f] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
+                let f = this.read_scalar(f)?.to_f64()?;
+
+                let res = math::fixed_float_value(this, link_name.as_str(), &[f]).unwrap_or_else(|| {
+                    // Using host floats (but it's fine, these operations do not have
+                    // guaranteed precision).
+                    let f_host = f.to_host();
+                    let res = match link_name.as_str() {
+                        "cbrt" => f_host.cbrt(),
+                        "cosh" => f_host.cosh(),
+                        "sinh" => f_host.sinh(),
+                        "tan" => f_host.tan(),
+                        "tanh" => f_host.tanh(),
+                        "acos" => f_host.acos(),
+                        "asin" => f_host.asin(),
+                        "atan" => f_host.atan(),
+                        "log1p" => f_host.ln_1p(),
+                        "expm1" => f_host.exp_m1(),
+                        "tgamma" => f_host.gamma(),
+                        "erf" => f_host.erf(),
+                        "erfc" => f_host.erfc(),
+                        _ => bug!(),
+                    };
+                    let res = res.to_soft();
+                    // Apply a relative error of 4ULP to introduce some non-determinism
+                    // simulating imprecise implementations and optimizations.
+                    let res = math::apply_random_float_error_ulp(this, res, 4);
+
+                    // Clamp the result to the guaranteed range of this function according to the C standard,
+                    // if any.
+                    math::clamp_float_value(link_name.as_str(), res)
+                });
+                let res = this.adjust_nan(res, &[f]);
+                this.write_scalar(res, dest)?;
+            }
+            #[rustfmt::skip]
+            | "_hypot"
+            | "hypot"
+            | "atan2"
+            | "fdim"
+            => {
+                let [f1, f2] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
+                let f1 = this.read_scalar(f1)?.to_f64()?;
+                let f2 = this.read_scalar(f2)?.to_f64()?;
+
+                let res = math::fixed_float_value(this, link_name.as_str(), &[f1, f2]).unwrap_or_else(|| {
+                    let res = match link_name.as_str() {
+                        // underscore case for windows, here and below
+                        // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019)
+                        // Using host floats (but it's fine, these operations do not have guaranteed precision).
+                        "_hypot" | "hypot" => f1.to_host().hypot(f2.to_host()).to_soft(),
+                        "atan2" => f1.to_host().atan2(f2.to_host()).to_soft(),
+                        #[allow(deprecated)]
+                        "fdim" => f1.to_host().abs_sub(f2.to_host()).to_soft(),
+                        _ => bug!(),
+                    };
+                    // Apply a relative error of 4ULP to introduce some non-determinism
+                    // simulating imprecise implementations and optimizations.
+                    let res = math::apply_random_float_error_ulp(this, res, 4);
+
+                    // Clamp the result to the guaranteed range of this function according to the C standard,
+                    // if any.
+                    math::clamp_float_value(link_name.as_str(), res)
+                });
+                let res = this.adjust_nan(res, &[f1, f2]);
+                this.write_scalar(res, dest)?;
+            }
+            #[rustfmt::skip]
+            | "_ldexp"
+            | "ldexp"
+            | "scalbn"
+            => {
+                let [x, exp] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?;
+                // For radix-2 (binary) systems, `ldexp` and `scalbn` are the same.
+                let x = this.read_scalar(x)?.to_f64()?;
+                let exp = this.read_scalar(exp)?.to_i32()?;
+
+                let res = x.scalbn(exp);
+                let res = this.adjust_nan(res, &[x]);
+                this.write_scalar(res, dest)?;
+            }
+            "lgammaf_r" => {
+                let [x, signp] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
+                let x = this.read_scalar(x)?.to_f32()?;
+                let signp = this.deref_pointer_as(signp, this.machine.layouts.i32)?;
+
+                // Using host floats (but it's fine, these operations do not have guaranteed precision).
+                let (res, sign) = x.to_host().ln_gamma();
+                this.write_int(sign, &signp)?;
+
+                let res = res.to_soft();
+                // Apply a relative error of 4ULP to introduce some non-determinism
+                // simulating imprecise implementations and optimizations.
+                let res = math::apply_random_float_error_ulp(this, res, 4);
+                // Clamp the result to the guaranteed range of this function according to the C standard,
+                // if any.
+                let res = math::clamp_float_value(link_name.as_str(), res);
+                let res = this.adjust_nan(res, &[x]);
+                this.write_scalar(res, dest)?;
+            }
+            "lgamma_r" => {
+                let [x, signp] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
+                let x = this.read_scalar(x)?.to_f64()?;
+                let signp = this.deref_pointer_as(signp, this.machine.layouts.i32)?;
+
+                // Using host floats (but it's fine, these operations do not have guaranteed precision).
+                let (res, sign) = x.to_host().ln_gamma();
+                this.write_int(sign, &signp)?;
+
+                let res = res.to_soft();
+                // Apply a relative error of 4ULP to introduce some non-determinism
+                // simulating imprecise implementations and optimizations.
+                let res = math::apply_random_float_error_ulp(this, res, 4);
+                // Clamp the result to the guaranteed range of this function according to the C standard,
+                // if any.
+                let res = math::clamp_float_value(link_name.as_str(), res);
+                let res = this.adjust_nan(res, &[x]);
+                this.write_scalar(res, dest)?;
+            }
+
+            _ => return interp_ok(EmulateItemResult::NotSupported),
+        }
+
+        interp_ok(EmulateItemResult::NeedsReturn)
+    }
+}
diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs
index 7f594d4fdd6bc..7f7bc3b1cf720 100644
--- a/src/tools/miri/src/shims/mod.rs
+++ b/src/tools/miri/src/shims/mod.rs
@@ -4,6 +4,7 @@ mod aarch64;
 mod alloc;
 mod backtrace;
 mod files;
+mod math;
 #[cfg(all(unix, feature = "native-lib"))]
 mod native_lib;
 mod unix;
diff --git a/src/tools/miri/src/shims/unix/freebsd/foreign_items.rs b/src/tools/miri/src/shims/unix/freebsd/foreign_items.rs
index 9e247053fbcdb..413df85ee3aae 100644
--- a/src/tools/miri/src/shims/unix/freebsd/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/freebsd/foreign_items.rs
@@ -159,7 +159,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 let result = this.macos_fbsd_readdir_r(dirp, entry, result)?;
                 this.write_scalar(result, dest)?;
             }
-
+            "readdir" | "readdir@FBSD_1.0" => {
+                let [dirp] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
+                let result = this.readdir64("dirent", dirp)?;
+                this.write_scalar(result, dest)?;
+            }
             // Miscellaneous
             "__error" => {
                 let [] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
diff --git a/src/tools/miri/src/shims/unix/fs.rs b/src/tools/miri/src/shims/unix/fs.rs
index f9bcacf64c412..22bec9bd83941 100644
--- a/src/tools/miri/src/shims/unix/fs.rs
+++ b/src/tools/miri/src/shims/unix/fs.rs
@@ -900,14 +900,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         }
     }
 
-    fn linux_solarish_readdir64(
-        &mut self,
-        dirent_type: &str,
-        dirp_op: &OpTy<'tcx>,
-    ) -> InterpResult<'tcx, Scalar> {
+    fn readdir64(&mut self, dirent_type: &str, dirp_op: &OpTy<'tcx>) -> InterpResult<'tcx, Scalar> {
         let this = self.eval_context_mut();
 
-        if !matches!(&*this.tcx.sess.target.os, "linux" | "solaris" | "illumos") {
+        if !matches!(&*this.tcx.sess.target.os, "linux" | "solaris" | "illumos" | "freebsd") {
             panic!("`linux_solaris_readdir64` should not be called on {}", this.tcx.sess.target.os);
         }
 
@@ -926,6 +922,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
         let entry = match open_dir.read_dir.next() {
             Some(Ok(dir_entry)) => {
+                // If the host is a Unix system, fill in the inode number with its real value.
+                // If not, use 0 as a fallback value.
+                #[cfg(unix)]
+                let ino = std::os::unix::fs::DirEntryExt::ino(&dir_entry);
+                #[cfg(not(unix))]
+                let ino = 0u64;
+
                 // Write the directory entry into a newly allocated buffer.
                 // The name is written with write_bytes, while the rest of the
                 // dirent64 (or dirent) struct is written using write_int_fields.
@@ -947,6 +950,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 //     pub d_reclen: c_ushort,
                 //     pub d_name: [c_char; 3],
                 // }
+                //
+                // On FreeBSD:
+                // pub struct dirent{
+                //     pub d_fileno: uint32_t,
+                //     pub d_reclen: uint16_t,
+                //     pub d_type: uint8_t,
+                //     pub d_namlen: uint8_t,
+                //     pub d_name: [c_char; 256]
+                // }
 
                 let mut name = dir_entry.file_name(); // not a Path as there are no separators!
                 name.push("\0"); // Add a NUL terminator
@@ -965,31 +977,35 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     MiriMemoryKind::Runtime.into(),
                     AllocInit::Uninit,
                 )?;
-                let entry: Pointer = entry.into();
+                let entry = this.ptr_to_mplace(entry.into(), dirent_layout);
 
-                // If the host is a Unix system, fill in the inode number with its real value.
-                // If not, use 0 as a fallback value.
-                #[cfg(unix)]
-                let ino = std::os::unix::fs::DirEntryExt::ino(&dir_entry);
-                #[cfg(not(unix))]
-                let ino = 0u64;
-
-                let file_type = this.file_type_to_d_type(dir_entry.file_type())?;
+                // Write common fields
+                let ino_name =
+                    if this.tcx.sess.target.os == "freebsd" { "d_fileno" } else { "d_ino" };
                 this.write_int_fields_named(
-                    &[("d_ino", ino.into()), ("d_off", 0), ("d_reclen", size.into())],
-                    &this.ptr_to_mplace(entry, dirent_layout),
+                    &[(ino_name, ino.into()), ("d_reclen", size.into())],
+                    &entry,
                 )?;
 
-                if let Some(d_type) = this
-                    .try_project_field_named(&this.ptr_to_mplace(entry, dirent_layout), "d_type")?
-                {
+                // Write "optional" fields.
+                if let Some(d_off) = this.try_project_field_named(&entry, "d_off")? {
+                    this.write_null(&d_off)?;
+                }
+
+                if let Some(d_namlen) = this.try_project_field_named(&entry, "d_namlen")? {
+                    this.write_int(name_len.strict_sub(1), &d_namlen)?;
+                }
+
+                let file_type = this.file_type_to_d_type(dir_entry.file_type())?;
+                if let Some(d_type) = this.try_project_field_named(&entry, "d_type")? {
                     this.write_int(file_type, &d_type)?;
                 }
 
-                let name_ptr = entry.wrapping_offset(Size::from_bytes(d_name_offset), this);
+                // The name is not a normal field, we already computed the offset above.
+                let name_ptr = entry.ptr().wrapping_offset(Size::from_bytes(d_name_offset), this);
                 this.write_bytes_ptr(name_ptr, name_bytes.iter().copied())?;
 
-                Some(entry)
+                Some(entry.ptr())
             }
             None => {
                 // end of stream: return NULL
diff --git a/src/tools/miri/src/shims/unix/linux/foreign_items.rs b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
index e7e0c3b6ecd96..79052698f4bad 100644
--- a/src/tools/miri/src/shims/unix/linux/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
@@ -38,7 +38,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             // File related shims
             "readdir64" => {
                 let [dirp] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-                let result = this.linux_solarish_readdir64("dirent64", dirp)?;
+                let result = this.readdir64("dirent64", dirp)?;
                 this.write_scalar(result, dest)?;
             }
             "sync_file_range" => {
diff --git a/src/tools/miri/src/shims/unix/linux_like/epoll.rs b/src/tools/miri/src/shims/unix/linux_like/epoll.rs
index d460abc783dd5..3133c149293db 100644
--- a/src/tools/miri/src/shims/unix/linux_like/epoll.rs
+++ b/src/tools/miri/src/shims/unix/linux_like/epoll.rs
@@ -608,7 +608,7 @@ fn check_and_update_one_event_interest<'tcx>(
         // If we are tracking data races, remember the current clock so we can sync with it later.
         ecx.release_clock(|clock| {
             event_instance.clock.clone_from(clock);
-        });
+        })?;
         // Triggers the notification by inserting it to the ready list.
         ready_list.insert(epoll_key, event_instance);
         interp_ok(true)
@@ -639,7 +639,7 @@ fn return_ready_list<'tcx>(
                 &des.1,
             )?;
             // Synchronize waking thread with the event of interest.
-            ecx.acquire_clock(&epoll_event_instance.clock);
+            ecx.acquire_clock(&epoll_event_instance.clock)?;
 
             num_of_events = num_of_events.strict_add(1);
         } else {
diff --git a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs
index ee7deb8d38308..5d4f207d365e1 100644
--- a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs
+++ b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs
@@ -199,7 +199,7 @@ fn eventfd_write<'tcx>(
             // Future `read` calls will synchronize with this write, so update the FD clock.
             ecx.release_clock(|clock| {
                 eventfd.clock.borrow_mut().join(clock);
-            });
+            })?;
 
             // Store new counter value.
             eventfd.counter.set(new_count);
@@ -294,7 +294,7 @@ fn eventfd_read<'tcx>(
         );
     } else {
         // Synchronize with all prior `write` calls to this FD.
-        ecx.acquire_clock(&eventfd.clock.borrow());
+        ecx.acquire_clock(&eventfd.clock.borrow())?;
 
         // Return old counter value into user-space buffer.
         ecx.write_int(counter, &buf_place)?;
diff --git a/src/tools/miri/src/shims/unix/macos/sync.rs b/src/tools/miri/src/shims/unix/macos/sync.rs
index 05616dd5a4287..c4ddff7805ed2 100644
--- a/src/tools/miri/src/shims/unix/macos/sync.rs
+++ b/src/tools/miri/src/shims/unix/macos/sync.rs
@@ -296,7 +296,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
             this.mutex_enqueue_and_block(mutex_ref, None);
         } else {
-            this.mutex_lock(&mutex_ref);
+            this.mutex_lock(&mutex_ref)?;
         }
 
         interp_ok(())
@@ -321,7 +321,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             // reentrancy.
             this.write_scalar(Scalar::from_bool(false), dest)?;
         } else {
-            this.mutex_lock(&mutex_ref);
+            this.mutex_lock(&mutex_ref)?;
             this.write_scalar(Scalar::from_bool(true), dest)?;
         }
 
diff --git a/src/tools/miri/src/shims/unix/solarish/foreign_items.rs b/src/tools/miri/src/shims/unix/solarish/foreign_items.rs
index d7033a65fe22b..31269bf00c948 100644
--- a/src/tools/miri/src/shims/unix/solarish/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/solarish/foreign_items.rs
@@ -106,7 +106,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             }
             "readdir" => {
                 let [dirp] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-                let result = this.linux_solarish_readdir64("dirent", dirp)?;
+                let result = this.readdir64("dirent", dirp)?;
                 this.write_scalar(result, dest)?;
             }
 
diff --git a/src/tools/miri/src/shims/unix/sync.rs b/src/tools/miri/src/shims/unix/sync.rs
index 5ad4fd501a607..cefd8b81ac180 100644
--- a/src/tools/miri/src/shims/unix/sync.rs
+++ b/src/tools/miri/src/shims/unix/sync.rs
@@ -498,14 +498,14 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     MutexKind::Normal => throw_machine_stop!(TerminationInfo::Deadlock),
                     MutexKind::ErrorCheck => this.eval_libc_i32("EDEADLK"),
                     MutexKind::Recursive => {
-                        this.mutex_lock(&mutex.mutex_ref);
+                        this.mutex_lock(&mutex.mutex_ref)?;
                         0
                     }
                 }
             }
         } else {
             // The mutex is unlocked. Let's lock it.
-            this.mutex_lock(&mutex.mutex_ref);
+            this.mutex_lock(&mutex.mutex_ref)?;
             0
         };
         this.write_scalar(Scalar::from_i32(ret), dest)?;
@@ -525,14 +525,14 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     MutexKind::Default | MutexKind::Normal | MutexKind::ErrorCheck =>
                         this.eval_libc_i32("EBUSY"),
                     MutexKind::Recursive => {
-                        this.mutex_lock(&mutex.mutex_ref);
+                        this.mutex_lock(&mutex.mutex_ref)?;
                         0
                     }
                 }
             }
         } else {
             // The mutex is unlocked. Let's lock it.
-            this.mutex_lock(&mutex.mutex_ref);
+            this.mutex_lock(&mutex.mutex_ref)?;
             0
         }))
     }
@@ -600,7 +600,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 dest.clone(),
             );
         } else {
-            this.rwlock_reader_lock(&rwlock.rwlock_ref);
+            this.rwlock_reader_lock(&rwlock.rwlock_ref)?;
             this.write_null(dest)?;
         }
 
@@ -615,7 +615,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         if rwlock.rwlock_ref.is_write_locked() {
             interp_ok(Scalar::from_i32(this.eval_libc_i32("EBUSY")))
         } else {
-            this.rwlock_reader_lock(&rwlock.rwlock_ref);
+            this.rwlock_reader_lock(&rwlock.rwlock_ref)?;
             interp_ok(Scalar::from_i32(0))
         }
     }
@@ -648,7 +648,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 dest.clone(),
             );
         } else {
-            this.rwlock_writer_lock(&rwlock.rwlock_ref);
+            this.rwlock_writer_lock(&rwlock.rwlock_ref)?;
             this.write_null(dest)?;
         }
 
@@ -663,7 +663,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         if rwlock.rwlock_ref.is_locked() {
             interp_ok(Scalar::from_i32(this.eval_libc_i32("EBUSY")))
         } else {
-            this.rwlock_writer_lock(&rwlock.rwlock_ref);
+            this.rwlock_writer_lock(&rwlock.rwlock_ref)?;
             interp_ok(Scalar::from_i32(0))
         }
     }
diff --git a/src/tools/miri/src/shims/unix/unnamed_socket.rs b/src/tools/miri/src/shims/unix/unnamed_socket.rs
index 7eb82851033d8..81703d6e176bf 100644
--- a/src/tools/miri/src/shims/unix/unnamed_socket.rs
+++ b/src/tools/miri/src/shims/unix/unnamed_socket.rs
@@ -259,7 +259,7 @@ fn anonsocket_write<'tcx>(
         // Remember this clock so `read` can synchronize with us.
         ecx.release_clock(|clock| {
             writebuf.clock.join(clock);
-        });
+        })?;
         // Do full write / partial write based on the space available.
         let write_size = len.min(available_space);
         let actual_write_size = ecx.write_to_host(&mut writebuf.buf, write_size, ptr)?.unwrap();
@@ -345,7 +345,7 @@ fn anonsocket_read<'tcx>(
         // Synchronize with all previous writes to this buffer.
         // FIXME: this over-synchronizes; a more precise approach would be to
         // only sync with the writes whose data we will read.
-        ecx.acquire_clock(&readbuf.clock);
+        ecx.acquire_clock(&readbuf.clock)?;
 
         // Do full read / partial read based on the space available.
         // Conveniently, `read` exists on `VecDeque` and has exactly the desired behavior.
diff --git a/src/tools/miri/src/shims/wasi/foreign_items.rs b/src/tools/miri/src/shims/wasi/foreign_items.rs
index bfcdbd8130d84..ffc02dc986f9a 100644
--- a/src/tools/miri/src/shims/wasi/foreign_items.rs
+++ b/src/tools/miri/src/shims/wasi/foreign_items.rs
@@ -35,6 +35,74 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 this.write_pointer(res, dest)?;
             }
 
+            // Standard input/output
+            // FIXME: These shims are hacks that just get basic stdout/stderr working. We can't
+            // constrain them to "std" since std itself uses the wasi crate for this.
+            "get-stdout" => {
+                let [] =
+                    this.check_shim_sig(shim_sig!(extern "C" fn() -> i32), link_name, abi, args)?;
+                this.write_scalar(Scalar::from_i32(1), dest)?; // POSIX FD number for stdout
+            }
+            "get-stderr" => {
+                let [] =
+                    this.check_shim_sig(shim_sig!(extern "C" fn() -> i32), link_name, abi, args)?;
+                this.write_scalar(Scalar::from_i32(2), dest)?; // POSIX FD number for stderr
+            }
+            "[resource-drop]output-stream" => {
+                let [handle] =
+                    this.check_shim_sig(shim_sig!(extern "C" fn(i32) -> ()), link_name, abi, args)?;
+                let handle = this.read_scalar(handle)?.to_i32()?;
+
+                if !(handle == 1 || handle == 2) {
+                    throw_unsup_format!("wasm output-stream: unsupported handle");
+                }
+                // We don't actually close these FDs, so this is a NOP.
+            }
+            "[method]output-stream.blocking-write-and-flush" => {
+                let [handle, buf, len, ret_area] = this.check_shim_sig(
+                    shim_sig!(extern "C" fn(i32, *mut _, usize, *mut _) -> ()),
+                    link_name,
+                    abi,
+                    args,
+                )?;
+                let handle = this.read_scalar(handle)?.to_i32()?;
+                let buf = this.read_pointer(buf)?;
+                let len = this.read_target_usize(len)?;
+                let ret_area = this.read_pointer(ret_area)?;
+
+                if len > 4096 {
+                    throw_unsup_format!(
+                        "wasm output-stream.blocking-write-and-flush: buffer too big"
+                    );
+                }
+                let len = usize::try_from(len).unwrap();
+                let Some(fd) = this.machine.fds.get(handle) else {
+                    throw_unsup_format!(
+                        "wasm output-stream.blocking-write-and-flush: unsupported handle"
+                    );
+                };
+                fd.write(
+                    this.machine.communicate(),
+                    buf,
+                    len,
+                    this,
+                    callback!(
+                        @capture<'tcx> {
+                            len: usize,
+                            ret_area: Pointer,
+                        }
+                        |this, result: Result<usize, IoError>| {
+                            if !matches!(result, Ok(l) if l == len) {
+                                throw_unsup_format!("wasm output-stream.blocking-write-and-flush: returning errors is not supported");
+                            }
+                            // 0 in the first byte of the ret_area indicates success.
+                            let ret = this.ptr_to_mplace(ret_area, this.machine.layouts.u8);
+                            this.write_null(&ret)?;
+                            interp_ok(())
+                    }),
+                )?;
+            }
+
             _ => return interp_ok(EmulateItemResult::NotSupported),
         }
         interp_ok(EmulateItemResult::NeedsReturn)
diff --git a/src/tools/miri/src/shims/windows/foreign_items.rs b/src/tools/miri/src/shims/windows/foreign_items.rs
index 7b13f1d908073..c3ca01bbf3403 100644
--- a/src/tools/miri/src/shims/windows/foreign_items.rs
+++ b/src/tools/miri/src/shims/windows/foreign_items.rs
@@ -820,6 +820,22 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 this.write_int(length.strict_sub(1), dest)?;
             }
 
+            "_Unwind_RaiseException" => {
+                // This is not formally part of POSIX, but it is very wide-spread on POSIX systems.
+                // It was originally specified as part of the Itanium C++ ABI:
+                // https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html#base-throw.
+                // MinGW implements _Unwind_RaiseException on top of SEH exceptions.
+                if this.tcx.sess.target.env != "gnu" {
+                    throw_unsup_format!(
+                        "`_Unwind_RaiseException` is not supported on non-MinGW Windows",
+                    );
+                }
+                // This function looks and behaves excatly like miri_start_unwind.
+                let [payload] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
+                this.handle_miri_start_unwind(payload)?;
+                return interp_ok(EmulateItemResult::NeedsUnwind);
+            }
+
             // Incomplete shims that we "stub out" just to get pre-main initialization code to work.
             // These shims are enabled only when the caller is in the standard library.
             "GetProcessHeap" if this.frame_in_std() => {
@@ -880,22 +896,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 this.write_null(dest)?;
             }
 
-            "_Unwind_RaiseException" => {
-                // This is not formally part of POSIX, but it is very wide-spread on POSIX systems.
-                // It was originally specified as part of the Itanium C++ ABI:
-                // https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html#base-throw.
-                // MinGW implements _Unwind_RaiseException on top of SEH exceptions.
-                if this.tcx.sess.target.env != "gnu" {
-                    throw_unsup_format!(
-                        "`_Unwind_RaiseException` is not supported on non-MinGW Windows",
-                    );
-                }
-                // This function looks and behaves excatly like miri_start_unwind.
-                let [payload] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-                this.handle_miri_start_unwind(payload)?;
-                return interp_ok(EmulateItemResult::NeedsUnwind);
-            }
-
             _ => return interp_ok(EmulateItemResult::NotSupported),
         }
 
diff --git a/src/tools/miri/src/shims/windows/sync.rs b/src/tools/miri/src/shims/windows/sync.rs
index 9165e76b63d14..a893999ef8e52 100644
--- a/src/tools/miri/src/shims/windows/sync.rs
+++ b/src/tools/miri/src/shims/windows/sync.rs
@@ -60,7 +60,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 true
             }
             InitOnceStatus::Complete => {
-                this.init_once_observe_completed(init_once_ref);
+                this.init_once_observe_completed(init_once_ref)?;
                 this.write_scalar(this.eval_windows("c", "FALSE"), pending_place)?;
                 this.write_scalar(this.eval_windows("c", "TRUE"), dest)?;
                 true
diff --git a/src/tools/miri/tests/fail/both_borrows/zero-sized-protected.rs b/src/tools/miri/tests/fail/both_borrows/zero-sized-protected.rs
deleted file mode 100644
index df9a73a444eda..0000000000000
--- a/src/tools/miri/tests/fail/both_borrows/zero-sized-protected.rs
+++ /dev/null
@@ -1,18 +0,0 @@
-//@revisions: stack tree
-//@[tree]compile-flags: -Zmiri-tree-borrows
-use std::alloc::{Layout, alloc, dealloc};
-
-// `x` is strongly protected but covers zero bytes.
-// Let's see if deallocating the allocation x points to is UB:
-// in TB, it is UB, but in SB it is not.
-fn test(_x: &mut (), ptr: *mut u8, l: Layout) {
-    unsafe { dealloc(ptr, l) }; //~[tree] ERROR: /deallocation .* is forbidden/
-}
-
-fn main() {
-    let l = Layout::from_size_align(1, 1).unwrap();
-    let ptr = unsafe { alloc(l) };
-    unsafe { test(&mut *ptr.cast::<()>(), ptr, l) };
-    // In SB the test would pass if it weren't for this line.
-    unsafe { std::hint::unreachable_unchecked() }; //~[stack] ERROR: unreachable
-}
diff --git a/src/tools/miri/tests/fail/both_borrows/zero-sized-protected.stack.stderr b/src/tools/miri/tests/fail/both_borrows/zero-sized-protected.stack.stderr
deleted file mode 100644
index 3e4a7ccac363d..0000000000000
--- a/src/tools/miri/tests/fail/both_borrows/zero-sized-protected.stack.stderr
+++ /dev/null
@@ -1,15 +0,0 @@
-error: Undefined Behavior: entering unreachable code
-  --> tests/fail/both_borrows/zero-sized-protected.rs:LL:CC
-   |
-LL |     unsafe { std::hint::unreachable_unchecked() };
-   |              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Undefined Behavior occurred here
-   |
-   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
-   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
-   = note: BACKTRACE:
-   = note: inside `main` at tests/fail/both_borrows/zero-sized-protected.rs:LL:CC
-
-note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
-
-error: aborting due to 1 previous error
-
diff --git a/src/tools/miri/tests/fail/both_borrows/zero-sized-protected.tree.stderr b/src/tools/miri/tests/fail/both_borrows/zero-sized-protected.tree.stderr
deleted file mode 100644
index 66a7d7794e968..0000000000000
--- a/src/tools/miri/tests/fail/both_borrows/zero-sized-protected.tree.stderr
+++ /dev/null
@@ -1,32 +0,0 @@
-error: Undefined Behavior: deallocation through <TAG> (root of the allocation) at ALLOC[0x0] is forbidden
-  --> tests/fail/both_borrows/zero-sized-protected.rs:LL:CC
-   |
-LL |     unsafe { dealloc(ptr, l) };
-   |              ^^^^^^^^^^^^^^^ Undefined Behavior occurred here
-   |
-   = help: this indicates a potential bug in the program: it performed an invalid operation, but the Tree Borrows rules it violated are still experimental
-   = help: see https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/tree-borrows.md for further information
-   = help: the allocation of the accessed tag <TAG> (root of the allocation) also contains the strongly protected tag <TAG>
-   = help: the strongly protected tag <TAG> disallows deallocations
-help: the accessed tag <TAG> was created here
-  --> tests/fail/both_borrows/zero-sized-protected.rs:LL:CC
-   |
-LL |     let ptr = unsafe { alloc(l) };
-   |                        ^^^^^^^^
-help: the strongly protected tag <TAG> was created here, in the initial state Reserved
-  --> tests/fail/both_borrows/zero-sized-protected.rs:LL:CC
-   |
-LL | fn test(_x: &mut (), ptr: *mut u8, l: Layout) {
-   |         ^^
-   = note: BACKTRACE (of the first span):
-   = note: inside `test` at tests/fail/both_borrows/zero-sized-protected.rs:LL:CC
-note: inside `main`
-  --> tests/fail/both_borrows/zero-sized-protected.rs:LL:CC
-   |
-LL |     unsafe { test(&mut *ptr.cast::<()>(), ptr, l) };
-   |              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
-
-error: aborting due to 1 previous error
-
diff --git a/src/tools/miri/tests/genmc/fail/data_race/mpu2_rels_rlx.rs b/src/tools/miri/tests/genmc/fail/data_race/mpu2_rels_rlx.rs
new file mode 100644
index 0000000000000..32954a643b34b
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/data_race/mpu2_rels_rlx.rs
@@ -0,0 +1,45 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test `wrong/racy/MPU2+rels+rlx`.
+// Test if Miri with GenMC can detect the data race on `X`.
+// The data race only occurs if thread 1 finishes, then threads 3 and 4 run, then thread 2.
+//
+// This data race is hard to detect for Miri without GenMC, requiring -Zmiri-many-seeds=0..1024 at the time this test was created.
+
+// FIXME(genmc): once Miri-GenMC error reporting is improved, ensure that it correctly points to the two spans involved in the data race.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::*;
+
+use genmc::spawn_pthread_closure;
+static mut X: u64 = 0;
+static Y: AtomicUsize = AtomicUsize::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        let _t1 = spawn_pthread_closure(|| {
+            X = 1;
+            Y.store(0, Release);
+            Y.store(1, Relaxed);
+        });
+        let _t2 = spawn_pthread_closure(|| {
+            if Y.load(Relaxed) > 2 {
+                X = 2; //~ ERROR: Undefined Behavior: Non-atomic race
+            }
+        });
+        let _t3 = spawn_pthread_closure(|| {
+            let _ = Y.compare_exchange(2, 3, Relaxed, Relaxed);
+        });
+
+        let _t4 = spawn_pthread_closure(|| {
+            let _ = Y.compare_exchange(1, 2, Relaxed, Relaxed);
+        });
+    }
+    0
+}
diff --git a/src/tools/miri/tests/genmc/fail/data_race/mpu2_rels_rlx.stderr b/src/tools/miri/tests/genmc/fail/data_race/mpu2_rels_rlx.stderr
new file mode 100644
index 0000000000000..1ffb55f22e6e0
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/data_race/mpu2_rels_rlx.stderr
@@ -0,0 +1,22 @@
+Running GenMC Verification...
+error: Undefined Behavior: Non-atomic race
+  --> tests/genmc/fail/data_race/mpu2_rels_rlx.rs:LL:CC
+   |
+LL |                 X = 2;
+   |                 ^^^^^ Undefined Behavior occurred here
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE on thread `unnamed-ID`:
+   = note: inside closure at tests/genmc/fail/data_race/mpu2_rels_rlx.rs:LL:CC
+   = note: inside `<std::boxed::Box<{closure@tests/genmc/fail/data_race/mpu2_rels_rlx.rs:LL:CC}> as std::ops::FnOnce<()>>::call_once` at RUSTLIB/alloc/src/boxed.rs:LL:CC
+note: inside `genmc::spawn_pthread_closure::thread_func::<{closure@tests/genmc/fail/data_race/mpu2_rels_rlx.rs:LL:CC}>`
+  --> tests/genmc/fail/data_race/../../../utils/genmc.rs:LL:CC
+   |
+LL |         f();
+   |         ^^^
+
+note: add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rel_rlx.stderr b/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rel_rlx.stderr
new file mode 100644
index 0000000000000..b0037c211c69d
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rel_rlx.stderr
@@ -0,0 +1,22 @@
+Running GenMC Verification...
+error: Undefined Behavior: Non-atomic race
+  --> tests/genmc/fail/data_race/weak_orderings.rs:LL:CC
+   |
+LL |                 X = 2;
+   |                 ^^^^^ Undefined Behavior occurred here
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE on thread `unnamed-ID`:
+   = note: inside closure at tests/genmc/fail/data_race/weak_orderings.rs:LL:CC
+   = note: inside `<std::boxed::Box<{closure@tests/genmc/fail/data_race/weak_orderings.rs:LL:CC}> as std::ops::FnOnce<()>>::call_once` at RUSTLIB/alloc/src/boxed.rs:LL:CC
+note: inside `genmc::spawn_pthread_closure::thread_func::<{closure@tests/genmc/fail/data_race/weak_orderings.rs:LL:CC}>`
+  --> tests/genmc/fail/data_race/../../../utils/genmc.rs:LL:CC
+   |
+LL |         f();
+   |         ^^^
+
+note: add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rlx_acq.stderr b/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rlx_acq.stderr
new file mode 100644
index 0000000000000..b0037c211c69d
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rlx_acq.stderr
@@ -0,0 +1,22 @@
+Running GenMC Verification...
+error: Undefined Behavior: Non-atomic race
+  --> tests/genmc/fail/data_race/weak_orderings.rs:LL:CC
+   |
+LL |                 X = 2;
+   |                 ^^^^^ Undefined Behavior occurred here
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE on thread `unnamed-ID`:
+   = note: inside closure at tests/genmc/fail/data_race/weak_orderings.rs:LL:CC
+   = note: inside `<std::boxed::Box<{closure@tests/genmc/fail/data_race/weak_orderings.rs:LL:CC}> as std::ops::FnOnce<()>>::call_once` at RUSTLIB/alloc/src/boxed.rs:LL:CC
+note: inside `genmc::spawn_pthread_closure::thread_func::<{closure@tests/genmc/fail/data_race/weak_orderings.rs:LL:CC}>`
+  --> tests/genmc/fail/data_race/../../../utils/genmc.rs:LL:CC
+   |
+LL |         f();
+   |         ^^^
+
+note: add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rlx_rlx.stderr b/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rlx_rlx.stderr
new file mode 100644
index 0000000000000..b0037c211c69d
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rlx_rlx.stderr
@@ -0,0 +1,22 @@
+Running GenMC Verification...
+error: Undefined Behavior: Non-atomic race
+  --> tests/genmc/fail/data_race/weak_orderings.rs:LL:CC
+   |
+LL |                 X = 2;
+   |                 ^^^^^ Undefined Behavior occurred here
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE on thread `unnamed-ID`:
+   = note: inside closure at tests/genmc/fail/data_race/weak_orderings.rs:LL:CC
+   = note: inside `<std::boxed::Box<{closure@tests/genmc/fail/data_race/weak_orderings.rs:LL:CC}> as std::ops::FnOnce<()>>::call_once` at RUSTLIB/alloc/src/boxed.rs:LL:CC
+note: inside `genmc::spawn_pthread_closure::thread_func::<{closure@tests/genmc/fail/data_race/weak_orderings.rs:LL:CC}>`
+  --> tests/genmc/fail/data_race/../../../utils/genmc.rs:LL:CC
+   |
+LL |         f();
+   |         ^^^
+
+note: add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rs b/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rs
new file mode 100644
index 0000000000000..1568a302f85ad
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/data_race/weak_orderings.rs
@@ -0,0 +1,40 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+//@revisions: rlx_rlx rlx_acq rel_rlx
+
+// Translated from GenMC's test `wrong/racy/MP+rel+rlx`, `MP+rlx+acq` and `MP+rlx+rlx`.
+// Test if Miri with GenMC can detect the data race on `X`.
+// Relaxed orderings on an atomic store-load pair should not synchronize the non-atomic write to X, leading to a data race.
+
+// FIXME(genmc): once Miri-GenMC error reporting is improved, ensure that it correctly points to the two spans involved in the data race.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::{self, *};
+
+use genmc::spawn_pthread_closure;
+
+static mut X: u64 = 0;
+static Y: AtomicUsize = AtomicUsize::new(0);
+
+const STORE_ORD: Ordering = if cfg!(rel_rlx) { Release } else { Relaxed };
+const LOAD_ORD: Ordering = if cfg!(rlx_acq) { Acquire } else { Relaxed };
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        spawn_pthread_closure(|| {
+            X = 1;
+            Y.store(1, STORE_ORD);
+        });
+        spawn_pthread_closure(|| {
+            if Y.load(LOAD_ORD) != 0 {
+                X = 2; //~ ERROR: Undefined Behavior: Non-atomic race
+            }
+        });
+    }
+    0
+}
diff --git a/src/tools/miri/tests/genmc/fail/loom/buggy_inc.rs b/src/tools/miri/tests/genmc/fail/loom/buggy_inc.rs
new file mode 100644
index 0000000000000..508eae756f320
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/loom/buggy_inc.rs
@@ -0,0 +1,66 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: Copyright (c) 2019 Carl Lerche
+
+// This is the test `checks_fail` from loom/test/smoke.rs adapted for Miri-GenMC.
+// https://github.com/tokio-rs/loom/blob/dbf32b04bae821c64be44405a0bb72ca08741558/tests/smoke.rs
+
+// This test checks that an incorrect implementation of an incrementing counter is detected.
+// The counter behaves wrong if two threads try to increment at the same time (increments can be lost).
+
+#![no_main]
+
+#[cfg(not(any(non_genmc_std, genmc_std)))]
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+struct BuggyInc {
+    num: AtomicUsize,
+}
+
+impl BuggyInc {
+    const fn new() -> BuggyInc {
+        BuggyInc { num: AtomicUsize::new(0) }
+    }
+
+    fn inc(&self) {
+        // The bug is here:
+        // Another thread can increment `self.num` between the next two lines,
+        // which is then overridden by this thread.
+        let curr = self.num.load(Acquire);
+        self.num.store(curr + 1, Release);
+    }
+}
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        static BUGGY_INC: BuggyInc = BuggyInc::new();
+        // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+        BUGGY_INC.num.store(0, Relaxed);
+
+        let ids = [
+            spawn_pthread_closure(|| {
+                BUGGY_INC.inc();
+            }),
+            spawn_pthread_closure(|| {
+                BUGGY_INC.inc();
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // We check that we can detect the incorrect counter implementation:
+        if 2 != BUGGY_INC.num.load(Relaxed) {
+            std::process::abort(); //~ ERROR: abnormal termination
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/fail/loom/buggy_inc.stderr b/src/tools/miri/tests/genmc/fail/loom/buggy_inc.stderr
new file mode 100644
index 0000000000000..290cdf90a0847
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/loom/buggy_inc.stderr
@@ -0,0 +1,16 @@
+Running GenMC Verification...
+error: abnormal termination: the program aborted execution
+  --> tests/genmc/fail/loom/buggy_inc.rs:LL:CC
+   |
+LL |             std::process::abort();
+   |             ^^^^^^^^^^^^^^^^^^^^^ abnormal termination occurred here
+   |
+   = note: BACKTRACE:
+   = note: inside `miri_start` at tests/genmc/fail/loom/buggy_inc.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+note: add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/fail/loom/store_buffering.genmc.stderr b/src/tools/miri/tests/genmc/fail/loom/store_buffering.genmc.stderr
new file mode 100644
index 0000000000000..7742790e33307
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/loom/store_buffering.genmc.stderr
@@ -0,0 +1,16 @@
+Running GenMC Verification...
+error: abnormal termination: the program aborted execution
+  --> tests/genmc/fail/loom/store_buffering.rs:LL:CC
+   |
+LL |             std::process::abort();
+   |             ^^^^^^^^^^^^^^^^^^^^^ abnormal termination occurred here
+   |
+   = note: BACKTRACE:
+   = note: inside `miri_start` at tests/genmc/fail/loom/store_buffering.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+note: add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/fail/loom/store_buffering.non_genmc.stderr b/src/tools/miri/tests/genmc/fail/loom/store_buffering.non_genmc.stderr
new file mode 100644
index 0000000000000..a6c3ed7055e75
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/loom/store_buffering.non_genmc.stderr
@@ -0,0 +1,13 @@
+error: abnormal termination: the program aborted execution
+  --> tests/genmc/fail/loom/store_buffering.rs:LL:CC
+   |
+LL |             std::process::abort();
+   |             ^^^^^^^^^^^^^^^^^^^^^ abnormal termination occurred here
+   |
+   = note: BACKTRACE:
+   = note: inside `miri_start` at tests/genmc/fail/loom/store_buffering.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/fail/loom/store_buffering.rs b/src/tools/miri/tests/genmc/fail/loom/store_buffering.rs
new file mode 100644
index 0000000000000..4955dfd8a77ad
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/loom/store_buffering.rs
@@ -0,0 +1,57 @@
+//@ revisions: non_genmc genmc
+//@[genmc] compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: Copyright (c) 2019 Carl Lerche
+
+// This is the test `store_buffering` from `loom/test/litmus.rs`, adapted for Miri-GenMC.
+// https://github.com/tokio-rs/loom/blob/dbf32b04bae821c64be44405a0bb72ca08741558/tests/litmus.rs
+
+// This test shows the comparison between running Miri with or without GenMC.
+// Without GenMC, Miri requires multiple iterations of the loop to detect the error.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // For normal Miri, we need multiple repetitions, but GenMC should find the bug with only 1.
+    const REPS: usize = if cfg!(non_genmc) { 128 } else { 1 };
+    for _ in 0..REPS {
+        // New atomics every iterations, so they don't influence each other.
+        let x = AtomicUsize::new(0);
+        let y = AtomicUsize::new(0);
+
+        // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+        x.store(0, Relaxed);
+        y.store(0, Relaxed);
+
+        let mut a: usize = 1234;
+        let mut b: usize = 1234;
+        unsafe {
+            let ids = [
+                spawn_pthread_closure(|| {
+                    x.store(1, Relaxed);
+                    a = y.load(Relaxed)
+                }),
+                spawn_pthread_closure(|| {
+                    y.store(1, Relaxed);
+                    b = x.load(Relaxed)
+                }),
+            ];
+            join_pthreads(ids);
+        }
+        if (a, b) == (0, 0) {
+            std::process::abort(); //~ ERROR: abnormal termination
+        }
+    }
+
+    0
+}
diff --git a/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.relaxed4.stderr b/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.relaxed4.stderr
new file mode 100644
index 0000000000000..80ac7206644c6
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.relaxed4.stderr
@@ -0,0 +1,16 @@
+Running GenMC Verification...
+error: abnormal termination: the program aborted execution
+  --> tests/genmc/fail/simple/2w2w_weak.rs:LL:CC
+   |
+LL |             std::process::abort();
+   |             ^^^^^^^^^^^^^^^^^^^^^ abnormal termination occurred here
+   |
+   = note: BACKTRACE:
+   = note: inside `miri_start` at tests/genmc/fail/simple/2w2w_weak.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+note: add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.release4.stderr b/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.release4.stderr
new file mode 100644
index 0000000000000..80ac7206644c6
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.release4.stderr
@@ -0,0 +1,16 @@
+Running GenMC Verification...
+error: abnormal termination: the program aborted execution
+  --> tests/genmc/fail/simple/2w2w_weak.rs:LL:CC
+   |
+LL |             std::process::abort();
+   |             ^^^^^^^^^^^^^^^^^^^^^ abnormal termination occurred here
+   |
+   = note: BACKTRACE:
+   = note: inside `miri_start` at tests/genmc/fail/simple/2w2w_weak.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+note: add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.rs b/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.rs
new file mode 100644
index 0000000000000..baf3584966ec8
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.rs
@@ -0,0 +1,72 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+//@revisions: sc3_rel1 release4 relaxed4
+
+// The pass tests "2w2w_3sc_1rel.rs", "2w2w_4rel" and "2w2w_4sc" and the fail test "2w2w_weak.rs" are related.
+//
+// This test has multiple variants using different memory orderings.
+// When using any combination of orderings except using all 4 `SeqCst`, the memory model allows the program to result in (X, Y) == (1, 1).
+// The "pass" variants only check that we get the expected number of executions (3 for all SC, 4 otherwise),
+// and a valid outcome every execution, but do not check that we get all allowed results.
+// This "fail" variant ensures we can explore the execution resulting in (1, 1), with an incorrect assumption that the result (1, 1) is impossible.
+//
+// Miri without GenMC is unable to produce this program execution and thus detect the incorrect assumption, even with `-Zmiri-many-seeds`.
+//
+// To get good coverage, we test the combination with the strongest orderings allowing this result (3 `SeqCst`, 1 `Release`),
+// the weakest orderings (4 `Relaxed`), and one in between (4 `Release`).
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::{self, *};
+
+use crate::genmc::{join_pthreads, spawn_pthread_closure};
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+// Strongest orderings allowing result (1, 1).
+#[cfg(seqcst_rel)]
+const STORE_ORD_3: Ordering = SeqCst;
+#[cfg(seqcst_rel)]
+const STORE_ORD_1: Ordering = Release;
+
+// 4 * `Release`.
+#[cfg(acqrel)]
+const STORE_ORD_3: Ordering = Release;
+#[cfg(acqrel)]
+const STORE_ORD_1: Ordering = Release;
+
+// Weakest orderings (4 * `Relaxed`).
+#[cfg(not(any(acqrel, seqcst_rel)))]
+const STORE_ORD_3: Ordering = Relaxed;
+#[cfg(not(any(acqrel, seqcst_rel)))]
+const STORE_ORD_1: Ordering = Relaxed;
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, STORE_ORD_3);
+                Y.store(2, STORE_ORD_3);
+            }),
+            spawn_pthread_closure(|| {
+                Y.store(1, STORE_ORD_1);
+                X.store(2, STORE_ORD_3);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // We incorrectly assume that the result (1, 1) as unreachable.
+        let result = (X.load(Relaxed), Y.load(Relaxed));
+        if result == (1, 1) {
+            std::process::abort(); //~ ERROR: abnormal termination
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.sc3_rel1.stderr b/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.sc3_rel1.stderr
new file mode 100644
index 0000000000000..80ac7206644c6
--- /dev/null
+++ b/src/tools/miri/tests/genmc/fail/simple/2w2w_weak.sc3_rel1.stderr
@@ -0,0 +1,16 @@
+Running GenMC Verification...
+error: abnormal termination: the program aborted execution
+  --> tests/genmc/fail/simple/2w2w_weak.rs:LL:CC
+   |
+LL |             std::process::abort();
+   |             ^^^^^^^^^^^^^^^^^^^^^ abnormal termination occurred here
+   |
+   = note: BACKTRACE:
+   = note: inside `miri_start` at tests/genmc/fail/simple/2w2w_weak.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+note: add `-Zmiri-genmc-print-genmc-output` to MIRIFLAGS to see the detailed GenMC error report
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.rs b/src/tools/miri/tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.rs
new file mode 100644
index 0000000000000..8a77d54a64f8b
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.rs
@@ -0,0 +1,70 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows -Zmiri-ignore-leaks
+
+// Adapted from: `impl LazyKey`, `fn lazy_init`: rust/library/std/src/sys/thread_local/key/racy.rs
+// Two threads race to initialize a key, which is just an index into an array in this test.
+// The (Acquire, Release) orderings on the compare_exchange prevent any data races for reading from `VALUES[key]`.
+
+// FIXME(genmc): GenMC does not model the failure ordering of compare_exchange currently.
+// Miri thus upgrades the success ordering to prevent showing any false data races in cases like this one.
+// Once GenMC supports the failure ordering, this test should work without the upgrading.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+const KEY_SENTVAL: usize = usize::MAX;
+
+static KEY: AtomicUsize = AtomicUsize::new(KEY_SENTVAL);
+
+static mut VALUES: [usize; 2] = [0, 0];
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    KEY.store(KEY_SENTVAL, Relaxed);
+
+    unsafe {
+        let mut a = 0;
+        let mut b = 0;
+        let ids = [
+            spawn_pthread_closure(|| {
+                VALUES[0] = 42;
+                let key = get_or_init(0);
+                if key > 2 {
+                    std::process::abort();
+                }
+                a = VALUES[key];
+            }),
+            spawn_pthread_closure(|| {
+                VALUES[1] = 1234;
+                let key = get_or_init(1);
+                if key > 2 {
+                    std::process::abort();
+                }
+                b = VALUES[key];
+            }),
+        ];
+        join_pthreads(ids);
+        if a != b {
+            std::process::abort();
+        }
+    }
+    0
+}
+
+fn get_or_init(key: usize) -> usize {
+    match KEY.compare_exchange(KEY_SENTVAL, key, Release, Acquire) {
+        // The CAS succeeded, so we've created the actual key.
+        Ok(_) => key,
+        // If someone beat us to the punch, use their key instead.
+        // The `Acquire` failure ordering means we synchronized with the `Release` compare_exchange in the thread that wrote the other key.
+        // We can thus read from `VALUES[other_key]` without a data race.
+        Err(other_key) => other_key,
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.stderr b/src/tools/miri/tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.stderr
new file mode 100644
index 0000000000000..31438f9352fe6
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.stderr
@@ -0,0 +1,22 @@
+Running GenMC Verification...
+warning: GenMC currently does not model the failure ordering for `compare_exchange`. Success ordering 'Release' was upgraded to 'AcqRel' to match failure ordering 'Acquire'. Miri with GenMC might miss bugs related to this memory access.
+  --> tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.rs:LL:CC
+   |
+LL |     match KEY.compare_exchange(KEY_SENTVAL, key, Release, Acquire) {
+   |           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ GenMC might miss possible behaviors of this code
+   |
+   = note: BACKTRACE on thread `unnamed-ID`:
+   = note: inside `get_or_init` at tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.rs:LL:CC
+note: inside closure
+  --> tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.rs:LL:CC
+   |
+LL |                 let key = get_or_init(0);
+   |                           ^^^^^^^^^^^^^^
+   = note: inside `<std::boxed::Box<{closure@tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.rs:LL:CC}> as std::ops::FnOnce<()>>::call_once` at RUSTLIB/alloc/src/boxed.rs:LL:CC
+note: inside `genmc::spawn_pthread_closure::thread_func::<{closure@tests/genmc/pass/atomics/cas_failure_ord_racy_key_init.rs:LL:CC}>`
+  --> tests/genmc/pass/atomics/../../../utils/genmc.rs:LL:CC
+   |
+LL |         f();
+   |         ^^^
+
+Verification complete with 2 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/atomics/cas_simple.rs b/src/tools/miri/tests/genmc/pass/atomics/cas_simple.rs
new file mode 100644
index 0000000000000..e32c7cdf80c43
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/atomics/cas_simple.rs
@@ -0,0 +1,34 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Test the basic functionality of compare_exchange.
+
+#![no_main]
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::*;
+
+static VALUE: AtomicUsize = AtomicUsize::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    VALUE.store(1, SeqCst);
+
+    // Expect success:
+    if VALUE.compare_exchange(1, 2, SeqCst, SeqCst) != Ok(1) {
+        std::process::abort();
+    }
+    // New value should be written:
+    if 2 != VALUE.load(SeqCst) {
+        std::process::abort()
+    }
+
+    // Expect failure:
+    if VALUE.compare_exchange(1234, 42, SeqCst, SeqCst) != Err(2) {
+        std::process::abort();
+    }
+    // Value should be unchanged:
+    if 2 != VALUE.load(SeqCst) {
+        std::process::abort()
+    }
+    0
+}
diff --git a/src/tools/miri/tests/genmc/pass/atomics/cas_simple.stderr b/src/tools/miri/tests/genmc/pass/atomics/cas_simple.stderr
new file mode 100644
index 0000000000000..7867be2dbe8ed
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/atomics/cas_simple.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 1 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/atomics/rmw_ops.rs b/src/tools/miri/tests/genmc/pass/atomics/rmw_ops.rs
new file mode 100644
index 0000000000000..f48466e8ce10c
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/atomics/rmw_ops.rs
@@ -0,0 +1,91 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// This test check for correct handling of atomic read-modify-write operations for all integer sizes.
+// Atomic max and min should return the previous value, and store the result in the atomic.
+// Atomic addition and subtraction should have wrapping semantics.
+// `and`, `nand`, `or`, `xor` should behave like their non-atomic counterparts.
+
+// FIXME(genmc): add 128 bit atomics for platforms that support it, once GenMC gets 128 bit atomic support
+
+#![no_main]
+
+use std::sync::atomic::*;
+
+const ORD: Ordering = Ordering::SeqCst;
+
+fn assert_eq<T: Eq>(x: T, y: T) {
+    if x != y {
+        std::process::abort();
+    }
+}
+
+macro_rules! test_rmw_edge_cases {
+    ($int:ty, $atomic:ty) => {{
+        let x = <$atomic>::new(123);
+        // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+        x.store(123, ORD);
+
+        // MAX, ADD
+        assert_eq(123, x.fetch_max(0, ORD)); // `max` keeps existing value
+        assert_eq(123, x.fetch_max(<$int>::MAX, ORD)); // `max` stores the new value
+        assert_eq(<$int>::MAX, x.fetch_add(10, ORD)); // `fetch_add` should be wrapping
+        assert_eq(<$int>::MAX.wrapping_add(10), x.load(ORD));
+
+        // MIN, SUB
+        x.store(42, ORD);
+        assert_eq(42, x.fetch_min(<$int>::MAX, ORD)); // `min` keeps existing value
+        assert_eq(42, x.fetch_min(<$int>::MIN, ORD)); // `min` stores the new value
+        assert_eq(<$int>::MIN, x.fetch_sub(10, ORD)); // `fetch_sub` should be wrapping
+        assert_eq(<$int>::MIN.wrapping_sub(10), x.load(ORD));
+
+        // Small enough pattern to work for all integer sizes.
+        let pattern = 0b01010101;
+
+        // AND
+        x.store(!0, ORD);
+        assert_eq(!0, x.fetch_and(pattern, ORD));
+        assert_eq(!0 & pattern, x.load(ORD));
+
+        // NAND
+        x.store(!0, ORD);
+        assert_eq(!0, x.fetch_nand(pattern, ORD));
+        assert_eq(!(!0 & pattern), x.load(ORD));
+
+        // OR
+        x.store(!0, ORD);
+        assert_eq(!0, x.fetch_or(pattern, ORD));
+        assert_eq(!0 | pattern, x.load(ORD));
+
+        // XOR
+        x.store(!0, ORD);
+        assert_eq(!0, x.fetch_xor(pattern, ORD));
+        assert_eq(!0 ^ pattern, x.load(ORD));
+
+        // SWAP
+        x.store(!0, ORD);
+        assert_eq(!0, x.swap(pattern, ORD));
+        assert_eq(pattern, x.load(ORD));
+
+        // Check correct behavior of atomic min/max combined with overflowing add/sub.
+        x.store(10, ORD);
+        assert_eq(10, x.fetch_add(<$int>::MAX, ORD)); // definitely overflows, so new value of x is smaller than 10
+        assert_eq(<$int>::MAX.wrapping_add(10), x.fetch_max(10, ORD)); // new value of x should be 10
+        // assert_eq(10, x.load(ORD)); // FIXME(genmc,#4572): enable this check once GenMC correctly handles min/max truncation.
+    }};
+}
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    test_rmw_edge_cases!(u8, AtomicU8);
+    test_rmw_edge_cases!(u16, AtomicU16);
+    test_rmw_edge_cases!(u32, AtomicU32);
+    test_rmw_edge_cases!(u64, AtomicU64);
+    test_rmw_edge_cases!(usize, AtomicUsize);
+    test_rmw_edge_cases!(i8, AtomicI8);
+    test_rmw_edge_cases!(i16, AtomicI16);
+    test_rmw_edge_cases!(i32, AtomicI32);
+    test_rmw_edge_cases!(i64, AtomicI64);
+    test_rmw_edge_cases!(isize, AtomicIsize);
+
+    0
+}
diff --git a/src/tools/miri/tests/genmc/pass/atomics/rmw_ops.stderr b/src/tools/miri/tests/genmc/pass/atomics/rmw_ops.stderr
new file mode 100644
index 0000000000000..7867be2dbe8ed
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/atomics/rmw_ops.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 1 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2cowr.rs b/src/tools/miri/tests/genmc/pass/litmus/2cowr.rs
new file mode 100644
index 0000000000000..d3fdb470ed367
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2cowr.rs
@@ -0,0 +1,51 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test "2CoWR".
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                a = Y.load(Acquire);
+            }),
+            spawn_pthread_closure(|| {
+                b = X.load(Acquire);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+            }),
+            spawn_pthread_closure(|| {
+                Y.store(1, Release);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!((a, b), (0, 0) | (0, 1) | (1, 0) | (1, 1)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2cowr.stderr b/src/tools/miri/tests/genmc/pass/litmus/2cowr.stderr
new file mode 100644
index 0000000000000..a67635dee1bef
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2cowr.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 4 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_2sc_scf.rs b/src/tools/miri/tests/genmc/pass/litmus/2w2w_2sc_scf.rs
new file mode 100644
index 0000000000000..3b3fca02285d6
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_2sc_scf.rs
@@ -0,0 +1,33 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test "2+2W+2sc+scf".
+// It tests correct handling of SeqCst fences combined with relaxed accesses.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        spawn_pthread_closure(|| {
+            X.store(1, SeqCst);
+            Y.store(2, SeqCst);
+        });
+        spawn_pthread_closure(|| {
+            Y.store(1, Relaxed);
+            std::sync::atomic::fence(SeqCst);
+            X.store(2, Relaxed);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_2sc_scf.stderr b/src/tools/miri/tests/genmc/pass/litmus/2w2w_2sc_scf.stderr
new file mode 100644
index 0000000000000..485142e945a72
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_2sc_scf.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 3 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_3sc_1rel.release1.stderr b/src/tools/miri/tests/genmc/pass/litmus/2w2w_3sc_1rel.release1.stderr
new file mode 100644
index 0000000000000..a67635dee1bef
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_3sc_1rel.release1.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 4 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_3sc_1rel.release2.stderr b/src/tools/miri/tests/genmc/pass/litmus/2w2w_3sc_1rel.release2.stderr
new file mode 100644
index 0000000000000..a67635dee1bef
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_3sc_1rel.release2.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 4 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_3sc_1rel.rs b/src/tools/miri/tests/genmc/pass/litmus/2w2w_3sc_1rel.rs
new file mode 100644
index 0000000000000..22fe9524c37f5
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_3sc_1rel.rs
@@ -0,0 +1,54 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+//@revisions: release1 release2
+
+// Translated from GenMC's test "2+2W+3sc+rel1" and "2+2W+3sc+rel2" (two variants that swap which store is `Release`).
+//
+// The pass tests "2w2w_3sc_1rel.rs", "2w2w_4rel" and "2w2w_4sc" and the fail test "2w2w_weak.rs" are related.
+// Check "2w2w_weak.rs" for a more detailed description.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, SeqCst);
+                Y.store(2, SeqCst);
+            }),
+            // Variant 1: `Release` goes first.
+            #[cfg(release1)]
+            spawn_pthread_closure(|| {
+                Y.store(1, Release);
+                X.store(2, SeqCst);
+            }),
+            // Variant 2: `Release` goes second.
+            #[cfg(not(release1))]
+            spawn_pthread_closure(|| {
+                Y.store(1, SeqCst);
+                X.store(2, Release);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        let result = (X.load(Relaxed), Y.load(Relaxed));
+        if !matches!(result, (1, 2) | (1, 1) | (2, 2) | (2, 1)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_4rel.rs b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4rel.rs
new file mode 100644
index 0000000000000..f47f5a11c5c96
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4rel.rs
@@ -0,0 +1,49 @@
+//@revisions: weak sc
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+//@[sc]compile-flags: -Zmiri-disable-weak-memory-emulation
+
+// Translated from GenMC's test "2+2W".
+//
+// The pass tests "2w2w_3sc_1rel.rs", "2w2w_4rel" and "2w2w_4sc" and the fail test "2w2w_weak.rs" are related.
+// Check "2w2w_weak.rs" for a more detailed description.
+//
+// The `sc` variant of this test checks that without weak memory emulation, only 3 instead of 4 executions are explored (like the `2w2w_4sc.rs` test).
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        let ids = [
+            spawn_pthread_closure(|| {
+                Y.store(1, Release);
+                X.store(2, Release);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+                Y.store(2, Release);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        let result = (X.load(Relaxed), Y.load(Relaxed));
+        if !matches!(result, (1, 2) | (1, 1) | (2, 2) | (2, 1)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_4rel.sc.stderr b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4rel.sc.stderr
new file mode 100644
index 0000000000000..485142e945a72
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4rel.sc.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 3 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_4rel.weak.stderr b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4rel.weak.stderr
new file mode 100644
index 0000000000000..a67635dee1bef
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4rel.weak.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 4 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_4sc.rs b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4sc.rs
new file mode 100644
index 0000000000000..c5711ba04fce2
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4sc.rs
@@ -0,0 +1,45 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test "2+2W+4c".
+//
+// The pass tests "2w2w_3sc_1rel.rs", "2w2w_4rel" and "2w2w_4sc" and the fail test "2w2w_weak.rs" are related.
+// Check "2w2w_weak.rs" for a more detailed description.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, SeqCst);
+                Y.store(2, SeqCst);
+            }),
+            spawn_pthread_closure(|| {
+                Y.store(1, SeqCst);
+                X.store(2, SeqCst);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        let result = (X.load(Relaxed), Y.load(Relaxed));
+        if !matches!(result, (2, 1) | (2, 2) | (1, 2)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/2w2w_4sc.stderr b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4sc.stderr
new file mode 100644
index 0000000000000..485142e945a72
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/2w2w_4sc.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 3 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/IRIW-acq-sc.rs b/src/tools/miri/tests/genmc/pass/litmus/IRIW-acq-sc.rs
new file mode 100644
index 0000000000000..1ec62ff21342d
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/IRIW-acq-sc.rs
@@ -0,0 +1,64 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/IRIW-acq-sc" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let mut c = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, SeqCst);
+            }),
+            spawn_pthread_closure(|| {
+                a = X.load(Acquire);
+                Y.load(SeqCst);
+            }),
+            spawn_pthread_closure(|| {
+                b = Y.load(Acquire);
+                c = X.load(SeqCst);
+            }),
+            spawn_pthread_closure(|| {
+                Y.store(1, SeqCst);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!(
+            (a, b, c),
+            (0, 0, 0)
+                | (0, 0, 1)
+                | (0, 1, 0)
+                | (0, 1, 1)
+                | (1, 0, 0)
+                | (1, 0, 1)
+                | (1, 1, 0)
+                | (1, 1, 1)
+        ) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/IRIW-acq-sc.stderr b/src/tools/miri/tests/genmc/pass/litmus/IRIW-acq-sc.stderr
new file mode 100644
index 0000000000000..d6ec73a8c6672
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/IRIW-acq-sc.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 16 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/IRIWish.rs b/src/tools/miri/tests/genmc/pass/litmus/IRIWish.rs
new file mode 100644
index 0000000000000..c81573d59d1da
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/IRIWish.rs
@@ -0,0 +1,64 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/IRIWish" test.
+// This test prints the values read by the different threads to check that we get all the values we expect.
+
+// NOTE: the order of the lines in the output may change with changes to GenMC.
+// Before blessing the new output, ensure that only the order of lines in the output changed, and none of the outputs are missing.
+
+// NOTE: GenMC supports instruction caching and does not need replay completed threads.
+// This means that an identical test in GenMC may output fewer lines (disable instruction caching to see all results).
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+#[path = "../../../utils/mod.rs"]
+mod utils;
+
+use std::fmt::Write;
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+use crate::utils::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        let mut results = [1234; 5];
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                let r1 = X.load(Relaxed);
+                Y.store(r1, Release);
+                results[0] = r1;
+            }),
+            spawn_pthread_closure(|| {
+                results[1] = X.load(Relaxed);
+                std::sync::atomic::fence(AcqRel);
+                results[2] = Y.load(Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                results[3] = Y.load(Relaxed);
+                std::sync::atomic::fence(AcqRel);
+                results[4] = X.load(Relaxed);
+            }),
+        ];
+        join_pthreads(ids);
+
+        // Print the values to check that we get all of them:
+        writeln!(MiriStderr, "{results:?}").unwrap_or_else(|_| std::process::abort());
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/IRIWish.stderr b/src/tools/miri/tests/genmc/pass/litmus/IRIWish.stderr
new file mode 100644
index 0000000000000..7ea2dd5085136
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/IRIWish.stderr
@@ -0,0 +1,30 @@
+Running GenMC Verification...
+[1, 1, 1, 1, 1]
+[1, 1, 1, 0, 1]
+[1, 1, 1, 0, 0]
+[1, 1, 0, 1, 1]
+[1, 1, 0, 0, 1]
+[1, 1, 0, 0, 0]
+[1, 0, 1, 1, 1]
+[1, 0, 1, 0, 1]
+[1, 0, 1, 0, 0]
+[1, 0, 0, 1, 1]
+[1, 0, 0, 0, 1]
+[1, 0, 0, 0, 0]
+[0, 1, 0, 0, 1]
+[0, 1, 0, 0, 0]
+[0, 1, 0, 0, 1]
+[0, 1, 0, 0, 0]
+[0, 1, 0, 0, 1]
+[0, 1, 0, 0, 0]
+[0, 1, 0, 0, 1]
+[0, 1, 0, 0, 0]
+[0, 0, 0, 0, 1]
+[0, 0, 0, 0, 0]
+[0, 0, 0, 0, 1]
+[0, 0, 0, 0, 0]
+[0, 0, 0, 0, 1]
+[0, 0, 0, 0, 0]
+[0, 0, 0, 0, 1]
+[0, 0, 0, 0, 0]
+Verification complete with 28 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/LB.rs b/src/tools/miri/tests/genmc/pass/litmus/LB.rs
new file mode 100644
index 0000000000000..1cee3230b127c
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/LB.rs
@@ -0,0 +1,47 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/LB" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                a = Y.load(Acquire);
+                X.store(2, Release);
+            }),
+            spawn_pthread_closure(|| {
+                b = X.load(Acquire);
+                Y.store(1, Release);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!((a, b), (0, 0) | (0, 2) | (1, 0)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/LB.stderr b/src/tools/miri/tests/genmc/pass/litmus/LB.stderr
new file mode 100644
index 0000000000000..485142e945a72
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/LB.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 3 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/LB_incMPs.rs b/src/tools/miri/tests/genmc/pass/litmus/LB_incMPs.rs
new file mode 100644
index 0000000000000..e43d92fc6c55a
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/LB_incMPs.rs
@@ -0,0 +1,41 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/LB+incMPs" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+static W: AtomicU64 = AtomicU64::new(0);
+static Z: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        spawn_pthread_closure(|| {
+            X.load(Acquire);
+            Z.fetch_add(1, AcqRel);
+        });
+        spawn_pthread_closure(|| {
+            Z.fetch_add(1, AcqRel);
+            Y.store(1, Release);
+        });
+        spawn_pthread_closure(|| {
+            Y.load(Acquire);
+            W.fetch_add(1, AcqRel);
+        });
+        spawn_pthread_closure(|| {
+            W.fetch_add(1, AcqRel);
+            X.store(1, Release);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/LB_incMPs.stderr b/src/tools/miri/tests/genmc/pass/litmus/LB_incMPs.stderr
new file mode 100644
index 0000000000000..e414cd5e064d9
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/LB_incMPs.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 15 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MP.rs b/src/tools/miri/tests/genmc/pass/litmus/MP.rs
new file mode 100644
index 0000000000000..e245cdd15eef2
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MP.rs
@@ -0,0 +1,47 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/MP" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+                Y.store(1, Release);
+            }),
+            spawn_pthread_closure(|| {
+                a = Y.load(Acquire);
+                b = X.load(Acquire);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!((a, b), (0, 0) | (0, 1) | (1, 1)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MP.stderr b/src/tools/miri/tests/genmc/pass/litmus/MP.stderr
new file mode 100644
index 0000000000000..485142e945a72
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MP.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 3 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MPU2_rels_acqf.rs b/src/tools/miri/tests/genmc/pass/litmus/MPU2_rels_acqf.rs
new file mode 100644
index 0000000000000..9bb156a99970f
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MPU2_rels_acqf.rs
@@ -0,0 +1,67 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/MPU2+rels+acqf" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+#[path = "../../../utils/mod.rs"]
+mod utils;
+
+use std::fmt::Write;
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+use crate::utils::*;
+
+// Note: the GenMC equivalent of this test (genmc/tests/correct/litmus/MPU2+rels+acqf/mpu2+rels+acqf.c) uses non-atomic accesses for `X` with disabled race detection.
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        let mut a = Ok(1234);
+        let mut b = Ok(1234);
+        let mut c = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Relaxed);
+
+                Y.store(0, Release);
+                Y.store(1, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                a = Y.compare_exchange(2, 3, Relaxed, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                b = Y.compare_exchange(1, 2, Relaxed, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                c = Y.load(Acquire);
+                if c > 2 {
+                    std::sync::atomic::fence(Acquire);
+                    X.store(2, Relaxed);
+                }
+            }),
+        ];
+        join_pthreads(ids);
+
+        // Print the values to check that we get all of them:
+        writeln!(
+            MiriStderr,
+            "X={}, Y={}, a={a:?}, b={b:?}, c={c}",
+            X.load(Relaxed),
+            Y.load(Relaxed)
+        )
+        .unwrap_or_else(|_| std::process::abort());
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MPU2_rels_acqf.stderr b/src/tools/miri/tests/genmc/pass/litmus/MPU2_rels_acqf.stderr
new file mode 100644
index 0000000000000..29b59ce3bc1a3
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MPU2_rels_acqf.stderr
@@ -0,0 +1,38 @@
+Running GenMC Verification...
+X=1, Y=2, a=Err(1), b=Ok(1), c=2
+X=1, Y=2, a=Err(1), b=Ok(1), c=1
+X=1, Y=2, a=Err(1), b=Ok(1), c=0
+X=1, Y=2, a=Err(1), b=Ok(1), c=0
+X=2, Y=3, a=Ok(2), b=Ok(1), c=3
+X=1, Y=3, a=Ok(2), b=Ok(1), c=3
+X=1, Y=3, a=Ok(2), b=Ok(1), c=2
+X=1, Y=3, a=Ok(2), b=Ok(1), c=1
+X=1, Y=3, a=Ok(2), b=Ok(1), c=0
+X=1, Y=3, a=Ok(2), b=Ok(1), c=0
+X=1, Y=1, a=Err(1), b=Err(0), c=1
+X=1, Y=1, a=Err(1), b=Err(0), c=0
+X=1, Y=1, a=Err(1), b=Err(0), c=0
+X=1, Y=1, a=Err(1), b=Err(0), c=1
+X=1, Y=1, a=Err(1), b=Err(0), c=0
+X=1, Y=1, a=Err(1), b=Err(0), c=0
+X=1, Y=2, a=Err(0), b=Ok(1), c=2
+X=1, Y=2, a=Err(0), b=Ok(1), c=1
+X=1, Y=2, a=Err(0), b=Ok(1), c=0
+X=1, Y=2, a=Err(0), b=Ok(1), c=0
+X=1, Y=1, a=Err(0), b=Err(0), c=1
+X=1, Y=1, a=Err(0), b=Err(0), c=0
+X=1, Y=1, a=Err(0), b=Err(0), c=0
+X=1, Y=1, a=Err(0), b=Err(0), c=1
+X=1, Y=1, a=Err(0), b=Err(0), c=0
+X=1, Y=1, a=Err(0), b=Err(0), c=0
+X=1, Y=2, a=Err(0), b=Ok(1), c=2
+X=1, Y=2, a=Err(0), b=Ok(1), c=1
+X=1, Y=2, a=Err(0), b=Ok(1), c=0
+X=1, Y=2, a=Err(0), b=Ok(1), c=0
+X=1, Y=1, a=Err(0), b=Err(0), c=1
+X=1, Y=1, a=Err(0), b=Err(0), c=0
+X=1, Y=1, a=Err(0), b=Err(0), c=0
+X=1, Y=1, a=Err(0), b=Err(0), c=1
+X=1, Y=1, a=Err(0), b=Err(0), c=0
+X=1, Y=1, a=Err(0), b=Err(0), c=0
+Verification complete with 36 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MPU_rels_acq.rs b/src/tools/miri/tests/genmc/pass/litmus/MPU_rels_acq.rs
new file mode 100644
index 0000000000000..b36c8a288f426
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MPU_rels_acq.rs
@@ -0,0 +1,42 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/MPU+rels+acq" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+// Note: the GenMC equivalent of this test (genmc/tests/correct/litmus/MPU+rels+acq/mpu+rels+acq.c) uses non-atomic accesses for `X` with disabled race detection.
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+use crate::genmc::*;
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        spawn_pthread_closure(|| {
+            X.store(1, Relaxed);
+
+            Y.store(0, Release);
+            Y.store(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            Y.fetch_add(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            if Y.load(Acquire) > 1 {
+                X.store(2, Relaxed);
+            }
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MPU_rels_acq.stderr b/src/tools/miri/tests/genmc/pass/litmus/MPU_rels_acq.stderr
new file mode 100644
index 0000000000000..423ee6dc39996
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MPU_rels_acq.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 13 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MP_incMPs.rs b/src/tools/miri/tests/genmc/pass/litmus/MP_incMPs.rs
new file mode 100644
index 0000000000000..a08b7de27d13c
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MP_incMPs.rs
@@ -0,0 +1,36 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/MP+incMP" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+static Z: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        spawn_pthread_closure(|| {
+            Y.load(Acquire);
+            Z.fetch_add(1, AcqRel);
+        });
+        spawn_pthread_closure(|| {
+            Z.fetch_add(1, AcqRel);
+            X.load(Acquire);
+        });
+        spawn_pthread_closure(|| {
+            X.store(1, Release);
+            Y.store(1, Release);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MP_incMPs.stderr b/src/tools/miri/tests/genmc/pass/litmus/MP_incMPs.stderr
new file mode 100644
index 0000000000000..f527b61202327
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MP_incMPs.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 7 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MP_rels_acqf.rs b/src/tools/miri/tests/genmc/pass/litmus/MP_rels_acqf.rs
new file mode 100644
index 0000000000000..cf9f5f2dbfa34
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MP_rels_acqf.rs
@@ -0,0 +1,40 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/MP+rels+acqf" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+// Note: the GenMC equivalent of this test (genmc/tests/correct/litmus/MP+rels+acqf/mp+rels+acqf.c) uses non-atomic accesses for `X` with disabled race detection.
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+use crate::genmc::*;
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        spawn_pthread_closure(|| {
+            X.store(1, Relaxed);
+
+            Y.store(0, Release);
+            Y.store(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            if Y.load(Relaxed) != 0 {
+                std::sync::atomic::fence(Acquire);
+                let _x = X.load(Relaxed);
+            }
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/MP_rels_acqf.stderr b/src/tools/miri/tests/genmc/pass/litmus/MP_rels_acqf.stderr
new file mode 100644
index 0000000000000..a67635dee1bef
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/MP_rels_acqf.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 4 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/SB.rs b/src/tools/miri/tests/genmc/pass/litmus/SB.rs
new file mode 100644
index 0000000000000..e592fe05c4e49
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/SB.rs
@@ -0,0 +1,47 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/SB" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+                a = Y.load(Acquire);
+            }),
+            spawn_pthread_closure(|| {
+                Y.store(1, Release);
+                b = X.load(Acquire);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!((a, b), (0, 0) | (0, 1) | (1, 0) | (1, 1)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/SB.stderr b/src/tools/miri/tests/genmc/pass/litmus/SB.stderr
new file mode 100644
index 0000000000000..a67635dee1bef
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/SB.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 4 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/SB_2sc_scf.rs b/src/tools/miri/tests/genmc/pass/litmus/SB_2sc_scf.rs
new file mode 100644
index 0000000000000..ffc44de1bc7cf
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/SB_2sc_scf.rs
@@ -0,0 +1,32 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/SB+2sc+scf" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        spawn_pthread_closure(|| {
+            X.store(1, SeqCst);
+            Y.load(SeqCst);
+        });
+        spawn_pthread_closure(|| {
+            Y.store(1, Relaxed);
+            std::sync::atomic::fence(SeqCst);
+            X.load(Relaxed);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/SB_2sc_scf.stderr b/src/tools/miri/tests/genmc/pass/litmus/SB_2sc_scf.stderr
new file mode 100644
index 0000000000000..485142e945a72
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/SB_2sc_scf.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 3 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/Z6_U.rs b/src/tools/miri/tests/genmc/pass/litmus/Z6_U.rs
new file mode 100644
index 0000000000000..f96679b23a5cd
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/Z6_U.rs
@@ -0,0 +1,62 @@
+//@revisions: weak sc
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+//@[sc]compile-flags: -Zmiri-disable-weak-memory-emulation
+
+// Translated from GenMC's "litmus/Z6.U" test.
+//
+// The `sc` variant of this test checks that we get fewer executions when weak memory emulation is disabled.
+
+// NOTE: the order of the lines in the output may change with changes to GenMC.
+// Before blessing the new output, ensure that only the order of lines in the output changed, and none of the outputs are missing.
+
+// NOTE: GenMC supports instruction caching and does not need replay completed threads.
+// This means that an identical test in GenMC may output fewer lines (disable instruction caching to see all results).
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+#[path = "../../../utils/mod.rs"]
+mod utils;
+
+use std::fmt::Write;
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+use crate::utils::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, SeqCst);
+                Y.store(1, Release);
+            }),
+            spawn_pthread_closure(|| {
+                Y.fetch_add(1, SeqCst);
+                a = Y.load(Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                Y.store(3, SeqCst);
+                b = X.load(SeqCst);
+            }),
+        ];
+        join_pthreads(ids);
+
+        // Print the values to check that we get all of them:
+        writeln!(MiriStderr, "a={a}, b={b}, X={}, Y={}", X.load(Relaxed), Y.load(Relaxed))
+            .unwrap_or_else(|_| std::process::abort());
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/Z6_U.sc.stderr b/src/tools/miri/tests/genmc/pass/litmus/Z6_U.sc.stderr
new file mode 100644
index 0000000000000..c8fbb8951a386
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/Z6_U.sc.stderr
@@ -0,0 +1,20 @@
+Running GenMC Verification...
+a=2, b=1, X=1, Y=3
+a=4, b=1, X=1, Y=4
+a=3, b=1, X=1, Y=3
+a=2, b=1, X=1, Y=2
+a=2, b=0, X=1, Y=2
+a=1, b=1, X=1, Y=1
+a=1, b=0, X=1, Y=1
+a=4, b=1, X=1, Y=1
+a=4, b=0, X=1, Y=1
+a=1, b=1, X=1, Y=3
+a=3, b=1, X=1, Y=3
+a=1, b=1, X=1, Y=1
+a=1, b=0, X=1, Y=1
+a=3, b=1, X=1, Y=1
+a=3, b=0, X=1, Y=1
+a=1, b=1, X=1, Y=3
+a=1, b=1, X=1, Y=1
+a=1, b=0, X=1, Y=1
+Verification complete with 18 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/Z6_U.weak.stderr b/src/tools/miri/tests/genmc/pass/litmus/Z6_U.weak.stderr
new file mode 100644
index 0000000000000..72c59d33f77cf
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/Z6_U.weak.stderr
@@ -0,0 +1,24 @@
+Running GenMC Verification...
+a=2, b=1, X=1, Y=3
+a=4, b=1, X=1, Y=4
+a=4, b=0, X=1, Y=4
+a=3, b=1, X=1, Y=3
+a=2, b=1, X=1, Y=2
+a=2, b=0, X=1, Y=2
+a=1, b=1, X=1, Y=1
+a=1, b=0, X=1, Y=1
+a=4, b=1, X=1, Y=1
+a=4, b=0, X=1, Y=1
+a=1, b=1, X=1, Y=3
+a=1, b=0, X=1, Y=3
+a=3, b=1, X=1, Y=3
+a=3, b=0, X=1, Y=3
+a=1, b=1, X=1, Y=1
+a=1, b=0, X=1, Y=1
+a=3, b=1, X=1, Y=1
+a=3, b=0, X=1, Y=1
+a=1, b=1, X=1, Y=3
+a=1, b=0, X=1, Y=3
+a=1, b=1, X=1, Y=1
+a=1, b=0, X=1, Y=1
+Verification complete with 22 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/Z6_acq.rs b/src/tools/miri/tests/genmc/pass/litmus/Z6_acq.rs
new file mode 100644
index 0000000000000..b00f3a59ce672
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/Z6_acq.rs
@@ -0,0 +1,38 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/Z6+acq" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+static Z: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        spawn_pthread_closure(|| {
+            X.store(1, Relaxed);
+            std::sync::atomic::fence(SeqCst);
+            Y.store(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            Y.load(Acquire);
+            Z.store(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            Z.store(2, Relaxed);
+            std::sync::atomic::fence(SeqCst);
+            X.load(Relaxed);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/Z6_acq.stderr b/src/tools/miri/tests/genmc/pass/litmus/Z6_acq.stderr
new file mode 100644
index 0000000000000..f527b61202327
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/Z6_acq.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 7 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/atomicpo.rs b/src/tools/miri/tests/genmc/pass/litmus/atomicpo.rs
new file mode 100644
index 0000000000000..75be89893dab8
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/atomicpo.rs
@@ -0,0 +1,32 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test "litmus/atomicpo".
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        spawn_pthread_closure(|| {
+            X.store(1, Relaxed);
+            std::sync::atomic::fence(AcqRel);
+            Y.store(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            Y.swap(1, Relaxed);
+            X.swap(1, Relaxed);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/atomicpo.stderr b/src/tools/miri/tests/genmc/pass/litmus/atomicpo.stderr
new file mode 100644
index 0000000000000..a67635dee1bef
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/atomicpo.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 4 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/casdep.rs b/src/tools/miri/tests/genmc/pass/litmus/casdep.rs
new file mode 100644
index 0000000000000..c376d96b111cc
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/casdep.rs
@@ -0,0 +1,37 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test "litmus/casdep".
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+static Z: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+    Z.store(0, Relaxed);
+
+    unsafe {
+        spawn_pthread_closure(|| {
+            let a = X.load(Relaxed);
+            let _b = Y.compare_exchange(a, 1, Relaxed, Relaxed);
+            Z.store(a, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            Y.store(2, Relaxed);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/casdep.stderr b/src/tools/miri/tests/genmc/pass/litmus/casdep.stderr
new file mode 100644
index 0000000000000..bde951866d013
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/casdep.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 2 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/ccr.rs b/src/tools/miri/tests/genmc/pass/litmus/ccr.rs
new file mode 100644
index 0000000000000..865895b4dcd96
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/ccr.rs
@@ -0,0 +1,34 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test "litmus/ccr".
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        spawn_pthread_closure(|| {
+            let expected = 0;
+            let _ = X.compare_exchange(expected, 42, Relaxed, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            let expected = 0;
+            let _ = X.compare_exchange(expected, 17, Relaxed, Relaxed);
+            X.load(Relaxed);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/ccr.stderr b/src/tools/miri/tests/genmc/pass/litmus/ccr.stderr
new file mode 100644
index 0000000000000..bde951866d013
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/ccr.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 2 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/cii.rs b/src/tools/miri/tests/genmc/pass/litmus/cii.rs
new file mode 100644
index 0000000000000..663c806e667c4
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/cii.rs
@@ -0,0 +1,35 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test "litmus/cii".
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        spawn_pthread_closure(|| {
+            let expected = 1;
+            let _ = X.compare_exchange(expected, 2, Relaxed, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            X.fetch_add(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            X.fetch_add(1, Relaxed);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/cii.stderr b/src/tools/miri/tests/genmc/pass/litmus/cii.stderr
new file mode 100644
index 0000000000000..b9bdf2245aed6
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/cii.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 6 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corr.rs b/src/tools/miri/tests/genmc/pass/litmus/corr.rs
new file mode 100644
index 0000000000000..d6c95100fc241
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corr.rs
@@ -0,0 +1,45 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "CoRR" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+                X.store(2, Release);
+            }),
+            spawn_pthread_closure(|| {
+                a = X.load(Acquire);
+                b = X.load(Acquire);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!((a, b), (0, 0) | (0, 1) | (0, 2) | (1, 1) | (1, 2) | (2, 2)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corr.stderr b/src/tools/miri/tests/genmc/pass/litmus/corr.stderr
new file mode 100644
index 0000000000000..b9bdf2245aed6
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corr.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 6 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corr0.rs b/src/tools/miri/tests/genmc/pass/litmus/corr0.rs
new file mode 100644
index 0000000000000..f722131fda846
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corr0.rs
@@ -0,0 +1,46 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "CoRR0" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+            }),
+            spawn_pthread_closure(|| {
+                a = X.load(Acquire);
+            }),
+            spawn_pthread_closure(|| {
+                b = X.load(Acquire);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!((a, b), (0, 0) | (0, 1) | (1, 0) | (1, 1)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corr0.stderr b/src/tools/miri/tests/genmc/pass/litmus/corr0.stderr
new file mode 100644
index 0000000000000..a67635dee1bef
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corr0.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 4 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corr1.rs b/src/tools/miri/tests/genmc/pass/litmus/corr1.rs
new file mode 100644
index 0000000000000..a4e8249bac309
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corr1.rs
@@ -0,0 +1,58 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "CoRR1" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let mut c = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(2, Release);
+            }),
+            spawn_pthread_closure(|| {
+                a = X.load(Acquire);
+                b = X.load(Acquire);
+            }),
+            spawn_pthread_closure(|| {
+                c = X.load(Acquire);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values (only 0, 1, 2 are allowed):
+        if !(matches!(a, 0..=2) && matches!(b, 0..=2) && matches!(c, 0..=2)) {
+            std::process::abort();
+        }
+        // The 36 possible program executions can have 21 different results for (a, b, c).
+        // Of the 27 = 3*3*3 total results for (a, b, c),
+        // those where `a != 0` and `b == 0` are not allowed by the memory model.
+        // Once the load for `a` reads either 1 or 2, the load for `b` must see that store too, so it cannot read 0.
+        if a != 0 && b == 0 {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corr1.stderr b/src/tools/miri/tests/genmc/pass/litmus/corr1.stderr
new file mode 100644
index 0000000000000..384425fc43c67
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corr1.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 36 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corr2.rs b/src/tools/miri/tests/genmc/pass/litmus/corr2.rs
new file mode 100644
index 0000000000000..2f490d3637797
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corr2.rs
@@ -0,0 +1,70 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "CoRR2" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let mut c = 1234;
+        let mut d = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(2, Release);
+            }),
+            spawn_pthread_closure(|| {
+                a = X.load(Acquire);
+                b = X.load(Acquire);
+            }),
+            spawn_pthread_closure(|| {
+                c = X.load(Acquire);
+                d = X.load(Acquire);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values (only 0, 1, 2 are allowed):
+        if !(matches!(a, 0..=2) && matches!(b, 0..=2) && matches!(c, 0..=2) && matches!(d, 0..=2)) {
+            std::process::abort();
+        }
+
+        // The 72 possible program executions can have 47 different results for (a, b, c, d).
+        // Of the 81 = 3*3*3*3 total results for (a, b, c, d),
+        // those where `a != 0` and `b == 0` are not allowed by the memory model.
+        // Once the load for `a` reads either 1 or 2, the load for `b` must see that store too, so it cannot read 0.
+        // The same applies to `c, d` in the other thread.
+        //
+        // Additionally, if one thread reads `1, 2` or `2, 1`, the other thread cannot see the opposite order.
+        if a != 0 && b == 0 {
+            std::process::abort();
+        } else if c != 0 && d == 0 {
+            std::process::abort();
+        } else if (a, b) == (1, 2) && (c, d) == (2, 1) {
+            std::process::abort();
+        } else if (a, b) == (2, 1) && (c, d) == (1, 2) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corr2.stderr b/src/tools/miri/tests/genmc/pass/litmus/corr2.stderr
new file mode 100644
index 0000000000000..07fd2d4de180c
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corr2.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 72 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corw.rs b/src/tools/miri/tests/genmc/pass/litmus/corw.rs
new file mode 100644
index 0000000000000..7acc20822a4f4
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corw.rs
@@ -0,0 +1,43 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "CoRW" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                a = X.load(Acquire);
+                X.store(1, Release);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(2, Release);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values (the load cannot read `1`):
+        if !matches!(a, 0 | 2) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/corw.stderr b/src/tools/miri/tests/genmc/pass/litmus/corw.stderr
new file mode 100644
index 0000000000000..485142e945a72
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/corw.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 3 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/cowr.rs b/src/tools/miri/tests/genmc/pass/litmus/cowr.rs
new file mode 100644
index 0000000000000..1c51f23a09c66
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/cowr.rs
@@ -0,0 +1,40 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "CoWR" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        let mut a = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+                a = X.load(Acquire);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(2, Release);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values (the load cannot read `0`):
+        if !matches!(a, 1 | 2) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/cowr.stderr b/src/tools/miri/tests/genmc/pass/litmus/cowr.stderr
new file mode 100644
index 0000000000000..485142e945a72
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/cowr.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 3 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/cumul-release.rs b/src/tools/miri/tests/genmc/pass/litmus/cumul-release.rs
new file mode 100644
index 0000000000000..2387976a8ca61
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/cumul-release.rs
@@ -0,0 +1,58 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test "litmus/cumul-release".
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+static Z: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+    Z.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let mut c = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Relaxed);
+                Y.store(1, Release);
+            }),
+            spawn_pthread_closure(|| {
+                a = Y.load(Relaxed);
+                Z.store(a, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                b = Z.load(Relaxed);
+                std::sync::atomic::fence(AcqRel);
+                c = X.load(Relaxed);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!(
+            (a, b, c),
+            (0, 0, 0) | (0, 0, 1) | (1, 0, 0) | (1, 0, 1) | (1, 1, 0) | (1, 1, 1)
+        ) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/cumul-release.stderr b/src/tools/miri/tests/genmc/pass/litmus/cumul-release.stderr
new file mode 100644
index 0000000000000..a031dfc8977a5
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/cumul-release.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 8 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/default.rs b/src/tools/miri/tests/genmc/pass/litmus/default.rs
new file mode 100644
index 0000000000000..55fb1ac34acb4
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/default.rs
@@ -0,0 +1,47 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/default" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        let mut a = 1234;
+        let mut b = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                a = X.load(Acquire);
+                b = X.load(Acquire);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(1, Release);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(2, Release);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!((a, b), (0, 0) | (0, 1) | (0, 2) | (1, 1) | (1, 2) | (2, 1) | (2, 2)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/default.stderr b/src/tools/miri/tests/genmc/pass/litmus/default.stderr
new file mode 100644
index 0000000000000..87d38d250411a
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/default.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 12 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/detour.join.stderr b/src/tools/miri/tests/genmc/pass/litmus/detour.join.stderr
new file mode 100644
index 0000000000000..5006f11fefb1c
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/detour.join.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 9 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/detour.no_join.stderr b/src/tools/miri/tests/genmc/pass/litmus/detour.no_join.stderr
new file mode 100644
index 0000000000000..5006f11fefb1c
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/detour.no_join.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 9 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/detour.rs b/src/tools/miri/tests/genmc/pass/litmus/detour.rs
new file mode 100644
index 0000000000000..7136c029bbb54
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/detour.rs
@@ -0,0 +1,68 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+//@revisions: join no_join
+
+// Translated from GenMC's "litmus/detour" test.
+
+// This test has two revisitions to test whether we get the same result
+// independent of whether we join the spawned threads or not.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicI64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicI64 = AtomicI64::new(0);
+static Y: AtomicI64 = AtomicI64::new(0);
+static Z: AtomicI64 = AtomicI64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove these initializing writes once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+    Y.store(0, Relaxed);
+    Z.store(0, Relaxed);
+
+    unsafe {
+        // Make these static so we can exit the main thread while the other threads still run.
+        // If these are `let mut` like the other tests, this will cause a use-after-free bug.
+        static mut A: i64 = 1234;
+        static mut B: i64 = 1234;
+        static mut C: i64 = 1234;
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                A = Z.load(Relaxed);
+                X.store(A.wrapping_sub(1), Relaxed);
+                B = X.load(Relaxed);
+                Y.store(B, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                C = Y.load(Relaxed);
+                Z.store(C, Relaxed);
+            }),
+        ];
+
+        // The `no_join` revision doesn't join any of the running threads to test that
+        // we still explore the same number of executions in that case.
+        if cfg!(no_join) {
+            return 0;
+        }
+
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        if !matches!((A, B, C), (0, 1, 0) | (0, -1, 0) | (0, 1, 1) | (0, -1, -1)) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/fr_w_w_w_reads.rs b/src/tools/miri/tests/genmc/pass/litmus/fr_w_w_w_reads.rs
new file mode 100644
index 0000000000000..796ffbf97f96b
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/fr_w_w_w_reads.rs
@@ -0,0 +1,53 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "fr+w+w+w+reads" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        let mut result = [1234; 4];
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.store(1, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(2, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(3, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                result[0] = X.load(Relaxed);
+                result[1] = X.load(Relaxed);
+                result[2] = X.load(Relaxed);
+                result[3] = X.load(Relaxed);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        for val in result {
+            if !matches!(val, 0..=3) {
+                std::process::abort();
+            }
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/fr_w_w_w_reads.stderr b/src/tools/miri/tests/genmc/pass/litmus/fr_w_w_w_reads.stderr
new file mode 100644
index 0000000000000..c09b50e282f1e
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/fr_w_w_w_reads.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 210 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/inc2w.rs b/src/tools/miri/tests/genmc/pass/litmus/inc2w.rs
new file mode 100644
index 0000000000000..fd8574c4f7c78
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/inc2w.rs
@@ -0,0 +1,45 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's test "litmus/inc2w".
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    // FIXME(genmc,HACK): remove this initializing write once Miri-GenMC supports mixed atomic-non-atomic accesses.
+    X.store(0, Relaxed);
+
+    unsafe {
+        let ids = [
+            spawn_pthread_closure(|| {
+                X.fetch_add(1, Relaxed);
+            }),
+            spawn_pthread_closure(|| {
+                X.store(4, Release);
+            }),
+            spawn_pthread_closure(|| {
+                X.fetch_add(2, Relaxed);
+            }),
+        ];
+        // Join so we can read the final values.
+        join_pthreads(ids);
+
+        // Check that we don't get any unexpected values:
+        let x = X.load(Relaxed);
+        if !matches!(x, 4..=7) {
+            std::process::abort();
+        }
+
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/inc2w.stderr b/src/tools/miri/tests/genmc/pass/litmus/inc2w.stderr
new file mode 100644
index 0000000000000..b9bdf2245aed6
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/inc2w.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 6 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/inc_inc_RR_W_RR.rs b/src/tools/miri/tests/genmc/pass/litmus/inc_inc_RR_W_RR.rs
new file mode 100644
index 0000000000000..40ca486318598
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/inc_inc_RR_W_RR.rs
@@ -0,0 +1,63 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::ffi::c_void;
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+
+static mut A: u64 = 0;
+static mut B: u64 = 0;
+static mut C: u64 = 0;
+static mut D: u64 = 0;
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    let thread_order = [thread_1, thread_2, thread_3, thread_4, thread_5];
+    let ids = unsafe { spawn_pthreads_no_params(thread_order) };
+    unsafe { join_pthreads(ids) };
+
+    if unsafe { A == 42 && B == 2 && C == 1 && D == 42 } {
+        std::process::abort();
+    }
+
+    0
+}
+
+pub extern "C" fn thread_1(_value: *mut c_void) -> *mut c_void {
+    X.fetch_add(1, Relaxed);
+    std::ptr::null_mut()
+}
+
+pub extern "C" fn thread_2(_value: *mut c_void) -> *mut c_void {
+    X.fetch_add(1, Relaxed);
+    std::ptr::null_mut()
+}
+
+pub extern "C" fn thread_3(_value: *mut c_void) -> *mut c_void {
+    unsafe {
+        A = X.load(Relaxed);
+        B = X.load(Relaxed);
+    }
+    std::ptr::null_mut()
+}
+
+pub extern "C" fn thread_4(_value: *mut c_void) -> *mut c_void {
+    X.store(42, Relaxed);
+    std::ptr::null_mut()
+}
+
+pub extern "C" fn thread_5(_value: *mut c_void) -> *mut c_void {
+    unsafe {
+        C = X.load(Relaxed);
+        D = X.load(Relaxed);
+    }
+    std::ptr::null_mut()
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/inc_inc_RR_W_RR.stderr b/src/tools/miri/tests/genmc/pass/litmus/inc_inc_RR_W_RR.stderr
new file mode 100644
index 0000000000000..770fb7ef88046
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/inc_inc_RR_W_RR.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 600 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/riwi.rs b/src/tools/miri/tests/genmc/pass/litmus/riwi.rs
new file mode 100644
index 0000000000000..49564c8e4fe0a
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/riwi.rs
@@ -0,0 +1,31 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows
+
+// Translated from GenMC's "litmus/riwi" test.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static X: AtomicU64 = AtomicU64::new(0);
+static Y: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        spawn_pthread_closure(|| {
+            Y.load(Relaxed);
+            X.fetch_add(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            X.fetch_add(1, Relaxed);
+            Y.store(1, Release);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/riwi.stderr b/src/tools/miri/tests/genmc/pass/litmus/riwi.stderr
new file mode 100644
index 0000000000000..485142e945a72
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/riwi.stderr
@@ -0,0 +1,2 @@
+Running GenMC Verification...
+Verification complete with 3 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/litmus/viktor-relseq.rs b/src/tools/miri/tests/genmc/pass/litmus/viktor-relseq.rs
new file mode 100644
index 0000000000000..3256c9f421193
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/viktor-relseq.rs
@@ -0,0 +1,38 @@
+//@compile-flags: -Zmiri-genmc -Zmiri-disable-stacked-borrows -Zmiri-genmc-estimate
+
+// Translated from GenMC's "litmus/viktor-relseq" test.
+//
+// This test also checks that we can run the GenMC estimation mode.
+
+#![no_main]
+
+#[path = "../../../utils/genmc.rs"]
+mod genmc;
+
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering::*;
+
+use crate::genmc::*;
+
+static LOCK: AtomicU64 = AtomicU64::new(0);
+
+#[unsafe(no_mangle)]
+fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
+    unsafe {
+        spawn_pthread_closure(|| {
+            LOCK.fetch_add(1, Acquire);
+            LOCK.fetch_add(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            LOCK.fetch_add(1, Relaxed);
+            LOCK.fetch_add(1, Relaxed);
+        });
+        spawn_pthread_closure(|| {
+            LOCK.fetch_add(1, Release);
+        });
+        spawn_pthread_closure(|| {
+            LOCK.fetch_add(1, Relaxed);
+        });
+        0
+    }
+}
diff --git a/src/tools/miri/tests/genmc/pass/litmus/viktor-relseq.stderr b/src/tools/miri/tests/genmc/pass/litmus/viktor-relseq.stderr
new file mode 100644
index 0000000000000..c53d5c569b9ca
--- /dev/null
+++ b/src/tools/miri/tests/genmc/pass/litmus/viktor-relseq.stderr
@@ -0,0 +1,4 @@
+Estimating GenMC verification time...
+Expected verification time: [MEAN] ± [SD]
+Running GenMC Verification...
+Verification complete with 180 executions. No errors found.
diff --git a/src/tools/miri/tests/genmc/pass/test_cxx_build.rs b/src/tools/miri/tests/genmc/pass/test_cxx_build.rs
deleted file mode 100644
index f621bd9114fa4..0000000000000
--- a/src/tools/miri/tests/genmc/pass/test_cxx_build.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-//@compile-flags: -Zmiri-genmc
-
-#![no_main]
-
-#[unsafe(no_mangle)]
-fn miri_start(_argc: isize, _argv: *const *const u8) -> isize {
-    0
-}
diff --git a/src/tools/miri/tests/genmc/pass/test_cxx_build.stderr b/src/tools/miri/tests/genmc/pass/test_cxx_build.stderr
deleted file mode 100644
index 4b7aa824bd122..0000000000000
--- a/src/tools/miri/tests/genmc/pass/test_cxx_build.stderr
+++ /dev/null
@@ -1,5 +0,0 @@
-warning: borrow tracking has been disabled, it is not (yet) supported in GenMC mode.
-C++: GenMC handle created!
-Miri: GenMC handle creation successful!
-C++: GenMC handle destroyed!
-Miri: Dropping GenMC handle successful!
diff --git a/src/tools/miri/tests/pass/0weak_memory_consistency.rs b/src/tools/miri/tests/pass/0weak_memory/consistency.rs
similarity index 88%
rename from src/tools/miri/tests/pass/0weak_memory_consistency.rs
rename to src/tools/miri/tests/pass/0weak_memory/consistency.rs
index e4ed9675de822..16a38ebd9d4da 100644
--- a/src/tools/miri/tests/pass/0weak_memory_consistency.rs
+++ b/src/tools/miri/tests/pass/0weak_memory/consistency.rs
@@ -1,6 +1,7 @@
-//@compile-flags: -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows -Zmiri-disable-validation -Zmiri-provenance-gc=10000
+//@compile-flags: -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows -Zmiri-disable-validation
 // This test's runtime explodes if the GC interval is set to 1 (which we do in CI), so we
 // override it internally back to the default frequency.
+//@compile-flags: -Zmiri-provenance-gc=10000
 
 // The following tests check whether our weak memory emulation produces
 // any inconsistent execution outcomes
@@ -14,13 +15,6 @@
 // To mitigate this, each test is ran enough times such that the chance
 // of spurious success is very low. These tests never spuriously fail.
 
-// Test cases and their consistent outcomes are from
-// http://svr-pes20-cppmem.cl.cam.ac.uk/cppmem/
-// Based on
-// M. Batty, S. Owens, S. Sarkar, P. Sewell and T. Weber,
-// "Mathematizing C++ concurrency", ACM SIGPLAN Notices, vol. 46, no. 1, pp. 55-66, 2011.
-// Available: https://ss265.host.cs.st-andrews.ac.uk/papers/n3132.pdf.
-
 use std::sync::atomic::Ordering::*;
 use std::sync::atomic::{AtomicBool, AtomicI32, Ordering, fence};
 use std::thread::spawn;
@@ -56,6 +50,8 @@ fn spin_until_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool {
     val
 }
 
+/// Test matching https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf, Figure 7.
+/// (The Figure 8 test is in `weak.rs`.)
 fn test_corr() {
     let x = static_atomic(0);
     let y = static_atomic(0);
@@ -75,19 +71,25 @@ fn test_corr() {
     let j3 = spawn(move || { //                      |                    |
         spin_until_i32(&y, Acquire, 1); // <---------+                    |
         x.load(Relaxed) // <----------------------------------------------+
-        // The two reads on x are ordered by hb, so they cannot observe values
-        // differently from the modification order. If the first read observed
-        // 2, then the second read must observe 2 as well.
     });
 
     j1.join().unwrap();
     let r2 = j2.join().unwrap();
     let r3 = j3.join().unwrap();
+    // The two reads on x are ordered by hb, so they cannot observe values
+    // differently from the modification order. If the first read observed
+    // 2, then the second read must observe 2 as well.
     if r2 == 2 {
         assert_eq!(r3, 2);
     }
 }
 
+/// This test case is from:
+/// http://svr-pes20-cppmem.cl.cam.ac.uk/cppmem/, "WRC"
+/// Based on
+/// M. Batty, S. Owens, S. Sarkar, P. Sewell and T. Weber,
+/// "Mathematizing C++ concurrency", ACM SIGPLAN Notices, vol. 46, no. 1, pp. 55-66, 2011.
+/// Available: https://www.cl.cam.ac.uk/~pes20/cpp/popl085ap-sewell.pdf.
 fn test_wrc() {
     let x = static_atomic(0);
     let y = static_atomic(0);
@@ -114,6 +116,8 @@ fn test_wrc() {
     assert_eq!(r3, 1);
 }
 
+/// Another test from http://svr-pes20-cppmem.cl.cam.ac.uk/cppmem/:
+/// MP (na_rel+acq_na)
 fn test_message_passing() {
     let mut var = 0u32;
     let ptr = &mut var as *mut u32;
@@ -139,7 +143,8 @@ fn test_message_passing() {
     assert_eq!(r2, 1);
 }
 
-// LB+acq_rel+acq_rel
+/// Another test from http://svr-pes20-cppmem.cl.cam.ac.uk/cppmem/:
+/// LB+acq_rel+acq_rel
 fn test_load_buffering_acq_rel() {
     let x = static_atomic(0);
     let y = static_atomic(0);
diff --git a/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs b/src/tools/miri/tests/pass/0weak_memory/consistency_sc.rs
similarity index 99%
rename from src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs
rename to src/tools/miri/tests/pass/0weak_memory/consistency_sc.rs
index 937c2a8cf282c..cb8535b8ad74b 100644
--- a/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs
+++ b/src/tools/miri/tests/pass/0weak_memory/consistency_sc.rs
@@ -1,6 +1,7 @@
-//@compile-flags: -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows -Zmiri-disable-validation -Zmiri-provenance-gc=10000
+//@compile-flags: -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows -Zmiri-disable-validation
 // This test's runtime explodes if the GC interval is set to 1 (which we do in CI), so we
 // override it internally back to the default frequency.
+//@compile-flags: -Zmiri-provenance-gc=10000
 
 // The following tests check whether our weak memory emulation produces
 // any inconsistent execution outcomes
@@ -348,7 +349,7 @@ fn test_sc_relaxed() {
 }
 
 pub fn main() {
-    for _ in 0..50 {
+    for _ in 0..32 {
         test_sc_store_buffering();
         test_iriw_sc_rlx();
         test_cpp20_sc_fence_fix();
diff --git a/src/tools/miri/tests/pass/weak_memory/extra_cpp.rs b/src/tools/miri/tests/pass/0weak_memory/extra_cpp.rs
similarity index 100%
rename from src/tools/miri/tests/pass/weak_memory/extra_cpp.rs
rename to src/tools/miri/tests/pass/0weak_memory/extra_cpp.rs
diff --git a/src/tools/miri/tests/pass/0weak_memory/weak.rs b/src/tools/miri/tests/pass/0weak_memory/weak.rs
new file mode 100644
index 0000000000000..c752fc114ba57
--- /dev/null
+++ b/src/tools/miri/tests/pass/0weak_memory/weak.rs
@@ -0,0 +1,263 @@
+//@compile-flags: -Zmiri-ignore-leaks -Zmiri-fixed-schedule
+// This test's runtime explodes if the GC interval is set to 1 (which we do in CI), so we
+// override it internally back to the default frequency.
+//@compile-flags: -Zmiri-provenance-gc=10000
+
+// Tests showing weak memory behaviours are exhibited, even with a fixed scheule.
+// We run all tests a number of times and then check that we see the desired list of outcomes.
+
+// Spurious failure is possible, if you are really unlucky with
+// the RNG and always read the latest value from the store buffer.
+
+use std::sync::atomic::Ordering::*;
+use std::sync::atomic::{AtomicUsize, fence};
+use std::thread::spawn;
+
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+struct EvilSend<T>(pub T);
+
+unsafe impl<T> Send for EvilSend<T> {}
+unsafe impl<T> Sync for EvilSend<T> {}
+
+// We can't create static items because we need to run each test multiple times.
+fn static_atomic(val: usize) -> &'static AtomicUsize {
+    Box::leak(Box::new(AtomicUsize::new(val)))
+}
+
+// Spins until it reads the given value
+fn spin_until(loc: &AtomicUsize, val: usize) -> usize {
+    while loc.load(Relaxed) != val {
+        std::hint::spin_loop();
+    }
+    val
+}
+
+/// Check that the function produces the intended set of outcomes.
+#[track_caller]
+fn check_all_outcomes<T: Eq + std::hash::Hash + std::fmt::Debug>(
+    expected: impl IntoIterator<Item = T>,
+    generate: impl Fn() -> T,
+) {
+    use std::collections::HashSet;
+
+    let expected: HashSet<T> = HashSet::from_iter(expected);
+    let mut seen = HashSet::new();
+    // Let's give it N times as many tries as we are expecting values.
+    let tries = expected.len() * 16;
+    for i in 0..tries {
+        let val = generate();
+        assert!(expected.contains(&val), "got an unexpected value: {val:?}");
+        seen.insert(val);
+        if i > tries / 2 && expected.len() == seen.len() {
+            // We saw everything and we did quite a few tries, let's avoid wasting time.
+            return;
+        }
+    }
+    // Let's see if we saw them all.
+    for val in expected {
+        if !seen.contains(&val) {
+            panic!("did not get value that should be possible: {val:?}");
+        }
+    }
+}
+
+fn relaxed() {
+    check_all_outcomes([0, 1, 2], || {
+        let x = static_atomic(0);
+        let j1 = spawn(move || {
+            x.store(1, Relaxed);
+            // Preemption is disabled, so the store above will never be the
+            // latest store visible to another thread.
+            x.store(2, Relaxed);
+        });
+
+        let j2 = spawn(move || x.load(Relaxed));
+
+        j1.join().unwrap();
+        let r2 = j2.join().unwrap();
+
+        // There are three possible values here: 0 (from the initial read), 1 (from the first relaxed
+        // read), and 2 (the last read).
+        r2
+    });
+}
+
+// https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf Figure 8
+fn seq_cst() {
+    check_all_outcomes([1, 3], || {
+        let x = static_atomic(0);
+
+        let j1 = spawn(move || {
+            x.store(1, Relaxed);
+        });
+
+        let j2 = spawn(move || {
+            x.store(2, SeqCst);
+            x.store(3, SeqCst);
+        });
+
+        let j3 = spawn(move || x.load(SeqCst));
+
+        j1.join().unwrap();
+        j2.join().unwrap();
+        let r3 = j3.join().unwrap();
+
+        // Even though we force t3 to run last, it can still see the value 1.
+        // And it can *never* see the value 2!
+        r3
+    });
+}
+
+fn initialization_write(add_fence: bool) {
+    check_all_outcomes([11, 22], || {
+        let x = static_atomic(11);
+
+        let wait = static_atomic(0);
+
+        let j1 = spawn(move || {
+            x.store(22, Relaxed);
+            // Relaxed is intentional. We want to test if the thread 2 reads the initialisation write
+            // after a relaxed write
+            wait.store(1, Relaxed);
+        });
+
+        let j2 = spawn(move || {
+            spin_until(wait, 1);
+            if add_fence {
+                fence(AcqRel);
+            }
+            x.load(Relaxed)
+        });
+
+        j1.join().unwrap();
+        let r2 = j2.join().unwrap();
+
+        r2
+    });
+}
+
+fn faa_replaced_by_load() {
+    check_all_outcomes([true, false], || {
+        // Example from https://github.com/llvm/llvm-project/issues/56450#issuecomment-1183695905
+        pub fn rdmw(storing: &AtomicUsize, sync: &AtomicUsize, loading: &AtomicUsize) -> usize {
+            storing.store(1, Relaxed);
+            fence(Release);
+            // sync.fetch_add(0, Relaxed);
+            sync.load(Relaxed);
+            fence(Acquire);
+            loading.load(Relaxed)
+        }
+
+        let x = static_atomic(0);
+        let y = static_atomic(0);
+        let z = static_atomic(0);
+
+        let t1 = spawn(move || rdmw(y, x, z));
+
+        let t2 = spawn(move || rdmw(z, x, y));
+
+        let a = t1.join().unwrap();
+        let b = t2.join().unwrap();
+        (a, b) == (0, 0)
+    });
+}
+
+/// Checking that the weaker release sequence example from
+/// <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0982r0.html> can actually produce the
+/// new behavior (`Some(0)` in our version).
+fn weaker_release_sequences() {
+    check_all_outcomes([None, Some(0), Some(1)], || {
+        let x = static_atomic(0);
+        let y = static_atomic(0);
+
+        let t1 = spawn(move || {
+            x.store(2, Relaxed);
+        });
+        let t2 = spawn(move || {
+            y.store(1, Relaxed);
+            x.store(1, Release);
+            x.store(3, Relaxed);
+        });
+        let t3 = spawn(move || {
+            if x.load(Acquire) == 3 {
+                // In C++11, if we read the 3 here, and if the store of 1 was just before the store
+                // of 3 in mo order (which it is because we fix the schedule), this forms a release
+                // sequence, meaning we acquire the release store of 1, and we can thus never see
+                // the value 0.
+                // In C++20, this is no longer a release sequence, so 0 can now be observed.
+                Some(y.load(Relaxed))
+            } else {
+                None
+            }
+        });
+
+        t1.join().unwrap();
+        t2.join().unwrap();
+        t3.join().unwrap()
+    });
+}
+
+/// Ensuring normal release sequences (with RMWs) still work correctly.
+fn release_sequence() {
+    check_all_outcomes([None, Some(1)], || {
+        let x = static_atomic(0);
+        let y = static_atomic(0);
+
+        let t1 = spawn(move || {
+            y.store(1, Relaxed);
+            x.store(1, Release);
+            x.swap(3, Relaxed);
+        });
+        let t2 = spawn(move || {
+            if x.load(Acquire) == 3 {
+                // If we read 3 here, we are seeing the result of the `x.swap` above, which was
+                // relaxed but forms a release sequence with the `x.store`. This means there is a
+                // release sequence, so we acquire the `y.store` and cannot see the original value
+                // `0` any more.
+                Some(y.load(Relaxed))
+            } else {
+                None
+            }
+        });
+
+        t1.join().unwrap();
+        t2.join().unwrap()
+    });
+}
+
+/// Ensure that when we read from an outdated release store, we acquire its clock.
+fn old_release_store() {
+    check_all_outcomes([None, Some(1)], || {
+        let x = static_atomic(0);
+        let y = static_atomic(0);
+
+        let t1 = spawn(move || {
+            y.store(1, Relaxed);
+            x.store(1, Release); // this is what we want to read from
+            x.store(3, Relaxed);
+        });
+        let t2 = spawn(move || {
+            if x.load(Acquire) == 1 {
+                // We must have acquired the `y.store` so we cannot see the initial value any more.
+                Some(y.load(Relaxed))
+            } else {
+                None
+            }
+        });
+
+        t1.join().unwrap();
+        t2.join().unwrap()
+    });
+}
+
+pub fn main() {
+    relaxed();
+    seq_cst();
+    initialization_write(false);
+    initialization_write(true);
+    faa_replaced_by_load();
+    release_sequence();
+    weaker_release_sequences();
+    old_release_store();
+}
diff --git a/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs b/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs
index 82976326a8df3..115e232dde4c2 100644
--- a/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs
+++ b/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs
@@ -1,6 +1,7 @@
 //@revisions: stack tree
 //@[tree]compile-flags: -Zmiri-tree-borrows
 #![feature(allocator_api)]
+use std::alloc::{Layout, alloc, dealloc};
 use std::cell::Cell;
 use std::ptr;
 
@@ -305,5 +306,14 @@ fn zst() {
         let ptr = &raw mut *b as *mut ();
         drop(b);
         let _ref = &mut *ptr;
+
+        // zero-sized protectors do not affect deallocation
+        fn with_protector(_x: &mut (), ptr: *mut u8, l: Layout) {
+            // `_x` here is strongly protected but covers zero bytes.
+            unsafe { dealloc(ptr, l) };
+        }
+        let l = Layout::from_size_align(1, 1).unwrap();
+        let ptr = alloc(l);
+        with_protector(&mut *ptr.cast::<()>(), ptr, l);
     }
 }
diff --git a/src/tools/miri/tests/pass/float.rs b/src/tools/miri/tests/pass/float.rs
index 9f1b3f612b2d2..3ce5ea8356bd5 100644
--- a/src/tools/miri/tests/pass/float.rs
+++ b/src/tools/miri/tests/pass/float.rs
@@ -281,6 +281,35 @@ fn basic() {
     assert_eq!(34.2f64.abs(), 34.2f64);
     assert_eq!((-1.0f128).abs(), 1.0f128);
     assert_eq!(34.2f128.abs(), 34.2f128);
+
+    assert_eq!(64_f16.sqrt(), 8_f16);
+    assert_eq!(64_f32.sqrt(), 8_f32);
+    assert_eq!(64_f64.sqrt(), 8_f64);
+    assert_eq!(64_f128.sqrt(), 8_f128);
+    assert_eq!(f16::INFINITY.sqrt(), f16::INFINITY);
+    assert_eq!(f32::INFINITY.sqrt(), f32::INFINITY);
+    assert_eq!(f64::INFINITY.sqrt(), f64::INFINITY);
+    assert_eq!(f128::INFINITY.sqrt(), f128::INFINITY);
+    assert_eq!(0.0_f16.sqrt().total_cmp(&0.0), std::cmp::Ordering::Equal);
+    assert_eq!(0.0_f32.sqrt().total_cmp(&0.0), std::cmp::Ordering::Equal);
+    assert_eq!(0.0_f64.sqrt().total_cmp(&0.0), std::cmp::Ordering::Equal);
+    assert_eq!(0.0_f128.sqrt().total_cmp(&0.0), std::cmp::Ordering::Equal);
+    assert_eq!((-0.0_f16).sqrt().total_cmp(&-0.0), std::cmp::Ordering::Equal);
+    assert_eq!((-0.0_f32).sqrt().total_cmp(&-0.0), std::cmp::Ordering::Equal);
+    assert_eq!((-0.0_f64).sqrt().total_cmp(&-0.0), std::cmp::Ordering::Equal);
+    assert_eq!((-0.0_f128).sqrt().total_cmp(&-0.0), std::cmp::Ordering::Equal);
+    assert!((-5.0_f16).sqrt().is_nan());
+    assert!((-5.0_f32).sqrt().is_nan());
+    assert!((-5.0_f64).sqrt().is_nan());
+    assert!((-5.0_f128).sqrt().is_nan());
+    assert!(f16::NEG_INFINITY.sqrt().is_nan());
+    assert!(f32::NEG_INFINITY.sqrt().is_nan());
+    assert!(f64::NEG_INFINITY.sqrt().is_nan());
+    assert!(f128::NEG_INFINITY.sqrt().is_nan());
+    assert!(f16::NAN.sqrt().is_nan());
+    assert!(f32::NAN.sqrt().is_nan());
+    assert!(f64::NAN.sqrt().is_nan());
+    assert!(f128::NAN.sqrt().is_nan());
 }
 
 /// Test casts from floats to ints and back
@@ -1012,21 +1041,6 @@ pub fn libm() {
         unsafe { ldexp(a, b) }
     }
 
-    assert_eq!(64_f32.sqrt(), 8_f32);
-    assert_eq!(64_f64.sqrt(), 8_f64);
-    assert_eq!(f32::INFINITY.sqrt(), f32::INFINITY);
-    assert_eq!(f64::INFINITY.sqrt(), f64::INFINITY);
-    assert_eq!(0.0_f32.sqrt().total_cmp(&0.0), std::cmp::Ordering::Equal);
-    assert_eq!(0.0_f64.sqrt().total_cmp(&0.0), std::cmp::Ordering::Equal);
-    assert_eq!((-0.0_f32).sqrt().total_cmp(&-0.0), std::cmp::Ordering::Equal);
-    assert_eq!((-0.0_f64).sqrt().total_cmp(&-0.0), std::cmp::Ordering::Equal);
-    assert!((-5.0_f32).sqrt().is_nan());
-    assert!((-5.0_f64).sqrt().is_nan());
-    assert!(f32::NEG_INFINITY.sqrt().is_nan());
-    assert!(f64::NEG_INFINITY.sqrt().is_nan());
-    assert!(f32::NAN.sqrt().is_nan());
-    assert!(f64::NAN.sqrt().is_nan());
-
     assert_approx_eq!(25f32.powi(-2), 0.0016f32);
     assert_approx_eq!(23.2f64.powi(2), 538.24f64);
 
diff --git a/src/tools/miri/tests/pass/float_nan.rs b/src/tools/miri/tests/pass/float_nan.rs
index 3ffdb6868ac0a..902816307403a 100644
--- a/src/tools/miri/tests/pass/float_nan.rs
+++ b/src/tools/miri/tests/pass/float_nan.rs
@@ -1,7 +1,8 @@
+// This test's runtime explodes if the GC interval is set to 1 (which we do in CI), so we
+// override it internally back to the default frequency.
+//@compile-flags: -Zmiri-provenance-gc=10000
 #![feature(float_gamma, portable_simd, core_intrinsics)]
-use std::collections::HashSet;
 use std::fmt;
-use std::hash::Hash;
 use std::hint::black_box;
 
 fn ldexp(a: f64, b: i32) -> f64 {
@@ -25,15 +26,26 @@ enum NaNKind {
 }
 use NaNKind::*;
 
+/// Check that the function produces the intended set of outcomes.
 #[track_caller]
-fn check_all_outcomes<T: Eq + Hash + fmt::Display>(expected: HashSet<T>, generate: impl Fn() -> T) {
+fn check_all_outcomes<T: Eq + std::hash::Hash + fmt::Display>(
+    expected: impl IntoIterator<Item = T>,
+    generate: impl Fn() -> T,
+) {
+    use std::collections::HashSet;
+
+    let expected: HashSet<T> = HashSet::from_iter(expected);
     let mut seen = HashSet::new();
-    // Let's give it sixteen times as many tries as we are expecting values.
-    let tries = expected.len() * 16;
-    for _ in 0..tries {
+    // Let's give it N times as many tries as we are expecting values.
+    let tries = expected.len() * 12;
+    for i in 0..tries {
         let val = generate();
         assert!(expected.contains(&val), "got an unexpected value: {val}");
         seen.insert(val);
+        if i > tries / 2 && expected.len() == seen.len() {
+            // We saw everything and we did quite a few tries, let's avoid wasting time.
+            return;
+        }
     }
     // Let's see if we saw them all.
     for val in expected {
@@ -193,51 +205,50 @@ impl F64 {
 
 fn test_f32() {
     // Freshly generated NaNs can have either sign.
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(0.0 / black_box(0.0)),
-    );
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(0.0 / black_box(0.0))
+    });
     // When there are NaN inputs, their payload can be propagated, with any sign.
     let all1_payload = u32_ones(22);
     let all1 = F32::nan(Pos, Quiet, all1_payload).as_f32();
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
             F32::nan(Pos, Quiet, all1_payload),
             F32::nan(Neg, Quiet, all1_payload),
-        ]),
+        ],
         || F32::from(0.0 + all1),
     );
     // When there are two NaN inputs, the output can be either one, or the preferred NaN.
     let just1 = F32::nan(Neg, Quiet, 1).as_f32();
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
             F32::nan(Pos, Quiet, 1),
             F32::nan(Neg, Quiet, 1),
             F32::nan(Pos, Quiet, all1_payload),
             F32::nan(Neg, Quiet, all1_payload),
-        ]),
+        ],
         || F32::from(just1 - all1),
     );
     // When there are *signaling* NaN inputs, they might be quieted or not.
     let all1_snan = F32::nan(Pos, Signaling, all1_payload).as_f32();
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
             F32::nan(Pos, Quiet, all1_payload),
             F32::nan(Neg, Quiet, all1_payload),
             F32::nan(Pos, Signaling, all1_payload),
             F32::nan(Neg, Signaling, all1_payload),
-        ]),
+        ],
         || F32::from(0.0 * all1_snan),
     );
     // Mix signaling and non-signaling NaN.
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
             F32::nan(Pos, Quiet, 1),
@@ -246,35 +257,26 @@ fn test_f32() {
             F32::nan(Neg, Quiet, all1_payload),
             F32::nan(Pos, Signaling, all1_payload),
             F32::nan(Neg, Signaling, all1_payload),
-        ]),
+        ],
         || F32::from(just1 % all1_snan),
     );
 
     // Unary `-` must preserve payloads exactly.
-    check_all_outcomes(HashSet::from_iter([F32::nan(Neg, Quiet, all1_payload)]), || {
-        F32::from(-all1)
-    });
-    check_all_outcomes(HashSet::from_iter([F32::nan(Neg, Signaling, all1_payload)]), || {
-        F32::from(-all1_snan)
-    });
+    check_all_outcomes([F32::nan(Neg, Quiet, all1_payload)], || F32::from(-all1));
+    check_all_outcomes([F32::nan(Neg, Signaling, all1_payload)], || F32::from(-all1_snan));
 
     // Intrinsics
     let nan = F32::nan(Neg, Quiet, 0).as_f32();
     let snan = F32::nan(Neg, Signaling, 1).as_f32();
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(f32::min(nan, nan))
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(nan.floor())
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || F32::from(nan.sin()));
     check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(f32::min(nan, nan)),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(nan.floor()),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(nan.sin()),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
             F32::nan(Pos, Quiet, 1),
@@ -285,37 +287,32 @@ fn test_f32() {
             F32::nan(Neg, Quiet, all1_payload),
             F32::nan(Pos, Signaling, all1_payload),
             F32::nan(Neg, Signaling, all1_payload),
-        ]),
+        ],
         || F32::from(just1.mul_add(F32::nan(Neg, Quiet, 2).as_f32(), all1_snan)),
     );
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(nan.powf(nan))
+    });
     check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(nan.powf(nan)),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([1.0f32.into()]),
+        [1.0f32.into()],
         || F32::from(1.0f32.powf(nan)), // special `pow` rule
     );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(nan.powi(1)),
-    );
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(nan.powi(1))
+    });
 
     // libm functions
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(nan.sinh())
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(nan.atan2(nan))
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(nan.ln_gamma().0)
+    });
     check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(nan.sinh()),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(nan.atan2(nan)),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(nan.ln_gamma().0),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::from(1.0),
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
@@ -323,58 +320,57 @@ fn test_f32() {
             F32::nan(Neg, Quiet, 1),
             F32::nan(Pos, Signaling, 1),
             F32::nan(Neg, Signaling, 1),
-        ]),
+        ],
         || F32::from(snan.powf(0.0)),
     );
 }
 
 fn test_f64() {
     // Freshly generated NaNs can have either sign.
-    check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(0.0 / black_box(0.0)),
-    );
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(0.0 / black_box(0.0))
+    });
     // When there are NaN inputs, their payload can be propagated, with any sign.
     let all1_payload = u64_ones(51);
     let all1 = F64::nan(Pos, Quiet, all1_payload).as_f64();
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F64::nan(Pos, Quiet, 0),
             F64::nan(Neg, Quiet, 0),
             F64::nan(Pos, Quiet, all1_payload),
             F64::nan(Neg, Quiet, all1_payload),
-        ]),
+        ],
         || F64::from(0.0 + all1),
     );
     // When there are two NaN inputs, the output can be either one, or the preferred NaN.
     let just1 = F64::nan(Neg, Quiet, 1).as_f64();
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F64::nan(Pos, Quiet, 0),
             F64::nan(Neg, Quiet, 0),
             F64::nan(Pos, Quiet, 1),
             F64::nan(Neg, Quiet, 1),
             F64::nan(Pos, Quiet, all1_payload),
             F64::nan(Neg, Quiet, all1_payload),
-        ]),
+        ],
         || F64::from(just1 - all1),
     );
     // When there are *signaling* NaN inputs, they might be quieted or not.
     let all1_snan = F64::nan(Pos, Signaling, all1_payload).as_f64();
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F64::nan(Pos, Quiet, 0),
             F64::nan(Neg, Quiet, 0),
             F64::nan(Pos, Quiet, all1_payload),
             F64::nan(Neg, Quiet, all1_payload),
             F64::nan(Pos, Signaling, all1_payload),
             F64::nan(Neg, Signaling, all1_payload),
-        ]),
+        ],
         || F64::from(0.0 * all1_snan),
     );
     // Mix signaling and non-signaling NaN.
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F64::nan(Pos, Quiet, 0),
             F64::nan(Neg, Quiet, 0),
             F64::nan(Pos, Quiet, 1),
@@ -383,27 +379,22 @@ fn test_f64() {
             F64::nan(Neg, Quiet, all1_payload),
             F64::nan(Pos, Signaling, all1_payload),
             F64::nan(Neg, Signaling, all1_payload),
-        ]),
+        ],
         || F64::from(just1 % all1_snan),
     );
 
     // Intrinsics
     let nan = F64::nan(Neg, Quiet, 0).as_f64();
     let snan = F64::nan(Neg, Signaling, 1).as_f64();
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(f64::min(nan, nan))
+    });
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(nan.floor())
+    });
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || F64::from(nan.sin()));
     check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(f64::min(nan, nan)),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(nan.floor()),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(nan.sin()),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([
+        [
             F64::nan(Pos, Quiet, 0),
             F64::nan(Neg, Quiet, 0),
             F64::nan(Pos, Quiet, 1),
@@ -414,41 +405,35 @@ fn test_f64() {
             F64::nan(Neg, Quiet, all1_payload),
             F64::nan(Pos, Signaling, all1_payload),
             F64::nan(Neg, Signaling, all1_payload),
-        ]),
+        ],
         || F64::from(just1.mul_add(F64::nan(Neg, Quiet, 2).as_f64(), all1_snan)),
     );
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(nan.powf(nan))
+    });
     check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(nan.powf(nan)),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([1.0f64.into()]),
+        [1.0f64.into()],
         || F64::from(1.0f64.powf(nan)), // special `pow` rule
     );
-    check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(nan.powi(1)),
-    );
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(nan.powi(1))
+    });
 
     // libm functions
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(nan.sinh())
+    });
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(nan.atan2(nan))
+    });
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(ldexp(nan, 1))
+    });
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(nan.ln_gamma().0)
+    });
     check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(nan.sinh()),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(nan.atan2(nan)),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(ldexp(nan, 1)),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(nan.ln_gamma().0),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([
+        [
             F64::from(1.0),
             F64::nan(Pos, Quiet, 0),
             F64::nan(Neg, Quiet, 0),
@@ -456,7 +441,7 @@ fn test_f64() {
             F64::nan(Neg, Quiet, 1),
             F64::nan(Pos, Signaling, 1),
             F64::nan(Neg, Signaling, 1),
-        ]),
+        ],
         || F64::from(snan.powf(0.0)),
     );
 }
@@ -467,82 +452,79 @@ fn test_casts() {
     let left1_payload_64 = (all1_payload_32 as u64) << (51 - 22);
 
     // 64-to-32
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(F64::nan(Pos, Quiet, 0).as_f64() as f32),
-    );
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(F64::nan(Pos, Quiet, 0).as_f64() as f32)
+    });
     // The preferred payload is always a possibility.
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
             F32::nan(Pos, Quiet, all1_payload_32),
             F32::nan(Neg, Quiet, all1_payload_32),
-        ]),
+        ],
         || F32::from(F64::nan(Pos, Quiet, all1_payload_64).as_f64() as f32),
     );
     // If the input is signaling, then the output *may* also be signaling.
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
             F32::nan(Pos, Quiet, all1_payload_32),
             F32::nan(Neg, Quiet, all1_payload_32),
             F32::nan(Pos, Signaling, all1_payload_32),
             F32::nan(Neg, Signaling, all1_payload_32),
-        ]),
+        ],
         || F32::from(F64::nan(Pos, Signaling, all1_payload_64).as_f64() as f32),
     );
     // Check that the low bits are gone (not the high bits).
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(F64::nan(Pos, Quiet, 1).as_f64() as f32)
+    });
     check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(F64::nan(Pos, Quiet, 1).as_f64() as f32),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
             F32::nan(Pos, Quiet, 1),
             F32::nan(Neg, Quiet, 1),
-        ]),
+        ],
         || F32::from(F64::nan(Pos, Quiet, 1 << (51 - 22)).as_f64() as f32),
     );
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F32::nan(Pos, Quiet, 0),
             F32::nan(Neg, Quiet, 0),
             // The `1` payload becomes `0`, and the `0` payload cannot be signaling,
             // so these are the only options.
-        ]),
+        ],
         || F32::from(F64::nan(Pos, Signaling, 1).as_f64() as f32),
     );
 
     // 32-to-64
-    check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(F32::nan(Pos, Quiet, 0).as_f32() as f64),
-    );
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(F32::nan(Pos, Quiet, 0).as_f32() as f64)
+    });
     // The preferred payload is always a possibility.
     // Also checks that 0s are added on the right.
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F64::nan(Pos, Quiet, 0),
             F64::nan(Neg, Quiet, 0),
             F64::nan(Pos, Quiet, left1_payload_64),
             F64::nan(Neg, Quiet, left1_payload_64),
-        ]),
+        ],
         || F64::from(F32::nan(Pos, Quiet, all1_payload_32).as_f32() as f64),
     );
     // If the input is signaling, then the output *may* also be signaling.
     check_all_outcomes(
-        HashSet::from_iter([
+        [
             F64::nan(Pos, Quiet, 0),
             F64::nan(Neg, Quiet, 0),
             F64::nan(Pos, Quiet, left1_payload_64),
             F64::nan(Neg, Quiet, left1_payload_64),
             F64::nan(Pos, Signaling, left1_payload_64),
             F64::nan(Neg, Signaling, left1_payload_64),
-        ]),
+        ],
         || F64::from(F32::nan(Pos, Signaling, all1_payload_32).as_f32() as f64),
     );
 }
@@ -552,48 +534,35 @@ fn test_simd() {
     use std::simd::*;
 
     let nan = F32::nan(Neg, Quiet, 0).as_f32();
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(unsafe { simd_div(f32x4::splat(0.0), f32x4::splat(0.0)) }[0]),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(unsafe { simd_fmin(f32x4::splat(nan), f32x4::splat(nan)) }[0]),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(unsafe { simd_fmax(f32x4::splat(nan), f32x4::splat(nan)) }[0]),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || {
-            F32::from(
-                unsafe { simd_fma(f32x4::splat(nan), f32x4::splat(nan), f32x4::splat(nan)) }[0],
-            )
-        },
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(unsafe { simd_reduce_add_ordered::<_, f32>(f32x4::splat(nan), nan) }),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(unsafe { simd_reduce_max::<_, f32>(f32x4::splat(nan)) }),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(unsafe { simd_fsqrt(f32x4::splat(nan)) }[0]),
-    );
-    check_all_outcomes(
-        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
-        || F32::from(unsafe { simd_ceil(f32x4::splat(nan)) }[0]),
-    );
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(unsafe { simd_div(f32x4::splat(0.0), f32x4::splat(0.0)) }[0])
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(unsafe { simd_fmin(f32x4::splat(nan), f32x4::splat(nan)) }[0])
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(unsafe { simd_fmax(f32x4::splat(nan), f32x4::splat(nan)) }[0])
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(unsafe { simd_fma(f32x4::splat(nan), f32x4::splat(nan), f32x4::splat(nan)) }[0])
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(unsafe { simd_reduce_add_ordered::<_, f32>(f32x4::splat(nan), nan) })
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(unsafe { simd_reduce_max::<_, f32>(f32x4::splat(nan)) })
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(unsafe { simd_fsqrt(f32x4::splat(nan)) }[0])
+    });
+    check_all_outcomes([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)], || {
+        F32::from(unsafe { simd_ceil(f32x4::splat(nan)) }[0])
+    });
 
     // Casts
-    check_all_outcomes(
-        HashSet::from_iter([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)]),
-        || F64::from(unsafe { simd_cast::<f32x4, f64x4>(f32x4::splat(nan)) }[0]),
-    );
+    check_all_outcomes([F64::nan(Pos, Quiet, 0), F64::nan(Neg, Quiet, 0)], || {
+        F64::from(unsafe { simd_cast::<f32x4, f64x4>(f32x4::splat(nan)) }[0])
+    });
 }
 
 fn main() {
diff --git a/src/tools/miri/tests/pass/weak_memory/weak.rs b/src/tools/miri/tests/pass/weak_memory/weak.rs
deleted file mode 100644
index 199f83f052860..0000000000000
--- a/src/tools/miri/tests/pass/weak_memory/weak.rs
+++ /dev/null
@@ -1,151 +0,0 @@
-//@compile-flags: -Zmiri-ignore-leaks -Zmiri-fixed-schedule
-
-// Tests showing weak memory behaviours are exhibited. All tests
-// return true when the desired behaviour is seen.
-// This is scheduler and pseudo-RNG dependent, so each test is
-// run multiple times until one try returns true.
-// Spurious failure is possible, if you are really unlucky with
-// the RNG and always read the latest value from the store buffer.
-
-use std::sync::atomic::Ordering::*;
-use std::sync::atomic::{AtomicUsize, fence};
-use std::thread::spawn;
-
-#[allow(dead_code)]
-#[derive(Copy, Clone)]
-struct EvilSend<T>(pub T);
-
-unsafe impl<T> Send for EvilSend<T> {}
-unsafe impl<T> Sync for EvilSend<T> {}
-
-// We can't create static items because we need to run each test multiple times.
-fn static_atomic(val: usize) -> &'static AtomicUsize {
-    Box::leak(Box::new(AtomicUsize::new(val)))
-}
-
-// Spins until it reads the given value
-fn spin_until(loc: &AtomicUsize, val: usize) -> usize {
-    while loc.load(Relaxed) != val {
-        std::hint::spin_loop();
-    }
-    val
-}
-
-fn relaxed(initial_read: bool) -> bool {
-    let x = static_atomic(0);
-    let j1 = spawn(move || {
-        x.store(1, Relaxed);
-        // Preemption is disabled, so the store above will never be the
-        // latest store visible to another thread.
-        x.store(2, Relaxed);
-    });
-
-    let j2 = spawn(move || x.load(Relaxed));
-
-    j1.join().unwrap();
-    let r2 = j2.join().unwrap();
-
-    // There are three possible values here: 0 (from the initial read), 1 (from the first relaxed
-    // read), and 2 (the last read). The last case is boring and we cover the other two.
-    r2 == if initial_read { 0 } else { 1 }
-}
-
-// https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf Figure 8
-fn seq_cst() -> bool {
-    let x = static_atomic(0);
-
-    let j1 = spawn(move || {
-        x.store(1, Relaxed);
-    });
-
-    let j2 = spawn(move || {
-        x.store(2, SeqCst);
-        x.store(3, SeqCst);
-    });
-
-    let j3 = spawn(move || x.load(SeqCst));
-
-    j1.join().unwrap();
-    j2.join().unwrap();
-    let r3 = j3.join().unwrap();
-
-    r3 == 1
-}
-
-fn initialization_write(add_fence: bool) -> bool {
-    let x = static_atomic(11);
-
-    let wait = static_atomic(0);
-
-    let j1 = spawn(move || {
-        x.store(22, Relaxed);
-        // Relaxed is intentional. We want to test if the thread 2 reads the initialisation write
-        // after a relaxed write
-        wait.store(1, Relaxed);
-    });
-
-    let j2 = spawn(move || {
-        spin_until(wait, 1);
-        if add_fence {
-            fence(AcqRel);
-        }
-        x.load(Relaxed)
-    });
-
-    j1.join().unwrap();
-    let r2 = j2.join().unwrap();
-
-    r2 == 11
-}
-
-fn faa_replaced_by_load() -> bool {
-    // Example from https://github.com/llvm/llvm-project/issues/56450#issuecomment-1183695905
-    #[no_mangle]
-    pub fn rdmw(storing: &AtomicUsize, sync: &AtomicUsize, loading: &AtomicUsize) -> usize {
-        storing.store(1, Relaxed);
-        fence(Release);
-        // sync.fetch_add(0, Relaxed);
-        sync.load(Relaxed);
-        fence(Acquire);
-        loading.load(Relaxed)
-    }
-
-    let x = static_atomic(0);
-    let y = static_atomic(0);
-    let z = static_atomic(0);
-
-    // Since each thread is so short, we need to make sure that they truely run at the same time
-    // Otherwise t1 will finish before t2 even starts
-    let go = static_atomic(0);
-
-    let t1 = spawn(move || {
-        spin_until(go, 1);
-        rdmw(y, x, z)
-    });
-
-    let t2 = spawn(move || {
-        spin_until(go, 1);
-        rdmw(z, x, y)
-    });
-
-    go.store(1, Relaxed);
-
-    let a = t1.join().unwrap();
-    let b = t2.join().unwrap();
-    (a, b) == (0, 0)
-}
-
-/// Asserts that the function returns true at least once in 100 runs
-#[track_caller]
-fn assert_once(f: fn() -> bool) {
-    assert!(std::iter::repeat_with(|| f()).take(100).any(|x| x));
-}
-
-pub fn main() {
-    assert_once(|| relaxed(false));
-    assert_once(|| relaxed(true));
-    assert_once(seq_cst);
-    assert_once(|| initialization_write(false));
-    assert_once(|| initialization_write(true));
-    assert_once(faa_replaced_by_load);
-}
diff --git a/src/tools/miri/tests/ui.rs b/src/tools/miri/tests/ui.rs
index b7286d9a3673a..efaaf9fc84170 100644
--- a/src/tools/miri/tests/ui.rs
+++ b/src/tools/miri/tests/ui.rs
@@ -277,6 +277,8 @@ regexes! {
     r"\bsys/([a-z_]+)/[a-z]+\b"     => "sys/$1/PLATFORM",
     // erase paths into the crate registry
     r"[^ ]*/\.?cargo/registry/.*/(.*\.rs)"  => "CARGO_REGISTRY/.../$1",
+    // remove time print from GenMC estimation mode output.
+    "\nExpected verification time: .* ± .*" => "\nExpected verification time: [MEAN] ± [SD]",
 }
 
 enum Dependencies {
diff --git a/src/tools/miri/tests/utils/genmc.rs b/src/tools/miri/tests/utils/genmc.rs
new file mode 100644
index 0000000000000..29b3181992b11
--- /dev/null
+++ b/src/tools/miri/tests/utils/genmc.rs
@@ -0,0 +1,64 @@
+#![allow(unused)]
+
+use std::ffi::c_void;
+
+use libc::{self, pthread_attr_t, pthread_t};
+
+/// Spawn a thread using `pthread_create`, abort the process on any errors.
+pub unsafe fn spawn_pthread(
+    f: extern "C" fn(*mut c_void) -> *mut c_void,
+    value: *mut c_void,
+) -> pthread_t {
+    let mut thread_id: pthread_t = 0;
+
+    let attr: *const pthread_attr_t = std::ptr::null();
+
+    if unsafe { libc::pthread_create(&raw mut thread_id, attr, f, value) } != 0 {
+        std::process::abort();
+    }
+    thread_id
+}
+
+/// Unsafe because we do *not* check that `F` is `Send + 'static`.
+/// That makes it much easier to write tests...
+pub unsafe fn spawn_pthread_closure<F: FnOnce()>(f: F) -> pthread_t {
+    let mut thread_id: pthread_t = 0;
+    let attr: *const pthread_attr_t = std::ptr::null();
+    let f = Box::new(f);
+    extern "C" fn thread_func<F: FnOnce()>(f: *mut c_void) -> *mut c_void {
+        let f = unsafe { Box::from_raw(f as *mut F) };
+        f();
+        std::ptr::null_mut()
+    }
+    if unsafe {
+        libc::pthread_create(
+            &raw mut thread_id,
+            attr,
+            thread_func::<F>,
+            Box::into_raw(f) as *mut c_void,
+        )
+    } != 0
+    {
+        std::process::abort();
+    }
+    thread_id
+}
+
+// Join the given pthread, abort the process on any errors.
+pub unsafe fn join_pthread(thread_id: pthread_t) {
+    if unsafe { libc::pthread_join(thread_id, std::ptr::null_mut()) } != 0 {
+        std::process::abort();
+    }
+}
+
+/// Spawn `N` threads using `pthread_create` without any arguments, abort the process on any errors.
+pub unsafe fn spawn_pthreads_no_params<const N: usize>(
+    functions: [extern "C" fn(*mut c_void) -> *mut c_void; N],
+) -> [pthread_t; N] {
+    functions.map(|func| spawn_pthread(func, std::ptr::null_mut()))
+}
+
+// Join the `N` given pthreads, abort the process on any errors.
+pub unsafe fn join_pthreads<const N: usize>(thread_ids: [pthread_t; N]) {
+    let _ = thread_ids.map(|id| join_pthread(id));
+}
diff --git a/src/tools/tidy/src/lib.rs b/src/tools/tidy/src/lib.rs
index 2a9c3963d2d86..f7920e3205ab2 100644
--- a/src/tools/tidy/src/lib.rs
+++ b/src/tools/tidy/src/lib.rs
@@ -237,7 +237,7 @@ pub fn ensure_version_or_cargo_install(
     if !cargo_exit_code.success() {
         return Err(io::Error::other("cargo install failed"));
     }
-    let bin_path = tool_bin_dir.join(bin_name);
+    let bin_path = tool_bin_dir.join(bin_name).with_extension(env::consts::EXE_EXTENSION);
     assert!(
         matches!(bin_path.try_exists(), Ok(true)),
         "cargo install did not produce the expected binary"
diff --git a/tests/ui/inference/issue-72616.rs b/tests/ui/inference/issue-72616.rs
index 71e095cc6fc02..ba18575a57156 100644
--- a/tests/ui/inference/issue-72616.rs
+++ b/tests/ui/inference/issue-72616.rs
@@ -21,7 +21,6 @@ pub fn main() {
     {
         if String::from("a") == "a".try_into().unwrap() {}
         //~^ ERROR type annotations needed
-        //~| ERROR type annotations needed
     }
     {
         let _: String = match "_".try_into() {
diff --git a/tests/ui/inference/issue-72616.stderr b/tests/ui/inference/issue-72616.stderr
index a271639996fd2..6b47d56881134 100644
--- a/tests/ui/inference/issue-72616.stderr
+++ b/tests/ui/inference/issue-72616.stderr
@@ -16,23 +16,6 @@ LL -         if String::from("a") == "a".try_into().unwrap() {}
 LL +         if String::from("a") == <&str as TryInto<T>>::try_into("a").unwrap() {}
    |
 
-error[E0283]: type annotations needed
-  --> $DIR/issue-72616.rs:22:37
-   |
-LL |         if String::from("a") == "a".try_into().unwrap() {}
-   |                                     ^^^^^^^^
-   |
-   = note: multiple `impl`s satisfying `_: TryFrom<&str>` found in the following crates: `core`, `std`:
-           - impl TryFrom<&str> for std::sys::net::connection::socket::LookupHost;
-           - impl<T, U> TryFrom<U> for T
-             where U: Into<T>;
-   = note: required for `&str` to implement `TryInto<_>`
-help: try using a fully qualified path to specify the expected types
-   |
-LL -         if String::from("a") == "a".try_into().unwrap() {}
-LL +         if String::from("a") == <&str as TryInto<T>>::try_into("a").unwrap() {}
-   |
-
-error: aborting due to 2 previous errors
+error: aborting due to 1 previous error
 
 For more information about this error, try `rustc --explain E0283`.