diff --git a/Cargo.lock b/Cargo.lock index a8df53f3a4113..52f5048115753 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3204,9 +3204,9 @@ dependencies = [ [[package]] name = "rustc-build-sysroot" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb13874a0e55baf4ac3d49d38206aecb31a55b75d6c4d04fd850b53942c8cc8" +checksum = "3b881c015c729b43105bbd3702a9bdecee28fafaa21126d1d62e454ec011a4b7" dependencies = [ "anyhow", "rustc_version", diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index ea98bebd30555..6dc6d1026f621 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -22,8 +22,7 @@ pub enum CommentKind { Block, } -// This type must not implement `Hash` due to the unusual `PartialEq` impl below. -#[derive(Copy, Clone, Debug, Encodable, Decodable, HashStable_Generic)] +#[derive(Copy, Clone, PartialEq, Debug, Encodable, Decodable, HashStable_Generic)] pub enum InvisibleOrigin { // From the expansion of a metavariable in a declarative macro. MetaVar(MetaVarKind), @@ -45,20 +44,6 @@ impl InvisibleOrigin { } } -impl PartialEq for InvisibleOrigin { - #[inline] - fn eq(&self, _other: &InvisibleOrigin) -> bool { - // When we had AST-based nonterminals we couldn't compare them, and the - // old `Nonterminal` type had an `eq` that always returned false, - // resulting in this restriction: - // https://doc.rust-lang.org/nightly/reference/macros-by-example.html#forwarding-a-matched-fragment - // This `eq` emulates that behaviour. We could consider lifting this - // restriction now but there are still cases involving invisible - // delimiters that make it harder than it first appears. - false - } -} - /// Annoyingly similar to `NonterminalKind`, but the slight differences are important. #[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)] pub enum MetaVarKind { @@ -142,7 +127,8 @@ impl Delimiter { } } - // This exists because `InvisibleOrigin`s should be compared. It is only used for assertions. + // This exists because `InvisibleOrigin`s should not be compared. It is only used for + // assertions. pub fn eq_ignoring_invisible_origin(&self, other: &Delimiter) -> bool { match (self, other) { (Delimiter::Parenthesis, Delimiter::Parenthesis) => true, diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 4fd6110ac4a11..257c7b95666f7 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -217,6 +217,10 @@ pub(crate) unsafe fn create_module<'ll>( // LLVM 22.0 updated the default layout on avr: https://github.com/llvm/llvm-project/pull/153010 target_data_layout = target_data_layout.replace("n8:16", "n8") } + if sess.target.arch == "nvptx64" { + // LLVM 22 updated the NVPTX layout to indicate 256-bit vector load/store: https://github.com/llvm/llvm-project/pull/155198 + target_data_layout = target_data_layout.replace("-i256:256", ""); + } } // Ensure the data-layout values hardcoded remain the defaults. 
diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index 19c919c0e4efe..48b01ea2df197 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -58,6 +58,7 @@ use super::linker::{self, Linker}; use super::metadata::{MetadataPosition, create_wrapper_file}; use super::rpath::{self, RPathConfig}; use super::{apple, versioned_llvm_target}; +use crate::base::needs_allocator_shim_for_linking; use crate::{ CodegenResults, CompiledModule, CrateInfo, NativeLib, errors, looks_like_rust_object_file, }; @@ -2080,9 +2081,17 @@ fn add_local_crate_regular_objects(cmd: &mut dyn Linker, codegen_results: &Codeg } /// Add object files for allocator code linked once for the whole crate tree. -fn add_local_crate_allocator_objects(cmd: &mut dyn Linker, codegen_results: &CodegenResults) { - if let Some(obj) = codegen_results.allocator_module.as_ref().and_then(|m| m.object.as_ref()) { - cmd.add_object(obj); +fn add_local_crate_allocator_objects( + cmd: &mut dyn Linker, + codegen_results: &CodegenResults, + crate_type: CrateType, +) { + if needs_allocator_shim_for_linking(&codegen_results.crate_info.dependency_formats, crate_type) + { + if let Some(obj) = codegen_results.allocator_module.as_ref().and_then(|m| m.object.as_ref()) + { + cmd.add_object(obj); + } } } @@ -2281,7 +2290,7 @@ fn linker_with_args( codegen_results, metadata, ); - add_local_crate_allocator_objects(cmd, codegen_results); + add_local_crate_allocator_objects(cmd, codegen_results, crate_type); // Avoid linking to dynamic libraries unless they satisfy some undefined symbols // at the point at which they are specified on the command line. diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index df1e91b12f904..a2efd420a327a 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -11,8 +11,9 @@ use rustc_metadata::{ }; use rustc_middle::bug; use rustc_middle::middle::dependency_format::Linkage; -use rustc_middle::middle::exported_symbols; -use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportKind}; +use rustc_middle::middle::exported_symbols::{ + self, ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, +}; use rustc_middle::ty::TyCtxt; use rustc_session::Session; use rustc_session::config::{self, CrateType, DebugInfo, LinkerPluginLto, Lto, OptLevel, Strip}; @@ -22,6 +23,8 @@ use tracing::{debug, warn}; use super::command::Command; use super::symbol_export; +use crate::back::symbol_export::allocator_shim_symbols; +use crate::base::needs_allocator_shim_for_linking; use crate::errors; #[cfg(test)] @@ -1827,7 +1830,7 @@ fn exported_symbols_for_non_proc_macro( let export_threshold = symbol_export::crates_export_threshold(&[crate_type]); for_each_exported_symbols_include_dep(tcx, crate_type, |symbol, info, cnum| { // Do not export mangled symbols from cdylibs and don't attempt to export compiler-builtins - // from any cdylib. The latter doesn't work anyway as we use hidden visibility for + // from any dylib. The latter doesn't work anyway as we use hidden visibility for // compiler-builtins. Most linkers silently ignore it, but ld64 gives a warning. 
if info.level.is_below_threshold(export_threshold) && !tcx.is_compiler_builtins(cnum) { symbols.push(( @@ -1838,6 +1841,14 @@ fn exported_symbols_for_non_proc_macro( } }); + // Mark allocator shim symbols as exported only if they were generated. + if export_threshold == SymbolExportLevel::Rust + && needs_allocator_shim_for_linking(tcx.dependency_formats(()), crate_type) + && tcx.allocator_kind(()).is_some() + { + symbols.extend(allocator_shim_symbols(tcx)); + } + symbols } diff --git a/compiler/rustc_codegen_ssa/src/back/lto.rs b/compiler/rustc_codegen_ssa/src/back/lto.rs index c95038375a1bd..e6df6a2469f37 100644 --- a/compiler/rustc_codegen_ssa/src/back/lto.rs +++ b/compiler/rustc_codegen_ssa/src/back/lto.rs @@ -8,8 +8,9 @@ use rustc_middle::ty::TyCtxt; use rustc_session::config::{CrateType, Lto}; use tracing::info; -use crate::back::symbol_export::{self, symbol_name_for_instance_in_crate}; +use crate::back::symbol_export::{self, allocator_shim_symbols, symbol_name_for_instance_in_crate}; use crate::back::write::CodegenContext; +use crate::base::allocator_kind_for_codegen; use crate::errors::{DynamicLinkingWithLTO, LtoDisallowed, LtoDylib, LtoProcMacro}; use crate::traits::*; @@ -115,6 +116,11 @@ pub(super) fn exported_symbols_for_lto( } } + // Mark allocator shim symbols as exported only if they were generated. + if export_threshold == SymbolExportLevel::Rust && allocator_kind_for_codegen(tcx).is_some() { + symbols_below_threshold.extend(allocator_shim_symbols(tcx).map(|(name, _kind)| name)); + } + symbols_below_threshold } diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs index d8a1480e911fe..b49e67217fb01 100644 --- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs +++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs @@ -18,7 +18,7 @@ use rustc_symbol_mangling::mangle_internal_symbol; use rustc_target::spec::TlsModel; use tracing::debug; -use crate::base::allocator_kind_for_codegen; +use crate::back::symbol_export; fn threshold(tcx: TyCtxt<'_>) -> SymbolExportLevel { crates_export_threshold(tcx.crate_types()) @@ -217,31 +217,6 @@ fn exported_non_generic_symbols_provider_local<'tcx>( )); } - // Mark allocator shim symbols as exported only if they were generated. - if allocator_kind_for_codegen(tcx).is_some() { - for symbol_name in ALLOCATOR_METHODS - .iter() - .map(|method| mangle_internal_symbol(tcx, global_fn_name(method.name).as_str())) - .chain([ - mangle_internal_symbol(tcx, "__rust_alloc_error_handler"), - mangle_internal_symbol(tcx, OomStrategy::SYMBOL), - mangle_internal_symbol(tcx, NO_ALLOC_SHIM_IS_UNSTABLE), - ]) - { - let exported_symbol = ExportedSymbol::NoDefId(SymbolName::new(tcx, &symbol_name)); - - symbols.push(( - exported_symbol, - SymbolExportInfo { - level: SymbolExportLevel::Rust, - kind: SymbolExportKind::Text, - used: false, - rustc_std_internal_symbol: true, - }, - )); - } - } - // Sort so we get a stable incr. comp. hash. 
symbols.sort_by_cached_key(|s| s.0.symbol_name_for_local_instance(tcx)); @@ -516,6 +491,31 @@ pub(crate) fn provide(providers: &mut Providers) { upstream_monomorphizations_for_provider; } +pub(crate) fn allocator_shim_symbols( + tcx: TyCtxt<'_>, +) -> impl Iterator { + ALLOCATOR_METHODS + .iter() + .map(move |method| mangle_internal_symbol(tcx, global_fn_name(method.name).as_str())) + .chain([ + mangle_internal_symbol(tcx, "__rust_alloc_error_handler"), + mangle_internal_symbol(tcx, OomStrategy::SYMBOL), + mangle_internal_symbol(tcx, NO_ALLOC_SHIM_IS_UNSTABLE), + ]) + .map(move |symbol_name| { + let exported_symbol = ExportedSymbol::NoDefId(SymbolName::new(tcx, &symbol_name)); + + ( + symbol_export::exporting_symbol_name_for_instance_in_crate( + tcx, + exported_symbol, + LOCAL_CRATE, + ), + SymbolExportKind::Text, + ) + }) +} + fn symbol_export_level(tcx: TyCtxt<'_>, sym_def_id: DefId) -> SymbolExportLevel { // We export anything that's not mangled at the "C" layer as it probably has // to do with ABI concerns. We do not, however, apply such treatment to diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 92582dcc39917..8586615f7c764 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -138,12 +138,23 @@ impl ModuleConfig { let emit_obj = if !should_emit_obj { EmitObj::None - } else if sess.target.obj_is_bitcode || sess.opts.cg.linker_plugin_lto.enabled() { + } else if sess.target.obj_is_bitcode + || (sess.opts.cg.linker_plugin_lto.enabled() && !no_builtins) + { // This case is selected if the target uses objects as bitcode, or // if linker plugin LTO is enabled. In the linker plugin LTO case // the assumption is that the final link-step will read the bitcode // and convert it to object code. This may be done by either the // native linker or rustc itself. + // + // Note, however, that the linker-plugin-lto requested here is + // explicitly ignored for `#![no_builtins]` crates. These crates are + // specifically ignored by rustc's LTO passes and wouldn't work if + // loaded into the linker. These crates define symbols that LLVM + // lowers intrinsics to, and these symbol dependencies aren't known + // until after codegen. As a result any crate marked + // `#![no_builtins]` is assumed to not participate in LTO and + // instead goes on to generate object code. EmitObj::Bitcode } else if need_bitcode_in_object(tcx) { EmitObj::ObjectCode(BitcodeSection::Full) diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index 8abaf201abae2..97cdf8b697348 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -17,6 +17,7 @@ use rustc_hir::lang_items::LangItem; use rustc_hir::{ItemId, Target}; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; use rustc_middle::middle::debugger_visualizer::{DebuggerVisualizerFile, DebuggerVisualizerType}; +use rustc_middle::middle::dependency_format::Dependencies; use rustc_middle::middle::exported_symbols::{self, SymbolExportKind}; use rustc_middle::middle::lang_items; use rustc_middle::mir::BinOp; @@ -630,14 +631,30 @@ pub fn allocator_kind_for_codegen(tcx: TyCtxt<'_>) -> Option { // If the crate doesn't have an `allocator_kind` set then there's definitely // no shim to generate. Otherwise we also check our dependency graph for all // our output crate types. 
If anything there looks like its a `Dynamic` - // linkage, then it's already got an allocator shim and we'll be using that - // one instead. If nothing exists then it's our job to generate the - // allocator! - let any_dynamic_crate = tcx.dependency_formats(()).iter().any(|(_, list)| { + // linkage for all crate types we may link as, then it's already got an + // allocator shim and we'll be using that one instead. If nothing exists + // then it's our job to generate the allocator! If crate types disagree + // about whether an allocator shim is necessary or not, we generate one + // and let needs_allocator_shim_for_linking decide at link time whether or + // not to use it for any particular linker invocation. + let all_crate_types_any_dynamic_crate = tcx.dependency_formats(()).iter().all(|(_, list)| { use rustc_middle::middle::dependency_format::Linkage; list.iter().any(|&linkage| linkage == Linkage::Dynamic) }); - if any_dynamic_crate { None } else { tcx.allocator_kind(()) } + if all_crate_types_any_dynamic_crate { None } else { tcx.allocator_kind(()) } +} + +/// Decide if this particular crate type needs an allocator shim linked in. +/// This may return true even when allocator_kind_for_codegen returns false. In +/// this case no allocator shim shall be linked. +pub(crate) fn needs_allocator_shim_for_linking( + dependency_formats: &Dependencies, + crate_type: CrateType, +) -> bool { + use rustc_middle::middle::dependency_format::Linkage; + let any_dynamic_crate = + dependency_formats[&crate_type].iter().any(|&linkage| linkage == Linkage::Dynamic); + !any_dynamic_crate } pub fn codegen_crate( diff --git a/compiler/rustc_errors/src/lib.rs b/compiler/rustc_errors/src/lib.rs index 71fc54f0d33f0..a56e0f3fae132 100644 --- a/compiler/rustc_errors/src/lib.rs +++ b/compiler/rustc_errors/src/lib.rs @@ -1160,7 +1160,7 @@ impl<'a> DiagCtxtHandle<'a> { // - It's only produce with JSON output. // - It's not emitted the usual way, via `emit_diagnostic`. // - The `$message_type` field is "unused_externs" rather than the usual - // "diagnosic". + // "diagnostic". // // We count it as a lint error because it has a lint level. The value // of `loud` (which comes from "unused-externs" or diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 0324057e331a9..ab8e059b7b77f 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -77,7 +77,7 @@ use std::rc::Rc; pub(crate) use NamedMatch::*; pub(crate) use ParseResult::*; -use rustc_ast::token::{self, DocComment, NonterminalKind, Token}; +use rustc_ast::token::{self, DocComment, NonterminalKind, Token, TokenKind}; use rustc_data_structures::fx::FxHashMap; use rustc_errors::ErrorGuaranteed; use rustc_lint_defs::pluralize; @@ -397,7 +397,23 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool { { ident1.name == ident2.name && is_raw1 == is_raw2 } else { - t1.kind == t2.kind + // Note: we SHOULD NOT use `t1.kind == t2.kind` here, and we should instead compare the + // tokens using the special comparison logic below. + // It makes sure that variants containing `InvisibleOrigin` will + // never compare equal to one another. + // + // When we had AST-based nonterminals we couldn't compare them, and the + // old `Nonterminal` type had an `eq` that always returned false, + // resulting in this restriction: + // + // This comparison logic emulates that behaviour. 
We could consider lifting this + // restriction now but there are still cases involving invisible + // delimiters that make it harder than it first appears. + match (t1.kind, t2.kind) { + (TokenKind::OpenInvisible(_) | TokenKind::CloseInvisible(_), _) + | (_, TokenKind::OpenInvisible(_) | TokenKind::CloseInvisible(_)) => false, + (a, b) => a == b, + } } } diff --git a/compiler/rustc_hir_analysis/src/check/region.rs b/compiler/rustc_hir_analysis/src/check/region.rs index f5770b7312ddf..2ba7ed46f92c4 100644 --- a/compiler/rustc_hir_analysis/src/check/region.rs +++ b/compiler/rustc_hir_analysis/src/check/region.rs @@ -490,12 +490,8 @@ fn resolve_local<'tcx>( // // Iterate up to the enclosing destruction scope to find the same scope that will also // be used for the result of the block itself. - while let Some(s) = visitor.cx.var_parent { - let parent = visitor.scope_tree.parent_map.get(&s).cloned(); - if let Some(Scope { data: ScopeData::Destruction, .. }) = parent { - break; - } - visitor.cx.var_parent = parent; + if let Some(inner_scope) = visitor.cx.var_parent { + (visitor.cx.var_parent, _) = visitor.scope_tree.default_temporary_scope(inner_scope) } } } diff --git a/compiler/rustc_interface/src/interface.rs b/compiler/rustc_interface/src/interface.rs index 8f131f45bbddb..4c820b8877b75 100644 --- a/compiler/rustc_interface/src/interface.rs +++ b/compiler/rustc_interface/src/interface.rs @@ -13,7 +13,7 @@ use rustc_lint::LintStore; use rustc_middle::ty; use rustc_middle::ty::CurrentGcx; use rustc_middle::util::Providers; -use rustc_parse::new_parser_from_source_str; +use rustc_parse::new_parser_from_simple_source_str; use rustc_parse::parser::attr::AllowLeadingUnsafe; use rustc_query_impl::QueryCtxt; use rustc_query_system::query::print_query_stack; @@ -68,7 +68,7 @@ pub(crate) fn parse_cfg(dcx: DiagCtxtHandle<'_>, cfgs: Vec) -> Cfg { }; } - match new_parser_from_source_str(&psess, filename, s.to_string()) { + match new_parser_from_simple_source_str(&psess, filename, s.to_string()) { Ok(mut parser) => match parser.parse_meta_item(AllowLeadingUnsafe::No) { Ok(meta_item) if parser.token == token::Eof => { if meta_item.path.segments.len() != 1 { @@ -166,7 +166,7 @@ pub(crate) fn parse_check_cfg(dcx: DiagCtxtHandle<'_>, specs: Vec) -> Ch error!("expected `cfg(name, values(\"value1\", \"value2\", ... \"valueN\"))`") }; - let mut parser = match new_parser_from_source_str(&psess, filename, s.to_string()) { + let mut parser = match new_parser_from_simple_source_str(&psess, filename, s.to_string()) { Ok(parser) => parser, Err(errs) => { errs.into_iter().for_each(|err| err.cancel()); diff --git a/compiler/rustc_middle/src/middle/region.rs b/compiler/rustc_middle/src/middle/region.rs index 857d041224fa9..5367e5edd496a 100644 --- a/compiler/rustc_middle/src/middle/region.rs +++ b/compiler/rustc_middle/src/middle/region.rs @@ -299,4 +299,43 @@ impl ScopeTree { true } + + /// Returns the scope of non-lifetime-extended temporaries within a given scope, as well as + /// whether we've recorded a potential backwards-incompatible change to lint on. + /// Returns `None` when no enclosing temporary scope is found, such as for static items. 
+ pub fn default_temporary_scope(&self, inner: Scope) -> (Option<Scope>, Option<Scope>) { + let mut id = inner; + let mut backwards_incompatible = None; + + while let Some(&p) = self.parent_map.get(&id) { + match p.data { + ScopeData::Destruction => { + debug!("temporary_scope({inner:?}) = {id:?} [enclosing]"); + return (Some(id), backwards_incompatible); + } + ScopeData::IfThenRescope | ScopeData::MatchGuard => { + debug!("temporary_scope({inner:?}) = {p:?} [enclosing]"); + return (Some(p), backwards_incompatible); + } + ScopeData::Node + | ScopeData::CallSite + | ScopeData::Arguments + | ScopeData::IfThen + | ScopeData::Remainder(_) => { + // If we haven't already passed through a backwards-incompatible node, + // then check if we are passing through one now and record it if so. + // This is for now only working for cases where a temporary lifetime is + // *shortened*. + if backwards_incompatible.is_none() { + backwards_incompatible = + self.backwards_incompatible_scope.get(&p.local_id).copied(); + } + id = p + } + } + } + + debug!("temporary_scope({inner:?}) = None"); + (None, backwards_incompatible) + } } diff --git a/compiler/rustc_middle/src/ty/rvalue_scopes.rs b/compiler/rustc_middle/src/ty/rvalue_scopes.rs index 7dfe2d280514f..8b92e48ed1a07 100644 --- a/compiler/rustc_middle/src/ty/rvalue_scopes.rs +++ b/compiler/rustc_middle/src/ty/rvalue_scopes.rs @@ -35,41 +35,8 @@ impl RvalueScopes { // if there's one. Static items, for instance, won't // have an enclosing scope, hence no scope will be // returned. - let mut id = Scope { local_id: expr_id, data: ScopeData::Node }; - let mut backwards_incompatible = None; - - while let Some(&p) = region_scope_tree.parent_map.get(&id) { - match p.data { - ScopeData::Destruction => { - debug!("temporary_scope({expr_id:?}) = {id:?} [enclosing]"); - return (Some(id), backwards_incompatible); - } - ScopeData::IfThenRescope | ScopeData::MatchGuard => { - debug!("temporary_scope({expr_id:?}) = {p:?} [enclosing]"); - return (Some(p), backwards_incompatible); - } - ScopeData::Node - | ScopeData::CallSite - | ScopeData::Arguments - | ScopeData::IfThen - | ScopeData::Remainder(_) => { - // If we haven't already passed through a backwards-incompatible node, - // then check if we are passing through one now and record it if so. - // This is for now only working for cases where a temporary lifetime is - // *shortened*. - if backwards_incompatible.is_none() { - backwards_incompatible = region_scope_tree - .backwards_incompatible_scope - .get(&p.local_id) - .copied(); - } - id = p - } - } - } - - debug!("temporary_scope({expr_id:?}) = None"); - (None, backwards_incompatible) + region_scope_tree + .default_temporary_scope(Scope { local_id: expr_id, data: ScopeData::Node }) } /// Make an association between a sub-expression and an extended lifetime diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs index 197333d942d28..b790966acfd92 100644 --- a/compiler/rustc_parse/src/lib.rs +++ b/compiler/rustc_parse/src/lib.rs @@ -62,7 +62,20 @@ pub fn new_parser_from_source_str( source: String, ) -> Result<Parser<'_>, Vec<Diag<'_>>> { let source_file = psess.source_map().new_source_file(name, source); - new_parser_from_source_file(psess, source_file) + new_parser_from_source_file(psess, source_file, FrontmatterAllowed::Yes) +} + +/// Creates a new parser from a simple (no frontmatter) source string. +/// +/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`, +/// etc., otherwise a panic will occur when they are dropped.
+pub fn new_parser_from_simple_source_str( + psess: &ParseSess, + name: FileName, + source: String, +) -> Result, Vec>> { + let source_file = psess.source_map().new_source_file(name, source); + new_parser_from_source_file(psess, source_file, FrontmatterAllowed::No) } /// Creates a new parser from a filename. On failure, the errors must be consumed via @@ -96,7 +109,7 @@ pub fn new_parser_from_file<'a>( } err.emit(); }); - new_parser_from_source_file(psess, source_file) + new_parser_from_source_file(psess, source_file, FrontmatterAllowed::Yes) } pub fn utf8_error( @@ -147,9 +160,10 @@ pub fn utf8_error( fn new_parser_from_source_file( psess: &ParseSess, source_file: Arc, + frontmatter_allowed: FrontmatterAllowed, ) -> Result, Vec>> { let end_pos = source_file.end_position(); - let stream = source_file_to_stream(psess, source_file, None, FrontmatterAllowed::Yes)?; + let stream = source_file_to_stream(psess, source_file, None, frontmatter_allowed)?; let mut parser = Parser::new(psess, stream, None); if parser.token == token::Eof { parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None); diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs index c32593a6d95ab..bb2cda77dffff 100644 --- a/compiler/rustc_span/src/analyze_source_file.rs +++ b/compiler/rustc_span/src/analyze_source_file.rs @@ -81,8 +81,8 @@ cfg_select! { // use `loadu`, which supports unaligned loading. let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) }; - // For character in the chunk, see if its byte value is < 0, which - // indicates that it's part of a UTF-8 char. + // For each character in the chunk, see if its byte value is < 0, + // which indicates that it's part of a UTF-8 char. let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)); // Create a bit mask from the comparison results. let multibyte_mask = _mm_movemask_epi8(multibyte_test); @@ -132,8 +132,111 @@ cfg_select! { } } } + target_arch = "loongarch64" => { + fn analyze_source_file_dispatch( + src: &str, + lines: &mut Vec, + multi_byte_chars: &mut Vec, + ) { + use std::arch::is_loongarch_feature_detected; + + if is_loongarch_feature_detected!("lsx") { + unsafe { + analyze_source_file_lsx(src, lines, multi_byte_chars); + } + } else { + analyze_source_file_generic( + src, + src.len(), + RelativeBytePos::from_u32(0), + lines, + multi_byte_chars, + ); + } + } + + /// Checks 16 byte chunks of text at a time. If the chunk contains + /// something other than printable ASCII characters and newlines, the + /// function falls back to the generic implementation. Otherwise it uses + /// LSX intrinsics to quickly find all newlines. + #[target_feature(enable = "lsx")] + unsafe fn analyze_source_file_lsx( + src: &str, + lines: &mut Vec, + multi_byte_chars: &mut Vec, + ) { + use std::arch::loongarch64::*; + + const CHUNK_SIZE: usize = 16; + + let (chunks, tail) = src.as_bytes().as_chunks::(); + + // This variable keeps track of where we should start decoding a + // chunk. If a multi-byte character spans across chunk boundaries, + // we need to skip that part in the next chunk because we already + // handled it. + let mut intra_chunk_offset = 0; + + for (chunk_index, chunk) in chunks.iter().enumerate() { + // All LSX memory instructions support unaligned access, so using + // vld is fine. + let chunk = unsafe { lsx_vld::<0>(chunk.as_ptr() as *const i8) }; + + // For each character in the chunk, see if its byte value is < 0, + // which indicates that it's part of a UTF-8 char. 
+ let multibyte_mask = lsx_vmskltz_b(chunk); + // Create a bit mask from the comparison results. + let multibyte_mask = lsx_vpickve2gr_w::<0>(multibyte_mask); + + // If the bit mask is all zero, we only have ASCII chars here: + if multibyte_mask == 0 { + assert!(intra_chunk_offset == 0); + + // Check for newlines in the chunk + let newlines_test = lsx_vseqi_b::<{b'\n' as i32}>(chunk); + let newlines_mask = lsx_vmskltz_b(newlines_test); + let mut newlines_mask = lsx_vpickve2gr_w::<0>(newlines_mask); + + let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1); + + while newlines_mask != 0 { + let index = newlines_mask.trailing_zeros(); + + lines.push(RelativeBytePos(index) + output_offset); + + // Clear the bit, so we can find the next one. + newlines_mask &= newlines_mask - 1; + } + } else { + // The slow path. + // There are multibyte chars in here, fallback to generic decoding. + let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset; + intra_chunk_offset = analyze_source_file_generic( + &src[scan_start..], + CHUNK_SIZE - intra_chunk_offset, + RelativeBytePos::from_usize(scan_start), + lines, + multi_byte_chars, + ); + } + } + + // There might still be a tail left to analyze + let tail_start = src.len() - tail.len() + intra_chunk_offset; + if tail_start < src.len() { + analyze_source_file_generic( + &src[tail_start..], + src.len() - tail_start, + RelativeBytePos::from_usize(tail_start), + lines, + multi_byte_chars, + ); + } + } + } _ => { - // The target (or compiler version) does not support SSE2 ... + // The target (or compiler version) does not support vector instructions + // our specialized implementations need (x86 SSE2, loongarch64 LSX)... fn analyze_source_file_dispatch( src: &str, lines: &mut Vec, diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index ae6755f076424..8907c5e4c4aeb 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -18,6 +18,7 @@ // tidy-alphabetical-start #![allow(internal_features)] #![cfg_attr(bootstrap, feature(round_char_boundary))] +#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))] #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] #![doc(rust_logo)] #![feature(array_windows)] diff --git a/compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs b/compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs index 598f0f19f0def..cada0dd640a49 100644 --- a/compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs +++ b/compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs @@ -6,7 +6,7 @@ use crate::spec::{ pub(crate) fn target() -> Target { Target { arch: "nvptx64".into(), - data_layout: "e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64".into(), + data_layout: "e-p6:32:32-i64:64-i128:128-i256:256-v16:16-v32:32-n16:32:64".into(), llvm_target: "nvptx64-nvidia-cuda".into(), metadata: TargetMetadata { description: Some("--emit=asm generates PTX code that runs on NVIDIA GPUs".into()), diff --git a/library/alloc/src/raw_vec/mod.rs b/library/alloc/src/raw_vec/mod.rs index fd05f9ca464d8..b0027e964e467 100644 --- a/library/alloc/src/raw_vec/mod.rs +++ b/library/alloc/src/raw_vec/mod.rs @@ -468,10 +468,6 @@ impl RawVecInner { return Ok(Self::new_in(alloc, elem_layout.alignment())); } - if let Err(err) = alloc_guard(layout.size()) { - return Err(err); - } - let result = match init { AllocInit::Uninitialized => alloc.allocate(layout), #[cfg(not(no_global_oom_handling))] @@ -662,7 +658,7 @@ impl RawVecInner { let 
new_layout = layout_array(cap, elem_layout)?; let ptr = finish_grow(new_layout, self.current_memory(elem_layout), &mut self.alloc)?; - // SAFETY: finish_grow would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items + // SAFETY: layout_array would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items unsafe { self.set_ptr_and_cap(ptr, cap) }; Ok(()) @@ -684,7 +680,7 @@ impl RawVecInner { let new_layout = layout_array(cap, elem_layout)?; let ptr = finish_grow(new_layout, self.current_memory(elem_layout), &mut self.alloc)?; - // SAFETY: finish_grow would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items + // SAFETY: layout_array would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items unsafe { self.set_ptr_and_cap(ptr, cap); } @@ -771,8 +767,6 @@ fn finish_grow( where A: Allocator, { - alloc_guard(new_layout.size())?; - let memory = if let Some((ptr, old_layout)) = current_memory { debug_assert_eq!(old_layout.align(), new_layout.align()); unsafe { @@ -799,23 +793,6 @@ fn handle_error(e: TryReserveError) -> ! { } } -// We need to guarantee the following: -// * We don't ever allocate `> isize::MAX` byte-size objects. -// * We don't overflow `usize::MAX` and actually allocate too little. -// -// On 64-bit we just need to check for overflow since trying to allocate -// `> isize::MAX` bytes will surely fail. On 32-bit and 16-bit we need to add -// an extra guard for this in case we're running on a platform which can use -// all 4GB in user-space, e.g., PAE or x32. -#[inline] -fn alloc_guard(alloc_size: usize) -> Result<(), TryReserveError> { - if usize::BITS < 64 && alloc_size > isize::MAX as usize { - Err(CapacityOverflow.into()) - } else { - Ok(()) - } -} - #[inline] fn layout_array(cap: usize, elem_layout: Layout) -> Result { elem_layout.repeat(cap).map(|(layout, _pad)| layout).map_err(|_| CapacityOverflow.into()) diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs index c4bb5ab7b219d..452ec95266fe3 100644 --- a/library/core/src/array/mod.rs +++ b/library/core/src/array/mod.rs @@ -48,6 +48,7 @@ pub use iter::IntoIter; /// assert_eq!(strings, ["Hello there!", "Hello there!"]); /// ``` #[inline] +#[must_use = "cloning is often expensive and is not expected to have side effects"] #[stable(feature = "array_repeat", since = "CURRENT_RUSTC_VERSION")] pub fn repeat(val: T) -> [T; N] { from_trusted_iterator(repeat_n(val, N)) diff --git a/library/core/src/iter/adapters/chain.rs b/library/core/src/iter/adapters/chain.rs index 943b88e23305a..3ebdf7b472796 100644 --- a/library/core/src/iter/adapters/chain.rs +++ b/library/core/src/iter/adapters/chain.rs @@ -321,6 +321,7 @@ impl Default for Chain { /// /// // take requires `Default` /// let _: Chain<_, _> = mem::take(&mut foo.0); + /// ``` fn default() -> Self { Chain::new(Default::default(), Default::default()) } diff --git a/library/core/src/iter/adapters/peekable.rs b/library/core/src/iter/adapters/peekable.rs index a6522659620a0..a55de75d56c6e 100644 --- a/library/core/src/iter/adapters/peekable.rs +++ b/library/core/src/iter/adapters/peekable.rs @@ -317,6 +317,108 @@ impl Peekable { { self.next_if(|next| next == expected) } + + /// Consumes the next value of this iterator and applies a function `f` on it, + /// returning the result if the closure returns `Ok`. + /// + /// Otherwise if the closure returns `Err` the value is put back for the next iteration. 
+ /// + /// The content of the `Err` variant is typically the original value of the closure, + /// but this is not required. If a different value is returned, + /// the next `peek()` or `next()` call will result in this new value. + /// This is similar to modifying the output of `peek_mut()`. + /// + /// If the closure panics, the next value will always be consumed and dropped + /// even if the panic is caught, because the closure never returned an `Err` value to put back. + /// + /// # Examples + /// + /// Parse the leading decimal number from an iterator of characters. + /// ``` + /// #![feature(peekable_next_if_map)] + /// let mut iter = "125 GOTO 10".chars().peekable(); + /// let mut line_num = 0_u32; + /// while let Some(digit) = iter.next_if_map(|c| c.to_digit(10).ok_or(c)) { + /// line_num = line_num * 10 + digit; + /// } + /// assert_eq!(line_num, 125); + /// assert_eq!(iter.collect::(), " GOTO 10"); + /// ``` + /// + /// Matching custom types. + /// ``` + /// #![feature(peekable_next_if_map)] + /// + /// #[derive(Debug, PartialEq, Eq)] + /// enum Node { + /// Comment(String), + /// Red(String), + /// Green(String), + /// Blue(String), + /// } + /// + /// /// Combines all consecutive `Comment` nodes into a single one. + /// fn combine_comments(nodes: Vec) -> Vec { + /// let mut result = Vec::with_capacity(nodes.len()); + /// let mut iter = nodes.into_iter().peekable(); + /// let mut comment_text = None::; + /// loop { + /// // Typically the closure in .next_if_map() matches on the input, + /// // extracts the desired pattern into an `Ok`, + /// // and puts the rest into an `Err`. + /// while let Some(text) = iter.next_if_map(|node| match node { + /// Node::Comment(text) => Ok(text), + /// other => Err(other), + /// }) { + /// comment_text.get_or_insert_default().push_str(&text); + /// } + /// + /// if let Some(text) = comment_text.take() { + /// result.push(Node::Comment(text)); + /// } + /// if let Some(node) = iter.next() { + /// result.push(node); + /// } else { + /// break; + /// } + /// } + /// result + /// } + ///# assert_eq!( // hiding the test to avoid cluttering the documentation. + ///# combine_comments(vec![ + ///# Node::Comment("The".to_owned()), + ///# Node::Comment("Quick".to_owned()), + ///# Node::Comment("Brown".to_owned()), + ///# Node::Red("Fox".to_owned()), + ///# Node::Green("Jumped".to_owned()), + ///# Node::Comment("Over".to_owned()), + ///# Node::Blue("The".to_owned()), + ///# Node::Comment("Lazy".to_owned()), + ///# Node::Comment("Dog".to_owned()), + ///# ]), + ///# vec![ + ///# Node::Comment("TheQuickBrown".to_owned()), + ///# Node::Red("Fox".to_owned()), + ///# Node::Green("Jumped".to_owned()), + ///# Node::Comment("Over".to_owned()), + ///# Node::Blue("The".to_owned()), + ///# Node::Comment("LazyDog".to_owned()), + ///# ], + ///# ) + /// ``` + #[unstable(feature = "peekable_next_if_map", issue = "143702")] + pub fn next_if_map(&mut self, f: impl FnOnce(I::Item) -> Result) -> Option { + let unpeek = if let Some(item) = self.next() { + match f(item) { + Ok(result) => return Some(result), + Err(item) => Some(item), + } + } else { + None + }; + self.peeked = Some(unpeek); + None + } } #[unstable(feature = "trusted_len", issue = "37572")] diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index 25864db5fea77..db70fb65d444e 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -1285,7 +1285,7 @@ macro_rules! 
int_impl { /// /// ```should_panic #[doc = concat!("let _ = ", stringify!($SelfT), "::MIN.strict_neg();")] - /// + /// ``` #[stable(feature = "strict_overflow_ops", since = "CURRENT_RUSTC_VERSION")] #[rustc_const_stable(feature = "strict_overflow_ops", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs index c1e656fdea236..a5c8ae7e26edf 100644 --- a/library/core/src/num/uint_macros.rs +++ b/library/core/src/num/uint_macros.rs @@ -1620,7 +1620,7 @@ macro_rules! uint_impl { /// /// ```should_panic #[doc = concat!("let _ = 1", stringify!($SelfT), ".strict_neg();")] - /// + /// ``` #[stable(feature = "strict_overflow_ops", since = "CURRENT_RUSTC_VERSION")] #[rustc_const_stable(feature = "strict_overflow_ops", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ diff --git a/library/core/src/ops/range.rs b/library/core/src/ops/range.rs index c0a27775694c3..58a9431bd845d 100644 --- a/library/core/src/ops/range.rs +++ b/library/core/src/ops/range.rs @@ -836,6 +836,7 @@ pub trait RangeBounds { /// assert!(!(0.0..1.0).contains(&f32::NAN)); /// assert!(!(0.0..f32::NAN).contains(&0.5)); /// assert!(!(f32::NAN..1.0).contains(&0.5)); + /// ``` #[inline] #[stable(feature = "range_contains", since = "1.35.0")] fn contains(&self, item: &U) -> bool diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs index 3fe4b08d459e0..ce6eee4f911ed 100644 --- a/library/core/src/ptr/mut_ptr.rs +++ b/library/core/src/ptr/mut_ptr.rs @@ -106,6 +106,7 @@ impl *mut T { /// /// // This dereference is UB. The pointer only has provenance for `x` but points to `y`. /// println!("{:?}", unsafe { &*bad }); + /// ``` #[unstable(feature = "set_ptr_value", issue = "75091")] #[must_use = "returns a new pointer rather than modifying its argument"] #[inline] diff --git a/library/coretests/tests/iter/adapters/peekable.rs b/library/coretests/tests/iter/adapters/peekable.rs index 7f4341b8902c8..f0549e8d6c2c3 100644 --- a/library/coretests/tests/iter/adapters/peekable.rs +++ b/library/coretests/tests/iter/adapters/peekable.rs @@ -271,3 +271,89 @@ fn test_peekable_non_fused() { assert_eq!(iter.peek(), None); assert_eq!(iter.next_back(), None); } + +#[test] +fn test_peekable_next_if_map_mutation() { + fn collatz((mut num, mut len): (u64, u32)) -> Result { + let jump = num.trailing_zeros(); + num >>= jump; + len += jump; + if num == 1 { Ok(len) } else { Err((3 * num + 1, len + 1)) } + } + + let mut iter = once((3, 0)).peekable(); + assert_eq!(iter.peek(), Some(&(3, 0))); + assert_eq!(iter.next_if_map(collatz), None); + assert_eq!(iter.peek(), Some(&(10, 1))); + assert_eq!(iter.next_if_map(collatz), None); + assert_eq!(iter.peek(), Some(&(16, 3))); + assert_eq!(iter.next_if_map(collatz), Some(7)); + assert_eq!(iter.peek(), None); + assert_eq!(iter.next_if_map(collatz), None); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")] +fn test_peekable_next_if_map_panic() { + use core::cell::Cell; + use std::panic::{AssertUnwindSafe, catch_unwind}; + + struct BitsetOnDrop<'a> { + value: u32, + cell: &'a Cell, + } + impl<'a> Drop for BitsetOnDrop<'a> { + fn drop(&mut self) { + self.cell.update(|v| v | self.value); + } + } + + let cell = &Cell::new(0); + let mut it = [ + BitsetOnDrop { value: 1, cell }, + BitsetOnDrop { value: 2, cell }, + BitsetOnDrop { value: 4, cell }, + BitsetOnDrop { value: 8, cell }, + ] + .into_iter() 
+ .peekable(); + + // sanity check, .peek() won't consume the value, .next() will transfer ownership. + let item = it.peek().unwrap(); + assert_eq!(item.value, 1); + assert_eq!(cell.get(), 0); + let item = it.next().unwrap(); + assert_eq!(item.value, 1); + assert_eq!(cell.get(), 0); + drop(item); + assert_eq!(cell.get(), 1); + + // next_if_map returning Ok should transfer the value out. + let item = it.next_if_map(Ok).unwrap(); + assert_eq!(item.value, 2); + assert_eq!(cell.get(), 1); + drop(item); + assert_eq!(cell.get(), 3); + + // next_if_map returning Err should not drop anything. + assert_eq!(it.next_if_map::<()>(Err), None); + assert_eq!(cell.get(), 3); + assert_eq!(it.peek().unwrap().value, 4); + assert_eq!(cell.get(), 3); + + // next_if_map panicking should consume and drop the item. + let result = catch_unwind({ + let mut it = AssertUnwindSafe(&mut it); + move || it.next_if_map::<()>(|_| panic!()) + }); + assert!(result.is_err()); + assert_eq!(cell.get(), 7); + assert_eq!(it.next().unwrap().value, 8); + assert_eq!(cell.get(), 15); + assert!(it.peek().is_none()); + + // next_if_map should *not* execute the closure if the iterator is exhausted. + assert!(it.next_if_map::<()>(|_| panic!()).is_none()); + assert!(it.peek().is_none()); + assert_eq!(cell.get(), 15); +} diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs index bf0a3ae7870a9..2a0600ed93d7e 100644 --- a/library/coretests/tests/lib.rs +++ b/library/coretests/tests/lib.rs @@ -83,6 +83,7 @@ #![feature(numfmt)] #![feature(option_reduce)] #![feature(pattern)] +#![feature(peekable_next_if_map)] #![feature(pointer_is_aligned_to)] #![feature(portable_simd)] #![feature(ptr_metadata)] diff --git a/library/portable-simd/crates/core_simd/src/simd/num/int.rs b/library/portable-simd/crates/core_simd/src/simd/num/int.rs index d25050c3e4b47..e7253313f036c 100644 --- a/library/portable-simd/crates/core_simd/src/simd/num/int.rs +++ b/library/portable-simd/crates/core_simd/src/simd/num/int.rs @@ -58,6 +58,7 @@ pub trait SimdInt: Copy + Sealed { /// let sat = x.saturating_sub(max); /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0])); /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0])); + /// ``` fn saturating_sub(self, second: Self) -> Self; /// Lanewise absolute value, implemented in Rust. diff --git a/library/portable-simd/crates/core_simd/src/simd/num/uint.rs b/library/portable-simd/crates/core_simd/src/simd/num/uint.rs index 45d978068b664..e3ba8658bd803 100644 --- a/library/portable-simd/crates/core_simd/src/simd/num/uint.rs +++ b/library/portable-simd/crates/core_simd/src/simd/num/uint.rs @@ -55,6 +55,7 @@ pub trait SimdUint: Copy + Sealed { /// let sat = x.saturating_sub(max); /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0])); /// assert_eq!(sat, Simd::splat(0)); + /// ``` fn saturating_sub(self, second: Self) -> Self; /// Lanewise absolute difference. 
diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 470d300d2d95b..cc91e5e0eda39 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -1575,8 +1575,6 @@ impl PathBuf { /// # Examples /// /// ``` - /// #![feature(path_add_extension)] - /// /// use std::path::{Path, PathBuf}; /// /// let mut p = PathBuf::from("/feel/the"); @@ -1596,7 +1594,7 @@ impl PathBuf { /// p.add_extension(""); /// assert_eq!(Path::new("/feel/the.formatted.dark"), p.as_path()); /// ``` - #[unstable(feature = "path_add_extension", issue = "127292")] + #[stable(feature = "path_add_extension", since = "CURRENT_RUSTC_VERSION")] pub fn add_extension>(&mut self, extension: S) -> bool { self._add_extension(extension.as_ref()) } @@ -2878,8 +2876,6 @@ impl Path { /// # Examples /// /// ``` - /// #![feature(path_add_extension)] - /// /// use std::path::{Path, PathBuf}; /// /// let path = Path::new("foo.rs"); @@ -2890,7 +2886,7 @@ impl Path { /// assert_eq!(path.with_added_extension("xz"), PathBuf::from("foo.tar.gz.xz")); /// assert_eq!(path.with_added_extension("").with_added_extension("txt"), PathBuf::from("foo.tar.gz.txt")); /// ``` - #[unstable(feature = "path_add_extension", issue = "127292")] + #[stable(feature = "path_add_extension", since = "CURRENT_RUSTC_VERSION")] pub fn with_added_extension>(&self, extension: S) -> PathBuf { let mut new_path = self.to_path_buf(); new_path.add_extension(extension); diff --git a/library/std/tests/path.rs b/library/std/tests/path.rs index e1576a0d4231a..3577f0d9c7bb6 100644 --- a/library/std/tests/path.rs +++ b/library/std/tests/path.rs @@ -1,4 +1,4 @@ -#![feature(clone_to_uninit, path_add_extension, maybe_uninit_slice, normalize_lexically)] +#![feature(clone_to_uninit, maybe_uninit_slice, normalize_lexically)] use std::clone::CloneToUninit; use std::ffi::OsStr; diff --git a/src/bootstrap/src/core/build_steps/check.rs b/src/bootstrap/src/core/build_steps/check.rs index a604e7c058593..49d12b64da510 100644 --- a/src/bootstrap/src/core/build_steps/check.rs +++ b/src/bootstrap/src/core/build_steps/check.rs @@ -389,7 +389,7 @@ impl Step for Rustc { /// Represents a compiler that can check something. /// -/// If the compiler was created for `Mode::ToolRustc` or `Mode::Codegen`, it will also contain +/// If the compiler was created for `Mode::ToolRustcPrivate` or `Mode::Codegen`, it will also contain /// .rmeta artifacts from rustc that was already checked using `build_compiler`. /// /// All steps that use this struct in a "general way" (i.e. they don't know exactly what kind of @@ -469,7 +469,7 @@ pub fn prepare_compiler_for_check( build_compiler } } - Mode::ToolRustc | Mode::Codegen => { + Mode::ToolRustcPrivate | Mode::Codegen => { // Check Rustc to produce the required rmeta artifacts for rustc_private, and then // return the build compiler that was used to check rustc. // We do not need to check examples/tests/etc. of Rustc for rustc_private, so we pass @@ -767,19 +767,22 @@ fn run_tool_check_step( tool_check_step!(Rustdoc { path: "src/tools/rustdoc", alt_path: "src/librustdoc", - mode: |_builder| Mode::ToolRustc + mode: |_builder| Mode::ToolRustcPrivate }); // Clippy, miri and Rustfmt are hybrids. They are external tools, but use a git subtree instead // of a submodule. Since the SourceType only drives the deny-warnings // behavior, treat it as in-tree so that any new warnings in clippy will be // rejected. 
-tool_check_step!(Clippy { path: "src/tools/clippy", mode: |_builder| Mode::ToolRustc }); -tool_check_step!(Miri { path: "src/tools/miri", mode: |_builder| Mode::ToolRustc }); -tool_check_step!(CargoMiri { path: "src/tools/miri/cargo-miri", mode: |_builder| Mode::ToolRustc }); -tool_check_step!(Rustfmt { path: "src/tools/rustfmt", mode: |_builder| Mode::ToolRustc }); +tool_check_step!(Clippy { path: "src/tools/clippy", mode: |_builder| Mode::ToolRustcPrivate }); +tool_check_step!(Miri { path: "src/tools/miri", mode: |_builder| Mode::ToolRustcPrivate }); +tool_check_step!(CargoMiri { + path: "src/tools/miri/cargo-miri", + mode: |_builder| Mode::ToolRustcPrivate +}); +tool_check_step!(Rustfmt { path: "src/tools/rustfmt", mode: |_builder| Mode::ToolRustcPrivate }); tool_check_step!(RustAnalyzer { path: "src/tools/rust-analyzer", - mode: |_builder| Mode::ToolRustc, + mode: |_builder| Mode::ToolRustcPrivate, allow_features: tool::RustAnalyzer::ALLOW_FEATURES, enable_features: ["in-rust-tree"], }); diff --git a/src/bootstrap/src/core/build_steps/clippy.rs b/src/bootstrap/src/core/build_steps/clippy.rs index 05f8b240291ea..2083c675e1fdd 100644 --- a/src/bootstrap/src/core/build_steps/clippy.rs +++ b/src/bootstrap/src/core/build_steps/clippy.rs @@ -366,8 +366,13 @@ impl Step for CodegenGcc { ); self.build_compiler.configure_cargo(&mut cargo); - let _guard = - builder.msg(Kind::Clippy, "rustc_codegen_gcc", Mode::ToolRustc, build_compiler, target); + let _guard = builder.msg( + Kind::Clippy, + "rustc_codegen_gcc", + Mode::ToolRustcPrivate, + build_compiler, + target, + ); let stamp = BuildStamp::new(&builder.cargo_out(build_compiler, Mode::Codegen, target)) .with_prefix("rustc_codegen_gcc-check"); @@ -478,8 +483,8 @@ lint_any!( Bootstrap, "src/bootstrap", "bootstrap", Mode::ToolTarget; BuildHelper, "src/build_helper", "build_helper", Mode::ToolTarget; BuildManifest, "src/tools/build-manifest", "build-manifest", Mode::ToolTarget; - CargoMiri, "src/tools/miri/cargo-miri", "cargo-miri", Mode::ToolRustc; - Clippy, "src/tools/clippy", "clippy", Mode::ToolRustc; + CargoMiri, "src/tools/miri/cargo-miri", "cargo-miri", Mode::ToolRustcPrivate; + Clippy, "src/tools/clippy", "clippy", Mode::ToolRustcPrivate; CollectLicenseMetadata, "src/tools/collect-license-metadata", "collect-license-metadata", Mode::ToolTarget; Compiletest, "src/tools/compiletest", "compiletest", Mode::ToolTarget; CoverageDump, "src/tools/coverage-dump", "coverage-dump", Mode::ToolTarget; @@ -487,14 +492,14 @@ lint_any!( Jsondoclint, "src/tools/jsondoclint", "jsondoclint", Mode::ToolTarget; LintDocs, "src/tools/lint-docs", "lint-docs", Mode::ToolTarget; LlvmBitcodeLinker, "src/tools/llvm-bitcode-linker", "llvm-bitcode-linker", Mode::ToolTarget; - Miri, "src/tools/miri", "miri", Mode::ToolRustc; + Miri, "src/tools/miri", "miri", Mode::ToolRustcPrivate; MiroptTestTools, "src/tools/miropt-test-tools", "miropt-test-tools", Mode::ToolTarget; OptDist, "src/tools/opt-dist", "opt-dist", Mode::ToolTarget; RemoteTestClient, "src/tools/remote-test-client", "remote-test-client", Mode::ToolTarget; RemoteTestServer, "src/tools/remote-test-server", "remote-test-server", Mode::ToolTarget; - RustAnalyzer, "src/tools/rust-analyzer", "rust-analyzer", Mode::ToolRustc; - Rustdoc, "src/librustdoc", "clippy", Mode::ToolRustc; - Rustfmt, "src/tools/rustfmt", "rustfmt", Mode::ToolRustc; + RustAnalyzer, "src/tools/rust-analyzer", "rust-analyzer", Mode::ToolRustcPrivate; + Rustdoc, "src/librustdoc", "clippy", Mode::ToolRustcPrivate; + Rustfmt, "src/tools/rustfmt", 
"rustfmt", Mode::ToolRustcPrivate; RustInstaller, "src/tools/rust-installer", "rust-installer", Mode::ToolTarget; Tidy, "src/tools/tidy", "tidy", Mode::ToolTarget; TestFloatParse, "src/tools/test-float-parse", "test-float-parse", Mode::ToolStd; diff --git a/src/bootstrap/src/core/build_steps/doc.rs b/src/bootstrap/src/core/build_steps/doc.rs index 0789eefa89460..eb198a0051abe 100644 --- a/src/bootstrap/src/core/build_steps/doc.rs +++ b/src/bootstrap/src/core/build_steps/doc.rs @@ -995,7 +995,7 @@ macro_rules! tool_doc { // Build rustc docs so that we generate relative links. run.builder.ensure(Rustc::from_build_compiler(run.builder, compilers.build_compiler(), target)); - (compilers.build_compiler(), Mode::ToolRustc) + (compilers.build_compiler(), Mode::ToolRustcPrivate) } else { // bootstrap/host tools have to be documented with the stage 0 compiler (prepare_doc_compiler(run.builder, run.builder.host_target, 1), Mode::ToolBootstrap) diff --git a/src/bootstrap/src/core/build_steps/run.rs b/src/bootstrap/src/core/build_steps/run.rs index d9de6b7ef96bf..9f7248b80f763 100644 --- a/src/bootstrap/src/core/build_steps/run.rs +++ b/src/bootstrap/src/core/build_steps/run.rs @@ -161,7 +161,7 @@ impl Step for Miri { let mut miri = tool::prepare_tool_cargo( builder, compilers.build_compiler(), - Mode::ToolRustc, + Mode::ToolRustcPrivate, host, Kind::Run, "src/tools/miri", @@ -487,7 +487,7 @@ impl Step for Rustfmt { let mut rustfmt = tool::prepare_tool_cargo( builder, rustfmt_build.build_compiler, - Mode::ToolRustc, + Mode::ToolRustcPrivate, host, Kind::Run, "src/tools/rustfmt", diff --git a/src/bootstrap/src/core/build_steps/test.rs b/src/bootstrap/src/core/build_steps/test.rs index ee2cbe9385e48..22800aaa46564 100644 --- a/src/bootstrap/src/core/build_steps/test.rs +++ b/src/bootstrap/src/core/build_steps/test.rs @@ -455,7 +455,7 @@ impl Step for RustAnalyzer { let mut cargo = tool::prepare_tool_cargo( builder, self.compilers.build_compiler(), - Mode::ToolRustc, + Mode::ToolRustcPrivate, host, Kind::Test, crate_path, @@ -518,7 +518,7 @@ impl Step for Rustfmt { let mut cargo = tool::prepare_tool_cargo( builder, build_compiler, - Mode::ToolRustc, + Mode::ToolRustcPrivate, target, Kind::Test, "src/tools/rustfmt", @@ -571,7 +571,8 @@ impl Miri { cargo.env("MIRI_SYSROOT", &miri_sysroot); let mut cargo = BootstrapCommand::from(cargo); - let _guard = builder.msg(Kind::Build, "miri sysroot", Mode::ToolRustc, compiler, target); + let _guard = + builder.msg(Kind::Build, "miri sysroot", Mode::ToolRustcPrivate, compiler, target); cargo.run(builder); // # Determine where Miri put its sysroot. @@ -648,7 +649,7 @@ impl Step for Miri { let mut cargo = tool::prepare_tool_cargo( builder, miri.build_compiler, - Mode::ToolRustc, + Mode::ToolRustcPrivate, host, Kind::Test, "src/tools/miri", @@ -861,7 +862,7 @@ impl Step for Clippy { let mut cargo = tool::prepare_tool_cargo( builder, build_compiler, - Mode::ToolRustc, + Mode::ToolRustcPrivate, target, Kind::Test, "src/tools/clippy", @@ -872,7 +873,7 @@ impl Step for Clippy { cargo.env("RUSTC_TEST_SUITE", builder.rustc(build_compiler)); cargo.env("RUSTC_LIB_PATH", builder.rustc_libdir(build_compiler)); let host_libs = - builder.stage_out(build_compiler, Mode::ToolRustc).join(builder.cargo_dir()); + builder.stage_out(build_compiler, Mode::ToolRustcPrivate).join(builder.cargo_dir()); cargo.env("HOST_LIBS", host_libs); // Build the standard library that the tests can use. 
@@ -2411,7 +2412,7 @@ impl BookTest { let libs = if !self.dependencies.is_empty() { let mut lib_paths = vec![]; for dep in self.dependencies { - let mode = Mode::ToolRustc; + let mode = Mode::ToolRustcPrivate; let target = builder.config.host_target; let cargo = tool::prepare_tool_cargo( builder, @@ -2996,7 +2997,7 @@ impl Step for CrateRustdoc { let mut cargo = tool::prepare_tool_cargo( builder, compiler, - Mode::ToolRustc, + Mode::ToolRustcPrivate, target, builder.kind, "src/tools/rustdoc", diff --git a/src/bootstrap/src/core/build_steps/tool.rs b/src/bootstrap/src/core/build_steps/tool.rs index 65c4c49908653..c5308034fe307 100644 --- a/src/bootstrap/src/core/build_steps/tool.rs +++ b/src/bootstrap/src/core/build_steps/tool.rs @@ -82,7 +82,7 @@ impl Step for ToolBuild { let path = self.path; match self.mode { - Mode::ToolRustc => { + Mode::ToolRustcPrivate => { // FIXME: remove this, it's only needed for download-rustc... if !self.build_compiler.is_forced_compiler() && builder.download_rustc() { builder.std(self.build_compiler, self.build_compiler.host); @@ -123,7 +123,7 @@ impl Step for ToolBuild { // Rustc tools (miri, clippy, cargo, rustfmt, rust-analyzer) // could use the additional optimizations. - if self.mode == Mode::ToolRustc && is_lto_stage(&self.build_compiler) { + if self.mode == Mode::ToolRustcPrivate && is_lto_stage(&self.build_compiler) { let lto = match builder.config.rust_lto { RustcLto::Off => Some("off"), RustcLto::Thin => Some("thin"), @@ -607,7 +607,7 @@ impl Step for ErrorIndex { build_compiler: self.compilers.build_compiler, target: self.compilers.target(), tool: "error_index_generator", - mode: Mode::ToolRustc, + mode: Mode::ToolRustcPrivate, path: "src/tools/error_index_generator", source_type: SourceType::InTree, extra_features: Vec::new(), @@ -671,7 +671,7 @@ impl Step for RemoteTestServer { /// Represents `Rustdoc` that either comes from the external stage0 sysroot or that is built /// locally. /// Rustdoc is special, because it both essentially corresponds to a `Compiler` (that can be -/// externally provided), but also to a `ToolRustc` tool. +/// externally provided), but also to a `ToolRustcPrivate` tool. #[derive(Debug, Clone, Hash, PartialEq, Eq)] pub struct Rustdoc { /// If the stage of `target_compiler` is `0`, then rustdoc is externally provided. @@ -759,7 +759,7 @@ impl Step for Rustdoc { // the wrong rustdoc being executed. To avoid the conflicting rustdocs, we name the "tool" // rustdoc a different name. 
tool: "rustdoc_tool_binary", - mode: Mode::ToolRustc, + mode: Mode::ToolRustcPrivate, path: "src/tools/rustdoc", source_type: SourceType::InTree, extra_features, @@ -1048,7 +1048,7 @@ impl Step for RustAnalyzer { build_compiler, target, tool: "rust-analyzer", - mode: Mode::ToolRustc, + mode: Mode::ToolRustcPrivate, path: "src/tools/rust-analyzer", extra_features: vec!["in-rust-tree".to_owned()], source_type: SourceType::InTree, @@ -1105,7 +1105,7 @@ impl Step for RustAnalyzerProcMacroSrv { build_compiler: self.compilers.build_compiler, target: self.compilers.target(), tool: "rust-analyzer-proc-macro-srv", - mode: Mode::ToolRustc, + mode: Mode::ToolRustcPrivate, path: "src/tools/rust-analyzer/crates/proc-macro-srv-cli", extra_features: vec!["in-rust-tree".to_owned()], source_type: SourceType::InTree, @@ -1352,7 +1352,7 @@ impl RustcPrivateCompilers { } } -/// Creates a step that builds an extended `Mode::ToolRustc` tool +/// Creates a step that builds an extended `Mode::ToolRustcPrivate` tool /// and installs it into the sysroot of a corresponding compiler. macro_rules! tool_rustc_extended { ( @@ -1466,7 +1466,7 @@ fn build_extended_rustc_tool( build_compiler, target, tool: tool_name, - mode: Mode::ToolRustc, + mode: Mode::ToolRustcPrivate, path, extra_features, source_type: SourceType::InTree, diff --git a/src/bootstrap/src/core/builder/cargo.rs b/src/bootstrap/src/core/builder/cargo.rs index cdf6fe573e583..a9a74b9bb0731 100644 --- a/src/bootstrap/src/core/builder/cargo.rs +++ b/src/bootstrap/src/core/builder/cargo.rs @@ -533,7 +533,7 @@ impl Builder<'_> { if cmd_kind == Kind::Doc { let my_out = match mode { // This is the intended out directory for compiler documentation. - Mode::Rustc | Mode::ToolRustc | Mode::ToolBootstrap => { + Mode::Rustc | Mode::ToolRustcPrivate | Mode::ToolBootstrap => { self.compiler_doc_out(target) } Mode::Std => { @@ -583,7 +583,7 @@ impl Builder<'_> { // We synthetically interpret a stage0 compiler used to build tools as a // "raw" compiler in that it's the exact snapshot we download. For things like - // ToolRustc, we would have to use the artificial stage0-sysroot compiler instead. + // ToolRustcPrivate, we would have to use the artificial stage0-sysroot compiler instead. let use_snapshot = mode == Mode::ToolBootstrap || (mode == Mode::ToolTarget && build_compiler_stage == 0); assert!(!use_snapshot || build_compiler_stage == 0 || self.local_rebuild); @@ -643,7 +643,8 @@ impl Builder<'_> { // sysroot. Passing this cfg enables raw-dylib support instead, which makes the native // library unnecessary. This can be removed when windows-rs enables raw-dylib // unconditionally. - if let Mode::Rustc | Mode::ToolRustc | Mode::ToolBootstrap | Mode::ToolTarget = mode { + if let Mode::Rustc | Mode::ToolRustcPrivate | Mode::ToolBootstrap | Mode::ToolTarget = mode + { rustflags.arg("--cfg=windows_raw_dylib"); } @@ -657,7 +658,7 @@ impl Builder<'_> { // - rust-analyzer, due to the rowan crate // so we exclude an entire category of steps here due to lack of fine-grained control over // rustflags. 
- if self.config.rust_randomize_layout && mode != Mode::ToolRustc { + if self.config.rust_randomize_layout && mode != Mode::ToolRustcPrivate { rustflags.arg("-Zrandomize-layout"); } @@ -717,7 +718,7 @@ impl Builder<'_> { match mode { Mode::Std | Mode::ToolBootstrap | Mode::ToolStd | Mode::ToolTarget => {} - Mode::Rustc | Mode::Codegen | Mode::ToolRustc => { + Mode::Rustc | Mode::Codegen | Mode::ToolRustcPrivate => { // Build proc macros both for the host and the target unless proc-macros are not // supported by the target. if target != compiler.host && cmd_kind != Kind::Check { @@ -778,7 +779,7 @@ impl Builder<'_> { "binary-dep-depinfo,proc_macro_span,proc_macro_span_shrink,proc_macro_diagnostic" .to_string() } - Mode::Std | Mode::Rustc | Mode::Codegen | Mode::ToolRustc => String::new(), + Mode::Std | Mode::Rustc | Mode::Codegen | Mode::ToolRustcPrivate => String::new(), }; cargo.arg("-j").arg(self.jobs().to_string()); @@ -825,7 +826,7 @@ impl Builder<'_> { // rustc step and one that we just built. This isn't always a // problem, somehow -- not really clear why -- but we know that this // fixes things. - Mode::ToolRustc => metadata.push_str("tool-rustc"), + Mode::ToolRustcPrivate => metadata.push_str("tool-rustc"), // Same for codegen backends. Mode::Codegen => metadata.push_str("codegen"), _ => {} @@ -917,7 +918,7 @@ impl Builder<'_> { let debuginfo_level = match mode { Mode::Rustc | Mode::Codegen => self.config.rust_debuginfo_level_rustc, Mode::Std => self.config.rust_debuginfo_level_std, - Mode::ToolBootstrap | Mode::ToolStd | Mode::ToolRustc | Mode::ToolTarget => { + Mode::ToolBootstrap | Mode::ToolStd | Mode::ToolRustcPrivate | Mode::ToolTarget => { self.config.rust_debuginfo_level_tools } }; @@ -930,7 +931,7 @@ impl Builder<'_> { match mode { Mode::Std => self.config.std_debug_assertions, Mode::Rustc | Mode::Codegen => self.config.rustc_debug_assertions, - Mode::ToolBootstrap | Mode::ToolStd | Mode::ToolRustc | Mode::ToolTarget => { + Mode::ToolBootstrap | Mode::ToolStd | Mode::ToolRustcPrivate | Mode::ToolTarget => { self.config.tools_debug_assertions } } @@ -1005,7 +1006,7 @@ impl Builder<'_> { } Mode::Std | Mode::ToolBootstrap - | Mode::ToolRustc + | Mode::ToolRustcPrivate | Mode::ToolStd | Mode::ToolTarget => { if let Some(ref map_to) = @@ -1078,7 +1079,7 @@ impl Builder<'_> { // requirement, but the `-L` library path is not propagated across // separate Cargo projects. We can add LLVM's library path to the // rustc args as a workaround. 
- if (mode == Mode::ToolRustc || mode == Mode::Codegen) + if (mode == Mode::ToolRustcPrivate || mode == Mode::Codegen) && let Some(llvm_config) = self.llvm_config(target) { let llvm_libdir = diff --git a/src/bootstrap/src/lib.rs b/src/bootstrap/src/lib.rs index 59c0f9faacac9..a2aeed20948e5 100644 --- a/src/bootstrap/src/lib.rs +++ b/src/bootstrap/src/lib.rs @@ -85,12 +85,12 @@ const LLD_FILE_NAMES: &[&str] = &["ld.lld", "ld64.lld", "lld-link", "wasm-ld"]; const EXTRA_CHECK_CFGS: &[(Option, &str, Option<&[&'static str]>)] = &[ (Some(Mode::Rustc), "bootstrap", None), (Some(Mode::Codegen), "bootstrap", None), - (Some(Mode::ToolRustc), "bootstrap", None), + (Some(Mode::ToolRustcPrivate), "bootstrap", None), (Some(Mode::ToolStd), "bootstrap", None), (Some(Mode::Rustc), "llvm_enzyme", None), (Some(Mode::Codegen), "llvm_enzyme", None), - (Some(Mode::ToolRustc), "llvm_enzyme", None), - (Some(Mode::ToolRustc), "rust_analyzer", None), + (Some(Mode::ToolRustcPrivate), "llvm_enzyme", None), + (Some(Mode::ToolRustcPrivate), "rust_analyzer", None), (Some(Mode::ToolStd), "rust_analyzer", None), // Any library specific cfgs like `target_os`, `target_arch` should be put in // priority the `[lints.rust.unexpected_cfgs.check-cfg]` table @@ -334,17 +334,18 @@ pub enum Mode { /// compiletest which needs libtest. ToolStd, - /// Build a tool which uses the locally built rustc and the target std, + /// Build a tool which uses the `rustc_private` mechanism, and thus + /// the locally built rustc rlib artifacts, /// placing the output in the "stageN-tools" directory. This is used for - /// anything that needs a fully functional rustc, such as rustdoc, clippy, - /// cargo, rustfmt, miri, etc. - ToolRustc, + /// everything that links to rustc as a library, such as rustdoc, clippy, + /// rustfmt, miri, etc. + ToolRustcPrivate, } impl Mode { pub fn is_tool(&self) -> bool { match self { - Mode::ToolBootstrap | Mode::ToolRustc | Mode::ToolStd | Mode::ToolTarget => true, + Mode::ToolBootstrap | Mode::ToolRustcPrivate | Mode::ToolStd | Mode::ToolTarget => true, Mode::Std | Mode::Codegen | Mode::Rustc => false, } } @@ -353,7 +354,7 @@ impl Mode { match self { Mode::Std | Mode::Codegen => true, Mode::ToolBootstrap - | Mode::ToolRustc + | Mode::ToolRustcPrivate | Mode::ToolStd | Mode::ToolTarget | Mode::Rustc => false, @@ -924,7 +925,7 @@ impl Build { Mode::Rustc => (Some(build_compiler.stage + 1), "rustc"), Mode::Codegen => (Some(build_compiler.stage + 1), "codegen"), Mode::ToolBootstrap => bootstrap_tool(), - Mode::ToolStd | Mode::ToolRustc => (Some(build_compiler.stage + 1), "tools"), + Mode::ToolStd | Mode::ToolRustcPrivate => (Some(build_compiler.stage + 1), "tools"), Mode::ToolTarget => { // If we're not cross-compiling (the common case), share the target directory with // bootstrap tools to reuse the build cache. @@ -1145,7 +1146,7 @@ impl Build { | Mode::ToolBootstrap | Mode::ToolTarget | Mode::ToolStd - | Mode::ToolRustc, + | Mode::ToolRustcPrivate, ) | None => target_and_stage.stage + 1, }; diff --git a/src/doc/rustc-dev-guide/src/building/bootstrapping/writing-tools-in-bootstrap.md b/src/doc/rustc-dev-guide/src/building/bootstrapping/writing-tools-in-bootstrap.md index 41d0cf8d9fb3a..c3660e24b1526 100644 --- a/src/doc/rustc-dev-guide/src/building/bootstrapping/writing-tools-in-bootstrap.md +++ b/src/doc/rustc-dev-guide/src/building/bootstrapping/writing-tools-in-bootstrap.md @@ -11,11 +11,8 @@ There are three types of tools you can write in bootstrap: Use this for tools that rely on the locally built std. 
The output goes into the "stageN-tools" directory. This mode is rarely used, mainly for `compiletest` which requires `libtest`. -- **`Mode::ToolRustc`** - Use this for tools that depend on both the locally built `rustc` and the target `std`. This is more complex than - the other modes because the tool must be built with the same compiler used for `rustc` and placed in the "stageN-tools" - directory. When you choose `Mode::ToolRustc`, `ToolBuild` implementation takes care of this automatically. - If you need to use the builder’s compiler for something specific, you can get it from `ToolBuildResult`, which is +- **`Mode::ToolRustcPrivate`** + Use this for tools that use the `rustc_private` mechanism, and thus depend on the locally built `rustc` and its rlib artifacts. This is more complex than the other modes because the tool must be built with the same compiler used for `rustc` and placed in the "stageN-tools" directory. When you choose `Mode::ToolRustcPrivate`, `ToolBuild` implementation takes care of this automatically. If you need to use the builder’s compiler for something specific, you can get it from `ToolBuildResult`, which is returned by the tool's [`Step`]. Regardless of the tool type you must return `ToolBuildResult` from the tool’s [`Step`] implementation and use `ToolBuild` inside it. diff --git a/src/tools/compiletest/src/util.rs b/src/tools/compiletest/src/util.rs index 1f16a672a98e7..558e9a58697e7 100644 --- a/src/tools/compiletest/src/util.rs +++ b/src/tools/compiletest/src/util.rs @@ -45,7 +45,7 @@ impl Utf8PathBufExt for Utf8PathBuf { /// The name of the environment variable that holds dynamic library locations. pub fn dylib_env_var() -> &'static str { - if cfg!(windows) { + if cfg!(any(windows, target_os = "cygwin")) { "PATH" } else if cfg!(target_vendor = "apple") { "DYLD_LIBRARY_PATH" diff --git a/src/tools/miri/.github/workflows/ci.yml b/src/tools/miri/.github/workflows/ci.yml index 7d79c384f85a3..c0fed96d4e6e3 100644 --- a/src/tools/miri/.github/workflows/ci.yml +++ b/src/tools/miri/.github/workflows/ci.yml @@ -41,6 +41,11 @@ jobs: multiarch: s390x gcc_cross: s390x-linux-gnu qemu: true + - host_target: powerpc64le-unknown-linux-gnu + os: ubuntu-latest + multiarch: ppc64el + gcc_cross: powerpc64le-linux-gnu + qemu: true - host_target: aarch64-apple-darwin os: macos-latest - host_target: i686-pc-windows-msvc diff --git a/src/tools/miri/CONTRIBUTING.md b/src/tools/miri/CONTRIBUTING.md index 7d78fdddbad3d..073ad267476c4 100644 --- a/src/tools/miri/CONTRIBUTING.md +++ b/src/tools/miri/CONTRIBUTING.md @@ -255,6 +255,12 @@ when installing the Miri toolchain. Alternatively, set the `RUSTUP_TOOLCHAIN` en [`etc/rust_analyzer_helix.toml`]: https://github.com/rust-lang/miri/blob/master/etc/rust_analyzer_helix.toml +### Zed + +Copy [`etc/rust_analyzer_zed.json`] to `.zed/settings.json` in the project root directory. 
+ +[`etc/rust_analyzer_zed.json`]: https://github.com/rust-lang/miri/blob/master/etc/rust_analyzer_zed.json + ### Advanced configuration If you are building Miri with a locally built rustc, set diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock index b46f0f8342093..4df17c83c7e48 100644 --- a/src/tools/miri/Cargo.lock +++ b/src/tools/miri/Cargo.lock @@ -1569,9 +1569,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.19" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" dependencies = [ "sharded-slab", "thread_local", diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml index 99111092d39e7..924dfed2bcab4 100644 --- a/src/tools/miri/Cargo.toml +++ b/src/tools/miri/Cargo.toml @@ -39,7 +39,7 @@ features = ['unprefixed_malloc_on_supported_platforms'] [target.'cfg(unix)'.dependencies] libc = "0.2" # native-lib dependencies -libffi = { version = "4.0.0", optional = true } +libffi = { version = "4.1.1", optional = true } libloading = { version = "0.8", optional = true } serde = { version = "1.0.219", features = ["derive"], optional = true } diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md index 7ccd27d7b83eb..517aa343a6d4d 100644 --- a/src/tools/miri/README.md +++ b/src/tools/miri/README.md @@ -319,8 +319,14 @@ environment variable. We first document the most relevant and most commonly used Can be used without a value; in that case the range defaults to `0..64`. * `-Zmiri-many-seeds-keep-going` tells Miri to really try all the seeds in the given range, even if a failing seed has already been found. This is useful to determine which fraction of seeds fails. +* `-Zmiri-max-extra-rounding-error` tells Miri to always apply the maximum error to float operations + that do not have a guaranteed precision. The sign of the error is still non-deterministic. * `-Zmiri-no-extra-rounding-error` stops Miri from adding extra rounding errors to float operations that do not have a guaranteed precision. +* `-Zmiri-no-short-fd-operations` stops Miri from artificially forcing `read`/`write` operations + to only process a part of their buffer. Note that whenever Miri uses host operations to + implement `read`/`write` (e.g. for file-backed file descriptors), the host system can still + introduce short reads/writes. * `-Zmiri-num-cpus` states the number of available CPUs to be reported by miri. By default, the number of available CPUs is `1`. Note that this flag does not affect how miri handles threads in any way. 
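For illustration (not part of the original diff): a minimal sketch of how the new flags documented in the README hunk above might be passed through, assuming the usual `MIRIFLAGS` environment variable is used to forward flags to `cargo miri`.

```sh
# Sketch: always apply the maximum rounding error to float operations without guaranteed precision.
MIRIFLAGS="-Zmiri-max-extra-rounding-error" cargo miri test

# Sketch: disable Miri's artificial short read/write behaviour for a single run.
MIRIFLAGS="-Zmiri-no-short-fd-operations" cargo miri run
```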
diff --git a/src/tools/miri/cargo-miri/Cargo.lock b/src/tools/miri/cargo-miri/Cargo.lock index b3f5dafab6437..ea9c04a3cb515 100644 --- a/src/tools/miri/cargo-miri/Cargo.lock +++ b/src/tools/miri/cargo-miri/Cargo.lock @@ -429,9 +429,9 @@ dependencies = [ [[package]] name = "rustc-build-sysroot" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb13874a0e55baf4ac3d49d38206aecb31a55b75d6c4d04fd850b53942c8cc8" +checksum = "dd41ead66a69880951b2f7df3139db401d44451b4da123344d27eaa791b89c95" dependencies = [ "anyhow", "rustc_version", diff --git a/src/tools/miri/cargo-miri/Cargo.toml b/src/tools/miri/cargo-miri/Cargo.toml index 77cb1df8e747f..64b56ea114ecf 100644 --- a/src/tools/miri/cargo-miri/Cargo.toml +++ b/src/tools/miri/cargo-miri/Cargo.toml @@ -18,7 +18,7 @@ directories = "6" rustc_version = "0.4" serde_json = "1.0.40" cargo_metadata = "0.21" -rustc-build-sysroot = "0.5.8" +rustc-build-sysroot = "0.5.10" # Enable some feature flags that dev-dependencies need but dependencies # do not. This makes `./miri install` after `./miri build` faster. diff --git a/src/tools/miri/cargo-miri/src/phases.rs b/src/tools/miri/cargo-miri/src/phases.rs index efb9053f69a5a..0716f4add9d0d 100644 --- a/src/tools/miri/cargo-miri/src/phases.rs +++ b/src/tools/miri/cargo-miri/src/phases.rs @@ -65,16 +65,6 @@ fn forward_patched_extern_arg(args: &mut impl Iterator, cmd: &mut } pub fn phase_cargo_miri(mut args: impl Iterator) { - // Check for version and help flags even when invoked as `cargo-miri`. - if has_arg_flag("--help") || has_arg_flag("-h") { - show_help(); - return; - } - if has_arg_flag("--version") || has_arg_flag("-V") { - show_version(); - return; - } - // Require a subcommand before any flags. // We cannot know which of those flags take arguments and which do not, // so we cannot detect subcommands later. @@ -85,11 +75,36 @@ pub fn phase_cargo_miri(mut args: impl Iterator) { "setup" => MiriCommand::Setup, "test" | "t" | "run" | "r" | "nextest" => MiriCommand::Forward(subcommand), "clean" => MiriCommand::Clean, - _ => + _ => { + // Check for version and help flags. + if has_arg_flag("--help") || has_arg_flag("-h") { + show_help(); + return; + } + if has_arg_flag("--version") || has_arg_flag("-V") { + show_version(); + return; + } show_error!( "`cargo miri` supports the following subcommands: `run`, `test`, `nextest`, `clean`, and `setup`." 
- ), + ) + } }; + if has_arg_flag("--help") || has_arg_flag("-h") { + match subcommand { + MiriCommand::Forward(verb) => { + println!("`cargo miri {verb}` supports the same flags as `cargo {verb}`:\n"); + let mut cmd = cargo(); + cmd.arg(verb); + cmd.arg("--help"); + exec(cmd); + } + _ => { + show_help(); + return; + } + } + } let verbose = num_arg_flag("-v") + num_arg_flag("--verbose"); let quiet = has_arg_flag("-q") || has_arg_flag("--quiet"); diff --git a/src/tools/miri/doc/img/perfetto_aggregate_statistics.png b/src/tools/miri/doc/img/perfetto_aggregate_statistics.png new file mode 100644 index 0000000000000..d4fd3826f47e3 Binary files /dev/null and b/src/tools/miri/doc/img/perfetto_aggregate_statistics.png differ diff --git a/src/tools/miri/doc/img/perfetto_aggregate_statistics_sql.png b/src/tools/miri/doc/img/perfetto_aggregate_statistics_sql.png new file mode 100644 index 0000000000000..bda92d3885a8f Binary files /dev/null and b/src/tools/miri/doc/img/perfetto_aggregate_statistics_sql.png differ diff --git a/src/tools/miri/doc/img/perfetto_span.png b/src/tools/miri/doc/img/perfetto_span.png new file mode 100644 index 0000000000000..1a7184f22ae60 Binary files /dev/null and b/src/tools/miri/doc/img/perfetto_span.png differ diff --git a/src/tools/miri/doc/img/perfetto_subname_statistics.png b/src/tools/miri/doc/img/perfetto_subname_statistics.png new file mode 100644 index 0000000000000..8c86b07e9253f Binary files /dev/null and b/src/tools/miri/doc/img/perfetto_subname_statistics.png differ diff --git a/src/tools/miri/doc/img/perfetto_timeline.png b/src/tools/miri/doc/img/perfetto_timeline.png new file mode 100644 index 0000000000000..49f8a1fac1d2e Binary files /dev/null and b/src/tools/miri/doc/img/perfetto_timeline.png differ diff --git a/src/tools/miri/doc/img/perfetto_visualize_argument_values.png b/src/tools/miri/doc/img/perfetto_visualize_argument_values.png new file mode 100644 index 0000000000000..1dcbacaf9cb25 Binary files /dev/null and b/src/tools/miri/doc/img/perfetto_visualize_argument_values.png differ diff --git a/src/tools/miri/doc/img/perfetto_visualize_argument_values_misbehaving.png b/src/tools/miri/doc/img/perfetto_visualize_argument_values_misbehaving.png new file mode 100644 index 0000000000000..beeba8a4a3ac9 Binary files /dev/null and b/src/tools/miri/doc/img/perfetto_visualize_argument_values_misbehaving.png differ diff --git a/src/tools/miri/doc/img/perfetto_visualize_argument_values_sql.png b/src/tools/miri/doc/img/perfetto_visualize_argument_values_sql.png new file mode 100644 index 0000000000000..c7b163b0a570b Binary files /dev/null and b/src/tools/miri/doc/img/perfetto_visualize_argument_values_sql.png differ diff --git a/src/tools/miri/doc/tracing.md b/src/tools/miri/doc/tracing.md new file mode 100644 index 0000000000000..d7114af947dc1 --- /dev/null +++ b/src/tools/miri/doc/tracing.md @@ -0,0 +1,292 @@ +# Documentation for the tracing infrastructure in Miri + +Miri can be traced to understand how much time is spent in its various components (e.g. borrow tracker, data race checker, etc.). When tracing is enabled, running Miri will create a `.json` trace file that can be opened and analyzed in [Perfetto](https://ui.perfetto.dev/). For any questions regarding this documentation you may contact [Stypox](https://rust-lang.zulipchat.com/#narrow/dm/627563-Stypox) on Zulip. 
+ +## Obtaining a trace file + +### From the Miri codebase + +All of the tracing functionality in Miri is gated by the `"tracing"` feature flag to ensure it does not create any overhead when unneeded. To compile Miri with this feature enabled, you can pass `--features=tracing` to `./miri`. Then, to make running Miri actually produce a trace file, you also need to set the `MIRI_TRACING` environment variable. For example: + +```sh +MIRI_TRACING=1 ./miri run --features=tracing ./tests/pass/hello.rs +``` + +### From the rustc codebase + +If you are building Miri from within the rustc tree, you need to enable the `"tracing"` feature by adding this line to `bootstrap.toml`: + +```toml +build.tool.miri.features = ["tracing"] +``` + +And then you could run the following: + +```sh +MIRI_TRACING=1 ./x.py run miri --stage 1 --args ./src/tools/miri/tests/pass/hello.rs +``` + +### The trace file + +After running Miri with tracing enabled you will get a `.json` trace file that contains a list of all events and spans that occurred throughout the execution. The file follows [this format](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview). + +## Analyzing a trace file + +To analyze traces you can use [Perfetto UI](https://ui.perfetto.dev/), a trace analyzer made by Google that was originally a part of the Chrome browser. Just open Perfetto and drag and drop the `.json` file there. Official documentation for the controls in the UI can be found [here](https://perfetto.dev/docs/visualization/perfetto-ui). + +### The timeline + +You will see the boxes "Global Legacy Events" and "Process 1" on the left of the workspace: after clicking on either of them their timeline will expand and you will be able to zoom in and look at individual spans (and events). + +- "Process 1" contains tracing spans for the various components of Miri, all in a single timeline line (e.g. borrow tracker, data race checker, etc.) +- "Global Legacy Events" contains auxiliary spans on two separate lines that allow understanding what code is being executed at any point in time: + - "frame": what is the current stack frame in the interpreted program + - "step": what statement/terminator in the MIR of the interpreted program is being executed + +Spans are represented as colored boxes in the timeline, while instantaneous events are represented by tiny arrows. (Events exist because rustc and Miri also use the `tracing` crate for debug logging, and those logs turn into events in the trace.) + +![](./img/perfetto_timeline.png) + +### Span/event data + +You can click on a span or an event to get more information about it, including some arguments that were passed when the span/event was entered/fired. In the following screenshot you can see the details of a "layouting" span that was generated by the following line in Miri's code: + +```rust +let _trace = enter_trace_span!(M, layouting::fn_abi_of_instance, ?instance, ?extra_args); +``` + +![](./img/perfetto_span.png) + +### SQL tables + +Perfetto supports querying the span/event database using SQL queries (see the [docs](https://perfetto.dev/docs/analysis/perfetto-sql-syntax)). Just type `:` in the search bar at the top to enter SQL mode, and then you will be able to enter SQL queries there. The relevant SQL tables are: +- `slices`: contains all spans and events; events can be distinguished from spans since their `dur` is 0. 
Relevant columns are: + - `id`: a unique primary-key ID for the span (assigned by Perfetto, not present in the trace file) + - `ts` and `dur`: the beginning and duration of the span, in nanoseconds + - `name`: the name of the span + - `parent_id`: the parent span ID, or null if there is no parent (assigned by Perfetto based on the timing at which spans occur, i.e. of two nested spans, one must be the child of the other) + - `arg_set_id`: a foreign key into the table of arguments (1-to-N) +- `args`: contains all of the arguments of the various events/spans. Relevant columns are: + - `arg_set_id`: the key used to join the slices and args tables + - `key`: the name of the argument prepended with "args." + - `display_value`: the value of the argument + +Some useful queries are provided in the following sections. + +### Enhancing the timeline + +On the "Process 1" timeline line there are some spans with the same name that are actually generated from different places in Miri's code. In those cases the span name indicates the component that was invoked (e.g. the data race checker), but not the specific function that was run. To inspect the specific function, we store a "subname" in an argument with the same name as the span, which unfortunately can be seen only after clicking on the span. + +To make it quicker to look at subnames, you can add a new timeline line that specifically shows the subnames for spans with a specific name. To do so: +1. select any span with the name you care about (call this name `$NAME`) +2. click on the dropdown highlighted in blue next to the argument with name `$NAME` (or `args.$NAME`) +3. click on "Visualize argument values" +4. a new timeline line will appear with only spans originally named `$NAME`, but now with the subname displayed instead + +The following screenshot shows the 4 steps for spans named "data_race": + +![](./img/perfetto_visualize_argument_values.png) + +### Visualizing which "frame" or "step" is being executed + +Unfortunately the instructions in [Enhancing the timeline](#enhancing-the-timeline) only work well with spans under "Process 1", but misbehave with spans under "Global Legacy Events" (see the screenshot below). This might be a bug in Perfetto, but nevertheless a workaround is available: + +1. click on the search bar at the top and write `:` to enter SQL mode +2. copy-paste the following SQL, replace "SPAN_NAME" at the end with either "frame" or "step" (i.e. one of the two span names under "Global Legacy Events"), and press Enter to execute it: + ```sql + select slices.id, ts, dur, track_id, category, args.string_value as name, depth, stack_id, parent_stack_id, parent_id, slices.arg_set_id, thread_ts, thread_instruction_count, thread_instruction_delta, cat, slice_id + from slices inner join args using (arg_set_id) + where args.key = "args." || name and name = "SPAN_NAME" + ``` +3. at the top-right of the box at the bottom, click on "Show debug track" +4. press "Show" in the popup that just appeared +5. a new debug track will appear with the names of steps or frames + +What the SQL does is to select only spans with the name "SPAN_NAME" and keep all of the span fields untouched, except for the name, which is replaced with the subname. As explained in [Enhancing the timeline](#enhancing-the-timeline), remember that the subname is stored in an argument with the same name as the span.
+ +![](./img/perfetto_visualize_argument_values_sql.png) + + + +### Compute aggregate statistics + +The simplest way to get aggregate statistics about a time range is to: + +1. select a time range by drag-clicking along a trace line +2. click on the "Current Selection" tab at the bottom if it's not already open +3. see various tables/visualizations of how much time is spent in each span by clicking on "Slices", "Pivot Table" or "Slice Flamegraph" + +Note that the numbers shown in the "Slices" and "Pivot Table" tabs also include nested spans, so they cannot be used to compute statistics such as "X% of time is spent in spans named Y" because two spans named Y might be nested and their duration would be counted twice. For such statistics use the method in [Compute aggregate statistics (enhanced)](#compute-aggregate-statistics-enhanced). + +![](./img/perfetto_aggregate_statistics.png) + +### Compute aggregate statistics (enhanced) + +The following (long but not complicated) query can be used to find out how much time is spent in spans (grouped by their name). Only spans without a parent are considered towards the computations (see `where parent_id is null`): so for example if `validate_operand` in turn calls `layouting` (which generates a nested/child span), only the `validate_operand` statistics are increased. This query also excludes auxiliary spans (see `name != "frame" and name != "step"`). + +Note that this query does not allow selecting a time range, but that can be done by adding a condition, e.g. `ts + dur > MIN_T and ts < MAX_T` would match only spans that intersect the range `(MIN_T, MAX_T)`. Remember that the time unit is nanoseconds. + +```sql +select "TOTAL PROGRAM DURATION" as name, count(*), max(ts + dur) as "sum(dur)", 100.0 as "%", null as "min(dur)", null as "max(dur)", null as "avg(dur)", null as "stddev(dur)" +from slices + +union select "TOTAL OVER ALL SPANS (excluding events)" as name, count(*), sum(dur), cast(cast(sum(dur) as float) / (select max(ts + dur) from slices) * 1000 as int) / 10.0 as "%", min(dur), max(dur), cast(avg(dur) as int) as "avg(dur)", cast(sqrt(avg(dur*dur)-avg(dur)*avg(dur)) as int) as "stddev(dur)" +from slices +where parent_id is null and name != "frame" and name != "step" and dur > 0 + +union select name, count(*), sum(dur), cast(cast(sum(dur) as float) / (select max(ts + dur) from slices) * 1000 as int) / 10.0 as "%", min(dur), max(dur), cast(avg(dur) as int) as "avg(dur)", cast(sqrt(avg(dur*dur)-avg(dur)*avg(dur)) as int) as "stddev(dur)" +from slices +where parent_id is null and name != "frame" and name != "step" +group by name +order by sum(dur) desc, count(*) desc +``` + +This is the kind of table you would get out: + +![](./img/perfetto_aggregate_statistics_sql.png) + +### Statistics about subnames of a span + +Use the following SQL to see statistics about the subnames of spans with the same name (replace "SPAN_NAME" with the name of the span you want to see subname statistics of): + +```sql +select args.string_value as name, count(*), sum(dur), min(dur), max(dur), cast(avg(dur) as int) as "avg(dur)", cast(sqrt(avg(dur*dur)-avg(dur)*avg(dur)) as int) as "stddev(dur)" +from slices inner join args using (arg_set_id) +where args.key = "args." 
|| name and name = "SPAN_NAME" +group by args.string_value +order by count(*) desc +``` + +For example, this is the table of how much time is spent in each borrow tracker function: + +![](./img/perfetto_subname_statistics.png) + +### Finding long periods of time without any tracing + +The following SQL finds the longest periods of time where time is being spent, with the ability to click on IDs in the table of results to quickly reach the corresponding place. This can be useful to spot things that use up a significant amount of time but that are not yet covered by tracing calls. + +```sql +with ordered as ( + select s1.*, row_number() over (order by s1.ts) as rn + from slices as s1 + where s1.parent_id is null and s1.dur > 0 and s1.name != "frame" and s1.name != "step" +) +select a.ts+a.dur as ts, b.ts-a.ts-a.dur as dur, a.id, a.track_id, a.category, a.depth, a.stack_id, a.parent_stack_id, a.parent_id, a.arg_set_id, a.thread_ts, a.thread_instruction_count, a.thread_instruction_delta, a.cat, a.slice_id, "empty" as name +from ordered as a inner join ordered as b on a.rn=b.rn-1 +order by b.ts-a.ts-a.dur desc +``` + +### Saving Perfetto's state as a preset + +Unfortunately Perfetto does not seem to support saving the UI state as a preset that can be used to repeat the same analysis on multiple traces. You have to click through the various menus or run the various SQL queries every time to setup the UI as you want. + +## Adding new tracing calls to the code + +### The "tracing" feature + +Miri is highly interconnected with `rustc_const_eval`, and therefore collecting proper trace data about Miri also involves adding some tracing calls within `rustc_const_eval`'s codebase. As explained in [Obtaining a trace file](#obtaining-a-trace-file), tracing calls are disabled (and optimized out) when Miri's "tracing" feature is not enabled. However, while it is possible to check for the feature from Miri's codebase, it's not possible to do so from `rustc_const_eval` (since it's a separate crate, and it's even in a precompiled `.rlib` in case of out-of-tree builds). + +The solution to make it possible to check whether tracing is enabled at compile time even in `rustc_const_eval` was to add a function with this signature to the `Machine` trait: +```rust +fn enter_trace_span(span: impl FnOnce() -> tracing::Span) -> impl EnteredTraceSpan +``` + +where `EnteredTraceSpan` is just a marker trait implemented by `()` and `tracing::span::EnteredSpan`. This function returns `()` by default (without calling the `span` closure), except in `MiriMachine` where if tracing is enabled it will return `span().entered()`. + +The code in `rustc_const_eval` calls this function when it wants to do tracing, and the compiler will (hopefully) optimize out tracing calls when tracing is disabled. + +### The `enter_trace_span!()` macro + +To add tracing to a section of code in Miri or in `rustc_const_eval`, you can use the `enter_trace_span!()` macro, which takes care of the details explained in [The "tracing" feature](#the-tracing-feature). + +The `enter_trace_span!()` macro accepts the same syntax as `tracing::span!()` ([documentation](https://docs.rs/tracing/latest/tracing/#using-the-macros)) except for a few customizations, and returns an already entered trace span. 
The returned value is a drop guard that will exit the span when dropped, so **make sure to give it a proper scope** by storing it in a variable like this: + +```rust +let _trace = enter_trace_span!("My span"); +``` + +When calling this macro from `rustc_const_eval` you need to pass a type implementing the `Machine` trait as the first argument (since it will be used to call `Machine::enter_trace_span()`). This is usually available in various parts of `rustc_const_eval` under the name `M`, since most of `rustc_const_eval`'s code is `Machine`-agnostic. + +```rust +let _trace = enter_trace_span!("My span"); // from Miri +let _trace = enter_trace_span!(M, "My span"); // from rustc_const_eval +``` + +You can also make sense of the syntaxes explained below by looking at this Perfetto screenshot from [Span/event data](#spanevent-data). + +![](./img/perfetto_span.png) + +### Syntax accepted by `tracing::span!()` + +The full documentation for the `tracing::span!()` syntax can be found [here](https://docs.rs/tracing/latest/tracing/#using-the-macros) under "Using the Macros". A few possibly confusing syntaxes are listed here: +```rust +// logs a span named "hello" with a field named "arg" of value 42 (works only because +// 42 implements the tracing::Value trait, otherwise use one of the options below) +let _trace = enter_trace_span!(M, "hello", arg = 42); +// logs a field called "my_display_var" using the Display implementation +let _trace = enter_trace_span!(M, "hello", %my_display_var); +// logs a field called "my_debug_var" using the Debug implementation +let _trace = enter_trace_span!(M, "hello", ?my_debug_var); +``` + +### `NAME::SUBNAME` syntax + +In addition to the syntax accepted by `tracing::span!()`, the `enter_trace_span!()` macro optionally allows passing the span name (i.e. the first macro argument) in the form `NAME::SUBNAME` (without quotes) to indicate that the span has name "NAME" (usually the name of the component) and has an additional, more specific name "SUBNAME" (usually the function name). The latter is passed to the tracing crate as a span field with the name "NAME". This avoids being distracted by subnames when looking at the trace in Perfetto, but when deeper introspection is needed within a component, it's still possible to view the subnames directly with a few steps (see [Enhancing the timeline](#enhancing-the-timeline)). +```rust +// for example, the first will expand to the second +let _trace = enter_trace_span!(M, borrow_tracker::on_stack_pop); +let _trace = enter_trace_span!(M, "borrow_tracker", borrow_tracker = "on_stack_pop"); +``` + +### `tracing_separate_thread` parameter + +Miri saves traces using the `tracing_chrome` `tracing::Layer` so that they can be visualized in Perfetto. To instruct `tracing_chrome` to put some spans on a separate trace thread/line from other spans when viewed in Perfetto, you can pass `tracing_separate_thread = tracing::field::Empty` to the tracing macros. This is useful to separate out spans which just indicate the current step or program frame being processed by the interpreter. As explained in [The timeline](#the-timeline), those spans end up under the "Global Legacy Events" track. You should use a value of `tracing::field::Empty` so that other tracing layers (e.g. the logger) will ignore the `tracing_separate_thread` field.
For example: +```rust +let _trace = enter_trace_span!(M, step::eval_statement, tracing_separate_thread = tracing::field::Empty); +``` + +### Executing something else when tracing is disabled + +The `EnteredTraceSpan` trait contains an `or_if_tracing_disabled()` function that you can use to e.g. log a line as an alternative to the tracing span when tracing is disabled. For example: +```rust +let _trace = enter_trace_span!(M, step::eval_statement) + .or_if_tracing_disabled(|| tracing::info!("eval_statement")); +``` + +## Implementation details + +Here we explain how tracing is implemented internally. + +The events and spans generated throughout the codebase are collected by [the `tracing` crate](https://crates.io/crates/tracing), which then dispatches them to the code that writes to the trace file, but also to the logger if logging is enabled. + +### Choice of tracing library + +The crate that was chosen for collecting traces is [tracing](https://crates.io/crates/tracing), since: +- it is very well maintained +- it supports various trace formats through plug-and-play `Layer`s (in Miri we are using `tracing_chrome` to export traces for Perfetto, see [The `tracing_chrome` layer](#the-tracing_chrome-layer)) +- spans and events are collected with not just their name, but also file, line, module, and any number of custom arguments +- it was already used in Miri and rustc as a logging framework + +One major drawback of the tracing crate, however, is its significant overhead. Entering and exiting a span takes on the order of 100ns, and many of Miri's spans are shorter than that, so their measurements are completely off and the program execution time increases significantly. E.g. at the time of writing this documentation, enabling tracing makes Miri 5x slower. Note that this used to be even worse, see [Time measurements](#time-measurements). + +### The `tracing_chrome` layer + +Miri uses [tracing-chrome](https://github.com/thoren-d/tracing-chrome) as the `Layer` that collects spans and events from the tracing crate and saves them to a file that can be opened in Perfetto. Although the crate [is published](https://crates.io/crates/tracing-chrome) on crates.io, it was not possible to depend on it from Miri, because it would bring in a separate compilation of the `tracing` crate. This is because Miri does not directly depend on `tracing`, and instead uses rustc's version through rustc-private, and apparently cargo can't realize that the same library is being built again when rustc-private is involved. + +So the solution was to copy-paste [the only file](https://github.com/thoren-d/tracing-chrome/blob/develop/src/lib.rs) in tracing-chrome into Miri. Nevertheless, this made it possible to make some changes to tracing-chrome, which you can read about in the documentation at the top of [the file](https://github.com/rust-lang/miri/blob/master/src/bin/log/tracing_chrome.rs) that was copied to Miri. + +### Time measurements + +tracing-chrome originally used `std::time::Instant` to measure time; however, on some x86/x86_64 Linux systems it can be unbearably slow, since the underlying system call (`clock_gettime`) can take ≈1.3µs. Read more [here](https://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/) about how the Linux kernel chooses the clock source. + +Therefore, on x86/x86_64 Linux systems with a CPU that has an invariant TSC counter, we read from that instead to measure time, which takes only ≈13ns.
There are unfortunately a lot of caveats to this approach though, as explained [in the code](https://github.com/rust-lang/miri/blob/master/src/bin/log/tracing_chrome_instant.rs) and [in the PR](https://github.com/rust-lang/miri/pull/4524). The most impactful one is that: every thread spawned in Miri that wants to trace something (which requires measuring time) needs to pin itself to a single CPU core (using `sched_setaffinity`). + +## Other useful stuff + +### Making a flamegraph + +After compiling Miri, you can run the following command to make a flamegraph using Linux' `perf`. It can be useful to spot functions that use up a significant amount of time but that are not yet covered by tracing calls. + +```sh +perf record --call-graph dwarf -F 999 ./miri/target/debug/miri --edition 2021 --sysroot ~/.cache/miri ./tests/pass/hashmap.rs && perf script | inferno-collapse-perf | inferno-flamegraph > flamegraph.svg +``` diff --git a/src/tools/miri/etc/rust_analyzer_zed.json b/src/tools/miri/etc/rust_analyzer_zed.json new file mode 100644 index 0000000000000..839914c8b68ed --- /dev/null +++ b/src/tools/miri/etc/rust_analyzer_zed.json @@ -0,0 +1,41 @@ +{ + "lsp": { + "rust-analyzer": { + "initialization_options": { + "rustc": { + "source": "discover" + }, + "linkedProjects": [ + "./Cargo.toml", + "./cargo-miri/Cargo.toml", + "./genmc-sys/Cargo.toml", + "./miri-script/Cargo.toml" + ], + "check": { + "invocationStrategy": "once", + "overrideCommand": [ + "./miri", + "clippy", // make this `check` when working with a locally built rustc + "--message-format=json" + ] + }, + "cargo": { + "extraEnv": { + "MIRI_AUTO_OPS": "no", + "MIRI_IN_RA": "1" + }, + // Contrary to what the name suggests, this also affects proc macros. + "buildScripts": { + "invocationStrategy": "once", + "overrideCommand": [ + "./miri", + "check", + "--no-default-features", + "--message-format=json" + ] + } + } + } + } + } +} diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 59adc572eaa4b..d44488399f8b9 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -f605b57042ffeb320d7ae44490113a827139b766 +51ff895062ba60a7cba53f57af928c3fb7b0f2f4 diff --git a/src/tools/miri/src/bin/log/mod.rs b/src/tools/miri/src/bin/log/mod.rs index f3b2fdb5348e0..22f74dd46b540 100644 --- a/src/tools/miri/src/bin/log/mod.rs +++ b/src/tools/miri/src/bin/log/mod.rs @@ -1,2 +1,3 @@ pub mod setup; mod tracing_chrome; +mod tracing_chrome_instant; diff --git a/src/tools/miri/src/bin/log/tracing_chrome.rs b/src/tools/miri/src/bin/log/tracing_chrome.rs index 3379816550cfe..310887a13a5ce 100644 --- a/src/tools/miri/src/bin/log/tracing_chrome.rs +++ b/src/tools/miri/src/bin/log/tracing_chrome.rs @@ -7,12 +7,15 @@ //! (`git log -- path/to/tracing_chrome.rs`), but in summary: //! - the file attributes were changed and `extern crate` was added at the top //! - if a tracing span has a field called "tracing_separate_thread", it will be given a separate -//! span ID even in [TraceStyle::Threaded] mode, to make it appear on a separate line when viewing -//! the trace in . This is the syntax to trigger this behavior: +//! span ID even in [TraceStyle::Threaded] mode, to make it appear on a separate line when viewing +//! the trace in . This is the syntax to trigger this behavior: //! ```rust //! tracing::info_span!("my_span", tracing_separate_thread = tracing::field::Empty, /* ... */) //! ``` -//! - use i64 instead of u64 for the "id" in [ChromeLayer::get_root_id] to be compatible with Perfetto +//! 
- use i64 instead of u64 for the "id" in [ChromeLayer::get_root_id] to be compatible with +//! Perfetto +//! - use [ChromeLayer::with_elapsed_micros_subtracting_tracing] to make time measurements faster on +//! Linux x86/x86_64 and to subtract time spent tracing from the timestamps in the trace file //! //! Depending on the tracing-chrome crate from crates.io is unfortunately not possible, since it //! depends on `tracing_core` which conflicts with rustc_private's `tracing_core` (meaning it would @@ -50,9 +53,22 @@ use std::{ thread::JoinHandle, }; +use crate::log::tracing_chrome_instant::TracingChromeInstant; + +/// Contains thread-local data for threads that send tracing spans or events. +struct ThreadData { + /// A unique ID for this thread, will populate "tid" field in the output trace file. + tid: usize, + /// A clone of [ChromeLayer::out] to avoid the expensive operation of accessing a mutex + /// every time. This is used to send [Message]s to the thread that saves trace data to file. + out: Sender, + /// The instant in time this thread was started. All events happening on this thread will be + /// saved to the trace file with a timestamp (the "ts" field) measured relative to this instant. + start: TracingChromeInstant, +} + thread_local! { - static OUT: RefCell>> = const { RefCell::new(None) }; - static TID: RefCell> = const { RefCell::new(None) }; + static THREAD_DATA: RefCell> = const { RefCell::new(None) }; } type NameFn = Box) -> String + Send + Sync>; @@ -64,7 +80,6 @@ where S: Subscriber + for<'span> LookupSpan<'span> + Send + Sync, { out: Arc>>, - start: std::time::Instant, max_tid: AtomicUsize, include_args: bool, include_locations: bool, @@ -323,7 +338,6 @@ where { fn new(mut builder: ChromeLayerBuilder) -> (ChromeLayer, FlushGuard) { let (tx, rx) = mpsc::channel(); - OUT.with(|val| val.replace(Some(tx.clone()))); let out_writer = builder .out_writer @@ -443,7 +457,6 @@ where }; let layer = ChromeLayer { out: Arc::new(Mutex::new(tx)), - start: std::time::Instant::now(), max_tid: AtomicUsize::new(0), name_fn: builder.name_fn.take(), cat_fn: builder.cat_fn.take(), @@ -456,22 +469,7 @@ where (layer, guard) } - fn get_tid(&self) -> (usize, bool) { - TID.with(|value| { - let tid = *value.borrow(); - match tid { - Some(tid) => (tid, false), - None => { - let tid = self.max_tid.fetch_add(1, Ordering::SeqCst); - value.replace(Some(tid)); - (tid, true) - } - } - }) - } - - fn get_callsite(&self, data: EventOrSpan) -> Callsite { - let (tid, new_thread) = self.get_tid(); + fn get_callsite(&self, data: EventOrSpan, tid: usize) -> Callsite { let name = self.name_fn.as_ref().map(|name_fn| name_fn(&data)); let target = self.cat_fn.as_ref().map(|cat_fn| cat_fn(&data)); let meta = match data { @@ -502,14 +500,6 @@ where (None, None) }; - if new_thread { - let name = match std::thread::current().name() { - Some(name) => name.to_owned(), - None => tid.to_string(), - }; - self.send_message(Message::NewThread(tid, name)); - } - Callsite { tid, name, @@ -548,31 +538,55 @@ where } } - fn enter_span(&self, span: SpanRef, ts: f64) { - let callsite = self.get_callsite(EventOrSpan::Span(&span)); + fn enter_span(&self, span: SpanRef, ts: f64, tid: usize, out: &Sender) { + let callsite = self.get_callsite(EventOrSpan::Span(&span), tid); let root_id = self.get_root_id(span); - self.send_message(Message::Enter(ts, callsite, root_id)); + let _ignored = out.send(Message::Enter(ts, callsite, root_id)); } - fn exit_span(&self, span: SpanRef, ts: f64) { - let callsite = 
self.get_callsite(EventOrSpan::Span(&span)); + fn exit_span(&self, span: SpanRef, ts: f64, tid: usize, out: &Sender) { + let callsite = self.get_callsite(EventOrSpan::Span(&span), tid); let root_id = self.get_root_id(span); - self.send_message(Message::Exit(ts, callsite, root_id)); + let _ignored = out.send(Message::Exit(ts, callsite, root_id)); } - fn get_ts(&self) -> f64 { - self.start.elapsed().as_nanos() as f64 / 1000.0 - } + /// Helper function that measures how much time is spent while executing `f` and accounts for it + /// in subsequent calls, with the aim to reduce biases in the data collected by `tracing_chrome` + /// by subtracting the time spent inside tracing functions from the timeline. This makes it so + /// that the time spent inside the `tracing_chrome` functions does not impact the timestamps + /// inside the trace file (i.e. `ts`), even if such functions are slow (e.g. because they need + /// to format arguments on the same thread those arguments are collected on, otherwise memory + /// safety would be broken). + /// + /// `f` is called with the microseconds elapsed since the current thread was started (**not** + /// since the program start!), with the current thread ID (i.e. `tid`), and with a [Sender] that + /// can be used to send a [Message] to the thread that collects [Message]s and saves them to the + /// trace file. + #[inline(always)] + fn with_elapsed_micros_subtracting_tracing(&self, f: impl Fn(f64, usize, &Sender)) { + THREAD_DATA.with(|value| { + let mut thread_data = value.borrow_mut(); + let (ThreadData { tid, out, start }, new_thread) = match thread_data.as_mut() { + Some(thread_data) => (thread_data, false), + None => { + let tid = self.max_tid.fetch_add(1, Ordering::SeqCst); + let out = self.out.lock().unwrap().clone(); + let start = TracingChromeInstant::setup_for_thread_and_start(tid); + *thread_data = Some(ThreadData { tid, out, start }); + (thread_data.as_mut().unwrap(), true) + } + }; - fn send_message(&self, message: Message) { - OUT.with(move |val| { - if val.borrow().is_some() { - let _ignored = val.borrow().as_ref().unwrap().send(message); - } else { - let out = self.out.lock().unwrap().clone(); - let _ignored = out.send(message); - val.replace(Some(out)); - } + start.with_elapsed_micros_subtracting_tracing(|ts| { + if new_thread { + let name = match std::thread::current().name() { + Some(name) => name.to_owned(), + None => tid.to_string(), + }; + let _ignored = out.send(Message::NewThread(*tid, name)); + } + f(ts, *tid, out); + }); }); } } @@ -586,52 +600,58 @@ where return; } - let ts = self.get_ts(); - self.enter_span(ctx.span(id).expect("Span not found."), ts); + self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| { + self.enter_span(ctx.span(id).expect("Span not found."), ts, tid, out); + }); } fn on_record(&self, id: &span::Id, values: &span::Record<'_>, ctx: Context<'_, S>) { if self.include_args { - let span = ctx.span(id).unwrap(); - let mut exts = span.extensions_mut(); + self.with_elapsed_micros_subtracting_tracing(|_, _, _| { + let span = ctx.span(id).unwrap(); + let mut exts = span.extensions_mut(); - let args = exts.get_mut::(); + let args = exts.get_mut::(); - if let Some(args) = args { - let args = Arc::make_mut(&mut args.args); - values.record(&mut JsonVisitor { object: args }); - } + if let Some(args) = args { + let args = Arc::make_mut(&mut args.args); + values.record(&mut JsonVisitor { object: args }); + } + }); } } fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - let ts = self.get_ts(); - let callsite 
= self.get_callsite(EventOrSpan::Event(event)); - self.send_message(Message::Event(ts, callsite)); + self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| { + let callsite = self.get_callsite(EventOrSpan::Event(event), tid); + let _ignored = out.send(Message::Event(ts, callsite)); + }); } fn on_exit(&self, id: &span::Id, ctx: Context<'_, S>) { if let TraceStyle::Async = self.trace_style { return; } - let ts = self.get_ts(); - self.exit_span(ctx.span(id).expect("Span not found."), ts); + self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| { + self.exit_span(ctx.span(id).expect("Span not found."), ts, tid, out); + }); } fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) { - if self.include_args { - let mut args = Object::new(); - attrs.record(&mut JsonVisitor { object: &mut args }); - ctx.span(id).unwrap().extensions_mut().insert(ArgsWrapper { - args: Arc::new(args), - }); - } - if let TraceStyle::Threaded = self.trace_style { - return; - } + self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| { + if self.include_args { + let mut args = Object::new(); + attrs.record(&mut JsonVisitor { object: &mut args }); + ctx.span(id).unwrap().extensions_mut().insert(ArgsWrapper { + args: Arc::new(args), + }); + } + if let TraceStyle::Threaded = self.trace_style { + return; + } - let ts = self.get_ts(); - self.enter_span(ctx.span(id).expect("Span not found."), ts); + self.enter_span(ctx.span(id).expect("Span not found."), ts, tid, out); + }); } fn on_close(&self, id: span::Id, ctx: Context<'_, S>) { @@ -639,8 +659,9 @@ where return; } - let ts = self.get_ts(); - self.exit_span(ctx.span(&id).expect("Span not found."), ts); + self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| { + self.exit_span(ctx.span(&id).expect("Span not found."), ts, tid, out); + }); } } diff --git a/src/tools/miri/src/bin/log/tracing_chrome_instant.rs b/src/tools/miri/src/bin/log/tracing_chrome_instant.rs new file mode 100644 index 0000000000000..f400bc20a7b5d --- /dev/null +++ b/src/tools/miri/src/bin/log/tracing_chrome_instant.rs @@ -0,0 +1,183 @@ +//! Code in this file was in part inspired by +//! . +//! A useful resource is also +//! , +//! although this file does not implement TSC synchronization but instead pins threads to CPUs, +//! since the former is not reliable (i.e. it might lead to non-monotonic time measurements). +//! Another useful resource for future improvements might be measureme's time measurement utils: +//! . +//! Documentation about how the Linux kernel chooses a clock source can be found here: +//! . +#![cfg(feature = "tracing")] + +/// This alternative `TracingChromeInstant` implementation was made entirely to suit the needs of +/// [crate::log::tracing_chrome], and shouldn't be used for anything else. It features two functions: +/// - [TracingChromeInstant::setup_for_thread_and_start], which sets up the current thread to do +/// proper time tracking and returns a point in time to use as "t=0", and +/// - [TracingChromeInstant::with_elapsed_micros_subtracting_tracing], which allows +/// obtaining how much time elapsed since [TracingChromeInstant::setup_for_thread_and_start] was +/// called while accounting for (and subtracting) the time spent inside tracing-related functions. +/// +/// This measures time using [std::time::Instant], except for x86/x86_64 Linux machines, where +/// [std::time::Instant] is too slow (~1.5us) and thus `rdtsc` is used instead (~5ns).
+pub enum TracingChromeInstant { + WallTime { + /// The time at which this instant was created, shifted forward to account + /// for time spent in tracing functions as explained in + /// [TracingChromeInstant::with_elapsed_micros_subtracting_tracing]'s comments. + start_instant: std::time::Instant, + }, + #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))] + Tsc { + /// The value in the TSC counter when this instant was created, shifted forward to account + /// for time spent in tracing functions as explained in + /// [TracingChromeInstant::with_elapsed_micros_subtracting_tracing]'s comments. + start_tsc: u64, + /// The period of the TSC counter in microseconds. + tsc_to_microseconds: f64, + }, +} + +impl TracingChromeInstant { + /// Can be thought of as the same as [std::time::Instant::now()], but also does some setup to + /// make TSC stable in case TSC is available. This is supposed to be called (at most) once per + /// thread since the thread setup takes a few milliseconds. + /// + /// WARNING: If TSC is available, `incremental_thread_id` is used to pick to which CPU to pin + /// the current thread. Thread IDs should be assigned contiguously starting from 0. Be aware + /// that the current thread will be restricted to one CPU for the rest of the execution! + pub fn setup_for_thread_and_start(incremental_thread_id: usize) -> TracingChromeInstant { + #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))] + if *tsc::IS_TSC_AVAILABLE.get_or_init(tsc::is_tsc_available) { + // We need to lock this thread to a specific CPU, because CPUs' TSC timers might be out + // of sync. + tsc::set_cpu_affinity(incremental_thread_id); + + // Can only use tsc_to_microseconds() and rdtsc() after having set the CPU affinity! + // We compute tsc_to_microseconds anew for every new thread just in case some CPU core + // has a different TSC frequency. + let tsc_to_microseconds = tsc::tsc_to_microseconds(); + let start_tsc = tsc::rdtsc(); + return TracingChromeInstant::Tsc { start_tsc, tsc_to_microseconds }; + } + + let _ = incremental_thread_id; // otherwise we get a warning when the TSC branch is disabled + TracingChromeInstant::WallTime { start_instant: std::time::Instant::now() } + } + + /// Calls `f` with the time elapsed in microseconds since this [TracingChromeInstant] was built + /// by [TracingChromeInstant::setup_for_thread_and_start], while subtracting all time previously + /// spent executing other `f`s passed to this function. This behavior allows subtracting time + /// spent in functions that log tracing data (which `f` is supposed to be) from the tracing time + /// measurements. + /// + /// Note: microseconds are used as the time unit since that's what Chrome trace files should + /// contain, see the definition of the "ts" field in + /// . + #[inline(always)] + pub fn with_elapsed_micros_subtracting_tracing(&mut self, f: impl Fn(f64)) { + match self { + TracingChromeInstant::WallTime { start_instant } => { + // Obtain the current time (before executing `f`). + let instant_before_f = std::time::Instant::now(); + + // Using the current time (`instant_before_f`) and the `start_instant` stored in + // `self`, calculate the elapsed time (in microseconds) since this instant was + // instantiated, accounting for any time that was previously spent executing `f`. + // The "accounting" part is not computed in this line, but is rather done by + // shifting forward the `start_instant` down below. 
+ let ts = (instant_before_f - *start_instant).as_nanos() as f64 / 1000.0; + + // Run the function (supposedly a function internal to the tracing infrastructure). + f(ts); + + // Measure how much time was spent executing `f` and shift `start_instant` forward + // by that amount. This "removes" that time from the trace. + *start_instant += std::time::Instant::now() - instant_before_f; + } + + #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))] + TracingChromeInstant::Tsc { start_tsc, tsc_to_microseconds } => { + // the comments above also apply here, since it's the same logic + let tsc_before_f = tsc::rdtsc(); + let ts = ((tsc_before_f - *start_tsc) as f64) * (*tsc_to_microseconds); + f(ts); + *start_tsc += tsc::rdtsc() - tsc_before_f; + } + } + } +} + +#[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))] +mod tsc { + + pub static IS_TSC_AVAILABLE: std::sync::OnceLock = std::sync::OnceLock::new(); + + /// Reads the timestamp-counter register. Will give monotonic answers only when called from the + /// same thread, because the TSC of different CPUs might be out of sync. + #[inline(always)] + pub(super) fn rdtsc() -> u64 { + #[cfg(target_arch = "x86")] + use core::arch::x86::_rdtsc; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::_rdtsc; + + unsafe { _rdtsc() } + } + + /// Estimates the frequency of the TSC counter by waiting 10ms in a busy loop and + /// looking at how much the TSC increased in the meantime. + pub(super) fn tsc_to_microseconds() -> f64 { + const BUSY_WAIT: std::time::Duration = std::time::Duration::from_millis(10); + let tsc_start = rdtsc(); + let instant_start = std::time::Instant::now(); + while instant_start.elapsed() < BUSY_WAIT { + // `thread::sleep()` is not very precise at waking up the program at the right time, + // so use a busy loop instead. + core::hint::spin_loop(); + } + let tsc_end = rdtsc(); + (BUSY_WAIT.as_nanos() as f64) / 1000.0 / ((tsc_end - tsc_start) as f64) + } + + /// Checks whether the TSC counter is available and runs at a constant rate independently + /// of CPU frequency even across different power states of the CPU (i.e. checks for the + /// `invariant_tsc` CPUID flag). + pub(super) fn is_tsc_available() -> bool { + #[cfg(target_arch = "x86")] + use core::arch::x86::__cpuid; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::__cpuid; + + // implemented like https://docs.rs/raw-cpuid/latest/src/raw_cpuid/extended.rs.html#965-967 + const LEAF: u32 = 0x80000007; // this is the leaf for "advanced power management info" + let cpuid = unsafe { __cpuid(LEAF) }; + (cpuid.edx & (1 << 8)) != 0 // EDX bit 8 indicates invariant TSC + } + + /// Forces the current thread to run on a single CPU, which ensures the TSC counter is monotonic + /// (since TSCs of different CPUs might be out-of-sync). `incremental_thread_id` is used to pick + /// to which CPU to pin the current thread, and should be an incremental number that starts from + /// 0. + pub(super) fn set_cpu_affinity(incremental_thread_id: usize) { + let cpu_id = match std::thread::available_parallelism() { + Ok(available_parallelism) => incremental_thread_id % available_parallelism, + _ => panic!("Could not determine CPU count to properly set CPU affinity"), + }; + + let mut set = unsafe { std::mem::zeroed::() }; + unsafe { libc::CPU_SET(cpu_id, &mut set) }; + + // Set the current thread's core affinity. 
+ if unsafe { + libc::sched_setaffinity( + 0, // Defaults to current thread + size_of::(), + &set as *const _, + ) + } != 0 + { + panic!("Could not set CPU affinity") + } + } +} diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs index ae1b25f8857a0..9b0bee72aeff6 100644 --- a/src/tools/miri/src/bin/miri.rs +++ b/src/tools/miri/src/bin/miri.rs @@ -556,7 +556,11 @@ fn main() { } else if arg == "-Zmiri-deterministic-floats" { miri_config.float_nondet = false; } else if arg == "-Zmiri-no-extra-rounding-error" { - miri_config.float_rounding_error = false; + miri_config.float_rounding_error = miri::FloatRoundingErrorMode::None; + } else if arg == "-Zmiri-max-extra-rounding-error" { + miri_config.float_rounding_error = miri::FloatRoundingErrorMode::Max; + } else if arg == "-Zmiri-no-short-fd-operations" { + miri_config.short_fd_operations = false; } else if arg == "-Zmiri-strict-provenance" { miri_config.provenance_mode = ProvenanceMode::Strict; } else if arg == "-Zmiri-permissive-provenance" { diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs index ad2a67160f484..bed65440dc9aa 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs @@ -125,81 +125,64 @@ pub struct NewPermission { /// Whether a read access should be performed on the non-frozen /// part on a retag. nonfreeze_access: bool, + /// Permission for memory outside the range. + outside_perm: Permission, /// Whether this pointer is part of the arguments of a function call. /// `protector` is `Some(_)` for all pointers marked `noalias`. protector: Option, } impl<'tcx> NewPermission { - /// Determine NewPermission of the reference from the type of the pointee. - fn from_ref_ty( + /// Determine NewPermission of the reference/Box from the type of the pointee. + /// + /// A `ref_mutability` of `None` indicates a `Box` type. + fn new( pointee: Ty<'tcx>, - mutability: Mutability, - kind: RetagKind, + ref_mutability: Option, + retag_kind: RetagKind, cx: &crate::MiriInterpCx<'tcx>, ) -> Option { let ty_is_unpin = pointee.is_unpin(*cx.tcx, cx.typing_env()); - let is_protected = kind == RetagKind::FnEntry; - let protector = is_protected.then_some(ProtectorKind::StrongProtector); - - Some(match mutability { - Mutability::Mut if ty_is_unpin => - NewPermission { - freeze_perm: Permission::new_reserved( - /* ty_is_freeze */ true, - is_protected, - ), - freeze_access: true, - nonfreeze_perm: Permission::new_reserved( - /* ty_is_freeze */ false, - is_protected, - ), - // If we have a mutable reference, then the non-frozen part will - // have state `ReservedIM` or `Reserved`, which can have an initial read access - // performed on it because you cannot have multiple mutable borrows. - nonfreeze_access: true, - protector, - }, - Mutability::Not => - NewPermission { - freeze_perm: Permission::new_frozen(), - freeze_access: true, - nonfreeze_perm: Permission::new_cell(), - // If it is a shared reference, then the non-frozen - // part will have state `Cell`, which should not have an initial access, - // as this can cause data races when using thread-safe data types like - // `Mutex`. - nonfreeze_access: false, - protector, - }, - _ => return None, - }) - } + let ty_is_freeze = pointee.is_freeze(*cx.tcx, cx.typing_env()); + let is_protected = retag_kind == RetagKind::FnEntry; - /// Compute permission for `Box`-like type (`Box` always, and also `Unique` if enabled). 
- /// These pointers allow deallocation so need a different kind of protector not handled - /// by `from_ref_ty`. - fn from_unique_ty( - ty: Ty<'tcx>, - kind: RetagKind, - cx: &crate::MiriInterpCx<'tcx>, - ) -> Option { - let pointee = ty.builtin_deref(true).unwrap(); - pointee.is_unpin(*cx.tcx, cx.typing_env()).then_some(()).map(|()| { - // Regular `Unpin` box, give it `noalias` but only a weak protector - // because it is valid to deallocate it within the function. - let is_protected = kind == RetagKind::FnEntry; - let protector = is_protected.then_some(ProtectorKind::WeakProtector); - NewPermission { - freeze_perm: Permission::new_reserved(/* ty_is_freeze */ true, is_protected), - freeze_access: true, - nonfreeze_perm: Permission::new_reserved( - /* ty_is_freeze */ false, - is_protected, - ), - nonfreeze_access: true, - protector, - } + if matches!(ref_mutability, Some(Mutability::Mut) | None if !ty_is_unpin) { + // Mutable reference / Box to pinning type: retagging is a NOP. + // FIXME: with `UnsafePinned`, this should do proper per-byte tracking. + return None; + } + + let freeze_perm = match ref_mutability { + // Shared references are frozen. + Some(Mutability::Not) => Permission::new_frozen(), + // Mutable references and Boxes are reserved. + _ => Permission::new_reserved_frz(), + }; + let nonfreeze_perm = match ref_mutability { + // Shared references are "transparent". + Some(Mutability::Not) => Permission::new_cell(), + // *Protected* mutable references and boxes are reserved without regarding for interior mutability. + _ if is_protected => Permission::new_reserved_frz(), + // Unprotected mutable references and boxes start in `ReservedIm`. + _ => Permission::new_reserved_im(), + }; + + // Everything except for `Cell` gets an initial access. + let initial_access = |perm: &Permission| !perm.is_cell(); + + Some(NewPermission { + freeze_perm, + freeze_access: initial_access(&freeze_perm), + nonfreeze_perm, + nonfreeze_access: initial_access(&nonfreeze_perm), + outside_perm: if ty_is_freeze { freeze_perm } else { nonfreeze_perm }, + protector: is_protected.then_some(if ref_mutability.is_some() { + // Strong protector for references + ProtectorKind::StrongProtector + } else { + // Weak protector for boxes + ProtectorKind::WeakProtector + }), }) } } @@ -313,30 +296,20 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let span = this.machine.current_span(); - // Store initial permissions and their corresponding range. - let mut perms_map: DedupRangeMap = DedupRangeMap::new( - ptr_size, - LocationState::new_accessed(Permission::new_disabled(), IdempotentForeignAccess::None), // this will be overwritten - ); - // Keep track of whether the node has any part that allows for interior mutability. - // FIXME: This misses `PhantomData>` which could be considered a marker - // for requesting interior mutability. - let mut has_unsafe_cell = false; - // When adding a new node, the SIFA of its parents needs to be updated, potentially across // the entire memory range. For the parts that are being accessed below, the access itself - // trivially takes care of that. However, we have to do some more work to also deal with - // the parts that are not being accessed. Specifically what we do is that we - // call `update_last_accessed_after_retag` on the SIFA of the permission set for the part of - // memory outside `perm_map` -- so that part is definitely taken care of. The remaining concern - // is the part of memory that is in the range of `perms_map`, but not accessed below. 
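The decision table encoded above (shared references freeze; mutable references and boxes reserve; protection overrides interior mutability) can be modelled by a tiny standalone program. `Perm` and `Mutability` here are stand-ins for Miri's types, for illustration only:

#[derive(Debug, Clone, Copy, PartialEq)]
enum Perm {
    Frozen,
    Cell,
    ReservedFrz,
    ReservedIm,
}

#[derive(Clone, Copy)]
enum Mutability {
    Not,
    Mut,
}

/// Permission for the `Freeze` part of the pointee; `None` means a `Box`.
fn freeze_perm(ref_mutability: Option<Mutability>) -> Perm {
    match ref_mutability {
        Some(Mutability::Not) => Perm::Frozen, // shared references are frozen
        _ => Perm::ReservedFrz,                // `&mut` and `Box` are reserved
    }
}

/// Permission for the non-`Freeze` (interior mutable) part of the pointee.
fn nonfreeze_perm(ref_mutability: Option<Mutability>, protected: bool) -> Perm {
    match ref_mutability {
        Some(Mutability::Not) => Perm::Cell, // shared references are "transparent"
        _ if protected => Perm::ReservedFrz, // protection overrides interior mutability
        _ => Perm::ReservedIm,
    }
}

fn main() {
    assert_eq!(freeze_perm(Some(Mutability::Not)), Perm::Frozen);
    assert_eq!(freeze_perm(None), Perm::ReservedFrz); // Box
    assert_eq!(nonfreeze_perm(Some(Mutability::Mut), false), Perm::ReservedIm);
    assert_eq!(nonfreeze_perm(Some(Mutability::Mut), true), Perm::ReservedFrz);
}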
- // There we have two cases: - // * If we do have an `UnsafeCell` (`has_unsafe_cell` becomes true), then the non-accessed part - // uses `nonfreeze_perm`, so the `nonfreeze_perm` initialized parts are also fine. We enforce - // the `freeze_perm` parts to be accessed, and thus everything is taken care of. - // * If there is no `UnsafeCell`, then `freeze_perm` is used everywhere (both inside and outside the initial range), - // and we update everything to have the `freeze_perm`'s SIFA, so there are no issues. (And this assert below is not - // actually needed in this case). + // trivially takes care of that. However, we have to do some more work to also deal with the + // parts that are not being accessed. Specifically what we do is that we call + // `update_last_accessed_after_retag` on the SIFA of the permission set for the part of + // memory outside `perm_map` -- so that part is definitely taken care of. The remaining + // concern is the part of memory that is in the range of `perms_map`, but not accessed + // below. There we have two cases: + // * If the type is `!Freeze`, then the non-accessed part uses `nonfreeze_perm`, so the + // `nonfreeze_perm` initialized parts are also fine. We enforce the `freeze_perm` parts to + // be accessed via the assert below, and thus everything is taken care of. + // * If the type is `Freeze`, then `freeze_perm` is used everywhere (both inside and outside + // the initial range), and we update everything to have the `freeze_perm`'s SIFA, so there + // are no issues. (And this assert below is not actually needed in this case). assert!(new_perm.freeze_access); let protected = new_perm.protector.is_some(); @@ -350,66 +323,48 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { .get_tree_borrows_params() .precise_interior_mut; - let default_perm = if !precise_interior_mut { - // NOTE: Using `ty_is_freeze` doesn't give the same result as going through the range - // and computing `has_unsafe_cell`. This is because of zero-sized `UnsafeCell`, for which - // `has_unsafe_cell` is false, but `!ty_is_freeze` is true. - let ty_is_freeze = place.layout.ty.is_freeze(*this.tcx, this.typing_env()); - let (perm, access) = if ty_is_freeze { + // Compute initial "inside" permissions. + let loc_state = |frozen: bool| -> LocationState { + let (perm, access) = if frozen { (new_perm.freeze_perm, new_perm.freeze_access) } else { (new_perm.nonfreeze_perm, new_perm.nonfreeze_access) }; let sifa = perm.strongest_idempotent_foreign_access(protected); - let new_loc = if access { + if access { LocationState::new_accessed(perm, sifa) } else { LocationState::new_non_accessed(perm, sifa) - }; - - for (_loc_range, loc) in perms_map.iter_mut_all() { - *loc = new_loc; } - - perm + }; + let perms_map = if !precise_interior_mut { + // For `!Freeze` types, just pretend the entire thing is an `UnsafeCell`. + let ty_is_freeze = place.layout.ty.is_freeze(*this.tcx, this.typing_env()); + let state = loc_state(ty_is_freeze); + DedupRangeMap::new(ptr_size, state) } else { + // The initial state will be overwritten by the visitor below. + let mut perms_map: DedupRangeMap = DedupRangeMap::new( + ptr_size, + LocationState::new_accessed( + Permission::new_disabled(), + IdempotentForeignAccess::None, + ), + ); this.visit_freeze_sensitive(place, ptr_size, |range, frozen| { - has_unsafe_cell = has_unsafe_cell || !frozen; - - // We are only ever `Frozen` inside the frozen bits. 
- let (perm, access) = if frozen { - (new_perm.freeze_perm, new_perm.freeze_access) - } else { - (new_perm.nonfreeze_perm, new_perm.nonfreeze_access) - }; - let sifa = perm.strongest_idempotent_foreign_access(protected); - // NOTE: Currently, `access` is false if and only if `perm` is Cell, so this `if` - // doesn't not change whether any code is UB or not. We could just always use - // `new_accessed` and everything would stay the same. But that seems conceptually - // odd, so we keep the initial "accessed" bit of the `LocationState` in sync with whether - // a read access is performed below. - let new_loc = if access { - LocationState::new_accessed(perm, sifa) - } else { - LocationState::new_non_accessed(perm, sifa) - }; - - // Store initial permissions. + let state = loc_state(frozen); for (_loc_range, loc) in perms_map.iter_mut(range.start, range.size) { - *loc = new_loc; + *loc = state; } - interp_ok(()) })?; - - // Allow lazily writing to surrounding data if we found an `UnsafeCell`. - if has_unsafe_cell { new_perm.nonfreeze_perm } else { new_perm.freeze_perm } + perms_map }; let alloc_extra = this.get_alloc_extra(alloc_id)?; let mut tree_borrows = alloc_extra.borrow_tracker_tb().borrow_mut(); - for (perm_range, perm) in perms_map.iter_mut_all() { + for (perm_range, perm) in perms_map.iter_all() { if perm.is_accessed() { // Some reborrows incur a read access to the parent. // Adjust range to be relative to allocation start (rather than to `place`). @@ -447,7 +402,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { orig_tag, new_tag, perms_map, - default_perm, + new_perm.outside_perm, protected, span, )?; @@ -514,7 +469,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let this = self.eval_context_mut(); let new_perm = match val.layout.ty.kind() { &ty::Ref(_, pointee, mutability) => - NewPermission::from_ref_ty(pointee, mutability, kind, this), + NewPermission::new(pointee, Some(mutability), kind, this), _ => None, }; if let Some(new_perm) = new_perm { @@ -571,8 +526,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { fn visit_box(&mut self, box_ty: Ty<'tcx>, place: &PlaceTy<'tcx>) -> InterpResult<'tcx> { // Only boxes for the global allocator get any special treatment. if box_ty.is_box_global(*self.ecx.tcx) { + let pointee = place.layout.ty.builtin_deref(true).unwrap(); let new_perm = - NewPermission::from_unique_ty(place.layout.ty, self.kind, self.ecx); + NewPermission::new(pointee, /* not a ref */ None, self.kind, self.ecx); self.retag_ptr_inplace(place, new_perm)?; } interp_ok(()) @@ -591,7 +547,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { match place.layout.ty.kind() { &ty::Ref(_, pointee, mutability) => { let new_perm = - NewPermission::from_ref_ty(pointee, mutability, self.kind, self.ecx); + NewPermission::new(pointee, Some(mutability), self.kind, self.ecx); self.retag_ptr_inplace(place, new_perm)?; } ty::RawPtr(_, _) => { @@ -643,14 +599,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // never be ReservedIM, the value of the `ty_is_freeze` // argument doesn't matter // (`ty_is_freeze || true` in `new_reserved` will always be `true`). 
- freeze_perm: Permission::new_reserved( - /* ty_is_freeze */ true, /* protected */ true, - ), + freeze_perm: Permission::new_reserved_frz(), freeze_access: true, - nonfreeze_perm: Permission::new_reserved( - /* ty_is_freeze */ false, /* protected */ true, - ), + nonfreeze_perm: Permission::new_reserved_frz(), nonfreeze_access: true, + outside_perm: Permission::new_reserved_frz(), protector: Some(ProtectorKind::StrongProtector), }; this.tb_retag_place(place, new_perm) diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs index 38863ca0734a1..390435e58d173 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs @@ -272,28 +272,15 @@ impl Permission { /// Default initial permission of a reborrowed mutable reference that is either /// protected or not interior mutable. - fn new_reserved_frz() -> Self { + pub fn new_reserved_frz() -> Self { Self { inner: ReservedFrz { conflicted: false } } } /// Default initial permission of an unprotected interior mutable reference. - fn new_reserved_im() -> Self { + pub fn new_reserved_im() -> Self { Self { inner: ReservedIM } } - /// Wrapper around `new_reserved_frz` and `new_reserved_im` that decides - /// which to call based on the interior mutability and the retag kind (whether there - /// is a protector is relevant because being protected takes priority over being - /// interior mutable) - pub fn new_reserved(ty_is_freeze: bool, protected: bool) -> Self { - // As demonstrated by `tests/fail/tree_borrows/reservedim_spurious_write.rs`, - // interior mutability and protectors interact poorly. - // To eliminate the case of Protected Reserved IM we override interior mutability - // in the case of a protected reference: protected references are always considered - // "freeze" in their reservation phase. - if ty_is_freeze || protected { Self::new_reserved_frz() } else { Self::new_reserved_im() } - } - /// Default initial permission of a reborrowed shared reference. pub fn new_frozen() -> Self { Self { inner: Frozen } diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs index bb3fc2d80b3fa..d9b3696e4f805 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs @@ -610,7 +610,7 @@ mod spurious_read { }, y: LocStateProt { state: LocationState::new_non_accessed( - Permission::new_reserved(/* freeze */ true, /* protected */ true), + Permission::new_reserved_frz(), IdempotentForeignAccess::default(), ), prot: true, diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs index 4c531a8d1f526..caed98f04f32e 100644 --- a/src/tools/miri/src/eval.rs +++ b/src/tools/miri/src/eval.rs @@ -32,65 +32,6 @@ pub enum MiriEntryFnType { /// will hang the program. const MAIN_THREAD_YIELDS_AT_SHUTDOWN: u32 = 256; -#[derive(Copy, Clone, Debug, PartialEq)] -pub enum AlignmentCheck { - /// Do not check alignment. - None, - /// Check alignment "symbolically", i.e., using only the requested alignment for an allocation and not its real base address. - Symbolic, - /// Check alignment on the actual physical integer address. - Int, -} - -#[derive(Copy, Clone, Debug, PartialEq)] -pub enum RejectOpWith { - /// Isolated op is rejected with an abort of the machine. - Abort, - - /// If not Abort, miri returns an error for an isolated op. 
- /// Following options determine if user should be warned about such error. - /// Do not print warning about rejected isolated op. - NoWarning, - - /// Print a warning about rejected isolated op, with backtrace. - Warning, - - /// Print a warning about rejected isolated op, without backtrace. - WarningWithoutBacktrace, -} - -#[derive(Copy, Clone, Debug, PartialEq)] -pub enum IsolatedOp { - /// Reject an op requiring communication with the host. By - /// default, miri rejects the op with an abort. If not, it returns - /// an error code, and prints a warning about it. Warning levels - /// are controlled by `RejectOpWith` enum. - Reject(RejectOpWith), - - /// Execute op requiring communication with the host, i.e. disable isolation. - Allow, -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum BacktraceStyle { - /// Prints a terser backtrace which ideally only contains relevant information. - Short, - /// Prints a backtrace with all possible information. - Full, - /// Prints only the frame that the error occurs in. - Off, -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum ValidationMode { - /// Do not perform any kind of validation. - No, - /// Validate the interior of the value, but not things behind references. - Shallow, - /// Fully recursively validate references. - Deep, -} - /// Configuration needed to spawn a Miri instance. #[derive(Clone)] pub struct MiriConfig { @@ -171,7 +112,9 @@ pub struct MiriConfig { /// Whether floating-point operations can behave non-deterministically. pub float_nondet: bool, /// Whether floating-point operations can have a non-deterministic rounding error. - pub float_rounding_error: bool, + pub float_rounding_error: FloatRoundingErrorMode, + /// Whether Miri artifically introduces short reads/writes on file descriptors. + pub short_fd_operations: bool, } impl Default for MiriConfig { @@ -213,7 +156,8 @@ impl Default for MiriConfig { fixed_scheduling: false, force_intrinsic_fallback: false, float_nondet: true, - float_rounding_error: true, + float_rounding_error: FloatRoundingErrorMode::Random, + short_fd_operations: true, } } } diff --git a/src/tools/miri/src/intrinsics/mod.rs b/src/tools/miri/src/intrinsics/mod.rs index b5e81460773ba..5e46768b0e6cf 100644 --- a/src/tools/miri/src/intrinsics/mod.rs +++ b/src/tools/miri/src/intrinsics/mod.rs @@ -10,7 +10,7 @@ use rustc_abi::Size; use rustc_apfloat::ieee::{IeeeFloat, Semantics}; use rustc_apfloat::{self, Float, Round}; use rustc_middle::mir; -use rustc_middle::ty::{self, FloatTy, ScalarInt}; +use rustc_middle::ty::{self, FloatTy}; use rustc_span::{Symbol, sym}; use self::atomic::EvalContextExt as _; @@ -230,7 +230,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let res = apply_random_float_error_ulp( this, res, - 2, // log2(4) + 4, ); // Clamp the result to the guaranteed range of this function according to the C standard, @@ -274,7 +274,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let res = apply_random_float_error_ulp( this, res, - 2, // log2(4) + 4, ); // Clamp the result to the guaranteed range of this function according to the C standard, @@ -336,9 +336,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. 
- apply_random_float_error_ulp( - this, res, 2, // log2(4) - ) + apply_random_float_error_ulp(this, res, 4) }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; @@ -354,9 +352,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp( - this, res, 2, // log2(4) - ) + apply_random_float_error_ulp(this, res, 4) }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; @@ -373,9 +369,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp( - this, res, 2, // log2(4) - ) + apply_random_float_error_ulp(this, res, 4) }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; @@ -391,9 +385,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp( - this, res, 2, // log2(4) - ) + apply_random_float_error_ulp(this, res, 4) }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; @@ -448,7 +440,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } // Apply a relative error of 4ULP to simulate non-deterministic precision loss // due to optimizations. - let res = apply_random_float_error_to_imm(this, res, 2 /* log2(4) */)?; + let res = crate::math::apply_random_float_error_to_imm(this, res, 4)?; this.write_immediate(*res, dest)?; } @@ -486,31 +478,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } } -/// Applies a random ULP floating point error to `val` and returns the new value. -/// So if you want an X ULP error, `ulp_exponent` should be log2(X). -/// -/// Will fail if `val` is not a floating point number. 
-fn apply_random_float_error_to_imm<'tcx>( - ecx: &mut MiriInterpCx<'tcx>, - val: ImmTy<'tcx>, - ulp_exponent: u32, -) -> InterpResult<'tcx, ImmTy<'tcx>> { - let scalar = val.to_scalar_int()?; - let res: ScalarInt = match val.layout.ty.kind() { - ty::Float(FloatTy::F16) => - apply_random_float_error_ulp(ecx, scalar.to_f16(), ulp_exponent).into(), - ty::Float(FloatTy::F32) => - apply_random_float_error_ulp(ecx, scalar.to_f32(), ulp_exponent).into(), - ty::Float(FloatTy::F64) => - apply_random_float_error_ulp(ecx, scalar.to_f64(), ulp_exponent).into(), - ty::Float(FloatTy::F128) => - apply_random_float_error_ulp(ecx, scalar.to_f128(), ulp_exponent).into(), - _ => bug!("intrinsic called with non-float input type"), - }; - - interp_ok(ImmTy::from_scalar_int(res, val.layout)) -} - /// For the intrinsics: /// - sinf32, sinf64 /// - cosf32, cosf64 diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index 5ed6d6b346c7a..0856411b8e836 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -138,15 +138,14 @@ pub use crate::data_structures::mono_hash_map::MonoHashMap; pub use crate::diagnostics::{ EvalContextExt as _, NonHaltingDiagnostic, TerminationInfo, report_error, }; -pub use crate::eval::{ - AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, MiriEntryFnType, RejectOpWith, - ValidationMode, create_ecx, eval_entry, -}; +pub use crate::eval::{MiriConfig, MiriEntryFnType, create_ecx, eval_entry}; pub use crate::helpers::{AccessKind, EvalContextExt as _, ToU64 as _, ToUsize as _}; pub use crate::intrinsics::EvalContextExt as _; pub use crate::machine::{ - AllocExtra, DynMachineCallback, FrameExtra, MachineCallback, MemoryKind, MiriInterpCx, - MiriInterpCxExt, MiriMachine, MiriMemoryKind, PrimitiveLayouts, Provenance, ProvenanceExtra, + AlignmentCheck, AllocExtra, BacktraceStyle, DynMachineCallback, FloatRoundingErrorMode, + FrameExtra, IsolatedOp, MachineCallback, MemoryKind, MiriInterpCx, MiriInterpCxExt, + MiriMachine, MiriMemoryKind, PrimitiveLayouts, Provenance, ProvenanceExtra, RejectOpWith, + ValidationMode, }; pub use crate::operator::EvalContextExt as _; pub use crate::provenance_gc::{EvalContextExt as _, LiveAllocs, VisitProvenance, VisitWith}; diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 0b2ce90041477..e4540fb9bc58b 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -49,6 +49,75 @@ pub const SIGRTMAX: i32 = 42; /// base address for each evaluation would produce unbounded memory usage. const ADDRS_PER_ANON_GLOBAL: usize = 32; +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum AlignmentCheck { + /// Do not check alignment. + None, + /// Check alignment "symbolically", i.e., using only the requested alignment for an allocation and not its real base address. + Symbolic, + /// Check alignment on the actual physical integer address. + Int, +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum RejectOpWith { + /// Isolated op is rejected with an abort of the machine. + Abort, + + /// If not Abort, miri returns an error for an isolated op. + /// Following options determine if user should be warned about such error. + /// Do not print warning about rejected isolated op. + NoWarning, + + /// Print a warning about rejected isolated op, with backtrace. + Warning, + + /// Print a warning about rejected isolated op, without backtrace. + WarningWithoutBacktrace, +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum IsolatedOp { + /// Reject an op requiring communication with the host. 
By + /// default, miri rejects the op with an abort. If not, it returns + /// an error code, and prints a warning about it. Warning levels + /// are controlled by `RejectOpWith` enum. + Reject(RejectOpWith), + + /// Execute op requiring communication with the host, i.e. disable isolation. + Allow, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum BacktraceStyle { + /// Prints a terser backtrace which ideally only contains relevant information. + Short, + /// Prints a backtrace with all possible information. + Full, + /// Prints only the frame that the error occurs in. + Off, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ValidationMode { + /// Do not perform any kind of validation. + No, + /// Validate the interior of the value, but not things behind references. + Shallow, + /// Fully recursively validate references. + Deep, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum FloatRoundingErrorMode { + /// Apply a random error (the default). + Random, + /// Don't apply any error. + None, + /// Always apply the maximum error (with a random sign). + Max, +} + /// Extra data stored with each stack frame pub struct FrameExtra<'tcx> { /// Extra data for the Borrow Tracker. @@ -599,7 +668,10 @@ pub struct MiriMachine<'tcx> { /// Whether floating-point operations can behave non-deterministically. pub float_nondet: bool, /// Whether floating-point operations can have a non-deterministic rounding error. - pub float_rounding_error: bool, + pub float_rounding_error: FloatRoundingErrorMode, + + /// Whether Miri artifically introduces short reads/writes on file descriptors. + pub short_fd_operations: bool, } impl<'tcx> MiriMachine<'tcx> { @@ -761,6 +833,7 @@ impl<'tcx> MiriMachine<'tcx> { force_intrinsic_fallback: config.force_intrinsic_fallback, float_nondet: config.float_nondet, float_rounding_error: config.float_rounding_error, + short_fd_operations: config.short_fd_operations, } } @@ -937,6 +1010,7 @@ impl VisitProvenance for MiriMachine<'_> { force_intrinsic_fallback: _, float_nondet: _, float_rounding_error: _, + short_fd_operations: _, } = self; threads.visit_provenance(visit); @@ -1077,7 +1151,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { .target_features .iter() .filter(|&feature| { - feature.kind != TargetFeatureKind::Implied && !ecx.tcx.sess.target_features.contains(&feature.name) + feature.kind != TargetFeatureKind::Implied + && !ecx.tcx.sess.target_features.contains(&feature.name) }) .fold(String::new(), |mut s, feature| { if !s.is_empty() { @@ -1208,7 +1283,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { ecx: &mut InterpCx<'tcx, Self>, val: ImmTy<'tcx>, ) -> InterpResult<'tcx, ImmTy<'tcx>> { - crate::math::apply_random_float_error_to_imm(ecx, val, 2 /* log2(4) */) + crate::math::apply_random_float_error_to_imm(ecx, val, 4) } #[inline(always)] diff --git a/src/tools/miri/src/math.rs b/src/tools/miri/src/math.rs index e9e5a1070c9e4..604d6c0fd732b 100644 --- a/src/tools/miri/src/math.rs +++ b/src/tools/miri/src/math.rs @@ -6,67 +6,95 @@ use rustc_middle::ty::{self, FloatTy, ScalarInt}; use crate::*; /// Disturbes a floating-point result by a relative error in the range (-2^scale, 2^scale). -/// -/// For a 2^N ULP error, you can use an `err_scale` of `-(F::PRECISION - 1 - N)`. -/// In other words, a 1 ULP (absolute) error is the same as a `2^-(F::PRECISION-1)` relative error. -/// (Subtracting 1 compensates for the integer bit.) 
pub(crate) fn apply_random_float_error( ecx: &mut crate::MiriInterpCx<'_>, val: F, err_scale: i32, ) -> F { - if !ecx.machine.float_nondet || !ecx.machine.float_rounding_error { + if !ecx.machine.float_nondet + || matches!(ecx.machine.float_rounding_error, FloatRoundingErrorMode::None) + // relative errors don't do anything to zeros... avoid messing up the sign + || val.is_zero() + { return val; } - let rng = ecx.machine.rng.get_mut(); + // Generate a random integer in the range [0, 2^PREC). // (When read as binary, the position of the first `1` determines the exponent, // and the remaining bits fill the mantissa. `PREC` is one plus the size of the mantissa, // so this all works out.) - let r = F::from_u128(rng.random_range(0..(1 << F::PRECISION))).value; + let r = F::from_u128(match ecx.machine.float_rounding_error { + FloatRoundingErrorMode::Random => rng.random_range(0..(1 << F::PRECISION)), + FloatRoundingErrorMode::Max => (1 << F::PRECISION) - 1, // force max error + FloatRoundingErrorMode::None => unreachable!(), + }) + .value; // Multiply this with 2^(scale - PREC). The result is between 0 and // 2^PREC * 2^(scale - PREC) = 2^scale. let err = r.scalbn(err_scale.strict_sub(F::PRECISION.try_into().unwrap())); // give it a random sign let err = if rng.random() { -err } else { err }; - // multiple the value with (1+err) - (val * (F::from_u128(1).value + err).value).value + // Compute `val*(1+err)`, distributed out as `val + val*err` to avoid the imprecise addition + // error being amplified by multiplication. + (val + (val * err).value).value } -/// [`apply_random_float_error`] gives instructions to apply a 2^N ULP error. -/// This function implements these instructions such that applying a 2^N ULP error is less error prone. -/// So for a 2^N ULP error, you would pass N as the `ulp_exponent` argument. +/// Applies an error of `[-N, +N]` ULP to the given value. pub(crate) fn apply_random_float_error_ulp( ecx: &mut crate::MiriInterpCx<'_>, val: F, - ulp_exponent: u32, + max_error: u32, ) -> F { - let n = i32::try_from(ulp_exponent) - .expect("`err_scale_for_ulp`: exponent is too large to create an error scale"); - // we know this fits - let prec = i32::try_from(F::PRECISION).unwrap(); - let err_scale = -(prec - n - 1); - apply_random_float_error(ecx, val, err_scale) + // We could try to be clever and reuse `apply_random_float_error`, but that is hard to get right + // (see for why) so we + // implement the logic directly instead. + if !ecx.machine.float_nondet + || matches!(ecx.machine.float_rounding_error, FloatRoundingErrorMode::None) + // FIXME: also disturb zeros? That requires a lot more cases in `fixed_float_value` + // and might make the std test suite quite unhappy. + || val.is_zero() + { + return val; + } + let rng = ecx.machine.rng.get_mut(); + + let max_error = i64::from(max_error); + let error = match ecx.machine.float_rounding_error { + FloatRoundingErrorMode::Random => rng.random_range(-max_error..=max_error), + FloatRoundingErrorMode::Max => + if rng.random() { + max_error + } else { + -max_error + }, + FloatRoundingErrorMode::None => unreachable!(), + }; + // If upwards ULP and downwards ULP differ, we take the average. + let ulp = (((val.next_up().value - val).value + (val - val.next_down().value).value).value + / F::from_u128(2).value) + .value; + // Shift the value by N times the ULP + (val + (ulp * F::from_i128(error.into()).value).value).value } -/// Applies a random 16ULP floating point error to `val` and returns the new value. 
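A standalone sketch of the new `[-N, +N]` ULP error model, with the machine RNG replaced by an explicit `error` argument (needs a Rust with stable `f64::next_up`/`next_down`; everything here is illustrative, not Miri's code):

fn apply_ulp_error(val: f64, error: i64) -> f64 {
    if val == 0.0 {
        // Relative errors do nothing to zero; leave it (and its sign) alone.
        return val;
    }
    // If the upward and downward ULPs differ (at a power of two), take the average.
    let ulp = ((val.next_up() - val) + (val - val.next_down())) / 2.0;
    // Shift the value by `error` times the ULP.
    val + ulp * error as f64
}

fn main() {
    let x = 1.0f64;
    // With something like `-Zmiri-max-extra-rounding-error` and N = 4, results
    // would always land 4 ULP away from the exact value (sign chosen randomly).
    println!("{:e}", apply_ulp_error(x, 4) - x);
    println!("{:e}", apply_ulp_error(x, -4) - x);
}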
+/// Applies an error of `[-N, +N]` ULP to the given value. /// Will fail if `val` is not a floating point number. pub(crate) fn apply_random_float_error_to_imm<'tcx>( ecx: &mut MiriInterpCx<'tcx>, val: ImmTy<'tcx>, - ulp_exponent: u32, + max_error: u32, ) -> InterpResult<'tcx, ImmTy<'tcx>> { let scalar = val.to_scalar_int()?; let res: ScalarInt = match val.layout.ty.kind() { ty::Float(FloatTy::F16) => - apply_random_float_error_ulp(ecx, scalar.to_f16(), ulp_exponent).into(), + apply_random_float_error_ulp(ecx, scalar.to_f16(), max_error).into(), ty::Float(FloatTy::F32) => - apply_random_float_error_ulp(ecx, scalar.to_f32(), ulp_exponent).into(), + apply_random_float_error_ulp(ecx, scalar.to_f32(), max_error).into(), ty::Float(FloatTy::F64) => - apply_random_float_error_ulp(ecx, scalar.to_f64(), ulp_exponent).into(), + apply_random_float_error_ulp(ecx, scalar.to_f64(), max_error).into(), ty::Float(FloatTy::F128) => - apply_random_float_error_ulp(ecx, scalar.to_f128(), ulp_exponent).into(), + apply_random_float_error_ulp(ecx, scalar.to_f128(), max_error).into(), _ => bug!("intrinsic called with non-float input type"), }; diff --git a/src/tools/miri/src/shims/files.rs b/src/tools/miri/src/shims/files.rs index 0d4642c6ad0ef..8c29cb040b55a 100644 --- a/src/tools/miri/src/shims/files.rs +++ b/src/tools/miri/src/shims/files.rs @@ -168,8 +168,9 @@ pub trait FileDescription: std::fmt::Debug + FileDescriptionExt { } /// Determines whether this FD non-deterministically has its reads and writes shortened. - fn nondet_short_accesses(&self) -> bool { - true + fn short_fd_operations(&self) -> bool { + // We only enable this for FD kinds where we think short accesses gain useful test coverage. + false } /// Seeks to the given offset (which can be relative to the beginning, end, or current position). @@ -395,6 +396,13 @@ impl FileDescription for FileHandle { communicate_allowed && self.file.is_terminal() } + fn short_fd_operations(&self) -> bool { + // While short accesses on file-backed FDs are very rare (at least for sufficiently small + // accesses), they can realistically happen when a signal interrupts the syscall. + // FIXME: we should return `false` if this is a named pipe... + true + } + fn as_unix<'tcx>(&self, ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription { assert!( ecx.target_os_is_unix(), diff --git a/src/tools/miri/src/shims/native_lib/ffi.rs b/src/tools/miri/src/shims/native_lib/ffi.rs new file mode 100644 index 0000000000000..0badf22bb7651 --- /dev/null +++ b/src/tools/miri/src/shims/native_lib/ffi.rs @@ -0,0 +1,46 @@ +//! Support code for dealing with libffi. + +use libffi::low::CodePtr; +use libffi::middle::{Arg as ArgPtr, Cif, Type as FfiType}; + +/// Perform the actual FFI call. +/// +/// # Safety +/// +/// The safety invariants of the foreign function being called must be upheld (if any). +pub unsafe fn call(fun: CodePtr, args: &mut [OwnedArg]) -> R { + let arg_ptrs: Vec<_> = args.iter().map(|arg| arg.ptr()).collect(); + let cif = Cif::new(args.iter_mut().map(|arg| arg.ty.take().unwrap()), R::reify().into_middle()); + // SAFETY: Caller upholds that the function is safe to call, and since we + // were passed a slice reference we know the `OwnedArg`s won't have been + // dropped by this point. + unsafe { cif.call(fun, &arg_ptrs) } +} + +/// An argument for an FFI call. +#[derive(Debug, Clone)] +pub struct OwnedArg { + /// The type descriptor for this argument. + ty: Option, + /// Corresponding bytes for the value. 
+ bytes: Box<[u8]>, +} + +impl OwnedArg { + /// Instantiates an argument from a type descriptor and bytes. + pub fn new(ty: FfiType, bytes: Box<[u8]>) -> Self { + Self { ty: Some(ty), bytes } + } + + /// Creates a libffi argument pointer pointing to this argument's bytes. + /// NB: Since `libffi::middle::Arg` ignores the lifetime of the reference + /// it's derived from, it is up to the caller to ensure the `OwnedArg` is + /// not dropped before unsafely calling `libffi::middle::Cif::call()`! + fn ptr(&self) -> ArgPtr { + // FIXME: Using `&self.bytes[0]` to reference the whole array is + // definitely unsound under SB, but we're waiting on + // https://github.com/libffi-rs/libffi-rs/commit/112a37b3b6ffb35bd75241fbcc580de40ba74a73 + // to land in a release so that we don't need to do this. + ArgPtr::new(&self.bytes[0]) + } +} diff --git a/src/tools/miri/src/shims/native_lib/mod.rs b/src/tools/miri/src/shims/native_lib/mod.rs index 74b9b704fea5e..da8f785e37345 100644 --- a/src/tools/miri/src/shims/native_lib/mod.rs +++ b/src/tools/miri/src/shims/native_lib/mod.rs @@ -2,14 +2,15 @@ use std::ops::Deref; -use libffi::high::call as ffi; use libffi::low::CodePtr; -use rustc_abi::{BackendRepr, HasDataLayout, Size}; -use rustc_middle::mir::interpret::Pointer; -use rustc_middle::ty::{self as ty, IntTy, UintTy}; +use libffi::middle::Type as FfiType; +use rustc_abi::{HasDataLayout, Size}; +use rustc_middle::ty::{self as ty, IntTy, Ty, UintTy}; use rustc_span::Symbol; use serde::{Deserialize, Serialize}; +mod ffi; + #[cfg_attr( not(all( target_os = "linux", @@ -20,6 +21,7 @@ use serde::{Deserialize, Serialize}; )] pub mod trace; +use self::ffi::OwnedArg; use crate::*; /// The final results of an FFI trace, containing every relevant event detected @@ -70,12 +72,12 @@ impl AccessRange { impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {} trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Call native host function and return the output as an immediate. - fn call_native_with_args<'a>( + fn call_native_with_args( &mut self, link_name: Symbol, dest: &MPlaceTy<'tcx>, - ptr: CodePtr, - libffi_args: Vec>, + fun: CodePtr, + libffi_args: &mut [OwnedArg], ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { let this = self.eval_context_mut(); #[cfg(target_os = "linux")] @@ -93,55 +95,55 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // Unsafe because of the call to native code. // Because this is calling a C function it is not necessarily sound, // but there is no way around this and we've checked as much as we can. 
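For context, the `libffi::middle` machinery that `call` and `OwnedArg` wrap can also be driven directly. A minimal round-trip sketch, assuming the `libffi` crate API imported above (the `add` function and all names are invented for illustration):

use libffi::low::CodePtr;
use libffi::middle::{Arg, Cif, Type};

extern "C" fn add(a: i32, b: i32) -> i32 {
    a + b
}

fn main() {
    // Describe the signature: (i32, i32) -> i32.
    let cif = Cif::new(vec![Type::i32(), Type::i32()], Type::i32());
    let (a, b) = (2i32, 40i32);
    let fptr: extern "C" fn(i32, i32) -> i32 = add;
    let fun = CodePtr(fptr as usize as *mut std::ffi::c_void);
    // SAFETY: `add` really has the signature described by `cif`, and the
    // arguments outlive the call.
    let sum: i32 = unsafe { cif.call(fun, &[Arg::new(&a), Arg::new(&b)]) };
    assert_eq!(sum, 42);
}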
- let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_i8(x) } ty::Int(IntTy::I16) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_i16(x) } ty::Int(IntTy::I32) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_i32(x) } ty::Int(IntTy::I64) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_i64(x) } ty::Int(IntTy::Isize) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_target_isize(x.try_into().unwrap(), this) } // uints ty::Uint(UintTy::U8) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_u8(x) } ty::Uint(UintTy::U16) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_u16(x) } ty::Uint(UintTy::U32) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_u32(x) } ty::Uint(UintTy::U64) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_u64(x) } ty::Uint(UintTy::Usize) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(fun, libffi_args) }; Scalar::from_target_usize(x.try_into().unwrap(), this) } // Functions with no declared return type (i.e., the default return) // have the output_type `Tuple([])`. ty::Tuple(t_list) if (*t_list).deref().is_empty() => { - unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) }; + unsafe { ffi::call::<()>(fun, libffi_args) }; return interp_ok(ImmTy::uninit(dest.layout)); } ty::RawPtr(..) => { - let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) }; - let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr())); + let x = unsafe { ffi::call::<*const ()>(fun, libffi_args) }; + let ptr = StrictPointer::new(Provenance::Wildcard, Size::from_bytes(x.addr())); Scalar::from_pointer(ptr, this) } _ => @@ -267,6 +269,150 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(()) } + + /// Extract the value from the result of reading an operand from the machine + /// and convert it to a `OwnedArg`. + fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> { + let this = self.eval_context_ref(); + + // This should go first so that we emit unsupported before doing a bunch + // of extra work for types that aren't supported yet. + let ty = this.ty_to_ffitype(v.layout.ty)?; + + // Helper to print a warning when a pointer is shared with the native code. + let expose = |prov: Provenance| -> InterpResult<'tcx> { + // The first time this happens, print a warning. + if !this.machine.native_call_mem_warned.replace(true) { + // Newly set, so first time we get here. + this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing }); + } + + this.expose_provenance(prov)?; + interp_ok(()) + }; + + // Compute the byte-level representation of the argument. If there's a pointer in there, we + // expose it inside the AM. 
Later in `visit_reachable_allocs`, the "meta"-level provenance + // for accessing the pointee gets exposed; this is crucial to justify the C code effectively + // casting the integer in `byte` to a pointer and using that. + let bytes = match v.as_mplace_or_imm() { + either::Either::Left(mplace) => { + // Get the alloc id corresponding to this mplace, alongside + // a pointer that's offset to point to this particular + // mplace (not one at the base addr of the allocation). + let sz = mplace.layout.size.bytes_usize(); + if sz == 0 { + throw_unsup_format!("attempting to pass a ZST over FFI"); + } + let (id, ofs, _) = this.ptr_get_alloc_id(mplace.ptr(), sz.try_into().unwrap())?; + let ofs = ofs.bytes_usize(); + let range = ofs..ofs.strict_add(sz); + // Expose all provenances in the allocation within the byte range of the struct, if + // any. These pointers are being directly passed to native code by-value. + let alloc = this.get_alloc_raw(id)?; + for prov in alloc.provenance().get_range(this, range.clone().into()) { + expose(prov)?; + } + // Read the bytes that make up this argument. We cannot use the normal getter as + // those would fail if any part of the argument is uninitialized. Native code + // is kind of outside the interpreter, after all... + Box::from(alloc.inspect_with_uninit_and_ptr_outside_interpreter(range)) + } + either::Either::Right(imm) => { + let mut bytes: Box<[u8]> = vec![0; imm.layout.size.bytes_usize()].into(); + + // A little helper to write scalars to our byte array. + let mut write_scalar = |this: &MiriInterpCx<'tcx>, sc: Scalar, pos: usize| { + // If a scalar is a pointer, then expose its provenance. + if let interpret::Scalar::Ptr(p, _) = sc { + expose(p.provenance)?; + } + write_target_uint( + this.data_layout().endian, + &mut bytes[pos..][..sc.size().bytes_usize()], + sc.to_scalar_int()?.to_bits_unchecked(), + ) + .unwrap(); + interp_ok(()) + }; + + // Write the scalar into the `bytes` buffer. + match *imm { + Immediate::Scalar(sc) => write_scalar(this, sc, 0)?, + Immediate::ScalarPair(sc_first, sc_second) => { + // The first scalar has an offset of zero; compute the offset of the 2nd. + let ofs_second = { + let rustc_abi::BackendRepr::ScalarPair(a, b) = imm.layout.backend_repr + else { + span_bug!( + this.cur_span(), + "op_to_ffi_arg: invalid scalar pair layout: {:#?}", + imm.layout + ) + }; + a.size(this).align_to(b.align(this).abi).bytes_usize() + }; + + write_scalar(this, sc_first, 0)?; + write_scalar(this, sc_second, ofs_second)?; + } + Immediate::Uninit => { + // Nothing to write. + } + } + + bytes + } + }; + interp_ok(OwnedArg::new(ty, bytes)) + } + + /// Parses an ADT to construct the matching libffi type. + fn adt_to_ffitype( + &self, + orig_ty: Ty<'_>, + adt_def: ty::AdtDef<'tcx>, + args: &'tcx ty::List>, + ) -> InterpResult<'tcx, FfiType> { + // TODO: Certain non-C reprs should be okay also. + if !adt_def.repr().c() { + throw_unsup_format!("passing a non-#[repr(C)] struct over FFI: {orig_ty}") + } + // TODO: unions, etc. + if !adt_def.is_struct() { + throw_unsup_format!( + "unsupported argument type for native call: {orig_ty} is an enum or union" + ); + } + + let this = self.eval_context_ref(); + let mut fields = vec![]; + for field in &adt_def.non_enum_variant().fields { + fields.push(this.ty_to_ffitype(field.ty(*this.tcx, args))?); + } + + interp_ok(FfiType::structure(fields)) + } + + /// Gets the matching libffi type for a given Ty. 
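The `#[repr(C)]` requirement and the ScalarPair offset computation above (`a`'s size rounded up to `b`'s alignment) match what Rust itself lays out for C structs; a quick standalone check, for illustration only:

#[repr(C)]
struct Pair {
    a: u8,
    b: u32,
}

fn main() {
    // `b` is placed at `a`'s size (1) rounded up to `b`'s alignment (4).
    assert_eq!(std::mem::offset_of!(Pair, a), 0);
    assert_eq!(std::mem::offset_of!(Pair, b), 4);
    assert_eq!(std::mem::size_of::<Pair>(), 8);
}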
+ fn ty_to_ffitype(&self, ty: Ty<'tcx>) -> InterpResult<'tcx, FfiType> { + interp_ok(match ty.kind() { + ty::Int(IntTy::I8) => FfiType::i8(), + ty::Int(IntTy::I16) => FfiType::i16(), + ty::Int(IntTy::I32) => FfiType::i32(), + ty::Int(IntTy::I64) => FfiType::i64(), + ty::Int(IntTy::Isize) => FfiType::isize(), + // the uints + ty::Uint(UintTy::U8) => FfiType::u8(), + ty::Uint(UintTy::U16) => FfiType::u16(), + ty::Uint(UintTy::U32) => FfiType::u32(), + ty::Uint(UintTy::U64) => FfiType::u64(), + ty::Uint(UintTy::Usize) => FfiType::usize(), + ty::RawPtr(..) => FfiType::pointer(), + ty::Adt(adt_def, args) => self.adt_to_ffitype(ty, *adt_def, args)?, + _ => throw_unsup_format!("unsupported argument type for native call: {}", ty), + }) + } } impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} @@ -295,36 +441,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Do we have ptrace? let tracing = trace::Supervisor::is_enabled(); - // Get the function arguments, and convert them to `libffi`-compatible form. - let mut libffi_args = Vec::::with_capacity(args.len()); + // Get the function arguments, copy them, and prepare the type descriptions. + let mut libffi_args = Vec::::with_capacity(args.len()); for arg in args.iter() { - if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) { - throw_unsup_format!("only scalar argument types are supported for native calls") - } - let imm = this.read_immediate(arg)?; - libffi_args.push(imm_to_carg(&imm, this)?); - // If we are passing a pointer, expose its provenance. Below, all exposed memory - // (previously exposed and new exposed) will then be properly prepared. - if matches!(arg.layout.ty.kind(), ty::RawPtr(..)) { - let ptr = imm.to_scalar().to_pointer(this)?; - let Some(prov) = ptr.provenance else { - // Pointer without provenance may not access any memory anyway, skip. - continue; - }; - // The first time this happens, print a warning. - if !this.machine.native_call_mem_warned.replace(true) { - // Newly set, so first time we get here. - this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing }); - } - - this.expose_provenance(prov)?; - } + libffi_args.push(this.op_to_ffi_arg(arg, tracing)?); } - // Convert arguments to `libffi::high::Arg` type. - let libffi_args = libffi_args - .iter() - .map(|arg| arg.arg_downcast()) - .collect::>>(); // Prepare all exposed memory (both previously exposed, and just newly exposed since a // pointer was passed as argument). Uninitialised memory is left as-is, but any data @@ -343,8 +464,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance()); if !tracing { - // Expose all provenances in this allocation, since the native code can do $whatever. - // Can be skipped when tracing; in that case we'll expose just the actually-read parts later. + // Expose all provenances in this allocation, since the native code can do + // $whatever. Can be skipped when tracing; in that case we'll expose just the + // actually-read parts later. for prov in alloc.provenance().provenances() { this.expose_provenance(prov)?; } @@ -354,7 +476,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { if info.mutbl.is_mut() { let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?; // These writes could initialize everything and wreck havoc with the pointers. - // We can skip that when tracing; in that case we'll later do that only for the memory that got actually written. 
+ // We can skip that when tracing; in that case we'll later do that only for the + // memory that got actually written. if !tracing { alloc.process_native_write(&cx.tcx, None); } @@ -367,7 +490,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Call the function and store output, depending on return type in the function signature. let (ret, maybe_memevents) = - this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; + this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?; if tracing { this.tracing_apply_accesses(maybe_memevents.unwrap())?; @@ -377,83 +500,3 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(true) } } - -#[derive(Debug, Clone)] -/// Enum of supported arguments to external C functions. -// We introduce this enum instead of just calling `ffi::arg` and storing a list -// of `libffi::high::Arg` directly, because the `libffi::high::Arg` just wraps a reference -// to the value it represents: https://docs.rs/libffi/latest/libffi/high/call/struct.Arg.html -// and we need to store a copy of the value, and pass a reference to this copy to C instead. -enum CArg { - /// 8-bit signed integer. - Int8(i8), - /// 16-bit signed integer. - Int16(i16), - /// 32-bit signed integer. - Int32(i32), - /// 64-bit signed integer. - Int64(i64), - /// isize. - ISize(isize), - /// 8-bit unsigned integer. - UInt8(u8), - /// 16-bit unsigned integer. - UInt16(u16), - /// 32-bit unsigned integer. - UInt32(u32), - /// 64-bit unsigned integer. - UInt64(u64), - /// usize. - USize(usize), - /// Raw pointer, stored as C's `void*`. - RawPtr(*mut std::ffi::c_void), -} - -impl<'a> CArg { - /// Convert a `CArg` to a `libffi` argument type. - fn arg_downcast(&'a self) -> libffi::high::Arg<'a> { - match self { - CArg::Int8(i) => ffi::arg(i), - CArg::Int16(i) => ffi::arg(i), - CArg::Int32(i) => ffi::arg(i), - CArg::Int64(i) => ffi::arg(i), - CArg::ISize(i) => ffi::arg(i), - CArg::UInt8(i) => ffi::arg(i), - CArg::UInt16(i) => ffi::arg(i), - CArg::UInt32(i) => ffi::arg(i), - CArg::UInt64(i) => ffi::arg(i), - CArg::USize(i) => ffi::arg(i), - CArg::RawPtr(i) => ffi::arg(i), - } - } -} - -/// Extract the scalar value from the result of reading a scalar from the machine, -/// and convert it to a `CArg`. -fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'tcx, CArg> { - interp_ok(match v.layout.ty.kind() { - // If the primitive provided can be converted to a type matching the type pattern - // then create a `CArg` of this primitive value with the corresponding `CArg` constructor. - // the ints - ty::Int(IntTy::I8) => CArg::Int8(v.to_scalar().to_i8()?), - ty::Int(IntTy::I16) => CArg::Int16(v.to_scalar().to_i16()?), - ty::Int(IntTy::I32) => CArg::Int32(v.to_scalar().to_i32()?), - ty::Int(IntTy::I64) => CArg::Int64(v.to_scalar().to_i64()?), - ty::Int(IntTy::Isize) => - CArg::ISize(v.to_scalar().to_target_isize(cx)?.try_into().unwrap()), - // the uints - ty::Uint(UintTy::U8) => CArg::UInt8(v.to_scalar().to_u8()?), - ty::Uint(UintTy::U16) => CArg::UInt16(v.to_scalar().to_u16()?), - ty::Uint(UintTy::U32) => CArg::UInt32(v.to_scalar().to_u32()?), - ty::Uint(UintTy::U64) => CArg::UInt64(v.to_scalar().to_u64()?), - ty::Uint(UintTy::Usize) => - CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()), - ty::RawPtr(..) => { - let s = v.to_scalar().to_pointer(cx)?.addr(); - // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback - // above. 
- CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize())) - } - _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty), - }) -} diff --git a/src/tools/miri/src/shims/native_lib/trace/child.rs b/src/tools/miri/src/shims/native_lib/trace/child.rs index b998ba822dde4..95b0617a0261b 100644 --- a/src/tools/miri/src/shims/native_lib/trace/child.rs +++ b/src/tools/miri/src/shims/native_lib/trace/child.rs @@ -90,14 +90,6 @@ impl Supervisor { // Unwinding might be messed up due to partly protected memory, so let's abort if something // breaks inside here. let res = std::panic::abort_unwind(|| { - // SAFETY: We do not access machine memory past this point until the - // supervisor is ready to allow it. - // FIXME: this is sketchy, as technically the memory is still in the Rust Abstract Machine, - // and the compiler would be allowed to reorder accesses below this block... - unsafe { - Self::protect_pages(alloc.pages(), mman::ProtFlags::PROT_NONE).unwrap(); - } - // Send over the info. // NB: if we do not wait to receive a blank confirmation response, it is // possible that the supervisor is alerted of the SIGSTOP *before* it has @@ -110,16 +102,14 @@ impl Supervisor { // count. signal::raise(signal::SIGSTOP).unwrap(); - let res = f(); + // SAFETY: We have coordinated with the supervisor to ensure that this memory will keep + // working as normal, just with extra tracing. So even if the compiler moves memory + // accesses down to after the `mprotect`, they won't actually segfault. + unsafe { + Self::protect_pages(alloc.pages(), mman::ProtFlags::PROT_NONE).unwrap(); + } - // We can't use IPC channels here to signal that FFI mode has ended, - // since they might allocate memory which could get us stuck in a SIGTRAP - // with no easy way out! While this could be worked around, it is much - // simpler and more robust to simply use the signals which are left for - // arbitrary usage. Since this will block until we are continued by the - // supervisor, we can assume past this point that everything is back to - // normal. - signal::raise(signal::SIGUSR1).unwrap(); + let res = f(); // SAFETY: We set memory back to normal, so this is safe. unsafe { @@ -130,6 +120,12 @@ impl Supervisor { .unwrap(); } + // Signal the supervisor that we are done. Will block until the supervisor continues us. + // This will also shut down the segfault handler, so it's important that all memory is + // reset back to normal above. There must not be a window in time where accessing the + // pages we protected above actually causes the program to abort. + signal::raise(signal::SIGUSR1).unwrap(); + res }); diff --git a/src/tools/miri/src/shims/native_lib/trace/parent.rs b/src/tools/miri/src/shims/native_lib/trace/parent.rs index 83f6c7a13fcfe..3ae98259ab35b 100644 --- a/src/tools/miri/src/shims/native_lib/trace/parent.rs +++ b/src/tools/miri/src/shims/native_lib/trace/parent.rs @@ -18,6 +18,11 @@ const ARCH_WORD_SIZE: usize = 4; #[cfg(target_arch = "x86_64")] const ARCH_WORD_SIZE: usize = 8; +// x86 max instruction length is 15 bytes: +// https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html +// See vol. 3B section 24.25. +const ARCH_MAX_INSTR_SIZE: usize = 15; + /// The address of the page set to be edited, initialised to a sentinel null /// pointer. 
static PAGE_ADDR: AtomicPtr = AtomicPtr::new(std::ptr::null_mut()); @@ -132,10 +137,10 @@ impl Iterator for ChildListener { return Some(ExecEvent::Syscall(pid)); }, // Child with the given pid was stopped by the given signal. - // It's somewhat dubious when this is returned instead of - // WaitStatus::Stopped, but for our purposes they are the - // same thing. - wait::WaitStatus::PtraceEvent(pid, signal, _) => + // It's somewhat unclear when which of these two is returned; + // we just treat them the same. + wait::WaitStatus::Stopped(pid, signal) + | wait::WaitStatus::PtraceEvent(pid, signal, _) => if self.attached { // This is our end-of-FFI signal! if signal == signal::SIGUSR1 { @@ -148,19 +153,6 @@ impl Iterator for ChildListener { // Just pass along the signal. ptrace::cont(pid, signal).unwrap(); }, - // Child was stopped at the given signal. Same logic as for - // WaitStatus::PtraceEvent. - wait::WaitStatus::Stopped(pid, signal) => - if self.attached { - if signal == signal::SIGUSR1 { - self.attached = false; - return Some(ExecEvent::End); - } else { - return Some(ExecEvent::Status(pid, signal)); - } - } else { - ptrace::cont(pid, signal).unwrap(); - }, _ => (), }, // This case should only trigger when all children died. @@ -250,7 +242,7 @@ pub fn sv_loop( // We can't trust simply calling `Pid::this()` in the child process to give the right // PID for us, so we get it this way. curr_pid = wait_for_signal(None, signal::SIGSTOP, InitialCont::No).unwrap(); - + // Continue until next syscall. ptrace::syscall(curr_pid, None).unwrap(); } // Child wants to end tracing. @@ -289,8 +281,7 @@ pub fn sv_loop( } } }, - // Child entered a syscall; we wait for exits inside of this, so it - // should never trigger on return from a syscall we care about. + // Child entered or exited a syscall. For now we ignore this and just continue. ExecEvent::Syscall(pid) => { ptrace::syscall(pid, None).unwrap(); } @@ -344,8 +335,8 @@ fn wait_for_signal( return Err(ExecEnd(Some(code))); } wait::WaitStatus::Signaled(_, _, _) => return Err(ExecEnd(None)), - wait::WaitStatus::Stopped(pid, signal) => (signal, pid), - wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid), + wait::WaitStatus::Stopped(pid, signal) + | wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid), // This covers PtraceSyscall and variants that are impossible with // the flags set (e.g. WaitStatus::StillAlive). _ => { @@ -486,7 +477,27 @@ fn handle_segfault( let stack_ptr = ch_stack.strict_add(CALLBACK_STACK_SIZE / 2); let regs_bak = ptrace::getregs(pid).unwrap(); let mut new_regs = regs_bak; - let ip_prestep = regs_bak.ip(); + + // Read at least one instruction from the ip. It's possible that the instruction + // that triggered the segfault was short and at the end of the mapped text area, + // so some of these reads may fail; in that case, just write empty bytes. If all + // reads failed, the disassembler will report an error. + let instr = (0..(ARCH_MAX_INSTR_SIZE.div_ceil(ARCH_WORD_SIZE))) + .flat_map(|ofs| { + // This reads one word of memory; we divided by `ARCH_WORD_SIZE` above to compensate for that. + ptrace::read( + pid, + std::ptr::without_provenance_mut( + regs_bak.ip().strict_add(ARCH_WORD_SIZE.strict_mul(ofs)), + ), + ) + .unwrap_or_default() + .to_ne_bytes() + }) + .collect::>(); + + // Now figure out the size + type of access and log it down. + capstone_disassemble(&instr, addr, cs, acc_events).expect("Failed to disassemble instruction"); // Move the instr ptr into the deprotection code. 
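The word-chunked instruction fetch above, reduced to a standalone sketch with `ptrace::read` stubbed out (all values are made up; the x86-64 word size is assumed):

const ARCH_WORD_SIZE: usize = 8;
const ARCH_MAX_INSTR_SIZE: usize = 15;

/// Stand-in for `ptrace::read(pid, ip + ofs)`; pretend the second word is unmapped.
fn read_word_at(ofs: usize) -> Option<i64> {
    if ofs < ARCH_WORD_SIZE { Some(0x0807_0605_0403_0201) } else { None }
}

fn main() {
    let instr: Vec<u8> = (0..ARCH_MAX_INSTR_SIZE.div_ceil(ARCH_WORD_SIZE))
        .flat_map(|i| {
            read_word_at(i * ARCH_WORD_SIZE)
                .unwrap_or_default() // a failed read just contributes zero bytes
                .to_ne_bytes()
        })
        .collect();
    // Two 8-byte words cover any 15-byte x86 instruction.
    assert_eq!(instr.len(), 16);
}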
#[expect(clippy::as_conversions)] @@ -526,33 +537,8 @@ fn handle_segfault( ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap(); } - // Save registers and grab the bytes that were executed. This would - // be really nasty if it was a jump or similar but those thankfully - // won't do memory accesses and so can't trigger this! let regs_bak = ptrace::getregs(pid).unwrap(); new_regs = regs_bak; - let ip_poststep = regs_bak.ip(); - - // Ensure that we've actually gone forwards. - assert!(ip_poststep > ip_prestep); - // But not by too much. 64 bytes should be "big enough" on ~any architecture. - assert!(ip_prestep.strict_add(64) > ip_poststep); - - // We need to do reads/writes in word-sized chunks. - let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE); - let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| { - // This only needs to be a valid pointer in the child process, not ours. - ret.append( - &mut ptrace::read(pid, std::ptr::without_provenance_mut(ip)) - .unwrap() - .to_ne_bytes() - .to_vec(), - ); - ret - }); - - // Now figure out the size + type of access and log it down. - capstone_disassemble(&instr, addr, cs, acc_events).expect("Failed to disassemble instruction"); // Reprotect everything and continue. #[expect(clippy::as_conversions)] diff --git a/src/tools/miri/src/shims/unix/fd.rs b/src/tools/miri/src/shims/unix/fd.rs index e226a55d8b1ee..95e26ef5d5d83 100644 --- a/src/tools/miri/src/shims/unix/fd.rs +++ b/src/tools/miri/src/shims/unix/fd.rs @@ -264,14 +264,25 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { return this.set_last_error_and_return(LibcError("EBADF"), dest); }; + // Handle the zero-sized case. The man page says: + // > If count is zero, read() may detect the errors described below. In the absence of any + // > errors, or if read() does not check for errors, a read() with a count of 0 returns zero + // > and has no other effects. + if count == 0 { + this.write_null(dest)?; + return interp_ok(()); + } // Non-deterministically decide to further reduce the count, simulating a partial read (but - // never to 0, that has different behavior). - let count = - if fd.nondet_short_accesses() && count >= 2 && this.machine.rng.get_mut().random() { - count / 2 - } else { - count - }; + // never to 0, that would indicate EOF). + let count = if this.machine.short_fd_operations + && fd.short_fd_operations() + && count >= 2 + && this.machine.rng.get_mut().random() + { + count / 2 // since `count` is at least 2, the result is still at least 1 + } else { + count + }; trace!("read: FD mapped to {fd:?}"); // We want to read at most `count` bytes. We are sure that `count` is not negative @@ -338,14 +349,29 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { return this.set_last_error_and_return(LibcError("EBADF"), dest); }; - // Non-deterministically decide to further reduce the count, simulating a partial write (but - // never to 0, that has different behavior). - let count = - if fd.nondet_short_accesses() && count >= 2 && this.machine.rng.get_mut().random() { - count / 2 - } else { - count - }; + // Handle the zero-sized case. The man page says: + // > If count is zero and fd refers to a regular file, then write() may return a failure + // > status if one of the errors below is detected. If no errors are detected, or error + // > detection is not performed, 0 is returned without causing any other effect. 
If count + // > is zero and fd refers to a file other than a regular file, the results are not + // > specified. + if count == 0 { + // For now let's not open the can of worms of what exactly "not specified" could mean... + this.write_null(dest)?; + return interp_ok(()); + } + // Non-deterministically decide to further reduce the count, simulating a partial write. + // We avoid reducing the write size to 0: the docs seem to be entirely fine with that, + // but the standard library is not (https://github.com/rust-lang/rust/issues/145959). + let count = if this.machine.short_fd_operations + && fd.short_fd_operations() + && count >= 2 + && this.machine.rng.get_mut().random() + { + count / 2 + } else { + count + }; let finish = { let dest = dest.clone(); diff --git a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs index 2d35ef064db8b..ee7deb8d38308 100644 --- a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs +++ b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs @@ -37,11 +37,6 @@ impl FileDescription for EventFd { "event" } - fn nondet_short_accesses(&self) -> bool { - // We always read and write exactly one `u64`. - false - } - fn close<'tcx>( self, _communicate_allowed: bool, diff --git a/src/tools/miri/src/shims/unix/unnamed_socket.rs b/src/tools/miri/src/shims/unix/unnamed_socket.rs index 817ddd7954df4..7eb82851033d8 100644 --- a/src/tools/miri/src/shims/unix/unnamed_socket.rs +++ b/src/tools/miri/src/shims/unix/unnamed_socket.rs @@ -123,6 +123,14 @@ impl FileDescription for AnonSocket { anonsocket_write(self, ptr, len, ecx, finish) } + fn short_fd_operations(&self) -> bool { + // Pipes guarantee that sufficiently small accesses are not broken apart: + // . + // For now, we don't bother checking for the size, and just entirely disable + // short accesses on pipes. + matches!(self.fd_type, AnonSocketType::Socketpair) + } + fn as_unix<'tcx>(&self, _ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription { self } diff --git a/src/tools/miri/src/shims/windows/handle.rs b/src/tools/miri/src/shims/windows/handle.rs index 8a965ea316d72..92d6321bed1fd 100644 --- a/src/tools/miri/src/shims/windows/handle.rs +++ b/src/tools/miri/src/shims/windows/handle.rs @@ -289,9 +289,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } if this.ptr_is_null(target_handle_ptr)? 
{ - throw_unsup_format!( - "`DuplicateHandle` `lpTargetHandle` parameter is null, which is unsupported" - ); + throw_machine_stop!(TerminationInfo::Abort( + "`DuplicateHandle` `lpTargetHandle` parameter must not be null, as otherwise the \ + newly created handle is leaked" + .to_string() + )); } if options != this.eval_windows("c", "DUPLICATE_SAME_ACCESS") { diff --git a/src/tools/miri/tests/deps/Cargo.lock b/src/tools/miri/tests/deps/Cargo.lock index 4b783ebdc4eed..65ca4215c6001 100644 --- a/src/tools/miri/tests/deps/Cargo.lock +++ b/src/tools/miri/tests/deps/Cargo.lock @@ -296,9 +296,9 @@ dependencies = [ [[package]] name = "slab" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "socket2" diff --git a/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs b/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs new file mode 100644 index 0000000000000..7d51050f32b28 --- /dev/null +++ b/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs @@ -0,0 +1,9 @@ +//@compile-flags: -Zmiri-tree-borrows + +fn main() { + // Since the "inside" part is `!Freeze`, the permission to mutate is gone. + let pair = ((), 1); + let x = &pair.0; + let ptr = (&raw const *x).cast::<u8>().cast_mut(); + unsafe { ptr.write(0) }; //~ERROR: /write access .* forbidden/ +} diff --git a/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.stderr b/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.stderr new file mode 100644 index 0000000000000..e9800468c57a5 --- /dev/null +++ b/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.stderr @@ -0,0 +1,21 @@ +error: Undefined Behavior: write access through <TAG> at ALLOC[0x0] is forbidden + --> tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs:LL:CC + | +LL | unsafe { ptr.write(0) }; + | ^^^^^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a potential bug in the program: it performed an invalid operation, but the Tree Borrows rules it violated are still experimental + = help: see https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/tree-borrows.md for further information + = help: the accessed tag has state Frozen which forbids this child write access +help: the accessed tag was created here, in the initial state Frozen + --> tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs:LL:CC + | +LL | let x = &pair.0; + | ^^^^^^^ + = note: BACKTRACE (of the first span): + = note: inside `main` at tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/native-lib/aggregate_arguments.c b/src/tools/miri/tests/native-lib/aggregate_arguments.c new file mode 100644 index 0000000000000..8ad687f2aec9f --- /dev/null +++ b/src/tools/miri/tests/native-lib/aggregate_arguments.c @@ -0,0 +1,52 @@ +#include <stdint.h> + +// See comments in build_native_lib() +#define EXPORT __attribute__((visibility("default"))) + +/* Test: fail/pass_struct_expose_only_range */ + +typedef struct HasPointer { + uint8_t *ptr; +} HasPointer; + +EXPORT uint8_t access_struct_ptr(const HasPointer s) { + return *s.ptr; +} + +/* Test: test_pass_struct */
+ +typedef struct PassMe { + int32_t value; + int64_t other_value; +} PassMe; + +EXPORT int64_t pass_struct(const PassMe pass_me) { + return pass_me.value + pass_me.other_value; +} + +/* Test: test_pass_struct_complex */ + +typedef struct Part1 { + uint16_t high; + uint16_t low; +} Part1; + +typedef struct Part2 { + uint32_t bits; +} Part2; + +typedef struct ComplexStruct { + Part1 part_1; + Part2 part_2; + uint32_t part_3; +} ComplexStruct; + +EXPORT int32_t pass_struct_complex(const ComplexStruct complex, uint16_t high, uint16_t low, uint32_t bits) { + if (complex.part_1.high == high && complex.part_1.low == low + && complex.part_2.bits == bits + && complex.part_3 == bits) + return 0; + else { + return 1; + } +} diff --git a/src/tools/miri/tests/native-lib/fail/pass_struct_expose_only_range.rs b/src/tools/miri/tests/native-lib/fail/pass_struct_expose_only_range.rs new file mode 100644 index 0000000000000..a2b43031a2959 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/pass_struct_expose_only_range.rs @@ -0,0 +1,23 @@ +//@compile-flags: -Zmiri-permissive-provenance + +#[repr(C)] +#[derive(Copy, Clone)] +struct HasPointer { + ptr: *const u8, +} + +extern "C" { + fn access_struct_ptr(s: HasPointer) -> u8; +} + +fn main() { + let structs = vec![HasPointer { ptr: &0 }, HasPointer { ptr: &1 }]; + unsafe { + let r = access_struct_ptr(structs[1]); + assert_eq!(r, 1); + // There are two pointers stored in the allocation backing `structs`; ensure + // we only exposed the one that was actually passed to C. + let _val = *std::ptr::with_exposed_provenance::<u8>(structs[1].ptr.addr()); // fine, ptr got sent to C + let _val = *std::ptr::with_exposed_provenance::<u8>(structs[0].ptr.addr()); //~ ERROR: memory access failed + }; +} diff --git a/src/tools/miri/tests/native-lib/fail/pass_struct_expose_only_range.stderr b/src/tools/miri/tests/native-lib/fail/pass_struct_expose_only_range.stderr new file mode 100644 index 0000000000000..a8f85001c233a --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/pass_struct_expose_only_range.stderr @@ -0,0 +1,28 @@ +warning: sharing memory with a native function called via FFI + --> tests/native-lib/fail/pass_struct_expose_only_range.rs:LL:CC + | +LL | let r = access_struct_ptr(structs[1]); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function + | + = help: when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory + = help: in particular, Miri assumes that the native call initializes all memory it has access to + = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory + = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free + = note: BACKTRACE: + = note: inside `main` at tests/native-lib/fail/pass_struct_expose_only_range.rs:LL:CC + +error: Undefined Behavior: memory access failed: attempting to access 1 byte, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + --> tests/native-lib/fail/pass_struct_expose_only_range.rs:LL:CC + | +LL | let _val = *std::ptr::with_exposed_provenance::<u8>(structs[0].ptr.addr()); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see
https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at tests/native-lib/fail/pass_struct_expose_only_range.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error; 1 warning emitted + diff --git a/src/tools/miri/tests/native-lib/fail/struct_not_extern_c.rs b/src/tools/miri/tests/native-lib/fail/struct_not_extern_c.rs new file mode 100644 index 0000000000000..cf8315e0fd9d1 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/struct_not_extern_c.rs @@ -0,0 +1,19 @@ +// Only works on Unix targets +//@ignore-target: windows wasm +//@only-on-host + +#![allow(improper_ctypes)] + +pub struct PassMe { + pub value: i32, + pub other_value: i64, +} + +extern "C" { + fn pass_struct(s: PassMe) -> i64; +} + +fn main() { + let pass_me = PassMe { value: 42, other_value: 1337 }; + unsafe { pass_struct(pass_me) }; //~ ERROR: unsupported operation: passing a non-#[repr(C)] struct over FFI +} diff --git a/src/tools/miri/tests/native-lib/fail/struct_not_extern_c.stderr b/src/tools/miri/tests/native-lib/fail/struct_not_extern_c.stderr new file mode 100644 index 0000000000000..90e59a31da429 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/struct_not_extern_c.stderr @@ -0,0 +1,14 @@ +error: unsupported operation: passing a non-#[repr(C)] struct over FFI: PassMe + --> tests/native-lib/fail/struct_not_extern_c.rs:LL:CC + | +LL | unsafe { pass_struct(pass_me) }; + | ^^^^^^^^^^^^^^^^^^^^ unsupported operation occurred here + | + = help: this is likely not a bug in the program; it indicates that the program performed an operation that Miri does not support + = note: BACKTRACE: + = note: inside `main` at tests/native-lib/fail/struct_not_extern_c.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/native-lib/fail/uninit_struct.rs b/src/tools/miri/tests/native-lib/fail/uninit_struct.rs new file mode 100644 index 0000000000000..cf61c7f3915a6 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/uninit_struct.rs @@ -0,0 +1,27 @@ +#[repr(C)] +#[derive(Copy, Clone)] +struct ComplexStruct { + part_1: Part1, + part_2: Part2, + part_3: u32, +} +#[repr(C)] +#[derive(Copy, Clone)] +struct Part1 { + high: u16, + low: u16, +} +#[repr(C)] +#[derive(Copy, Clone)] +struct Part2 { + bits: u32, +} + +extern "C" { + fn pass_struct_complex(s: ComplexStruct, high: u16, low: u16, bits: u32) -> i32; +} + +fn main() { + let arg = std::mem::MaybeUninit::<ComplexStruct>::uninit(); + unsafe { pass_struct_complex(*arg.as_ptr(), 0, 0, 0) }; //~ ERROR: Undefined Behavior: constructing invalid value +} diff --git a/src/tools/miri/tests/native-lib/fail/uninit_struct.stderr b/src/tools/miri/tests/native-lib/fail/uninit_struct.stderr new file mode 100644 index 0000000000000..0fe6ad9c77bbc --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/uninit_struct.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: constructing invalid value at .part_1.high: encountered uninitialized memory, but expected an integer + --> tests/native-lib/fail/uninit_struct.rs:LL:CC + | +LL | unsafe { pass_struct_complex(*arg.as_ptr(), 0, 0, 0) }; + | ^^^^^^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see
https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at tests/native-lib/fail/uninit_struct.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/native-lib/pass/aggregate_arguments.rs b/src/tools/miri/tests/native-lib/pass/aggregate_arguments.rs new file mode 100644 index 0000000000000..55acb240612f8 --- /dev/null +++ b/src/tools/miri/tests/native-lib/pass/aggregate_arguments.rs @@ -0,0 +1,52 @@ +fn main() { + test_pass_struct(); + test_pass_struct_complex(); +} + +/// Test passing a basic struct as an argument. +fn test_pass_struct() { + // Exactly two fields, so that we hit the ScalarPair case. + #[repr(C)] + struct PassMe { + value: i32, + other_value: i64, + } + + extern "C" { + fn pass_struct(s: PassMe) -> i64; + } + + let pass_me = PassMe { value: 42, other_value: 1337 }; + assert_eq!(unsafe { pass_struct(pass_me) }, 42 + 1337); +} + +/// Test passing a more complex struct as an argument. +fn test_pass_struct_complex() { + #[repr(C)] + struct ComplexStruct { + part_1: Part1, + part_2: Part2, + part_3: u32, + } + #[repr(C)] + struct Part1 { + high: u16, + low: u16, + } + #[repr(C)] + struct Part2 { + bits: u32, + } + + extern "C" { + fn pass_struct_complex(s: ComplexStruct, high: u16, low: u16, bits: u32) -> i32; + } + + let high = 0xabcd; + let low = 0xef01; + let bits = 0xabcdef01; + + let complex = + ComplexStruct { part_1: Part1 { high, low }, part_2: Part2 { bits }, part_3: bits }; + assert_eq!(unsafe { pass_struct_complex(complex, high, low, bits) }, 0); +} diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs b/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs index 4f3c37f00c1d7..bab73f7cf1782 100644 --- a/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs +++ b/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs @@ -1,6 +1,7 @@ //@revisions: trace notrace //@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu //@[trace] compile-flags: -Zmiri-native-lib-enable-tracing +//@compile-flags: -Zmiri-permissive-provenance fn main() { test_access_pointer(); diff --git a/src/tools/miri/tests/pass-dep/shims/windows-fs.rs b/src/tools/miri/tests/pass-dep/shims/windows-fs.rs index 4ca19046b676b..7b756603d929b 100644 --- a/src/tools/miri/tests/pass-dep/shims/windows-fs.rs +++ b/src/tools/miri/tests/pass-dep/shims/windows-fs.rs @@ -2,20 +2,20 @@ //@compile-flags: -Zmiri-disable-isolation #![allow(nonstandard_style)] -use std::io::{ErrorKind, Read, Write}; +use std::io::{ErrorKind, Read, Seek, SeekFrom, Write}; use std::os::windows::ffi::OsStrExt; -use std::os::windows::io::AsRawHandle; +use std::os::windows::io::{AsRawHandle, FromRawHandle}; use std::path::Path; -use std::{fs, ptr}; +use std::{fs, mem, ptr}; #[path = "../../utils/mod.rs"] mod utils; use windows_sys::Wdk::Storage::FileSystem::{NtReadFile, NtWriteFile}; use windows_sys::Win32::Foundation::{ - CloseHandle, ERROR_ACCESS_DENIED, ERROR_ALREADY_EXISTS, ERROR_IO_DEVICE, GENERIC_READ, - GENERIC_WRITE, GetLastError, RtlNtStatusToDosError, STATUS_ACCESS_DENIED, - STATUS_IO_DEVICE_ERROR, STATUS_SUCCESS, SetLastError, + CloseHandle, DUPLICATE_SAME_ACCESS, DuplicateHandle, ERROR_ACCESS_DENIED, ERROR_ALREADY_EXISTS, + ERROR_IO_DEVICE, FALSE, GENERIC_READ, GENERIC_WRITE, GetLastError, RtlNtStatusToDosError, + STATUS_ACCESS_DENIED, STATUS_IO_DEVICE_ERROR, 
STATUS_SUCCESS, SetLastError, }; use windows_sys::Win32::Storage::FileSystem::{ BY_HANDLE_FILE_INFORMATION, CREATE_ALWAYS, CREATE_NEW, CreateFileW, DeleteFileW, @@ -24,6 +24,7 @@ use windows_sys::Win32::Storage::FileSystem::{ FILE_SHARE_WRITE, GetFileInformationByHandle, OPEN_ALWAYS, OPEN_EXISTING, SetFilePointerEx, }; use windows_sys::Win32::System::IO::IO_STATUS_BLOCK; +use windows_sys::Win32::System::Threading::GetCurrentProcess; fn main() { unsafe { @@ -36,6 +37,7 @@ fn main() { test_ntstatus_to_dos(); test_file_read_write(); test_file_seek(); + test_dup_handle(); } } @@ -273,6 +275,39 @@ unsafe fn test_file_read_write() { assert_eq!(GetLastError(), 1234); } +unsafe fn test_dup_handle() { + let temp = utils::tmp().join("test_dup.txt"); + + let mut file1 = fs::File::options().read(true).write(true).create(true).open(&temp).unwrap(); + + file1.write_all(b"Hello, World!\n").unwrap(); + file1.seek(SeekFrom::Start(0)).unwrap(); + + let first_handle = file1.as_raw_handle(); + + let cur_proc = GetCurrentProcess(); + let mut second_handle = mem::zeroed(); + let res = DuplicateHandle( + cur_proc, + first_handle, + cur_proc, + &mut second_handle, + 0, + FALSE, + DUPLICATE_SAME_ACCESS, + ); + assert!(res != 0); + + let mut buf1 = [0; 5]; + file1.read(&mut buf1).unwrap(); + assert_eq!(&buf1, b"Hello"); + + let mut file2 = fs::File::from_raw_handle(second_handle); + let mut buf2 = [0; 5]; + file2.read(&mut buf2).unwrap(); + assert_eq!(&buf2, b", Wor"); +} + unsafe fn test_file_seek() { let temp = utils::tmp().join("test_file_seek.txt"); let mut file = fs::File::options().create(true).write(true).read(true).open(&temp).unwrap(); diff --git a/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs b/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs index 6a625e597df18..82976326a8df3 100644 --- a/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs +++ b/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs @@ -23,7 +23,8 @@ fn main() { not_unpin_not_protected(); write_does_not_invalidate_all_aliases(); box_into_raw_allows_interior_mutable_alias(); - cell_inside_struct() + cell_inside_struct(); + zst(); } // Make sure that reading from an `&mut` does, like reborrowing to `&`, @@ -287,3 +288,22 @@ fn cell_inside_struct() { // Writing to `field1`, which is reserved, should also be allowed. (*a).field1 = 88; } + +/// ZST reborrows on various kinds of dangling pointers are valid. +fn zst() { + unsafe { + // Integer pointer. + let ptr = ptr::without_provenance_mut::<()>(15); + let _ref = &mut *ptr; + + // Out-of-bounds pointer. + let mut b = Box::new(0u8); + let ptr = (&raw mut *b).wrapping_add(15) as *mut (); + let _ref = &mut *ptr; + + // Deallocated pointer. + let ptr = &raw mut *b as *mut (); + drop(b); + let _ref = &mut *ptr; + } +} diff --git a/src/tools/miri/tests/pass/concurrency/sync.rs b/src/tools/miri/tests/pass/concurrency/sync.rs index a92359758dadf..142ac9cc8ca49 100644 --- a/src/tools/miri/tests/pass/concurrency/sync.rs +++ b/src/tools/miri/tests/pass/concurrency/sync.rs @@ -9,7 +9,8 @@ use std::time::{Duration, Instant}; // We are expecting to sleep for 10ms. How long of a sleep we are accepting? // Even with 1000ms we still see this test fail on macOS runners. -const MAX_SLEEP_TIME_MS: u64 = 2000; +// On a aarch64-pc-windows-msvc runner, we saw 2.7s! +const MAX_SLEEP_TIME_MS: u64 = 4000; // Check if Rust barriers are working. 
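As background for the `short_fd_operations` change in `src/tools/miri/src/shims/unix/fd.rs` above: the nondeterministic short reads only affect programs that do not already loop until their buffer is full. A minimal sketch of such a loop follows; the `read_full` helper name is illustrative and not an API used by this patch.

use std::io::{self, Read};

// Read exactly `buf.len()` bytes, retrying on short reads; EOF before the
// buffer is full is reported as an error.
fn read_full<R: Read>(mut src: R, buf: &mut [u8]) -> io::Result<()> {
    let mut filled = 0;
    while filled < buf.len() {
        match src.read(&mut buf[filled..]) {
            Ok(0) => return Err(io::ErrorKind::UnexpectedEof.into()),
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(())
}

fn main() -> io::Result<()> {
    // A short read by the OS (or by Miri's nondeterminism) only fills part of
    // the buffer; the loop above keeps going until all 4 bytes have arrived.
    let mut buf = [0u8; 4];
    read_full(&b"abcd"[..], &mut buf)?;
    assert_eq!(&buf, b"abcd");
    Ok(())
}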
diff --git a/src/tools/miri/tests/pass/float.rs b/src/tools/miri/tests/pass/float.rs index fe7316c6680df..2be262d76a46e 100644 --- a/src/tools/miri/tests/pass/float.rs +++ b/src/tools/miri/tests/pass/float.rs @@ -39,9 +39,8 @@ macro_rules! assert_approx_eq { }}; ($a:expr, $b: expr) => { - // accept up to 12ULP (4ULP for host floats and 4ULP for miri artificial error and 4 for any additional effects - // due to having multiple error sources. - assert_approx_eq!($a, $b, 12); + // accept up to 8ULP (4ULP for host floats and 4ULP for miri artificial error). + assert_approx_eq!($a, $b, 8); }; } @@ -176,6 +175,7 @@ fn assert_eq_msg(x: T, y: T, msg: impl Display) { } /// Check that floats have bitwise equality +#[track_caller] fn assert_biteq(a: F, b: F, msg: impl Display) { let ab = a.to_bits(); let bb = b.to_bits(); @@ -189,6 +189,7 @@ fn assert_biteq(a: F, b: F, msg: impl Display) { } /// Check that two floats have equality +#[track_caller] fn assert_feq(a: F, b: F, msg: impl Display) { let ab = a.to_bits(); let bb = b.to_bits(); diff --git a/src/tools/miri/tests/pass/float_extra_rounding_error.rs b/src/tools/miri/tests/pass/float_extra_rounding_error.rs new file mode 100644 index 0000000000000..24d7cf2cceee2 --- /dev/null +++ b/src/tools/miri/tests/pass/float_extra_rounding_error.rs @@ -0,0 +1,31 @@ +//! Check that the flags to control the extra rounding error work. +//@revisions: random max none +//@[max]compile-flags: -Zmiri-max-extra-rounding-error +//@[none]compile-flags: -Zmiri-no-extra-rounding-error +#![feature(cfg_select)] + +use std::collections::HashSet; +use std::hint::black_box; + +fn main() { + let expected = cfg_select! { + random => 9, // -4 ..= +4 ULP error + max => 2, + none => 1, + }; + // Call `sin(0.5)` a bunch of times and see how many different values we get. + let mut values = HashSet::new(); + for _ in 0..(expected * 16) { + let val = black_box(0.5f64).sin(); + values.insert(val.to_bits()); + } + assert_eq!(values.len(), expected); + + if !cfg!(none) { + // Ensure the smallest and biggest value are 8 ULP apart. + // We can just subtract the raw bit representations for this. + let min = *values.iter().min().unwrap(); + let max = *values.iter().max().unwrap(); + assert_eq!(min.abs_diff(max), 8); + } +} diff --git a/src/tools/miri/tests/pass/shims/fs.rs b/src/tools/miri/tests/pass/shims/fs.rs index e7f11c54704d4..022dcc5dcbafa 100644 --- a/src/tools/miri/tests/pass/shims/fs.rs +++ b/src/tools/miri/tests/pass/shims/fs.rs @@ -72,7 +72,9 @@ fn test_file() { // Writing to a file opened for reading should error (and not stop interpretation). std does not // categorize the error so we don't check for details. - file.write(&[]).unwrap_err(); + file.write(&[0]).unwrap_err(); + // However, writing 0 bytes can succeed or fail. + let _ignore = file.write(&[]); // Removing file should succeed. remove_file(&path).unwrap(); diff --git a/src/tools/miri/tests/pass/shims/x86/rounding-error.rs b/src/tools/miri/tests/pass/shims/x86/rounding-error.rs new file mode 100644 index 0000000000000..bf56111b2e496 --- /dev/null +++ b/src/tools/miri/tests/pass/shims/x86/rounding-error.rs @@ -0,0 +1,37 @@ +// We're testing x86 target specific features +//@only-target: x86_64 i686 + +//! rsqrt and rcp SSE/AVX operations are approximate. We use that as license to treat them as +//! non-deterministic. Ensure that we do indeed see random results within the expected error bounds. 
+ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; +use std::collections::HashSet; + +fn main() { + let mut vals = HashSet::new(); + for _ in 0..50 { + unsafe { + // Compute the inverse square root of 4.0, four times. + let a = _mm_setr_ps(4.0, 4.0, 4.0, 4.0); + let exact = 0.5; + let r = _mm_rsqrt_ps(a); + let r: [f32; 4] = std::mem::transmute(r); + // Check the results. + for r in r { + vals.insert(r.to_bits()); + // Ensure the relative error is less than 2^-12. + let rel_error = (r - exact) / exact; + let log_error = rel_error.abs().log2(); + assert!( + rel_error == 0.0 || log_error < -12.0, + "got an error of {rel_error} = 2^{log_error}" + ); + } + } + } + // Ensure we saw a bunch of different results. + assert!(vals.len() >= 50); +} diff --git a/src/tools/miri/tests/pass/tree_borrows/cell-lazy-write-to-surrounding.rs b/src/tools/miri/tests/pass/tree_borrows/cell-lazy-write-to-surrounding.rs index abe08f2cd2261..7352784ac7a5e 100644 --- a/src/tools/miri/tests/pass/tree_borrows/cell-lazy-write-to-surrounding.rs +++ b/src/tools/miri/tests/pass/tree_borrows/cell-lazy-write-to-surrounding.rs @@ -14,9 +14,11 @@ fn main() { foo(&arr[0]); let pair = (Cell::new(1), 1); - // TODO: Ideally, this would result in UB since the second element - // in `pair` is Frozen. We would need some way to express a - // "shared reference with permission to access surrounding - // interior mutable data". foo(&pair.0); + + // As long as the "inside" part is `!Freeze`, the permission to mutate the "outside" is preserved. + let pair = (Cell::new(()), 1); + let x = &pair.0; + let ptr = (&raw const *x).cast::().cast_mut(); + unsafe { ptr.write(0) }; } diff --git a/src/tools/miri/tests/ui.rs b/src/tools/miri/tests/ui.rs index f021d5194cd93..b7286d9a3673a 100644 --- a/src/tools/miri/tests/ui.rs +++ b/src/tools/miri/tests/ui.rs @@ -60,6 +60,7 @@ fn build_native_lib(target: &str) -> PathBuf { native_lib_path.to_str().unwrap(), // FIXME: Automate gathering of all relevant C source files in the directory. "tests/native-lib/scalar_arguments.c", + "tests/native-lib/aggregate_arguments.c", "tests/native-lib/ptr_read_access.c", "tests/native-lib/ptr_write_access.c", // Ensure we notice serious problems in the C code. diff --git a/src/tools/miri/tests/utils/libc.rs b/src/tools/miri/tests/utils/libc.rs index 1a3cd067c04bf..4757a5a268c65 100644 --- a/src/tools/miri/tests/utils/libc.rs +++ b/src/tools/miri/tests/utils/libc.rs @@ -34,10 +34,7 @@ pub unsafe fn write_all( if res < 0 { return res; } - if res == 0 { - // EOF? - break; - } + // Apparently a return value of 0 is just a short write, nothing special (unlike reads). written_so_far += res as libc::size_t; } return written_so_far as libc::ssize_t; diff --git a/tests/run-make/multiline-args-value/cfg.stderr b/tests/run-make/multiline-args-value/cfg.stderr new file mode 100644 index 0000000000000..9b06f84be4884 --- /dev/null +++ b/tests/run-make/multiline-args-value/cfg.stderr @@ -0,0 +1,4 @@ +error: invalid `--cfg` argument: `--- + --- + key` (expected `key` or `key="value"`) + diff --git a/tests/run-make/multiline-args-value/check-cfg.stderr b/tests/run-make/multiline-args-value/check-cfg.stderr new file mode 100644 index 0000000000000..4a499cc0a1e24 --- /dev/null +++ b/tests/run-make/multiline-args-value/check-cfg.stderr @@ -0,0 +1,7 @@ +error: invalid `--check-cfg` argument: `--- + --- + cfg(key)` + | + = note: expected `cfg(name, values("value1", "value2", ... 
"valueN"))` + = note: visit for more details + diff --git a/tests/run-make/multiline-args-value/rmake.rs b/tests/run-make/multiline-args-value/rmake.rs new file mode 100644 index 0000000000000..825cbd158c0a3 --- /dev/null +++ b/tests/run-make/multiline-args-value/rmake.rs @@ -0,0 +1,34 @@ +use run_make_support::{cwd, diff, rustc}; + +fn test_and_compare(flag: &str, val: &str) { + let mut cmd = rustc(); + + let output = + cmd.input("").arg("--crate-type=lib").arg(&format!("--{flag}")).arg(val).run_fail(); + + assert_eq!(output.stdout_utf8(), ""); + diff() + .expected_file(format!("{flag}.stderr")) + .actual_text("output", output.stderr_utf8()) + .run(); +} + +fn main() { + // Verify that frontmatter isn't allowed in `--cfg` arguments. + // https://github.com/rust-lang/rust/issues/146130 + test_and_compare( + "cfg", + r#"--- +--- +key"#, + ); + + // Verify that frontmatter isn't allowed in `--check-cfg` arguments. + // https://github.com/rust-lang/rust/issues/146130 + test_and_compare( + "check-cfg", + r#"--- +--- +cfg(key)"#, + ); +} diff --git a/tests/ui/drop/if-let-super-let.rs b/tests/ui/drop/if-let-super-let.rs new file mode 100644 index 0000000000000..c6543e6d3dc97 --- /dev/null +++ b/tests/ui/drop/if-let-super-let.rs @@ -0,0 +1,112 @@ +//! Test for #145328: ensure the lifetime of a `super let` binding within an `if let` scrutinee is +//! at most the scope of the `if` condition's temporaries. Additionally, test `pin!` since it's +//! implemented in terms of `super let` and exposes this behavior. +//@ run-pass +//@ revisions: e2021 e2024 +//@ [e2021] edition: 2021 +//@ [e2024] edition: 2024 + +#![feature(if_let_guard)] +#![feature(super_let)] +#![expect(irrefutable_let_patterns)] + +use std::cell::RefCell; +use std::pin::pin; + +fn main() { + // The `super let` bindings here should have the same scope as `if let` temporaries. + // In Rust 2021, this means it lives past the end of the `if` expression. + // In Rust 2024, this means it lives to the end of the `if`'s success block. + assert_drop_order(0..=2, |o| { + #[cfg(e2021)] + ( + if let _ = { super let _x = o.log(2); } { o.push(0) }, + o.push(1), + ); + #[cfg(e2024)] + ( + if let _ = { super let _x = o.log(1); } { o.push(0) }, + o.push(2), + ); + }); + assert_drop_order(0..=2, |o| { + #[cfg(e2021)] + ( + if let true = { super let _x = o.log(2); false } {} else { o.push(0) }, + o.push(1), + ); + #[cfg(e2024)] + ( + if let true = { super let _x = o.log(0); false } {} else { o.push(1) }, + o.push(2), + ); + }); + + // `pin!` should behave likewise. + assert_drop_order(0..=2, |o| { + #[cfg(e2021)] (if let _ = pin!(o.log(2)) { o.push(0) }, o.push(1)); + #[cfg(e2024)] (if let _ = pin!(o.log(1)) { o.push(0) }, o.push(2)); + }); + assert_drop_order(0..=2, |o| { + #[cfg(e2021)] + ( + if let None = Some(pin!(o.log(2))) {} else { o.push(0) }, + o.push(1), + ); + #[cfg(e2024)] + ( + if let None = Some(pin!(o.log(0))) {} else { o.push(1) }, + o.push(2), + ); + }); + + // `super let` bindings' scope should also be consistent with `if let` temporaries in guards. + // Here, that means the `super let` binding in the second guard condition operand should be + // dropped before the first operand's temporary. This is consistent across Editions. 
+ assert_drop_order(0..=1, |o| { + match () { + _ if let _ = o.log(1) + && let _ = { super let _x = o.log(0); } => {} + _ => unreachable!(), + } + }); + assert_drop_order(0..=1, |o| { + match () { + _ if let _ = o.log(1) + && let _ = pin!(o.log(0)) => {} + _ => unreachable!(), + } + }); +} + +// # Test scaffolding... + +struct DropOrder(RefCell<Vec<u64>>); +struct LogDrop<'o>(&'o DropOrder, u64); + +impl DropOrder { + fn log(&self, n: u64) -> LogDrop<'_> { + LogDrop(self, n) + } + fn push(&self, n: u64) { + self.0.borrow_mut().push(n); + } +} + +impl<'o> Drop for LogDrop<'o> { + fn drop(&mut self) { + self.0.push(self.1); + } +} + +#[track_caller] +fn assert_drop_order( + ex: impl IntoIterator<Item = u64>, + f: impl Fn(&DropOrder), +) { + let order = DropOrder(RefCell::new(Vec::new())); + f(&order); + let order = order.0.into_inner(); + let expected: Vec<u64> = ex.into_iter().collect(); + assert_eq!(order, expected); +} diff --git a/tests/ui/linking/mixed-allocator-shim.rs b/tests/ui/linking/mixed-allocator-shim.rs new file mode 100644 index 0000000000000..e4f20a11ebb37 --- /dev/null +++ b/tests/ui/linking/mixed-allocator-shim.rs @@ -0,0 +1,16 @@ +//@ build-pass +//@ compile-flags: --crate-type staticlib,dylib -Zstaticlib-prefer-dynamic +//@ no-prefer-dynamic +//@ needs-crate-type: dylib + +// Test that compiling for multiple crate types in a single compilation with +// mismatching allocator shim requirements doesn't result in the allocator shim +// missing entirely. +// In this particular test the dylib crate type will statically link libstd and +// thus need an allocator shim, while the staticlib crate type will dynamically +// link libstd and thus not need an allocator shim. +// The -Zstaticlib-prefer-dynamic flag could be avoided by doing it the other +// way around, but testing that the staticlib correctly has the allocator shim +// in that case would require a run-make test instead. + +pub fn foo() {} diff --git a/tests/ui/sanitizer/cfi/no_builtins.rs b/tests/ui/sanitizer/cfi/no_builtins.rs deleted file mode 100644 index 949057689aba1..0000000000000 --- a/tests/ui/sanitizer/cfi/no_builtins.rs +++ /dev/null @@ -1,22 +0,0 @@ -// Verifies that `#![no_builtins]` crates can be built with linker-plugin-lto and CFI. -// See Issue #142284 -// -//@ needs-sanitizer-cfi -//@ compile-flags: -Clinker-plugin-lto -Copt-level=0 -Zsanitizer=cfi -Ctarget-feature=-crt-static -//@ compile-flags: --crate-type rlib -//@ build-pass - -#![no_builtins] -#![no_std] - -pub static FUNC: fn() = initializer; - -pub fn initializer() { - call(fma_with_fma); -} - -pub fn call(fn_ptr: fn()) { - fn_ptr(); -} - -pub fn fma_with_fma() {}
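Related to the `src/tools/miri/tests/utils/libc.rs` change above: a return value of 0 from `write` on a non-empty buffer is just a short write, unlike `read`, where 0 means EOF. A minimal sketch of a write loop built on that distinction follows; it assumes the `libc` crate as a dependency, and the `write_all_fd` helper name is illustrative, not part of this patch.

use std::io;

fn write_all_fd(fd: libc::c_int, mut buf: &[u8]) -> io::Result<()> {
    while !buf.is_empty() {
        // SAFETY: `buf` points to `buf.len()` valid bytes for the duration of the call.
        let res = unsafe { libc::write(fd, buf.as_ptr().cast(), buf.len()) };
        if res < 0 {
            let err = io::Error::last_os_error();
            if err.kind() == io::ErrorKind::Interrupted {
                continue; // EINTR: retry the same write.
            }
            return Err(err);
        }
        // `res == 0` is just a short write (unlike `read`, where 0 means EOF),
        // so loop and try again with the remaining bytes.
        buf = &buf[res as usize..];
    }
    Ok(())
}

fn main() -> io::Result<()> {
    // Write to stdout (fd 1).
    write_all_fd(1, b"hello\n")
}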