From 11b6f3b0e270112fd75d2751b0d1df23eb1b0a5e Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Sun, 1 Jun 2025 01:25:46 +0100 Subject: [PATCH 1/6] .cargo/root, CARGO_ROOT, --root limit config + workspace searches If a root directory is specified then cargo will only search ancestors looking for `.cargo` config files and workspaces up until the root directory is reached (instead of walking to the root of the filesystem). A root can be specified in three ways: 1. The existence of a `.cargo/root` file (discovered by checking parents up towards the root of the filesystem) 2. By setting the `CARGO_ROOT` environment variable 3. By passing `--root` on the command line If more than one is specified then the effective root is the one that's most-specific / closest to the current working directory. ### What does this PR try to resolve? Fixes: #5418 (for my use case, #6805 isn't a practical workaround) This goes some way to allow nesting of workspaces, by adding a way to limit the directories that Cargo searches when looking for config files and workspace manifests. This does not enable nesting in the form of workspace inheritance - but does provide the means to technically have a filesystem with nested workspaces (that aren't aware of each other) and be able to hide any outer (unrelated) workspace while building a nested workspace. This gives more flexibility for tools that want to use cargo as an implementation detail. In particular this allows you to sandbox the build of nested third-party workspaces that may be (unknowingly) dynamically unpacked within an outer workspace, in situations where neither the workspace being built nor the outer workspace is owned by the tool that is managing the build. 
For example a tool based on rustdoc-json should be able to fetch and build documentation for third-party crates under any user-specified build/target directory without having to worry about spooky action at a distance due to config files and workspaces in ancestor directories. In my case, I have a runtime for coding with LLMs that is given a repo to work on and is expected to keep its artifacts contained to a `.ai/` directory. This runtime supports building markdown documentation for Rust crates, which involves using cargo to generate rustdoc-json data. That tool is expected to keep its artifacts contained within `.ai/docs/rust/build/`. It's possible that the project itself is Rust based and could define a workspace or `.cargo/config.toml` but from the pov of this toolchain those have nothing to do with the crate whose documentation is being generated (which are packages downloaded from crates.io). ### How to test and review this PR? TODO: write tests --- src/bin/cargo/cli.rs | 77 +++++++++++++++++++ src/cargo/core/workspace.rs | 4 +- src/cargo/util/context/mod.rs | 5 ++ src/doc/src/reference/config.md | 13 ++++ .../src/reference/environment-variables.md | 4 + 5 files changed, 101 insertions(+), 2 deletions(-) diff --git a/src/bin/cargo/cli.rs b/src/bin/cargo/cli.rs index 188fafdc9e5..32e38fba032 100644 --- a/src/bin/cargo/cli.rs +++ b/src/bin/cargo/cli.rs @@ -2,6 +2,7 @@ use anyhow::{anyhow, Context as _}; use cargo::core::{features, CliUnstable}; use cargo::util::context::TermConfig; use cargo::{drop_print, drop_println, CargoResult}; +use cargo_util::paths; use clap::builder::UnknownArgumentValueParser; use itertools::Itertools; use std::collections::HashMap; @@ -17,6 +18,34 @@ use crate::util::is_rustup; use cargo::core::shell::ColorChoice; use cargo::util::style; +fn closest_valid_root<'a>( + cwd: &std::path::Path, + config_root: Option<&'a std::path::Path>, + env_root: Option<&'a std::path::Path>, + cli_root: Option<&'a std::path::Path>, +) -> anyhow::Result> 
{ + for (name, root) in [ + (".cargo/root", config_root), + ("CARGO_ROOT", env_root), + ("--root", cli_root), + ] { + if let Some(root) = root { + if !cwd.starts_with(root) { + return Err(anyhow::format_err!( + "the {} `{}` is not a parent of the current working directory `{}`", + name, + root.display(), + cwd.display() + )); + } + } + } + Ok([config_root, env_root, cli_root] + .into_iter() + .flatten() + .max_by_key(|root| root.components().count())) +} + #[tracing::instrument(skip_all)] pub fn main(gctx: &mut GlobalContext) -> CliResult { // CAUTION: Be careful with using `config` until it is configured below. @@ -25,6 +54,7 @@ pub fn main(gctx: &mut GlobalContext) -> CliResult { let args = cli(gctx).try_get_matches()?; + let mut need_reload = false; // Update the process-level notion of cwd if let Some(new_cwd) = args.get_one::("directory") { // This is a temporary hack. @@ -46,6 +76,45 @@ pub fn main(gctx: &mut GlobalContext) -> CliResult { .into()); } std::env::set_current_dir(&new_cwd).context("could not change to requested directory")?; + need_reload = true; + } + + // A root directory can be specified via CARGO_ROOT, --root or the existence of a `.cargo/root` file. + // If more than one is specified, the effective root is the one closest to the current working directory. 
+ + let cwd = std::env::current_dir().context("could not get current working directory")?; + // Windows UNC paths are OK here + let cwd = cwd + .canonicalize() + .context("could not canonicalize current working directory")?; + let config_root = paths::ancestors(&cwd, gctx.search_stop_path()) + .find(|current| current.join(".cargo").join("root").exists()); + let env_root = gctx + .get_env_os("CARGO_ROOT") + .map(std::path::PathBuf::from) + .map(|p| { + p.canonicalize() + .context("could not canonicalize CARGO_ROOT") + }) + .transpose()?; + let env_root = env_root.as_deref(); + + let cli_root = args + .get_one::("root") + .map(|p| { + p.canonicalize() + .context("could not canonicalize requested root directory") + }) + .transpose()?; + let cli_root = cli_root.as_deref(); + + if let Some(root) = closest_valid_root(&cwd, config_root, env_root, cli_root)? { + tracing::debug!("root directory: {}", root.display()); + gctx.set_search_stop_path(root); + need_reload = true; + } + + if need_reload { gctx.reload_cwd()?; } @@ -640,6 +709,14 @@ See 'cargo help <>' for more information on a sp .value_parser(["auto", "always", "never"]) .ignore_case(true), ) + .arg( + Arg::new("root") + .help("Define a root that limits searching for workspaces and .cargo/ directories") + .long("root") + .value_name("ROOT") + .value_hint(clap::ValueHint::DirPath) + .value_parser(clap::builder::ValueParser::path_buf()), + ) .arg( Arg::new("directory") .help("Change to DIRECTORY before doing anything (nightly-only)") diff --git a/src/cargo/core/workspace.rs b/src/cargo/core/workspace.rs index 57e50ebefee..642d49138a2 100644 --- a/src/cargo/core/workspace.rs +++ b/src/cargo/core/workspace.rs @@ -2028,7 +2028,7 @@ fn find_workspace_root_with_loader( let roots = gctx.ws_roots.borrow(); // Iterate through the manifests parent directories until we find a workspace // root. 
Note we skip the first item since that is just the path itself - for current in manifest_path.ancestors().skip(1) { + for current in paths::ancestors(manifest_path, gctx.search_stop_path()).skip(1) { if let Some(ws_config) = roots.get(current) { if !ws_config.is_excluded(manifest_path) { // Add `Cargo.toml` since ws_root is the root and not the file @@ -2061,7 +2061,7 @@ fn find_root_iter<'a>( manifest_path: &'a Path, gctx: &'a GlobalContext, ) -> impl Iterator + 'a { - LookBehind::new(paths::ancestors(manifest_path, None).skip(2)) + LookBehind::new(paths::ancestors(manifest_path, gctx.search_stop_path()).skip(2)) .take_while(|path| !path.curr.ends_with("target/package")) // Don't walk across `CARGO_HOME` when we're looking for the // workspace root. Sometimes a package will be organized with diff --git a/src/cargo/util/context/mod.rs b/src/cargo/util/context/mod.rs index 93f5e7d6106..86d72696a45 100644 --- a/src/cargo/util/context/mod.rs +++ b/src/cargo/util/context/mod.rs @@ -567,6 +567,11 @@ impl GlobalContext { } } + /// Gets the path where ancestor config file and workspace searching will stop. + pub fn search_stop_path(&self) -> Option<&Path> { + self.search_stop_path.as_deref() + } + /// Sets the path where ancestor config file searching will stop. The /// given path is included, but its ancestors are not. pub fn set_search_stop_path>(&mut self, path: P) { diff --git a/src/doc/src/reference/config.md b/src/doc/src/reference/config.md index 105de8f502c..77bc1c8cf10 100644 --- a/src/doc/src/reference/config.md +++ b/src/doc/src/reference/config.md @@ -44,6 +44,19 @@ those configuration files if it is invoked from the workspace root > and is the preferred form. If both files exist, Cargo will use the file > without the extension. +The root of the search hierarchy can be constrained in three ways: + +1. By creating a `.cargo/root` file (empty) +2. By setting the `CARGO_ROOT` environment variable +3. Passing `--root`. 
+ +If a root directory is given then Cargo will search parent directories up until +it reaches the root directory, instead of searching all the way up to the root +of the filesystem. Cargo will still check `$CARGO_HOME/config.toml` even if it +is outside of the root directory. If multiple paths are specified then the +effective root is the one that's most-specific (closest to the current working +directory). + ## Configuration format Configuration files are written in the [TOML format][toml] (like the diff --git a/src/doc/src/reference/environment-variables.md b/src/doc/src/reference/environment-variables.md index ae29e6f38e4..76d0225419c 100644 --- a/src/doc/src/reference/environment-variables.md +++ b/src/doc/src/reference/environment-variables.md @@ -20,6 +20,10 @@ system: location of this directory. Once a crate is cached it is not removed by the clean command. For more details refer to the [guide](../guide/cargo-home.md). +* `CARGO_ROOT` --- Instead of letting Cargo search every ancestor directory, up + to the root of the filesystem, looking for `.cargo` config directories or + workspace manifests, this limits how far Cargo can search. It doesn't stop + Cargo from reading `$CARGO_HOME/config.toml`, even if it's outside the root. * `CARGO_TARGET_DIR` --- Location of where to place all generated artifacts, relative to the current working directory. See [`build.target-dir`] to set via config. From 9ad19bdcda4ebbd583fc9597d5fc30d4ee3030b6 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Wed, 4 Jun 2025 21:25:49 +0100 Subject: [PATCH 2/6] Allow a list of roots in CARGO_ROOTS and repeat --root args This generalizes `CARGO_ROOTS` so it can accept a list of directories, similar to `GIT_CEILING_DIRECTORIES`. This way a default configuration could come from a user's `.bashrc` and could be extended later by specific build tools. Similarly this also makes it so `--root` can be passed repeatedly. 
--- src/bin/cargo/cli.rs | 125 +++++++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 51 deletions(-) diff --git a/src/bin/cargo/cli.rs b/src/bin/cargo/cli.rs index 32e38fba032..021184512ca 100644 --- a/src/bin/cargo/cli.rs +++ b/src/bin/cargo/cli.rs @@ -20,30 +20,23 @@ use cargo::util::style; fn closest_valid_root<'a>( cwd: &std::path::Path, - config_root: Option<&'a std::path::Path>, - env_root: Option<&'a std::path::Path>, - cli_root: Option<&'a std::path::Path>, + roots: &[&'a std::path::Path], ) -> anyhow::Result> { - for (name, root) in [ - (".cargo/root", config_root), - ("CARGO_ROOT", env_root), - ("--root", cli_root), - ] { - if let Some(root) = root { - if !cwd.starts_with(root) { - return Err(anyhow::format_err!( - "the {} `{}` is not a parent of the current working directory `{}`", - name, - root.display(), - cwd.display() - )); - } - } - } - Ok([config_root, env_root, cli_root] - .into_iter() - .flatten() - .max_by_key(|root| root.components().count())) + let cwd = cwd + .canonicalize() + .context("could not canonicalize current working directory")?; + + // Assumes that all roots are canonicalized. + let ancestor_roots = + roots + .iter() + .filter_map(|r| if cwd.starts_with(r) { Some(*r) } else { None }); + + Ok(ancestor_roots.into_iter().max_by_key(|root| { + // Prefer the root that is closest to the current working directory. + // This is done by counting the number of components in the path. + root.components().count() + })) } #[tracing::instrument(skip_all)] @@ -54,7 +47,6 @@ pub fn main(gctx: &mut GlobalContext) -> CliResult { let args = cli(gctx).try_get_matches()?; - let mut need_reload = false; // Update the process-level notion of cwd if let Some(new_cwd) = args.get_one::("directory") { // This is a temporary hack. 
@@ -76,47 +68,77 @@ pub fn main(gctx: &mut GlobalContext) -> CliResult { .into()); } std::env::set_current_dir(&new_cwd).context("could not change to requested directory")?; - need_reload = true; } - // A root directory can be specified via CARGO_ROOT, --root or the existence of a `.cargo/root` file. - // If more than one is specified, the effective root is the one closest to the current working directory. + // A root directories can be specified via CARGO_ROOTS, --root or the existence of a `.cargo/root` files. + // If more than one root is specified, the effective root is the one closest to the current working directory. + // If CARGO_ROOTS is not set, the user's home directory is used as a default root. let cwd = std::env::current_dir().context("could not get current working directory")?; // Windows UNC paths are OK here let cwd = cwd .canonicalize() .context("could not canonicalize current working directory")?; - let config_root = paths::ancestors(&cwd, gctx.search_stop_path()) + + // XXX: before looking for `.cargo/root` should we first try and resolve roots + // from `CARGO_ROOTS` + --root so we can avoid triggering automounter issues? + let root_marker = paths::ancestors(&cwd, gctx.search_stop_path()) .find(|current| current.join(".cargo").join("root").exists()); - let env_root = gctx - .get_env_os("CARGO_ROOT") - .map(std::path::PathBuf::from) - .map(|p| { - p.canonicalize() - .context("could not canonicalize CARGO_ROOT") - }) - .transpose()?; - let env_root = env_root.as_deref(); - - let cli_root = args - .get_one::("root") - .map(|p| { - p.canonicalize() - .context("could not canonicalize requested root directory") - }) - .transpose()?; - let cli_root = cli_root.as_deref(); - if let Some(root) = closest_valid_root(&cwd, config_root, env_root, cli_root)? 
{ + let mut roots: Vec = Vec::new(); + + if let Some(root_marker) = root_marker { + let pb = root_marker + .canonicalize() + .context("could not canonicalize .cargo/root")?; + roots.push(pb); + } + + if let Some(paths_os) = gctx.get_env_os("CARGO_ROOTS") { + for path in std::env::split_paths(&paths_os) { + let pb = path.canonicalize().context(format!( + "could not canonicalize CARGO_ROOTS entry `{}`", + path.display() + ))?; + roots.push(pb); + } + } else if let Some(home) = std::env::home_dir() { + // To be safe by default, and not attempt to read config files outside of the + // user's home directory, we implicitly add the home directory as a root. + // Ref: https://github.com/rust-lang/rfcs/pull/3279 + let home = home + .canonicalize() + .context("could not canonicalize home directory")?; + tracing::debug!( + "implicitly adding home directory as root: {}", + home.display() + ); + roots.push(home); + } + + if let Some(cli_roots) = args.get_many::("root") { + for cli_root in cli_roots { + let pb = cli_root + .canonicalize() + .context("could not canonicalize requested root directory")?; + roots.push(pb); + } + } + + let roots: Vec<_> = roots.iter().map(|p| p.as_path()).collect(); + + if let Some(root) = closest_valid_root(&cwd, &roots)? { tracing::debug!("root directory: {}", root.display()); gctx.set_search_stop_path(root); - need_reload = true; + } else { + tracing::debug!("root limited to cwd: {}", cwd.display()); + // If we are not running with _any_ root then we are conservative and don't + // allow any ancestor traversal. 
+ gctx.set_search_stop_path(&cwd); } - if need_reload { - gctx.reload_cwd()?; - } + // Reload now that we have established the cwd and root + gctx.reload_cwd()?; let (expanded_args, global_args) = expand_aliases(gctx, args, vec![])?; @@ -714,6 +736,7 @@ See 'cargo help <>' for more information on a sp .help("Define a root that limits searching for workspaces and .cargo/ directories") .long("root") .value_name("ROOT") + .action(ArgAction::Append) .value_hint(clap::ValueHint::DirPath) .value_parser(clap::builder::ValueParser::path_buf()), ) From 0416ba716fa1283710967ee48ef5b25c3b642671 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 5 Jun 2025 15:42:39 +0100 Subject: [PATCH 3/6] Resolve separate roots for config and manifest searches It's no longer assumed that we can resolve a single stop_at_root path for both finding config files and manifests. GlobalContext now maintains a sorted list of user-configured roots and caches two resolved SearchRoutes; one for loading config files, the other for manifests (both automatically invalidated if roots are added or start paths change). A `SearchRoute` represents a starting directory and the closest root directory, where parent traversal will stop. If searching starts from outside of all configured roots then no ancestor traversal is allowed. - This is a safety measure to reduce the risk of loading unsafe config files that aren't owned by the user (e.g. under `c:/.cargo` on Windows or `/tmp` on Linux) By default the user's home directory is added as a root if CARGO_ROOTS is not set. - Combined with the rule above, this means that Cargo has safe defaults and will not load configs outside of your home directory, unless you explicitly set CARGO_ROOTS. - This is also a measure to avoid triggering home directory automounters TODO: there still needs to be a special case for loading a project manifest, before searching for a workspace. 
Cargo should be allowed to walk ancestors outside of all configured roots, when loading the first Cargo.toml. In this case the directory of the manifest should immediately be set as a root. This would be a safety / ergonomics trade-off to allow building of crates under `/tmp/package/nested/dir` but still disallow attempts to load `/tmp/Cargo.toml` as a workspace. TODO: Cargo should log a warning when searching for files outside of any root, so it's easy to see why ancestor config files or manifests are not being loaded. --- .../benches/global_cache_tracker.rs | 2 +- crates/cargo-util/src/paths.rs | 13 +- src/bin/cargo/cli.rs | 168 ++++++++-------- src/cargo/core/workspace.rs | 13 +- src/cargo/ops/cargo_new.rs | 2 +- src/cargo/ops/registry/owner.rs | 2 +- src/cargo/ops/registry/yank.rs | 2 +- src/cargo/util/command_prelude.rs | 13 +- src/cargo/util/context/mod.rs | 185 ++++++++++++++++-- src/cargo/util/important_paths.rs | 7 +- tests/testsuite/config.rs | 2 +- 11 files changed, 281 insertions(+), 128 deletions(-) diff --git a/benches/benchsuite/benches/global_cache_tracker.rs b/benches/benchsuite/benches/global_cache_tracker.rs index f879b07eb41..7da1accb96c 100644 --- a/benches/benchsuite/benches/global_cache_tracker.rs +++ b/benches/benchsuite/benches/global_cache_tracker.rs @@ -35,7 +35,7 @@ fn initialize_context() -> GlobalContext { let cwd = homedir.clone(); let mut gctx = GlobalContext::new(shell, cwd, homedir); gctx.nightly_features_allowed = true; - gctx.set_search_stop_path(root()); + gctx.set_config_search_root(root()); gctx.configure( 0, false, diff --git a/crates/cargo-util/src/paths.rs b/crates/cargo-util/src/paths.rs index dd7086180b0..525efa3171c 100644 --- a/crates/cargo-util/src/paths.rs +++ b/crates/cargo-util/src/paths.rs @@ -441,14 +441,14 @@ pub struct PathAncestors<'a> { impl<'a> PathAncestors<'a> { fn new(path: &'a Path, stop_root_at: Option<&Path>) -> PathAncestors<'a> { - let stop_at = env::var("__CARGO_TEST_ROOT") - .ok() - 
.map(PathBuf::from) - .or_else(|| stop_root_at.map(|p| p.to_path_buf())); + tracing::trace!( + "creating path ancestors iterator from `{}` to `{}`", + path.display(), + stop_root_at.map_or("".to_string(), |p| p.display().to_string()) + ); PathAncestors { current: Some(path), - //HACK: avoid reading `~/.cargo/config` when testing Cargo itself. - stop_at, + stop_at: stop_root_at.map(|p| p.to_path_buf()), } } } @@ -466,6 +466,7 @@ impl<'a> Iterator for PathAncestors<'a> { } } + tracing::trace!("next path ancestor: `{}`", path.display()); Some(path) } else { None diff --git a/src/bin/cargo/cli.rs b/src/bin/cargo/cli.rs index 021184512ca..e3062adbe85 100644 --- a/src/bin/cargo/cli.rs +++ b/src/bin/cargo/cli.rs @@ -18,35 +18,94 @@ use crate::util::is_rustup; use cargo::core::shell::ColorChoice; use cargo::util::style; -fn closest_valid_root<'a>( - cwd: &std::path::Path, - roots: &[&'a std::path::Path], -) -> anyhow::Result> { - let cwd = cwd - .canonicalize() - .context("could not canonicalize current working directory")?; - - // Assumes that all roots are canonicalized. - let ancestor_roots = - roots - .iter() - .filter_map(|r| if cwd.starts_with(r) { Some(*r) } else { None }); - - Ok(ancestor_roots.into_iter().max_by_key(|root| { - // Prefer the root that is closest to the current working directory. - // This is done by counting the number of components in the path. - root.components().count() - })) -} - #[tracing::instrument(skip_all)] pub fn main(gctx: &mut GlobalContext) -> CliResult { // CAUTION: Be careful with using `config` until it is configured below. // In general, try to avoid loading config values unless necessary (like // the [alias] table). + // Register root directories. + // + // A root directory limits how far Cargo can search for files. + // + // Internally there are two notable roots we need to resolve: + // 1. The root when searching for config files (starting directory = cwd). + // 2. 
The root when searching for manifests (starting directory = manifest-path or cwd directory) + // + // Root directories can be specified via CARGO_ROOTS, --root or the existence of `.cargo/root` files. + // + // If CARGO_ROOTS is not set, the user's home directory is used as a default root. + // - This is a safety measure to avoid reading unsafe config files outside of the user's home + // directory. + // - This is also a measure to avoid triggering home directory automounter issues on some + // systems. + // + // The roots are deduplicated and sorted by their length so we can quickly find the closest root + // to a given starting directory (longest ancestor). + // + // A `SearchRoute` represents a route from a starting directory to the closest root directory. + // + // When there are no roots above a given starting directory, then a `SearchRoute` will use the + // starting directory itself is used as the root. + // - This is a safety measure to avoid reading unsafe config files in unknown locations (such as + // `/tmp`). + // + // There are two cached `SearchRoute`s, one for config files and one for workspace manifests, + // which are used to avoid repeatedly finding the nearest root directory. + + // Should it be an error if a given root doesn't exist? + // A user might configure a root under a `/mnt` directory that is not always mounted? + + let roots_env = gctx.get_env_os("CARGO_ROOTS").map(|s| s.to_owned()); + if let Some(paths_os) = roots_env { + for path in std::env::split_paths(&paths_os) { + gctx.add_root(&path)?; + } + } else { + //HACK: avoid reading `~/.cargo/config` when testing Cargo itself. + let test_root = gctx.get_env_os("__CARGO_TEST_ROOT").map(|s| s.to_owned()); + if let Some(test_root) = test_root { + tracing::debug!( + "no CARGO_ROOTS set, using __CARGO_TEST_ROOT as root: {}", + test_root.display() + ); + // This is a hack to avoid reading `~/.cargo/config` when testing Cargo itself. 
+ gctx.add_root(&test_root)?; + } else if let Some(home) = std::env::home_dir() { + tracing::debug!( + "no CARGO_ROOTS set, using home directory as root: {}", + home.display() + ); + // To be safe by default, and not attempt to read config files outside of the + // user's home directory, we implicitly add the home directory as a root. + // Ref: https://github.com/rust-lang/rfcs/pull/3279 + // + // This is also a measure to avoid triggering home directory automounter issues + gctx.add_root(&home)?; + } + } + let args = cli(gctx).try_get_matches()?; + if let Some(cli_roots) = args.get_many::("root") { + for cli_root in cli_roots { + gctx.add_root(cli_root)?; + } + } + + // Look for any `.cargo/root` markers after we have registered all other roots so + // that other roots can stop us from triggering automounter issues. + let search_route = gctx.find_config_search_route(gctx.cwd()); + + let root_marker = paths::ancestors(&search_route.start, search_route.root.as_deref()) + .find(|current| current.join(".cargo").join("root").exists()); + if let Some(marker) = root_marker { + tracing::debug!("found .cargo/root marker at {}", marker.display()); + gctx.add_root(marker)?; + } else { + tracing::debug!("no .cargo/root marker found"); + } + // Update the process-level notion of cwd if let Some(new_cwd) = args.get_one::("directory") { // This is a temporary hack. @@ -70,73 +129,6 @@ pub fn main(gctx: &mut GlobalContext) -> CliResult { std::env::set_current_dir(&new_cwd).context("could not change to requested directory")?; } - // A root directories can be specified via CARGO_ROOTS, --root or the existence of a `.cargo/root` files. - // If more than one root is specified, the effective root is the one closest to the current working directory. - // If CARGO_ROOTS is not set, the user's home directory is used as a default root. 
- - let cwd = std::env::current_dir().context("could not get current working directory")?; - // Windows UNC paths are OK here - let cwd = cwd - .canonicalize() - .context("could not canonicalize current working directory")?; - - // XXX: before looking for `.cargo/root` should we first try and resolve roots - // from `CARGO_ROOTS` + --root so we can avoid triggering automounter issues? - let root_marker = paths::ancestors(&cwd, gctx.search_stop_path()) - .find(|current| current.join(".cargo").join("root").exists()); - - let mut roots: Vec = Vec::new(); - - if let Some(root_marker) = root_marker { - let pb = root_marker - .canonicalize() - .context("could not canonicalize .cargo/root")?; - roots.push(pb); - } - - if let Some(paths_os) = gctx.get_env_os("CARGO_ROOTS") { - for path in std::env::split_paths(&paths_os) { - let pb = path.canonicalize().context(format!( - "could not canonicalize CARGO_ROOTS entry `{}`", - path.display() - ))?; - roots.push(pb); - } - } else if let Some(home) = std::env::home_dir() { - // To be safe by default, and not attempt to read config files outside of the - // user's home directory, we implicitly add the home directory as a root. - // Ref: https://github.com/rust-lang/rfcs/pull/3279 - let home = home - .canonicalize() - .context("could not canonicalize home directory")?; - tracing::debug!( - "implicitly adding home directory as root: {}", - home.display() - ); - roots.push(home); - } - - if let Some(cli_roots) = args.get_many::("root") { - for cli_root in cli_roots { - let pb = cli_root - .canonicalize() - .context("could not canonicalize requested root directory")?; - roots.push(pb); - } - } - - let roots: Vec<_> = roots.iter().map(|p| p.as_path()).collect(); - - if let Some(root) = closest_valid_root(&cwd, &roots)? 
{ - tracing::debug!("root directory: {}", root.display()); - gctx.set_search_stop_path(root); - } else { - tracing::debug!("root limited to cwd: {}", cwd.display()); - // If we are not running with _any_ root then we are conservative and don't - // allow any ancestor traversal. - gctx.set_search_stop_path(&cwd); - } - // Reload now that we have established the cwd and root gctx.reload_cwd()?; diff --git a/src/cargo/core/workspace.rs b/src/cargo/core/workspace.rs index 642d49138a2..b9791d8ed88 100644 --- a/src/cargo/core/workspace.rs +++ b/src/cargo/core/workspace.rs @@ -21,7 +21,7 @@ use crate::core::{ use crate::core::{EitherManifest, Package, SourceId, VirtualManifest}; use crate::ops; use crate::sources::{PathSource, SourceConfigMap, CRATES_IO_INDEX, CRATES_IO_REGISTRY}; -use crate::util::context::FeatureUnification; +use crate::util::context::{FeatureUnification, SearchRoute}; use crate::util::edit_distance; use crate::util::errors::{CargoResult, ManifestError}; use crate::util::interning::InternedString; @@ -746,6 +746,7 @@ impl<'gctx> Workspace<'gctx> { /// Returns an error if `manifest_path` isn't actually a valid manifest or /// if some other transient error happens. fn find_root(&mut self, manifest_path: &Path) -> CargoResult> { + debug!("find_root - {}", manifest_path.display()); let current = self.packages.load(manifest_path)?; match current .workspace_config() @@ -2023,12 +2024,14 @@ fn find_workspace_root_with_loader( gctx: &GlobalContext, mut loader: impl FnMut(&Path) -> CargoResult>, ) -> CargoResult> { + let search_route = gctx.find_manifest_search_route(manifest_path); + // Check if there are any workspace roots that have already been found that would work { let roots = gctx.ws_roots.borrow(); // Iterate through the manifests parent directories until we find a workspace // root. 
Note we skip the first item since that is just the path itself - for current in paths::ancestors(manifest_path, gctx.search_stop_path()).skip(1) { + for current in paths::ancestors(&search_route.start, search_route.root.as_deref()).skip(1) { if let Some(ws_config) = roots.get(current) { if !ws_config.is_excluded(manifest_path) { // Add `Cargo.toml` since ws_root is the root and not the file @@ -2038,7 +2041,7 @@ fn find_workspace_root_with_loader( } } - for ances_manifest_path in find_root_iter(manifest_path, gctx) { + for ances_manifest_path in find_root_iter(&search_route, gctx) { debug!("find_root - trying {}", ances_manifest_path.display()); if let Some(ws_root_path) = loader(&ances_manifest_path)? { return Ok(Some(ws_root_path)); @@ -2058,10 +2061,10 @@ fn read_root_pointer(member_manifest: &Path, root_link: &str) -> PathBuf { } fn find_root_iter<'a>( - manifest_path: &'a Path, + search_route: &'a SearchRoute, gctx: &'a GlobalContext, ) -> impl Iterator + 'a { - LookBehind::new(paths::ancestors(manifest_path, gctx.search_stop_path()).skip(2)) + LookBehind::new(paths::ancestors(&search_route.start, search_route.root.as_deref()).skip(2)) .take_while(|path| !path.curr.ends_with("target/package")) // Don't walk across `CARGO_HOME` when we're looking for the // workspace root. Sometimes a package will be organized with diff --git a/src/cargo/ops/cargo_new.rs b/src/cargo/ops/cargo_new.rs index 88d2d6bf162..d2675883151 100644 --- a/src/cargo/ops/cargo_new.rs +++ b/src/cargo/ops/cargo_new.rs @@ -802,7 +802,7 @@ fn mk(gctx: &GlobalContext, opts: &MkOptions<'_>) -> CargoResult<()> { } let manifest_path = paths::normalize_path(&path.join("Cargo.toml")); - if let Ok(root_manifest_path) = find_root_manifest_for_wd(&manifest_path) { + if let Ok(root_manifest_path) = find_root_manifest_for_wd(gctx, &manifest_path) { let root_manifest = paths::read(&root_manifest_path)?; // Sometimes the root manifest is not a valid manifest, so we only try to parse it if it is. 
// This should not block the creation of the new project. It is only a best effort to diff --git a/src/cargo/ops/registry/owner.rs b/src/cargo/ops/registry/owner.rs index 7c8246fdfbf..06e4b570925 100644 --- a/src/cargo/ops/registry/owner.rs +++ b/src/cargo/ops/registry/owner.rs @@ -28,7 +28,7 @@ pub fn modify_owners(gctx: &GlobalContext, opts: &OwnersOptions) -> CargoResult< let name = match opts.krate { Some(ref name) => name.clone(), None => { - let manifest_path = find_root_manifest_for_wd(gctx.cwd())?; + let manifest_path = find_root_manifest_for_wd(gctx, gctx.cwd())?; let ws = Workspace::new(&manifest_path, gctx)?; ws.current()?.package_id().name().to_string() } diff --git a/src/cargo/ops/registry/yank.rs b/src/cargo/ops/registry/yank.rs index f46b9332f6b..45f2edca579 100644 --- a/src/cargo/ops/registry/yank.rs +++ b/src/cargo/ops/registry/yank.rs @@ -26,7 +26,7 @@ pub fn yank( let name = match krate { Some(name) => name, None => { - let manifest_path = find_root_manifest_for_wd(gctx.cwd())?; + let manifest_path = find_root_manifest_for_wd(gctx, gctx.cwd())?; let ws = Workspace::new(&manifest_path, gctx)?; ws.current()?.package_id().name().to_string() } diff --git a/src/cargo/util/command_prelude.rs b/src/cargo/util/command_prelude.rs index 8e375f7e1ea..eb389b448ab 100644 --- a/src/cargo/util/command_prelude.rs +++ b/src/cargo/util/command_prelude.rs @@ -1077,7 +1077,7 @@ pub fn root_manifest(manifest_path: Option<&Path>, gctx: &GlobalContext) -> Carg } Ok(path) } else { - find_root_manifest_for_wd(gctx.cwd()) + find_root_manifest_for_wd(gctx, gctx.cwd()) } } @@ -1132,7 +1132,7 @@ fn get_profile_candidates() -> Vec { fn get_workspace_profile_candidates() -> CargoResult> { let gctx = new_gctx_for_completions()?; - let ws = Workspace::new(&find_root_manifest_for_wd(gctx.cwd())?, &gctx)?; + let ws = Workspace::new(&find_root_manifest_for_wd(&gctx, gctx.cwd())?, &gctx)?; let profiles = Profiles::new(&ws, InternedString::new("dev"))?; let mut candidates = 
Vec::new(); @@ -1216,7 +1216,7 @@ fn get_bin_candidates() -> Vec { fn get_targets_from_metadata() -> CargoResult> { let cwd = std::env::current_dir()?; let gctx = GlobalContext::new(shell::Shell::new(), cwd.clone(), cargo_home_with_cwd(&cwd)?); - let ws = Workspace::new(&find_root_manifest_for_wd(&cwd)?, &gctx)?; + let ws = Workspace::new(&find_root_manifest_for_wd(&gctx, &cwd)?, &gctx)?; let packages = ws.members().collect::>(); @@ -1271,7 +1271,10 @@ fn get_target_triples_from_rustup() -> CargoResult CargoResult> { let cwd = std::env::current_dir()?; let gctx = GlobalContext::new(shell::Shell::new(), cwd.clone(), cargo_home_with_cwd(&cwd)?); - let ws = Workspace::new(&find_root_manifest_for_wd(&PathBuf::from(&cwd))?, &gctx); + let ws = Workspace::new( + &find_root_manifest_for_wd(&gctx, &PathBuf::from(&cwd))?, + &gctx, + ); let rustc = gctx.load_global_rustc(ws.as_ref().ok())?; @@ -1374,7 +1377,7 @@ pub fn get_pkg_id_spec_candidates() -> Vec { fn get_packages() -> CargoResult> { let gctx = new_gctx_for_completions()?; - let ws = Workspace::new(&find_root_manifest_for_wd(gctx.cwd())?, &gctx)?; + let ws = Workspace::new(&find_root_manifest_for_wd(&gctx, gctx.cwd())?, &gctx)?; let requested_kinds = CompileKind::from_requested_targets(ws.gctx(), &[])?; let mut target_data = RustcTargetData::new(&ws, &requested_kinds)?; diff --git a/src/cargo/util/context/mod.rs b/src/cargo/util/context/mod.rs index 86d72696a45..e445cf2c229 100644 --- a/src/cargo/util/context/mod.rs +++ b/src/cargo/util/context/mod.rs @@ -159,6 +159,30 @@ pub struct CredentialCacheValue { pub operation_independent: bool, } +/// A point-to-point route for searching config files or manifests, resolved +/// based on a starting directory and a set of root directories. +#[derive(Clone, Debug)] +pub struct SearchRoute { + /// The first directory in the route to search (inclusive) + /// The path is canonicalized. 
+ pub start: PathBuf, + /// The last directory in the route to search (inclusive) + /// If not set then the route ends at the root of the filesystem. + /// The path is canonicalized. + pub root: Option, +} + +impl SearchRoute { + pub fn sentinal(path: impl AsRef) -> Self { + let start = path + .as_ref() + .canonicalize() + .expect("failed to canonicalize path"); + let root = Some(start.clone()); + Self { start, root } + } +} + /// Configuration information for cargo. This is not specific to a build, it is information /// relating to cargo itself. #[derive(Debug)] @@ -175,8 +199,13 @@ pub struct GlobalContext { cli_config: Option>, /// The current working directory of cargo cwd: PathBuf, - /// Directory where config file searching should stop (inclusive). - search_stop_path: Option, + /// The full set of root directories that limit config file searching. + /// Sorted by path length, longest first. + sorted_roots: Vec, + /// Directories to search for config files (invalidated if roots or cwd changes). + config_search_route: RefCell>, + /// Directories to search for manifest files (invalidated if roots or starting point changes). + manifest_search_route: RefCell>, /// The location of the cargo executable (path to current process) cargo_exe: LazyCell, /// The location of the rustdoc executable @@ -281,11 +310,17 @@ impl GlobalContext { _ => true, }; + // By default only allow searching the current directory, until roots + // are set. + //let config_search_route = SearchRoute::sentinal(&cwd); + GlobalContext { home_path: Filesystem::new(homedir), shell: RefCell::new(shell), cwd, - search_stop_path: None, + sorted_roots: Vec::new(), + config_search_route: RefCell::new(None), + manifest_search_route: RefCell::new(None), values: LazyCell::new(), credential_values: LazyCell::new(), cli_config: None, @@ -567,17 +602,132 @@ impl GlobalContext { } } - /// Gets the path where ancestor config file and workspace searching will stop. 
- pub fn search_stop_path(&self) -> Option<&Path> { - self.search_stop_path.as_deref() + pub fn add_root>(&mut self, path: P) -> CargoResult<()> { + let path = path + .as_ref() + .canonicalize() + .context("couldn't canonicalize path")?; + if !self.sorted_roots.iter().any(|root| root == &path) { + self.sorted_roots.push(path); + self.sorted_roots + .sort_by_key(|p| std::cmp::Reverse(p.components().count())); + self.config_search_route = RefCell::new(None); + self.manifest_search_route = RefCell::new(None); + } + Ok(()) + } + + /// Find shortest route between `start` and one of the roots in [Self::sorted_roots]. + /// + /// If no root is found then fallback to root == start so we only search one directory. + /// - This is a safety measure to reduce the risk of reading unsafe state from locations + /// that are not owned by the user (for example building under `/tmp`). + fn find_search_route>(&self, start: P) -> CargoResult { + let start = start + .as_ref() + .canonicalize() + .context("couldn't canonicalize path")?; + + tracing::debug!("Searching sorted roots for start {:?}", self.sorted_roots); + let root = self + .sorted_roots + .iter() + .filter_map(|root| { + if start.starts_with(root) { + tracing::debug!( + "Found candidate search root {:?} for start {:?}", + root, + start + ); + Some(root.clone()) + } else { + tracing::debug!( + "Skipping candidate search root {:?} for start {:?}", + root, + start + ); + None + } + }) + .next(); + + // Cargo no longer allows searching up to the root of the filesystem unless the root is + // explicitly added to `sorted_roots`. + let root = root.unwrap_or_else(|| start.clone()); + Ok(SearchRoute { + start, + root: Some(root), + }) + } + + /// Ignore any configured roots and define a config search route between + /// `cwd` and `path`. If `path` is `None`, then the search route will go to + /// the root of the filesystem which could be unsafe. + /// + /// Normally [`update_config_search_route`] should be used instead. 
+ pub fn set_config_search_root>(&mut self, root: Option

) { + let Ok(start) = self.cwd().canonicalize() else { + *self.config_search_route.borrow_mut() = None; + return; + }; + if let Some(path) = root { + let root = path.into(); + debug_assert!(self.cwd.starts_with(&root)); + let Ok(root) = root.canonicalize() else { + *self.config_search_route.borrow_mut() = None; + return; + }; + *self.config_search_route.borrow_mut() = Some(SearchRoute { + start, + root: Some(root), + }); + } else { + *self.config_search_route.borrow_mut() = Some(SearchRoute { start, root: None }); + } + } + + pub fn find_config_search_route>(&self, start: P) -> SearchRoute { + tracing::trace!( + "Finding config search route starting at {:?}", + start.as_ref() + ); + + let start = start + .as_ref() + .canonicalize() + .expect("failed to canonicalize cwd"); + // Return the existing route if the start == path. + if let Some(route) = &*self.config_search_route.borrow() { + if route.start == start { + tracing::trace!("Using cached config search route: {:?}", route); + return route.clone(); + } + } + + let config_search_route = self + .find_search_route(start) + .expect("failed to find config file search route"); + *self.config_search_route.borrow_mut() = Some(config_search_route.clone()); + config_search_route } - /// Sets the path where ancestor config file searching will stop. The - /// given path is included, but its ancestors are not. - pub fn set_search_stop_path>(&mut self, path: P) { - let path = path.into(); - debug_assert!(self.cwd.starts_with(&path)); - self.search_stop_path = Some(path); + pub fn find_manifest_search_route>(&self, start: P) -> SearchRoute { + let start = start + .as_ref() + .canonicalize() + .expect("failed to canonicalize path"); + // Return the existing route if the start == path. 
+ if let Some(route) = &*self.manifest_search_route.borrow() { + if route.start == start { + return route.clone(); + } + } + + let manifest_search_route = self + .find_search_route(start) + .expect("failed to find manifest search route"); + *self.manifest_search_route.borrow_mut() = Some(manifest_search_route.clone()); + manifest_search_route.clone() } /// Switches the working directory to [`std::env::current_dir`] @@ -594,6 +744,7 @@ impl GlobalContext { })?; self.cwd = cwd; + *self.config_search_route.borrow_mut() = None; self.home_path = Filesystem::new(homedir); self.reload_rooted_at(self.cwd.clone())?; Ok(()) @@ -1262,7 +1413,7 @@ impl GlobalContext { let mut result = Vec::new(); let mut seen = HashSet::new(); let home = self.home_path.clone().into_path_unlocked(); - self.walk_tree(&self.cwd, &home, |path| { + self.walk_config_search_route(&self.cwd, &home, |path| { let mut cv = self._load_file(path, &mut seen, false, WhyLoad::FileDiscovery)?; if self.cli_unstable().config_include { self.load_unmerged_include(&mut cv, &mut seen, &mut result)?; @@ -1303,7 +1454,7 @@ impl GlobalContext { let mut cfg = CV::Table(HashMap::new(), Definition::Path(PathBuf::from("."))); let home = self.home_path.clone().into_path_unlocked(); - self.walk_tree(path, &home, |path| { + self.walk_config_search_route(path, &home, |path| { let value = self.load_file(path)?; cfg.merge(value, false).with_context(|| { format!("failed to merge configuration at `{}`", path.display()) @@ -1681,13 +1832,14 @@ impl GlobalContext { } } - fn walk_tree(&self, pwd: &Path, home: &Path, mut walk: F) -> CargoResult<()> + fn walk_config_search_route(&self, pwd: &Path, home: &Path, mut walk: F) -> CargoResult<()> where F: FnMut(&Path) -> CargoResult<()>, { let mut seen_dir = HashSet::new(); - for current in paths::ancestors(pwd, self.search_stop_path.as_deref()) { + let search_route = self.find_config_search_route(pwd); + for current in paths::ancestors(&search_route.start, search_route.root.as_deref()) { 
let config_root = current.join(".cargo"); if let Some(path) = self.get_file_path(&config_root, "config", true)? { walk(&path)?; @@ -3157,7 +3309,6 @@ mod tests { #[test] fn disables_multiplexing() { let mut gctx = GlobalContext::new(Shell::new(), "".into(), "".into()); - gctx.set_search_stop_path(std::path::PathBuf::new()); gctx.set_env(Default::default()); let mut http = CargoHttpConfig::default(); diff --git a/src/cargo/util/important_paths.rs b/src/cargo/util/important_paths.rs index 224c4ab8b86..dbc66e09131 100644 --- a/src/cargo/util/important_paths.rs +++ b/src/cargo/util/important_paths.rs @@ -2,13 +2,16 @@ use crate::util::errors::CargoResult; use cargo_util::paths; use std::path::{Path, PathBuf}; +use super::GlobalContext; + /// Finds the root `Cargo.toml`. -pub fn find_root_manifest_for_wd(cwd: &Path) -> CargoResult { +pub fn find_root_manifest_for_wd(gctx: &GlobalContext, cwd: &Path) -> CargoResult { let valid_cargo_toml_file_name = "Cargo.toml"; let invalid_cargo_toml_file_name = "cargo.toml"; let mut invalid_cargo_toml_path_exists = false; - for current in paths::ancestors(cwd, None) { + let search_route = gctx.find_manifest_search_route(cwd); + for current in paths::ancestors(&search_route.start, search_route.root.as_deref()) { let manifest = current.join(valid_cargo_toml_file_name); if manifest.exists() { return Ok(manifest); diff --git a/tests/testsuite/config.rs b/tests/testsuite/config.rs index 80be8dabc1c..a82ab611d63 100644 --- a/tests/testsuite/config.rs +++ b/tests/testsuite/config.rs @@ -110,7 +110,7 @@ impl GlobalContextBuilder { let mut gctx = GlobalContext::new(shell, cwd, homedir); gctx.nightly_features_allowed = self.enable_nightly_features || !self.unstable.is_empty(); gctx.set_env(self.env.clone()); - gctx.set_search_stop_path(&root); + gctx.set_config_search_root(&root); gctx.configure( 0, false, From be2503fd4d32c1297818516d6165503a01174075 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 5 Jun 2025 17:11:13 +0100 Subject: 
[PATCH 4/6] Allow ancestor traversal when loading package manifest outside of roots As a special case, unlike for configs and workspaces, this allows Cargo to search all the way up to the root of the filesystem when loading a package manifest, if and only if Cargo is running outside of all configured root directories. This exception doesn't apply if Cargo is running within a configured root directory. This is a trade-off between safety and convenience (plus backwards compatibility) that ensures it is possible to unpack a package outside of your home directory (such as `/tmp/my-package`) and then start a build from a subdirectory like `/tmp/my-package/sub/dir`, such that Cargo will traverse parent directories looking for `/tmp/my-package/Cargo.toml` but will not try to load untrusted configs under `/tmp/.cargo` or workspace manifests under `/tmp/Cargo.toml` that could have been created by another user. FIXME: If the first manifest loaded is in fact a workspace manifest, we need to make sure we add its directory as a root directory so that when nested packages within the workspace are built they are able to find their way back to the top of the workspace. Otherwise each nested package build will be limited to the directory of the package.
--- src/cargo/core/workspace.rs | 2 +- src/cargo/util/context/mod.rs | 100 +++++++++++++++++++++--------- src/cargo/util/important_paths.rs | 2 +- 3 files changed, 74 insertions(+), 30 deletions(-) diff --git a/src/cargo/core/workspace.rs b/src/cargo/core/workspace.rs index b9791d8ed88..362256adf60 100644 --- a/src/cargo/core/workspace.rs +++ b/src/cargo/core/workspace.rs @@ -2024,7 +2024,7 @@ fn find_workspace_root_with_loader( gctx: &GlobalContext, mut loader: impl FnMut(&Path) -> CargoResult>, ) -> CargoResult> { - let search_route = gctx.find_manifest_search_route(manifest_path); + let search_route = gctx.find_workspace_manifest_search_route(manifest_path); // Check if there are any workspace roots that have already been found that would work { diff --git a/src/cargo/util/context/mod.rs b/src/cargo/util/context/mod.rs index e445cf2c229..f376168694b 100644 --- a/src/cargo/util/context/mod.rs +++ b/src/cargo/util/context/mod.rs @@ -617,12 +617,10 @@ impl GlobalContext { Ok(()) } - /// Find shortest route between `start` and one of the roots in [Self::sorted_roots]. - /// - /// If no root is found then fallback to root == start so we only search one directory. - /// - This is a safety measure to reduce the risk of reading unsafe state from locations - /// that are not owned by the user (for example building under `/tmp`). 
- fn find_search_route>(&self, start: P) -> CargoResult { + fn find_nearest_root>( + &self, + start: P, + ) -> CargoResult> { let start = start .as_ref() .canonicalize() @@ -632,31 +630,33 @@ impl GlobalContext { let root = self .sorted_roots .iter() - .filter_map(|root| { - if start.starts_with(root) { - tracing::debug!( - "Found candidate search root {:?} for start {:?}", - root, - start - ); - Some(root.clone()) - } else { - tracing::debug!( - "Skipping candidate search root {:?} for start {:?}", - root, - start - ); - None - } - }) - .next(); + .find(|root| start.starts_with(root)) + .cloned(); + + if let Some(root) = root { + tracing::debug!("Found candidate root {:?}", root); + Ok(Some((start, root))) + } else { + tracing::debug!("No candidate root found for start {:?}", start); + Ok(None) + } + } + + /// Find shortest route between `start` and one of the roots in [Self::sorted_roots]. + /// + /// If no root is found then fallback to root == start so we only search one directory. + /// - This is a safety measure to reduce the risk of reading unsafe state from locations + /// that are not owned by the user (for example building under `/tmp`). + fn find_search_route>(&self, start: P) -> CargoResult { + let start = start.as_ref(); + let route = self.find_nearest_root(&start)?; // Cargo no longer allows searching up to the root of the filesystem unless the root is // explicitly added to `sorted_roots`. 
- let root = root.unwrap_or_else(|| start.clone()); + let route = route.unwrap_or_else(|| (start.to_owned(), start.to_owned())); Ok(SearchRoute { - start, - root: Some(root), + start: route.0, + root: Some(route.1), }) } @@ -711,7 +711,51 @@ impl GlobalContext { config_search_route } - pub fn find_manifest_search_route>(&self, start: P) -> SearchRoute { + pub fn find_package_manifest_search_route>(&self, start: P) -> SearchRoute { + tracing::trace!( + "Finding package manifest search route starting at {:?}", + start.as_ref() + ); + + let start = start + .as_ref() + .canonicalize() + .expect("failed to canonicalize path"); + // Return the existing route if the start == path. + if let Some(route) = &*self.manifest_search_route.borrow() { + if route.start == start { + return route.clone(); + } + } + + // As a special case, we allow the traversal of parent directories, when + // outside of all root directories to find the package manifest. + // + // This is a trade off between safety and convenience, so it's e.g. + // possible to unpack a package under `/tmp` and start a build from + // `/tmp/my-package/sub/dir` and find `/tmp/my-package/Cargo.toml`, but + // not allow a potentially unsafe `/tmp/Cargo.toml` workspace to be loaded. + let manifest_search_route = + if let Some((start, root)) = self.find_nearest_root(&start).ok().flatten() { + SearchRoute { + start, + root: Some(root), + } + } else { + SearchRoute { + start, + root: None, // Allow searching up to the root of the filesystem. 
+ } + }; + *self.manifest_search_route.borrow_mut() = Some(manifest_search_route.clone()); + manifest_search_route.clone() + } + + pub fn find_workspace_manifest_search_route>(&self, start: P) -> SearchRoute { + tracing::trace!( + "Finding workspace manifest search route starting at {:?}", + start.as_ref() + ); let start = start .as_ref() .canonicalize() diff --git a/src/cargo/util/important_paths.rs b/src/cargo/util/important_paths.rs index dbc66e09131..53c497cb21c 100644 --- a/src/cargo/util/important_paths.rs +++ b/src/cargo/util/important_paths.rs @@ -10,7 +10,7 @@ pub fn find_root_manifest_for_wd(gctx: &GlobalContext, cwd: &Path) -> CargoResul let invalid_cargo_toml_file_name = "cargo.toml"; let mut invalid_cargo_toml_path_exists = false; - let search_route = gctx.find_manifest_search_route(cwd); + let search_route = gctx.find_package_manifest_search_route(cwd); for current in paths::ancestors(&search_route.start, search_route.root.as_deref()) { let manifest = current.join(valid_cargo_toml_file_name); if manifest.exists() { From 92ca3bfd4a88d6ed86242afa630b37672a769e1d Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 5 Jun 2025 22:00:28 +0100 Subject: [PATCH 5/6] Track the first manifest path as an implicit, fallback root To allow a package to be unpacked outside of all root directories, such as `/tmp/my-package` and allow a build to start from `/tmp/my-package/sub/dir`, Cargo is allowed to traverse parent directories until it finds its first manifest, and this then becomes a fallback root directory that will stop Cargo from looking any higher for a workspace (which could attempt to load `/tmp/Cargo.toml` that may not be safe). XXX: Instead of having a special case for the first manifest; maybe it could be better to instead _always_ add the directory of a manifest as a root directory if it's not a subdirectory of any existing root. That would have a mostly-equivalent result to the current behaviour but _might_ be simpler to document? 
--- src/bin/cargo/commands/locate_project.rs | 2 ++ src/cargo/util/context/mod.rs | 30 +++++++++++++++++++++++- src/cargo/util/important_paths.rs | 6 +++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/bin/cargo/commands/locate_project.rs b/src/bin/cargo/commands/locate_project.rs index 439f2d5792e..c560d5638ba 100644 --- a/src/bin/cargo/commands/locate_project.rs +++ b/src/bin/cargo/commands/locate_project.rs @@ -30,10 +30,12 @@ pub fn exec(gctx: &mut GlobalContext, args: &ArgMatches) -> CliResult { let workspace; let root = match WhatToFind::parse(args) { WhatToFind::CurrentManifest => { + tracing::trace!("locate-project::exec(): finding root manifest"); root_manifest = args.root_manifest(gctx)?; &root_manifest } WhatToFind::Workspace => { + tracing::trace!("locate-project::exec(): finding workspace root manifest"); workspace = args.workspace(gctx)?; workspace.root_manifest() } diff --git a/src/cargo/util/context/mod.rs b/src/cargo/util/context/mod.rs index f376168694b..f445ac47e52 100644 --- a/src/cargo/util/context/mod.rs +++ b/src/cargo/util/context/mod.rs @@ -202,6 +202,9 @@ pub struct GlobalContext { /// The full set of root directories that limit config file searching. /// Sorted by path length, longest first. sorted_roots: Vec, + /// In case we are running outside of any user-configured root directory, we + /// add the directory of the first manifest as as root directory. + fallback_manifest_root: RefCell>, /// Directories to search for config files (invalidated if roots or cwd changes). config_search_route: RefCell>, /// Directories to search for manifest files (invalidated if roots or starting point changes). 
@@ -319,6 +322,7 @@ impl GlobalContext { shell: RefCell::new(shell), cwd, sorted_roots: Vec::new(), + fallback_manifest_root: RefCell::new(None), config_search_route: RefCell::new(None), manifest_search_route: RefCell::new(None), values: LazyCell::new(), @@ -617,6 +621,24 @@ impl GlobalContext { Ok(()) } + pub fn ensure_fallback_root>(&self, path: P) { + let path = path + .as_ref() + .canonicalize() + .expect("failed to canonicalize path"); + if self.fallback_manifest_root.borrow_mut().is_none() { + tracing::debug!("Setting fallback manifest root to {:?}", path); + *self.fallback_manifest_root.borrow_mut() = Some(path); + *self.config_search_route.borrow_mut() = None; + *self.manifest_search_route.borrow_mut() = None; + } else { + tracing::debug!( + "Fallback manifest root already set to {:?}, not changing", + self.fallback_manifest_root.borrow() + ); + } + } + fn find_nearest_root>( &self, start: P, @@ -626,7 +648,10 @@ impl GlobalContext { .canonicalize() .context("couldn't canonicalize path")?; - tracing::debug!("Searching sorted roots for start {:?}", self.sorted_roots); + tracing::debug!( + "Searching sorted roots for closest ancestor {:?}", + self.sorted_roots + ); let root = self .sorted_roots .iter() @@ -636,6 +661,9 @@ impl GlobalContext { if let Some(root) = root { tracing::debug!("Found candidate root {:?}", root); Ok(Some((start, root))) + } else if let Some(fallback_root) = self.fallback_manifest_root.borrow().as_ref() { + tracing::debug!("Using manifest path as fallback root"); + Ok(Some((start, fallback_root.clone()))) } else { tracing::debug!("No candidate root found for start {:?}", start); Ok(None) diff --git a/src/cargo/util/important_paths.rs b/src/cargo/util/important_paths.rs index 53c497cb21c..8c3779434de 100644 --- a/src/cargo/util/important_paths.rs +++ b/src/cargo/util/important_paths.rs @@ -14,6 +14,12 @@ pub fn find_root_manifest_for_wd(gctx: &GlobalContext, cwd: &Path) -> CargoResul for current in paths::ancestors(&search_route.start, 
search_route.root.as_deref()) { let manifest = current.join(valid_cargo_toml_file_name); if manifest.exists() { + // In case we are running outside of any root directory, the directory for the + // first root manifest we find will become the fallback root. This is part of + // a safety trade-off that allows us to traverse unknown ancestors to find + // a package, but limits the risk of continuing to traverse and load manifests + // that we might not own (such as `/tmp/Cargo.toml`) + gctx.ensure_fallback_root(current); return Ok(manifest); } if current.join(invalid_cargo_toml_file_name).exists() { From d3167edb974e86f0af5395d67d2e7bb070e5e17f Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Fri, 6 Jun 2025 11:49:20 +0100 Subject: [PATCH 6/6] Emit shell warning if running outside of all roots To help make it clear why Cargo may not load all ancestor configs or find workspace manifests in ancestor directories, Cargo emits a warning: ``` warning: Cargo is running outside of any root directory, limiting loading of ancestor configs and manifest ``` --- src/bin/cargo/cli.rs | 5 +++++ src/cargo/util/context/mod.rs | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/bin/cargo/cli.rs b/src/bin/cargo/cli.rs index e3062adbe85..3d0ed462fa5 100644 --- a/src/bin/cargo/cli.rs +++ b/src/bin/cargo/cli.rs @@ -9,6 +9,7 @@ use std::collections::HashMap; use std::ffi::OsStr; use std::ffi::OsString; use std::fmt::Write; +use tracing::warn; use super::commands; use super::list_commands; @@ -132,6 +133,10 @@ pub fn main(gctx: &mut GlobalContext) -> CliResult { // Reload now that we have established the cwd and root gctx.reload_cwd()?; + if gctx.find_nearest_root(gctx.cwd())?.is_none() { + gctx.shell().warn("Cargo is running outside of any root directory, limiting loading of ancestor configs and manifest")?; + } + let (expanded_args, global_args) = expand_aliases(gctx, args, vec![])?; let is_verbose = expanded_args.verbose() > 0; diff --git 
a/src/cargo/util/context/mod.rs b/src/cargo/util/context/mod.rs index f445ac47e52..0e5f69adc47 100644 --- a/src/cargo/util/context/mod.rs +++ b/src/cargo/util/context/mod.rs @@ -639,7 +639,7 @@ impl GlobalContext { } } - fn find_nearest_root>( + pub fn find_nearest_root>( &self, start: P, ) -> CargoResult> {