From 3f7d871213f9f05cae7037392fb297f98cca4577 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Fri, 14 Nov 2025 13:36:38 +0100 Subject: [PATCH] clean: Optimize clean with multiple -p specifiers This commit optimizes the implementation of `cargo clean -p` by reducing the number of directory walks that take place. We now batch calls to `rm_rf_prefix_list`, thus potentially avoiding multiple walks over a single subdirectory. In practice this helps us significantly reduce the runtime for clearing large workspaces (as implemented in #16263); for Zed, `cargo clean --workspace` went down from 73 seconds to 3 seconds. We have 216 workspace members. Co-authored-by: dino --- .../compiler/build_context/target_info.rs | 7 ++ src/cargo/ops/cargo_clean.rs | 87 ++++++++++++------- 2 files changed, 65 insertions(+), 29 deletions(-) diff --git a/src/cargo/core/compiler/build_context/target_info.rs b/src/cargo/core/compiler/build_context/target_info.rs index 90e501a2342..40f36c7f9cd 100644 --- a/src/cargo/core/compiler/build_context/target_info.rs +++ b/src/cargo/core/compiler/build_context/target_info.rs @@ -140,6 +140,13 @@ impl FileType { should_replace_hyphens: true, } } + + pub fn output_prefix_suffix(&self, target: &Target) -> (String, String) { + ( + format!("{}{}-", self.prefix, target.crate_name()), + self.suffix.clone(), + ) + } } impl TargetInfo { diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index a70972fae1b..07dabb81b13 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -9,7 +9,8 @@ use crate::util::interning::InternedString; use crate::util::{GlobalContext, Progress, ProgressStyle}; use anyhow::bail; use cargo_util::paths; -use std::collections::{HashMap, HashSet}; +use std::borrow::Cow; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fs; use std::path::{Path, PathBuf}; use std::rc::Rc; @@ -194,6 +195,7 @@ fn clean_specs( // Try to reduce the
amount of times we iterate over the same target directory by storing away // the directories we've iterated over (and cleaned for a given package). let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default(); + let mut packages_to_clean: BTreeMap<Rc<str>, BTreeMap<_, BTreeSet<_>>> = Default::default(); for pkg in packages { let pkg_dir = format!("{}-*", pkg.name()); clean_ctx.progress.on_cleaning_package(&pkg.name())?; @@ -218,8 +220,8 @@ fn clean_specs( continue; } let crate_name: Rc<str> = target.crate_name().into(); - let path_dot: &str = &format!("{crate_name}."); - let path_dash: &str = &format!("{crate_name}-"); + let path_dot = format!("{crate_name}."); + let path_dash = format!("{crate_name}-"); for &mode in &[ CompileMode::Build, CompileMode::Test, @@ -251,13 +253,23 @@ fn clean_specs( ), }; let mut dir_glob_str = escape_glob_path(dir)?; - let dir_glob = Path::new(&dir_glob_str); + if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) { + dir_glob_str.push(std::path::MAIN_SEPARATOR); + } + dir_glob_str.push('*'); + let dir_glob_str: Rc<str> = dir_glob_str.into(); + for file_type in file_types { // Some files include a hash in the filename, some don't. - let hashed_name = file_type.output_filename(target, Some("*")); + let (prefix, suffix) = file_type.output_prefix_suffix(target); let unhashed_name = file_type.output_filename(target, None); + packages_to_clean + .entry(Rc::from(dir_glob_str.as_ref())) + .or_default() + .entry(prefix) + .or_default() + .extend([Cow::Owned(suffix)]); - clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?; clean_ctx.rm_rf(&dir.join(&unhashed_name))?; // Remove the uplifted copy.
@@ -272,37 +284,47 @@ fn clean_specs( let unhashed_dep_info = dir.join(format!("{}.d", crate_name)); clean_ctx.rm_rf(&unhashed_dep_info)?; - if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) { - dir_glob_str.push(std::path::MAIN_SEPARATOR); - } - dir_glob_str.push('*'); - let dir_glob_str: Rc<str> = dir_glob_str.into(); if cleaned_packages .entry(dir_glob_str.clone()) .or_default() .insert(crate_name.clone()) { - let paths = [ - // Remove dep-info file generated by rustc. It is not tracked in - // file_types. It does not have a prefix. - (path_dash, ".d"), - // Remove split-debuginfo files generated by rustc. - (path_dot, ".o"), - (path_dot, ".dwo"), - (path_dot, ".dwp"), - ]; - clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?; + let to_clean = packages_to_clean.entry(dir_glob_str).or_default(); + // Remove dep-info file generated by rustc. It is not tracked in + // file_types. It does not have a prefix. + to_clean + .entry(path_dash.clone()) + .or_default() + .extend([Cow::Borrowed(".d")]); + // Remove split-debuginfo files generated by rustc. + to_clean.entry(path_dot.clone()).or_default().extend([ + Cow::Borrowed(".o"), + Cow::Borrowed(".dwo"), + Cow::Borrowed(".dwp"), + ]); } // TODO: what to do about build_script_build?
- let dir = escape_glob_path(layout.build_dir().incremental())?; - let incremental = Path::new(&dir).join(format!("{}-*", crate_name)); - clean_ctx.rm_rf_glob(&incremental)?; + let mut dir = escape_glob_path(layout.build_dir().incremental())?; + if !dir.ends_with(std::path::MAIN_SEPARATOR) { + dir.push(std::path::MAIN_SEPARATOR); + } + dir.push('*'); + packages_to_clean + .entry(dir.into()) + .or_default() + .entry(path_dash.clone()) + .or_default() + .extend([Cow::Borrowed("")]); } } } } + for (dir_glob_str, entries_to_clean) in packages_to_clean { + clean_ctx.rm_rf_prefix_list(&dir_glob_str, &entries_to_clean)?; + } + Ok(()) } @@ -381,15 +403,22 @@ impl<'gctx> CleanContext<'gctx> { fn rm_rf_prefix_list( &mut self, pattern: &str, - path_matchers: &[(&str, &str)], + path_matchers: &BTreeMap<String, BTreeSet<Cow<'static, str>>>, ) -> CargoResult<()> { for path in glob::glob(pattern)? { let path = path?; let filename = path.file_name().and_then(|name| name.to_str()).unwrap(); - if path_matchers - .iter() - .any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix)) - { + let first_char = filename.chars().next(); + let mut subrange = path_matchers.iter(); + // .range(..filename.to_owned()) + // .rev() + // .take_while(|(prefix, _suffixes)| first_char == prefix.chars().next()); + if subrange.any(|(prefix, suffixes)| { + filename.starts_with(prefix) + && suffixes + .iter() + .any(|suffix| filename.ends_with(suffix.as_ref())) + }) { self.rm_rf(&path)?; } }