From 162a9ea7d15f968160220bc58dd0a7b79a4e2131 Mon Sep 17 00:00:00 2001 From: Tim de Jager Date: Wed, 28 Jan 2026 17:19:18 +0100 Subject: [PATCH 1/7] feat: add rattler_glob as a crate --- Cargo.lock | 36 ++ crates/rattler_glob/Cargo.toml | 27 + crates/rattler_glob/src/glob_hash.rs | 286 ++++++++++ crates/rattler_glob/src/glob_hash_cache.rs | 132 +++++ crates/rattler_glob/src/glob_mtime.rs | 204 +++++++ crates/rattler_glob/src/glob_set/mod.rs | 356 ++++++++++++ crates/rattler_glob/src/glob_set/walk.rs | 396 +++++++++++++ crates/rattler_glob/src/glob_set/walk_root.rs | 535 ++++++++++++++++++ crates/rattler_glob/src/lib.rs | 42 ++ ...test__glob_hash_case_1_satisfiability.snap | 10 + ...ash_case_2_satisfiability_ignore_lock.snap | 10 + ...hash__test__glob_hash_case_3_non_glob.snap | 9 + .../source-dependency/pixi.lock | 1 + .../source-dependency/pixi.toml | 3 + 14 files changed, 2047 insertions(+) create mode 100644 crates/rattler_glob/Cargo.toml create mode 100644 crates/rattler_glob/src/glob_hash.rs create mode 100644 crates/rattler_glob/src/glob_hash_cache.rs create mode 100644 crates/rattler_glob/src/glob_mtime.rs create mode 100644 crates/rattler_glob/src/glob_set/mod.rs create mode 100644 crates/rattler_glob/src/glob_set/walk.rs create mode 100644 crates/rattler_glob/src/glob_set/walk_root.rs create mode 100644 crates/rattler_glob/src/lib.rs create mode 100644 crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_1_satisfiability.snap create mode 100644 crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_2_satisfiability_ignore_lock.snap create mode 100644 crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_3_non_glob.snap create mode 100644 tests/data/satisfiability/source-dependency/pixi.lock create mode 100644 tests/data/satisfiability/source-dependency/pixi.toml diff --git a/Cargo.lock b/Cargo.lock index 1eb1bb48e..8211ba788 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2949,6 
+2949,22 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "ignore" +version = "0.4.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -4722,6 +4738,26 @@ dependencies = [ "url", ] +[[package]] +name = "rattler_glob" +version = "0.1.0" +dependencies = [ + "dashmap", + "fs-err", + "ignore", + "insta", + "itertools 0.14.0", + "memchr", + "parking_lot 0.12.5", + "rattler_digest", + "rstest", + "serde", + "tempfile", + "thiserror 2.0.17", + "tokio", + "tracing", +] + [[package]] name = "rattler_index" version = "0.27.5" diff --git a/crates/rattler_glob/Cargo.toml b/crates/rattler_glob/Cargo.toml new file mode 100644 index 000000000..d08adb353 --- /dev/null +++ b/crates/rattler_glob/Cargo.toml @@ -0,0 +1,27 @@ +[package] +description = "A crate to deal with glob patterns" +edition.workspace = true +homepage.workspace = true +license.workspace = true +name = "rattler_glob" +readme.workspace = true +repository.workspace = true +version = "0.1.0" + +[dependencies] +dashmap = { workspace = true } +fs-err = { workspace = true } +ignore = "0.4" +itertools = { workspace = true } +memchr = { workspace = true } +parking_lot = { workspace = true } +rattler_digest = { workspace = true } +thiserror = { workspace = true } +tokio = { workspace = true, features = ["sync", "rt"] } +tracing = { workspace = true } + +[dev-dependencies] +insta = { workspace = true, features = ["yaml", "redactions"] } +rstest = { workspace = true } +serde = { workspace = true, features = ["derive"] } +tempfile = { workspace = true } diff --git a/crates/rattler_glob/src/glob_hash.rs b/crates/rattler_glob/src/glob_hash.rs new file mode 100644 index 000000000..e3c29bc22 --- /dev/null +++ 
b/crates/rattler_glob/src/glob_hash.rs @@ -0,0 +1,286 @@ +//! This module contains the `GlobHash` struct which is used to calculate a hash of the files that match the given glob patterns. +//! Use this if you want to calculate a hash of a set of files that match a glob pattern. +//! This is useful for finding out if you need to rebuild a target based on the files that match a glob pattern. +use std::{ + fs::File, + io::{self, BufRead, Read, Write}, + path::{Path, PathBuf}, +}; + +use rattler_digest::{digest::Digest, Sha256, Sha256Hash}; +use thiserror::Error; + +use crate::{GlobSet, GlobSetError}; + +/// Contains a hash of the files that match the given glob patterns. +#[derive(Debug, Clone, Default)] +pub struct GlobHash { + /// The hash of the files that match the given glob patterns. + pub hash: Sha256Hash, + #[cfg(test)] + matching_files: Vec, +} + +/// Errors that can occur when computing a glob hash. +#[derive(Error, Debug)] +pub enum GlobHashError { + /// Failed to normalize line endings while reading a file. + #[error("during line normalization, failed to access {}", .0.display())] + NormalizeLineEnds(PathBuf, #[source] io::Error), + + /// The hash computation was cancelled (e.g., task was aborted). + #[error("the operation was cancelled")] + Cancelled, + + /// An error occurred while building or walking the glob set. + #[error(transparent)] + GlobSetIgnore(#[from] GlobSetError), +} + +impl GlobHash { + /// Calculate a hash of the files that match the given glob patterns. + /// + /// This function walks the directory tree starting from `root_dir`, finds all files + /// matching the provided glob patterns, and computes a combined SHA-256 hash of their + /// paths and contents. The hash is computed deterministically (files are sorted by path). + /// + /// Line endings are normalized during hashing: `\r\n` sequences are converted to `\n` + /// in text files, while binary files (detected by the presence of null bytes) are + /// hashed verbatim. 
+ /// + /// # Arguments + /// * `root_dir` - The root directory to search from + /// * `globs` - An iterator of glob patterns (supports gitignore-style syntax) + /// + /// # Returns + /// A `GlobHash` containing the computed hash, or an error if the operation failed. + /// + /// # Example + /// ```no_run + /// use rattler_glob::GlobHash; + /// use std::path::Path; + /// + /// let hash = GlobHash::from_patterns( + /// Path::new("/my/project"), + /// ["src/**/*.rs", "!src/generated/**"], + /// ).unwrap(); + /// + /// println!("Hash: {:x}", hash.hash); + /// ``` + pub fn from_patterns<'a>( + root_dir: &Path, + globs: impl IntoIterator, + ) -> Result { + // If the root is not a directory or does not exist, return an empty map. + if !root_dir.is_dir() { + return Ok(Self::default()); + } + + let glob_set = GlobSet::create(globs)?; + // Collect matching entries and convert to concrete DirEntry list, propagating errors. + let mut entries = glob_set.collect_matching(root_dir)?; + + // Sort deterministically by path + entries.sort_by_key(|e| e.path().to_path_buf()); + + #[cfg(test)] + let mut matching_files = Vec::new(); + + let mut hasher = Sha256::default(); + for entry in entries { + // Construct a normalized file path to ensure consistent hashing across + // platforms. And add it to the hash. + let relative_path = entry.path().strip_prefix(root_dir).unwrap_or(entry.path()); + let normalized_file_path = relative_path.to_string_lossy().replace("\\", "/"); + rattler_digest::digest::Update::update(&mut hasher, normalized_file_path.as_bytes()); + + #[cfg(test)] + matching_files.push(normalized_file_path); + + // Concatenate the contents of the file to the hash. 
+ File::open(entry.path()) + .and_then(|mut file| normalize_line_endings(&mut file, &mut hasher)) + .map_err(move |e| { + GlobHashError::NormalizeLineEnds(entry.path().to_path_buf(), e) + })?; + } + + let hash = hasher.finalize(); + + Ok(Self { + hash, + #[cfg(test)] + matching_files, + }) + } +} + +/// This function copies the contents of the reader to the writer but normalizes +/// the line endings (e.g. replaces `\r\n` with `\n`) in text files. +fn normalize_line_endings(reader: &mut R, writer: &mut W) -> io::Result<()> { + let mut reader = io::BufReader::new(reader); + + // Check if binary by looking for null bytes + let buffer = reader.fill_buf()?; + if buffer.contains(&0) { + std::io::copy(&mut reader, writer)?; + return Ok(()); + } + + let mut pending_cr = false; + + loop { + let buffer = reader.fill_buf()?; + if buffer.is_empty() { + break; + } + + let mut written_to = 0; + + for (i, &byte) in buffer.iter().enumerate() { + if byte == b'\r' { + // Flush any previous pending \r (it was standalone) + if pending_cr { + writer.write_all(b"\r")?; + } + // Write everything up to this \r + writer.write_all(&buffer[written_to..i])?; + written_to = i + 1; + pending_cr = true; + } else if byte == b'\n' { + // Write everything up to this \n + writer.write_all(&buffer[written_to..i])?; + written_to = i + 1; + // Write \n - pending \r is discarded (normalizes \r\n → \n) + writer.write_all(b"\n")?; + pending_cr = false; + } else if pending_cr { + // Previous \r was standalone, write it now + writer.write_all(b"\r")?; + pending_cr = false; + } + } + + // Write remaining data in buffer + writer.write_all(&buffer[written_to..])?; + + let len = buffer.len(); + reader.consume(len); + } + + // Handle trailing \r at EOF + if pending_cr { + writer.write_all(b"\r")?; + } + + Ok(()) +} + +#[cfg(test)] +mod test { + use std::path::Path; + + use itertools::Itertools; + use rstest::*; + + use super::*; + + #[fixture] + pub fn testname() -> String { + let thread_name = 
std::thread::current().name().unwrap().to_string(); + let test_name = thread_name.rsplit("::").next().unwrap_or(&thread_name); + format!("glob_hash_{test_name}") + } + + #[rstest] + #[case::satisfiability(vec!["tests/data/satisfiability/source-dependency/**/*"])] + #[case::satisfiability_ignore_lock(vec!["tests/data/satisfiability/source-dependency/**/*", "!tests/data/satisfiability/source-dependency/**/*.lock"])] + #[case::non_glob(vec!["tests/data/satisfiability/source-dependency/pixi.toml"])] + fn test_input_hash(testname: String, #[case] globs: Vec<&str>) { + let root_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(Path::parent) + .unwrap(); + let glob_hash = GlobHash::from_patterns(root_dir, globs.iter().copied()).unwrap(); + let snapshot = format!( + "Globs:\n{}\nHash: {:x}\nMatched files:\n{}", + globs + .iter() + .format_with("\n", |glob, f| f(&format_args!("- {glob}"))), + glob_hash.hash, + glob_hash + .matching_files + .iter() + .format_with("\n", |glob, f| f(&format_args!("- {glob}"))) + ); + insta::assert_snapshot!(testname, snapshot); + } + + #[test] + fn test_normalize_line_endings() { + let input = + "\rHello\r\nWorld\r\nYou are the best\nThere is no-one\r\r \rlike you.\r".repeat(8196); + let mut normalized: Vec = Vec::new(); + normalize_line_endings(&mut input.as_bytes(), &mut normalized).unwrap(); + let output = String::from_utf8(normalized).unwrap(); + assert_eq!(output, input.replace("\r\n", "\n")); + } + + /// A reader that returns data in small chunks, used to test buffer boundary behavior. 
+ struct ChunkedReader<'a> { + data: &'a [u8], + chunk_size: usize, + } + + impl<'a> Read for ChunkedReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let len = std::cmp::min(self.chunk_size, std::cmp::min(buf.len(), self.data.len())); + if len == 0 { + return Ok(0); + } + buf[..len].copy_from_slice(&self.data[..len]); + self.data = &self.data[len..]; + Ok(len) + } + } + + #[test] + fn test_crlf_spanning_buffer_boundary() { + // Test case where \r\n spans across buffer boundaries. + // We use a chunk size of 1 to ensure \r and \n are in different reads. + let input = b"Hello\r\nWorld\r\nEnd"; + let mut reader = ChunkedReader { + data: input, + chunk_size: 1, // Force each byte to be read separately + }; + let mut output: Vec = Vec::new(); + normalize_line_endings(&mut reader, &mut output).unwrap(); + assert_eq!(output, b"Hello\nWorld\nEnd"); + } + + #[test] + fn test_standalone_cr_across_boundary() { + // Test that standalone \r (not followed by \n) is preserved even across boundaries. + let input = b"Hello\rWorld"; + let mut reader = ChunkedReader { + data: input, + chunk_size: 1, + }; + let mut output: Vec = Vec::new(); + normalize_line_endings(&mut reader, &mut output).unwrap(); + assert_eq!(output, b"Hello\rWorld"); + } + + #[test] + fn test_cr_at_end_of_input() { + // Test that \r at the very end of input is preserved. + let input = b"Hello\r"; + let mut reader = ChunkedReader { + data: input, + chunk_size: 1, + }; + let mut output: Vec = Vec::new(); + normalize_line_endings(&mut reader, &mut output).unwrap(); + assert_eq!(output, b"Hello\r"); + } +} diff --git a/crates/rattler_glob/src/glob_hash_cache.rs b/crates/rattler_glob/src/glob_hash_cache.rs new file mode 100644 index 000000000..7c146948f --- /dev/null +++ b/crates/rattler_glob/src/glob_hash_cache.rs @@ -0,0 +1,132 @@ +//! This module contains the `GlobHashCache` struct which is used to cache the computation of glob hashes. This cache is an in-process cache +//! 
so it's purpose is to re-use computed hashes across multiple calls to the same glob hash computation for the same set of input files. +//! The input files are deemed not to change between calls. +use std::{ + collections::BTreeSet, + convert::identity, + hash::Hash, + path::PathBuf, + sync::{Arc, Weak}, +}; + +use dashmap::{DashMap, Entry}; +use tokio::sync::broadcast; + +use super::{GlobHash, GlobHashError}; + +/// A key for the cache of glob hashes. +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct GlobHashKey { + /// The root directory of the glob patterns. + root: PathBuf, + /// The glob patterns. + globs: BTreeSet, +} + +impl GlobHashKey { + /// Creates a new `GlobHashKey` from the given root directory and glob patterns. + /// + /// If the provided `root` is a file, its parent directory will be used instead. + /// If the file has no parent (e.g., a root path like `/` or `C:\`), the path + /// is used unchanged. + pub fn new(root: impl Into, globs: BTreeSet) -> Self { + let mut root = root.into(); + // Ensure that `root` points to a directory, not a file. + if root.is_file() { + if let Some(parent) = root.parent() { + root = parent.to_owned(); + } + } + + Self { root, globs } + } +} + +#[derive(Debug)] +enum HashCacheEntry { + /// The value is currently being computed. + Pending(Weak>), + + /// We have a value for this key. + Done(GlobHash), +} + +/// An object that caches the computation of glob hashes. It deduplicates +/// requests for the same hash. +/// +/// Its is safe and efficient to use this object from multiple threads. +#[derive(Debug, Default, Clone)] +pub struct GlobHashCache { + cache: Arc>, +} + +impl GlobHashCache { + /// Computes the input hash of the given key. If the hash is already in the + /// cache, it will return the cached value. If the hash is not in the + /// cache, it will compute the hash (deduplicating any request) and return + /// it. 
+ pub async fn compute_hash(&self, key: GlobHashKey) -> Result { + match self.cache.entry(key.clone()) { + Entry::Vacant(entry) => { + // Construct a channel over which we will be sending the result and store it in + // the map. If another requests comes in for the same hash it will find this + // entry. + let (tx, _) = broadcast::channel(1); + let tx = Arc::new(tx); + let weak_tx = Arc::downgrade(&tx); + entry.insert(HashCacheEntry::Pending(weak_tx)); + + // Spawn the computation of the hash + let computation_key = key.clone(); + let result = tokio::task::spawn_blocking(move || { + GlobHash::from_patterns( + &computation_key.root, + computation_key.globs.iter().map(String::as_str), + ) + }) + .await + .map_or_else( + |err| match err.try_into_panic() { + Ok(panic) => std::panic::resume_unwind(panic), + Err(_) => Err(GlobHashError::Cancelled), + }, + identity, + )?; + + // Store the result in the cache + self.cache.insert(key, HashCacheEntry::Done(result.clone())); + + // Broadcast the result, ignore the error. If the receiver is dropped, we don't + // care. + let _ = tx.send(result.clone()); + + Ok(result) + } + Entry::Occupied(entry) => { + match entry.get() { + HashCacheEntry::Pending(weak_tx) => { + let sender = weak_tx.clone(); + let mut subscriber = sender + .upgrade() + .ok_or(GlobHashError::Cancelled)? + .subscribe(); + drop(entry); + subscriber + .recv() + .await + .map_err(|_recv_error| GlobHashError::Cancelled) + } + HashCacheEntry::Done(hash) => { + // We have a value for this key. + Ok(hash.clone()) + } + } + } + } + } + + /// Clears all memoized glob hashes. In-flight computations are unaffected. 
+ pub fn clear(&self) { + self.cache.clear(); + } +} diff --git a/crates/rattler_glob/src/glob_mtime.rs b/crates/rattler_glob/src/glob_mtime.rs new file mode 100644 index 000000000..5c3a22b85 --- /dev/null +++ b/crates/rattler_glob/src/glob_mtime.rs @@ -0,0 +1,204 @@ +use std::{ + path::{Path, PathBuf}, + time::SystemTime, +}; + +use thiserror::Error; + +use crate::{GlobSet, GlobSetError}; + +/// Contains the newest modification time for the files that match the given glob patterns. +#[derive(Debug, Clone)] +pub enum GlobModificationTime { + /// No files matched the given glob patterns. + NoMatches, + /// Files matched the glob patterns, and this variant contains the newest modification time and designated file. + MatchesFound { + /// The newest modification time for the files that match the given glob patterns. + modified_at: SystemTime, + /// The designated file with the newest modification time. + designated_file: PathBuf, + }, +} + +/// Errors that can occur when computing glob modification times. +#[derive(Error, Debug)] +pub enum GlobModificationTimeError { + /// Failed to retrieve the modification time for a matched file. + #[error("error calculating modification time for {}", .0.display())] + CalculateMTime(PathBuf, #[source] std::io::Error), + + /// An error occurred while building or walking the glob set. + #[error(transparent)] + GlobSetIgnore(#[from] GlobSetError), +} + +impl GlobModificationTime { + /// Returns the modification time if files were found, or `None` if no files matched. + pub fn modified_at(&self) -> Option { + match self { + Self::NoMatches => None, + Self::MatchesFound { modified_at, .. } => Some(*modified_at), + } + } + + /// Returns the path of the file with the newest modification time, or `None` if no files matched. + pub fn file(&self) -> Option<&Path> { + match self { + Self::NoMatches => None, + Self::MatchesFound { + designated_file, .. 
+            } => Some(designated_file),
+        }
+    }
+
+    /// Returns `true` if any files matched the glob patterns.
+    pub fn is_found(&self) -> bool {
+        matches!(self, Self::MatchesFound { .. })
+    }
+
+    /// Calculate the newest modification time for the files that match the given glob patterns.
+    ///
+    /// This function walks the directory tree starting from `root_dir`, finds all files
+    /// matching the provided glob patterns, and returns the newest modification time
+    /// along with the path of that file.
+    ///
+    /// # Arguments
+    /// * `root_dir` - The root directory to search from
+    /// * `globs` - An iterator of glob patterns (supports gitignore-style syntax)
+    ///
+    /// # Returns
+    /// A `GlobModificationTime` indicating whether files were found and their newest
+    /// modification time, or an error if the operation failed.
+    ///
+    /// # Example
+    /// ```no_run
+    /// use rattler_glob::GlobModificationTime;
+    /// use std::path::Path;
+    ///
+    /// let mtime = GlobModificationTime::from_patterns(
+    ///     Path::new("/my/project"),
+    ///     ["src/**/*.rs"],
+    /// ).unwrap();
+    ///
+    /// if let Some(time) = mtime.modified_at() {
+    ///     println!("Newest file modified at: {:?}", time);
+    /// }
+    /// ```
+    pub fn from_patterns<'a>(
+        root_dir: &Path,
+        globs: impl IntoIterator<Item = &'a str>,
+    ) -> Result<Self, GlobModificationTimeError> {
+        // Delegate to the ignore-based implementation for performance.
+        Self::from_patterns_ignore(root_dir, globs)
+    }
+
+    /// Same as `from_patterns` but uses the `ignore` crate for walking/matching.
+    ///
+    /// If `root_dir` points at a file, the search starts from the directory that
+    /// contains it.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`GlobModificationTimeError::GlobSetIgnore`] if the patterns cannot be
+    /// built or the walk fails, and [`GlobModificationTimeError::CalculateMTime`] if the
+    /// metadata or modification time of a matched file cannot be read.
+    pub fn from_patterns_ignore<'a>(
+        root_dir: &Path,
+        globs: impl IntoIterator<Item = &'a str>,
+    ) -> Result<Self, GlobModificationTimeError> {
+        // Normalize root to a directory if a file was passed. The normalized path is
+        // the one that must be handed to the walker below.
+        let root = if root_dir.is_file() {
+            match root_dir.parent() {
+                Some(parent) if !parent.as_os_str().is_empty() => parent,
+                // A bare relative file name has an empty parent: search the current
+                // directory instead of an empty path.
+                _ => Path::new("."),
+            }
+        } else {
+            root_dir
+        };
+
+        let glob_set = GlobSet::create(globs)?;
+        let entries = glob_set.collect_matching(root)?;
+
+        let mut latest = None;
+        let mut designated_file = PathBuf::new();
+
+        for entry in entries {
+            let matched_path = entry.path().to_path_buf();
+            let md = match entry.metadata() {
+                Ok(md) => md,
+                Err(e) => {
+                    return Err(GlobModificationTimeError::CalculateMTime(
+                        matched_path,
+                        std::io::Error::other(e.to_string()),
+                    ));
+                }
+            };
+            let modified = md
+                .modified()
+                .map_err(|e| GlobModificationTimeError::CalculateMTime(matched_path.clone(), e))?;
+
+            // Keep the current candidate unless this file is strictly newer.
+            if latest.is_some_and(|cur| cur >= modified) {
+                continue;
+            }
+            latest = Some(modified);
+            designated_file = matched_path;
+        }
+
+        match latest {
+            Some(modified_at) => Ok(Self::MatchesFound {
+                modified_at,
+                designated_file,
+            }),
+            None => Ok(Self::NoMatches),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs::File;
+    use std::time::{Duration, SystemTime};
+    use tempfile::tempdir;
+
+    #[test]
+    fn test_glob_modification_time() {
+        // Create a temporary directory
+        let temp_dir = tempdir().unwrap();
+        let dir_path = temp_dir.path();
+
+        // Two minutes ago
+        let now = SystemTime::now() - Duration::from_secs(120);
+
+        // Create files with different modification times
+        let files = [
+            // Three minutes ago
+            ("file1.txt", now - Duration::from_secs(60)),
+            // Two minutes ago
+            ("file2.txt", now),
+            // One minute ago <- should select this
+            ("file3.txt", now + Duration::from_secs(60)),
+        ];
+
+        // Create files with different modification times
+        for (name, mtime) in files {
+            let path = dir_path.join(name);
+            File::create(&path).unwrap().set_modified(mtime).unwrap();
+        }
+
+        // Use glob patterns to match `.txt` files
+        let glob_mod_time = GlobModificationTime::from_patterns(dir_path, ["*.txt"]).unwrap();
+
+        match glob_mod_time {
+            GlobModificationTime::MatchesFound {
+                modified_at,
+                designated_file,
+            } => {
+                // 
Assert that the designated file is `file3.txt` with the latest modification time + assert_eq!(designated_file, dir_path.join("file3.txt")); + assert_eq!(modified_at, now + Duration::from_secs(60)); + } + GlobModificationTime::NoMatches => panic!("Expected matches but found none"), + } + } + + #[test] + fn test_glob_modification_time_no_matches() { + // Create a temporary directory + let temp_dir = tempdir().unwrap(); + let dir_path = temp_dir.path(); + + // Use glob patterns that match no files + let glob_mod_time = GlobModificationTime::from_patterns(dir_path, ["*.md"]).unwrap(); + + assert!(matches!(glob_mod_time, GlobModificationTime::NoMatches)); + } +} diff --git a/crates/rattler_glob/src/glob_set/mod.rs b/crates/rattler_glob/src/glob_set/mod.rs new file mode 100644 index 000000000..543153e6e --- /dev/null +++ b/crates/rattler_glob/src/glob_set/mod.rs @@ -0,0 +1,356 @@ +//! Convenience wrapper around `ignore` that provides glob matching with intuitive semantics. +//! +//! This module provides [`GlobSet`], which matches files using gitignore-style patterns but with +//! behavioral tweaks that make it more intuitive for typical use cases. +//! +//! # Behavioral Differences from Standard Gitignore +//! +//! ## Pattern Rebasing +//! +//! Patterns containing `..` components (e.g., `../src/*.rs`) are automatically rebased to work +//! from a common ancestor directory. This allows patterns to reference files outside the immediate +//! search root while still using a single efficient directory walker. +//! +//! For example, searching from `/project/subdir` with patterns `["../src/*.rs", "*.txt"]`: +//! - The walker starts from `/project` (the **effective walk root**) +//! - `../src/*.rs` becomes `src/*.rs` +//! - `*.txt` becomes `subdir/*.txt` +//! +//! See the [`walk_root`] module for implementation details. +//! +//! ## Global Exclusions +//! +//! Negated patterns starting with `**/` (e.g., `!**/build.rs`) are treated as global exclusions +//! and skip rebasing. 
This ensures `!**/build.rs` excludes every `build.rs` file regardless of +//! where the effective root ends up. +//! +//! ## Anchored Literals +//! +//! Plain file names without glob metacharacters (e.g., `config.toml`) are anchored to the search +//! root, matching only at that location rather than anywhere in the tree. This differs from +//! standard gitignore behavior where unanchored patterns match at any depth. +//! +//! Similarly, negated literals (e.g., `!config.toml`) only exclude the file at the root, not +//! copies in subdirectories. + +mod walk; +mod walk_root; + +use std::path::{Path, PathBuf}; + +use thiserror::Error; + +use walk_root::{WalkRoot, WalkRootsError}; + +/// A glob set implemented using the `ignore` crate (globset + fast walker). +pub struct GlobSet { + /// Include patterns (gitignore-style), without leading '!'. + walk_roots: WalkRoot, +} + +/// Errors that can occur when creating or walking a glob set. +#[derive(Error, Debug)] +pub enum GlobSetError { + /// Failed to build the glob override patterns. + #[error("failed to build globs")] + BuildOverrides(#[source] ignore::Error), + + /// An error occurred while walking the directory tree. + #[error("walk error at {0}")] + Walk(PathBuf, #[source] ignore::Error), + + /// An error occurred while building the walk roots from glob patterns. + #[error(transparent)] + WalkRoots(#[from] WalkRootsError), +} + +impl GlobSet { + /// Create a new [`GlobSet`] from a list of patterns. Leading '!' indicates exclusion. + /// + /// # Errors + /// Returns a [`GlobSetError`] if the glob patterns are invalid. + pub fn create<'t>(globs: impl IntoIterator) -> Result { + Ok(GlobSet { + walk_roots: WalkRoot::build(globs)?, + }) + } + + /// Walks files matching all include/exclude patterns using a single parallel walker. + /// Returns a flat Vec of results to keep lifetimes simple and predictable. 
+ pub fn collect_matching(&self, root_dir: &Path) -> Result, GlobSetError> { + if self.walk_roots.is_empty() { + return Ok(vec![]); + } + + let rebased = self.walk_roots.rebase(root_dir)?; + walk::walk_globs(&rebased.root, &rebased.globs) + } +} + +#[cfg(test)] +mod tests { + use super::GlobSet; + use fs_err::{self as fs, File}; + use insta::assert_yaml_snapshot; + use std::path::{Path, PathBuf}; + use tempfile::tempdir; + + fn relative_path(path: &Path, root: &Path) -> PathBuf { + if let Ok(rel) = path.strip_prefix(root) { + return rel.to_path_buf(); + } + if let Some(parent) = root.parent() { + if let Ok(rel) = path.strip_prefix(parent) { + return std::path::Path::new("..").join(rel); + } + } + path.to_path_buf() + } + + fn sorted_paths(entries: Vec, root: &std::path::Path) -> Vec { + let mut paths: Vec<_> = entries + .into_iter() + .map(|entry| { + relative_path(entry.path(), root) + .display() + .to_string() + .replace('\\', "/") + }) + .collect(); + paths.sort(); + paths + } + + // Test out a normal non-reseated globbing approach + #[test] + fn collect_matching_inclusion_exclusion() { + let temp_dir = tempdir().unwrap(); + let root_path = temp_dir.path(); + + File::create(root_path.join("include1.txt")).unwrap(); + File::create(root_path.join("include2.log")).unwrap(); + File::create(root_path.join("exclude.txt")).unwrap(); + fs::create_dir(root_path.join("subdir")).unwrap(); + File::create(root_path.join("subdir/include_subdir.txt")).unwrap(); + + let glob_set = GlobSet::create(vec!["**/*.txt", "!exclude.txt"]).unwrap(); + let entries = glob_set.collect_matching(root_path).unwrap(); + + let paths = sorted_paths(entries, root_path); + assert_yaml_snapshot!(paths, @r###" + - include1.txt + - subdir/include_subdir.txt + "###); + } + + // Check some general globbing support and make sure the correct things do not match + #[test] + fn collect_matching_relative_globs() { + let temp_dir = tempdir().unwrap(); + let root_path = temp_dir.path(); + let search_root = 
root_path.join("workspace"); + fs::create_dir(&search_root).unwrap(); + + fs::create_dir(root_path.join("subdir")).unwrap(); + File::create(root_path.join("subdir/some_inner_source.cpp")).unwrap(); + File::create(root_path.join("subdir/dont-match.txt")).unwrap(); + File::create(search_root.join("match.txt")).unwrap(); + + let glob_set = GlobSet::create(vec!["../**/*.cpp", "*.txt"]).unwrap(); + let entries = glob_set.collect_matching(&search_root).unwrap(); + + let paths = sorted_paths(entries, &search_root); + assert_yaml_snapshot!(paths, @r###" + - "../subdir/some_inner_source.cpp" + - match.txt + "###); + } + + // Check that single matching file glob works with rebasing + #[test] + fn collect_matching_file_glob() { + let temp_dir = tempdir().unwrap(); + let root_path = temp_dir.path().join("workspace"); + fs::create_dir(&root_path).unwrap(); + + File::create(root_path.join("pixi.toml")).unwrap(); + + let glob_set = GlobSet::create(vec!["pixi.toml", "../*.cpp"]).unwrap(); + let entries = glob_set.collect_matching(&root_path).unwrap(); + + let paths = sorted_paths(entries, &root_path); + assert_yaml_snapshot!(paths, @"- pixi.toml"); + } + + // Check that global ignores !**/ patterns ignore everything even if the root has been + // rebased to a parent folder, this is just a convenience assumed to be preferable + // from a user standpoint + #[test] + fn check_global_ignore_ignores() { + let temp_dir = tempdir().unwrap(); + let root_path = temp_dir.path().join("workspace"); + fs::create_dir(&root_path).unwrap(); + + File::create(root_path.join("pixi.toml")).unwrap(); + File::create(root_path.join("foo.txt")).unwrap(); + // This would be picked up otherwise + File::create(temp_dir.path().join("foo.txt")).unwrap(); + + let glob_set = GlobSet::create(vec!["pixi.toml", "!**/foo.txt"]).unwrap(); + let entries = glob_set.collect_matching(&root_path).unwrap(); + + let paths = sorted_paths(entries, &root_path); + assert_yaml_snapshot!(paths, @"- pixi.toml"); + } + + // Check 
that we can ignore a subset of file when using the rebasing + // So we want to match all `.txt` and `*.toml` files except in the root location + // where want to exclude `foo.txt` + #[test] + fn check_subset_ignore() { + let temp_dir = tempdir().unwrap(); + let root_path = temp_dir.path().join("workspace"); + fs::create_dir(&root_path).unwrap(); + + File::create(root_path.join("pixi.toml")).unwrap(); + // This should not be picked up + File::create(root_path.join("foo.txt")).unwrap(); + // But because of the non-global ignore this should be + File::create(temp_dir.path().join("foo.txt")).unwrap(); + + let glob_set = GlobSet::create(vec!["../*.{toml,txt}", "!foo.txt"]).unwrap(); + let entries = glob_set.collect_matching(&root_path).unwrap(); + + let paths = sorted_paths(entries, &root_path); + assert_yaml_snapshot!(paths, @r###" + - "../foo.txt" + - pixi.toml + "###); + } + + #[test] + fn check_we_ignore_hidden_files() { + let temp_dir = tempdir().unwrap(); + let root_path = temp_dir.path().join("workspace"); + fs::create_dir(&root_path).unwrap(); + + let hidden_pixi_folder = root_path.join(".pixi"); + + fs::create_dir(&hidden_pixi_folder).unwrap(); + // This should not be picked up + File::create(hidden_pixi_folder.join("foo_hidden.txt")).unwrap(); + // But because of the non-global ignore this should be + File::create(root_path.as_path().join("foo_public.txt")).unwrap(); + + let glob_set = GlobSet::create(vec!["*.txt"]).unwrap(); + let entries = glob_set.collect_matching(&root_path).unwrap(); + + let paths = sorted_paths(entries, &root_path); + assert_yaml_snapshot!(paths, @"- foo_public.txt"); + } + + #[test] + fn check_hidden_folders_are_included() { + let temp_dir = tempdir().unwrap(); + let root_path = temp_dir.path().join("workspace"); + fs::create_dir(&root_path).unwrap(); + + let hidden_pixi_folder = root_path.join(".pixi"); + + let hidden_foobar_folder = root_path.join(".foobar"); + + let hidden_recursive_folder = root_path + .join("recursive") + 
.join("foobar") + .join(".deep_hidden"); + + fs::create_dir(&hidden_pixi_folder).unwrap(); + fs::create_dir(&hidden_foobar_folder).unwrap(); + fs::create_dir_all(&hidden_recursive_folder).unwrap(); + + File::create(hidden_pixi_folder.join("foo_hidden.txt")).unwrap(); + File::create(hidden_foobar_folder.as_path().join("foo_from_foobar.txt")).unwrap(); + File::create(hidden_foobar_folder.as_path().join("build.txt")).unwrap(); + + File::create(hidden_recursive_folder.join("foo_from_deep_hidden.txt")).unwrap(); + + File::create(root_path.as_path().join("some_text.txt")).unwrap(); + let glob_set = GlobSet::create(vec![ + "**", + ".foobar/foo_from_foobar.txt", + "**/.deep_hidden/**", + ]) + .unwrap(); + + let entries = glob_set.collect_matching(&root_path).unwrap(); + + let paths = sorted_paths(entries, &root_path); + assert_yaml_snapshot!(paths, @r#" + - ".foobar/foo_from_foobar.txt" + - recursive/foobar/.deep_hidden/foo_from_deep_hidden.txt + - some_text.txt + "#); + } + + #[test] + fn check_hidden_folder_is_whitelisted_with_star() { + let temp_dir = tempdir().unwrap(); + let root_path = temp_dir.path().join("workspace"); + fs::create_dir(&root_path).unwrap(); + + let hidden_pixi_folder = root_path.join(".pixi").join("subdir"); + + fs::create_dir_all(&hidden_pixi_folder).unwrap(); + + File::create(hidden_pixi_folder.join("foo_hidden.txt")).unwrap(); + + File::create(root_path.as_path().join("some_text.txt")).unwrap(); + let glob_set = GlobSet::create(vec![".pixi/subdir/**"]).unwrap(); + + let entries = glob_set.collect_matching(&root_path).unwrap(); + + let paths = sorted_paths(entries, &root_path); + assert_yaml_snapshot!(paths, @r###"- ".pixi/subdir/foo_hidden.txt""###); + } + + #[test] + fn check_hidden_folders_are_not_included() { + let temp_dir = tempdir().unwrap(); + let root_path = temp_dir.path().join("workspace"); + fs::create_dir(&root_path).unwrap(); + + let hidden_pixi_folder = root_path.join(".pixi"); + + fs::create_dir(&hidden_pixi_folder).unwrap(); + + 
File::create(hidden_pixi_folder.join("foo_hidden.txt")).unwrap(); + + File::create(root_path.as_path().join("some_text.txt")).unwrap(); + // We want to match everything except hidden folders + let glob_set = GlobSet::create(vec!["**"]).unwrap(); + + let entries = glob_set.collect_matching(&root_path).unwrap(); + + let paths = sorted_paths(entries, &root_path); + assert_yaml_snapshot!(paths, @"- some_text.txt"); + } + + /// Because we are using ignore which uses gitignore style parsing of globs we need to do some extra processing + /// to make this more like unix globs in this case we check this explicitly here + #[test] + fn single_file_match() { + let temp_dir = tempdir().unwrap(); + let workspace = temp_dir.path().join("workspace"); + fs::create_dir(&workspace).unwrap(); + let subdir = workspace.join("subdir"); + fs::create_dir(&subdir).unwrap(); + + File::create(subdir.join("pixi.toml")).unwrap(); + + let glob_set = GlobSet::create(vec!["pixi.toml"]).unwrap(); + let entries = glob_set.collect_matching(&workspace).unwrap(); + + let paths = sorted_paths(entries, &workspace); + assert_yaml_snapshot!(paths, @"[]"); + } +} diff --git a/crates/rattler_glob/src/glob_set/walk.rs b/crates/rattler_glob/src/glob_set/walk.rs new file mode 100644 index 000000000..8b8e2bde6 --- /dev/null +++ b/crates/rattler_glob/src/glob_set/walk.rs @@ -0,0 +1,396 @@ +//! Contains the directory walking implementation +use itertools::Itertools; +use parking_lot::Mutex; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use crate::glob_set::walk_root::SimpleGlob; + +use super::GlobSetError; + +type SharedResults = Arc>>>>; + +struct CollectBuilder { + // Shared aggregation storage wrapped in an Option so we can `take` at the end. + sink: SharedResults, + // The root we are walking, used for error reporting + err_root: PathBuf, +} + +struct CollectVisitor { + // Local per-thread buffer to append results without holding the lock. + local: Vec>, + // Reference to the shared sink. 
+ sink: SharedResults, + // The root we are walking, used for error reporting + err_root: PathBuf, +} + +impl Drop for CollectVisitor { + // This merges the outputs on the drop + fn drop(&mut self) { + let mut sink = self.sink.lock(); + sink.get_or_insert_with(Vec::new).append(&mut self.local); + } +} + +impl<'s> ignore::ParallelVisitorBuilder<'s> for CollectBuilder { + fn build(&mut self) -> Box { + // Build a visitor that maintains an internal list + Box::new(CollectVisitor { + local: Vec::new(), + sink: Arc::clone(&self.sink), + err_root: self.err_root.clone(), + }) + } +} + +impl ignore::ParallelVisitor for CollectVisitor { + /// This function loops over all matches, ignores directories, and ignores `PermissionDenied` and + /// `NotFound` errors. + fn visit(&mut self, dir_entry: Result) -> ignore::WalkState { + match dir_entry { + Ok(dir_entry) => { + if dir_entry.file_type().is_some_and(|ft| ft.is_dir()) { + return ignore::WalkState::Continue; + } + self.local.push(Ok(dir_entry)); + } + Err(e) => { + if let Some(ioe) = e.io_error() { + match ioe.kind() { + std::io::ErrorKind::NotFound | std::io::ErrorKind::PermissionDenied => {} + _ => self + .local + .push(Err(GlobSetError::Walk(self.err_root.clone(), e))), + } + } else { + self.local + .push(Err(GlobSetError::Walk(self.err_root.clone(), e))); + } + } + } + ignore::WalkState::Continue + } +} + +/// Walk over the globs in the specific root +pub fn walk_globs( + effective_walk_root: &Path, + globs: &[SimpleGlob], +) -> Result, GlobSetError> { + let mut ob = ignore::overrides::OverrideBuilder::new(effective_walk_root); + let glob_patterns = globs + .iter() + .map(|g| anchor_literal_pattern(g.to_pattern())) + .collect_vec(); + + // Always add ignore hidden folders unless the user explicitly included them + // because we add patterns as overrides, which overrides any `WalkBuilder` settings. 
+ let ignore_patterns = set_ignore_hidden_patterns(&glob_patterns); + + for provided_pattern in &glob_patterns { + ob.add(provided_pattern) + .map_err(GlobSetError::BuildOverrides)?; + } + + let enable_ignoring_hidden = if let Some(ref patterns) = ignore_patterns { + // If we added negated patterns for hidden folders, we want to allow searching through hidden folders + // unless the user explicitly included them + tracing::debug!("Adding ignore patterns for hidden folders: {:?}", patterns); + for pattern in patterns { + ob.add(pattern).map_err(GlobSetError::BuildOverrides)?; + } + false + } else { + true + }; + + let overrides = ob.build().map_err(GlobSetError::BuildOverrides)?; + + let mut builder = ignore::WalkBuilder::new(effective_walk_root); + + let walker_builder = builder + .git_ignore(false) + .git_exclude(true) + .hidden(enable_ignoring_hidden) + .git_global(false) + .ignore(false) + .overrides(overrides) + .build_parallel(); + + let collected: SharedResults = Arc::new(Mutex::new(Some(Vec::new()))); + let start = std::time::Instant::now(); + + let mut builder = CollectBuilder { + sink: Arc::clone(&collected), + err_root: effective_walk_root.to_path_buf(), + }; + walker_builder.visit(&mut builder); + + let results = collected.lock().take().unwrap_or_default(); + + // Log some statistics as long as we are unsure with regards to performance + let matched = results.len(); + let elapsed = start.elapsed(); + let (include, excludes): (Vec<_>, Vec<_>) = globs.iter().partition(|g| !g.is_negated()); + let include_patterns = include.iter().map(|g| g.to_pattern()).join(", "); + let exclude_patterns = excludes.iter().map(|g| g.to_pattern()).join(", "); + + tracing::debug!( + include = include_patterns, + excludes = exclude_patterns, + matched, + elapsed_ms = elapsed.as_millis(), + root = ?effective_walk_root, + "glob pass completed" + ); + + results.into_iter().collect() +} + +/// Ensures plain file names behave as "current directory" matches for the ignore crate. 
+/// +/// Gitignore syntax treats bare literals (e.g. `pixi.toml`) as "match anywhere below the root". +/// To keep parity with the previous wax-based globbing, which treated them like Unix globs anchored +/// to the working directory, we prepend a `/` so the override only applies at the search root. +/// Literals are anchored whether they are positive or negated—`foo` matches only the root file and +/// `!foo` excludes only that file—while anything containing meta characters or directory separators +/// is left untouched and keeps gitignore semantics. +fn anchor_literal_pattern(pattern: String) -> String { + fn needs_anchor(body: &str) -> bool { + if body.is_empty() { + return false; + } + // These will not occur when used in conjunction with GlobWalkRoot, but lets keep + // them for if this is not used in conjunction with these + if body.starts_with("./") || body.starts_with('/') || body.starts_with("../") { + return false; + } + if body.contains('/') { + return false; + } + if body.chars().any(|c| matches!(c, '*' | '?' | '[' | '{')) { + return false; + } + true + } + + let (negated, body) = if let Some(rest) = pattern.strip_prefix('!') { + (true, rest) + } else { + (false, pattern.as_str()) + }; + + if needs_anchor(body) { + let mut anchored = String::with_capacity(pattern.len() + 2); + if negated { + anchored.push('!'); + } + anchored.push('/'); + anchored.push_str(body); + anchored + } else { + pattern + } +} + +/// Ensures that hidden folders (starting with a dot) are always ignored unless explicitly included. +/// The ones that are requested are added back as a whitelist. +/// The initial problem was that when using glob like: `**` ( which means include everything ) +/// overrides our `WalkerBuilder` setting, where we explicitly ignore hidden folders. 
+/// Imagine a user-provided globs like this: +/// +/// ```text +/// "**", ".foo/bar.txt" +/// ``` +/// To make it work, we need first to ignore all hidden folders after users' globs, so it becomes like this: +/// ```text +/// "**", ".foo/bar.txt" "!{**/.*, .*, .**/*}" +/// ``` +/// +/// Then, we need to whitelist the `.foo` folder (treat it as a special glob, we don't know why, just re-adding back `.foo/bar.txt` doesn't work ) +/// Ignore everything from foo: `"!.foo/*"`, and then `whitelist` the `.foo/bar.txt` again. +/// So the final globs will look like this: +/// +/// ```text +/// ["**", ".foo/bar.txt", "!{**/.*, .*, .**/*}", ".foo", "!.foo/*", ".foo/bar.txt"] +/// ``` +/// +/// This is a special use case, when the user combines ** with some hidden folders. +/// Otherwise ( in case of ** ), we just ignore every hidden folder +/// Or in case of requesting a simple hidden folder, it will search just for it without any additional negation patterns. +pub fn set_ignore_hidden_patterns(patterns: &[String]) -> Option> { + // Detect if user explicitly included hidden folders + // e.g. ".*", "**/.*", ".foobar/*", "**/.deep_hidden/**", etc. 
+ let user_includes_hidden = patterns.iter().any(|p| { + // Check if pattern starts with a dot (whitelist) + p.starts_with('.') || + // Check if pattern contains a hidden folder path component + p.contains("/.") && !p.starts_with("!.") + }); + + // Check if negation patterns for all hidden files/folders already exist + let has_negation_for_all_folders = patterns.iter().any(|p| p.starts_with("!**/.*")); + + let requested_everything = patterns + .iter() + .any(|p| p == "**" || p == "./**" || p == "**/*" || p == "./**/*"); + + if has_negation_for_all_folders { + // If user negated all hidden folders, we do not need to add anything + return None; + } + + let search_all_hidden = patterns + .iter() + .any(|p| p == ".*" || p == ".**" || p == "**/.*" || p == "./.*" || p == ".**/*"); + + tracing::debug!( + user_includes_hidden, + has_negation_for_all_folders, + search_all_hidden, + requested_everything, + "Determining hidden folder handling: ", + ); + + // If user requested searching through hidden folders, + // we allow searching them all and don't add any negation patterns + if search_all_hidden { + return patterns.to_vec().into(); + } + + // If user has explicitly included hidden folders and no negation exists, + // add the negation pattern at the end, then whitelist specific folders + // Example: + // Input: ["**", ".foo/bar.txt"] + // Output: ["**", ".foo", "!{**/.*, .*, .**/*}", ".foo", "!.foo/*", ".foo/bar.txt"] + // This is because `ignore` globs work as a whitelist ignore + // so first, we need to ignore all hidden files/folders, + // then add back the requested ones ( just the folder name, for some reason we don't know why .foo/bar.txt doesn't work ) + // then ignore all its contents, then add back the specific file. 
+ // This is a special case only when the user asks for all folders/files ( ** glob), which overrides all WalkBuilder settings + // or user requested hidden folders explicitly + if requested_everything || (user_includes_hidden && !has_negation_for_all_folders) { + let mut result = patterns.to_vec(); + + // result.push("!{**/.*, .*, .**/*}".to_string()); + result.push("!{**/.*, .*, .**/*}".to_string()); + + // Now add back any explicitly whitelisted hidden folders/files + for pattern in patterns { + if (pattern.starts_with('.') || pattern.contains("/.")) && !pattern.starts_with("!.") { + // Check if this is a specific file path (not a glob pattern) + let is_specific_file = !pattern.contains('*') + && !pattern.contains('?') + && !pattern.contains('[') + && pattern.contains('/'); + + if is_specific_file { + // Transform specific file paths: .nichita/foo.txt + if let Some(last_slash) = pattern.rfind('/') { + let dir = &pattern[..last_slash]; + + // Add: directory, negation of all its contents, then the specific file + result.push(dir.to_string()); + + let negate_all = format!("!{dir}/*"); + result.push(negate_all); + + // Always re-add the specific file pattern at the end + result.push(pattern.clone()); + } + } else { + // Extract the hidden folder name from patterns like: + // ".pixi/*" -> ".pixi" + // "**/.deep_pixi/**" -> ".deep_pixi" + // ".build/CMakeFiles/**" -> ".build" + let hidden_folder = if pattern.starts_with('.') { + // Pattern like ".pixi/*" or ".build/CMakeFiles/**" + // Extract just the first hidden folder component + if let Some(slash_idx) = pattern.find('/') { + &pattern[..slash_idx] + } else { + pattern + } + } else if let Some(idx) = pattern.find("/.") { + // Pattern like "**/.deep_pixi/**" + let after_slash = &pattern[idx + 1..]; + if let Some(slash_idx) = after_slash.find('/') { + &after_slash[..slash_idx] + } else { + // No '/' exists in after_slash, so this is the whole string + after_slash + } + } else { + continue; + }; + + // Re-add the 
whitelisted folder and its contents + result.push(hidden_folder.to_string()); + } + } + } + + return Some(result.into_iter().collect()); + } + + None +} + +#[cfg(test)] +mod tests { + use crate::glob_set::walk::set_ignore_hidden_patterns; + + use super::anchor_literal_pattern; + + #[test] + fn anchors_literal_file_patterns() { + assert_eq!( + anchor_literal_pattern("pixi.toml".to_string()), + "/pixi.toml" + ); + // Patterns that already specify a subdirectory should stay untouched. + assert_eq!( + anchor_literal_pattern("foo/bar/baz.txt".to_string()), + "foo/bar/baz.txt" + ); + } + + #[test] + fn leaves_non_literal_patterns_untouched() { + assert_eq!( + anchor_literal_pattern("!pixi.toml".to_string()), + "!/pixi.toml" + ); + assert_eq!(anchor_literal_pattern("*.toml".to_string()), "*.toml"); + assert_eq!(anchor_literal_pattern("!*.toml".to_string()), "!*.toml"); + assert_eq!( + anchor_literal_pattern("src/lib.rs".to_string()), + "src/lib.rs" + ); + assert_eq!( + anchor_literal_pattern("../pixi.toml".to_string()), + "../pixi.toml" + ); + } + + #[test] + fn adds_negated_patterns_when_no_hidden_includes() { + let input = vec!["**".to_string()]; + let expected = vec!["**".to_string(), "!{**/.*, .*, .**/*}".to_string()]; + assert_eq!(set_ignore_hidden_patterns(&input), Some(expected)); + } + + #[test] + fn hidden_folder_is_whitelisted_at_the_end() { + let input = vec!["**".to_string(), ".nichita".to_string()]; + let expected = vec![ + "**".to_string(), + ".nichita".to_string(), + "!{**/.*, .*, .**/*}".to_string(), + ".nichita".to_string(), + ]; + assert_eq!(set_ignore_hidden_patterns(&input), Some(expected)); + } +} diff --git a/crates/rattler_glob/src/glob_set/walk_root.rs b/crates/rattler_glob/src/glob_set/walk_root.rs new file mode 100644 index 000000000..37db0c69b --- /dev/null +++ b/crates/rattler_glob/src/glob_set/walk_root.rs @@ -0,0 +1,535 @@ +//! Plan the effective glob walk root for a set of patterns that may contain relative components. +//! +//! 
# Effective Walk Root +//! +//! When glob patterns contain relative path components like `..`, they may reference directories +//! outside the user's specified search root. For example, if you're searching from `/project/subdir` +//! with the pattern `../src/*.rs`, the pattern actually targets `/project/src/`. +//! +//! The **effective walk root** is the common ancestor directory from which all provided patterns +//! can be evaluated. This module calculates how many `..` segments need to be traversed to find +//! that common ancestor. +//! +//! # Rebasing +//! +//! **Rebasing** is the process of adjusting glob patterns so they work correctly relative to the +//! effective walk root. When patterns are rebased: +//! +//! - The `..` components are resolved by moving the walk root up the directory tree +//! - Any concrete path components between the `..` segments and the glob portion are spliced +//! back into the pattern +//! +//! ## Example +//! +//! Given search root `/project/subdir` and patterns: +//! - `../src/*.rs` (targets `/project/src/`) +//! - `*.txt` (targets `/project/subdir/`) +//! +//! The effective walk root becomes `/project` (one level up), and patterns are rebased to: +//! - `src/*.rs` +//! - `subdir/*.txt` +//! +//! This allows a single directory walker to efficiently match all patterns from one starting point. +//! +//! # Global Exclusions +//! +//! Negated patterns starting with `**/` (e.g., `!**/build.rs`) are treated as global exclusions +//! and are **not** rebased. This ensures wildcard exclusions continue to apply everywhere, +//! regardless of where the effective root ends up. 
+ +use std::path::{Component, Path, PathBuf}; + +/// Simple handler to work with our globs +/// basically splits up negation +#[derive(Clone, Debug)] +pub struct SimpleGlob { + glob: String, + negated: bool, +} + +impl SimpleGlob { + pub fn new(glob: String, negated: bool) -> Self { + Self { glob, negated } + } + + #[cfg(test)] + /// Returns the pattern without leading ! + pub fn normalized_pattern(&self) -> &str { + &self.glob + } + + pub fn is_negated(&self) -> bool { + self.negated + } + + /// Returns a proper glob pattern + pub fn to_pattern(&self) -> String { + if self.negated { + format!("!{}", self.glob) + } else { + self.glob.clone() + } + } +} + +#[derive(thiserror::Error, Debug)] +pub enum WalkRootsError { + #[error("after processing glob '{glob}', split into '{prefix}' and empty glob")] + EmptyGlob { prefix: String, glob: String }, + + #[error("expected prefix '{prefix}' to be relative, it is absolute")] + AbsolutePrefix { prefix: String }, + + #[error("cannot ascend {required} level(s) from '{root}'")] + CannotAscend { required: usize, root: PathBuf }, +} + +#[derive(Debug)] +struct GlobSpec { + // Is this a ! 
glob + negated: bool, + // How many `..` path components does this contain + parent_dirs: usize, + // The `foo/bar/` concrete components + concrete_components: Vec, + // Original glob pattern + pattern: String, + // Determines if we want to rebase the glob + skip_rebase: bool, +} + +/// Contains the globs and the joinable path +pub struct WalkRoot { + // The parsed glob specifications + specs: Vec, + // The maximum number of parent dirs we need to ascend + max_parent_dirs: usize, +} + +/// Globs rebased to a common root +pub struct RebasedGlobs { + // The new root directory to search from + pub root: PathBuf, + // The globs with the rebased patterns + pub globs: Vec, +} + +impl WalkRoot { + /// Build a list of globs into a structure that we can use to rebase or reparent + /// the globs when given + pub fn build<'t>(globs: impl IntoIterator) -> Result { + let mut specs = Vec::new(); + let mut max_parent_dirs = 0usize; + + for glob in globs { + let negated = glob.starts_with('!'); + let glob = if negated { &glob[1..] 
} else { glob }; + + // First split of any relative part information + let (prefix, pattern) = split_path_and_glob(glob); + + // Having an empty glob is an error + if pattern.is_empty() { + return Err(WalkRootsError::EmptyGlob { + prefix: prefix.to_string(), + glob: glob.to_string(), + }); + } + + let normalized_prefix = normalize_relative(Path::new(prefix)); + // This will determine how we need to rebase the globs + let mut parent_dirs = 0usize; + let mut concrete_components = Vec::new(); + + // Loop over components and split into concrete and relative parts + for comp in normalized_prefix.components() { + match comp { + Component::ParentDir => parent_dirs += 1, + Component::CurDir => {} + Component::Normal(s) => { + concrete_components.push(s.to_string_lossy().into_owned()); + } + Component::RootDir | Component::Prefix(_) => { + return Err(WalkRootsError::AbsolutePrefix { + prefix: prefix.to_string(), + }); + } + } + } + + // We skip !**/ patterns for rebasing, as we would probably always want to apply those + let skip_rebase = + negated && normalized_prefix.as_os_str().is_empty() && pattern.starts_with("**/"); + + max_parent_dirs = max_parent_dirs.max(parent_dirs); + specs.push(GlobSpec { + negated, + parent_dirs, + concrete_components, + pattern: pattern.to_string(), + skip_rebase, + }); + } + + Ok(Self { + specs, + max_parent_dirs, + }) + } + + pub fn is_empty(&self) -> bool { + self.specs.is_empty() + } + + /// Rebase the globs into the designated roots + /// How this rebasing works is determined by the input globs. 
+ /// This only actually does something when we have some "relative" globs + /// Like `../../*.rs` or something of the sort + pub fn rebase(&self, root: &Path) -> Result { + if self.specs.is_empty() { + return Ok(RebasedGlobs { + root: root.to_path_buf(), + globs: Vec::new(), + }); + } + + // Count all available components in the path + let available = root + .components() + .filter(|c| matches!(c, Component::Normal(_) | Component::Prefix(_))) + .count(); + + if available < self.max_parent_dirs { + // This happens when we have a glob somewhere like + // `../../../foo` but we try to search in `/tmp` + // in that case we cannot ascend up high enough + return Err(WalkRootsError::CannotAscend { + required: self.max_parent_dirs, + root: root.to_path_buf(), + }); + } + + // We are going to modify till we get to the root + let mut effective_root = root.to_path_buf(); + let mut popped = Vec::with_capacity(self.max_parent_dirs); + for _ in 0..self.max_parent_dirs { + let name = effective_root + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .expect("bug: checked available components beforehand"); + effective_root.pop(); + popped.push(name); + } + popped.reverse(); + + let mut rebased = Vec::with_capacity(self.specs.len()); + for spec in &self.specs { + // Skip rebasing entirely + if spec.skip_rebase { + rebased.push(SimpleGlob::new(spec.pattern.clone(), spec.negated)); + continue; + } + + let distance_from_prefix = self.max_parent_dirs.saturating_sub(spec.parent_dirs); + + // Create the glob prefix + let mut components = Vec::new(); + components.extend(popped.iter().take(distance_from_prefix).cloned()); + components.extend(spec.concrete_components.iter().cloned()); + + let rebased_pattern = if components.is_empty() { + // No rebasing needs to be performed + spec.pattern.clone() + } else { + // Rebase the glob with the calculated parent + format!("{}/{}", components.join("/"), spec.pattern) + }; + + rebased.push(SimpleGlob::new(rebased_pattern, spec.negated)); + 
} + + Ok(RebasedGlobs { + root: effective_root, + globs: rebased, + }) + } +} + +/// Split a pattern into `(path_prefix, glob_part)`. +/// - `path_prefix` ends at the last separator before the first glob metachar (`* ? [ {`) +/// and includes that separator (e.g. "src/"). +/// - `glob_part` is the rest starting from the component that contains the first meta. +/// If no glob is present, returns `("", input)`. +/// +/// Examples: +/// "../.././../*.{rs,cc}" -> ("../.././../", "*.{rs,cc}") +/// "src/*/test?.rs" -> ("src/", "*/test?.rs") +/// "*.rs" -> ("", "*.rs") +/// "plain/path" -> ("", "plain/path") +pub fn split_path_and_glob(input: &str) -> (&str, &str) { + fn is_meta(c: char) -> bool { + matches!(c, '*' | '?' | '[' | '{') + } + + fn is_sep(c: char) -> bool { + c == '/' + } + for (i, ch) in input.char_indices() { + if is_meta(ch) { + if let Some(sep_idx) = input[..i].rfind(|c: char| is_sep(c)) { + return (&input[..=sep_idx], &input[sep_idx + 1..]); + } else { + return ("", input); + } + } + } + + // In this case we have not found any meta patterns and we can assume the glob can be in the form of a file match like + // foo/bar.txt, because we will need to add a current directory `./` separator as we are using ignore and gitignore style + // glob rules + ("", input) +} + +/// Normalize paths like `../.././` into paths like `../../` +/// Also resolves components with parent dir like `recipe/..` into an empty path +pub fn normalize_relative(path: &Path) -> PathBuf { + let mut out = Vec::new(); + for comp in path.components() { + match comp { + Component::CurDir => {} + Component::ParentDir => { + // Pop the last normal component if present, drop if at root, otherwise keep the ParentDir + match out.last() { + Some(Component::Normal(_)) => { + out.pop(); + } + Some(Component::RootDir) => { + // Can't go above root directory - ignore this ParentDir + } + _ => { + out.push(comp); + } + } + } + _ => out.push(comp), + } + } + out.iter().collect() +} + +#[cfg(test)] +mod 
tests { + use std::path::Path; + + use super::{normalize_relative, split_path_and_glob, WalkRoot}; + use insta::assert_yaml_snapshot; + use serde::Serialize; + + #[derive(Serialize)] + struct SnapshotWalk { + root: String, + globs: Vec, + } + + #[derive(Serialize)] + struct SnapshotGlob { + pattern: String, + negated: bool, + } + + fn snapshot_walk_roots(plan: &WalkRoot, root: &Path) -> SnapshotWalk { + let rebased = plan.rebase(root).expect("rebase should succeed"); + let root_str = rebased.root.display().to_string().replace('\\', "/"); + let globs = rebased + .globs + .iter() + .map(|g| SnapshotGlob { + pattern: g.normalized_pattern().to_string(), + negated: g.is_negated(), + }) + .collect(); + SnapshotWalk { + root: root_str, + globs, + } + } + + #[test] + fn test_split_path_and_glob() { + assert_eq!( + split_path_and_glob("../.././../*.{rs,cc}"), + ("../.././../", "*.{rs,cc}") + ); + assert_eq!( + split_path_and_glob("src/*/test?.rs"), + ("src/", "*/test?.rs") + ); + assert_eq!(split_path_and_glob("*.rs"), ("", "*.rs")); + assert_eq!(split_path_and_glob("plain/path"), ("", "plain/path")); + assert_eq!(split_path_and_glob("foo[ab]/bar"), ("", "foo[ab]/bar")); + assert_eq!(split_path_and_glob("pixi.toml"), ("", "pixi.toml")); + } + + #[test] + fn test_normalize() { + assert_eq!( + normalize_relative(Path::new("./.././.././")), + Path::new("../../") + ); + // Test that recipe/.. normalizes to empty path + assert_eq!(normalize_relative(Path::new("recipe/../")), Path::new("")); + // Test that foo/bar/../baz normalizes to foo/baz + assert_eq!( + normalize_relative(Path::new("foo/bar/../baz")), + Path::new("foo/baz") + ); + // Test that ../recipe/.. normalizes to .. + assert_eq!( + normalize_relative(Path::new("../recipe/..")), + Path::new("..") + ); + // Test absolute paths with .. 
(can't go above root) + assert_eq!(normalize_relative(Path::new("/..")), Path::new("/")); + assert_eq!(normalize_relative(Path::new("/../foo")), Path::new("/foo")); + assert_eq!(normalize_relative(Path::new("/foo/..")), Path::new("/")); + assert_eq!(normalize_relative(Path::new("/.")), Path::new("/")); + assert_eq!( + normalize_relative(Path::new("/foo/bar/../..")), + Path::new("/") + ); + } + + // Couple of test cases to check that rebasing works as expected + #[test] + fn determine_groups_globs_by_normalized_prefix() { + let globs = [ + "./src/**/*.rs", + "!./src/**/*.tmp", + "../include/*.c", + "!.pixi/**", + "!**/.pixi/**", + "**/*.cpp", + ]; + + let walk_roots = WalkRoot::build(globs).expect("determine should succeed"); + + assert_yaml_snapshot!( + snapshot_walk_roots(&walk_roots, Path::new("workspace/baz")), + @r###" + root: workspace + globs: + - pattern: baz/src/**/*.rs + negated: false + - pattern: baz/src/**/*.tmp + negated: true + - pattern: include/*.c + negated: false + - pattern: baz/.pixi/** + negated: true + - pattern: "**/.pixi/**" + negated: true + - pattern: baz/**/*.cpp + negated: false + "### + ); + } + + // Check that nothing happens when rebasing + #[test] + fn determine_handles_globs_without_prefix() { + let globs = ["*.rs", "!*.tmp"]; + + let walk_roots = WalkRoot::build(globs).expect("determine should succeed"); + + assert_yaml_snapshot!( + snapshot_walk_roots(&walk_roots, Path::new("workspace/baz")), + @r###" + root: workspace/baz + globs: + - pattern: "*.rs" + negated: false + - pattern: "*.tmp" + negated: true + "### + ); + } + + #[test] + fn iterates_over_roots_and_globs() { + let globs = ["src/**/*.rs", "!src/**/generated.rs", "docs/**/*.md"]; + + let walk_roots = WalkRoot::build(globs).expect("determine should succeed"); + assert_yaml_snapshot!( + snapshot_walk_roots(&walk_roots, Path::new("workspace")), + @r###" + root: workspace + globs: + - pattern: src/**/*.rs + negated: false + - pattern: src/**/generated.rs + negated: true + - 
pattern: docs/**/*.md + negated: false + "### + ); + } + + #[test] + fn determine_negated_directory_glob_sticks_to_root() { + let globs = ["!.pixi/**", "../*.{cc,cpp}"]; + + let walk_roots = WalkRoot::build(globs).expect("determine should succeed"); + + assert_yaml_snapshot!( + snapshot_walk_roots(&walk_roots, Path::new("workspace/baz")), + @r###" + root: workspace + globs: + - pattern: baz/.pixi/** + negated: true + - pattern: "*.{cc,cpp}" + negated: false + "### + ); + } + + #[test] + fn single_file_match() { + let globs = ["pixi.toml", "../*.{cc,cpp}"]; + + let walk_roots = WalkRoot::build(globs).expect("determine should succeed"); + + assert_yaml_snapshot!( + snapshot_walk_roots(&walk_roots, Path::new("workspace/baz")), + @r###" + root: workspace + globs: + - pattern: baz/pixi.toml + negated: false + - pattern: "*.{cc,cpp}" + negated: false + "### + ); + } + + #[test] + fn test_recipe_parent_dir_glob() { + // This test verifies that globs like "recipe/../**" are properly normalized + // to just "**" instead of incorrectly becoming "recipe/**" + let globs = ["recipe/**", "recipe/../**"]; + + let walk_roots = WalkRoot::build(globs).expect("build should succeed"); + + assert_yaml_snapshot!( + snapshot_walk_roots(&walk_roots, Path::new("workspace")), + @r###" + root: workspace + globs: + - pattern: recipe/** + negated: false + - pattern: "**" + negated: false + "### + ); + } +} diff --git a/crates/rattler_glob/src/lib.rs b/crates/rattler_glob/src/lib.rs new file mode 100644 index 000000000..ca3c11f05 --- /dev/null +++ b/crates/rattler_glob/src/lib.rs @@ -0,0 +1,42 @@ +#![deny(missing_docs)] +//! A crate for working with glob patterns and computing hashes or modification times +//! over matched files. +//! +//! This crate provides utilities for: +//! - Matching files using glob patterns with gitignore-style semantics +//! - Computing hashes over matched files (useful for cache invalidation) +//! - Finding the newest modification time among matched files +//! 
- Caching glob hash computations +//! +//! # Glob Semantics +//! +//! The glob matching in this crate uses gitignore-style patterns with some notable +//! behavioral tweaks to make it more intuitive for typical use cases: +//! +//! - **Relative patterns and rebasing**: Patterns containing `..` components (e.g., +//! `../src/*.rs`) are automatically **rebased** to work from a common ancestor directory +//! called the **effective walk root**. For example, searching from `/project/subdir` with +//! patterns `["../src/*.rs", "*.txt"]` will walk from `/project`, transforming the patterns +//! to `src/*.rs` and `subdir/*.txt` respectively. This allows efficient single-pass matching +//! even when patterns reference different directories. +//! +//! - **Global exclusions**: Negated patterns starting with `**/` (e.g., `!**/build.rs`) +//! are treated as global exclusions and skip rebasing, ensuring they apply everywhere +//! regardless of where the effective root ends up. +//! +//! - **Anchored literals**: Plain file names without meta characters (e.g., `config.toml`) +//! are anchored to the search root, matching only at that location rather than +//! anywhere in the tree. This differs from standard gitignore behavior. +//! +//! - **Hidden files**: By default, hidden files and directories (starting with `.`) +//! are excluded unless explicitly included in the pattern. 
+ +mod glob_hash; +mod glob_hash_cache; +mod glob_mtime; +mod glob_set; + +pub use glob_hash::{GlobHash, GlobHashError}; +pub use glob_hash_cache::{GlobHashCache, GlobHashKey}; +pub use glob_mtime::{GlobModificationTime, GlobModificationTimeError}; +pub use glob_set::{GlobSet, GlobSetError}; diff --git a/crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_1_satisfiability.snap b/crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_1_satisfiability.snap new file mode 100644 index 000000000..cdb933db2 --- /dev/null +++ b/crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_1_satisfiability.snap @@ -0,0 +1,10 @@ +--- +source: crates/rattler_glob/src/glob_hash.rs +expression: snapshot +--- +Globs: +- tests/data/satisfiability/source-dependency/**/* +Hash: ddf785bcb41be51ffe8893e3407d35f421d86f728ed93498c73240529363fef1 +Matched files: +- tests/data/satisfiability/source-dependency/pixi.lock +- tests/data/satisfiability/source-dependency/pixi.toml diff --git a/crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_2_satisfiability_ignore_lock.snap b/crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_2_satisfiability_ignore_lock.snap new file mode 100644 index 000000000..ac722d50b --- /dev/null +++ b/crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_2_satisfiability_ignore_lock.snap @@ -0,0 +1,10 @@ +--- +source: crates/rattler_glob/src/glob_hash.rs +expression: snapshot +--- +Globs: +- tests/data/satisfiability/source-dependency/**/* +- !tests/data/satisfiability/source-dependency/**/*.lock +Hash: 21ed4a291565e4c1ca3fa1dc8a22671e127b61ad674f2c36f9f9a1e4b0b92c48 +Matched files: +- tests/data/satisfiability/source-dependency/pixi.toml diff --git a/crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_3_non_glob.snap 
b/crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_3_non_glob.snap new file mode 100644 index 000000000..366429e29 --- /dev/null +++ b/crates/rattler_glob/src/snapshots/rattler_glob__glob_hash__test__glob_hash_case_3_non_glob.snap @@ -0,0 +1,9 @@ +--- +source: crates/rattler_glob/src/glob_hash.rs +expression: snapshot +--- +Globs: +- tests/data/satisfiability/source-dependency/pixi.toml +Hash: 21ed4a291565e4c1ca3fa1dc8a22671e127b61ad674f2c36f9f9a1e4b0b92c48 +Matched files: +- tests/data/satisfiability/source-dependency/pixi.toml diff --git a/tests/data/satisfiability/source-dependency/pixi.lock b/tests/data/satisfiability/source-dependency/pixi.lock new file mode 100644 index 000000000..9f8539df4 --- /dev/null +++ b/tests/data/satisfiability/source-dependency/pixi.lock @@ -0,0 +1 @@ +lock-content diff --git a/tests/data/satisfiability/source-dependency/pixi.toml b/tests/data/satisfiability/source-dependency/pixi.toml new file mode 100644 index 000000000..f2b594796 --- /dev/null +++ b/tests/data/satisfiability/source-dependency/pixi.toml @@ -0,0 +1,3 @@ +[project] +name = "test-project" + From 42780c1f7a34c088efe0f88acc6933212b43a424 Mon Sep 17 00:00:00 2001 From: Tim de Jager Date: Wed, 28 Jan 2026 17:28:22 +0100 Subject: [PATCH 2/7] Update crates/rattler_glob/src/glob_hash_cache.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/rattler_glob/src/glob_hash_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_glob/src/glob_hash_cache.rs b/crates/rattler_glob/src/glob_hash_cache.rs index 7c146948f..1ec1cfb7a 100644 --- a/crates/rattler_glob/src/glob_hash_cache.rs +++ b/crates/rattler_glob/src/glob_hash_cache.rs @@ -1,5 +1,5 @@ //! This module contains the `GlobHashCache` struct which is used to cache the computation of glob hashes. This cache is an in-process cache -//! 
so it's purpose is to re-use computed hashes across multiple calls to the same glob hash computation for the same set of input files. +//! so its purpose is to re-use computed hashes across multiple calls to the same glob hash computation for the same set of input files. //! The input files are deemed not to change between calls. use std::{ collections::BTreeSet, From 3cf78e759fc071a4777260b182acc9dcdef7b687 Mon Sep 17 00:00:00 2001 From: Tim de Jager Date: Wed, 28 Jan 2026 17:28:31 +0100 Subject: [PATCH 3/7] Update crates/rattler_glob/src/glob_hash_cache.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/rattler_glob/src/glob_hash_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_glob/src/glob_hash_cache.rs b/crates/rattler_glob/src/glob_hash_cache.rs index 1ec1cfb7a..593d04cb5 100644 --- a/crates/rattler_glob/src/glob_hash_cache.rs +++ b/crates/rattler_glob/src/glob_hash_cache.rs @@ -54,7 +54,7 @@ enum HashCacheEntry { /// An object that caches the computation of glob hashes. It deduplicates /// requests for the same hash. /// -/// Its is safe and efficient to use this object from multiple threads. +/// It is safe and efficient to use this object from multiple threads. 
#[derive(Debug, Default, Clone)] pub struct GlobHashCache { cache: Arc>, From f3e1ecaf0c829775c331c4662c069282bb19622a Mon Sep 17 00:00:00 2001 From: Tim de Jager Date: Wed, 28 Jan 2026 17:28:40 +0100 Subject: [PATCH 4/7] Update crates/rattler_glob/src/glob_set/walk.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/rattler_glob/src/glob_set/walk.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/rattler_glob/src/glob_set/walk.rs b/crates/rattler_glob/src/glob_set/walk.rs index 8b8e2bde6..7a13ce543 100644 --- a/crates/rattler_glob/src/glob_set/walk.rs +++ b/crates/rattler_glob/src/glob_set/walk.rs @@ -274,7 +274,6 @@ pub fn set_ignore_hidden_patterns(patterns: &[String]) -> Option> { if requested_everything || (user_includes_hidden && !has_negation_for_all_folders) { let mut result = patterns.to_vec(); - // result.push("!{**/.*, .*, .**/*}".to_string()); result.push("!{**/.*, .*, .**/*}".to_string()); // Now add back any explicitly whitelisted hidden folders/files From 4f8a305c9ce434285589ff5c10f23d98d35da317 Mon Sep 17 00:00:00 2001 From: Tim de Jager Date: Wed, 28 Jan 2026 17:28:49 +0100 Subject: [PATCH 5/7] Update crates/rattler_glob/Cargo.toml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/rattler_glob/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/rattler_glob/Cargo.toml b/crates/rattler_glob/Cargo.toml index d08adb353..58b32c2d0 100644 --- a/crates/rattler_glob/Cargo.toml +++ b/crates/rattler_glob/Cargo.toml @@ -13,7 +13,6 @@ dashmap = { workspace = true } fs-err = { workspace = true } ignore = "0.4" itertools = { workspace = true } -memchr = { workspace = true } parking_lot = { workspace = true } rattler_digest = { workspace = true } thiserror = { workspace = true } From 139c8f0125ec49ac493c0880aab70d2d97615f86 Mon Sep 17 00:00:00 2001 From: Tim de Jager Date: Wed, 28 Jan 2026 17:28:58 +0100 Subject: [PATCH 6/7] Update crates/rattler_glob/Cargo.toml Co-authored-by: 
Copilot <175728472+Copilot@users.noreply.github.com> --- crates/rattler_glob/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_glob/Cargo.toml b/crates/rattler_glob/Cargo.toml index 58b32c2d0..478e59f91 100644 --- a/crates/rattler_glob/Cargo.toml +++ b/crates/rattler_glob/Cargo.toml @@ -1,5 +1,5 @@ [package] -description = "A crate to deal with glob patterns" +description = "A crate for glob pattern matching with hash computation and modification time tracking" edition.workspace = true homepage.workspace = true license.workspace = true From 6746b9a6e2b10ff62ff78d13fea789519a825baa Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Wed, 28 Jan 2026 18:57:13 +0100 Subject: [PATCH 7/7] Convert ignore dependency to workspace dependency in rattler_glob (#2013) --- Cargo.lock | 1 - Cargo.toml | 1 + crates/rattler_glob/Cargo.toml | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8211ba788..1a8256e1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4747,7 +4747,6 @@ dependencies = [ "ignore", "insta", "itertools 0.14.0", - "memchr", "parking_lot 0.12.5", "rattler_digest", "rstest", diff --git a/Cargo.toml b/Cargo.toml index ab30fb61d..9f1f102fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,6 +85,7 @@ http = "1.3" http-cache-semantics = "2.1.0" humansize = "2.1.3" humantime = "2.2.0" +ignore = "0.4" indexmap = "2.10.0" indicatif = "0.18.0" insta = { version = "1.43.1" } diff --git a/crates/rattler_glob/Cargo.toml b/crates/rattler_glob/Cargo.toml index 478e59f91..c4e8d4dc5 100644 --- a/crates/rattler_glob/Cargo.toml +++ b/crates/rattler_glob/Cargo.toml @@ -11,7 +11,7 @@ version = "0.1.0" [dependencies] dashmap = { workspace = true } fs-err = { workspace = true } -ignore = "0.4" +ignore = { workspace = true } itertools = { workspace = true } parking_lot = { workspace = true } rattler_digest = { workspace = true }