diff --git a/deny.toml b/deny.toml index b3ef3b00e..93b2729f3 100644 --- a/deny.toml +++ b/deny.toml @@ -1,19 +1,19 @@ [licenses] allow = [ - "0BSD", - "Apache-2.0 WITH LLVM-exception", - "Apache-2.0", - "BSD-2-Clause", - "BSD-3-Clause", - "BSL-1.0", - "bzip2-1.0.6", - "CC0-1.0", - "CDLA-Permissive-2.0", - "ISC", - "MIT", - "MPL-2.0", - "Unicode-3.0", - "Zlib", + "0BSD", + "Apache-2.0 WITH LLVM-exception", + "Apache-2.0", + "BSD-2-Clause", + "BSD-3-Clause", + "BSL-1.0", + "bzip2-1.0.6", + "CC0-1.0", + "CDLA-Permissive-2.0", + "ISC", + "MIT", + "MPL-2.0", + "Unicode-3.0", + "Zlib", ] confidence-threshold = 0.8 private = { ignore = true } diff --git a/src/build.rs b/src/build.rs index 3b368613d..2279baafb 100644 --- a/src/build.rs +++ b/src/build.rs @@ -124,35 +124,77 @@ pub async fn run_build( let directories = output.build_configuration.directories.clone(); - let output = if output.recipe.cache.is_some() { - output.build_or_fetch_cache(tool_configuration).await? - } else { - output - .fetch_sources(tool_configuration, apply_patch_custom) - .await - .into_diagnostic()? + // Handle cache outputs first + let mut output = output; + let warn_additional_sources = |output: &Output| { + if !output.recipe.sources().is_empty() && output.finalized_cache_sources.is_some() { + tracing::warn!( + "Output defines sources in addition to cache sources. \ + This may overwrite files from the cache. \ + Consider using 'target_directory' in source definitions to avoid conflicts." + ); + } }; + match ( + output.recipe.cache.is_some(), + output.cache_outputs_to_build.is_empty(), + ) { + (true, _) => { + output = output.build_or_fetch_cache(tool_configuration).await?; + warn_additional_sources(&output); + } + (false, false) => { + let cache_outputs = output.cache_outputs_to_build.clone(); + for cache_output in cache_outputs.iter() { + output = output + .build_or_fetch_cache_output(cache_output, tool_configuration) + .await?; + } + warn_additional_sources(&output); + } + (false, true) => { + output = output + .fetch_sources(tool_configuration, apply_patch_custom) + .await + .into_diagnostic()?; + } + } + let output = output .resolve_dependencies(tool_configuration, RunExportsDownload::DownloadMissing) .await .into_diagnostic()?; - output - .install_environments(tool_configuration) - .await - .into_diagnostic()?; + // Fast-path optimization: When an output inherits from a cache and has no explicit build script, + // we can skip environment installation and script execution entirely. This is a key benefit + // of the outputs-based cache approach as it allows for significant performance + // improvements when building multiple outputs that depend on the same intermediate artifacts. + match ( + output.finalized_cache_dependencies.is_some(), + output.recipe.build().script().is_default(), + ) { + (true, true) => tracing::info!( + "Using fast-path optimization: output inherited cache and has no build.script; skipping environment setup and script execution for improved performance." 
+ ), + _ => { + output + .install_environments(tool_configuration) + .await + .into_diagnostic()?; - match output.run_build_script().await { - Ok(_) => {} - Err(InterpreterError::Debug(info)) => { - tracing::info!("{}", info); - return Err(miette::miette!( - "Script not executed because debug mode is enabled" - )); - } - Err(InterpreterError::ExecutionFailed(_)) => { - return Err(miette::miette!("Script failed to execute")); + match output.run_build_script().await { + Ok(_) => {} + Err(InterpreterError::Debug(info)) => { + tracing::info!("{}", info); + return Err(miette::miette!( + "Script not executed because debug mode is enabled" + )); + } + Err(InterpreterError::ExecutionFailed(_)) => { + return Err(miette::miette!("Script failed to execute")); + } + } } } diff --git a/src/cache.rs b/src/cache.rs index 22f1b093e..9bc28460e 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,9 +1,10 @@ //! Functions to deal with the build cache use std::{ collections::{BTreeMap, HashSet}, - path::PathBuf, + path::{Path, PathBuf}, }; +use content_inspector::{ContentType, inspect}; use fs_err as fs; use miette::{Context, IntoDiagnostic}; use serde::{Deserialize, Serialize}; @@ -11,12 +12,11 @@ use sha2::{Digest, Sha256}; use crate::{ env_vars, - metadata::Output, - metadata::build_reindexed_channels, - packaging::Files, + metadata::{Output, build_reindexed_channels}, + packaging::{Files, contains_prefix_text, rewrite_prefix_in_file}, recipe::{ Jinja, - parser::{Dependency, Requirements, Source}, + parser::{CacheOutput, CacheRequirements, Dependency, Requirements, RunExports, Source}, }, render::resolved_dependencies::{ FinalizedDependencies, RunExportsDownload, install_environments, resolve_dependencies, @@ -26,8 +26,46 @@ use crate::{ fetch_sources, patch::apply_patch_custom, }, + tool_configuration::Configuration, }; +#[cfg(target_family = "unix")] +use crate::packaging::contains_prefix_binary; + +/// Check if a file contains the prefix and determine if it's binary or text +/// Returns (has_prefix, is_text) +fn check_file_for_prefix(file_path: &Path, prefix: &Path) -> (bool, bool) { + let Ok(content) = fs::read(file_path) else { + return (false, false); + }; + + let inspected = inspect(&content); + let looks_like_text = + inspected.is_text() && matches!(inspected, ContentType::UTF_8 | ContentType::UTF_8_BOM); + + if looks_like_text { + return contains_prefix_text(file_path, prefix) + .map(|found| (found.is_some(), true)) + .unwrap_or((false, true)); + } + + #[cfg(target_family = "unix")] + { + contains_prefix_binary(file_path, prefix) + .map(|has| (has, false)) + .unwrap_or((false, false)) + } + + #[cfg(target_family = "windows")] + { + let prefix_bytes = prefix.to_string_lossy(); + let has_prefix = content + .windows(prefix_bytes.len()) + .any(|window| window == prefix_bytes.as_bytes()); + (has_prefix, false) + } +} + /// Error type for cache key generation #[derive(Debug, thiserror::Error)] pub enum CacheKeyError { @@ -61,9 +99,89 @@ pub struct Cache { /// The prefix that was used at build time (needs to be replaced when /// restoring the files) pub prefix: PathBuf, + + /// The work_dir that was used at build time (used to rewrite restored files + /// if the absolute path changes between cache build and restore) + #[serde(default)] + pub work_dir: PathBuf, + + /// The run exports declared by the cache at build time (rendered form is computed later) + #[serde(default)] + pub run_exports: RunExports, + + /// Files (relative to prefix/work_dir) that contain the old prefix string and + /// should be 
rewritten when restoring to a different location. + #[serde(default)] + pub files_with_prefix: Vec, + + /// Files (relative to prefix/work_dir) that contain the old prefix string and + /// are binary files. These need to be handled during prefix replacement. + #[serde(default)] + pub binary_files_with_prefix: Vec, + + /// Files (relative to work_dir) that contain the old work_dir path and + /// should be rewritten when restoring to a different location. + #[serde(default)] + pub files_with_work_dir: Vec, + + /// Source files from the cache build (relative to work_dir) + /// Used to detect potential conflicts when outputs add additional sources + #[serde(default)] + pub source_files: Vec, } impl Output { + /// Compute cache key for a specific cache output + pub fn cache_key_for( + &self, + cache_name: &str, + cache_reqs: &CacheRequirements, + ) -> Result { + let requirement_names: HashSet<_> = cache_reqs + .build + .iter() + .chain(cache_reqs.host.iter()) + .filter_map(|dep| match dep { + Dependency::Spec(spec) if spec.version.is_none() && spec.build.is_none() => spec + .name + .as_ref() + .map(|name| name.as_normalized().to_string()), + _ => None, + }) + .chain( + self.recipe + .cache + .iter() + .flat_map(|cache| cache.build.variant.use_keys.iter().cloned()), + ) + .collect(); + + let mut selected_variant = BTreeMap::new(); + for key in &requirement_names { + if let Some(value) = self.variant().get(&key.as_str().into()) { + selected_variant.insert(key.as_str(), value.clone()); + } + } + + selected_variant.insert( + "host_platform", + self.host_platform().platform.to_string().into(), + ); + selected_variant.insert( + "build_platform", + self.build_configuration + .build_platform + .platform + .to_string() + .into(), + ); + + let rebuild_key = (cache_name, cache_reqs, selected_variant); + let mut hasher = Sha256::new(); + rebuild_key.serialize(&mut serde_json::Serializer::new(&mut hasher))?; + Ok(format!("{:x}", hasher.finalize())) + } + /// Compute a cache key that contains all the information that was used to /// build the cache, including the relevant variant information. pub fn cache_key(&self) -> Result { @@ -110,7 +228,9 @@ impl Output { .into(), ); - let cache_key = (cache, selected_variant, self.prefix()); + // Do NOT include absolute paths to keep the cache key stable across + // different build roots. + let cache_key = (cache, selected_variant); // serialize to json and hash let mut hasher = Sha256::new(); cache_key.serialize(&mut serde_json::Serializer::new(&mut hasher))?; @@ -128,6 +248,7 @@ impl Output { ) -> Result { let cache_prefix_dir = cache_dir.join("prefix"); let copied_prefix = CopyDir::new(&cache_prefix_dir, self.prefix()) + .overwrite(true) .run() .into_diagnostic()?; @@ -137,27 +258,329 @@ impl Output { &cache_dir_work, &self.build_configuration.directories.work_dir, ) + .overwrite(true) .run() .into_diagnostic()?; - let combined_files = copied_prefix.copied_paths().len() + copied_cache.copied_paths().len(); + // Track cached files for conflict detection. + let cached_prefix_files = copied_prefix.copied_paths_owned(); + let cached_work_files = copied_cache.copied_paths_owned(); + + // If the output also specifies additional sources, proactively warn when + // extraction would clobber files restored from cache work_dir. 
+ if !self.recipe.source.is_empty() { + for rel in &cached_work_files { + let target = self.build_configuration.directories.work_dir.join(rel); + if target.exists() { + let message = format!( + "Source extraction may overwrite restored cache work file: {}", + target.display() + ); + tracing::warn!(message); + self.record_warning(&message); + } + } + } + let combined_files = cached_prefix_files.len() + cached_work_files.len(); tracing::info!( "Restored {} source and prefix files from cache", combined_files ); + // If the cache was built under a different prefix, rewrite occurrences of + // the old prefix in restored text and binary files. + if cache.prefix != *self.prefix() { + for rel in cache + .files_with_prefix + .iter() + .chain(cache.binary_files_with_prefix.iter()) + { + for base in [ + self.prefix(), + &self.build_configuration.directories.work_dir, + ] { + let path = base.join(rel); + if path.exists() + && rewrite_prefix_in_file(&path, &cache.prefix, self.prefix()).is_err() + { + tracing::warn!( + "Failed to rewrite restored file {} with new prefix", + path.display() + ); + } + } + } + } + + // If the cache was built under a different work_dir, rewrite occurrences + // of the old work_dir path in restored text files located under the new work_dir. + if !cache.work_dir.as_os_str().is_empty() + && cache.work_dir != self.build_configuration.directories.work_dir + { + for rel in cache.files_with_work_dir.iter() { + let path = self.build_configuration.directories.work_dir.join(rel); + if path.exists() + && rewrite_prefix_in_file( + &path, + &cache.work_dir, + &self.build_configuration.directories.work_dir, + ) + .is_err() + { + tracing::warn!( + "Failed to rewrite restored work_dir file {} with new work path", + path.display() + ); + } + } + } + Ok(Output { finalized_cache_dependencies: Some(cache.finalized_dependencies.clone()), finalized_cache_sources: Some(cache.finalized_sources.clone()), + // Recipe already has run_exports merged during inheritance resolution, + // so we don't need to merge them again here + recipe: self.recipe.clone(), + restored_cache_prefix_files: Some(cached_prefix_files), + restored_cache_work_dir_files: Some(cached_work_files.clone()), ..self.clone() }) } + /// Build or fetch a specific cache output + pub async fn build_or_fetch_cache_output( + mut self, + cache_output: &CacheOutput, + tool_configuration: &Configuration, + ) -> Result { + let cache_name = cache_output.name.as_normalized(); + let cache_key = self + .cache_key_for(cache_name, &cache_output.requirements) + .into_diagnostic()?; + + tracing::info!("Building cache: {} with key: {}", cache_name, cache_key); + + let cache_dir = self + .build_configuration + .directories + .cache_dir + .join(format!("{}_{}", cache_name, cache_key)); + + // Check if cache exists + if cache_dir.exists() { + let cache_json = cache_dir.join("cache.json"); + if let Ok(text) = fs::read_to_string(&cache_json) { + match serde_json::from_str::(&text) { + Ok(cache) => { + tracing::info!("Restoring cache from {:?}", cache_dir); + self = self + .fetch_sources(tool_configuration, apply_patch_custom) + .await + .into_diagnostic()?; + return self.restore_cache(cache, cache_dir).await; + } + Err(e) => { + tracing::warn!( + "Failed to parse cache.json at {}: {} - rebuilding", + cache_json.display(), + e + ); + fs::remove_dir_all(&cache_dir).into_diagnostic()?; + } + } + } + } + + // Build the cache + let rendered_sources = fetch_sources( + &cache_output.source, + &self.build_configuration.directories, + &self.system_tools, + 
tool_configuration, + apply_patch_custom, + ) + .await + .into_diagnostic()?; + + let target_platform = self.build_configuration.target_platform; + let mut env_vars = env_vars::vars(&self, "BUILD"); + env_vars.extend(env_vars::os_vars(self.prefix(), &target_platform)); + + let channels = build_reindexed_channels(&self.build_configuration, tool_configuration) + .await + .into_diagnostic()?; + + // Convert CacheRequirements to Requirements + let requirements = Requirements { + build: cache_output.requirements.build.clone(), + host: cache_output.requirements.host.clone(), + run: Vec::new(), + run_constraints: Vec::new(), + run_exports: RunExports::default(), + ignore_run_exports: cache_output.ignore_run_exports.clone().unwrap_or_default(), + }; + + let finalized_dependencies = resolve_dependencies( + &requirements, + &self, + &channels, + tool_configuration, + RunExportsDownload::DownloadMissing, + ) + .await + .into_diagnostic()?; + + install_environments(&self, &finalized_dependencies, tool_configuration) + .await + .into_diagnostic()?; + + let selector_config = self.build_configuration.selector_config(); + let mut jinja = Jinja::new(selector_config); + for (k, v) in self.recipe.context.iter() { + jinja.context_mut().insert(k.clone(), v.clone().into()); + } + + let build_prefix = if cache_output.build.script.is_some() { + Some(&self.build_configuration.directories.build_prefix) + } else { + None + }; + + if let Some(script) = &cache_output.build.script { + script + .run_script( + env_vars, + &self.build_configuration.directories.work_dir, + &self.build_configuration.directories.recipe_dir, + &self.build_configuration.directories.host_prefix, + build_prefix, + Some(jinja), + None, + self.build_configuration.debug, + ) + .await + .into_diagnostic()?; + } + + // Collect new files and save cache + let new_files = Files::from_prefix( + self.prefix(), + &cache_output.build.always_include_files, + &cache_output.build.files, + ) + .into_diagnostic()?; + + fs::create_dir_all(&cache_dir).into_diagnostic()?; + let prefix_cache_dir = cache_dir.join("prefix"); + fs::create_dir_all(&prefix_cache_dir).into_diagnostic()?; + + let mut copied_files = Vec::new(); + let copy_options = CopyOptions::default(); + let mut creation_cache = HashSet::new(); + + // Track files that contain the old prefix for later path rewriting + let mut files_with_prefix: Vec = Vec::new(); + let mut binary_files_with_prefix: Vec = Vec::new(); + + for file in &new_files.new_files { + if file.is_dir() && !file.is_symlink() { + continue; + } + let stripped = file.strip_prefix(self.prefix()).unwrap(); + let dest = prefix_cache_dir.join(stripped); + copy_file(file, &dest, &mut creation_cache, ©_options).into_diagnostic()?; + copied_files.push(stripped.to_path_buf()); + let (has_prefix, is_text) = check_file_for_prefix(file, self.prefix()); + + if has_prefix { + match is_text { + true => files_with_prefix.push(stripped.to_path_buf()), + false => binary_files_with_prefix.push(stripped.to_path_buf()), + } + } + } + + let work_dir_files = CopyDir::new( + &self.build_configuration.directories.work_dir, + &cache_dir.join("work_dir"), + ) + .run() + .into_diagnostic()?; + + let cache = Cache { + requirements: requirements.clone(), + finalized_dependencies: finalized_dependencies.clone(), + finalized_sources: rendered_sources.clone(), + prefix_files: copied_files, + work_dir_files: work_dir_files.copied_paths().to_vec(), + prefix: self.prefix().to_path_buf(), + work_dir: self.build_configuration.directories.work_dir.clone(), + run_exports: 
cache_output.run_exports.clone(), + files_with_prefix, + binary_files_with_prefix, + files_with_work_dir: { + let mut files = Vec::new(); + for rel in work_dir_files.copied_paths() { + let abs = self.build_configuration.directories.work_dir.join(rel); + if abs.is_dir() { + continue; + } + match contains_prefix_text(&abs, &self.build_configuration.directories.work_dir) + { + Ok(Some(_)) => files.push(rel.to_path_buf()), + Ok(None) => {} + Err(_) => {} + } + } + files + }, + source_files: work_dir_files.copied_paths().to_vec(), + }; + + let cache_json = serde_json::to_string(&cache).into_diagnostic()?; + fs::write(cache_dir.join("cache.json"), cache_json).into_diagnostic()?; + + // The files are already in PREFIX from the build script, so we don't need to restore them. + // However, we need to track them so subsequent cache builds that inherit from this one + // know which files are available (e.g., extended-cache inheriting from base-cache). + // The files will remain in PREFIX for the next cache build in the sequence. + + let mut all_restored_prefix_files = self.restored_cache_prefix_files.unwrap_or_default(); + all_restored_prefix_files.extend(cache.prefix_files.clone()); + + let mut all_restored_work_dir_files = + self.restored_cache_work_dir_files.unwrap_or_default(); + all_restored_work_dir_files.extend(cache.work_dir_files.clone()); + + Ok(Output { + finalized_cache_dependencies: Some(finalized_dependencies), + finalized_cache_sources: Some(rendered_sources), + restored_cache_prefix_files: Some(all_restored_prefix_files), + restored_cache_work_dir_files: Some(all_restored_work_dir_files), + ..self + }) + } + /// This will fetch sources and build the cache if it doesn't exist /// Note: this modifies the output in place pub(crate) async fn build_or_fetch_cache( + self, + tool_configuration: &Configuration, + ) -> Result { + if let Some(synthetic_cache) = self.recipe.synthetic_cache_output() { + // Convert to synthetic cache output + self.build_or_fetch_cache_output(&synthetic_cache, tool_configuration) + .await + } else { + Ok(self) + } + } + + /// Didn't remove this one completely just in case. 
+ #[allow(dead_code)] + async fn build_or_fetch_cache_legacy( mut self, - tool_configuration: &crate::tool_configuration::Configuration, + tool_configuration: &Configuration, ) -> Result { if let Some(cache) = self.recipe.cache.clone() { // if we don't have a cache, we need to run the cache build with our current @@ -278,6 +701,8 @@ impl Output { let mut creation_cache = HashSet::new(); let mut copied_files = Vec::new(); let copy_options = CopyOptions::default(); + let mut files_with_prefix: Vec = Vec::new(); + let mut binary_files_with_prefix: Vec = Vec::new(); for file in &new_files.new_files { // skip directories (if they are not a symlink) @@ -291,6 +716,13 @@ impl Output { let dest = &prefix_cache_dir.join(stripped); copy_file(file, dest, &mut creation_cache, ©_options).into_diagnostic()?; copied_files.push(stripped.to_path_buf()); + let (has_prefix, is_text) = check_file_for_prefix(file, self.prefix()); + if has_prefix { + match is_text { + true => files_with_prefix.push(stripped.to_path_buf()), + false => binary_files_with_prefix.push(stripped.to_path_buf()), + } + } } // We also need to copy the work dir files to the cache @@ -309,10 +741,17 @@ impl Output { prefix_files: copied_files, work_dir_files: work_dir_files.copied_paths().to_vec(), prefix: self.prefix().to_path_buf(), + work_dir: self.build_configuration.directories.work_dir.clone(), + run_exports: RunExports::default(), + files_with_prefix, + binary_files_with_prefix, + files_with_work_dir: Vec::new(), + source_files: work_dir_files.copied_paths().to_vec(), }; let cache_file = cache_dir.join("cache.json"); - fs::write(cache_file, serde_json::to_string(&cache).unwrap()).into_diagnostic()?; + let cache_json = serde_json::to_string(&cache).into_diagnostic()?; + fs::write(cache_file, cache_json).into_diagnostic()?; // remove prefix to get it in pristine state and restore the cache fs::remove_dir_all(self.prefix()).into_diagnostic()?; diff --git a/src/console_utils.rs b/src/console_utils.rs index df2b0f92c..1847b0081 100644 --- a/src/console_utils.rs +++ b/src/console_utils.rs @@ -543,8 +543,8 @@ impl Layer for GitHubActionsLayer { let message = String::from_utf8_lossy(&message); match *metadata.level() { - Level::ERROR => println!("::error ::{}", message), - Level::WARN => println!("::warning ::{}", message), + Level::ERROR => eprintln!("::error ::{}", message), + Level::WARN => eprintln!("::warning ::{}", message), _ => {} } } diff --git a/src/lib.rs b/src/lib.rs index b30a73714..b62aa904e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,10 +67,12 @@ use rattler_conda_types::{ use rattler_config::config::build::PackageFormatAndCompression; use rattler_solve::SolveStrategy; use rattler_virtual_packages::VirtualPackageOverrides; -use recipe::parser::{Dependency, TestType, find_outputs_from_src}; +use recipe::parser::output::resolve_cache_inheritance_with_caches; +use recipe::parser::{Dependency, Recipe, TestType, find_outputs_from_src}; use recipe::variable::Variable; use render::resolved_dependencies::RunExportsDownload; use selectors::SelectorConfig; +use serde_json::{Value, to_string_pretty, to_value}; use source::patch::apply_patch_custom; use source_code::Source; use system_tools::SystemTools; @@ -212,6 +214,35 @@ pub async fn get_build_output( // First find all outputs from the recipe let named_source = Source::from_path(recipe_path).into_diagnostic()?; let outputs = find_outputs_from_src(named_source.clone())?; + let has_cache_or_inheritance = outputs.iter().any(|output| { + output.as_mapping().is_some_and(|mapping| { + 
mapping.contains_key("cache") + || mapping + .get("package") + .and_then(|p| p.as_mapping()) + .and_then(|pm| pm.get("inherit")) + .is_some() + }) + }); + + let (outputs, global_cache_outputs, inheritance_relationships) = if has_cache_or_inheritance { + let has_toplevel_cache = outputs.iter().any(|output| { + matches!( + Recipe::from_output_node(output, selector_config.clone()), + Ok(recipe) if recipe.cache.is_some() + ) + }); + + let jinja = recipe::Jinja::new(selector_config.clone()); + resolve_cache_inheritance_with_caches( + outputs, + has_toplevel_cache, + selector_config.experimental, + &jinja, + )? + } else { + (outputs, Vec::new(), HashMap::new()) + }; // Check if there is a `variants.yaml` or `conda_build_config.yaml` file next to // the recipe that we should potentially use. @@ -253,8 +284,13 @@ pub async fn get_build_output( variant_config.variants.insert(normalized_key, variables); } - let outputs_and_variants = - variant_config.find_variants(&outputs, named_source, &selector_config)?; + let outputs_and_variants = variant_config.find_variants( + &outputs, + named_source, + &selector_config, + &global_cache_outputs, + &inheritance_relationships, + )?; tracing::info!("Found {} variants\n", outputs_and_variants.len()); for discovered_output in &outputs_and_variants { @@ -399,6 +435,13 @@ pub async fn get_build_output( finalized_sources: None, finalized_cache_dependencies: None, finalized_cache_sources: None, + restored_cache_prefix_files: None, + restored_cache_work_dir_files: None, + cache_outputs_to_build: recipe.get_cache_outputs_for_package( + recipe.package.name.as_normalized(), + &global_cache_outputs, + &inheritance_relationships, + ), system_tools: SystemTools::new(), build_summary: Arc::new(Mutex::new(BuildSummary::default())), extra_meta: Some( @@ -1049,10 +1092,26 @@ pub async fn build_recipes( outputs }; - println!( - "{}", - serde_json::to_string_pretty(&outputs).into_diagnostic()? 
- ); + let mut json_value = to_value(&outputs).into_diagnostic()?; + if let Value::Array(outputs_array) = &mut json_value { + outputs_array.iter_mut().for_each(|output| { + if let Some(Value::Object(recipe)) = output.get_mut("recipe") { + let requirements = recipe + .entry("requirements".to_string()) + .or_insert_with(|| Value::Object(Default::default())); + + if let Value::Object(requirements_map) = requirements { + for key in ["build", "host", "run", "run_constraints"] { + requirements_map + .entry(key.to_string()) + .or_insert_with(|| Value::Array(Vec::new())); + } + } + } + }); + } + + println!("{}", to_string_pretty(&json_value).into_diagnostic()?); return Ok(()); } diff --git a/src/metadata.rs b/src/metadata.rs index d7a480319..7ec4dea57 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -2,7 +2,6 @@ pub use crate::types::{ BuildConfiguration, Debug, Output, PlatformWithVirtualPackages, build_reindexed_channels, }; - #[cfg(test)] mod test { use chrono::TimeZone; diff --git a/src/packaging.rs b/src/packaging.rs index 834f7a97b..4212a4c04 100644 --- a/src/packaging.rs +++ b/src/packaging.rs @@ -21,7 +21,9 @@ mod file_finder; mod file_mapper; mod metadata; pub use file_finder::{Files, TempFiles, content_type}; -pub use metadata::{contains_prefix_binary, contains_prefix_text, create_prefix_placeholder}; +pub use metadata::{ + contains_prefix_binary, contains_prefix_text, create_prefix_placeholder, rewrite_prefix_in_file, +}; use tempfile::NamedTempFile; use crate::{ @@ -563,20 +565,29 @@ fn create_empty_build_folder( } impl Output { - /// Create a conda package from any new files in the host prefix. Note: the + /// Create a conda package from files in the host prefix. Note: the /// previous stages should have been completed before calling this /// function. pub async fn create_package( &self, tool_configuration: &tool_configuration::Configuration, ) -> Result<(PathBuf, PathsJson), PackagingError> { - let span = tracing::info_span!("Packaging new files"); - let _enter = span.enter(); - let files_after = Files::from_prefix( - &self.build_configuration.directories.host_prefix, - self.recipe.build().always_include_files(), - self.recipe.build().files(), - )?; + // When cache files are present, the output's build.files should apply to all + // relevant files, not just "new" ones compared to conda-meta + let files_after = match self.has_restored_cache_files() { + true => Files::from_prefix_with_filters( + &self.build_configuration.directories.host_prefix, + self.recipe.build().always_include_files(), + self.recipe.build().files(), + self.restored_cache_prefix_files.as_ref(), + self.restored_cache_work_dir_files.as_ref(), + )?, + false => Files::from_prefix( + &self.build_configuration.directories.host_prefix, + self.recipe.build().always_include_files(), + self.recipe.build().files(), + )?, + }; package_conda(self, tool_configuration, &files_after) } diff --git a/src/packaging/file_finder.rs b/src/packaging/file_finder.rs index 0d68c9e78..91cac74f2 100644 --- a/src/packaging/file_finder.rs +++ b/src/packaging/file_finder.rs @@ -213,6 +213,48 @@ impl Files { }) } + /// Find all files in the given prefix and apply the provided filters, including restored cache files. + /// This method is used when cache files have been restored to the prefix and we need to select files + /// based on the output's build configuration. 
+ pub fn from_prefix_with_filters( + prefix: &Path, + always_include: &GlobVec, + files: &GlobVec, + restored_cache_prefix_files: Option<&Vec>, + _restored_cache_work_dir_files: Option<&Vec>, + ) -> Result { + if !prefix.exists() { + return Ok(Files { + new_files: HashSet::new(), + old_files: HashSet::new(), + prefix: prefix.to_owned(), + }); + } + + let mut result = Self::from_prefix(prefix, always_include, files)?; + let current_files = record_files(prefix)?; + + if let Some(cache_files) = restored_cache_prefix_files { + for cache_file in cache_files { + let full_path = prefix.join(cache_file); + if !current_files.contains(&full_path) { + continue; + } + let file_without_prefix = full_path + .strip_prefix(prefix) + .expect("File should be in prefix"); + if files.is_empty() + || files.is_match(file_without_prefix) + || always_include.is_match(file_without_prefix) + { + result.new_files.insert(full_path); + } + } + } + + Ok(result) + } + /// Copy the new files to a temporary directory and return the temporary directory and the files that were copied. pub fn to_temp_folder(&self, output: &Output) -> Result { let temp_dir = TempDir::with_prefix(output.name().as_normalized())?; @@ -262,6 +304,7 @@ impl TempFiles { mod test { use std::{collections::HashSet, path::PathBuf}; + use super::*; use crate::packaging::file_finder::{check_is_case_sensitive, find_new_files}; #[test] @@ -328,4 +371,58 @@ mod test { // but we can verify the function doesn't panic and returns a boolean let _is_case_sensitive = result.unwrap(); } + + #[test] + fn test_from_prefix_with_filters() { + let temp_dir = TempDir::new().unwrap(); + let prefix = temp_dir.path(); + let test_files = [ + ("bin/executable", "test"), + ("lib/library.so", "test"), + ("README.md", "test"), + ("config.txt", "test"), + ("bin/cached_executable", "cached"), + ("lib/cached_library.so", "cached"), + ("README.cached", "cached"), + ]; + + for (path, content) in &test_files { + if path.contains('/') { + fs::create_dir_all(prefix.join(path).parent().unwrap()).unwrap(); + } + fs::write(prefix.join(path), content).unwrap(); + } + + let files_glob = GlobVec::from_vec(["bin/*", "lib/*"].to_vec(), None); + let always_include_glob = GlobVec::from_vec(["README.*"].to_vec(), None); + let cache_files = vec![ + "bin/cached_executable".into(), + "lib/cached_library.so".into(), + "README.cached".into(), + ]; + + let result = Files::from_prefix_with_filters( + prefix, + &always_include_glob, + &files_glob, + Some(&cache_files), + None, + ) + .unwrap(); + + let expected_files = [ + "bin/executable", + "lib/library.so", + "README.md", + "bin/cached_executable", + "lib/cached_library.so", + "README.cached", + ]; + + for file in &expected_files { + assert!(result.new_files.contains(&prefix.join(file))); + } + assert!(!result.new_files.contains(&prefix.join("config.txt"))); + assert_eq!(result.new_files.len(), 6); + } } diff --git a/src/packaging/metadata.rs b/src/packaging/metadata.rs index 156820b8c..841dba0c4 100644 --- a/src/packaging/metadata.rs +++ b/src/packaging/metadata.rs @@ -5,6 +5,7 @@ use std::os::unix::prelude::OsStrExt; use std::{ borrow::Cow, collections::HashSet, + io::{self, ErrorKind}, ops::Deref, path::{Path, PathBuf}, }; @@ -13,6 +14,7 @@ use content_inspector::ContentType; use fs_err as fs; use fs_err::File; use itertools::Itertools; +use memchr::memmem; use rattler_conda_types::{ ChannelUrl, NoArchType, Platform, package::{ @@ -59,7 +61,7 @@ pub fn contains_prefix_binary(file_path: &Path, prefix: &Path) -> Result = 
to_forward_slash_lossy(prefix); - if memchr::memmem::find_iter(mmap.as_ref(), forward_slash.deref()) + if memmem::find_iter(mmap.as_ref(), forward_slash.deref()) .next() .is_some() { @@ -118,6 +120,95 @@ pub fn contains_prefix_text( Ok(detected_prefix) } +/// Rewrite prefix in a file by determining if it's binary or text and replace them +pub fn rewrite_prefix_in_file( + file_path: &Path, + old_prefix: &Path, + new_prefix: &Path, +) -> Result<(), PackagingError> { + let content = fs::read(file_path)?; + let content_type = content_inspector::inspect(&content); + let is_text = content_type.is_text() + && matches!(content_type, ContentType::UTF_8 | ContentType::UTF_8_BOM); + + if is_text { + return rewrite_text_prefix(file_path, content, old_prefix, new_prefix); + } + + rewrite_binary_prefix(file_path, old_prefix, new_prefix) +} + +fn rewrite_text_prefix( + file_path: &Path, + content: Vec, + old_prefix: &Path, + new_prefix: &Path, +) -> Result<(), PackagingError> { + let contents = String::from_utf8(content).map_err(|err| { + PackagingError::IoError(io::Error::new(ErrorKind::InvalidData, err.utf8_error())) + })?; + + let old_prefix_str = old_prefix.to_string_lossy().into_owned(); + + if !contents.contains(old_prefix_str.as_str()) { + return Ok(()); + } + + let new_prefix_str = new_prefix.to_string_lossy(); + let updated = contents.replace(old_prefix_str.as_str(), new_prefix_str.as_ref()); + fs::write(file_path, updated)?; + Ok(()) +} + +fn rewrite_binary_prefix( + file_path: &Path, + old_prefix: &Path, + new_prefix: &Path, +) -> Result<(), PackagingError> { + #[cfg(target_family = "unix")] + { + use std::os::unix::prelude::OsStrExt; + + let old_prefix_bytes = old_prefix.as_os_str().as_bytes(); + let new_prefix_bytes = new_prefix.as_os_str().as_bytes(); + + if old_prefix_bytes.is_empty() { + return Ok(()); + } + + if new_prefix_bytes.len() > old_prefix_bytes.len() { + return Err(PackagingError::IoError(io::Error::new( + ErrorKind::InvalidInput, + "New prefix is longer than old prefix, cannot replace in binary file", + ))); + } + + let file = File::options().read(true).write(true).open(file_path)?; + let mut mmap = unsafe { memmap2::MmapOptions::new().map_mut(&file) }?; + + let mut search_start = 0; + while let Some(found) = memmem::find(&mmap[search_start..], old_prefix_bytes) { + let pos = search_start + found; + mmap[pos..pos + new_prefix_bytes.len()].copy_from_slice(new_prefix_bytes); + let padding_range = pos + new_prefix_bytes.len()..pos + old_prefix_bytes.len(); + mmap[padding_range].fill(0); + search_start = pos + 1; + } + + mmap.flush()?; + Ok(()) + } + + #[cfg(target_family = "windows")] + { + let _ = (file_path, old_prefix, new_prefix); + Err(PackagingError::IoError(io::Error::new( + ErrorKind::Unsupported, + "Binary prefix replacement is not supported on Windows", + ))) + } +} + /// Create a prefix placeholder object for the given file and prefix. /// This function will also search in the file for the prefix and determine if /// the file is binary or text. 
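Reviewer note on rewrite_binary_prefix above: because the file is patched in place through a writable mmap, replacement is only possible when the new prefix is no longer than the old one; the bytes freed by the shorter prefix are zero-filled so the file size and all byte offsets stay unchanged. A minimal standalone sketch of the same replace-and-pad idea, using a hypothetical helper on an in-memory buffer rather than the mmap-based code in this diff:

    fn replace_prefix_padded(buf: &mut [u8], old: &[u8], new: &[u8]) {
        assert!(new.len() <= old.len(), "new prefix must not be longer than the old one");
        let mut i = 0;
        while i + old.len() <= buf.len() {
            if &buf[i..i + old.len()] == old {
                // overwrite the old prefix with the new one ...
                buf[i..i + new.len()].copy_from_slice(new);
                // ... and zero-fill the freed bytes so the total length is preserved
                buf[i + new.len()..i + old.len()].fill(0);
            }
            i += 1;
        }
    }

    fn main() {
        let mut blob = b"path=/long/old/prefix/lib\0rest".to_vec();
        replace_prefix_padded(&mut blob, b"/long/old/prefix", b"/new/prefix");
        assert_eq!(&blob[..], &b"path=/new/prefix\0\0\0\0\0/lib\0rest"[..]);
    }

Note that the padding lands directly after the new prefix rather than at the end of the surrounding NUL-terminated string; depending on how the restored binaries are consumed, that distinction may matter.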
diff --git a/src/post_process/checks.rs b/src/post_process/checks.rs index 23c1b0203..eca118011 100644 --- a/src/post_process/checks.rs +++ b/src/post_process/checks.rs @@ -17,6 +17,7 @@ use crate::{ use crate::render::resolved_dependencies::RunExportDependency; use globset::{Glob, GlobSet, GlobSetBuilder}; use rattler_conda_types::{PackageName, PrefixRecord}; +use tracing::warn; #[derive(thiserror::Error, Debug)] pub enum LinkingCheckError { @@ -205,14 +206,20 @@ fn find_system_libs(output: &Output) -> Result { .directories .build_prefix .join(prefix_record_name); - let record = PrefixRecord::from_path(sysroot_path).unwrap(); - let so_glob = Glob::new("*.so*")?.compile_matcher(); - for file in record.files { - if let Some(file_name) = file.file_name() { - if so_glob.is_match(file_name) { - system_libs.add(Glob::new(&file_name.to_string_lossy())?); + if let Ok(record) = PrefixRecord::from_path(sysroot_path.clone()) { + let so_glob = Glob::new("*.so*")?.compile_matcher(); + for file in record.files { + if let Some(file_name) = file.file_name() { + if so_glob.is_match(file_name) { + system_libs.add(Glob::new(&file_name.to_string_lossy())?); + } } } + } else { + warn!( + "Skipping sysroot linking check; prefix record missing at {}", + sysroot_path.display() + ); } } system_libs.build() diff --git a/src/recipe/jinja.rs b/src/recipe/jinja.rs index facef7a8a..9780b2534 100644 --- a/src/recipe/jinja.rs +++ b/src/recipe/jinja.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use std::{collections::BTreeMap, str::FromStr}; use minijinja::value::{Kwargs, Object, from_args}; -use minijinja::{Environment, Value}; +use minijinja::{AutoEscape, Environment, Value}; use rattler_conda_types::{Arch, PackageName, ParseStrictness, Platform, Version, VersionSpec}; use crate::normalized_key::NormalizedKey; @@ -57,7 +57,8 @@ pub struct Jinja { impl Jinja { /// Create a new Jinja instance with the given selector configuration. pub fn new(config: SelectorConfig) -> Self { - let env = set_jinja(&config); + let mut env = set_jinja(&config); + env.set_auto_escape_callback(|_| AutoEscape::None); let context = config.into_context(); Self { env, context } } diff --git a/src/recipe/parser.rs b/src/recipe/parser.rs index 693db14e8..b805779e0 100644 --- a/src/recipe/parser.rs +++ b/src/recipe/parser.rs @@ -3,8 +3,10 @@ //! This phase parses YAML and [`SelectorConfig`] into a [`Recipe`], where //! if-selectors are handled and any jinja string is processed, resulting in a rendered recipe. 
use indexmap::IndexMap; +use rattler_conda_types::PackageName; use serde::{Deserialize, Serialize}; use std::borrow::Cow; +use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use crate::{ @@ -18,14 +20,19 @@ use crate::{ selectors::SelectorConfig, source_code::SourceCode, }; +use marked_yaml; mod about; mod build; mod cache; +mod cache_output; +mod common_output; mod glob_vec; mod helper; -mod output; +pub mod output; +mod output_parser; mod package; +pub(crate) mod parsing_utils; mod regex; mod requirements; mod script; @@ -37,9 +44,16 @@ pub use self::{ about::About, build::{Build, BuildString, DynamicLinking, PrefixDetection, Python}, cache::Cache, + cache_output::{CacheBuild, CacheOutput, CacheRequirements}, + common_output::{ALLOWED_KEYS_MULTI_OUTPUTS, DEEP_MERGE_KEYS, InheritSpec}, glob_vec::{GlobCheckerVec, GlobVec, GlobWithSource}, output::find_outputs_from_src, + output_parser::{Output, OutputType}, package::{OutputPackage, Package}, + parsing_utils::{ + StandardTryConvert, invalid_field_error, missing_field_error, parse_bool, + parse_required_string, validate_mapping_keys, validate_multi_output_root_keys, + }, regex::SerializableRegex, requirements::{ Dependency, IgnoreRunExports, Language, PinCompatible, PinSubpackage, Requirements, @@ -84,6 +98,12 @@ pub struct Recipe { /// Extra information as a map with string keys and any value #[serde(default, skip_serializing_if = "IndexMap::is_empty")] pub extra: IndexMap, + /// Cache outputs discovered during inheritance resolution + #[serde(skip)] + pub cache_outputs: Vec, + /// Cache inheritance relationships (package name -> cache names) + #[serde(skip)] + pub cache_inheritance: HashMap>, } pub(crate) trait CollectErrors: Iterator> + Sized { @@ -168,6 +188,22 @@ impl Recipe { pub fn from_node( root_node: &Node, jinja_opt: SelectorConfig, + ) -> Result> { + Self::from_node_with_options(root_node, jinja_opt, false) + } + + /// Create recipes from an output [`Node`] that may contain inheritance metadata. 
+ pub fn from_output_node( + root_node: &Node, + jinja_opt: SelectorConfig, + ) -> Result> { + Self::from_node_with_options(root_node, jinja_opt, true) + } + + fn from_node_with_options( + root_node: &Node, + jinja_opt: SelectorConfig, + allow_inherit: bool, ) -> Result> { let experimental = jinja_opt.experimental; let mut jinja = Jinja::new(jinja_opt); @@ -253,6 +289,8 @@ impl Recipe { let mut about = About::default(); let mut cache = None; let mut extra = IndexMap::default(); + let cache_outputs_result = Vec::new(); + let cache_inheritance = HashMap::new(); rendered_node .iter() @@ -260,7 +298,9 @@ impl Recipe { let key_str = key.as_str(); match key_str { "schema_version" => schema_version = value.try_convert(key_str)?, - "package" => package = Some(value.try_convert(key_str)?), + "package" => { + package = Some(value.try_convert(key_str)?); + } "recipe" => { return Err(vec![_partialerror!( *key.span(), @@ -285,6 +325,27 @@ impl Recipe { "requirements" => requirements = value.try_convert(key_str)?, "tests" => tests = value.try_convert(key_str)?, "about" => about = value.try_convert(key_str)?, + "inherit" => { + if allow_inherit { + if experimental { + let _: InheritSpec = + value.try_convert("output.inherit")?; + } else { + return Err(vec![_partialerror!( + *value.span(), + ErrorKind::ExperimentalOnly("inherit".to_string()), + help = "The `inherit` key requires enabling experimental mode (`--experimental`)" + )]); + } + } else { + return Err(vec![_partialerror!( + *key.span(), + ErrorKind::InvalidField("inherit".to_string().into()), + help = "The 'inherit' key is only valid in outputs, not at the recipe level. \ + To use top-level cache, use the 'cache' key instead and omit 'inherit' in outputs." + )]); + } + } "context" => {} "extra" => extra = value.as_mapping().ok_or_else(|| { vec![_partialerror!( @@ -334,6 +395,8 @@ impl Recipe { tests, about, extra, + cache_outputs: cache_outputs_result, + cache_inheritance, }; Ok(recipe) @@ -368,6 +431,103 @@ impl Recipe { pub const fn about(&self) -> &About { &self.about } + + /// Convert top-level cache to a synthetic CacheOutput + pub fn synthetic_cache_output(&self) -> Option { + self.cache.as_ref().map(|cache| CacheOutput { + name: PackageName::new_unchecked(format!( + "__recipe_{}_cache", + self.package.name.as_normalized() + )), + source: cache.source.clone(), + build: CacheBuild { + script: Some(cache.build.script.clone()), + variant: cache.build.variant().clone(), + files: GlobVec::default(), + always_include_files: GlobVec::default(), + }, + requirements: CacheRequirements { + build: cache.requirements.build.clone(), + host: cache.requirements.host.clone(), + }, + run_exports: RunExports::default(), + ignore_run_exports: Some(cache.requirements.ignore_run_exports.clone()), + about: None, + span: marked_yaml::Span::new_blank(), + }) + } + + /// Check if recipe uses legacy top-level cache format + pub fn has_toplevel_cache(&self) -> bool { + self.cache.is_some() + } + + /// Extract cache outputs from resolved inheritance + /// This method looks at the resolved outputs and extracts cache outputs that this package depends on + pub fn extract_cache_outputs_from_inheritance(&self) -> Vec { + self.cache_outputs.clone() + } + + /// Get cache outputs that this package depends on based on inheritance + /// This is called during Output creation to populate cache_outputs_to_build + /// Includes transitive dependencies (e.g., if A inherits from B and B inherits from C, + /// then A depends on both C and B, in that order) + pub fn 
get_cache_outputs_for_package( + &self, + package_name: &str, + all_cache_outputs: &[CacheOutput], + inheritance_relationships: &HashMap>, + ) -> Vec { + fn find_cache<'a>(caches: &'a [CacheOutput], name: &str) -> Option<&'a CacheOutput> { + caches + .iter() + .find(|cache| cache.name.as_normalized() == name) + } + + fn collect( + name: &str, + relationships: &HashMap>, + caches: &[CacheOutput], + visiting: &mut HashSet, + seen: &mut HashSet, + acc: &mut Vec, + ) { + if !visiting.insert(name.to_owned()) { + tracing::warn!( + "Circular cache dependency detected involving '{}' - this likely indicates a configuration error", + name + ); + return; + } + + if let Some(children) = relationships.get(name) { + for child in children { + collect(child, relationships, caches, visiting, seen, acc); + + if seen.insert(child.clone()) { + if let Some(cache_output) = find_cache(caches, child) { + acc.push(cache_output.clone()); + } + } + } + } + + visiting.remove(name); + } + + let mut result = Vec::new(); + let mut seen = HashSet::new(); + let mut visiting = HashSet::new(); + collect( + package_name, + inheritance_relationships, + all_cache_outputs, + &mut visiting, + &mut seen, + &mut result, + ); + result + } } #[cfg(test)] @@ -431,7 +591,7 @@ mod tests { include_str!("../../test-data/recipes/test-parsing/recipe_bad_skip_multi.yaml"); let recipes = find_outputs_from_src(raw_recipe).unwrap(); for recipe in recipes { - let recipe = Recipe::from_node(&recipe, SelectorConfig::default()); + let recipe = Recipe::from_output_node(&recipe, SelectorConfig::default()); if recipe.is_ok() { assert_eq!(recipe.unwrap().package().name().as_normalized(), "zlib-dev"); continue; diff --git a/src/recipe/parser/about.rs b/src/recipe/parser/about.rs index b36ec0373..3b94b7e04 100644 --- a/src/recipe/parser/about.rs +++ b/src/recipe/parser/about.rs @@ -54,10 +54,35 @@ pub struct About { } impl About { + fn merge_if_absent(target: &mut Option, source: &Option) { + if target.is_none() { + if let Some(value) = source { + *target = Some(value.clone()); + } + } + } + /// Returns true if the about has its default configuration. 
pub fn is_default(&self) -> bool { self == &Self::default() } + + /// Deep merge another About into this one + /// Values in self take precedence over values in other + pub fn merge_from(&mut self, other: &About) { + Self::merge_if_absent(&mut self.homepage, &other.homepage); + Self::merge_if_absent(&mut self.repository, &other.repository); + Self::merge_if_absent(&mut self.documentation, &other.documentation); + Self::merge_if_absent(&mut self.license, &other.license); + Self::merge_if_absent(&mut self.license_family, &other.license_family); + Self::merge_if_absent(&mut self.summary, &other.summary); + Self::merge_if_absent(&mut self.description, &other.description); + Self::merge_if_absent(&mut self.prelink_message, &other.prelink_message); + + if self.license_file.is_empty() && !other.license_file.is_empty() { + self.license_file = other.license_file.clone(); + } + } } impl TryConvertNode for RenderedNode { @@ -140,11 +165,13 @@ impl TryConvertNode for RenderedScalarNode { #[cfg(test)] mod test { + use super::*; use crate::{ assert_miette_snapshot, recipe::{Recipe, jinja::SelectorConfig}, variant_config::ParseErrors, }; + use std::path::Path; #[test] fn invalid_url() { @@ -181,4 +208,40 @@ mod test { assert_miette_snapshot!(err); } + + #[test] + fn test_merge_from() { + let mut about1 = About { + homepage: Some(Url::parse("https://example.com").unwrap()), + repository: Some(Url::parse("https://github.com/example/repo").unwrap()), + summary: Some("Original summary".to_string()), + description: None, + license_family: None, + license_file: GlobVec::from_vec(vec!["LICENSE"], None), + ..Default::default() + }; + + let about2 = About { + homepage: Some(Url::parse("https://other.com").unwrap()), + description: Some("Other description".to_string()), + license_family: Some("MIT".to_string()), + license_file: GlobVec::from_vec(vec!["COPYING"], None), + ..Default::default() + }; + + about1.merge_from(&about2); + + assert_eq!( + about1.homepage.as_ref().unwrap().as_str(), + "https://example.com/" + ); + assert_eq!( + about1.repository.as_ref().unwrap().as_str(), + "https://github.com/example/repo" + ); + assert_eq!(about1.summary.as_ref().unwrap(), "Original summary"); + assert!(about1.license_file.is_match(Path::new("LICENSE"))); + assert_eq!(about1.description.as_ref().unwrap(), "Other description"); + assert_eq!(about1.license_family.as_ref().unwrap(), "MIT"); + } } diff --git a/src/recipe/parser/build.rs b/src/recipe/parser/build.rs index 3b1749435..7d4904f34 100644 --- a/src/recipe/parser/build.rs +++ b/src/recipe/parser/build.rs @@ -82,6 +82,7 @@ pub struct Build { pub skip: Skip, /// The build script can be either a list of commands or a path to a script. By /// default, the build script is set to `build.sh` or `build.bat` on Unix and Windows respectively. + /// When using multi-output recipes, the default script is not automatically inferred; you must set it explicitly. #[serde(default, skip_serializing_if = "Script::is_default")] pub script: Script, /// A noarch package runs on any platform. It can be either a python package or a generic package. @@ -207,6 +208,64 @@ pub struct PostProcess { } impl Build { + fn merge_when(target: &mut T, source: &T, should_replace: P) + where + T: Clone, + P: Fn(&T) -> bool, + { + if should_replace(target) && !should_replace(source) { + *target = source.clone(); + } + } + + /// Deep merge another Build into this one. + /// Values in self take precedence over values in other. 
+ /// + /// Merged fields: python, dynamic_linking, prefix_detection, variant, + /// post_process, merge_build_and_host_envs, noarch, files, + /// always_include_files, always_copy_files + /// + /// Excluded fields (preserved from self): number, string, skip, script + /// These fields are treated as identity fields specific to each output. + pub fn merge_from(&mut self, other: &Build) { + Self::merge_when(&mut self.python, &other.python, Python::is_default); + Self::merge_when( + &mut self.dynamic_linking, + &other.dynamic_linking, + DynamicLinking::is_default, + ); + Self::merge_when( + &mut self.prefix_detection, + &other.prefix_detection, + PrefixDetection::is_default, + ); + Self::merge_when( + &mut self.variant, + &other.variant, + VariantKeyUsage::is_default, + ); + Self::merge_when( + &mut self.post_process, + &other.post_process, + |v: &Vec| v.is_empty(), + ); + + self.merge_build_and_host_envs |= other.merge_build_and_host_envs; + + Self::merge_when(&mut self.noarch, &other.noarch, NoArchType::is_none); + Self::merge_when(&mut self.files, &other.files, GlobVec::is_empty); + Self::merge_when( + &mut self.always_include_files, + &other.always_include_files, + GlobVec::is_empty, + ); + Self::merge_when( + &mut self.always_copy_files, + &other.always_copy_files, + GlobVec::is_empty, + ); + } + /// Get the merge build host flag. pub const fn merge_build_and_host_envs(&self) -> bool { self.merge_build_and_host_envs diff --git a/src/recipe/parser/cache.rs b/src/recipe/parser/cache.rs index 4da1f7c47..ac41edc88 100644 --- a/src/recipe/parser/cache.rs +++ b/src/recipe/parser/cache.rs @@ -9,7 +9,7 @@ use crate::{ }; use serde::{Deserialize, Serialize}; -use super::{Build, Requirements, Source}; +use super::{Build, Requirements, Source, cache_output::CacheOutput}; /// A cache build that can be used to split up a build into multiple outputs #[derive(Debug, Default, Clone, Serialize, Deserialize)] @@ -21,6 +21,9 @@ pub struct Cache { pub build: Build, /// The requirements for building the cache pub requirements: Requirements, + /// Cache outputs that define intermediate build artifacts + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub outputs: Vec, } impl TryConvertNode for RenderedNode { @@ -35,13 +38,17 @@ impl TryConvertNode for RenderedMappingNode { fn try_convert(&self, _name: &str) -> Result> { let mut cache = Cache::default(); - validate_keys! { - cache, - self.iter(), - source, - build, - requirements - }; + validate_keys!(cache, self.iter(), source, build, requirements, outputs); + + for (key, value) in self.iter() { + match key.as_str() { + "source" => cache.source = value.try_convert("cache.source")?, + "build" => cache.build = value.try_convert("cache.build")?, + "requirements" => cache.requirements = value.try_convert("cache.requirements")?, + "outputs" => cache.outputs = value.try_convert("cache.outputs")?, + _ => {} + } + } Ok(cache) } diff --git a/src/recipe/parser/cache_output.rs b/src/recipe/parser/cache_output.rs new file mode 100644 index 000000000..d6d0eabb7 --- /dev/null +++ b/src/recipe/parser/cache_output.rs @@ -0,0 +1,390 @@ +//! Cache output structures for v1 recipes according to the CEP specification +//! +//! This module defines the cache output type which is an intermediate build artifact +//! that can be inherited by regular package outputs. +//! +//! Cache outputs enable the "fast path" optimization where package outputs that inherit +//! from a cache and specify no explicit build script can skip environment installation +//! 
and script execution, significantly speeding up the build process. + +use crate::{ + _partialerror, + recipe::{ + custom_yaml::{ + HasSpan, RenderedMappingNode, RenderedNode, RenderedScalarNode, TryConvertNode, + }, + error::{ErrorKind, PartialParsingError}, + parser::{ + StandardTryConvert, build::VariantKeyUsage, invalid_field_error, missing_field_error, + validate_mapping_keys, + }, + }, +}; +use marked_yaml::Span; +use rattler_conda_types::PackageName; +use serde::de::{self, Deserialize as DeserializeTrait, Deserializer}; +use serde::{Deserialize, Serialize}; + +use super::glob_vec::GlobVec; +use super::requirements::RunExports; +use super::{Script, Source}; + +/// A cache output that produces intermediate build artifacts +/// +/// Cache outputs can be inherited by package outputs using the `inherit` key in the +/// package output definition. This enables the "fast path" optimization where package +/// outputs that inherit from a cache and have no explicit build script can skip +/// environment installation and script execution, significantly improving build performance. +#[derive(Debug, Clone, Serialize)] +pub struct CacheOutput { + /// The name of the cache output + pub name: PackageName, + /// Sources for this cache output + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub source: Vec, + /// Build configuration for the cache + pub build: CacheBuild, + /// Requirements for building the cache (only build and host allowed) + pub requirements: CacheRequirements, + /// Run exports declared by the cache; can be inherited by packages + #[serde(default, skip_serializing_if = "RunExports::is_empty")] + pub run_exports: RunExports, + /// Run exports to ignore + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ignore_run_exports: Option, + /// About information that can be inherited by packages + #[serde(default, skip_serializing_if = "Option::is_none")] + pub about: Option, + /// Span of the output mapping (for diagnostics) + #[serde(skip)] + pub span: marked_yaml::Span, +} + +#[derive(Deserialize)] +#[serde(field_identifier, rename_all = "lowercase")] +enum Field { + Name, + Source, + Build, + Requirements, + RunExports, + IgnoreRunExports, + About, +} + +fn set_once(slot: &mut Option, value: T, field: &'static str) -> Result<(), E> { + if slot.replace(value).is_some() { + Err(de::Error::duplicate_field(field)) + } else { + Ok(()) + } +} + +fn ensure_cache_build_script(build: &CacheBuild) -> Result<(), E> { + if build.script.is_none() { + Err(E::custom( + "cache outputs require an explicit build script (build.script field is required)", + )) + } else { + Ok(()) + } +} + +// Manual implementation of Deserialize for CacheOutput +impl<'de> DeserializeTrait<'de> for CacheOutput { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct CacheOutputVisitor; + + impl<'de> de::Visitor<'de> for CacheOutputVisitor { + type Value = CacheOutput; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("struct CacheOutput") + } + + fn visit_map(self, mut map: V) -> Result + where + V: de::MapAccess<'de>, + { + let mut name = None; + let mut source = None; + let mut build: Option = None; + let mut requirements = None; + let mut run_exports = None; + let mut ignore_run_exports = None; + let mut about = None; + + while let Some(key) = map.next_key()? 
{ + match key { + Field::Name => { + set_once(&mut name, map.next_value()?, "name")?; + } + Field::Source => { + set_once(&mut source, map.next_value()?, "source")?; + } + Field::Build => { + set_once(&mut build, map.next_value()?, "build")?; + } + Field::Requirements => { + set_once(&mut requirements, map.next_value()?, "requirements")?; + } + Field::RunExports => { + set_once(&mut run_exports, map.next_value()?, "run_exports")?; + } + Field::IgnoreRunExports => { + set_once( + &mut ignore_run_exports, + map.next_value()?, + "ignore_run_exports", + )?; + } + Field::About => { + set_once(&mut about, map.next_value()?, "about")?; + } + } + } + + let name = name.ok_or_else(|| de::Error::missing_field("name"))?; + let source = source.unwrap_or_default(); + let build = build.ok_or_else(|| de::Error::missing_field("build"))?; + ensure_cache_build_script(&build)?; + let requirements = requirements.unwrap_or_default(); + let run_exports = run_exports.unwrap_or_default(); + + Ok(CacheOutput { + name, + source, + build, + requirements, + run_exports, + ignore_run_exports, + about, + span: Span::new_blank(), + }) + } + } + + deserializer.deserialize_map(CacheOutputVisitor) + } +} + +/// Build configuration specific to cache outputs +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct CacheBuild { + /// The build script - only script key is allowed for cache outputs + #[serde(default, skip_serializing_if = "Option::is_none")] + pub script: Option
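End-of-diff note: the cache keys computed by cache_key_for and cache_key above intentionally leave out absolute paths (see the "Do NOT include absolute paths" comment), so the same recipe hashed from different build roots produces the same key. A condensed sketch of that hashing pattern with illustrative values (the variant entries are hypothetical, not taken from this diff):

    use std::collections::BTreeMap;
    use sha2::{Digest, Sha256};

    fn cache_key_sketch() -> String {
        // Only variant values and platform strings feed the key -- no absolute
        // paths, so the key stays stable across different build roots.
        let mut selected_variant = BTreeMap::new();
        selected_variant.insert("c_compiler", "gcc");
        selected_variant.insert("host_platform", "linux-64");
        selected_variant.insert("build_platform", "linux-64");

        let key_input = ("my-cache", &selected_variant); // (cache name, selected variant)
        let mut hasher = Sha256::new();
        // Sha256 implements std::io::Write, so JSON can be serialized straight into it.
        serde_json::to_writer(&mut hasher, &key_input).expect("in-memory serialization");
        format!("{:x}", hasher.finalize())
    }

The resulting hex digest is what gets appended to the cache directory name ("{name}_{key}" in build_or_fetch_cache_output).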