diff --git a/Cargo.lock b/Cargo.lock index ee67aa3d..ad02a118 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2382,7 +2382,7 @@ dependencies = [ [[package]] name = "pixi-pack" -version = "0.3.2" +version = "0.3.3" dependencies = [ "anyhow", "async-std", diff --git a/Cargo.toml b/Cargo.toml index f5468e5a..8bcc8188 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pixi-pack" description = "A command line tool to pack and unpack conda environments for easy sharing" -version = "0.3.2" +version = "0.3.3" edition = "2021" [features] diff --git a/README.md b/README.md index 010b2495..aa0d5676 100644 --- a/README.md +++ b/README.md @@ -165,6 +165,23 @@ pixi-pack pack --inject local-package-1.0.0-hbefa133_0.conda --manifest-pack pix This can be particularly useful if you build the project itself and want to include the built package in the environment but still want to use `pixi.lock` from the project. Before creating the pack, `pixi-pack` will ensure that the injected packages' dependencies and constraints are compatible with the packages in the environment. +### Cache downloaded packages + +Pass the `--use-cache` flag with a directory path to cache downloaded packages and speed up subsequent pack operations: + +```bash +pixi-pack pack --use-cache ~/.pixi-pack/cache +``` + +This stores every downloaded package in the specified directory and reuses it in later pack operations. The cache mirrors the layout of a conda channel: packages are grouped into platform subdirectories (e.g., `linux-64`, `win-64`). 
+ +Using a cache is particularly useful when: + +- Creating multiple packs with overlapping dependencies +- Working with large packages that take time to download +- Operating in environments with limited bandwidth +- Running CI/CD pipelines where package caching can significantly improve build times + ### Unpacking without `pixi-pack` If you don't have `pixi-pack` available on your target system, you can still install the environment if you have `conda` or `micromamba` available. diff --git a/src/main.rs b/src/main.rs index fb5cd0aa..d632c429 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,6 +54,10 @@ enum Commands { #[arg(short, long)] output_file: Option, + /// Use a cache directory for downloaded packages + #[arg(long)] + use_cache: Option, + /// Inject an additional conda package into the final prefix #[arg(short, long, num_args(0..))] inject: Vec, @@ -67,7 +71,6 @@ enum Commands { #[arg(long, default_value = "false")] create_executable: bool, }, - /// Unpack a pixi environment Unpack { /// Where to unpack the environment. @@ -126,6 +129,7 @@ async fn main() -> Result<()> { inject, ignore_pypi_errors, create_executable, + use_cache, } => { let output_file = output_file.unwrap_or_else(|| default_output_file(platform, create_executable)); @@ -144,6 +148,7 @@ async fn main() -> Result<()> { injected_packages: inject, ignore_pypi_errors, create_executable, + cache_dir: use_cache, }; tracing::debug!("Running pack command with options: {:?}", options); pack(options).await? 
diff --git a/src/pack.rs b/src/pack.rs index 9e3ac2d3..a14ed884 100644 --- a/src/pack.rs +++ b/src/pack.rs @@ -39,11 +39,11 @@ pub struct PackOptions { pub output_file: PathBuf, pub manifest_path: PathBuf, pub metadata: PixiPackMetadata, + pub cache_dir: Option, pub injected_packages: Vec, pub ignore_pypi_errors: bool, pub create_executable: bool, } - fn load_lockfile(manifest_path: &Path) -> Result { if !manifest_path.exists() { anyhow::bail!( @@ -128,7 +128,7 @@ pub async fn pack(options: PackOptions) -> Result<()> { stream::iter(conda_packages_from_lockfile.iter()) .map(Ok) .try_for_each_concurrent(50, |package| async { - download_package(&client, package, &channel_dir).await?; + download_package(&client, package, &channel_dir, options.cache_dir.as_deref()).await?; bar.pb.inc(1); Ok(()) }) @@ -254,6 +254,7 @@ async fn download_package( client: &ClientWithMiddleware, package: &CondaBinaryData, output_dir: &Path, + cache_dir: Option<&Path>, ) -> Result<()> { let output_dir = output_dir.join(&package.package_record.subdir); create_dir_all(&output_dir) @@ -261,7 +262,21 @@ async fn download_package( .map_err(|e| anyhow!("could not create download directory: {}", e))?; let file_name = &package.file_name; - let mut dest = File::create(output_dir.join(file_name)).await?; + let output_path = output_dir.join(file_name); + + // Check cache first if enabled + if let Some(cache_dir) = cache_dir { + let cache_path = cache_dir + .join(&package.package_record.subdir) + .join(file_name); + if cache_path.exists() { + tracing::debug!("Using cached package from {}", cache_path.display()); + fs::copy(&cache_path, &output_path).await?; + return Ok(()); + } + } + + let mut dest = File::create(&output_path).await?; tracing::debug!("Fetching package {}", package.location); let url = match &package.location { @@ -281,9 +296,16 @@ async fn download_package( dest.write_all(&chunk).await?; } + // Save to cache if enabled + if let Some(cache_dir) = cache_dir { + let cache_subdir = 
cache_dir.join(&package.package_record.subdir); + create_dir_all(&cache_subdir).await?; + let cache_path = cache_subdir.join(file_name); + fs::copy(&output_path, &cache_path).await?; + } + Ok(()) } - async fn archive_directory( input_dir: &Path, archive_target: &Path, diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 17f8ea88..3971b135 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,8 +1,10 @@ #![allow(clippy::too_many_arguments)] use sha2::{Digest, Sha256}; +use std::collections::HashMap; use std::{fs, io}; use std::{path::PathBuf, process::Command}; +use walkdir::WalkDir; use pixi_pack::{ unarchive, PackOptions, PixiPackMetadata, UnpackOptions, DEFAULT_PIXI_PACK_VERSION, @@ -61,6 +63,7 @@ fn options( injected_packages: vec![], ignore_pypi_errors, create_executable, + cache_dir: None, }, unpack_options: UnpackOptions { pack_file, @@ -71,7 +74,6 @@ fn options( output_dir, } } - #[fixture] fn required_fs_objects() -> Vec<&'static str> { let mut required_fs_objects = vec!["conda-meta/history", "include", "share"]; @@ -569,3 +571,74 @@ async fn test_manifest_path_dir(#[with(PathBuf::from("examples/simple-python"))] assert!(pack_result.is_ok(), "{:?}", pack_result); assert!(pack_file.is_file()); } +#[rstest] +#[tokio::test] +async fn test_package_caching( + #[with(PathBuf::from("examples/simple-python/pixi.toml"))] options: Options, +) { + let temp_cache = tempdir().expect("Couldn't create a temp cache dir"); + let cache_dir = temp_cache.path().to_path_buf(); + + // First pack with cache - should download packages + let mut pack_options = options.pack_options.clone(); + pack_options.cache_dir = Some(cache_dir.clone()); + let pack_result = pixi_pack::pack(pack_options).await; + assert!(pack_result.is_ok(), "{:?}", pack_result); + + // Get files and their modification times after first pack + let mut initial_cache_files = HashMap::new(); + for entry in WalkDir::new(&cache_dir) { + let entry = entry.unwrap(); + if 
entry.file_type().is_file() { + let path = entry.path().to_path_buf(); + let modified_time = fs::metadata(&path).unwrap().modified().unwrap(); + initial_cache_files.insert(path, modified_time); + } + } + assert!( + !initial_cache_files.is_empty(), + "Cache should contain downloaded files" + ); + + // Calculate first pack's SHA256, reusing test_reproducible_shasum + let first_sha256 = sha256_digest_bytes(&options.pack_options.output_file); + insta::assert_snapshot!( + format!("sha256-{}", options.pack_options.platform), + &first_sha256 + ); + + // Small delay to ensure any new writes would have different timestamps + tokio::time::sleep(tokio::time::Duration::from_millis(10)).await; + + // Second pack with same cache - should use cached packages + let temp_dir2 = tempdir().expect("Couldn't create second temp dir"); + let mut pack_options2 = options.pack_options.clone(); + pack_options2.cache_dir = Some(cache_dir.clone()); + let output_file2 = temp_dir2.path().join("environment.tar"); + pack_options2.output_file = output_file2.clone(); + + let pack_result2 = pixi_pack::pack(pack_options2).await; + assert!(pack_result2.is_ok(), "{:?}", pack_result2); + + // Check that cache files weren't modified + for (path, initial_mtime) in initial_cache_files { + let current_mtime = fs::metadata(&path).unwrap().modified().unwrap(); + assert_eq!( + initial_mtime, + current_mtime, + "Cache file {} was modified when it should have been reused", + path.display() + ); + } + + // Verify second pack produces identical output + let second_sha256 = sha256_digest_bytes(&output_file2); + assert_eq!( + first_sha256, second_sha256, + "Pack outputs should be identical when using cache" + ); + + // Both output files should exist and be valid + assert!(options.pack_options.output_file.exists()); + assert!(output_file2.exists()); +}