From 002bc1d457c2d3b80ec6e95ecec37be297bd90f8 Mon Sep 17 00:00:00 2001 From: Sam Pfeiffer Date: Sun, 23 Feb 2025 16:35:48 +0000 Subject: [PATCH 1/6] Implement a --use-cache flag that will download to a user-provided cache folder for reuse later --- README.md | 17 ++++++++++++ src/main.rs | 10 ++++--- src/pack.rs | 31 +++++++++++++++++----- tests/integration_test.rs | 56 ++++++++++++++++++++++++++++++++++++--- 4 files changed, 102 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 010b2495..296a759c 100644 --- a/README.md +++ b/README.md @@ -165,6 +165,23 @@ pixi-pack pack --inject local-package-1.0.0-hbefa133_0.conda --manifest-pack pix This can be particularly useful if you build the project itself and want to include the built package in the environment but still want to use `pixi.lock` from the project. Before creating the pack, `pixi-pack` will ensure that the injected packages' dependencies and constraints are compatible with the packages in the environment. +### Cache downloaded packages + +You can cache downloaded packages to speed up subsequent pack operations by using the `--use-cache` flag: + +```bash +pixi-pack pack --use-cache ~/.pixi-pack/cache +``` + +This will store all downloaded packages in the specified directory and reuse them in future pack operations. The cache follows the same structure as conda channels, organizing packages by platform subdirectories (e.g., linux-64, win-64, etc.). + +Using a cache is particularly useful when: + +* Creating multiple packs with overlapping dependencies +* Working with large packages that take time to download +* Operating in environments with limited bandwidth +* Running CI/CD pipelines where package caching can significantly improve build times + ### Unpacking without `pixi-pack` If you don't have `pixi-pack` available on your target system, you can still install the environment if you have `conda` or `micromamba` available. diff --git a/src/main.rs b/src/main.rs index fb5cd0aa..af522920 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,6 +54,10 @@ enum Commands { #[arg(short, long)] output_file: Option, + /// Use a cache directory for downloaded packages + #[arg(long)] + use_cache: Option, + /// Inject an additional conda package into the final prefix #[arg(short, long, num_args(0..))] inject: Vec, @@ -67,7 +71,6 @@ enum Commands { #[arg(long, default_value = "false")] create_executable: bool, }, - /// Unpack a pixi environment Unpack { /// Where to unpack the environment. @@ -126,6 +129,7 @@ async fn main() -> Result<()> { inject, ignore_pypi_errors, create_executable, + use_cache, } => { let output_file = output_file.unwrap_or_else(|| default_output_file(platform, create_executable)); @@ -144,11 +148,11 @@ async fn main() -> Result<()> { injected_packages: inject, ignore_pypi_errors, create_executable, + cache_dir: use_cache, }; tracing::debug!("Running pack command with options: {:?}", options); pack(options).await? - } - Commands::Unpack { + } Commands::Unpack { output_directory, env_name, pack_file, diff --git a/src/pack.rs b/src/pack.rs index 9e3ac2d3..5fbbfea9 100644 --- a/src/pack.rs +++ b/src/pack.rs @@ -39,11 +39,11 @@ pub struct PackOptions { pub output_file: PathBuf, pub manifest_path: PathBuf, pub metadata: PixiPackMetadata, + pub cache_dir: Option, pub injected_packages: Vec, pub ignore_pypi_errors: bool, pub create_executable: bool, } - fn load_lockfile(manifest_path: &Path) -> Result { if !manifest_path.exists() { anyhow::bail!( @@ -128,13 +128,12 @@ pub async fn pack(options: PackOptions) -> Result<()> { stream::iter(conda_packages_from_lockfile.iter()) .map(Ok) .try_for_each_concurrent(50, |package| async { - download_package(&client, package, &channel_dir).await?; + download_package(&client, package, &channel_dir, options.cache_dir.as_deref()).await?; bar.pb.inc(1); Ok(()) }) .await - .map_err(|e: anyhow::Error| anyhow!("could not download package: {}", e))?; - bar.pb.finish_and_clear(); + .map_err(|e: anyhow::Error| anyhow!("could not download package: {}", e))?; bar.pb.finish_and_clear(); let mut conda_packages: Vec<(String, PackageRecord)> = Vec::new(); @@ -254,6 +253,7 @@ async fn download_package( client: &ClientWithMiddleware, package: &CondaBinaryData, output_dir: &Path, + cache_dir: Option<&Path>, ) -> Result<()> { let output_dir = output_dir.join(&package.package_record.subdir); create_dir_all(&output_dir) @@ -261,7 +261,19 @@ async fn download_package( .map_err(|e| anyhow!("could not create download directory: {}", e))?; let file_name = &package.file_name; - let mut dest = File::create(output_dir.join(file_name)).await?; + let output_path = output_dir.join(file_name); + + // Check cache first if enabled + if let Some(cache_dir) = cache_dir { + let cache_path = cache_dir.join(&package.package_record.subdir).join(file_name); + if cache_path.exists() { + tracing::debug!("Using cached package from {}", cache_path.display()); + fs::copy(&cache_path, &output_path).await?; + return Ok(()); + } + } + + let mut dest = File::create(&output_path).await?; tracing::debug!("Fetching package {}", package.location); let url = match &package.location { @@ -281,9 +293,16 @@ async fn download_package( dest.write_all(&chunk).await?; } + // Save to cache if enabled + if let Some(cache_dir) = cache_dir { + let cache_subdir = cache_dir.join(&package.package_record.subdir); + create_dir_all(&cache_subdir).await?; + let cache_path = cache_subdir.join(file_name); + fs::copy(&output_path, &cache_path).await?; + } + Ok(()) } - async fn archive_directory( input_dir: &Path, archive_target: &Path, diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 17f8ea88..f27cf9e2 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,6 +1,7 @@ #![allow(clippy::too_many_arguments)] use sha2::{Digest, Sha256}; +use walkdir::WalkDir; use std::{fs, io}; use std::{path::PathBuf, process::Command}; @@ -61,6 +62,7 @@ fn options( injected_packages: vec![], ignore_pypi_errors, create_executable, + cache_dir: None, }, unpack_options: UnpackOptions { pack_file, @@ -70,9 +72,7 @@ fn options( }, output_dir, } -} - -#[fixture] +}#[fixture] fn required_fs_objects() -> Vec<&'static str> { let mut required_fs_objects = vec!["conda-meta/history", "include", "share"]; let openssl_required_file = match Platform::current() { @@ -569,3 +569,53 @@ async fn test_manifest_path_dir(#[with(PathBuf::from("examples/simple-python"))] assert!(pack_result.is_ok(), "{:?}", pack_result); assert!(pack_file.is_file()); } + +#[rstest] +#[tokio::test] +async fn test_package_caching( + #[with(PathBuf::from("examples/simple-python/pixi.toml"))] options: Options, +) { + let temp_cache = tempdir().expect("Couldn't create a temp cache dir"); + let cache_dir = temp_cache.path().to_path_buf(); + + // First pack with cache - should download packages + let mut pack_options = options.pack_options.clone(); + pack_options.cache_dir = Some(cache_dir.clone()); + let pack_result = pixi_pack::pack(pack_options).await; + assert!(pack_result.is_ok(), "{:?}", pack_result); + + // Get file count in cache after first pack + let cache_files_count = WalkDir::new(&cache_dir) + .into_iter() + .filter_map(Result::ok) + .filter(|e| e.file_type().is_file()) + .count(); + assert!(cache_files_count > 0, "Cache should contain downloaded files"); + + // Second pack with same cache - should use cached packages + let temp_dir2 = tempdir().expect("Couldn't create second temp dir"); + let mut pack_options2 = options.pack_options.clone(); + pack_options2.cache_dir = Some(cache_dir.clone()); + let output_file2 = temp_dir2.path().join("environment.tar"); + pack_options2.output_file = output_file2.clone(); + + let pack_result2 = pixi_pack::pack(pack_options2).await; + assert!(pack_result2.is_ok(), "{:?}", pack_result2); + + // Verify cache files weren't downloaded again by checking modification times + let cache_files: Vec<_> = WalkDir::new(&cache_dir) + .into_iter() + .filter_map(Result::ok) + .filter(|e| e.file_type().is_file()) + .collect(); + + assert_eq!( + cache_files.len(), + cache_files_count, + "Cache file count should remain the same" + ); + + // Both output files should exist and be valid + assert!(options.pack_options.output_file.exists()); + assert!(output_file2.exists()); +} \ No newline at end of file From 262165929121933c0602edf4b9f71caf5c7b752d Mon Sep 17 00:00:00 2001 From: Sam Pfeiffer Date: Sun, 23 Feb 2025 16:46:57 +0000 Subject: [PATCH 2/6] Autoformat --- src/main.rs | 3 ++- src/pack.rs | 7 +++++-- tests/integration_test.rs | 14 +++++++++----- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/main.rs b/src/main.rs index af522920..d632c429 100644 --- a/src/main.rs +++ b/src/main.rs @@ -152,7 +152,8 @@ async fn main() -> Result<()> { }; tracing::debug!("Running pack command with options: {:?}", options); pack(options).await? - } Commands::Unpack { + } + Commands::Unpack { output_directory, env_name, pack_file, diff --git a/src/pack.rs b/src/pack.rs index 5fbbfea9..a14ed884 100644 --- a/src/pack.rs +++ b/src/pack.rs @@ -133,7 +133,8 @@ pub async fn pack(options: PackOptions) -> Result<()> { Ok(()) }) .await - .map_err(|e: anyhow::Error| anyhow!("could not download package: {}", e))?; bar.pb.finish_and_clear(); + .map_err(|e: anyhow::Error| anyhow!("could not download package: {}", e))?; + bar.pb.finish_and_clear(); let mut conda_packages: Vec<(String, PackageRecord)> = Vec::new(); @@ -265,7 +266,9 @@ async fn download_package( // Check cache first if enabled if let Some(cache_dir) = cache_dir { - let cache_path = cache_dir.join(&package.package_record.subdir).join(file_name); + let cache_path = cache_dir + .join(&package.package_record.subdir) + .join(file_name); if cache_path.exists() { tracing::debug!("Using cached package from {}", cache_path.display()); fs::copy(&cache_path, &output_path).await?; diff --git a/tests/integration_test.rs b/tests/integration_test.rs index f27cf9e2..791ad78d 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,9 +1,9 @@ #![allow(clippy::too_many_arguments)] use sha2::{Digest, Sha256}; -use walkdir::WalkDir; use std::{fs, io}; use std::{path::PathBuf, process::Command}; +use walkdir::WalkDir; use pixi_pack::{ unarchive, PackOptions, PixiPackMetadata, UnpackOptions, DEFAULT_PIXI_PACK_VERSION, @@ -72,7 +72,8 @@ fn options( }, output_dir, } -}#[fixture] +} +#[fixture] fn required_fs_objects() -> Vec<&'static str> { let mut required_fs_objects = vec!["conda-meta/history", "include", "share"]; let openssl_required_file = match Platform::current() { @@ -590,7 +591,10 @@ async fn test_package_caching( .filter_map(Result::ok) .filter(|e| e.file_type().is_file()) .count(); - assert!(cache_files_count > 0, "Cache should contain downloaded files"); + assert!( + cache_files_count > 0, + "Cache should contain downloaded files" + ); // Second pack with same cache - should use cached packages let temp_dir2 = tempdir().expect("Couldn't create second temp dir"); @@ -598,7 +602,7 @@ async fn test_package_caching( pack_options2.cache_dir = Some(cache_dir.clone()); let output_file2 = temp_dir2.path().join("environment.tar"); pack_options2.output_file = output_file2.clone(); - + let pack_result2 = pixi_pack::pack(pack_options2).await; assert!(pack_result2.is_ok(), "{:?}", pack_result2); @@ -618,4 +622,4 @@ async fn test_package_caching( // Both output files should exist and be valid assert!(options.pack_options.output_file.exists()); assert!(output_file2.exists()); -} \ No newline at end of file +} From 1b4cce984386e2c5ab52f3b5a2797cde1f7afdcd Mon Sep 17 00:00:00 2001 From: Sam Pfeiffer Date: Sun, 23 Feb 2025 16:50:08 +0000 Subject: [PATCH 3/6] Reformatting as a precommit check failed --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 296a759c..aa0d5676 100644 --- a/README.md +++ b/README.md @@ -177,10 +177,10 @@ This will store all downloaded packages in the specified directory and reuse the Using a cache is particularly useful when: -* Creating multiple packs with overlapping dependencies -* Working with large packages that take time to download -* Operating in environments with limited bandwidth -* Running CI/CD pipelines where package caching can significantly improve build times +- Creating multiple packs with overlapping dependencies +- Working with large packages that take time to download +- Operating in environments with limited bandwidth +- Running CI/CD pipelines where package caching can significantly improve build times ### Unpacking without `pixi-pack` From daf95d04ad2fc2815d4b3f918f5e90e462dbd75f Mon Sep 17 00:00:00 2001 From: Sam Pfeiffer Date: Sun, 23 Feb 2025 19:52:53 +0000 Subject: [PATCH 4/6] Actually check the timestamp of the files in the test, also add sha256 check in the test --- tests/integration_test.rs | 60 +++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 791ad78d..93651174 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,6 +1,7 @@ #![allow(clippy::too_many_arguments)] use sha2::{Digest, Sha256}; +use std::collections::HashMap; use std::{fs, io}; use std::{path::PathBuf, process::Command}; use walkdir::WalkDir; @@ -570,7 +571,6 @@ async fn test_manifest_path_dir(#[with(PathBuf::from("examples/simple-python"))] assert!(pack_result.is_ok(), "{:?}", pack_result); assert!(pack_file.is_file()); } - #[rstest] #[tokio::test] async fn test_package_caching( @@ -585,16 +585,24 @@ async fn test_package_caching( let pack_result = pixi_pack::pack(pack_options).await; assert!(pack_result.is_ok(), "{:?}", pack_result); - // Get file count in cache after first pack - let cache_files_count = WalkDir::new(&cache_dir) - .into_iter() - .filter_map(Result::ok) - .filter(|e| e.file_type().is_file()) - .count(); - assert!( - cache_files_count > 0, - "Cache should contain downloaded files" - ); + // Get files and their modification times after first pack + let mut initial_cache_files = HashMap::new(); + for entry in WalkDir::new(&cache_dir) { + let entry = entry.unwrap(); + if entry.file_type().is_file() { + let path = entry.path().to_path_buf(); + let modified_time = fs::metadata(&path).unwrap().modified().unwrap(); + initial_cache_files.insert(path, modified_time); + } + } + assert!(!initial_cache_files.is_empty(), "Cache should contain downloaded files"); + + // Calculate first pack's SHA256, reusing test_reproducible_shasum + let first_sha256 = sha256_digest_bytes(&options.pack_options.output_file); + insta::assert_snapshot!(format!("sha256-{}", options.pack_options.platform), &first_sha256); + + // Small delay to ensure any new writes would have different timestamps + tokio::time::sleep(tokio::time::Duration::from_millis(10)).await; // Second pack with same cache - should use cached packages let temp_dir2 = tempdir().expect("Couldn't create second temp dir"); @@ -602,24 +610,26 @@ async fn test_package_caching( pack_options2.cache_dir = Some(cache_dir.clone()); let output_file2 = temp_dir2.path().join("environment.tar"); pack_options2.output_file = output_file2.clone(); - + let pack_result2 = pixi_pack::pack(pack_options2).await; assert!(pack_result2.is_ok(), "{:?}", pack_result2); - // Verify cache files weren't downloaded again by checking modification times - let cache_files: Vec<_> = WalkDir::new(&cache_dir) - .into_iter() - .filter_map(Result::ok) - .filter(|e| e.file_type().is_file()) - .collect(); - - assert_eq!( - cache_files.len(), - cache_files_count, - "Cache file count should remain the same" - ); + // Check that cache files weren't modified + for (path, initial_mtime) in initial_cache_files { + let current_mtime = fs::metadata(&path).unwrap().modified().unwrap(); + assert_eq!( + initial_mtime, + current_mtime, + "Cache file {} was modified when it should have been reused", + path.display() + ); + } + + // Verify second pack produces identical output + let second_sha256 = sha256_digest_bytes(&output_file2); + assert_eq!(first_sha256, second_sha256, "Pack outputs should be identical when using cache"); // Both output files should exist and be valid assert!(options.pack_options.output_file.exists()); assert!(output_file2.exists()); -} +} \ No newline at end of file From 572d1ab17ca2a13764c976229f1a3fb6e9a8573a Mon Sep 17 00:00:00 2001 From: Sam Pfeiffer Date: Sun, 23 Feb 2025 19:54:46 +0000 Subject: [PATCH 5/6] Fix formatting --- tests/integration_test.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 93651174..3971b135 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -595,11 +595,17 @@ async fn test_package_caching( initial_cache_files.insert(path, modified_time); } } - assert!(!initial_cache_files.is_empty(), "Cache should contain downloaded files"); + assert!( + !initial_cache_files.is_empty(), + "Cache should contain downloaded files" + ); // Calculate first pack's SHA256, reusing test_reproducible_shasum let first_sha256 = sha256_digest_bytes(&options.pack_options.output_file); - insta::assert_snapshot!(format!("sha256-{}", options.pack_options.platform), &first_sha256); + insta::assert_snapshot!( + format!("sha256-{}", options.pack_options.platform), + &first_sha256 + ); // Small delay to ensure any new writes would have different timestamps tokio::time::sleep(tokio::time::Duration::from_millis(10)).await; @@ -610,7 +616,7 @@ async fn test_package_caching( pack_options2.cache_dir = Some(cache_dir.clone()); let output_file2 = temp_dir2.path().join("environment.tar"); pack_options2.output_file = output_file2.clone(); - + let pack_result2 = pixi_pack::pack(pack_options2).await; assert!(pack_result2.is_ok(), "{:?}", pack_result2); @@ -618,7 +624,7 @@ async fn test_package_caching( for (path, initial_mtime) in initial_cache_files { let current_mtime = fs::metadata(&path).unwrap().modified().unwrap(); assert_eq!( - initial_mtime, + initial_mtime, current_mtime, "Cache file {} was modified when it should have been reused", path.display() @@ -627,9 +633,12 @@ async fn test_package_caching( // Verify second pack produces identical output let second_sha256 = sha256_digest_bytes(&output_file2); - assert_eq!(first_sha256, second_sha256, "Pack outputs should be identical when using cache"); + assert_eq!( + first_sha256, second_sha256, + "Pack outputs should be identical when using cache" + ); // Both output files should exist and be valid assert!(options.pack_options.output_file.exists()); assert!(output_file2.exists()); -} \ No newline at end of file +} From 4783274bf36bd1b85e790b448fc85265d7067ecb Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Sun, 23 Feb 2025 23:38:20 +0100 Subject: [PATCH 6/6] bump version --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ee67aa3d..ad02a118 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2382,7 +2382,7 @@ dependencies = [ [[package]] name = "pixi-pack" -version = "0.3.2" +version = "0.3.3" dependencies = [ "anyhow", "async-std", diff --git a/Cargo.toml b/Cargo.toml index f5468e5a..8bcc8188 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pixi-pack" description = "A command line tool to pack and unpack conda environments for easy sharing" -version = "0.3.2" +version = "0.3.3" edition = "2021" [features]