From 07fb53545026bc2a0862130f61abf9c018c8fae0 Mon Sep 17 00:00:00 2001 From: kayagokalp Date: Fri, 9 May 2025 11:03:30 -0700 Subject: [PATCH 1/4] wip --- forc-pkg/src/cache.rs | 0 forc-pkg/src/lock.rs | 4 ++++ forc-pkg/src/manifest/mod.rs | 12 ++++++++++++ forc-pkg/src/source/git/mod.rs | 17 +++++++++++++++++ forc-pkg/src/source/mod.rs | 6 +++++- 5 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 forc-pkg/src/cache.rs diff --git a/forc-pkg/src/cache.rs b/forc-pkg/src/cache.rs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/forc-pkg/src/lock.rs b/forc-pkg/src/lock.rs index f5db529cc7b..e4b927b4936 100644 --- a/forc-pkg/src/lock.rs +++ b/forc-pkg/src/lock.rs @@ -39,6 +39,7 @@ pub struct PkgLock { source: String, dependencies: Option>, contract_dependencies: Option>, + checksum: String, } /// `PkgDepLine` is a terse, single-line, git-diff-friendly description of a package's @@ -116,12 +117,15 @@ impl PkgLock { None }; + let checksum = todo!("hash the source itself"); + Self { name, version, source, dependencies, contract_dependencies, + checksum, } } diff --git a/forc-pkg/src/manifest/mod.rs b/forc-pkg/src/manifest/mod.rs index 05844618265..105512db230 100644 --- a/forc-pkg/src/manifest/mod.rs +++ b/forc-pkg/src/manifest/mod.rs @@ -56,6 +56,8 @@ pub trait GenericManifestFile { /// Returns a mapping of member member names to package manifest files. fn member_manifests(&self) -> Result; + /// Returns the checksum of the source described by this `ManifestFile`. + fn checksum(&self) -> Result; } #[derive(Clone, Debug)] @@ -137,6 +139,16 @@ impl GenericManifestFile for ManifestFile { ManifestFile::Workspace(workspace_manifest) => workspace_manifest.lock_path(), } } + + /// Returns the checksum of the source described by this `ManifestFile`. + fn checksum(&self) -> Result { + // We basically want to read Forc.toml, and anything under src/ folder. + // given that it ends with a .sw file. + // + // If this is a workspace manifest file, then we also need to read the + // top level Forc.toml and do the logic above for each member manifest. + todo!() + } } impl TryInto for ManifestFile { diff --git a/forc-pkg/src/source/git/mod.rs b/forc-pkg/src/source/git/mod.rs index e7b8e441e5e..45775b28ca5 100644 --- a/forc-pkg/src/source/git/mod.rs +++ b/forc-pkg/src/source/git/mod.rs @@ -17,6 +17,8 @@ use std::{ str::FromStr, }; +use super::Checksum; + #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Deserialize, Serialize)] pub struct Url { url: gix_url::Url, @@ -56,6 +58,9 @@ pub struct Pinned { pub source: Source, /// The hash to which we have pinned the source. pub commit_hash: String, + /// Calculated sha256-hash for this given pinned instance. + /// Prefixed with `0x`. + pub checksum: String, } /// Error returned upon failed parsing of `Pinned::from_str`. @@ -143,6 +148,12 @@ impl Reference { } } +impl Checksum for Pinned { + fn checksum(&self) -> &str { + &self.checksum + } +} + impl Pinned { pub const PREFIX: &'static str = "git"; } @@ -233,6 +244,12 @@ impl source::DepPath for Pinned { } } +impl source::Checksum for Pinned { + fn checksum(&self) -> String { + self.checksum + } +} + impl fmt::Display for Url { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let url_string = self.url.to_bstring().to_string(); diff --git a/forc-pkg/src/source/mod.rs b/forc-pkg/src/source/mod.rs index 3d335732d58..61341b7f6d5 100644 --- a/forc-pkg/src/source/mod.rs +++ b/forc-pkg/src/source/mod.rs @@ -32,7 +32,7 @@ use sway_utils::{DEFAULT_IPFS_GATEWAY_URL, DEFAULT_REGISTRY_IPFS_GATEWAY_URL}; /// Pin this source at a specific "version", return the local directory to fetch into. trait Pin { - type Pinned: Fetch + Hash; + type Pinned: Fetch + Hash + Checksum; fn pin(&self, ctx: PinCtx) -> Result<(Self::Pinned, PathBuf)>; } @@ -46,6 +46,10 @@ trait DepPath { fn dep_path(&self, name: &str) -> Result; } +trait Checksum { + fn checksum(&self) -> &str; +} + type FetchId = u64; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] From e16466457dba5f4d045f03454985df526ef719ae Mon Sep 17 00:00:00 2001 From: Kaya Gokalp Date: Mon, 12 May 2025 14:44:52 -0700 Subject: [PATCH 2/4] chore: add checksum tests --- Cargo.lock | 1 + forc-pkg/Cargo.toml | 1 + forc-pkg/src/manifest/mod.rs | 578 ++++++++++++++++++++++++++++++++- forc-pkg/src/source/git/mod.rs | 17 +- forc-pkg/src/source/mod.rs | 1 + 5 files changed, 579 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb3b2fb85ac..b99cbece05c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3041,6 +3041,7 @@ dependencies = [ "serde_ignored", "serde_json", "serde_with", + "sha2 0.10.8", "sway-core", "sway-error", "sway-features", diff --git a/forc-pkg/Cargo.toml b/forc-pkg/Cargo.toml index 4aea3ba4132..84edd337ab3 100644 --- a/forc-pkg/Cargo.toml +++ b/forc-pkg/Cargo.toml @@ -25,6 +25,7 @@ ipfs-api-backend-hyper = { workspace = true, features = ["with-builder"] } petgraph = { workspace = true, features = ["serde-1"] } reqwest.workspace = true scopeguard.workspace = true +sha2.workspace = true semver = { workspace = true, features = ["serde"] } serde = { workspace = true, features = ["derive"] } serde_ignored.workspace = true diff --git a/forc-pkg/src/manifest/mod.rs b/forc-pkg/src/manifest/mod.rs index 105512db230..d8cf338f6d5 100644 --- a/forc-pkg/src/manifest/mod.rs +++ b/forc-pkg/src/manifest/mod.rs @@ -7,6 +7,7 @@ use forc_util::{validate_name, validate_project_name}; use semver::Version; use serde::{de, Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; +use sha2::{Digest, Sha256}; use std::{ collections::{BTreeMap, HashMap}, fmt::Display, @@ -140,14 +141,85 @@ impl GenericManifestFile for ManifestFile { } } - /// Returns the checksum of the source described by this `ManifestFile`. + // Returns the checksum of the source described by this `ManifestFile`. fn checksum(&self) -> Result { - // We basically want to read Forc.toml, and anything under src/ folder. - // given that it ends with a .sw file. - // - // If this is a workspace manifest file, then we also need to read the - // top level Forc.toml and do the logic above for each member manifest. - todo!() + /// Helper function to add source (.sw) files to the hash + fn add_source_files_to_hash(hasher: &mut sha2::Sha256, base_path: &Path) -> Result<()> { + use std::collections::BTreeMap; + use walkdir::WalkDir; + + // Using BTreeMap for deterministic ordering by relative path + let mut source_files = BTreeMap::new(); + + // Find src directory + let src_path = base_path.join("src"); + if src_path.exists() { + // Collect all .sw files under src/ + for entry in WalkDir::new(&src_path) { + let entry = entry?; + let path = entry.path(); + + if path.is_file() && path.extension() == Some(std::ffi::OsStr::new("sw")) { + // Calculate relative path from base_path for consistent naming + let rel_path = path + .strip_prefix(base_path) + .unwrap_or_else(|_| path) + .to_string_lossy() + .to_string(); + + // Read file contents + let content = std::fs::read(path).map_err(|e| { + anyhow::anyhow!("Failed to read file {}: {}", path.display(), e) + })?; + + source_files.insert(rel_path, content); + } + } + } + + // Add all files to the hasher in sorted order (guaranteed by BTreeMap) + for (rel_path, content) in source_files { + hasher.update(rel_path.as_bytes()); + // Add a separator to prevent concatenation attacks + hasher.update(b"\0"); + hasher.update(&content); + // Add another separator between files + hasher.update(b"\0\0"); + } + + Ok(()) + } + // Initialize hasher + let mut hasher = Sha256::new(); + + // Add normalized manifest content to the hash + match self { + ManifestFile::Package(package) => { + // Add serialized package data (normalized representation) to hash + let serialized = serde_json::to_string(&package) + .map_err(|e| anyhow::anyhow!("Failed to serialize package manifest: {}", e))?; + hasher.update(serialized.as_bytes()); + + // Add source files + add_source_files_to_hash(&mut hasher, &package.path())?; + } + ManifestFile::Workspace(workspace) => { + // Add serialized workspace data to hash + let serialized = serde_json::to_string(workspace).map_err(|e| { + anyhow::anyhow!("Failed to serialize workspace manifest: {}", e) + })?; + hasher.update(serialized.as_bytes()); + + // Add source files from each member + for member in workspace.members() { + add_source_files_to_hash(&mut hasher, &member)?; + } + } + } + + // Return the final hash as a hex string + let result = hasher.finalize(); + Ok(format!("{:x}", result)) } } @@ -180,7 +252,7 @@ impl TryInto for ManifestFile { type PatchMap = BTreeMap; /// A [PackageManifest] that was deserialized from a file at a particular path. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct PackageManifestFile { /// The deserialized `Forc.toml`. manifest: PackageManifest, @@ -913,7 +985,7 @@ fn default_url() -> String { } /// A [WorkspaceManifest] that was deserialized from a file at a particular path. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct WorkspaceManifestFile { /// The derserialized `Forc.toml` manifest: WorkspaceManifest, @@ -1162,9 +1234,493 @@ pub fn find_dir_within(dir: &Path, pkg_name: &str) -> Option { #[cfg(test)] mod tests { - use std::str::FromStr; - use super::*; + use std::{fs, str::FromStr}; + use tempfile::{tempdir, TempDir}; + + // Helper function to create a temporary package for testing + fn create_test_package( + name: &str, + source_files: Vec<(&str, &str)>, + ) -> Result<(TempDir, PackageManifestFile)> { + let temp_dir = tempdir()?; + let base_path = temp_dir.path(); + + // Create package structure + fs::create_dir_all(base_path.join("src"))?; + + // Create Forc.toml + let forc_toml = format!( + r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "{}" + + [dependencies] + "#, + name + ); + fs::write(base_path.join("Forc.toml"), forc_toml)?; + + // Create source files + for (file_name, content) in source_files { + // Handle nested directories in the file path + let file_path = base_path.join("src").join(file_name); + if let Some(parent) = file_path.parent() { + fs::create_dir_all(parent)?; + } + fs::write(file_path, content)?; + } + + // Create the manifest file + let manifest_file = PackageManifestFile::from_file(base_path.join("Forc.toml"))?; + + Ok((temp_dir, manifest_file)) + } + + // Helper to create a workspace + fn create_test_workspace( + members: Vec<(&str, Vec<(&str, &str)>)>, + ) -> Result<(TempDir, WorkspaceManifestFile)> { + let temp_dir = tempdir()?; + let base_path = temp_dir.path(); + + // Create workspace Forc.toml + let mut workspace_toml = "[workspace]\nmembers = [".to_string(); + + for (i, (name, _)) in members.iter().enumerate() { + if i > 0 { + workspace_toml.push_str(", "); + } + workspace_toml.push_str(&format!("\"{name}\"")); + } + workspace_toml.push_str("]\n"); + + fs::write(base_path.join("Forc.toml"), workspace_toml)?; + + // Create each member + for (name, source_files) in members { + let member_path = base_path.join(name); + fs::create_dir_all(&member_path.join("src"))?; + + // Create member Forc.toml + let forc_toml = format!( + r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "{}" + + [dependencies] + "#, + name + ); + fs::write(member_path.join("Forc.toml"), forc_toml)?; + + // Create source files + for (file_name, content) in source_files { + // Handle nested directories in the file path + let file_path = member_path.join("src").join(file_name); + if let Some(parent) = file_path.parent() { + fs::create_dir_all(parent)?; + } + fs::write(file_path, content)?; + } + } + + // Create the workspace manifest file + let manifest_file = WorkspaceManifestFile::from_file(base_path.join("Forc.toml"))?; + + Ok((temp_dir, manifest_file)) + } + + #[test] + fn test_package_checksum() -> Result<()> { + // Create a test package with specific source files + let (_temp_dir, pkg_manifest) = create_test_package( + "test_pkg", + vec![ + ("main.sw", "fn main() -> u64 { 42 }"), + ("lib.sw", "fn helper() -> bool { true }"), + ], + )?; + + // Get the checksum + let manifest_file = ManifestFile::Package(Box::new(pkg_manifest)); + let checksum1 = manifest_file.checksum()?; + + // Verify the checksum is a valid SHA-256 hash (64 hex chars) + assert_eq!(checksum1.len(), 64); + assert!(checksum1.chars().all(|c| c.is_ascii_hexdigit())); + + Ok(()) + } + + #[test] + fn test_checksum_changes_with_content() -> Result<()> { + // Create first package + let (_temp_dir1, pkg_manifest1) = + create_test_package("test_pkg", vec![("main.sw", "fn main() -> u64 { 42 }")])?; + + // Create second package with different content + let (_temp_dir2, pkg_manifest2) = + create_test_package("test_pkg", vec![("main.sw", "fn main() -> u64 { 43 }")])?; + + let manifest_file1 = ManifestFile::Package(Box::new(pkg_manifest1)); + let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); + + let checksum1 = manifest_file1.checksum()?; + let checksum2 = manifest_file2.checksum()?; + + // Different content should have different checksums + assert_ne!(checksum1, checksum2); + + Ok(()) + } + + #[test] + fn test_checksum_consistent() -> Result<()> { + // Create the same package twice + let (_temp_dir1, pkg_manifest1) = + create_test_package("test_pkg", vec![("main.sw", "fn main() -> u64 { 42 }")])?; + + let (_temp_dir2, pkg_manifest2) = + create_test_package("test_pkg", vec![("main.sw", "fn main() -> u64 { 42 }")])?; + + let manifest_file1 = ManifestFile::Package(Box::new(pkg_manifest1)); + let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); + + let checksum1 = manifest_file1.checksum()?; + let checksum2 = manifest_file2.checksum()?; + + // Same content should have the same checksum + assert_eq!(checksum1, checksum2); + + Ok(()) + } + + #[test] + fn test_workspace_checksum() -> Result<()> { + // Create a test workspace with members + let (_temp_dir, workspace_manifest) = create_test_workspace(vec![ + ("pkg1", vec![("main.sw", "fn main() -> u64 { 1 }")]), + ("pkg2", vec![("main.sw", "fn main() -> u64 { 2 }")]), + ])?; + + let manifest_file = ManifestFile::Workspace(workspace_manifest); + let checksum = manifest_file.checksum()?; + + // Verify the checksum is a valid SHA-256 hash + assert_eq!(checksum.len(), 64); + assert!(checksum.chars().all(|c| c.is_ascii_hexdigit())); + + Ok(()) + } + + #[test] + fn test_checksum_affected_by_file_paths() -> Result<()> { + // Create first package with specific file name + let (_temp_dir1, pkg_manifest1) = + create_test_package("test_pkg", vec![("file1.sw", "fn test() -> u64 { 42 }")])?; + + // Create second package with same content but different file name + let (_temp_dir2, pkg_manifest2) = + create_test_package("test_pkg", vec![("file2.sw", "fn test() -> u64 { 42 }")])?; + + let manifest_file1 = ManifestFile::Package(Box::new(pkg_manifest1)); + let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); + + let checksum1 = manifest_file1.checksum()?; + let checksum2 = manifest_file2.checksum()?; + + // Same content but different file names should have different checksums + assert_ne!(checksum1, checksum2); + + Ok(()) + } + + #[test] + fn test_checksum_changes_with_dependencies() -> Result<()> { + // Create first package with a specific dependency version + let temp_dir1 = tempdir()?; + let base_path1 = temp_dir1.path(); + fs::create_dir_all(base_path1.join("src"))?; + + let forc_toml1 = r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "test_pkg" + + [dependencies] + dep1 = "1.0.0" + "#; + fs::write(base_path1.join("Forc.toml"), forc_toml1)?; + fs::write( + base_path1.join("src").join("main.sw"), + "fn main() -> u64 { 42 }", + )?; + let pkg_manifest1 = PackageManifestFile::from_file(base_path1.join("Forc.toml"))?; + + // Create second package with a different dependency version + let temp_dir2 = tempdir()?; + let base_path2 = temp_dir2.path(); + fs::create_dir_all(base_path2.join("src"))?; + + let forc_toml2 = r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "test_pkg" + + [dependencies] + dep1 = "2.0.0" // Different version + "#; + fs::write(base_path2.join("Forc.toml"), forc_toml2)?; + fs::write( + base_path2.join("src").join("main.sw"), + "fn main() -> u64 { 42 }", + )?; + let pkg_manifest2 = PackageManifestFile::from_file(base_path2.join("Forc.toml"))?; + + let manifest_file1 = ManifestFile::Package(Box::new(pkg_manifest1)); + let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); + + let checksum1 = manifest_file1.checksum()?; + let checksum2 = manifest_file2.checksum()?; + + // Different dependency versions should result in different checksums + assert_ne!(checksum1, checksum2); + + Ok(()) + } + + #[test] + fn test_checksum_unaffected_by_dependency_order() -> Result<()> { + // Create first package with dependencies in one order + let temp_dir1 = tempdir()?; + let base_path1 = temp_dir1.path(); + fs::create_dir_all(base_path1.join("src"))?; + + let forc_toml1 = r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "test_pkg" + + [dependencies] + dep1 = "1.0.0" + dep2 = "1.0.0" + "#; + fs::write(base_path1.join("Forc.toml"), forc_toml1)?; + fs::write( + base_path1.join("src").join("main.sw"), + "fn main() -> u64 { 42 }", + )?; + let pkg_manifest1 = PackageManifestFile::from_file(base_path1.join("Forc.toml"))?; + + // Create second package with dependencies in different order + let temp_dir2 = tempdir()?; + let base_path2 = temp_dir2.path(); + fs::create_dir_all(base_path2.join("src"))?; + + let forc_toml2 = r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "test_pkg" + + [dependencies] + dep2 = "1.0.0" // Different order + dep1 = "1.0.0" + "#; + fs::write(base_path2.join("Forc.toml"), forc_toml2)?; + fs::write( + base_path2.join("src").join("main.sw"), + "fn main() -> u64 { 42 }", + )?; + let pkg_manifest2 = PackageManifestFile::from_file(base_path2.join("Forc.toml"))?; + + let manifest_file1 = ManifestFile::Package(Box::new(pkg_manifest1)); + let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); + + let checksum1 = manifest_file1.checksum()?; + let checksum2 = manifest_file2.checksum()?; + + // Different dependency order should still result in the same checksum + // because serde_json will normalize the serialization + assert_eq!(checksum1, checksum2); + + Ok(()) + } + + #[test] + fn test_checksum_affected_by_filename_in_subfolder() -> Result<()> { + // Create first package with file in a specific subfolder structure + let temp_dir1 = tempdir()?; + let base_path1 = temp_dir1.path(); + fs::create_dir_all(base_path1.join("src").join("subfolder"))?; + + let forc_toml1 = r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "test_pkg" + "#; + fs::write(base_path1.join("Forc.toml"), forc_toml1)?; + fs::write( + base_path1.join("src").join("main.sw"), + "fn main() -> u64 { 42 }", + )?; + fs::write( + base_path1.join("src").join("subfolder").join("utils.sw"), + "fn helper() -> bool { true }", + )?; + let pkg_manifest1 = PackageManifestFile::from_file(base_path1.join("Forc.toml"))?; + + // Create second package with same file but in a different location + let temp_dir2 = tempdir()?; + let base_path2 = temp_dir2.path(); + fs::create_dir_all(base_path2.join("src").join("other_folder"))?; + + let forc_toml2 = r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "test_pkg" + "#; + fs::write(base_path2.join("Forc.toml"), forc_toml2)?; + fs::write( + base_path2.join("src").join("main.sw"), + "fn main() -> u64 { 42 }", + )?; + fs::write( + base_path2.join("src").join("other_folder").join("utils.sw"), + "fn helper() -> bool { true }", + )?; + let pkg_manifest2 = PackageManifestFile::from_file(base_path2.join("Forc.toml"))?; + + let manifest_file1 = ManifestFile::Package(Box::new(pkg_manifest1)); + let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); + + let checksum1 = manifest_file1.checksum()?; + let checksum2 = manifest_file2.checksum()?; + + // Different file paths should result in different checksums + assert_ne!(checksum1, checksum2); + + Ok(()) + } + + #[test] + fn test_checksum_changes_with_whitespace_in_files() -> Result<()> { + // Create first package with specific formatting + let temp_dir1 = tempdir()?; + let base_path1 = temp_dir1.path(); + fs::create_dir_all(base_path1.join("src"))?; + + let forc_toml1 = r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "test_pkg" + "#; + fs::write(base_path1.join("Forc.toml"), forc_toml1)?; + fs::write( + base_path1.join("src").join("main.sw"), + "fn main() -> u64 { 42 }", + )?; + let pkg_manifest1 = PackageManifestFile::from_file(base_path1.join("Forc.toml"))?; + + // Create second package with different whitespace + let temp_dir2 = tempdir()?; + let base_path2 = temp_dir2.path(); + fs::create_dir_all(base_path2.join("src"))?; + + let forc_toml2 = r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "test_pkg" + "#; + fs::write(base_path2.join("Forc.toml"), forc_toml2)?; + fs::write( + base_path2.join("src").join("main.sw"), + "fn main() -> u64 {\n 42\n}", + )?; // Different whitespace + let pkg_manifest2 = PackageManifestFile::from_file(base_path2.join("Forc.toml"))?; + + let manifest_file1 = ManifestFile::Package(Box::new(pkg_manifest1)); + let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); + + let checksum1 = manifest_file1.checksum()?; + let checksum2 = manifest_file2.checksum()?; + + // Different whitespace should result in different checksums + assert_ne!(checksum1, checksum2); + + Ok(()) + } + + #[test] + fn test_checksum_with_identical_file_contents() -> Result<()> { + // Create test package with multiple files having the same content + let temp_dir = tempdir()?; + let base_path = temp_dir.path(); + fs::create_dir_all(base_path.join("src"))?; + + let forc_toml = r#" + [project] + authors = ["Test"] + entry = "main.sw" + license = "MIT" + name = "test_pkg" + "#; + fs::write(base_path.join("Forc.toml"), forc_toml)?; + + // Create two files with the same content + let file_content = "fn helper() -> bool { true }"; + fs::write(base_path.join("src").join("file1.sw"), file_content)?; + fs::write(base_path.join("src").join("file2.sw"), file_content)?; + + let pkg_manifest = PackageManifestFile::from_file(base_path.join("Forc.toml"))?; + let manifest_file = ManifestFile::Package(Box::new(pkg_manifest)); + + let checksum = manifest_file.checksum()?; + + // Verify the checksum is generated correctly (should include both files despite same content) + assert_eq!(checksum.len(), 64); + + // Compare with a package that has only one of the files + let temp_dir2 = tempdir()?; + let base_path2 = temp_dir2.path(); + fs::create_dir_all(base_path2.join("src"))?; + + fs::write(base_path2.join("Forc.toml"), forc_toml)?; + fs::write(base_path2.join("src").join("file1.sw"), file_content)?; + // Not writing file2.sw + + let pkg_manifest2 = PackageManifestFile::from_file(base_path2.join("Forc.toml"))?; + let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); + + let checksum2 = manifest_file2.checksum()?; + + // Different number of files should result in different checksums + assert_ne!(checksum, checksum2); + + Ok(()) + } #[test] fn deserialize_contract_dependency() { diff --git a/forc-pkg/src/source/git/mod.rs b/forc-pkg/src/source/git/mod.rs index 45775b28ca5..337466963a2 100644 --- a/forc-pkg/src/source/git/mod.rs +++ b/forc-pkg/src/source/git/mod.rs @@ -1,5 +1,6 @@ mod auth; +use super::Checksum; use crate::manifest::GenericManifestFile; use crate::{ manifest::{self, PackageManifestFile}, @@ -17,8 +18,6 @@ use std::{ str::FromStr, }; -use super::Checksum; - #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Deserialize, Serialize)] pub struct Url { url: gix_url::Url, @@ -152,6 +151,10 @@ impl Checksum for Pinned { fn checksum(&self) -> &str { &self.checksum } + + fn verify_checksum(&self, checksum: &str) -> bool { + self.checksum == checksum + } } impl Pinned { @@ -176,6 +179,7 @@ impl source::Pin for Source { Pinned { source: self.clone(), commit_hash, + checksum: todo!(), } } else if let Reference::DefaultBranch | Reference::Branch(_) = self.reference { // If the reference is to a branch or to the default branch we need to fetch @@ -189,6 +193,7 @@ impl source::Pin for Source { Ok(Some((_local_path, commit_hash))) => Pinned { source: self.clone(), commit_hash, + checksum: todo!(), }, _ => { // If the checkout we are looking for does not exists locally or an @@ -244,12 +249,6 @@ impl source::DepPath for Pinned { } } -impl source::Checksum for Pinned { - fn checksum(&self) -> String { - self.checksum - } -} - impl fmt::Display for Url { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let url_string = self.url.to_bstring().to_string(); @@ -340,6 +339,7 @@ impl FromStr for Pinned { Ok(Self { source, commit_hash, + checksum: todo!(), }) } } @@ -511,6 +511,7 @@ pub fn pin(fetch_id: u64, name: &str, source: Source) -> Result { Ok(Pinned { source, commit_hash, + checksum: todo!(), }) } diff --git a/forc-pkg/src/source/mod.rs b/forc-pkg/src/source/mod.rs index 61341b7f6d5..7011fa28b5c 100644 --- a/forc-pkg/src/source/mod.rs +++ b/forc-pkg/src/source/mod.rs @@ -48,6 +48,7 @@ trait DepPath { trait Checksum { fn checksum(&self) -> &str; + fn verify_checksum(&self, checksum: &str) -> bool; } type FetchId = u64; From cce46c9104f49a67ba3e9118941aca4c1ae19cc6 Mon Sep 17 00:00:00 2001 From: Kaya Gokalp Date: Tue, 13 May 2025 13:45:43 -0700 Subject: [PATCH 3/4] only cache manifest level --- forc-pkg/src/lock.rs | 4 - forc-pkg/src/manifest/mod.rs | 219 ++++++++++++++------------------- forc-pkg/src/source/git/mod.rs | 18 --- forc-pkg/src/source/mod.rs | 7 +- 4 files changed, 90 insertions(+), 158 deletions(-) diff --git a/forc-pkg/src/lock.rs b/forc-pkg/src/lock.rs index e4b927b4936..f5db529cc7b 100644 --- a/forc-pkg/src/lock.rs +++ b/forc-pkg/src/lock.rs @@ -39,7 +39,6 @@ pub struct PkgLock { source: String, dependencies: Option>, contract_dependencies: Option>, - checksum: String, } /// `PkgDepLine` is a terse, single-line, git-diff-friendly description of a package's @@ -117,15 +116,12 @@ impl PkgLock { None }; - let checksum = todo!("hash the source itself"); - Self { name, version, source, dependencies, contract_dependencies, - checksum, } } diff --git a/forc-pkg/src/manifest/mod.rs b/forc-pkg/src/manifest/mod.rs index d8cf338f6d5..190b930a129 100644 --- a/forc-pkg/src/manifest/mod.rs +++ b/forc-pkg/src/manifest/mod.rs @@ -143,83 +143,10 @@ impl GenericManifestFile for ManifestFile { // Returns the checksum of the source described by this `ManifestFile`. fn checksum(&self) -> Result { - /// Helper function to add source (.sw) files to the hash - fn add_source_files_to_hash(hasher: &mut sha2::Sha256, base_path: &Path) -> Result<()> { - use std::collections::BTreeMap; - use walkdir::WalkDir; - - // Using BTreeMap for deterministic ordering by relative path - let mut source_files = BTreeMap::new(); - - // Find src directory - let src_path = base_path.join("src"); - if src_path.exists() { - // Collect all .sw files under src/ - for entry in WalkDir::new(&src_path) { - let entry = entry?; - let path = entry.path(); - - if path.is_file() && path.extension() == Some(std::ffi::OsStr::new("sw")) { - // Calculate relative path from base_path for consistent naming - let rel_path = path - .strip_prefix(base_path) - .unwrap_or_else(|_| path) - .to_string_lossy() - .to_string(); - - // Read file contents - let content = std::fs::read(path).map_err(|e| { - anyhow::anyhow!("Failed to read file {}: {}", path.display(), e) - })?; - - source_files.insert(rel_path, content); - } - } - } - - // Add all files to the hasher in sorted order (guaranteed by BTreeMap) - for (rel_path, content) in source_files { - hasher.update(rel_path.as_bytes()); - // Add a separator to prevent concatenation attacks - hasher.update(b"\0"); - hasher.update(&content); - // Add another separator between files - hasher.update(b"\0\0"); - } - - Ok(()) - } - // Initialize hasher - let mut hasher = Sha256::new(); - - // Add normalized manifest content to the hash match self { - ManifestFile::Package(package) => { - // Add serialized package data (normalized representation) to hash - let serialized = serde_json::to_string(&package) - .map_err(|e| anyhow::anyhow!("Failed to serialize package manifest: {}", e))?; - hasher.update(serialized.as_bytes()); - - // Add source files - add_source_files_to_hash(&mut hasher, &package.path())?; - } - ManifestFile::Workspace(workspace) => { - // Add serialized workspace data to hash - let serialized = serde_json::to_string(workspace).map_err(|e| { - anyhow::anyhow!("Failed to serialize workspace manifest: {}", e) - })?; - hasher.update(serialized.as_bytes()); - - // Add source files from each member - for member in workspace.members() { - add_source_files_to_hash(&mut hasher, &member)?; - } - } + ManifestFile::Package(package) => package.checksum(), + ManifestFile::Workspace(workspace) => workspace.checksum(), } - - // Return the final hash as a hex string - let result = hasher.finalize(); - Ok(format!("{:x}", result)) } } @@ -684,6 +611,18 @@ impl GenericManifestFile for PackageManifestFile { Ok(member_manifest_files) } + + fn checksum(&self) -> Result { + let mut hasher = Sha256::new(); + let serialized = serde_json::to_string(&self.manifest) + .map_err(|e| anyhow::anyhow!("Failed to serialize package manifest: {}", e))?; + hasher.update(serialized.as_bytes()); + + add_source_files_to_hash(&mut hasher, self.dir())?; + + let result = hasher.finalize(); + Ok(format!("{:x}", result)) + } } impl PackageManifest { @@ -1112,6 +1051,21 @@ impl GenericManifestFile for WorkspaceManifestFile { Ok(member_manifest_files) } + + fn checksum(&self) -> Result { + let mut hasher = Sha256::new(); + let serialized = serde_json::to_string(&self.manifest) + .map_err(|e| anyhow::anyhow!("Failed to serialize workspace manifest: {}", e))?; + hasher.update(serialized.as_bytes()); + + // Add source files from each member + for member in self.members() { + add_source_files_to_hash(&mut hasher, member)?; + } + + let result = hasher.finalize(); + Ok(format!("{:x}", result)) + } } impl WorkspaceManifest { @@ -1232,6 +1186,53 @@ pub fn find_dir_within(dir: &Path, pkg_name: &str) -> Option { find_within(dir, pkg_name).and_then(|path| path.parent().map(Path::to_path_buf)) } +/// Helper function to add source (.sw) files to the hash +fn add_source_files_to_hash(hasher: &mut sha2::Sha256, base_path: &Path) -> Result<()> { + use std::collections::BTreeMap; + use walkdir::WalkDir; + + // Using BTreeMap for deterministic ordering by relative path + let mut source_files = BTreeMap::new(); + + // Find src directory + let src_path = base_path.join("src"); + if src_path.exists() { + // Collect all .sw files under src/ + for entry in WalkDir::new(&src_path) { + let entry = entry?; + let path = entry.path(); + + if path.is_file() && path.extension() == Some(std::ffi::OsStr::new("sw")) { + // Calculate relative path from base_path for consistent naming + let rel_path = path + .strip_prefix(base_path) + .unwrap_or(path) + .to_string_lossy() + .to_string(); + + println!("adding {:?}", rel_path); + // Read file contents + let content = std::fs::read(path).map_err(|e| { + anyhow::anyhow!("Failed to read file {}: {}", path.display(), e) + })?; + + source_files.insert(rel_path, content); + } + } + } + + // Add all files to the hasher in sorted order (guaranteed by BTreeMap) + for (rel_path, content) in source_files { + hasher.update(rel_path.as_bytes()); + // Add a separator to prevent concatenation attacks + hasher.update(b"\0"); + hasher.update(&content); + // Add another separator between files + hasher.update(b"\0\0"); + } + + Ok(()) +} #[cfg(test)] mod tests { use super::*; @@ -1423,12 +1424,16 @@ mod tests { #[test] fn test_checksum_affected_by_file_paths() -> Result<()> { // Create first package with specific file name - let (_temp_dir1, pkg_manifest1) = - create_test_package("test_pkg", vec![("file1.sw", "fn test() -> u64 { 42 }")])?; + let (_temp_dir1, pkg_manifest1) = create_test_package( + "test_pkg", + vec![("main.sw", ""), ("file1.sw", "fn test() -> u64 { 42 }")], + )?; // Create second package with same content but different file name - let (_temp_dir2, pkg_manifest2) = - create_test_package("test_pkg", vec![("file2.sw", "fn test() -> u64 { 42 }")])?; + let (_temp_dir2, pkg_manifest2) = create_test_package( + "test_pkg", + vec![("main.sw", ""), ("file2.sw", "fn test() -> u64 { 42 }")], + )?; let manifest_file1 = ManifestFile::Package(Box::new(pkg_manifest1)); let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); @@ -1479,7 +1484,7 @@ mod tests { name = "test_pkg" [dependencies] - dep1 = "2.0.0" // Different version + dep1 = "2.0.0" "#; fs::write(base_path2.join("Forc.toml"), forc_toml2)?; fs::write( @@ -1538,7 +1543,7 @@ mod tests { name = "test_pkg" [dependencies] - dep2 = "1.0.0" // Different order + dep2 = "1.0.0" dep1 = "1.0.0" "#; fs::write(base_path2.join("Forc.toml"), forc_toml2)?; @@ -1622,6 +1627,9 @@ mod tests { } #[test] + // NOTE: We should probably do the caching in AST level instead of this + // string level operations. This test illustrates why we should be doing it + // on AST. fn test_checksum_changes_with_whitespace_in_files() -> Result<()> { // Create first package with specific formatting let temp_dir1 = tempdir()?; @@ -1673,55 +1681,6 @@ mod tests { Ok(()) } - #[test] - fn test_checksum_with_identical_file_contents() -> Result<()> { - // Create test package with multiple files having the same content - let temp_dir = tempdir()?; - let base_path = temp_dir.path(); - fs::create_dir_all(base_path.join("src"))?; - - let forc_toml = r#" - [project] - authors = ["Test"] - entry = "main.sw" - license = "MIT" - name = "test_pkg" - "#; - fs::write(base_path.join("Forc.toml"), forc_toml)?; - - // Create two files with the same content - let file_content = "fn helper() -> bool { true }"; - fs::write(base_path.join("src").join("file1.sw"), file_content)?; - fs::write(base_path.join("src").join("file2.sw"), file_content)?; - - let pkg_manifest = PackageManifestFile::from_file(base_path.join("Forc.toml"))?; - let manifest_file = ManifestFile::Package(Box::new(pkg_manifest)); - - let checksum = manifest_file.checksum()?; - - // Verify the checksum is generated correctly (should include both files despite same content) - assert_eq!(checksum.len(), 64); - - // Compare with a package that has only one of the files - let temp_dir2 = tempdir()?; - let base_path2 = temp_dir2.path(); - fs::create_dir_all(base_path2.join("src"))?; - - fs::write(base_path2.join("Forc.toml"), forc_toml)?; - fs::write(base_path2.join("src").join("file1.sw"), file_content)?; - // Not writing file2.sw - - let pkg_manifest2 = PackageManifestFile::from_file(base_path2.join("Forc.toml"))?; - let manifest_file2 = ManifestFile::Package(Box::new(pkg_manifest2)); - - let checksum2 = manifest_file2.checksum()?; - - // Different number of files should result in different checksums - assert_ne!(checksum, checksum2); - - Ok(()) - } - #[test] fn deserialize_contract_dependency() { let contract_dep_str = r#"{"path": "../", "salt": "0x1111111111111111111111111111111111111111111111111111111111111111" }"#; diff --git a/forc-pkg/src/source/git/mod.rs b/forc-pkg/src/source/git/mod.rs index 337466963a2..e7b8e441e5e 100644 --- a/forc-pkg/src/source/git/mod.rs +++ b/forc-pkg/src/source/git/mod.rs @@ -1,6 +1,5 @@ mod auth; -use super::Checksum; use crate::manifest::GenericManifestFile; use crate::{ manifest::{self, PackageManifestFile}, @@ -57,9 +56,6 @@ pub struct Pinned { pub source: Source, /// The hash to which we have pinned the source. pub commit_hash: String, - /// Calculated sha256-hash for this given pinned instance. - /// Prefixed with `0x`. - pub checksum: String, } /// Error returned upon failed parsing of `Pinned::from_str`. @@ -147,16 +143,6 @@ impl Reference { } } -impl Checksum for Pinned { - fn checksum(&self) -> &str { - &self.checksum - } - - fn verify_checksum(&self, checksum: &str) -> bool { - self.checksum == checksum - } -} - impl Pinned { pub const PREFIX: &'static str = "git"; } @@ -179,7 +165,6 @@ impl source::Pin for Source { Pinned { source: self.clone(), commit_hash, - checksum: todo!(), } } else if let Reference::DefaultBranch | Reference::Branch(_) = self.reference { // If the reference is to a branch or to the default branch we need to fetch @@ -193,7 +178,6 @@ impl source::Pin for Source { Ok(Some((_local_path, commit_hash))) => Pinned { source: self.clone(), commit_hash, - checksum: todo!(), }, _ => { // If the checkout we are looking for does not exists locally or an @@ -339,7 +323,6 @@ impl FromStr for Pinned { Ok(Self { source, commit_hash, - checksum: todo!(), }) } } @@ -511,7 +494,6 @@ pub fn pin(fetch_id: u64, name: &str, source: Source) -> Result { Ok(Pinned { source, commit_hash, - checksum: todo!(), }) } diff --git a/forc-pkg/src/source/mod.rs b/forc-pkg/src/source/mod.rs index 7011fa28b5c..3d335732d58 100644 --- a/forc-pkg/src/source/mod.rs +++ b/forc-pkg/src/source/mod.rs @@ -32,7 +32,7 @@ use sway_utils::{DEFAULT_IPFS_GATEWAY_URL, DEFAULT_REGISTRY_IPFS_GATEWAY_URL}; /// Pin this source at a specific "version", return the local directory to fetch into. trait Pin { - type Pinned: Fetch + Hash + Checksum; + type Pinned: Fetch + Hash; fn pin(&self, ctx: PinCtx) -> Result<(Self::Pinned, PathBuf)>; } @@ -46,11 +46,6 @@ trait DepPath { fn dep_path(&self, name: &str) -> Result; } -trait Checksum { - fn checksum(&self) -> &str; - fn verify_checksum(&self, checksum: &str) -> bool; -} - type FetchId = u64; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] From d55d7f85ef1a0bc0a0401fc958eb34e10707e2bb Mon Sep 17 00:00:00 2001 From: Kaya Gokalp Date: Tue, 13 May 2025 13:59:21 -0700 Subject: [PATCH 4/4] lints --- forc-pkg/Cargo.toml | 2 +- forc-pkg/src/manifest/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/forc-pkg/Cargo.toml b/forc-pkg/Cargo.toml index 84edd337ab3..551509fcd20 100644 --- a/forc-pkg/Cargo.toml +++ b/forc-pkg/Cargo.toml @@ -25,12 +25,12 @@ ipfs-api-backend-hyper = { workspace = true, features = ["with-builder"] } petgraph = { workspace = true, features = ["serde-1"] } reqwest.workspace = true scopeguard.workspace = true -sha2.workspace = true semver = { workspace = true, features = ["serde"] } serde = { workspace = true, features = ["derive"] } serde_ignored.workspace = true serde_json.workspace = true serde_with.workspace = true +sha2.workspace = true sway-core.workspace = true sway-error.workspace = true sway-features.workspace = true diff --git a/forc-pkg/src/manifest/mod.rs b/forc-pkg/src/manifest/mod.rs index 190b930a129..1ccad64a060 100644 --- a/forc-pkg/src/manifest/mod.rs +++ b/forc-pkg/src/manifest/mod.rs @@ -1304,7 +1304,7 @@ mod tests { // Create each member for (name, source_files) in members { let member_path = base_path.join(name); - fs::create_dir_all(&member_path.join("src"))?; + fs::create_dir_all(member_path.join("src"))?; // Create member Forc.toml let forc_toml = format!(