From dda959973460be348b034636ab9a7e99674b10bf Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 30 Mar 2025 14:02:10 +0800 Subject: [PATCH 1/2] deduplicate `ToNormalPathComponent` logic --- gix-fs/src/stack.rs | 60 ++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/gix-fs/src/stack.rs b/gix-fs/src/stack.rs index da1f8457a13..2c4d1c0e3a1 100644 --- a/gix-fs/src/stack.rs +++ b/gix-fs/src/stack.rs @@ -5,14 +5,14 @@ use std::path::{Component, Path, PathBuf}; /// pub mod to_normal_path_components { - use std::ffi::OsString; + use std::path::PathBuf; /// The error used in [`ToNormalPathComponents::to_normal_path_components()`](super::ToNormalPathComponents::to_normal_path_components()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { - #[error("Input path \"{path}\" contains relative or absolute components", path = std::path::Path::new(.0.as_os_str()).display())] - NotANormalComponent(OsString), + #[error("Input path \"{path}\" contains relative or absolute components", path = .0.display())] + NotANormalComponent(PathBuf), #[error("Could not convert to UTF8 or from UTF8 due to ill-formed input")] IllegalUtf8, } @@ -26,54 +26,54 @@ pub trait ToNormalPathComponents { impl ToNormalPathComponents for &Path { fn to_normal_path_components(&self) -> impl Iterator> { - self.components().map(|component| match component { - Component::Normal(os_str) => Ok(os_str), - _ => Err(to_normal_path_components::Error::NotANormalComponent( - self.as_os_str().to_owned(), - )), - }) + self.components().map(|c| component_to_os_str(c, self)) } } impl ToNormalPathComponents for PathBuf { fn to_normal_path_components(&self) -> impl Iterator> { - self.components().map(|component| match component { - Component::Normal(os_str) => Ok(os_str), - _ => Err(to_normal_path_components::Error::NotANormalComponent( - self.as_os_str().to_owned(), - )), - }) + self.components().map(|c| component_to_os_str(c, self)) + } +} + +fn component_to_os_str<'a>( + component: Component<'a>, + path_with_component: &Path, +) -> Result<&'a OsStr, to_normal_path_components::Error> { + match component { + Component::Normal(os_str) => Ok(os_str), + _ => Err(to_normal_path_components::Error::NotANormalComponent( + path_with_component.to_owned(), + )), } } impl ToNormalPathComponents for &BStr { fn to_normal_path_components(&self) -> impl Iterator> { - self.split(|b| *b == b'/').filter(|c| !c.is_empty()).map(|component| { - gix_path::try_from_byte_slice(component.as_bstr()) - .map_err(|_| to_normal_path_components::Error::IllegalUtf8) - .map(Path::as_os_str) - }) + self.split(|b| *b == b'/').filter_map(bytes_component_to_os_str) } } impl ToNormalPathComponents for &str { fn to_normal_path_components(&self) -> impl Iterator> { - self.split('/').filter(|c| !c.is_empty()).map(|component| { - gix_path::try_from_byte_slice(component.as_bytes()) - .map_err(|_| to_normal_path_components::Error::IllegalUtf8) - .map(Path::as_os_str) - }) + self.split('/').filter_map(|c| bytes_component_to_os_str(c.as_bytes())) } } impl ToNormalPathComponents for &BString { fn to_normal_path_components(&self) -> impl Iterator> { - self.split(|b| *b == b'/').filter(|c| !c.is_empty()).map(|component| { - gix_path::try_from_byte_slice(component.as_bstr()) - .map_err(|_| to_normal_path_components::Error::IllegalUtf8) - .map(Path::as_os_str) - }) + self.split(|b| *b == b'/').filter_map(bytes_component_to_os_str) + } +} + +fn bytes_component_to_os_str(component: &[u8]) -> Option> { + if component.is_empty() { + return None; } + gix_path::try_from_byte_slice(component.as_bstr()) + .map_err(|_| to_normal_path_components::Error::IllegalUtf8) + .map(Path::as_os_str) + .into() } /// Access From c70d912fabfa7bbda185bc390fb3ea304173e5dc Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 30 Mar 2025 14:15:14 +0800 Subject: [PATCH 2/2] feat!: blob diff resources and outcomes now inform about the usage of text-conv filters. This is needed to prevent applications from treating such buffers like user data. --- gix-diff/src/blob/pipeline.rs | 18 ++++++--- gix-diff/src/blob/platform.rs | 48 +++++++++++++++++----- gix-diff/tests/diff/blob/pipeline.rs | 59 +++++++++++++++++----------- 3 files changed, 88 insertions(+), 37 deletions(-) diff --git a/gix-diff/src/blob/pipeline.rs b/gix-diff/src/blob/pipeline.rs index a4a02d80f37..f11a5147b58 100644 --- a/gix-diff/src/blob/pipeline.rs +++ b/gix-diff/src/blob/pipeline.rs @@ -42,7 +42,13 @@ impl WorktreeRoots { #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] pub enum Data { /// The data to use for diffing was written into the buffer that was passed during the call to [`Pipeline::convert_to_diffable()`]. - Buffer, + Buffer { + /// If `true`, a [binary to text filter](Driver::binary_to_text_command) was used to obtain the buffer, + /// making it a derived value. + /// + /// Applications should check for this to avoid treating the buffer content as (original) resource content. + is_derived: bool, + }, /// The size that the binary blob had at the given revision, without having applied filters, as it's either /// considered binary or above the big-file threshold. /// @@ -274,7 +280,7 @@ impl Pipeline { })?; target.map(|target| { out.extend_from_slice(gix_path::into_bstr(target).as_ref()); - Data::Buffer + Data::Buffer { is_derived: false } }) } else { let need_size_only = is_binary == Some(true); @@ -312,7 +318,7 @@ impl Pipeline { None } else { run_cmd(rela_path, cmd, out)?; - Some(Data::Buffer) + Some(Data::Buffer { is_derived: true }) } } None => { @@ -370,7 +376,7 @@ impl Pipeline { out.clear(); Data::Binary { size } } else { - Data::Buffer + Data::Buffer { is_derived: false } }) } None => None, @@ -403,6 +409,7 @@ impl Pipeline { .try_find(id, out) .map_err(gix_object::find::existing_object::Error::Find)? .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + let mut is_derived = false; if matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) && convert == Mode::ToWorktreeAndBinaryToText || (convert == Mode::ToGitUnlessBinaryToTextIsPresent @@ -460,6 +467,7 @@ impl Pipeline { })?; out.clear(); run_cmd(rela_path, cmd, out)?; + is_derived = true; } None => match res { ToWorktreeOutcome::Unchanged(_) => {} @@ -490,7 +498,7 @@ impl Pipeline { out.clear(); Data::Binary { size } } else { - Data::Buffer + Data::Buffer { is_derived } } }; Some(data) diff --git a/gix-diff/src/blob/platform.rs b/gix-diff/src/blob/platform.rs index 8e67897ddd4..163f2678f71 100644 --- a/gix-diff/src/blob/platform.rs +++ b/gix-diff/src/blob/platform.rs @@ -101,7 +101,10 @@ pub mod resource { Resource { driver_index: value.conversion.driver_index, data: value.conversion.data.map_or(Data::Missing, |data| match data { - pipeline::Data::Buffer => Data::Buffer(&value.buffer), + pipeline::Data::Buffer { is_derived } => Data::Buffer { + buf: &value.buffer, + is_derived, + }, pipeline::Data::Binary { size } => Data::Binary { size }, }), mode: value.mode, @@ -137,7 +140,15 @@ pub mod resource { /// The object is missing, either because it didn't exist in the working tree or because its `id` was null. Missing, /// The textual data as processed to be in a diffable state. - Buffer(&'a [u8]), + Buffer { + /// The buffer bytes. + buf: &'a [u8], + /// If `true`, a [binary to text filter](super::super::Driver::binary_to_text_command) was used to obtain the buffer, + /// making it a derived value. + /// + /// Applications should check for this to avoid treating the buffer content as (original) resource content. + is_derived: bool, + }, /// The size that the binary blob had at the given revision, without having applied filters, as it's either /// considered binary or above the big-file threshold. /// @@ -156,10 +167,18 @@ pub mod resource { /// Return ourselves as slice of bytes if this instance stores data. pub fn as_slice(&self) -> Option<&'a [u8]> { match self { - Data::Buffer(d) => Some(d), + Data::Buffer { buf, .. } => Some(buf), Data::Binary { .. } | Data::Missing => None, } } + + /// Returns `true` if the data in this instance is derived. + pub fn is_derived(&self) -> bool { + match self { + Data::Missing | Data::Binary { .. } => false, + Data::Buffer { is_derived, .. } => *is_derived, + } + } } } @@ -194,9 +213,8 @@ pub mod set_resource { /// pub mod prepare_diff { - use bstr::BStr; - use crate::blob::platform::Resource; + use bstr::BStr; /// The kind of operation that should be performed based on the configuration of the resources involved in the diff. #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -234,6 +252,11 @@ pub mod prepare_diff { pub struct Outcome<'a> { /// The kind of diff that was actually performed. This may include skipping the internal diff as well. pub operation: Operation<'a>, + /// If `true`, a [binary to text filter](super::super::Driver::binary_to_text_command) was used to obtain the buffer + /// of `old` or `new`, making it a derived value. + /// + /// Applications should check for this to avoid treating the buffer content as (original) resource content. + pub old_or_new_is_derived: bool, /// The old or source of the diff operation. pub old: Resource<'a>, /// The new or destination of the diff operation. @@ -421,7 +444,7 @@ impl Platform { cmd.args(["/dev/null", ".", "."]); None } - resource::Data::Buffer(buf) => { + resource::Data::Buffer { buf, is_derived: _ } => { let mut tmp = gix_tempfile::new( std::env::temp_dir(), gix_tempfile::ContainingDirectory::Exists, @@ -524,10 +547,15 @@ impl Platform { .diff_cache .get(new_key) .ok_or(prepare_diff::Error::SourceOrDestinationUnset)?; - let mut out = prepare_diff::Outcome { - operation: prepare_diff::Operation::SourceOrDestinationIsBinary, - old: Resource::new(old_key, old), - new: Resource::new(new_key, new), + let mut out = { + let old = Resource::new(old_key, old); + let new = Resource::new(new_key, new); + prepare_diff::Outcome { + operation: prepare_diff::Operation::SourceOrDestinationIsBinary, + old_or_new_is_derived: old.data.is_derived() || new.data.is_derived(), + old, + new, + } }; match (old.conversion.data, new.conversion.data) { diff --git a/gix-diff/tests/diff/blob/pipeline.rs b/gix-diff/tests/diff/blob/pipeline.rs index 7bd3e7f567b..664e080bf3d 100644 --- a/gix-diff/tests/diff/blob/pipeline.rs +++ b/gix-diff/tests/diff/blob/pipeline.rs @@ -45,7 +45,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert!(out.driver_index.is_none(), "there was no driver"); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!(buf.as_bstr(), a_content, "there is no transformations configured"); let link_name = "link"; @@ -62,7 +62,7 @@ pub(crate) mod convert_to_diffable { )?; assert!(out.driver_index.is_none()); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!( buf.as_bstr(), a_name, @@ -86,7 +86,7 @@ pub(crate) mod convert_to_diffable { )?; assert!(out.driver_index.is_none(), "there was no driver"); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!(buf.as_bstr(), b_content, "there is no transformations configured"); } @@ -203,7 +203,7 @@ pub(crate) mod convert_to_diffable { assert!(out.driver_index.is_none()); assert_eq!( out.data, - Some(pipeline::Data::Buffer), + Some(pipeline::Data::Buffer { is_derived: false }), "links are always read and never considered large" ); assert_eq!(buf.as_bstr(), large_content); @@ -340,7 +340,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert!(out.driver_index.is_none(), "there was no driver"); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!( buf.as_bstr(), a_content, @@ -361,7 +361,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert!(out.driver_index.is_none(), "there was no driver"); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!( buf.as_bstr(), "a\nb", @@ -386,7 +386,7 @@ pub(crate) mod convert_to_diffable { )?; assert!(out.driver_index.is_none()); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!( buf.as_bstr(), link_content, @@ -411,7 +411,7 @@ pub(crate) mod convert_to_diffable { )?; assert!(out.driver_index.is_none(), "there was no driver"); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!(buf.as_bstr(), "b-content\r\n", "LF to CRLF by worktree filtering"); let mut db = ObjectDb::default(); @@ -429,7 +429,7 @@ pub(crate) mod convert_to_diffable { )?; assert!(out.driver_index.is_none(), "there was no driver"); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!(buf.as_bstr(), "b\n", "no filtering was performed at all"); Ok(()) @@ -524,11 +524,11 @@ pub(crate) mod convert_to_diffable { )?; assert_eq!(out.driver_index, Some(0)); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: true })); assert_eq!( buf.as_bstr(), "\0b", - "if binary-text-conversion is set, we don't care if it outputs zero bytes, let everything pass" + "if binary-text-conversion is set, we don't care if it outputs null-bytes, let everything pass" ); Ok(()) @@ -613,7 +613,12 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, Some(0)); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!( + out.data, + Some(pipeline::Data::Buffer { + is_derived: !matches!(mode, pipeline::Mode::ToGit) + }) + ); assert_eq!(buf.as_bstr(), "to-text\na\n", "filter was applied"); } @@ -630,7 +635,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, Some(0)); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!(buf.as_bstr(), "a\n", "unconditionally use git according to mode"); let id = db.insert("a\n"); @@ -648,7 +653,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, Some(0)); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: true })); assert_eq!(buf.as_bstr(), "to-text\na\n", "filter was applied"); } @@ -665,7 +670,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, Some(0)); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!( buf.as_bstr(), "a\n", @@ -723,7 +728,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, Some(4), "despite missing, we get driver information"); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!( buf.as_bstr(), "link-target", @@ -796,7 +801,12 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, Some(2)); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!( + out.data, + Some(pipeline::Data::Buffer { + is_derived: !matches!(mode, pipeline::Mode::ToGit) + }) + ); assert_eq!( buf.as_bstr(), "to-text\nc\n", @@ -819,7 +829,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, Some(2)); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: true })); assert_eq!( buf.as_bstr(), "to-text\nc\n", @@ -895,7 +905,12 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, Some(3)); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!( + out.data, + Some(pipeline::Data::Buffer { + is_derived: !matches!(mode, pipeline::Mode::ToGit) + }) + ); assert_eq!( buf.as_bstr(), "to-text\nd\n", @@ -915,7 +930,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, Some(3)); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: true })); assert_eq!( buf.as_bstr(), "to-text\nd-in-db", @@ -937,7 +952,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, None); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!( buf.as_bstr(), "e\n", @@ -958,7 +973,7 @@ pub(crate) mod convert_to_diffable { &mut buf, )?; assert_eq!(out.driver_index, None); - assert_eq!(out.data, Some(pipeline::Data::Buffer)); + assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!( buf.as_bstr(), "e-in-db",