diff --git a/Cargo.lock b/Cargo.lock index fc8ad7835c1..ea52a8fbffc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2187,6 +2187,7 @@ dependencies = [ "gix-hash 0.15.1", "gix-hashtable 0.6.0", "gix-odb", + "gix-path 0.10.13", "gix-testtools", "gix-utils 0.1.13", "gix-validate 0.9.2", diff --git a/gix-object/Cargo.toml b/gix-object/Cargo.toml index 8a7edb06fbb..4456b05ef53 100644 --- a/gix-object/Cargo.toml +++ b/gix-object/Cargo.toml @@ -50,6 +50,7 @@ gix-hashtable = { version = "^0.6.0", path = "../gix-hashtable" } gix-validate = { version = "^0.9.2", path = "../gix-validate" } gix-actor = { version = "^0.33.1", path = "../gix-actor" } gix-date = { version = "^0.9.2", path = "../gix-date" } +gix-path = { version = "^0.10.12", path = "../gix-path" } gix-utils = { version = "^0.1.13", path = "../gix-utils" } itoa = "1.0.1" diff --git a/gix-object/src/tree/ref_iter.rs b/gix-object/src/tree/ref_iter.rs index 8f9521e1b74..f13d9500383 100644 --- a/gix-object/src/tree/ref_iter.rs +++ b/gix-object/src/tree/ref_iter.rs @@ -8,6 +8,77 @@ impl<'a> TreeRefIter<'a> { pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> { TreeRefIter { data } } + + /// Follow a sequence of `path` components starting from this instance, and look them up in `odb` one by one using `buffer` + /// until the last component is looked up and its tree entry is returned. + /// + /// # Performance Notes + /// + /// Searching tree entries is currently done in sequence, which allows the search to be allocation-free. It would be possible + /// to reuse a vector and use a binary search instead, which might be able to improve performance overall. + /// However, a benchmark should be created first to have some data and see which trade-off to choose here.
+    pub fn lookup_entry<I, P>(
+        &self,
+        odb: impl crate::Find,
+        buffer: &'a mut Vec<u8>,
+        path: I,
+    ) -> Result<Option<crate::tree::Entry>, crate::find::Error>
+    where
+        I: IntoIterator<Item = P>,
+        P: PartialEq<crate::bstr::BStr>,
+    {
+        buffer.clear();
+
+        let mut path = path.into_iter().peekable(); // peekable to detect the last component
+        buffer.extend_from_slice(self.data); // the search starts in this tree's own data
+        while let Some(component) = path.next() {
+            match TreeRefIter::from_bytes(buffer)
+                .filter_map(Result::ok) // entries that fail to decode are skipped
+                .find(|entry| component.eq(entry.filename))
+            {
+                Some(entry) => {
+                    if path.peek().is_none() {
+                        return Ok(Some(entry.into()));
+                    } else {
+                        let next_id = entry.oid.to_owned(); // own the id, `entry` borrows from `buffer`
+                        let obj = odb.try_find(&next_id, buffer)?;
+                        let Some(obj) = obj else { return Ok(None) };
+                        if !obj.kind.is_tree() {
+                            return Ok(None); // cannot descend into a non-tree object
+                        }
+                    }
+                }
+                None => return Ok(None),
+            }
+        }
+        Ok(None)
+    }
+
+    /// Like [`Self::lookup_entry()`], but takes any [`AsRef<Path>`](`std::path::Path`) directly via `relative_path`,
+    /// a path relative to this tree.
+    /// `odb` and `buffer` are used to look up intermediate trees.
+    ///
+    /// # Note
+    ///
+    /// If any path component contains ill-formed UTF-8 and thus can't be converted to bytes on platforms which can't do so natively,
+    /// the returned component will be empty which makes the lookup fail.
+    pub fn lookup_entry_by_path(
+        &self,
+        odb: impl crate::Find,
+        buffer: &'a mut Vec<u8>,
+        relative_path: impl AsRef<std::path::Path>,
+    ) -> Result<Option<crate::tree::Entry>, crate::find::Error> {
+        use crate::bstr::ByteSlice;
+        self.lookup_entry(
+            odb,
+            buffer,
+            relative_path.as_ref().components().map(|c: std::path::Component<'_>| {
+                gix_path::os_str_into_bstr(c.as_os_str())
+                    .unwrap_or_else(|_| "".into()) // unconvertible components become empty
+                    .as_bytes()
+            }),
+        )
+    }
 }
 
 impl<'a> TreeRef<'a> {
diff --git a/gix-object/tests/object/tree/iter.rs b/gix-object/tests/object/tree/iter.rs
index 6ba637b2546..1f597499238 100644
--- a/gix-object/tests/object/tree/iter.rs
+++ b/gix-object/tests/object/tree/iter.rs
@@ -1,4 +1,9 @@
-use gix_object::{bstr::ByteSlice, tree, tree::EntryRef, TreeRefIter};
+use gix_object::{
+    bstr::ByteSlice,
+    tree::{self, EntryRef},
+    TreeRefIter,
+};
+use pretty_assertions::assert_eq;
 
 use crate::{fixture_name, hex_to_id};
 
@@ -52,3 +57,74 @@ fn everything() -> crate::Result {
     );
     Ok(())
 }
+
+mod lookup_entry {
+    use crate::hex_to_id;
+    use gix_object::tree::EntryKind;
+    use utils::entry;
+
+    #[test]
+    fn top_level_directory() -> crate::Result {
+        assert_eq!(
+            utils::lookup_entry_by_path("bin")?,
+            entry(
+                "bin",
+                EntryKind::Blob,
+                hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
+            )
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn nested_file() -> crate::Result {
+        assert_eq!(
+            utils::lookup_entry_by_path("file/a")?,
+            entry(
+                "a",
+                EntryKind::Blob,
+                hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
+            )
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn non_existing_nested_file() -> crate::Result {
+        for path in ["file/does-not-exist", "non-existing", "file/a/through-file"] {
+            let actual = utils::lookup_entry_by_path(path)?;
+            assert_eq!(actual, None); // a missing entry is `None`, not an error
+        }
+        Ok(())
+    }
+
+    mod utils {
+        use crate::hex_to_id;
+
+        use gix_object::{tree, FindExt};
+
+        pub(super) fn entry(filename: &str, mode: tree::EntryKind, oid: gix_hash::ObjectId) -> Option<tree::Entry> {
+            Some(tree::Entry {
+                mode: mode.into(),
+                filename: filename.into(),
+                oid,
+            })
+        }
+
+        pub(super) fn tree_odb() -> gix_testtools::Result<gix_odb::Handle> {
+            let root = gix_testtools::scripted_fixture_read_only("make_trees.sh")?;
+            Ok(gix_odb::at(root.join(".git/objects"))?)
+        }
+
+        pub(super) fn lookup_entry_by_path(path: &str) -> gix_testtools::Result<Option<tree::Entry>> {
+            let odb = tree_odb()?;
+            let root_tree_id = hex_to_id("ff7e7d2aecae1c3fb15054b289a4c58aa65b8646");
+
+            let mut root_buf = Vec::new(); // backs `root_tree` and must stay alive during the lookup
+            let root_tree = odb.find_tree_iter(&root_tree_id, &mut root_buf)?;
+
+            let mut lookup_buf = Vec::new(); // separate buffer handed to the lookup itself
+            root_tree.lookup_entry_by_path(&odb, &mut lookup_buf, path)
+        }
+    }
+}