Skip to content

Commit a4c687d

Browse files
committed
feat: provide facilities to perform rewrite-tracking for tree-diffs in plumbing.
This effectively pulls down a higher-level implementation in `gix` to the plumbing level, to allow it to be used there as well.
1 parent 2b7b1d0 commit a4c687d

File tree

10 files changed

+1849
-1
lines changed

10 files changed

+1849
-1
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-diff/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ autotests = false
1515
[features]
1616
default = ["blob"]
1717
## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation.
18-
blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace"]
18+
blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-traverse"]
1919
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
2020
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"]
2121
## Make it possible to compile to the `wasm32-unknown-unknown` target.
@@ -34,6 +34,7 @@ gix-path = { version = "^0.10.11", path = "../gix-path", optional = true }
3434
gix-fs = { version = "^0.11.3", path = "../gix-fs", optional = true }
3535
gix-tempfile = { version = "^14.0.0", path = "../gix-tempfile", optional = true }
3636
gix-trace = { version = "^0.1.10", path = "../gix-trace", optional = true }
37+
gix-traverse = { version = "^0.41.0", path = "../gix-traverse", optional = true }
3738

3839
thiserror = "1.0.32"
3940
imara-diff = { version = "0.1.7", optional = true }

gix-diff/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ pub mod rewrites;
4747
pub mod tree;
4848
pub use tree::function::diff as tree;
4949

50+
///
51+
#[cfg(feature = "blob")]
52+
pub mod tree_with_rewrites;
53+
#[cfg(feature = "blob")]
54+
pub use tree_with_rewrites::function::diff as tree_with_rewrites;
55+
5056
///
5157
#[cfg(feature = "blob")]
5258
pub mod blob;

gix-diff/src/tree_with_rewrites/change.rs

Lines changed: 467 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
use bstr::BStr;
2+
use gix_object::TreeRefIter;
3+
4+
use super::{Action, ChangeRef, Error, Options};
5+
use crate::rewrites;
6+
use crate::rewrites::tracker;
7+
8+
/// Call `for_each` repeatedly with all changes that are needed to convert `lhs` to `rhs`.
9+
/// Provide a `resource_cache` to speed up obtaining blobs for similarity checks.
10+
/// `tree_diff_state` can be used to re-use tree-diff memory between calls.
11+
/// `objects` are used to lookup trees while performing the diff.
12+
/// Use `options` to further configure how the rename tracking is performed.
13+
///
14+
/// Reusing `resource_cache` between multiple invocations saves a lot of IOps as it avoids the creation
15+
/// of a temporary `resource_cache` that triggers reading or checking for multiple gitattribute files.
16+
/// Note that it's recommended to call [`clear_resource_cache()`](`crate::blob::Platform::clear_resource_cache()`)
17+
/// between the calls to avoid runaway memory usage, as the cache isn't limited.
18+
///
19+
/// Note that to do rename tracking like `git` does, one has to configure the `resource_cache` with
20+
/// a conversion pipeline that uses [`crate::blob::pipeline::Mode::ToGit`].
21+
///
22+
/// `rhs` or `lhs` can be empty to indicate deletion or addition of an entire tree.
23+
///
24+
/// Note that the rewrite outcome is only available if [rewrite-tracking was enabled](Options::rewrites).
25+
pub fn diff<E>(
26+
lhs: TreeRefIter<'_>,
27+
rhs: TreeRefIter<'_>,
28+
resource_cache: &mut crate::blob::Platform,
29+
tree_diff_state: &mut crate::tree::State,
30+
objects: &impl gix_object::FindObjectOrHeader,
31+
for_each: impl FnMut(ChangeRef<'_>) -> Result<Action, E>,
32+
options: Options,
33+
) -> Result<Option<rewrites::Outcome>, Error>
34+
where
35+
E: Into<Box<dyn std::error::Error + Sync + Send + 'static>>,
36+
{
37+
let mut delegate = Delegate {
38+
src_tree: lhs,
39+
recorder: crate::tree::Recorder::default().track_location(options.location),
40+
visit: for_each,
41+
location: options.location,
42+
objects,
43+
tracked: options.rewrites.map(rewrites::Tracker::new),
44+
err: None,
45+
};
46+
match crate::tree(lhs, rhs, tree_diff_state, objects, &mut delegate) {
47+
Ok(()) => {
48+
let outcome = delegate.process_tracked_changes(resource_cache)?;
49+
match delegate.err {
50+
Some(err) => Err(Error::ForEach(err.into())),
51+
None => Ok(outcome),
52+
}
53+
}
54+
Err(crate::tree::Error::Cancelled) => delegate
55+
.err
56+
.map_or(Err(Error::Diff(crate::tree::Error::Cancelled)), |err| {
57+
Err(Error::ForEach(err.into()))
58+
}),
59+
Err(err) => Err(err.into()),
60+
}
61+
}
62+
63+
struct Delegate<'a, 'old, VisitFn, E, Objects> {
64+
src_tree: TreeRefIter<'old>,
65+
recorder: crate::tree::Recorder,
66+
objects: &'a Objects,
67+
visit: VisitFn,
68+
tracked: Option<rewrites::Tracker<crate::tree::visit::Change>>,
69+
location: Option<crate::tree::recorder::Location>,
70+
err: Option<E>,
71+
}
72+
73+
impl<VisitFn, E, Objects> Delegate<'_, '_, VisitFn, E, Objects>
74+
where
75+
Objects: gix_object::FindObjectOrHeader,
76+
VisitFn: for<'delegate> FnMut(ChangeRef<'_>) -> Result<Action, E>,
77+
E: Into<Box<dyn std::error::Error + Sync + Send + 'static>>,
78+
{
79+
/// Call `visit` on an attached version of `change`.
80+
fn emit_change(
81+
change: crate::tree::visit::Change,
82+
location: &BStr,
83+
visit: &mut VisitFn,
84+
stored_err: &mut Option<E>,
85+
) -> crate::tree::visit::Action {
86+
use crate::tree::visit::Change::*;
87+
let change = match change {
88+
Addition {
89+
entry_mode,
90+
oid,
91+
relation,
92+
} => ChangeRef::Addition {
93+
location,
94+
relation,
95+
entry_mode,
96+
id: oid,
97+
},
98+
Deletion {
99+
entry_mode,
100+
oid,
101+
relation,
102+
} => ChangeRef::Deletion {
103+
entry_mode,
104+
location,
105+
relation,
106+
id: oid,
107+
},
108+
Modification {
109+
previous_entry_mode,
110+
previous_oid,
111+
entry_mode,
112+
oid,
113+
} => ChangeRef::Modification {
114+
location,
115+
previous_entry_mode,
116+
entry_mode,
117+
previous_id: previous_oid,
118+
id: oid,
119+
},
120+
};
121+
match visit(change) {
122+
Ok(Action::Cancel) => crate::tree::visit::Action::Cancel,
123+
Ok(Action::Continue) => crate::tree::visit::Action::Continue,
124+
Err(err) => {
125+
*stored_err = Some(err);
126+
crate::tree::visit::Action::Cancel
127+
}
128+
}
129+
}
130+
131+
fn process_tracked_changes(
132+
&mut self,
133+
diff_cache: &mut crate::blob::Platform,
134+
) -> Result<Option<rewrites::Outcome>, Error> {
135+
use crate::rewrites::tracker::Change as _;
136+
let tracked = match self.tracked.as_mut() {
137+
Some(t) => t,
138+
None => return Ok(None),
139+
};
140+
141+
let outcome = tracked.emit(
142+
|dest, source| match source {
143+
Some(source) => {
144+
let (oid, mode) = dest.change.oid_and_entry_mode();
145+
let change = ChangeRef::Rewrite {
146+
source_location: source.location,
147+
source_entry_mode: source.entry_mode,
148+
source_id: source.id,
149+
source_relation: source.change.relation(),
150+
entry_mode: mode,
151+
id: oid.to_owned(),
152+
relation: dest.change.relation(),
153+
diff: source.diff,
154+
location: dest.location,
155+
copy: match source.kind {
156+
tracker::visit::SourceKind::Rename => false,
157+
tracker::visit::SourceKind::Copy => true,
158+
},
159+
};
160+
match (self.visit)(change) {
161+
Ok(Action::Cancel) => crate::tree::visit::Action::Cancel,
162+
Ok(Action::Continue) => crate::tree::visit::Action::Continue,
163+
Err(err) => {
164+
self.err = Some(err);
165+
crate::tree::visit::Action::Cancel
166+
}
167+
}
168+
}
169+
None => Self::emit_change(dest.change, dest.location, &mut self.visit, &mut self.err),
170+
},
171+
diff_cache,
172+
self.objects,
173+
|push| {
174+
let mut delegate = tree_to_changes::Delegate::new(push, self.location);
175+
let state = gix_traverse::tree::breadthfirst::State::default();
176+
gix_traverse::tree::breadthfirst(self.src_tree, state, self.objects, &mut delegate)
177+
},
178+
)?;
179+
Ok(Some(outcome))
180+
}
181+
}
182+
183+
impl<VisitFn, E, Objects> crate::tree::Visit for Delegate<'_, '_, VisitFn, E, Objects>
184+
where
185+
Objects: gix_object::FindObjectOrHeader,
186+
VisitFn: for<'delegate> FnMut(ChangeRef<'_>) -> Result<Action, E>,
187+
E: Into<Box<dyn std::error::Error + Sync + Send + 'static>>,
188+
{
189+
fn pop_front_tracked_path_and_set_current(&mut self) {
190+
self.recorder.pop_front_tracked_path_and_set_current();
191+
}
192+
193+
fn push_back_tracked_path_component(&mut self, component: &BStr) {
194+
self.recorder.push_back_tracked_path_component(component);
195+
}
196+
197+
fn push_path_component(&mut self, component: &BStr) {
198+
self.recorder.push_path_component(component);
199+
}
200+
201+
fn pop_path_component(&mut self) {
202+
self.recorder.pop_path_component();
203+
}
204+
205+
fn visit(&mut self, change: crate::tree::visit::Change) -> crate::tree::visit::Action {
206+
match self.tracked.as_mut() {
207+
Some(tracked) => tracked
208+
.try_push_change(change, self.recorder.path())
209+
.map_or(crate::tree::visit::Action::Continue, |change| {
210+
Self::emit_change(change, self.recorder.path(), &mut self.visit, &mut self.err)
211+
}),
212+
None => Self::emit_change(change, self.recorder.path(), &mut self.visit, &mut self.err),
213+
}
214+
}
215+
}
216+
217+
mod tree_to_changes {
218+
use crate::tree::visit::Change;
219+
use gix_object::tree::EntryRef;
220+
221+
use bstr::BStr;
222+
223+
pub struct Delegate<'a> {
224+
push: &'a mut dyn FnMut(Change, &BStr),
225+
recorder: gix_traverse::tree::Recorder,
226+
}
227+
228+
impl<'a> Delegate<'a> {
229+
pub fn new(push: &'a mut dyn FnMut(Change, &BStr), location: Option<crate::tree::recorder::Location>) -> Self {
230+
let location = location.map(|t| match t {
231+
crate::tree::recorder::Location::FileName => gix_traverse::tree::recorder::Location::FileName,
232+
crate::tree::recorder::Location::Path => gix_traverse::tree::recorder::Location::Path,
233+
});
234+
Self {
235+
push,
236+
recorder: gix_traverse::tree::Recorder::default().track_location(location),
237+
}
238+
}
239+
}
240+
241+
impl gix_traverse::tree::Visit for Delegate<'_> {
242+
fn pop_front_tracked_path_and_set_current(&mut self) {
243+
self.recorder.pop_front_tracked_path_and_set_current();
244+
}
245+
246+
fn push_back_tracked_path_component(&mut self, component: &BStr) {
247+
self.recorder.push_back_tracked_path_component(component);
248+
}
249+
250+
fn push_path_component(&mut self, component: &BStr) {
251+
self.recorder.push_path_component(component);
252+
}
253+
254+
fn pop_path_component(&mut self) {
255+
self.recorder.pop_path_component();
256+
}
257+
258+
fn visit_tree(&mut self, _entry: &EntryRef<'_>) -> gix_traverse::tree::visit::Action {
259+
gix_traverse::tree::visit::Action::Continue
260+
}
261+
262+
fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> gix_traverse::tree::visit::Action {
263+
if entry.mode.is_blob() {
264+
(self.push)(
265+
Change::Modification {
266+
previous_entry_mode: entry.mode,
267+
previous_oid: gix_hash::ObjectId::null(entry.oid.kind()),
268+
entry_mode: entry.mode,
269+
oid: entry.oid.to_owned(),
270+
},
271+
self.recorder.path(),
272+
);
273+
}
274+
gix_traverse::tree::visit::Action::Continue
275+
}
276+
}
277+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
use crate::tree::recorder::Location;
2+
use crate::Rewrites;
3+
4+
mod change;
5+
pub use change::{Change, ChangeRef};
6+
7+
/// The error returned by [`tree_with_rewrites()`](super::tree_with_rewrites()).
8+
#[derive(Debug, thiserror::Error)]
9+
#[allow(missing_docs)]
10+
pub enum Error {
11+
#[error(transparent)]
12+
Diff(#[from] crate::tree::Error),
13+
#[error("The user-provided callback failed")]
14+
ForEach(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
15+
#[error("Failure during rename tracking")]
16+
RenameTracking(#[from] crate::rewrites::tracker::emit::Error),
17+
}
18+
19+
/// Returned by the callback passed to [`tree_with_rewrites()`](super::tree_with_rewrites()) to control flow.
///
/// The default is [`Action::Continue`].
#[derive(Debug, Default, Clone, Copy, PartialOrd, PartialEq, Ord, Eq, Hash)]
pub enum Action {
    /// Continue the traversal of changes.
    #[default]
    Continue,
    /// Stop the traversal of changes and stop calling the function that returned it.
    Cancel,
}
28+
29+
/// Options for use in [`tree_with_rewrites()`](super::tree_with_rewrites()).
30+
#[derive(Default, Clone, Debug)]
31+
pub struct Options {
32+
/// Determine how locations of changes, i.e. their repository-relative path, should be tracked.
33+
/// If `None`, locations will always be empty.
34+
pub location: Option<Location>,
35+
/// If not `None`, rename tracking will be performed accordingly.
36+
pub rewrites: Option<Rewrites>,
37+
}
38+
39+
pub(super) mod function;

gix-diff/tests/diff/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ fn hex_to_id(hex: &str) -> gix_hash::ObjectId {
77
mod blob;
88
mod rewrites;
99
mod tree;
10+
mod tree_with_rewrites;
1011

1112
mod util {
1213
use std::collections::HashMap;

0 commit comments

Comments
 (0)