Skip to content

Commit bd85560

Browse files
committed
feat: add tree() and commit() merge support, on par with merge-ORT as far as tests go.
Note that this judgement of quality is based on a limited number of partially complex tests, but it's likely that in practice there will be deviations of sorts. Also, given the complexity of the implementation, it is definitely under-tested, but with that it's mostly on par with Git, unfortunately. On the bright side, some of the tests are very taxing and I'd hope this means something for real-world quality.
1 parent 4fdf350 commit bd85560

File tree

15 files changed

+3440
-20
lines changed

15 files changed

+3440
-20
lines changed

Cargo.lock

Lines changed: 8 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crate-status.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -338,14 +338,20 @@ Check out the [performance discussion][gix-diff-performance] as well.
338338

339339
### gix-merge
340340

341-
* [x] three-way merge analysis of **blobs** with choice of how to resolve conflicts
341+
* [x] three-way content-merge analysis of **blobs** with choice of how to resolve conflicts
342+
- [x] respect git attributes and drivers.
342343
- [ ] choose how to resolve conflicts on the data-structure
343-
- [ ] produce a new blob based on data-structure containing possible resolutions
344+
- [ ] more efficient handling of paths with `merge=binary` attributes (do not load them into memory)
345+
- [x] produce a new blob based on data-structure containing possible resolutions
344346
- [x] `merge` style
345347
- [x] `diff3` style
346348
- [x] `zdiff` style
349+
- [ ] various newlines-related options during the merge (see https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-ignore-space-change).
347350
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
348-
* [ ] diff-heuristics match Git perfectly
351+
* [x] **tree**-diff-heuristics match Git for its test-cases
352+
- [ ] a way to generate an index with stages
353+
- *currently the data it provides won't generate index entries, and possibly can't be used for it yet*
354+
- [ ] submodule merges (*right now they count as conflicts if they differ*)
349355
* [x] API documentation
350356
* [ ] Examples
351357

gix-merge/Cargo.toml

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,33 +15,36 @@ workspace = true
1515
doctest = false
1616

1717
[features]
18-
default = ["blob"]
19-
## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation.
20-
blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-quote"]
2118
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
2219
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"]
2320

2421
[dependencies]
2522
gix-hash = { version = "^0.15.0", path = "../gix-hash" }
2623
gix-object = { version = "^0.45.0", path = "../gix-object" }
27-
gix-filter = { version = "^0.14.0", path = "../gix-filter", optional = true }
28-
gix-worktree = { version = "^0.37.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true }
29-
gix-command = { version = "^0.3.10", path = "../gix-command", optional = true }
30-
gix-path = { version = "^0.10.12", path = "../gix-path", optional = true }
31-
gix-fs = { version = "^0.12.0", path = "../gix-fs", optional = true }
32-
gix-tempfile = { version = "^15.0.0", path = "../gix-tempfile", optional = true }
33-
gix-trace = { version = "^0.1.11", path = "../gix-trace", optional = true }
34-
gix-quote = { version = "^0.4.13", path = "../gix-quote", optional = true }
24+
gix-filter = { version = "^0.14.0", path = "../gix-filter" }
25+
gix-worktree = { version = "^0.37.0", path = "../gix-worktree", default-features = false, features = ["attributes"] }
26+
gix-command = { version = "^0.3.10", path = "../gix-command" }
27+
gix-path = { version = "^0.10.12", path = "../gix-path" }
28+
gix-fs = { version = "^0.12.0", path = "../gix-fs" }
29+
gix-tempfile = { version = "^15.0.0", path = "../gix-tempfile" }
30+
gix-trace = { version = "^0.1.11", path = "../gix-trace" }
31+
gix-quote = { version = "^0.4.13", path = "../gix-quote" }
32+
gix-revision = { version = "^0.30.0", path = "../gix-revision", default-features = false, features = ["merge_base"] }
33+
gix-revwalk = { version = "^0.16.0", path = "../gix-revwalk" }
34+
gix-diff = { version = "^0.47.0", path = "../gix-diff", default-features = false, features = ["blob"] }
3535

3636
thiserror = "1.0.63"
37-
imara-diff = { version = "0.1.7", optional = true }
37+
imara-diff = { version = "0.1.7" }
3838
bstr = { version = "1.5.0", default-features = false }
3939
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
4040

4141
document-features = { version = "0.2.0", optional = true }
4242

4343
[dev-dependencies]
4444
gix-testtools = { path = "../tests/tools" }
45+
gix-odb = { path = "../gix-odb" }
46+
gix-utils = { version = "^0.1.12", path = "../gix-utils" }
47+
termtree = "0.5.1"
4548
pretty_assertions = "1.4.0"
4649

4750
[package.metadata.docs.rs]

gix-merge/src/blob/platform/merge.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use std::path::PathBuf;
77
pub struct Options {
88
/// If `true`, the resources being merged are contained in a virtual ancestor,
99
/// which is the case when merge bases are merged into one.
10+
/// This flag affects the choice of merge drivers.
1011
pub is_virtual_ancestor: bool,
1112
/// Determine how to resolve conflicts. If `None`, no conflict resolution is possible, and it picks a side.
1213
pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,

gix-merge/src/commit.rs

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
/// The error returned by [`commit()`](crate::commit()).
2+
#[derive(Debug, thiserror::Error)]
3+
#[allow(missing_docs)]
4+
pub enum Error {
5+
#[error(transparent)]
6+
MergeBase(#[from] gix_revision::merge_base::Error),
7+
#[error(transparent)]
8+
MergeTree(#[from] crate::tree::Error),
9+
#[error("Failed to write tree for merged merge-base or virtual commit")]
10+
WriteObject(gix_object::write::Error),
11+
#[error("No common ancestor between {our_commit_id} and {their_commit_id}")]
12+
NoMergeBase {
13+
/// The commit on our side that was to be merged.
14+
our_commit_id: gix_hash::ObjectId,
15+
/// The commit on their side that was to be merged.
16+
their_commit_id: gix_hash::ObjectId,
17+
},
18+
#[error("Could not find ancestor, our or their commit to extract tree from")]
19+
FindCommit(#[from] gix_object::find::existing_object::Error),
20+
}
21+
22+
/// A way to configure [`commit()`](crate::commit()).
23+
#[derive(Default, Debug, Clone)]
24+
pub struct Options {
25+
/// If `true`, merging unrelated commits is allowed, with the merge-base being assumed as empty tree.
26+
pub allow_missing_merge_base: bool,
27+
/// Options to define how trees should be merged.
28+
pub tree_merge: crate::tree::Options,
29+
/// If `true`, do not merge multiple merge-bases into one. Instead, just use the first one.
30+
// TODO: test
31+
#[doc(alias = "no_recursive", alias = "git2")]
32+
pub use_first_merge_base: bool,
33+
}
34+
35+
/// The result of [`commit()`](crate::commit()).
36+
#[derive(Clone)]
37+
pub struct Outcome<'a> {
38+
/// The outcome of the actual tree-merge.
39+
pub tree_merge: crate::tree::Outcome<'a>,
40+
/// The tree id of the base commit we used. This is either…
41+
/// * the single merge-base we found
42+
/// * the first of multiple merge-bases if [`use_first_merge_base`](Options::use_first_merge_base) was `true`.
43+
/// * the merged tree of all merge-bases, which then isn't linked to an actual commit.
44+
/// * an empty tree, if [`allow_missing_merge_base`](Options::allow_missing_merge_base) is enabled.
45+
pub merge_base_tree_id: gix_hash::ObjectId,
46+
/// The object ids of all the commits which were found to be merge-bases, or `None` if there was no merge-base.
47+
pub merge_bases: Option<Vec<gix_hash::ObjectId>>,
48+
/// A list of virtual commits that were created to merge multiple merge-bases into one.
49+
/// As they are not reachable by anything they will be garbage collected, but knowing them provides options.
50+
pub virtual_merge_bases: Vec<gix_hash::ObjectId>,
51+
}
52+
53+
pub(super) mod function {
54+
use crate::commit::{Error, Options};
55+
use gix_object::FindExt;
56+
use std::borrow::Cow;
57+
58+
/// Like [`tree()`](crate::tree()), but it takes only two commits, `our_commit` and `their_commit` to automatically
59+
/// compute the merge-bases among them.
60+
/// If there are multiple merge bases, these will be auto-merged into one, recursively, if
61+
/// [`allow_missing_merge_base`](Options::allow_missing_merge_base) is `true`.
62+
///
63+
/// `labels` are names where [`current`](crate::blob::builtin_driver::text::Labels::current) is a name for `our_commit`
64+
/// and [`other`](crate::blob::builtin_driver::text::Labels::other) is a name for `their_commit`.
65+
/// If [`ancestor`](crate::blob::builtin_driver::text::Labels::ancestor) is unset, it will be set by us based on the
66+
/// merge-bases of `our_commit` and `their_commit`.
67+
///
68+
/// The `graph` is used to find the merge-base between `our_commit` and `their_commit`, and can also act as cache
69+
/// to speed up subsequent merge-base queries.
70+
///
71+
/// Use `abbreviate_hash(id)` to shorten the given `id` according to standard git shortening rules. It's used in case
72+
/// the ancestor-label isn't explicitly set so that the merge base label becomes the shortened `id`.
73+
/// Note that it's a dyn closure only to make it possible to recursively call this function in case of multiple merge-bases.
74+
///
75+
/// `write_object` is used only if it's allowed to merge multiple merge-bases into one, and if there
76+
/// are multiple merge bases, and to write merged buffers as blobs.
77+
///
78+
/// ### Performance
79+
///
80+
/// Note that `objects` *should* have an object cache to greatly accelerate tree-retrieval.
81+
#[allow(clippy::too_many_arguments)]
82+
pub fn commit<'objects>(
83+
our_commit: gix_hash::ObjectId,
84+
their_commit: gix_hash::ObjectId,
85+
labels: crate::blob::builtin_driver::text::Labels<'_>,
86+
graph: &mut gix_revwalk::Graph<'_, '_, gix_revwalk::graph::Commit<gix_revision::merge_base::Flags>>,
87+
diff_resource_cache: &mut gix_diff::blob::Platform,
88+
blob_merge: &mut crate::blob::Platform,
89+
objects: &'objects (impl gix_object::FindObjectOrHeader + gix_object::Write),
90+
abbreviate_hash: &mut dyn FnMut(&gix_hash::oid) -> String,
91+
options: Options,
92+
) -> Result<super::Outcome<'objects>, Error> {
93+
let merge_bases = gix_revision::merge_base(our_commit, &[their_commit], graph)?;
94+
let mut virtual_merge_bases = Vec::new();
95+
let mut state = gix_diff::tree::State::default();
96+
let mut commit_to_tree =
97+
|commit_id: gix_hash::ObjectId| objects.find_commit(&commit_id, &mut state.buf1).map(|c| c.tree());
98+
99+
let (merge_base_tree_id, ancestor_name): (_, Cow<'_, str>) = match merge_bases.clone() {
100+
Some(base_commit) if base_commit.len() == 1 => {
101+
(commit_to_tree(base_commit[0])?, abbreviate_hash(&base_commit[0]).into())
102+
}
103+
Some(mut base_commits) => {
104+
let virtual_base_tree = if options.use_first_merge_base {
105+
let first = *base_commits.first().expect("if Some() there is at least one.");
106+
commit_to_tree(first)?
107+
} else {
108+
let mut merged_commit_id = base_commits.pop().expect("at least one base");
109+
let mut options = options.clone();
110+
options.tree_merge.blob_merge.is_virtual_ancestor = true;
111+
let labels = crate::blob::builtin_driver::text::Labels {
112+
current: Some("Temporary merge branch 1".into()),
113+
other: Some("Temporary merge branch 2".into()),
114+
..labels
115+
};
116+
while let Some(next_commit_id) = base_commits.pop() {
117+
options.tree_merge.call_depth += 1;
118+
let mut out = commit(
119+
merged_commit_id,
120+
next_commit_id,
121+
labels,
122+
graph,
123+
diff_resource_cache,
124+
blob_merge,
125+
objects,
126+
abbreviate_hash,
127+
options.clone(),
128+
)?;
129+
let merged_tree_id = out
130+
.tree_merge
131+
.tree
132+
.write(|tree| objects.write(tree))
133+
.map_err(Error::WriteObject)?;
134+
135+
merged_commit_id =
136+
create_virtual_commit(objects, merged_commit_id, next_commit_id, merged_tree_id)?;
137+
138+
virtual_merge_bases.extend(out.virtual_merge_bases);
139+
virtual_merge_bases.push(merged_commit_id);
140+
}
141+
commit_to_tree(merged_commit_id)?
142+
};
143+
(virtual_base_tree, "merged common ancestors".into())
144+
}
145+
None => {
146+
if options.allow_missing_merge_base {
147+
(gix_hash::ObjectId::empty_tree(our_commit.kind()), "empty tree".into())
148+
} else {
149+
return Err(Error::NoMergeBase {
150+
our_commit_id: our_commit,
151+
their_commit_id: their_commit,
152+
});
153+
}
154+
}
155+
};
156+
157+
let mut labels = labels; // TODO(borrowchk): this re-assignment shouldn't be needed.
158+
if labels.ancestor.is_none() {
159+
labels.ancestor = Some(ancestor_name.as_ref().into());
160+
}
161+
162+
let our_tree_id = objects.find_commit(&our_commit, &mut state.buf1)?.tree();
163+
let their_tree_id = objects.find_commit(&their_commit, &mut state.buf1)?.tree();
164+
165+
let outcome = crate::tree(
166+
&merge_base_tree_id,
167+
&our_tree_id,
168+
&their_tree_id,
169+
labels,
170+
objects,
171+
|buf| objects.write_buf(gix_object::Kind::Blob, buf),
172+
&mut state,
173+
diff_resource_cache,
174+
blob_merge,
175+
options.tree_merge,
176+
)?;
177+
178+
Ok(super::Outcome {
179+
tree_merge: outcome,
180+
merge_bases,
181+
merge_base_tree_id,
182+
virtual_merge_bases,
183+
})
184+
}
185+
186+
fn create_virtual_commit(
187+
objects: &(impl gix_object::Find + gix_object::Write),
188+
parent_a: gix_hash::ObjectId,
189+
parent_b: gix_hash::ObjectId,
190+
tree_id: gix_hash::ObjectId,
191+
) -> Result<gix_hash::ObjectId, Error> {
192+
let mut buf = Vec::new();
193+
let mut commit: gix_object::Commit = objects.find_commit(&parent_a, &mut buf)?.into();
194+
commit.parents = vec![parent_a, parent_b].into();
195+
commit.tree = tree_id;
196+
objects.write(&commit).map_err(Error::WriteObject)
197+
}
198+
}

gix-merge/src/lib.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,10 @@
22
#![forbid(unsafe_code)]
33

44
///
5-
#[cfg(feature = "blob")]
65
pub mod blob;
6+
///
7+
pub mod commit;
8+
pub use commit::function::commit;
9+
///
10+
pub mod tree;
11+
pub use tree::function::tree;

0 commit comments

Comments
 (0)