Skip to content

Commit c0cce2a

Browse files
committed
Add more of our own tests to trigger the remaining code
This also allows for deviations in case the Git result is worse.
1 parent 1a60c28 commit c0cce2a

File tree

10 files changed

+1452
-709
lines changed

10 files changed

+1452
-709
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crate-status.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,15 +338,20 @@ Check out the [performance discussion][gix-diff-performance] as well.
338338

339339
### gix-merge
340340

341-
* [x] three-way merge analysis of **blobs** with choice of how to resolve conflicts
341+
* [x] three-way content-merge analysis of **blobs** with choice of how to resolve conflicts
342+
- [x] respect git attributes and drivers.
342343
- [ ] choose how to resolve conflicts on the data-structure
344+
- [ ] more efficient handling of paths with `merge=binary` attributes (do not load them into memory)
343345
- [x] produce a new blob based on data-structure containing possible resolutions
344346
- [x] `merge` style
345347
- [x] `diff3` style
346348
- [x] `zdiff` style
347349
- [ ] various newlines-related options during the merge (see https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-ignore-space-change).
348350
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
349-
* [ ] diff-heuristics match Git perfectly
351+
* [x] **tree**-diff-heuristics match Git for its test-cases
352+
- [ ] a way to generate an index with stages
353+
- *currently the data it provides won't generate index entries, and possibly can't be used for it yet*
354+
- [ ] submodule merges (*right now they count as conflicts if they differ*)
350355
* [x] API documentation
351356
* [ ] Examples
352357

gix-merge/src/tree/function.rs

Lines changed: 409 additions & 183 deletions
Large diffs are not rendered by default.

gix-merge/src/tree/mod.rs

Lines changed: 72 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ pub enum Error {
2222
BlobMerge(#[from] crate::blob::platform::merge::Error),
2323
#[error("Failed to write merged blob content as blob to the object database")]
2424
WriteBlobToOdb(Box<dyn std::error::Error + Send + Sync + 'static>),
25+
#[error("The merge was performed, but the binary merge result couldn't be selected as it wasn't found")]
26+
MergeResourceNotFound,
2527
}
2628

2729
/// The outcome produced by [`tree()`](crate::tree()).
@@ -43,31 +45,23 @@ pub struct Outcome<'a> {
4345
pub failed_on_first_unresolved_conflict: bool,
4446
}
4547

48+
/// Determine what should be considered an unresolved conflict.
49+
///
50+
/// Note that no matter which variant, [conflicts](Conflict) with [resolution failure](`ResolutionFailure`)
51+
/// will always be unresolved.
52+
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
53+
pub enum UnresolvedConflict {
54+
/// Only consider content merges with conflict markers as unresolved.
55+
ConflictMarkers,
56+
/// Whenever there was any rename, or conflict markers, it is unresolved.
57+
Renames,
58+
}
59+
4660
impl Outcome<'_> {
4761
/// Return `true` if there is any conflict that would still need to be resolved as they would yield undesirable trees.
48-
/// Note that this interpretation of conflicts and their resolution, see
49-
/// [`has_unresolved_conflicts_strict`](Self::has_unresolved_conflicts_strict).
50-
pub fn has_unresolved_conflicts(&self) -> bool {
51-
self.conflicts.iter().any(|c| {
52-
c.resolution.is_err()
53-
|| c.content_merge().map_or(false, |info| {
54-
matches!(info.resolution, crate::blob::Resolution::Conflict)
55-
})
56-
})
57-
}
58-
59-
/// Return `true` only if there was any (even resolved) conflict in the tree structure, or if there are still conflict markers.
60-
pub fn has_unresolved_conflicts_strict(&self) -> bool {
61-
self.conflicts.iter().any(|c| match &c.resolution {
62-
Ok(success) => match success {
63-
Resolution::OursAddedTheirsAddedTypeMismatch { .. }
64-
| Resolution::OursModifiedTheirsRenamedAndChangedThenRename { .. } => true,
65-
Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob } => {
66-
matches!(merged_blob.resolution, crate::blob::Resolution::Conflict)
67-
}
68-
},
69-
Err(_failure) => true,
70-
})
62+
/// This is based on `how` to determine what should be considered unresolved.
63+
pub fn has_unresolved_conflicts(&self, how: UnresolvedConflict) -> bool {
64+
function::is_unresolved(&self.conflicts, how)
7165
}
7266
}
7367

@@ -135,14 +129,19 @@ impl Conflict {
135129
pub fn content_merge(&self) -> Option<ContentMerge> {
136130
match &self.resolution {
137131
Ok(success) => match success {
138-
Resolution::OursAddedTheirsAddedTypeMismatch { .. } => None,
132+
Resolution::SourceLocationAffectedByRename { .. } => None,
139133
Resolution::OursModifiedTheirsRenamedAndChangedThenRename { merged_blob, .. } => *merged_blob,
140134
Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob } => Some(*merged_blob),
141135
},
142136
Err(failure) => match failure {
143-
ResolutionFailure::OursModifiedTheirsDirectoryThenOursRenamed {
144-
renamed_path_to_modified_blob: _,
137+
ResolutionFailure::OursRenamedTheirsRenamedDifferently { merged_blob } => *merged_blob,
138+
ResolutionFailure::Unknown
139+
| ResolutionFailure::OursModifiedTheirsDeleted
140+
| ResolutionFailure::OursModifiedTheirsRenamedTypeMismatch
141+
| ResolutionFailure::OursModifiedTheirsDirectoryThenOursRenamed {
142+
renamed_unique_path_to_modified_blob: _,
145143
}
144+
| ResolutionFailure::OursAddedTheirsAddedTypeMismatch { .. }
146145
| ResolutionFailure::OursDeletedTheirsRenamed => None,
147146
},
148147
}
@@ -154,11 +153,20 @@ impl Conflict {
154153
/// Note that all resolutions are side-agnostic, so *ours* could also have been *theirs* and vice versa.
155154
#[derive(Debug, Clone)]
156155
pub enum Resolution {
156+
/// *ours* had a renamed directory and *theirs* made a change in the now renamed directory.
157+
/// We moved that change into its location.
158+
SourceLocationAffectedByRename {
159+
/// The repository-relative path to the location that the change ended up in after
160+
/// being affected by a renamed directory.
161+
final_location: BString,
162+
},
157163
/// *ours* was a modified blob and *theirs* renamed that blob.
158164
/// We moved the changed blob from *ours* to its new location, and merged it successfully.
159165
/// If this is a `copy`, the source of the copy was set to be the changed blob as well so both match.
160166
OursModifiedTheirsRenamedAndChangedThenRename {
161-
/// If not `None`, the content of the involved blob had to be merged.
167+
/// If one side added the executable bit, we always add it in the merged result.
168+
merged_mode: Option<gix_object::tree::EntryMode>,
169+
/// If `Some(…)`, the content of the involved blob had to be merged.
162170
merged_blob: Option<ContentMerge>,
163171
/// The repository relative path to the location the blob finally ended up in.
164172
/// It's `Some()` only if *they* rewrote the blob into a directory which *we* renamed on *our* side.
@@ -172,11 +180,38 @@ pub enum Resolution {
172180
/// The outcome of the content merge.
173181
merged_blob: ContentMerge,
174182
},
183+
}
184+
185+
/// Describes of a conflict involving *our* change and *their* failed to be resolved.
186+
#[derive(Debug, Clone)]
187+
pub enum ResolutionFailure {
188+
/// *ours* was renamed, but *theirs* was renamed differently. Both versions will be present in the tree,
189+
OursRenamedTheirsRenamedDifferently {
190+
/// If `Some(…)`, the content of the involved blob had to be merged.
191+
merged_blob: Option<ContentMerge>,
192+
},
193+
/// *ours* was modified, but *theirs* was turned into a directory, so *ours* was renamed to a non-conflicting path.
194+
OursModifiedTheirsDirectoryThenOursRenamed {
195+
/// The path at which `ours` can be found in the tree - it's in the same directory that it was in before.
196+
renamed_unique_path_to_modified_blob: BString,
197+
},
175198
/// *ours* was added (or renamed into place) with a different mode than theirs, e.g. blob and symlink, and we kept
199+
/// the symlink in its original location, renaming the other side to `their_unique_location`.
176200
OursAddedTheirsAddedTypeMismatch {
177-
/// The location at which *their* state was placed to resolve the name and type clash.
178-
their_final_location: BString,
201+
/// The location at which *their* state was placed to resolve the name and type clash, named to indicate
202+
/// where the entry is coming from.
203+
their_unique_location: BString,
179204
},
205+
/// *ours* was modified, and they renamed the same file, but there is also a non-mergable type-change.
206+
/// Here we keep both versions of the file.
207+
OursModifiedTheirsRenamedTypeMismatch,
208+
/// *ours* was deleted, but *theirs* was renamed.
209+
OursDeletedTheirsRenamed,
210+
/// *ours* was modified and *theirs* was deleted. We keep the modified one and ignore the deletion.
211+
OursModifiedTheirsDeleted,
212+
/// *ours* and *theirs* are in an untested state so it can't be handled yet, and is considered a conflict
213+
/// without adding our *or* their side to the resulting tree.
214+
Unknown,
180215
}
181216

182217
/// Information about a blob content merge for use in a [`Resolution`].
@@ -190,18 +225,6 @@ pub struct ContentMerge {
190225
pub resolution: crate::blob::Resolution,
191226
}
192227

193-
/// Describes of a conflict involving *our* change and *their* failed to be resolved.
194-
#[derive(Debug, Clone)]
195-
pub enum ResolutionFailure {
196-
/// *ours* was modified, but *theirs* was turned into a directory, so *ours* was renamed to a non-conflicting path.
197-
OursModifiedTheirsDirectoryThenOursRenamed {
198-
/// The path at which `ours` can be found in the tree - it's in the same directory that it was in before.
199-
renamed_path_to_modified_blob: BString,
200-
},
201-
/// *ours* was deleted, but *theirs* was renamed.
202-
OursDeletedTheirsRenamed,
203-
}
204-
205228
/// A way to configure [`tree()`](crate::tree()).
206229
#[derive(Default, Debug, Clone)]
207230
pub struct Options {
@@ -211,14 +234,19 @@ pub struct Options {
211234
pub blob_merge: crate::blob::platform::merge::Options,
212235
/// The context to use when invoking merge-drivers.
213236
pub blob_merge_command_ctx: gix_command::Context,
214-
/// If `true`, the first conflict will cause the entire
215-
pub fail_on_conflict: bool,
237+
/// If `Some(what-is-unresolved)`, the first unresolved conflict will cause the entire merge to stop.
238+
/// This is useful to see if there is any conflict, without performing the whole operation.
239+
// TODO: Maybe remove this if the cost of figuring out conflicts is so low - after all, the data structures
240+
// and initial diff is the expensive thing right now, which are always done upfront.
241+
// However, this could change once we know do everything during the traversal, which probably doesn't work
242+
// without caching stuff and is too complicated to actually do.
243+
pub fail_on_conflict: Option<UnresolvedConflict>,
216244
/// If greater than 0, each level indicates another merge-of-merge. This can be greater than
217245
/// 0 when merging merge-bases, which are merged like a pyramid.
218246
/// This value also affects the size of merge-conflict markers, to allow differentiating
219247
/// merge conflicts on each level.
220248
pub call_depth: u8,
221-
// TODO(Perf) add a flag to allow parallelizing the tree-diff itself.
249+
// TODO(Perf) add a flag to allow parallelizing the tree-diff itself once there was some profiling.
222250
}
223251

224252
pub(super) mod function;

gix-merge/src/tree/utils.rs

Lines changed: 55 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
//! ## About `debug_assert!()
2+
//!
3+
//! The idea is to have code that won't panic in production. Thus, if in production that assertion would fail,
4+
//! we will rather let the code run and hope it will either be correct enough or fail in more graceful ways later.
5+
//!
6+
//! Once such a case becomes a bug and is reproduced in testing, the debug-assertion will kick in and hopefully
7+
//! contribute to finding a fix faster.
18
use crate::blob::ResourceKind;
29
use crate::tree::{Conflict, ConflictMapping, Error, Options, Resolution, ResolutionFailure};
310
use bstr::ByteSlice;
@@ -6,27 +13,23 @@ use gix_diff::tree_with_rewrites::{Change, ChangeRef};
613
use gix_hash::ObjectId;
714
use gix_object::tree;
815
use gix_object::tree::EntryMode;
9-
use std::borrow::Cow;
1016
use std::collections::HashMap;
1117

1218
/// Assuming that `their_location` is the destination of *their* rewrite, check if *it* passes
1319
/// over a directory rewrite in *our* tree. If so, rewrite it so that we get the path
1420
/// it would have had if it had been renamed along with *our* directory.
15-
pub fn possibly_rewritten_location<'a>(
21+
pub fn possibly_rewritten_location(
1622
check_tree: &mut TreeNodes,
17-
their_location: &'a BStr,
23+
their_location: &BStr,
1824
our_changes: &ChangeListRef,
19-
) -> Cow<'a, BStr> {
20-
check_tree
21-
.check_conflict(their_location)
22-
.and_then(|pc| match pc {
23-
PossibleConflict::PassedRewrittenDirectory { change_idx } => {
24-
let passed_change = &our_changes[change_idx];
25-
rewrite_location_with_renamed_directory(their_location, &passed_change.inner).map(Cow::Owned)
26-
}
27-
_ => None,
28-
})
29-
.unwrap_or(Cow::Borrowed(their_location))
25+
) -> Option<BString> {
26+
check_tree.check_conflict(their_location).and_then(|pc| match pc {
27+
PossibleConflict::PassedRewrittenDirectory { change_idx } => {
28+
let passed_change = &our_changes[change_idx];
29+
rewrite_location_with_renamed_directory(their_location, &passed_change.inner)
30+
}
31+
_ => None,
32+
})
3033
}
3134

3235
pub fn rewrite_location_with_renamed_directory(their_location: &BStr, passed_change: &Change) -> Option<BString> {
@@ -140,9 +143,11 @@ where
140143
};
141144
let prep = blob_merge.prepare_merge(objects, with_extra_markers(options, extra_markers))?;
142145
let (pick, resolution) = prep.merge(buf, labels, &options.blob_merge_command_ctx)?;
143-
// TODO: properly handle binary/other buffers. This API is troublesome, fix it
144-
let merged_content = prep.buffer_by_pick(pick).unwrap_or(buf);
145-
let merged_blob_id = write_blob_to_odb(merged_content).map_err(|err| Error::WriteBlobToOdb(err.into()))?;
146+
147+
let merged_blob_id = prep
148+
.id_by_pick(pick, buf, write_blob_to_odb)
149+
.map_err(|err| Error::WriteBlobToOdb(err.into()))?
150+
.ok_or(Error::MergeResourceNotFound)?;
146151
Ok((merged_blob_id, resolution))
147152
}
148153

@@ -161,10 +166,17 @@ pub struct TrackedChange {
161166
pub inner: Change,
162167
/// If `true`, this change counts as written to the tree using a [`tree::Editor`].
163168
pub was_written: bool,
164-
/// If `true`, this change must be placed into the tree before handling it.
165-
/// This makes sure that new changes aren't visible too early, which would mean the algorihtm
169+
/// If `Some(ours_idx_to_ignore)`, this change must be placed into the tree before handling it.
170+
/// This makes sure that new changes aren't visible too early, which would mean the algorithm
166171
/// knows things too early which can be misleading.
167-
pub needs_tree_insertion: bool,
172+
/// The `ours_idx_to_ignore` assures that the same rewrite won't be used as matching side, which
173+
/// would lead to strange effects. Only set if it's a rewrite though.
174+
pub needs_tree_insertion: Option<Option<usize>>,
175+
/// A new `(location, change_idx)` pair for the change that can happen if the location is touching a rewrite in a parent
176+
/// directory, but otherwise doesn't have a match. This means we shall redo the operation but with
177+
/// the changed path.
178+
/// The second tuple entry `change_idx` is the change-idx we passed over, which refers to the other side that interfered.
179+
pub rewritten_location: Option<(BString, usize)>,
168180
}
169181

170182
pub type ChangeList = Vec<TrackedChange>;
@@ -179,6 +191,7 @@ pub fn track(change: ChangeRef<'_>, changes: &mut ChangeList) {
179191
if change.entry_mode().is_tree() && matches!(change, ChangeRef::Modification { .. }) {
180192
return;
181193
}
194+
let is_tree = change.entry_mode().is_tree();
182195
changes.push(TrackedChange {
183196
inner: match change.into_owned() {
184197
Change::Rewrite {
@@ -196,22 +209,24 @@ pub fn track(change: ChangeRef<'_>, changes: &mut ChangeList) {
196209
},
197210
other => other,
198211
},
199-
was_written: false,
200-
needs_tree_insertion: false,
212+
was_written: is_tree,
213+
needs_tree_insertion: None,
214+
rewritten_location: None,
201215
});
202216
}
203217

204218
/// Unconditionally apply `change` to `editor`.
205-
pub fn apply_change(editor: &mut tree::Editor<'_>, change: &Change) -> Result<(), tree::editor::Error> {
219+
pub fn apply_change(
220+
editor: &mut tree::Editor<'_>,
221+
change: &Change,
222+
alternative_location: Option<&BString>,
223+
) -> Result<(), tree::editor::Error> {
206224
use to_components_bstring_ref as to_components;
207-
// TODO(performance): we could apply tree changes if they are renames-by-identity, and then save the
208-
// work needed to set all the children recursively. This would require more elaborate
209-
// tracking though.
210225
if change.entry_mode().is_tree() {
211226
return Ok(());
212227
}
213228

214-
match change {
229+
let (location, mode, id) = match change {
215230
Change::Addition {
216231
location,
217232
entry_mode,
@@ -223,8 +238,11 @@ pub fn apply_change(editor: &mut tree::Editor<'_>, change: &Change) -> Result<()
223238
entry_mode,
224239
id,
225240
..
226-
} => editor.upsert(to_components(location), entry_mode.kind(), *id)?,
227-
Change::Deletion { location, .. } => editor.remove(to_components(location))?,
241+
} => (location, entry_mode, id),
242+
Change::Deletion { location, .. } => {
243+
editor.remove(to_components(alternative_location.unwrap_or(location)))?;
244+
return Ok(());
245+
}
228246
Change::Rewrite {
229247
source_location,
230248
entry_mode,
@@ -236,17 +254,21 @@ pub fn apply_change(editor: &mut tree::Editor<'_>, change: &Change) -> Result<()
236254
if !*copy {
237255
editor.remove(to_components(source_location))?;
238256
}
239-
editor.upsert(to_components(location), entry_mode.kind(), *id)?
257+
(location, entry_mode, id)
240258
}
241259
};
260+
261+
editor.upsert(
262+
to_components(alternative_location.unwrap_or(location)),
263+
mode.kind(),
264+
*id,
265+
)?;
242266
Ok(())
243267
}
244268

245269
/// A potential conflict that needs to be checked. It comes in several varieties and always happens
246270
/// if paths overlap in some way between *theirs* and *ours*.
247271
#[derive(Debug)]
248-
// TODO: remove this when all fields are used.
249-
#[allow(dead_code)]
250272
pub enum PossibleConflict {
251273
/// *our* changes have a tree here, but *they* place a non-tree or edit an existing item (that we removed).
252274
TreeToNonTree {
@@ -270,7 +292,7 @@ pub enum PossibleConflict {
270292
}
271293

272294
impl PossibleConflict {
273-
fn change_idx(&self) -> Option<usize> {
295+
pub(super) fn change_idx(&self) -> Option<usize> {
274296
match self {
275297
PossibleConflict::TreeToNonTree { change_idx, .. } | PossibleConflict::NonTreeToTree { change_idx, .. } => {
276298
*change_idx
@@ -483,7 +505,6 @@ impl TreeNodes {
483505
}
484506
}
485507

486-
// TODO: This overwrites nodes, possibly rewrites, is that OK or can there be issues later?
487508
debug_assert!(
488509
!matches!(new_change, Change::Rewrite { .. }),
489510
"BUG: we thought we wouldn't do that current.location is related?"
1.29 MB
Binary file not shown.

0 commit comments

Comments
 (0)