Skip to content

Commit efec046

Browse files
committed
Add more of our own tests to trigger the remaining code
This also allows for deviations in case the Git result is worse.
1 parent 290c797 commit efec046

File tree

7 files changed

+542
-157
lines changed

7 files changed

+542
-157
lines changed

crate-status.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,15 +338,16 @@ Check out the [performance discussion][gix-diff-performance] as well.
338338

339339
### gix-merge
340340

341-
* [x] three-way merge analysis of **blobs** with choice of how to resolve conflicts
341+
* [x] three-way content-merge analysis of **blobs** with choice of how to resolve conflicts
342342
- [ ] choose how to resolve conflicts on the data-structure
343343
- [x] produce a new blob based on data-structure containing possible resolutions
344344
- [x] `merge` style
345345
- [x] `diff3` style
346346
- [x] `zdiff` style
347347
- [ ] various newlines-related options during the merge (see https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-ignore-space-change).
348348
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
349-
* [ ] diff-heuristics match Git perfectly
349+
* [x] **tree**-diff-heuristics match Git for its test-cases
350+
- [ ] index-correctness (currently the data it provides won't generate index entries, and possibly can't be used for it)
350351
* [x] API documentation
351352
* [ ] Examples
352353

gix-merge/src/tree/function.rs

Lines changed: 196 additions & 67 deletions
Large diffs are not rendered by default.

gix-merge/src/tree/mod.rs

Lines changed: 57 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -43,31 +43,25 @@ pub struct Outcome<'a> {
4343
pub failed_on_first_unresolved_conflict: bool,
4444
}
4545

46+
/// Determine what should be considered an unresolved conflict.
47+
///
48+
/// Note that no matter which variant, [conflicts](Conflict) with [resolution failure](`ResolutionFailure`)
49+
/// will always be unresolved.
50+
///
51+
/// However, those whose resolution was
52+
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
53+
pub enum UnresolvedConflict {
54+
/// Only consider content merges with conflict markers as unresolved.
55+
ConflictMarkers,
56+
/// Whenever there was any rename, or conflict markers, it is unresolved.
57+
Renames,
58+
}
59+
4660
impl Outcome<'_> {
4761
/// Return `true` if there is any conflict that would still need to be resolved as they would yield undesirable trees.
48-
/// Note that this interpretation of conflicts and their resolution, see
49-
/// [`has_unresolved_conflicts_strict`](Self::has_unresolved_conflicts_strict).
50-
pub fn has_unresolved_conflicts(&self) -> bool {
51-
self.conflicts.iter().any(|c| {
52-
c.resolution.is_err()
53-
|| c.content_merge().map_or(false, |info| {
54-
matches!(info.resolution, crate::blob::Resolution::Conflict)
55-
})
56-
})
57-
}
58-
59-
/// Return `true` only if there was any (even resolved) conflict in the tree structure, or if there are still conflict markers.
60-
pub fn has_unresolved_conflicts_strict(&self) -> bool {
61-
self.conflicts.iter().any(|c| match &c.resolution {
62-
Ok(success) => match success {
63-
Resolution::OursAddedTheirsAddedTypeMismatch { .. }
64-
| Resolution::OursModifiedTheirsRenamedAndChangedThenRename { .. } => true,
65-
Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob } => {
66-
matches!(merged_blob.resolution, crate::blob::Resolution::Conflict)
67-
}
68-
},
69-
Err(_failure) => true,
70-
})
62+
/// This is based on `how` to determine what should be considered unresolved.
63+
pub fn has_unresolved_conflicts(&self, how: UnresolvedConflict) -> bool {
64+
function::is_unresolved(&self.conflicts, how)
7165
}
7266
}
7367

@@ -135,14 +129,16 @@ impl Conflict {
135129
pub fn content_merge(&self) -> Option<ContentMerge> {
136130
match &self.resolution {
137131
Ok(success) => match success {
138-
Resolution::OursAddedTheirsAddedTypeMismatch { .. } => None,
132+
Resolution::SourceLocationAffectedByRename { .. } => None,
139133
Resolution::OursModifiedTheirsRenamedAndChangedThenRename { merged_blob, .. } => *merged_blob,
140134
Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob } => Some(*merged_blob),
141135
},
142136
Err(failure) => match failure {
137+
ResolutionFailure::OursRenamedTheirsRenamedDifferently { merged_blob } => *merged_blob,
143138
ResolutionFailure::OursModifiedTheirsDirectoryThenOursRenamed {
144-
renamed_path_to_modified_blob: _,
139+
renamed_unique_path_to_modified_blob: _,
145140
}
141+
| ResolutionFailure::OursAddedTheirsAddedTypeMismatch { .. }
146142
| ResolutionFailure::OursDeletedTheirsRenamed => None,
147143
},
148144
}
@@ -154,11 +150,18 @@ impl Conflict {
154150
/// Note that all resolutions are side-agnostic, so *ours* could also have been *theirs* and vice versa.
155151
#[derive(Debug, Clone)]
156152
pub enum Resolution {
153+
/// *ours* had a renamed directory and *theirs* made a change in the now renamed directory.
154+
/// We moved that change into its location.
155+
SourceLocationAffectedByRename {
156+
/// The repository-relative path to the location that the change ended up in after
157+
/// being affected by a renamed directory.
158+
final_location: BString,
159+
},
157160
/// *ours* was a modified blob and *theirs* renamed that blob.
158161
/// We moved the changed blob from *ours* to its new location, and merged it successfully.
159162
/// If this is a `copy`, the source of the copy was set to be the changed blob as well so both match.
160163
OursModifiedTheirsRenamedAndChangedThenRename {
161-
/// If not `None`, the content of the involved blob had to be merged.
164+
/// If `Some(…)`, the content of the involved blob had to be merged.
162165
merged_blob: Option<ContentMerge>,
163166
/// The repository relative path to the location the blob finally ended up in.
164167
/// It's `Some()` only if *they* rewrote the blob into a directory which *we* renamed on *our* side.
@@ -172,11 +175,30 @@ pub enum Resolution {
172175
/// The outcome of the content merge.
173176
merged_blob: ContentMerge,
174177
},
178+
}
179+
180+
/// Describes of a conflict involving *our* change and *their* failed to be resolved.
181+
#[derive(Debug, Clone)]
182+
pub enum ResolutionFailure {
183+
/// *ours* was renamed, but *theirs* was renamed differently. Both versions will be present in the tree,
184+
OursRenamedTheirsRenamedDifferently {
185+
/// If `Some(…)`, the content of the involved blob had to be merged.
186+
merged_blob: Option<ContentMerge>,
187+
},
188+
/// *ours* was modified, but *theirs* was turned into a directory, so *ours* was renamed to a non-conflicting path.
189+
OursModifiedTheirsDirectoryThenOursRenamed {
190+
/// The path at which `ours` can be found in the tree - it's in the same directory that it was in before.
191+
renamed_unique_path_to_modified_blob: BString,
192+
},
175193
/// *ours* was added (or renamed into place) with a different mode than theirs, e.g. blob and symlink, and we kept
194+
/// the symlink in its original location, renaming
176195
OursAddedTheirsAddedTypeMismatch {
177-
/// The location at which *their* state was placed to resolve the name and type clash.
178-
their_final_location: BString,
196+
/// The location at which *their* state was placed to resolve the name and type clash, named to indicate
197+
/// where the entry is coming from.
198+
their_unique_location: BString,
179199
},
200+
/// *ours* was deleted, but *theirs* was renamed.
201+
OursDeletedTheirsRenamed,
180202
}
181203

182204
/// Information about a blob content merge for use in a [`Resolution`].
@@ -190,18 +212,6 @@ pub struct ContentMerge {
190212
pub resolution: crate::blob::Resolution,
191213
}
192214

193-
/// Describes of a conflict involving *our* change and *their* failed to be resolved.
194-
#[derive(Debug, Clone)]
195-
pub enum ResolutionFailure {
196-
/// *ours* was modified, but *theirs* was turned into a directory, so *ours* was renamed to a non-conflicting path.
197-
OursModifiedTheirsDirectoryThenOursRenamed {
198-
/// The path at which `ours` can be found in the tree - it's in the same directory that it was in before.
199-
renamed_path_to_modified_blob: BString,
200-
},
201-
/// *ours* was deleted, but *theirs* was renamed.
202-
OursDeletedTheirsRenamed,
203-
}
204-
205215
/// A way to configure [`tree()`](crate::tree()).
206216
#[derive(Default, Debug, Clone)]
207217
pub struct Options {
@@ -211,8 +221,13 @@ pub struct Options {
211221
pub blob_merge: crate::blob::platform::merge::Options,
212222
/// The context to use when invoking merge-drivers.
213223
pub blob_merge_command_ctx: gix_command::Context,
214-
/// If `true`, the first conflict will cause the entire
215-
pub fail_on_conflict: bool,
224+
/// If `Some(what-is-unresolved)`, the first unresolved conflict will cause the entire merge to stop.
225+
/// This is useful to see if there is any conflict, without performing the whole operation.
226+
// TODO: Maybe remove this if the cost of figuring out conflicts is so low - after all, the data structures
227+
// and initial diff is the expensive thing right now, which are always done upfront.
228+
// However, this could change once we know do everything during the traversal, which probably doesn't work
229+
// without caching stuff and is too complicated to actually do.
230+
pub fail_on_conflict: Option<UnresolvedConflict>,
216231
/// If greater than 0, each level indicates another merge-of-merge. This can be greater than
217232
/// 0 when merging merge-bases, which are merged like a pyramid.
218233
/// This value also affects the size of merge-conflict markers, to allow differentiating

gix-merge/src/tree/utils.rs

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
//! ## About `debug_assert!()
2+
//!
3+
//! The idea is to have code that won't panic in production. Thus, if in production that assertion would fail,
4+
//! we will rather let the code run and hope it will either be correct enough or fail in more graceful ways later.
5+
//!
6+
//! Once such a case becomes a bug and is reproduced in testing, the debug-assertion will kick in and hopefully
7+
//! contribute to finding a fix faster.
18
use crate::blob::ResourceKind;
29
use crate::tree::{Conflict, ConflictMapping, Error, Options, Resolution, ResolutionFailure};
310
use bstr::ByteSlice;
@@ -6,27 +13,23 @@ use gix_diff::tree_with_rewrites::{Change, ChangeRef};
613
use gix_hash::ObjectId;
714
use gix_object::tree;
815
use gix_object::tree::EntryMode;
9-
use std::borrow::Cow;
1016
use std::collections::HashMap;
1117

1218
/// Assuming that `their_location` is the destination of *their* rewrite, check if *it* passes
1319
/// over a directory rewrite in *our* tree. If so, rewrite it so that we get the path
1420
/// it would have had if it had been renamed along with *our* directory.
15-
pub fn possibly_rewritten_location<'a>(
21+
pub fn possibly_rewritten_location(
1622
check_tree: &mut TreeNodes,
17-
their_location: &'a BStr,
23+
their_location: &BStr,
1824
our_changes: &ChangeListRef,
19-
) -> Cow<'a, BStr> {
20-
check_tree
21-
.check_conflict(their_location)
22-
.and_then(|pc| match pc {
23-
PossibleConflict::PassedRewrittenDirectory { change_idx } => {
24-
let passed_change = &our_changes[change_idx];
25-
rewrite_location_with_renamed_directory(their_location, &passed_change.inner).map(Cow::Owned)
26-
}
27-
_ => None,
28-
})
29-
.unwrap_or(Cow::Borrowed(their_location))
25+
) -> Option<BString> {
26+
check_tree.check_conflict(their_location).and_then(|pc| match pc {
27+
PossibleConflict::PassedRewrittenDirectory { change_idx } => {
28+
let passed_change = &our_changes[change_idx];
29+
rewrite_location_with_renamed_directory(their_location, &passed_change.inner)
30+
}
31+
_ => None,
32+
})
3033
}
3134

3235
pub fn rewrite_location_with_renamed_directory(their_location: &BStr, passed_change: &Change) -> Option<BString> {
@@ -165,6 +168,11 @@ pub struct TrackedChange {
165168
/// This makes sure that new changes aren't visible too early, which would mean the algorihtm
166169
/// knows things too early which can be misleading.
167170
pub needs_tree_insertion: bool,
171+
/// A new location for the change that can happen if the location is touching a rewrite in a parent
172+
/// directory, but otherwise doesn't have a match. This means we shall redo the operation but with
173+
/// the changed path.
174+
/// The second touple entry is the change-idx we passed over.
175+
pub rewritten_location: Option<(BString, usize)>,
168176
}
169177

170178
pub type ChangeList = Vec<TrackedChange>;
@@ -179,6 +187,7 @@ pub fn track(change: ChangeRef<'_>, changes: &mut ChangeList) {
179187
if change.entry_mode().is_tree() && matches!(change, ChangeRef::Modification { .. }) {
180188
return;
181189
}
190+
let is_tree = change.entry_mode().is_tree();
182191
changes.push(TrackedChange {
183192
inner: match change.into_owned() {
184193
Change::Rewrite {
@@ -196,22 +205,24 @@ pub fn track(change: ChangeRef<'_>, changes: &mut ChangeList) {
196205
},
197206
other => other,
198207
},
199-
was_written: false,
208+
was_written: is_tree,
200209
needs_tree_insertion: false,
210+
rewritten_location: None,
201211
});
202212
}
203213

204214
/// Unconditionally apply `change` to `editor`.
205-
pub fn apply_change(editor: &mut tree::Editor<'_>, change: &Change) -> Result<(), tree::editor::Error> {
215+
pub fn apply_change(
216+
editor: &mut tree::Editor<'_>,
217+
change: &Change,
218+
alternative_location: Option<&BString>,
219+
) -> Result<(), tree::editor::Error> {
206220
use to_components_bstring_ref as to_components;
207-
// TODO(performance): we could apply tree changes if they are renames-by-identity, and then save the
208-
// work needed to set all the children recursively. This would require more elaborate
209-
// tracking though.
210221
if change.entry_mode().is_tree() {
211222
return Ok(());
212223
}
213224

214-
match change {
225+
let (location, mode, id) = match change {
215226
Change::Addition {
216227
location,
217228
entry_mode,
@@ -223,8 +234,11 @@ pub fn apply_change(editor: &mut tree::Editor<'_>, change: &Change) -> Result<()
223234
entry_mode,
224235
id,
225236
..
226-
} => editor.upsert(to_components(location), entry_mode.kind(), *id)?,
227-
Change::Deletion { location, .. } => editor.remove(to_components(location))?,
237+
} => (location, entry_mode, id),
238+
Change::Deletion { location, .. } => {
239+
editor.remove(to_components(alternative_location.unwrap_or(location)))?;
240+
return Ok(());
241+
}
228242
Change::Rewrite {
229243
source_location,
230244
entry_mode,
@@ -236,17 +250,21 @@ pub fn apply_change(editor: &mut tree::Editor<'_>, change: &Change) -> Result<()
236250
if !*copy {
237251
editor.remove(to_components(source_location))?;
238252
}
239-
editor.upsert(to_components(location), entry_mode.kind(), *id)?
253+
(location, entry_mode, id)
240254
}
241255
};
256+
257+
editor.upsert(
258+
to_components(alternative_location.unwrap_or(location)),
259+
mode.kind(),
260+
*id,
261+
)?;
242262
Ok(())
243263
}
244264

245265
/// A potential conflict that needs to be checked. It comes in several varieties and always happens
246266
/// if paths overlap in some way between *theirs* and *ours*.
247267
#[derive(Debug)]
248-
// TODO: remove this when all fields are used.
249-
#[allow(dead_code)]
250268
pub enum PossibleConflict {
251269
/// *our* changes have a tree here, but *they* place a non-tree or edit an existing item (that we removed).
252270
TreeToNonTree {
@@ -483,7 +501,6 @@ impl TreeNodes {
483501
}
484502
}
485503

486-
// TODO: This overwrites nodes, possibly rewrites, is that OK or can there be issues later?
487504
debug_assert!(
488505
!matches!(new_change, Change::Rewrite { .. }),
489506
"BUG: we thought we wouldn't do that current.location is related?"
543 KB
Binary file not shown.

0 commit comments

Comments
 (0)