From dccc204430535f8c9eb74d7861365dbb2c42bb02 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 24 May 2025 10:55:47 -0700 Subject: [PATCH 1/7] merge-ort: update comments to modern testfile location In commit 919df3195553 (Collect merge-related tests to t64xx, 2020-08-10), merge related tests were moved from t60xx to t64xx. Some comments in merge-ort relating to some tricky code referenced specific testcases within certain testfiles for additional information, but referred to their historical testfile names; update the testfile names to mention their modern location. Signed-off-by: Elijah Newren --- merge-ort.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 47b3d1730ece36..d87ba6dd42bf7a 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2163,7 +2163,7 @@ static int handle_content_merge(struct merge_options *opt, /* * FIXME: If opt->priv->call_depth && !clean, then we really * should not make result->mode match either a->mode or - * b->mode; that causes t6036 "check conflicting mode for + * b->mode; that causes t6416 "check conflicting mode for * regular file" to fail. It would be best to use some other * mode, but we'll confuse all kinds of stuff if we use one * where S_ISREG(result->mode) isn't true, and if we use @@ -2520,7 +2520,7 @@ static void compute_collisions(struct strmap *collisions, * happening, and fall back to no-directory-rename detection * behavior for those paths. * - * See testcases 9e and all of section 5 from t6043 for examples. + * See testcases 9e and all of section 5 from t6423 for examples. */ for (i = 0; i < pairs->nr; ++i) { struct strmap_entry *rename_info; @@ -2618,7 +2618,7 @@ static char *check_for_directory_rename(struct merge_options *opt, * That's why otherinfo and dir_rename_exclusions is here. * * As it turns out, this also prevents N-way transient rename - * confusion; See testcases 9c and 9d of t6043. + * confusion; See testcases 9c and 9d of t6423. */ new_dir = rename_info->value; /* old_dir = rename_info->key; */ otherinfo = strmap_get_entry(dir_rename_exclusions, new_dir); From 58df0710efc042b014a0c8282ce1d7fa62fbb760 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 5 Jun 2025 10:01:14 -0700 Subject: [PATCH 2/7] merge-ort: drop unnecessary temporary in check_for_directory_rename() check_for_directory_rename() had a weirdly coded check for whether a strmap contained a certain key. Replace the temporary variable and call to strmap_get_entry() with the more natural strmap_contains() call. Signed-off-by: Elijah Newren --- merge-ort.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index d87ba6dd42bf7a..9b9d82ed10f7d8 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2580,7 +2580,6 @@ static char *check_for_directory_rename(struct merge_options *opt, { char *new_path; struct strmap_entry *rename_info; - struct strmap_entry *otherinfo; const char *new_dir; int other_side = 3 - side_index; @@ -2615,14 +2614,13 @@ static char *check_for_directory_rename(struct merge_options *opt, * to not let Side1 do the rename to dumbdir, since we know that is * the source of one of our directory renames. * - * That's why otherinfo and dir_rename_exclusions is here. + * That's why dir_rename_exclusions is here. * * As it turns out, this also prevents N-way transient rename * confusion; See testcases 9c and 9d of t6423. */ new_dir = rename_info->value; /* old_dir = rename_info->key; */ - otherinfo = strmap_get_entry(dir_rename_exclusions, new_dir); - if (otherinfo) { + if (strmap_contains(dir_rename_exclusions, new_dir)) { path_msg(opt, INFO_DIR_RENAME_SKIPPED_DUE_TO_RERENAME, 1, rename_info->key, path, new_dir, NULL, _("WARNING: Avoiding applying %s -> %s rename " From 1ea7bfd3bff81a648a5ed1d0112ec404ab64fc91 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 16 Jul 2025 16:54:08 -0700 Subject: [PATCH 3/7] t6423: document two bugs with rename-to-self testcases When commit 98a1a00d5301 (t6423: add a testcase causing a failed assertion in process_renames, 2025-03-06) was added, I tweaked the commit message, and moved the test into t6423. However, that still left two other things missing that made this test unlike the others in the same testfile: * It didn't have an English description of the test setup like all other tests in t6423 * It didn't check that the right number of files were present at the end The former issue is a minor detail that isn't that critical, but the latter feels more important. If it had been done, I might have noticed another bug. In particular, this testcase involves Side A: rename world -> tools/world and Side B: rename tools/ -> Side B: remove world The tools/ -> rename turns the world -> tools/world rename into world -> world, i.e. a rename-to-self case. But, it's a path conflict because merge.directoryRenames defaults to false. There's no content conflict because Side A didn't modify world, so we should just take the content of world from Side B -- i.e. delete it. So, we have a conflict on the path, but not on its content. We could consider letting the content trump since it is unconflicted, but if we are going to leave a conflict, it should certainly represent that 'world' existed both in the base version and on Side A. Currently it doesn't. Add a description of this test, add some checking of the number of entries in the index at the end of the merge, and mark the test as expecting to fail for now. A subsequent commit will fix this bug. While at it, I found another related bug from a nearly identical setup but setting merge.directoryRenames=true. Copy testcase 12n into 12n2, changing it to use merge instead of cherry-pick, and turn on directory renames for this test. In this case, since there is no content conflict and no path conflict, it should be okay to delete the file. Unfortunately, the code resolves without conflict but silently leaves world despite the fact it should be deleted. It might also be okay if the code spuriously thought there was a modify/delete conflict here; that would at least notify users to look closer and then when they notice there was no change since the base version, they can easily resolve. A conflict notice is much better than silently providing the wrong resolution. Cover this with the 12n2 testcase, which for now is marked as expecting to fail as well. Signed-off-by: Elijah Newren --- t/t6423-merge-rename-directories.sh | 100 +++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh index f48ed6d03534ee..2def1522bd59ba 100755 --- a/t/t6423-merge-rename-directories.sh +++ b/t/t6423-merge-rename-directories.sh @@ -5056,6 +5056,25 @@ test_expect_success '12m: Change parent of renamed-dir to symlink on other side' ) ' +# Testcase 12n, Directory rename transitively makes rename back to self +# +# (Since this is a cherry-pick instead of merge, the labels are a bit weird. +# O, the original commit, is A~1 rather than what branch O points to.) +# +# Commit O: tools/hello +# world +# Commit A: tools/hello +# tools/world +# Commit B: hello +# In words: +# A: world -> tools/world +# B: tools/ -> /, i.e. rename all of tools to toplevel directory +# delete world +# +# Expected: +# CONFLICT (file location): tools/world vs. world +# + test_setup_12n () { git init 12n && ( @@ -5084,7 +5103,7 @@ test_setup_12n () { ) } -test_expect_success '12n: Directory rename transitively makes rename back to self' ' +test_expect_failure '12n: Directory rename transitively makes rename back to self' ' test_setup_12n && ( cd 12n && @@ -5092,7 +5111,84 @@ test_expect_success '12n: Directory rename transitively makes rename back to sel git checkout -q B^0 && test_must_fail git cherry-pick A^0 >out && - grep "CONFLICT (file location).*should perhaps be moved" out + test_grep "CONFLICT (file location).*should perhaps be moved" out && + + # Should have 1 entry for hello, and 2 for world + test_stdout_line_count = 3 git ls-files -s && + test_stdout_line_count = 1 git ls-files -s hello && + test_stdout_line_count = 2 git ls-files -s world + ) +' + +# Testcase 12n2, Directory rename transitively makes rename back to self +# +# Commit O: tools/hello +# world +# Commit A: tools/hello +# tools/world +# Commit B: hello +# In words: +# A: world -> tools/world +# B: tools/ -> /, i.e. rename all of tools to toplevel directory +# delete world +# +# Expected: +# CONFLICT (file location): tools/world vs. world +# + +test_setup_12n2 () { + git init 12n2 && + ( + cd 12n2 && + + mkdir tools && + echo hello >tools/hello && + git add tools/hello && + echo world >world && + git add world && + git commit -m "O" && + + git branch O && + git branch A && + git branch B && + + git switch A && + git mv world tools/world && + git commit -m "Move world into tools/" && + + git switch B && + git mv tools/hello hello && + git rm world && + git commit -m "Move hello from tools/ to toplevel" + ) +} + +test_expect_failure '12n2: Directory rename transitively makes rename back to self' ' + test_setup_12n2 && + ( + cd 12n2 && + + git checkout -q B^0 && + + test_might_fail git -c merge.directoryRenames=true merge A^0 >out && + + # Should have 1 entry for hello, and either 0 or 2 for world + # + # NOTE: Since merge.directoryRenames=true, there is no path + # conflict for world vs. tools/world; it should end up at + # world. The fact that world was unmodified on side A, means + # there was no content conflict; we should just take the + # content from side B -- i.e. delete the file. So merging + # could just delete world. + # + # However, rename-to-self-via-directory-rename is a bit more + # challenging. Relax this test to allow world to be treated + # as a modify/delete conflict as well, meaning it will have + # two higher order stages, that just so happen to match. + # + test_stdout_line_count = 1 git ls-files -s hello && + test_stdout_line_count = 2 git ls-files -s world && + test_grep "CONFLICT (modify/delete).*world deleted in HEAD" out ) ' From 29b5e00c556a3d39fb9ca57bf3903f43280def5d Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 17 Jul 2025 11:06:29 -0700 Subject: [PATCH 4/7] t6423: fix missed staging of file in testcases 12i,12j,12k Commit 806f83287f8d (t6423: test directory renames causing rename-to-self, 2021-06-30) introduced testcase 12i-12k but omitted staging one of the files and copy-pasted that mistake to the other tests. This means the merge runs with an unstaged change, even though that isn't related to what is being tested and makes the test look more complicated than it is. The cover letter for the series associated with the above commit (see Message-ID: pull.1039.git.git.1624727121.gitgitgadget@gmail.com) noted that these testcases triggered two bugs in merge-recursive but only one in merge-ort; in merge-recursive these testcases also triggered a silent deletion of the file in question when it shouldn't be deleted. What I didn't realize at the time was that the deletion bug in merge-ort was merely being sidestepped by the "relevant renames" optimization but can actually be triggered. A subsequent commit will deal with that additional bug, but it was complicated by the mistaken forgotten staging, so this commit first fixes that issue. Signed-off-by: Elijah Newren --- t/t6423-merge-rename-directories.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh index 2def1522bd59ba..e1251b4e12cea8 100755 --- a/t/t6423-merge-rename-directories.sh +++ b/t/t6423-merge-rename-directories.sh @@ -4747,6 +4747,7 @@ test_setup_12i () { git switch B && git mv source/bar source/subdir/bar && echo more baz >>source/baz && + git add source/baz && git commit -m B ) } @@ -4771,7 +4772,7 @@ test_expect_success '12i: Directory rename causes rename-to-self' ' git status --porcelain -uno >actual && cat >expect <<-\EOF && UU source/bar - M source/baz + M source/baz EOF test_cmp expect actual ) @@ -4806,6 +4807,7 @@ test_setup_12j () { git switch B && git mv bar subdir/bar && echo more baz >>baz && + git add baz && git commit -m B ) } @@ -4830,7 +4832,7 @@ test_expect_success '12j: Directory rename to root causes rename-to-self' ' git status --porcelain -uno >actual && cat >expect <<-\EOF && UU bar - M baz + M baz EOF test_cmp expect actual ) @@ -4865,6 +4867,7 @@ test_setup_12k () { git switch B && git mv dirA/bar dirB/bar && echo more baz >>dirA/baz && + git add dirA/baz && git commit -m B ) } @@ -4889,7 +4892,7 @@ test_expect_success '12k: Directory rename with sibling causes rename-to-self' ' git status --porcelain -uno >actual && cat >expect <<-\EOF && UU dirA/bar - M dirA/baz + M dirA/baz EOF test_cmp expect actual ) From c4caba16c72636c7b726c6753815f7c5f77ecd1d Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 6 Aug 2025 11:00:28 -0700 Subject: [PATCH 5/7] merge-ort: clarify the interning of strings in opt->priv->path Because merge-ort is dealing with potentially all the pathnames in the repository, it sometimes needs to do an awful lot of string comparisons. Because of this, struct merge_options_internal's path member was envisioned from the beginning to contain an interned value for every path in order to allow us to compare strings via pointer comparison instead of using strcmp. See * 5b59c3db059d (merge-ort: setup basic internal data structures, 2020-12-13) * f591c4724615 (merge-ort: copy and adapt merge_3way() from merge-recursive.c, 2021-01-01) for some of the early comments. However, the original comment was slightly misleading when it switched from mentioning paths to only mentioning directories. Fix that, and while at it also point to an example in the code which applies the extra needed care to permit the pointer comparison optimization. Signed-off-by: Elijah Newren --- merge-ort.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 9b9d82ed10f7d8..325b19b182ad41 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -316,9 +316,14 @@ struct merge_options_internal { * (e.g. "drivers/firmware/raspberrypi.c"). * * store all relevant paths in the repo, both directories and * files (e.g. drivers, drivers/firmware would also be included) - * * these keys serve to intern all the path strings, which allows - * us to do pointer comparison on directory names instead of - * strcmp; we just have to be careful to use the interned strings. + * * these keys serve to intern *all* path strings, which allows us + * to do pointer comparisons on file & directory names instead of + * using strcmp; however, for this pointer-comparison optimization + * to work, any code path that independently computes a path needs + * to check for it existing in this strmap, and if so, point to + * the path in this strmap instead of their computed copy. See + * the "reuse known pointer" comment in + * apply_directory_rename_modifications() for an example. * * The values of paths: * * either a pointer to a merged_info, or a conflict_info struct From c00b6821e760ad79a1bf73f3997fc7ab7b6d494a Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 17 Jul 2025 11:06:29 -0700 Subject: [PATCH 6/7] merge-ort: fix incorrect file handling We have multiple bugs here -- accidental silent file deletion, accidental silent file retention for files that should be deleted, and incorrect number of entries left in the index. The series merged at commit d3b88be1b450 (Merge branch 'en/merge-dir-rename-corner-case-fix', 2021-07-16) introduced testcase 12i-12k in t6423 which checked for rename-to-self cases, and fixed bugs that merge-ort and merge-recursive had with these testcases. At the time, I noted that merge-ort had one bug for these cases, while merge-recursive had two. It turns out that merge-ort did in fact have another bug, but the "relevant renames" optimizations were masking it. If we modify testcase 12i from t6423 to modify the file in the commit that renames it (but only modify it enough that it can still be detected as a rename), then we can trigger silent deletion of the file. Tweak testcase 12i slightly to make the file in question have more than one line in it. This leaves the testcase intact other than changing the initial contents of this one file. The purpose of this tweak is to minimize the changes between this testcase and a new one that we want to add. Then duplicate testcase 12i as 12i2, changing it so that it adds a single line to the file in question when it is renamed; testcase 12i2 then serves as a testcase for this merge-ort bug that I previously overlooked. Further, commit 98a1a00d5301 (t6423: add a testcase causing a failed assertion in process_renames, 2025-03-06), fixed an issue with rename-to-self but added a new testcase, 12n, that only checked for whether the merge ran to completion. A few commits ago, we modified this test to check for the number of entries in the index -- but noted that the number was wrong. And we also noted a silently-keep-instead-of-delete bug at the same time in the new testcase 12n2. In summary, we have the following bugs with rename-to-self cases: * silent deletion of file expected to be kept (t6423 testcase 12i2) * silent retention of file expected to be removed (t6423 testcase 12n2) * wrong number of extries left in the index (t6423 testcase 12n) All of these bugs arise because in a rename-to-self case, when we have a rename A->B, both A and B name the same file. The code in process_renames() assumes A & B are different, and tries to move the higher order stages and file contents so that they are associated just with the new path, but the assumptions of A & B being different can cause A to be deleted when it's not supposed to be or mark B as resolved and kept in place when it's supposed to be deleted. Since A & B are already the same path in the rename-to-self case, simply skip the steps in process_renames() for such files to fix these bugs. Signed-off-by: Elijah Newren --- merge-ort.c | 14 ++++++ t/t6423-merge-rename-directories.sh | 69 +++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 325b19b182ad41..e7bdcf7b967e7a 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2878,6 +2878,20 @@ static int process_renames(struct merge_options *opt, newinfo = new_ent->value; } + /* + * Directory renames can result in rename-to-self; the code + * below assumes we have A->B with different A & B, and tries + * to move all entries to path B. If A & B are the same path, + * the logic can get confused, so skip further processing when + * A & B are already the same path. + * + * As a reminder, we can avoid strcmp here because all paths + * are interned in opt->priv->paths; see the comment above + * "paths" in struct merge_options_internal. + */ + if (oldpath == newpath) + continue; + /* * If pair->one->path isn't in opt->priv->paths, that means * that either directory rename detection removed that diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh index e1251b4e12cea8..49eb10392bed62 100755 --- a/t/t6423-merge-rename-directories.sh +++ b/t/t6423-merge-rename-directories.sh @@ -4731,7 +4731,7 @@ test_setup_12i () { mkdir -p source/subdir && echo foo >source/subdir/foo && - echo bar >source/bar && + printf "%d\n" 1 2 3 4 5 6 7 >source/bar && echo baz >source/baz && git add source && git commit -m orig && @@ -4778,6 +4778,69 @@ test_expect_success '12i: Directory rename causes rename-to-self' ' ) ' +# Testcase 12i2, Identical to 12i except that source/subdir/bar modified on unrenamed side +# Commit O: source/{subdir/foo, bar, baz_1} +# Commit A: source/{foo, bar_2, baz_1} +# Commit B: source/{subdir/{foo, bar}, baz_2} +# Expected: source/{foo, bar, baz_2}, with conflicts on +# source/bar vs. source/subdir/bar + +test_setup_12i2 () { + git init 12i2 && + ( + cd 12i2 && + + mkdir -p source/subdir && + echo foo >source/subdir/foo && + printf "%d\n" 1 2 3 4 5 6 7 >source/bar && + echo baz >source/baz && + git add source && + git commit -m orig && + + git branch O && + git branch A && + git branch B && + + git switch A && + git mv source/subdir/foo source/foo && + echo 8 >> source/bar && + git add source/bar && + git commit -m A && + + git switch B && + git mv source/bar source/subdir/bar && + echo more baz >>source/baz && + git add source/baz && + git commit -m B + ) +} + +test_expect_success '12i2: Directory rename causes rename-to-self' ' + test_setup_12i2 && + ( + cd 12i2 && + + git checkout A^0 && + + test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 && + + test_path_is_missing source/subdir && + test_path_is_file source/bar && + test_path_is_file source/baz && + + git ls-files >actual && + uniq tracked && + test_line_count = 3 tracked && + + git status --porcelain -uno >actual && + cat >expect <<-\EOF && + UU source/bar + M source/baz + EOF + test_cmp expect actual + ) +' + # Testcase 12j, Directory rename to root causes rename-to-self # Commit O: {subdir/foo, bar, baz_1} # Commit A: {foo, bar, baz_1} @@ -5106,7 +5169,7 @@ test_setup_12n () { ) } -test_expect_failure '12n: Directory rename transitively makes rename back to self' ' +test_expect_success '12n: Directory rename transitively makes rename back to self' ' test_setup_12n && ( cd 12n && @@ -5166,7 +5229,7 @@ test_setup_12n2 () { ) } -test_expect_failure '12n2: Directory rename transitively makes rename back to self' ' +test_expect_success '12n2: Directory rename transitively makes rename back to self' ' test_setup_12n2 && ( cd 12n2 && From c2eef5ef5dc5d030859fd3269e2382429515a6b9 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 14 Jul 2025 07:44:17 -0700 Subject: [PATCH 7/7] merge-ort: fix directory rename on top of source of other rename/delete At GitHub, we've got a real-world repository that has been triggering failures of the form: git: merge-ort.c:3007: process_renames: Assertion `newinfo && !newinfo->merged.clean' failed. which comes from the line: VERIFY_CI(newinfo); Unfortunately, this one has been quite complex to unravel, and is a bit complex to explain. So, I'm going to carefully try to explain each relevant piece needed to understand the fix, then carefully build up from a simple testcase to some of the relevant testcases. == New special case we need to consider == Rename pairs in the diffcore machinery connect the source path of a rename with the destination path of a rename. Since we have rename pairs to consider on both sides of history since the merge base, merging has to consider a few special cases of possible overlap: A) two rename pairs having the same target path B) two rename pairs having the same source path C) the source path of one rename pair being the target path of a different rename pair Some of these came up often enough that we gave them names: A) a rename/rename(2to1) conflict (looks similar to an add/add conflict) B) a rename/rename(1to2) conflict, which represents the same path being renamed differently on the two sides of history C) not yet named merge-ort is well-prepared to handle cases (A) and (B), as was merge-recursive (which was merge-ort's predecessor). Case (C) was briefly considered during the years of merge-recursive maintenance, but the full extent of support it got was a few FIXME/TODO comments littered around the code highlighting some of the places that would probably need to be fixed to support it. When I wrote merge-ort I ignored case (C) entirely, since I believed that case (C) was only possible if we were to support break detection during merges. Not only had break detection never been supported by any merge algorithm, I thought break detection wasn't worth the effort to support in a merge algorithm. However, it turns out that case (C) can be triggered without break detection, if there's enough moving pieces. Before I dive into how to trigger case (C) with directory renames plus other renames, it might be helpful to use a simpler example with break detection first. And before we get to that it may help to explain some more basics of handling renames in the merge algorithm. So, let me first backup and provide a quick refresher on each of * handling renames * what break detection would mean, if supported in merging * handling directory renames From there, I'll build up from a basic directory rename detection case to one that triggers a failure currently. == Handling renames == In the merge machinery when we have a rename of a path A -> B, processing that rename needs to remove path A, and make sure that path B has the relevant information. Note that if the content was also modified on both sides, this may mean that we have 3 different stages that need to be stored at path B instead of having some stored at path A. Having all stages stored at path B makes it much easier for users to investigate and resolve the content conflict associated with a renamed path. For example: * "git status" doesn't have to figure out how to list paths A & B and attempt to connect them for users; it can just list path B. * Users can use "git ls-files -u B" (instead of trying to find the previous name of the file so they can list both, i.e. "git ls-files -u A B") * Users can resolve via "git add B" (without needing to "git rm A") == What break detection would mean == If break detection were supported, we might have cases where A -> B *and* C -> A, meaning that both rename pairs might believe they need to update A. In particular, the processing of A -> B would need to be careful to not clear out all stages of A and mark it resolved, while both renames would need to figure out which stages of A belong with A and which belong with B, so that both paths have the right stages associated with them. merge-ort (like merge-recursive before it) makes no attempt to handle break detection; it runs with break detection turned off. It would need to be retrofitted to handle such cases. == Directory rename detection == If one side of history renames directory D/ -> E/, and the other side of history adds new files to D/, then directory rename detection notices and suggests moving those new files to E/. A similar thing is done for paths renamed into D/, causing them to be transitively renamed into E/. The default in the merge machinery is to report a conflict whenever a directory rename might modify the location of a path, so that users can decide whether they wanted the original path or the directory-rename-induced location. However, that means the default codepath still runs through all the directory rename detection logic, it just supplements it with providing conflict notices when it is done. == Building up increasingly complex testcases == I'll start with a really simple directory rename example, and then slowly add twists that explain new pieces until we get to the problematic cases: === Testcase 1 === Let's start with a concrete example, where particular files/directories of interest that exist or are changed on each side are called out: Original: our side: rename B/file -> C/file their side: rename C/ -> A/ For this case, we'd expect to see the original B/file appear not at C/file but at A/file. (We would also expect a conflict notice that the user will want to choose between C/file and A/file, but I'm going to ignore conflict notices from here on by assuming merge.directoryRenames is set to `true` rather than `conflict`; the only difference that assumption makes is whether that makes the merge be considered to be conflicted and whether it prints a conflict notice; what is written to the index or working directory is unchanged.) === Testcase 2 === Modify testcase 1 by having A/file exist from the start: Original: A/file exists our side: rename B/file -> C/file their side: rename C/ -> A/ In such a case, to avoid user confusion at what looks kind of like an add/add conflict (even though the original path at A/file was not added by either side of the merge), we turn off directory rename detection for this path and print a "in the way" warning to the user: CONFLICT (implicit dir rename): Existing file/dir ... in the way ... The testcases in section 5 of t6423 explore these in more detail. === Testcase 3 === Let's modify testcase 1 in a slightly different way: have A/file be added by their side rather than it already existing. Original: our side: rename B/file -> C/file their side: rename C/ -> A/ add A/file In this case, the directory rename detection basically transforms our side's original B/file -> C/file into a B/file -> A/file, and so we get a rename/add conflict, with one version of A/file coming from the renamed file, and another coming from the new A/file, each stored as stages 2 and 3 in conflicts. This kind of add/add conflict is perhaps slightly more complex than a regular add/add conflict, but with the printed messages it makes sense where it came from and we have different stages of the file to work with to resolve the conflict. === Testcase 4 === Let's do something similar to testcase 3, but have the opposite side of history add A/file: Original: our side: rename B/file -> C/file add A/file their side: rename C/ -> A/ Now if we allow directory rename detection to modify C/file to A/file, then we also get a rename/add conflict, but in this case we'd need both higher order stages being recorded on side 2, which makes no sense. The index can't store multiple stage 2 entries, and even if we could, it would probably be confusing for users to work with. So, similar to what we do when there was an A/file in the original version, we simply turn off directory rename detection for cases like this and provide the "in the way" CONFLICT notice to the user. === Testcase 5 === We're slowly getting closer. Let's mix it up by having A/file exist at the beginning but not exist on their side: original: A/file exists our side: rename B/file -> C/file their side: rename C/ -> A/ rename A/file -> D/file For this case, you could say that since A/file -> D/file, it's no longer in the way of C/file being moved by directory rename detection to A/file. But that would give us a case where A/file is both the source and the target of a rename, similar to break detection, which the code isn't currently equipped to handle. This is not yet the case that causes current failures; to the current code, this kind of looks like testcase 4 in that A/file is in the way on our side (since A/file was in the original and was umodified by our side). So, it results in a "in the way" notification with directory rename detection being turned off for A/file so that B/file ends up at C/file. Perhaps the resolution could be improved in the future, but our "in the way" checks prevented such problems by noticing that A/file exists on our side and thus turns off directory rename detection from affecting C/file's location. So, while the merge result could be perhaps improved, the fact that this is currently handled by giving the user an "in the way" message gives the user a chance to resolve and prevents the code from tripping itself up. === Testcase 6 === Let's modify testcase 5 a bit more, to also delete A/file on our side: original: A/file exists our side: rename B/file -> C/file delete A/file their side: rename C/ -> A/ rename A/file -> D/file Now the "in the way" logic doesn't detect that there's an A/file in the way (neither side has an A/file anymore), so it's fine to transitively rename C/file further to A/file...except that we end up with A/file being both the source of one rename, and the target of a different rename. Each rename pair tries to handle the resolution of the source and target paths of its own rename. But when we go to process the second rename pair in process_renames(), we do not expect either the source or the destination to be marked as handled already; so, when we hit the sanity checks that these are not handled: VERIFY_CI(oldinfo); VERIFY_CI(newinfo); then one of these is going to throw an assertion failure since the previous rename pair already marked both of its paths as handled. This will give us an error of the form: git: merge-ort.c:3007: process_renames: Assertion `newinfo && !newinfo->merged.clean' failed. This is the failure we're currently triggering, and it fundamentally depends on: * a path existing in the original * that original path being removed or renamed on *both* sides * some kind of directory rename moving some *other* path into that original path This was added as testcase 12q in t6423. === Testcase 7 === Bonus bug found while investigating! Let's go back to the comparison between testcases 2 & 3, and set up a file present on their side that we need to consider: Original: A/file exists our side: rename B/file -> C/file rename A/file -> D/file their side: rename C/ -> A/ Here, there is no A/file in the way on our side like testcase 4. There is an A/file present on their side like testcase 3, which was an add/add conflict, but that's associated with the file be renamed to D/file. So, that really shouldn't be an add/add conflict because we instead want all modes of the original A/file to be transported to D/file. Unfortunately, the current code kind of treats it like an add/add conflict instead...but even worse. There is also a valid mode for A/file in the original, which normally goes to stage 1. However, an add/add conflict should be represented in the index with no mode at stage 1 (for the original side), only modes at stages 2 and 3 (for our and their side), so for an add/add we'd expect that mode for A/file in the original version to be cleared out (or be transported to D/file). Unfortunately, the code currently leaves not only the stage 3 entry for A/file intact, it also leaves the stage 1 entry for A/file. This results in `git ls-files -u A/file` output of the form: 100644 d00491fd7e5bb6fa28c517a0bb32b8b506539d4d 1 A/file 100644 0cfbf08886fca9a91cb753ec8734c84fcbe52c9f 2 A/file 100644 d00491fd7e5bb6fa28c517a0bb32b8b506539d4d 3 A/file This would likely cause users to believe this isn't an add/add conflict; rather, this would lead them to believe that A/file was only modified on our side and that therefore it should not have been a conflict in the first place. And while resolving the conflict in favor of our side is the correct resolution (because stages 1 and 3 should have been cleared out in the first place), this is certainly likely to cause confusion for anyone attempting to investigate why this path was marked as conflicted. This was added as testcase 12p in t6423. == Attempted solutions that I discarded == 1) For each side of history, create a strset of the sources of each rename on the other side of history. Then when using directory renames to modify existing renames, verify that we aren't renaming to a source of another rename. Unfortunately, the "relevant renames" optimization in merge-ort means we often don't detect renames -- we just see a delete and an add -- which is easy to forget and makes debugging testcases harder, but it also turns out that this solution in insufficient to solve the related problems in the area (more on that below). 2) Modify the code to be aware of the possibility of renaming to the source of another side's rename, and make all the conflict resolution logic for each case (including existing rename/rename(2to1) and rename/rename(1to2) cases) handle the additional complexity. It turns out there was much more code to audit than I wanted, for a really niche case. I didn't like how many changes were needed, and aborted. == Solution == We do not want the stages of unrelated files appearing at the same path in the index except when dealing with an add/add conflict. While we previously handled this for stages 2 & 3, we also need to worry about stage 1. So check for a stage 1 index entry being in the way of a directory rename. However, if we can detect that the stage 1 index entry is actually from a related file due to a directory-rename-causes-rename-to-self situation, then we can allow the stage 1 entry to remain. From this wording, you may note that it's not just rename cases that are a problem; bugs could be triggered with directory renames vs simple adds. That leads us to... == Testcases 8+ == Another bonus bug, found via understanding our final solutions (and the failure of our first attempted solution)! Let's tweak testcase 7 a bit: Original: A/file exists our side: delete A/file add -> C/file their side: delete A/file rename C/ -> A/ Here, there doesn't seem to be a big problem. Sure C/file gets modified via the directory rename of C/ -> A/ so that it becomes A/file, but there's no file in the way, right? Actually, here we have a problem that the stage 1 entry of A/file would be combined with the stage 2 entry of C/file, and make it look like a modify/delete conflict. Perhaps there is some extra checking that could be added to the code to make it attempt to clear out the stage 1 entry of A/file, but the various rename-to-self-via-directory-rename testcases make that a bit more difficult. For now, it's easier to just treat this as a path-in-the-way situation and not allow the directory rename to modify C/file. That sounds all well and good, but it does have an interesting side effect. Due to the "relevant renames" optimizations in merge-ort (i.e. only detect the renames you need), 100% renames whose files weren't modified on the other side often go undetected. This means that if we modify this testcase slightly to: Original: A/file exists our side: A/file -> C/file their side: rename C/ -> A/ Then although this looks like where the directory rename just moves C/file back to A/file and there's no problem, we may not detect the A/file -> C/file rename. Instead it will look like a deletion of A/file and an addition of C/file. The directory rename then appears to be moving C/file to A/file, which is on top of an "unrelated" file (or at least a file it doesn't know is related). So, we will report path-in-the-way conflicts now in cases where we didn't before. That's better than silently and accidentally combining stages of unrelated files and making them look like a modify/delete; users can investigate the reported conflict and simply resolve it. This means we tweak the expected solution for testcases 12i, 12j, and 12k. (Those three tests are basically the same test repeated three times, but I was worried when I added those that subtle differences in parent/child, sibling/sibling, and toplevel directories might mess up how rename-to-self testcases actually get handled.) Signed-off-by: Elijah Newren --- merge-ort.c | 18 +- t/t6423-merge-rename-directories.sh | 357 ++++++++++++++++++++++++++-- 2 files changed, 355 insertions(+), 20 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index e7bdcf7b967e7a..a0e29417157537 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2318,14 +2318,20 @@ static char *apply_dir_rename(struct strmap_entry *rename_info, return strbuf_detach(&new_path, NULL); } -static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask) +static int path_in_way(struct strmap *paths, + const char *path, + unsigned side_mask, + struct diff_filepair *p) { struct merged_info *mi = strmap_get(paths, path); struct conflict_info *ci; if (!mi) return 0; INITIALIZE_CI(ci, mi); - return mi->clean || (side_mask & (ci->filemask | ci->dirmask)); + return mi->clean || (side_mask & (ci->filemask | ci->dirmask)) + /* See testcases 12[npq] of t6423 for this next condition */ + || ((ci->filemask & 0x01) && + strcmp(p->one->path, path)); } /* @@ -2337,6 +2343,7 @@ static int path_in_way(struct strmap *paths, const char *path, unsigned side_mas static char *handle_path_level_conflicts(struct merge_options *opt, const char *path, unsigned side_index, + struct diff_filepair *p, struct strmap_entry *rename_info, struct strmap *collisions) { @@ -2371,7 +2378,7 @@ static char *handle_path_level_conflicts(struct merge_options *opt, */ if (c_info->reported_already) { clean = 0; - } else if (path_in_way(&opt->priv->paths, new_path, 1 << side_index)) { + } else if (path_in_way(&opt->priv->paths, new_path, 1 << side_index, p)) { c_info->reported_already = 1; strbuf_add_separated_string_list(&collision_paths, ", ", &c_info->source_files); @@ -2578,6 +2585,7 @@ static void free_collisions(struct strmap *collisions) static char *check_for_directory_rename(struct merge_options *opt, const char *path, unsigned side_index, + struct diff_filepair *p, struct strmap *dir_renames, struct strmap *dir_rename_exclusions, struct strmap *collisions, @@ -2634,7 +2642,7 @@ static char *check_for_directory_rename(struct merge_options *opt, return NULL; } - new_path = handle_path_level_conflicts(opt, path, side_index, + new_path = handle_path_level_conflicts(opt, path, side_index, p, rename_info, &collisions[side_index]); *clean_merge &= (new_path != NULL); @@ -3436,7 +3444,7 @@ static int collect_renames(struct merge_options *opt, } new_path = check_for_directory_rename(opt, p->two->path, - side_index, + side_index, p, dir_renames_for_side, rename_exclusions, collisions, diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh index 49eb10392bed62..533ac85dc83409 100755 --- a/t/t6423-merge-rename-directories.sh +++ b/t/t6423-merge-rename-directories.sh @@ -4759,10 +4759,29 @@ test_expect_success '12i: Directory rename causes rename-to-self' ' git checkout A^0 && - test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 && - - test_path_is_missing source/subdir && - test_path_is_file source/bar && + # NOTE: A potentially better resolution would be for + # source/bar -> source/subdir/bar + # to use the directory rename to become + # source/bar -> source/bar + # (a rename to self), and thus we end up with bar with + # a path conflict (given merge.directoryRenames=conflict). + # However, since the relevant renames optimization + # prevents us from noticing + # source/bar -> source/subdir/bar + # as a rename and looking at it just as + # delete source/bar + # add source/subdir/bar + # the directory rename of source/subdir/bar -> source/bar does + # not look like a rename-to-self situation but a + # rename-on-top-of-other-file situation. We do not want + # stage 1 entries from an unrelated file, so we expect an + # error about there being a file in the way. + + test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 >out && + + grep "CONFLICT (implicit dir rename).*source/bar in the way" out && + test_path_is_missing source/bar && + test_path_is_file source/subdir/bar && test_path_is_file source/baz && git ls-files >actual && @@ -4771,8 +4790,8 @@ test_expect_success '12i: Directory rename causes rename-to-self' ' git status --porcelain -uno >actual && cat >expect <<-\EOF && - UU source/bar M source/baz + R source/bar -> source/subdir/bar EOF test_cmp expect actual ) @@ -4882,10 +4901,29 @@ test_expect_success '12j: Directory rename to root causes rename-to-self' ' git checkout A^0 && - test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 && - - test_path_is_missing subdir && - test_path_is_file bar && + # NOTE: A potentially better resolution would be for + # bar -> subdir/bar + # to use the directory rename to become + # bar -> bar + # (a rename to self), and thus we end up with bar with + # a path conflict (given merge.directoryRenames=conflict). + # However, since the relevant renames optimization + # prevents us from noticing + # bar -> subdir/bar + # as a rename and looking at it just as + # delete bar + # add subdir/bar + # the directory rename of subdir/bar -> bar does not look + # like a rename-to-self situation but a + # rename-on-top-of-other-file situation. We do not want + # stage 1 entries from an unrelated file, so we expect an + # error about there being a file in the way. + + test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 >out && + grep "CONFLICT (implicit dir rename).*bar in the way" out && + + test_path_is_missing bar && + test_path_is_file subdir/bar && test_path_is_file baz && git ls-files >actual && @@ -4894,8 +4932,8 @@ test_expect_success '12j: Directory rename to root causes rename-to-self' ' git status --porcelain -uno >actual && cat >expect <<-\EOF && - UU bar M baz + R bar -> subdir/bar EOF test_cmp expect actual ) @@ -4942,10 +4980,29 @@ test_expect_success '12k: Directory rename with sibling causes rename-to-self' ' git checkout A^0 && - test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 && - - test_path_is_missing dirB && - test_path_is_file dirA/bar && + # NOTE: A potentially better resolution would be for + # dirA/bar -> dirB/bar + # to use the directory rename (dirB/ -> dirA/) to become + # dirA/bar -> dirA/bar + # (a rename to self), and thus we end up with bar with + # a path conflict (given merge.directoryRenames=conflict). + # However, since the relevant renames optimization + # prevents us from noticing + # dirA/bar -> dirB/bar + # as a rename and looking at it just as + # delete dirA/bar + # add dirB/bar + # the directory rename of dirA/bar -> dirB/bar does + # not look like a rename-to-self situation but a + # rename-on-top-of-other-file situation. We do not want + # stage 1 entries from an unrelated file, so we expect an + # error about there being a file in the way. + + test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 >out && + grep "CONFLICT (implicit dir rename).*dirA/bar in the way" out && + + test_path_is_missing dirA/bar && + test_path_is_file dirB/bar && test_path_is_file dirA/baz && git ls-files >actual && @@ -4954,8 +5011,8 @@ test_expect_success '12k: Directory rename with sibling causes rename-to-self' ' git status --porcelain -uno >actual && cat >expect <<-\EOF && - UU dirA/bar M dirA/baz + R dirA/bar -> dirB/bar EOF test_cmp expect actual ) @@ -5258,6 +5315,276 @@ test_expect_success '12n2: Directory rename transitively makes rename back to se ) ' +# Testcase 12o, Directory rename hits other rename source; file still in way on same side +# Commit O: A/file1_1 +# A/stuff +# B/file1_2 +# B/stuff +# C/other +# Commit A: A/file1_1 +# A/stuff +# B/stuff +# C/file1_2 +# C/other +# Commit B: D/file2_1 +# A/stuff +# B/file1_2 +# B/stuff +# A/other +# In words: +# A: rename B/file1_2 -> C/file1_2 +# B: rename C/ -> A/ +# rename A/file1_1 -> D/file2_1 +# Rationale: +# A/stuff is unmodified, it shows up in final output +# B/stuff is unmodified, it shows up in final output +# C/other touched on one side (rename to A), so A/other shows up in output +# A/file1 is renamed to D/file2 +# B/file1 -> C/file1 and even though C/ -> A/, A/file1 is +# "in the way" so we don't do the directory rename +# Expected: A/stuff +# B/stuff +# A/other +# D/file2 +# C/file1 +# + CONFLICT (implicit dir rename): A/file1 in way of C/file1 +# + +test_setup_12o () { + git init 12o && + ( + cd 12o && + + mkdir -p A B C && + echo 1 >A/file1 && + echo 2 >B/file1 && + echo other >C/other && + echo Astuff >A/stuff && + echo Bstuff >B/stuff && + git add . && + git commit -m "O" && + + git branch O && + git branch A && + git branch B && + + git switch A && + git mv B/file1 C/ && + git add . && + git commit -m "A" && + + git switch B && + mkdir -p D && + git mv A/file1 D/file2 && + git mv C/other A/other && + git add . && + git commit -m "B" + ) +} + +test_expect_success '12o: Directory rename hits other rename source; file still in way on same side' ' + test_setup_12o && + ( + cd 12o && + + git checkout -q A^0 && + + test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 >out && + + test_stdout_line_count = 5 git ls-files -s && + test_stdout_line_count = 1 git ls-files -s A/other && + test_stdout_line_count = 1 git ls-files -s A/stuff && + test_stdout_line_count = 1 git ls-files -s B/stuff && + test_stdout_line_count = 1 git ls-files -s D/file2 && + + grep "CONFLICT (implicit dir rename).*Existing file/dir at A/file1 in the way" out && + test_stdout_line_count = 1 git ls-files -s C/file1 + ) +' + +# Testcase 12p, Directory rename hits other rename source; file still in way on other side +# Commit O: A/file1_1 +# A/stuff +# B/file1_2 +# B/stuff +# C/other +# Commit A: D/file2_1 +# A/stuff +# B/stuff +# C/file1_2 +# C/other +# Commit B: A/file1_1 +# A/stuff +# B/file1_2 +# B/stuff +# A/other +# Short version: +# A: rename A/file1_1 -> D/file2_1 +# rename B/file1_2 -> C/file1_2 +# B: Rename C/ -> A/ +# Rationale: +# A/stuff is unmodified, it shows up in final output +# B/stuff is unmodified, it shows up in final output +# C/other touched on one side (rename to A), so A/other shows up in output +# A/file1 is renamed to D/file2 +# B/file1 -> C/file1 and even though C/ -> A/, A/file1 is +# "in the way" so we don't do the directory rename +# Expected: A/stuff +# B/stuff +# A/other +# D/file2 +# C/file1 +# + CONFLICT (implicit dir rename): A/file1 in way of C/file1 +# + +test_setup_12p () { + git init 12p && + ( + cd 12p && + + mkdir -p A B C && + echo 1 >A/file1 && + echo 2 >B/file1 && + echo other >C/other && + echo Astuff >A/stuff && + echo Bstuff >B/stuff && + git add . && + git commit -m "O" && + + git branch O && + git branch A && + git branch B && + + git switch A && + mkdir -p D && + git mv A/file1 D/file2 && + git mv B/file1 C/ && + git add . && + git commit -m "A" && + + git switch B && + git mv C/other A/other && + git add . && + git commit -m "B" + ) +} + +test_expect_success '12p: Directory rename hits other rename source; file still in way on other side' ' + test_setup_12p && + ( + cd 12p && + + git checkout -q A^0 && + + test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 >out && + + test_stdout_line_count = 5 git ls-files -s && + test_stdout_line_count = 1 git ls-files -s A/other && + test_stdout_line_count = 1 git ls-files -s A/stuff && + test_stdout_line_count = 1 git ls-files -s B/stuff && + test_stdout_line_count = 1 git ls-files -s D/file2 && + + grep "CONFLICT (implicit dir rename).*Existing file/dir at A/file1 in the way" out && + test_stdout_line_count = 1 git ls-files -s C/file1 + ) +' + +# Testcase 12q, Directory rename hits other rename source; file removed though +# Commit O: A/file1_1 +# A/stuff +# B/file1_2 +# B/stuff +# C/other +# Commit A: A/stuff +# B/stuff +# C/file1_2 +# C/other +# Commit B: D/file2_1 +# A/stuff +# B/file1_2 +# B/stuff +# A/other +# In words: +# A: delete A/file1_1, rename B/file1_2 -> C/file1_2 +# B: Rename C/ -> A/, rename A/file1_1 -> D/file2_1 +# Rationale: +# A/stuff is unmodified, it shows up in final output +# B/stuff is unmodified, it shows up in final output +# C/other touched on one side (rename to A), so A/other shows up in output +# A/file1 is rename/delete to D/file2, so two stages for D/file2 +# B/file1 -> C/file1 and even though C/ -> A/, A/file1 as a source was +# "in the way" (ish) so we don't do the directory rename +# Expected: A/stuff +# B/stuff +# A/other +# D/file2 (two stages) +# C/file1 +# + CONFLICT (implicit dir rename): A/file1 in way of C/file1 +# + CONFLICT (rename/delete): D/file2 +# + +test_setup_12q () { + git init 12q && + ( + cd 12q && + + mkdir -p A B C && + echo 1 >A/file1 && + echo 2 >B/file1 && + echo other >C/other && + echo Astuff >A/stuff && + echo Bstuff >B/stuff && + git add . && + git commit -m "O" && + + git branch O && + git branch A && + git branch B && + + git switch A && + git rm A/file1 && + git mv B/file1 C/ && + git add . && + git commit -m "A" && + + git switch B && + mkdir -p D && + git mv A/file1 D/file2 && + git mv C/other A/other && + git add . && + git commit -m "B" + ) +} + +test_expect_success '12q: Directory rename hits other rename source; file removed though' ' + test_setup_12q && + ( + cd 12q && + + git checkout -q A^0 && + + test_must_fail git -c merge.directoryRenames=conflict merge -s recursive B^0 >out && + + grep "CONFLICT (rename/delete).*A/file1.*D/file2" out && + grep "CONFLICT (implicit dir rename).*Existing file/dir at A/file1 in the way" out && + + test_stdout_line_count = 6 git ls-files -s && + test_stdout_line_count = 1 git ls-files -s A/other && + test_stdout_line_count = 1 git ls-files -s A/stuff && + test_stdout_line_count = 1 git ls-files -s B/stuff && + test_stdout_line_count = 2 git ls-files -s D/file2 && + + # This is a slightly suboptimal resolution; allowing the + # rename of C/ -> A/ to affect C/file1 and further rename + # it to A/file1 would probably be preferable, but since + # A/file1 existed as the source of another rename, allowing + # the dir rename of C/file1 -> A/file1 would mean modifying + # the code so that renames do not adjust both their source + # and target paths in all cases. + ! grep "CONFLICT (file location)" out && + test_stdout_line_count = 1 git ls-files -s C/file1 + ) +' ########################################################################### # SECTION 13: Checking informational and conflict messages