Skip to content

Commit 5c8b63d

Browse files
committed
Merge branch 'yc/histogram-hunk-shift-fix' into seen
The final clean-up phase of the diff output could turn the result of histogram diff algorithm suboptimal, which has been corrected. Comments? We do want a real review around here... cf. <CALnO6CC3WTBjaLR7yAr-w5eaqzyd2qF5MAyfV2wQY3+TDEbEsw@mail.gmail.com> * yc/histogram-hunk-shift-fix: xdiff: re-diff shifted change groups when using histogram algorithm
2 parents 57c4ac8 + 1e7247f commit 5c8b63d

File tree

3 files changed

+181
-0
lines changed

3 files changed

+181
-0
lines changed

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,7 @@ integration_tests = [
504504
't4071-diff-minimal.sh',
505505
't4072-diff-max-depth.sh',
506506
't4073-diff-stat-name-width.sh',
507+
't4074-diff-shifted-matched-group.sh',
507508
't4100-apply-stat.sh',
508509
't4101-apply-nonl.sh',
509510
't4102-apply-rename.sh',
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#!/bin/sh
2+
3+
test_description='shifted diff groups re-diffing during histogram diff'
4+
5+
. ./test-lib.sh
6+
7+
test_expect_success 'shifted diff group should re-diff to minimize patch' '
8+
test_write_lines A x A A A x A A A >file1 &&
9+
test_write_lines A x A Z A x A A A >file2 &&
10+
11+
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
12+
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
13+
14+
cat >expect <<-EOF &&
15+
diff --git a/file1 b/file2
16+
index $file1_h..$file2_h 100644
17+
--- a/file1
18+
+++ b/file2
19+
@@ -1,7 +1,7 @@
20+
A
21+
x
22+
A
23+
-A
24+
+Z
25+
A
26+
x
27+
A
28+
EOF
29+
30+
test_expect_code 1 git diff --no-index --histogram file1 file2 >output &&
31+
test_cmp expect output
32+
'
33+
34+
test_expect_success 're-diff should preserve diff flags' '
35+
test_write_lines a b c a b c >file1 &&
36+
test_write_lines x " b" z a b c >file2 &&
37+
38+
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
39+
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
40+
41+
cat >expect <<-EOF &&
42+
diff --git a/file1 b/file2
43+
index $file1_h..$file2_h 100644
44+
--- a/file1
45+
+++ b/file2
46+
@@ -1,6 +1,6 @@
47+
-a
48+
-b
49+
-c
50+
+x
51+
+ b
52+
+z
53+
a
54+
b
55+
c
56+
EOF
57+
58+
test_expect_code 1 git diff --no-index --histogram file1 file2 >output &&
59+
test_cmp expect output &&
60+
61+
cat >expect_iwhite <<-EOF &&
62+
diff --git a/file1 b/file2
63+
index $file1_h..$file2_h 100644
64+
--- a/file1
65+
+++ b/file2
66+
@@ -1,6 +1,6 @@
67+
-a
68+
+x
69+
b
70+
-c
71+
+z
72+
a
73+
b
74+
c
75+
EOF
76+
77+
test_expect_code 1 git diff --no-index --histogram --ignore-all-space file1 file2 >output_iwhite &&
78+
test_cmp expect_iwhite output_iwhite
79+
'
80+
81+
test_expect_success 'shifting on either side should trigger re-diff properly' '
82+
test_write_lines a b c a b c a b c >file1 &&
83+
test_write_lines a b c a1 a2 a3 b c1 a b c >file2 &&
84+
85+
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
86+
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
87+
88+
cat >expect1 <<-EOF &&
89+
diff --git a/file1 b/file2
90+
index $file1_h..$file2_h 100644
91+
--- a/file1
92+
+++ b/file2
93+
@@ -1,9 +1,11 @@
94+
a
95+
b
96+
c
97+
-a
98+
+a1
99+
+a2
100+
+a3
101+
b
102+
-c
103+
+c1
104+
a
105+
b
106+
c
107+
EOF
108+
109+
test_expect_code 1 git diff --no-index --histogram file1 file2 >output1 &&
110+
test_cmp expect1 output1 &&
111+
112+
cat >expect2 <<-EOF &&
113+
diff --git a/file2 b/file1
114+
index $file2_h..$file1_h 100644
115+
--- a/file2
116+
+++ b/file1
117+
@@ -1,11 +1,9 @@
118+
a
119+
b
120+
c
121+
-a1
122+
-a2
123+
-a3
124+
+a
125+
b
126+
-c1
127+
+c
128+
a
129+
b
130+
c
131+
EOF
132+
133+
test_expect_code 1 git diff --no-index --histogram file2 file1 >output2 &&
134+
test_cmp expect2 output2
135+
'
136+
137+
test_done

xdiff/xdiffi.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,7 @@ static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g)
983983
*/
984984
int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
985985
struct xdlgroup g, go;
986+
struct xdlgroup g_orig, go_orig;
986987
long earliest_end, end_matching_other;
987988
long groupsize;
988989

@@ -996,6 +997,9 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
996997
if (g.end == g.start)
997998
goto next;
998999

1000+
g_orig = g;
1001+
go_orig = go;
1002+
9991003
/*
10001004
* Now shift the change up and then down as far as possible in
10011005
* each direction. If it bumps into any other changes, merge
@@ -1105,6 +1109,45 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
11051109
}
11061110
}
11071111

1112+
/*
1113+
* If this has a matching group from the other file, it could
1114+
* either be the original match from the diff algorithm, or
1115+
* arrived at by shifting and joining groups. When it's the
1116+
* latter, it's possible for the two newly joined sides to have
1117+
* matching lines. Re-diff the group to mark these matching
1118+
* lines as unchanged and remove from the diff output.
1119+
*
1120+
* Only do this for histogram diff as its LCS algorithm makes
1121+
* this scenario possible. In contrast, patience diff finds LCS
1122+
* of unique lines that groups cannot be shifted across.
1123+
* Myer's diff (standalone or used as fall-back in patience
1124+
* diff) already finds minimal edits so it is not possible for
1125+
* shifted groups to result in a smaller diff. (Without
1126+
* XDF_NEED_MINIMAL, Myer's isn't technically guaranteed to be
1127+
* minimal, but it should be so most of the time)
1128+
*/
1129+
if (end_matching_other != -1 &&
1130+
XDF_DIFF_ALG(flags) == XDF_HISTOGRAM_DIFF &&
1131+
(g.start != g_orig.start ||
1132+
g.end != g_orig.end ||
1133+
go.start != go_orig.start ||
1134+
go.end != go_orig.end)) {
1135+
xpparam_t xpp;
1136+
xdfenv_t xe;
1137+
1138+
memset(&xpp, 0, sizeof(xpp));
1139+
xpp.flags = flags & ~XDF_DIFF_ALGORITHM_MASK;
1140+
1141+
memcpy(&xe.xdf1, xdf, sizeof(xdfile_t));
1142+
memcpy(&xe.xdf2, xdfo, sizeof(xdfile_t));
1143+
1144+
if (xdl_fall_back_diff(&xe, &xpp,
1145+
g.start + 1, g.end - g.start,
1146+
go.start + 1, go.end - go.start)) {
1147+
return -1;
1148+
}
1149+
}
1150+
11081151
next:
11091152
/* Move past the just-processed group: */
11101153
if (group_next(xdf, &g))

0 commit comments

Comments
 (0)