Skip to content

Commit d7c6c23

Browse files
committed
Merge branch 'jt/diff-anchored-patience'
"git diff" learned a variant of the "--patience" algorithm, for which the user can specify which 'unique' lines are to be used as anchoring points. * jt/diff-anchored-patience: diff: support anchoring line(s)
2 parents 6d2c461 + 2477ab2 commit d7c6c23

File tree

6 files changed

+169
-7
lines changed

6 files changed

+169
-7
lines changed

Documentation/diff-options.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,16 @@ endif::git-format-patch[]
8080
--histogram::
8181
Generate a diff using the "histogram diff" algorithm.
8282

83+
--anchored=<text>::
84+
Generate a diff using the "anchored diff" algorithm.
85+
+
86+
This option may be specified more than once.
87+
+
88+
If a line exists in both the source and destination, exists only once,
89+
and starts with this text, this algorithm attempts to prevent it from
90+
appearing as a deletion or addition in the output. It uses the "patience
91+
diff" algorithm internally.
92+
8393
--diff-algorithm={patience|minimal|histogram|myers}::
8494
Choose a diff algorithm. The variants are as follows:
8595
+

diff.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3210,6 +3210,8 @@ static void builtin_diff(const char *name_a,
32103210
ecbdata.opt = o;
32113211
ecbdata.header = header.len ? &header : NULL;
32123212
xpp.flags = o->xdl_opts;
3213+
xpp.anchors = o->anchors;
3214+
xpp.anchors_nr = o->anchors_nr;
32133215
xecfg.ctxlen = o->context;
32143216
xecfg.interhunkctxlen = o->interhunkcontext;
32153217
xecfg.flags = XDL_EMIT_FUNCNAMES;
@@ -3302,6 +3304,8 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
33023304
memset(&xpp, 0, sizeof(xpp));
33033305
memset(&xecfg, 0, sizeof(xecfg));
33043306
xpp.flags = o->xdl_opts;
3307+
xpp.anchors = o->anchors;
3308+
xpp.anchors_nr = o->anchors_nr;
33053309
xecfg.ctxlen = o->context;
33063310
xecfg.interhunkctxlen = o->interhunkcontext;
33073311
if (xdi_diff_outf(&mf1, &mf2, diffstat_consume, diffstat,
@@ -4594,9 +4598,18 @@ int diff_opt_parse(struct diff_options *options,
45944598
DIFF_XDL_SET(options, INDENT_HEURISTIC);
45954599
else if (!strcmp(arg, "--no-indent-heuristic"))
45964600
DIFF_XDL_CLR(options, INDENT_HEURISTIC);
4597-
else if (!strcmp(arg, "--patience"))
4601+
else if (!strcmp(arg, "--patience")) {
4602+
int i;
45984603
options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
4599-
else if (!strcmp(arg, "--histogram"))
4604+
/*
4605+
* Both --patience and --anchored use PATIENCE_DIFF
4606+
* internally, so remove any anchors previously
4607+
* specified.
4608+
*/
4609+
for (i = 0; i < options->anchors_nr; i++)
4610+
free(options->anchors[i]);
4611+
options->anchors_nr = 0;
4612+
} else if (!strcmp(arg, "--histogram"))
46004613
options->xdl_opts = DIFF_WITH_ALG(options, HISTOGRAM_DIFF);
46014614
else if ((argcount = parse_long_opt("diff-algorithm", av, &optarg))) {
46024615
long value = parse_algorithm_value(optarg);
@@ -4608,6 +4621,11 @@ int diff_opt_parse(struct diff_options *options,
46084621
options->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
46094622
options->xdl_opts |= value;
46104623
return argcount;
4624+
} else if (skip_prefix(arg, "--anchored=", &arg)) {
4625+
options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
4626+
ALLOC_GROW(options->anchors, options->anchors_nr + 1,
4627+
options->anchors_alloc);
4628+
options->anchors[options->anchors_nr++] = xstrdup(arg);
46114629
}
46124630

46134631
/* flags options */

diff.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ struct diff_options {
166166
const char *stat_sep;
167167
long xdl_opts;
168168

169+
/* see Documentation/diff-options.txt */
170+
char **anchors;
171+
size_t anchors_nr, anchors_alloc;
172+
169173
int stat_width;
170174
int stat_name_width;
171175
int stat_graph_width;

t/t4065-diff-anchored.sh

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/bin/sh
2+
3+
test_description='anchored diff algorithm'
4+
5+
. ./test-lib.sh
6+
7+
test_expect_success '--anchored' '
8+
printf "a\nb\nc\n" >pre &&
9+
printf "c\na\nb\n" >post &&
10+
11+
# normally, c is moved to produce the smallest diff
12+
test_expect_code 1 git diff --no-index pre post >diff &&
13+
grep "^+c" diff &&
14+
15+
# with anchor, a is moved
16+
test_expect_code 1 git diff --no-index --anchored=c pre post >diff &&
17+
grep "^+a" diff
18+
'
19+
20+
test_expect_success '--anchored multiple' '
21+
printf "a\nb\nc\nd\ne\nf\n" >pre &&
22+
printf "c\na\nb\nf\nd\ne\n" >post &&
23+
24+
# with 1 anchor, c is not moved, but f is moved
25+
test_expect_code 1 git diff --no-index --anchored=c pre post >diff &&
26+
grep "^+a" diff && # a is moved instead of c
27+
grep "^+f" diff &&
28+
29+
# with 2 anchors, c and f are not moved
30+
test_expect_code 1 git diff --no-index --anchored=c --anchored=f pre post >diff &&
31+
grep "^+a" diff &&
32+
grep "^+d" diff # d is moved instead of f
33+
'
34+
35+
test_expect_success '--anchored with nonexistent line has no effect' '
36+
printf "a\nb\nc\n" >pre &&
37+
printf "c\na\nb\n" >post &&
38+
39+
test_expect_code 1 git diff --no-index --anchored=x pre post >diff &&
40+
grep "^+c" diff
41+
'
42+
43+
test_expect_success '--anchored with non-unique line has no effect' '
44+
printf "a\nb\nc\nd\ne\nc\n" >pre &&
45+
printf "c\na\nb\nc\nd\ne\n" >post &&
46+
47+
test_expect_code 1 git diff --no-index --anchored=c pre post >diff &&
48+
grep "^+c" diff
49+
'
50+
51+
test_expect_success 'diff still produced with impossible multiple --anchored' '
	printf "a\nb\nc\n" >pre &&
	printf "c\na\nb\n" >post &&

	# Both anchors cannot be honored at once: a precedes c in pre
	# but follows it in post.
	test_expect_code 1 git diff --no-index --anchored=a --anchored=c pre post >diff &&
	mv post expected_post &&

	# Ensure that the diff is correct by applying it and then
	# comparing the result with the original
	git apply diff &&
	test_cmp expected_post post
'
63+
64+
test_expect_success 'later algorithm arguments override earlier ones' '
65+
printf "a\nb\nc\n" >pre &&
66+
printf "c\na\nb\n" >post &&
67+
68+
test_expect_code 1 git diff --no-index --patience --anchored=c pre post >diff &&
69+
grep "^+a" diff &&
70+
71+
test_expect_code 1 git diff --no-index --anchored=c --patience pre post >diff &&
72+
grep "^+c" diff &&
73+
74+
test_expect_code 1 git diff --no-index --histogram --anchored=c pre post >diff &&
75+
grep "^+a" diff &&
76+
77+
test_expect_code 1 git diff --no-index --anchored=c --histogram pre post >diff &&
78+
grep "^+c" diff
79+
'
80+
81+
test_expect_success '--anchored works with other commands like "git show"' '
82+
printf "a\nb\nc\n" >file &&
83+
git add file &&
84+
git commit -m foo &&
85+
printf "c\na\nb\n" >file &&
86+
git add file &&
87+
git commit -m foo &&
88+
89+
# with anchor, a is moved
90+
git show --patience --anchored=c >diff &&
91+
grep "^+a" diff
92+
'
93+
94+
test_done

xdiff/xdiff.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ typedef struct s_mmbuffer {
8686

8787
typedef struct s_xpparam {
8888
unsigned long flags;
89+
90+
/*
 * Anchor texts for the anchored variant of the patience diff:
 * "anchors" points to "anchors_nr" strings, and a line that starts
 * with any of them is flagged as a potential anchor (see is_anchor()
 * in xpatience.c). See Documentation/diff-options.txt (--anchored).
 */
91+
char **anchors;
92+
size_t anchors_nr;
8993
} xpparam_t;
9094

9195
typedef struct s_xdemitcb {

xdiff/xpatience.c

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,12 @@ struct hashmap {
6262
* initially, "next" reflects only the order in file1.
6363
*/
6464
struct entry *next, *previous;
65+
66+
/*
67+
* If 1, this entry can serve as an anchor. See
68+
* Documentation/diff-options.txt for more information.
69+
*/
70+
unsigned anchor : 1;
6571
} *entries, *first, *last;
6672
/* were common records found? */
6773
unsigned long has_matches;
@@ -70,8 +76,19 @@ struct hashmap {
7076
xpparam_t const *xpp;
7177
};
7278

79+
/*
 * Return 1 if "line" begins with any of the anchor texts in
 * xpp->anchors (the strings given via --anchored), 0 otherwise.
 *
 * NOTE(review): strncmp() stops only at a mismatch, at a NUL, or after
 * strlen(anchor) bytes. The record length is not available here, so
 * confirm that "line" is always readable that far (e.g. a final line
 * that is shorter than the anchor).
 */
static int is_anchor(xpparam_t const *xpp, const char *line)
{
	/* anchors_nr is a size_t; index with the same type to avoid a
	 * signed/unsigned comparison. */
	size_t i;

	for (i = 0; i < xpp->anchors_nr; i++) {
		if (!strncmp(line, xpp->anchors[i], strlen(xpp->anchors[i])))
			return 1;
	}
	return 0;
}
88+
7389
/* The argument "pass" is 1 for the first file, 2 for the second. */
74-
static void insert_record(int line, struct hashmap *map, int pass)
90+
static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
91+
int pass)
7592
{
7693
xrecord_t **records = pass == 1 ?
7794
map->env->xdf1.recs : map->env->xdf2.recs;
@@ -110,6 +127,7 @@ static void insert_record(int line, struct hashmap *map, int pass)
110127
return;
111128
map->entries[index].line1 = line;
112129
map->entries[index].hash = record->ha;
130+
map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1]->ptr);
113131
if (!map->first)
114132
map->first = map->entries + index;
115133
if (map->last) {
@@ -147,11 +165,11 @@ static int fill_hashmap(mmfile_t *file1, mmfile_t *file2,
147165

148166
/* First, fill with entries from the first file */
149167
while (count1--)
150-
insert_record(line1++, result, 1);
168+
insert_record(xpp, line1++, result, 1);
151169

152170
/* Then search for matches in the second file */
153171
while (count2--)
154-
insert_record(line2++, result, 2);
172+
insert_record(xpp, line2++, result, 2);
155173

156174
return 0;
157175
}
@@ -192,14 +210,28 @@ static struct entry *find_longest_common_sequence(struct hashmap *map)
192210
int longest = 0, i;
193211
struct entry *entry;
194212

213+
/*
214+
* If not -1, this entry in sequence must never be overridden.
215+
* Therefore, overriding entries before this has no effect, so
216+
* do not do that either.
217+
*/
218+
int anchor_i = -1;
219+
195220
for (entry = map->first; entry; entry = entry->next) {
196221
if (!entry->line2 || entry->line2 == NON_UNIQUE)
197222
continue;
198223
i = binary_search(sequence, longest, entry);
199224
entry->previous = i < 0 ? NULL : sequence[i];
200-
sequence[++i] = entry;
201-
if (i == longest)
225+
++i;
226+
if (i <= anchor_i)
227+
continue;
228+
sequence[i] = entry;
229+
if (entry->anchor) {
230+
anchor_i = i;
231+
longest = anchor_i + 1;
232+
} else if (i == longest) {
202233
longest++;
234+
}
203235
}
204236

205237
/* No common unique lines were found */

0 commit comments

Comments
 (0)