Skip to content

Commit d9c66f0

Browse files
dscho authored and gitster committed
range-diff: first rudimentary implementation
At this stage, `git range-diff` can determine corresponding commits of two related commit ranges. This makes use of the recently introduced implementation of the linear assignment algorithm. The core of this patch is a straight port of the ideas of tbdiff, the apparently dormant project at https://github.com/trast/tbdiff. The output does not at all match `tbdiff`'s output yet, as this patch really concentrates on getting the patch matching part right. Note: due to differences in the diff algorithm (`tbdiff` uses the Python module `difflib`, Git uses its xdiff fork), the cost matrix calculated by `range-diff` is different from (but very similar to) the one calculated by `tbdiff`. Therefore, it is possible that they find different matching commits in corner cases (e.g. when a patch was split into two patches of roughly equal length). Signed-off-by: Johannes Schindelin <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 348ae56 commit d9c66f0

File tree

4 files changed

+363
-1
lines changed

4 files changed

+363
-1
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,7 @@ LIB_OBJS += progress.o
925925
LIB_OBJS += prompt.o
926926
LIB_OBJS += protocol.o
927927
LIB_OBJS += quote.o
928+
LIB_OBJS += range-diff.o
928929
LIB_OBJS += reachable.o
929930
LIB_OBJS += read-cache.o
930931
LIB_OBJS += reflog-walk.o

builtin/range-diff.c

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "cache.h"
22
#include "builtin.h"
33
#include "parse-options.h"
4+
#include "range-diff.h"
45

56
static const char * const builtin_range_diff_usage[] = {
67
N_("git range-diff [<options>] <old-base>..<old-tip> <new-base>..<new-tip>"),
@@ -17,9 +18,51 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix)
1718
N_("Percentage by which creation is weighted")),
1819
OPT_END()
1920
};
21+
int res = 0;
22+
struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT;
2023

2124
argc = parse_options(argc, argv, NULL, options,
2225
builtin_range_diff_usage, 0);
2326

24-
return 0;
27+
if (argc == 2) {
28+
if (!strstr(argv[0], ".."))
29+
die(_("no .. in range: '%s'"), argv[0]);
30+
strbuf_addstr(&range1, argv[0]);
31+
32+
if (!strstr(argv[1], ".."))
33+
die(_("no .. in range: '%s'"), argv[1]);
34+
strbuf_addstr(&range2, argv[1]);
35+
} else if (argc == 3) {
36+
strbuf_addf(&range1, "%s..%s", argv[0], argv[1]);
37+
strbuf_addf(&range2, "%s..%s", argv[0], argv[2]);
38+
} else if (argc == 1) {
39+
const char *b = strstr(argv[0], "..."), *a = argv[0];
40+
int a_len;
41+
42+
if (!b) {
43+
error(_("single arg format must be symmetric range"));
44+
usage_with_options(builtin_range_diff_usage, options);
45+
}
46+
47+
a_len = (int)(b - a);
48+
if (!a_len) {
49+
a = "HEAD";
50+
a_len = strlen(a);
51+
}
52+
b += 3;
53+
if (!*b)
54+
b = "HEAD";
55+
strbuf_addf(&range1, "%s..%.*s", b, a_len, a);
56+
strbuf_addf(&range2, "%.*s..%s", a_len, a, b);
57+
} else {
58+
error(_("need two commit ranges"));
59+
usage_with_options(builtin_range_diff_usage, options);
60+
}
61+
62+
res = show_range_diff(range1.buf, range2.buf, creation_factor);
63+
64+
strbuf_release(&range1);
65+
strbuf_release(&range2);
66+
67+
return res;
2568
}

range-diff.c

Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
#include "cache.h"
2+
#include "range-diff.h"
3+
#include "string-list.h"
4+
#include "run-command.h"
5+
#include "argv-array.h"
6+
#include "hashmap.h"
7+
#include "xdiff-interface.h"
8+
#include "linear-assignment.h"
9+
10+
struct patch_util {
11+
/* For the search for an exact match */
12+
struct hashmap_entry e;
13+
const char *diff, *patch;
14+
15+
int i;
16+
int diffsize;
17+
size_t diff_offset;
18+
/* the index of the matching item in the other branch, or -1 */
19+
int matching;
20+
struct object_id oid;
21+
};
22+
23+
/*
24+
* Reads the patches into a string list, with the `util` field being populated
25+
* as struct object_id (will need to be free()d).
26+
*/
27+
static int read_patches(const char *range, struct string_list *list)
28+
{
29+
struct child_process cp = CHILD_PROCESS_INIT;
30+
FILE *in;
31+
struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT;
32+
struct patch_util *util = NULL;
33+
int in_header = 1;
34+
35+
argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges",
36+
"--reverse", "--date-order", "--decorate=no",
37+
"--no-abbrev-commit", range,
38+
NULL);
39+
cp.out = -1;
40+
cp.no_stdin = 1;
41+
cp.git_cmd = 1;
42+
43+
if (start_command(&cp))
44+
return error_errno(_("could not start `log`"));
45+
in = fdopen(cp.out, "r");
46+
if (!in) {
47+
error_errno(_("could not read `log` output"));
48+
finish_command(&cp);
49+
return -1;
50+
}
51+
52+
while (strbuf_getline(&line, in) != EOF) {
53+
const char *p;
54+
55+
if (skip_prefix(line.buf, "commit ", &p)) {
56+
if (util) {
57+
string_list_append(list, buf.buf)->util = util;
58+
strbuf_reset(&buf);
59+
}
60+
util = xcalloc(sizeof(*util), 1);
61+
if (get_oid(p, &util->oid)) {
62+
error(_("could not parse commit '%s'"), p);
63+
free(util);
64+
string_list_clear(list, 1);
65+
strbuf_release(&buf);
66+
strbuf_release(&line);
67+
fclose(in);
68+
finish_command(&cp);
69+
return -1;
70+
}
71+
util->matching = -1;
72+
in_header = 1;
73+
continue;
74+
}
75+
76+
if (starts_with(line.buf, "diff --git")) {
77+
in_header = 0;
78+
strbuf_addch(&buf, '\n');
79+
if (!util->diff_offset)
80+
util->diff_offset = buf.len;
81+
strbuf_addbuf(&buf, &line);
82+
} else if (in_header) {
83+
if (starts_with(line.buf, "Author: ")) {
84+
strbuf_addbuf(&buf, &line);
85+
strbuf_addstr(&buf, "\n\n");
86+
} else if (starts_with(line.buf, " ")) {
87+
strbuf_addbuf(&buf, &line);
88+
strbuf_addch(&buf, '\n');
89+
}
90+
continue;
91+
} else if (starts_with(line.buf, "@@ "))
92+
strbuf_addstr(&buf, "@@");
93+
else if (!line.buf[0] || starts_with(line.buf, "index "))
94+
/*
95+
* A completely blank (not ' \n', which is context)
96+
* line is not valid in a diff. We skip it
97+
* silently, because this neatly handles the blank
98+
* separator line between commits in git-log
99+
* output.
100+
*
101+
* We also want to ignore the diff's `index` lines
102+
* because they contain exact blob hashes in which
103+
* we are not interested.
104+
*/
105+
continue;
106+
else
107+
strbuf_addbuf(&buf, &line);
108+
109+
strbuf_addch(&buf, '\n');
110+
util->diffsize++;
111+
}
112+
fclose(in);
113+
strbuf_release(&line);
114+
115+
if (util)
116+
string_list_append(list, buf.buf)->util = util;
117+
strbuf_release(&buf);
118+
119+
if (finish_command(&cp))
120+
return -1;
121+
122+
return 0;
123+
}
124+
125+
static int patch_util_cmp(const void *dummy, const struct patch_util *a,
126+
const struct patch_util *b, const char *keydata)
127+
{
128+
return strcmp(a->diff, keydata ? keydata : b->diff);
129+
}
130+
131+
static void find_exact_matches(struct string_list *a, struct string_list *b)
132+
{
133+
struct hashmap map;
134+
int i;
135+
136+
hashmap_init(&map, (hashmap_cmp_fn)patch_util_cmp, NULL, 0);
137+
138+
/* First, add the patches of a to a hash map */
139+
for (i = 0; i < a->nr; i++) {
140+
struct patch_util *util = a->items[i].util;
141+
142+
util->i = i;
143+
util->patch = a->items[i].string;
144+
util->diff = util->patch + util->diff_offset;
145+
hashmap_entry_init(util, strhash(util->diff));
146+
hashmap_add(&map, util);
147+
}
148+
149+
/* Now try to find exact matches in b */
150+
for (i = 0; i < b->nr; i++) {
151+
struct patch_util *util = b->items[i].util, *other;
152+
153+
util->i = i;
154+
util->patch = b->items[i].string;
155+
util->diff = util->patch + util->diff_offset;
156+
hashmap_entry_init(util, strhash(util->diff));
157+
other = hashmap_remove(&map, util, NULL);
158+
if (other) {
159+
if (other->matching >= 0)
160+
BUG("already assigned!");
161+
162+
other->matching = i;
163+
util->matching = other->i;
164+
}
165+
}
166+
167+
hashmap_free(&map, 0);
168+
}
169+
170+
/*
 * Line callback used while measuring a diff: bumps the int counter that
 * `data` points to by one per call.  `line` and `len` are ignored.
 */
static void diffsize_consume(void *data, char *line, unsigned long len)
{
	int *line_count = data;

	++*line_count;
}
174+
175+
static int diffsize(const char *a, const char *b)
176+
{
177+
xpparam_t pp = { 0 };
178+
xdemitconf_t cfg = { 0 };
179+
mmfile_t mf1, mf2;
180+
int count = 0;
181+
182+
mf1.ptr = (char *)a;
183+
mf1.size = strlen(a);
184+
mf2.ptr = (char *)b;
185+
mf2.size = strlen(b);
186+
187+
cfg.ctxlen = 3;
188+
if (!xdi_diff_outf(&mf1, &mf2, diffsize_consume, &count, &pp, &cfg))
189+
return count;
190+
191+
error(_("failed to generate diff"));
192+
return COST_MAX;
193+
}
194+
195+
static void get_correspondences(struct string_list *a, struct string_list *b,
196+
int creation_factor)
197+
{
198+
int n = a->nr + b->nr;
199+
int *cost, c, *a2b, *b2a;
200+
int i, j;
201+
202+
ALLOC_ARRAY(cost, st_mult(n, n));
203+
ALLOC_ARRAY(a2b, n);
204+
ALLOC_ARRAY(b2a, n);
205+
206+
for (i = 0; i < a->nr; i++) {
207+
struct patch_util *a_util = a->items[i].util;
208+
209+
for (j = 0; j < b->nr; j++) {
210+
struct patch_util *b_util = b->items[j].util;
211+
212+
if (a_util->matching == j)
213+
c = 0;
214+
else if (a_util->matching < 0 && b_util->matching < 0)
215+
c = diffsize(a_util->diff, b_util->diff);
216+
else
217+
c = COST_MAX;
218+
cost[i + n * j] = c;
219+
}
220+
221+
c = a_util->matching < 0 ?
222+
a_util->diffsize * creation_factor / 100 : COST_MAX;
223+
for (j = b->nr; j < n; j++)
224+
cost[i + n * j] = c;
225+
}
226+
227+
for (j = 0; j < b->nr; j++) {
228+
struct patch_util *util = b->items[j].util;
229+
230+
c = util->matching < 0 ?
231+
util->diffsize * creation_factor / 100 : COST_MAX;
232+
for (i = a->nr; i < n; i++)
233+
cost[i + n * j] = c;
234+
}
235+
236+
for (i = a->nr; i < n; i++)
237+
for (j = b->nr; j < n; j++)
238+
cost[i + n * j] = 0;
239+
240+
compute_assignment(n, n, cost, a2b, b2a);
241+
242+
for (i = 0; i < a->nr; i++)
243+
if (a2b[i] >= 0 && a2b[i] < b->nr) {
244+
struct patch_util *a_util = a->items[i].util;
245+
struct patch_util *b_util = b->items[a2b[i]].util;
246+
247+
a_util->matching = a2b[i];
248+
b_util->matching = i;
249+
}
250+
251+
free(cost);
252+
free(a2b);
253+
free(b2a);
254+
}
255+
256+
static const char *short_oid(struct patch_util *util)
257+
{
258+
return find_unique_abbrev(&util->oid, DEFAULT_ABBREV);
259+
}
260+
261+
static void output(struct string_list *a, struct string_list *b)
262+
{
263+
int i;
264+
265+
for (i = 0; i < b->nr; i++) {
266+
struct patch_util *util = b->items[i].util, *prev;
267+
268+
if (util->matching < 0)
269+
printf("-: -------- > %d: %s\n",
270+
i + 1, short_oid(util));
271+
else {
272+
prev = a->items[util->matching].util;
273+
printf("%d: %s ! %d: %s\n",
274+
util->matching + 1, short_oid(prev),
275+
i + 1, short_oid(util));
276+
}
277+
}
278+
279+
for (i = 0; i < a->nr; i++) {
280+
struct patch_util *util = a->items[i].util;
281+
282+
if (util->matching < 0)
283+
printf("%d: %s < -: --------\n",
284+
i + 1, short_oid(util));
285+
}
286+
}
287+
288+
int show_range_diff(const char *range1, const char *range2,
289+
int creation_factor)
290+
{
291+
int res = 0;
292+
293+
struct string_list branch1 = STRING_LIST_INIT_DUP;
294+
struct string_list branch2 = STRING_LIST_INIT_DUP;
295+
296+
if (read_patches(range1, &branch1))
297+
res = error(_("could not parse log for '%s'"), range1);
298+
if (!res && read_patches(range2, &branch2))
299+
res = error(_("could not parse log for '%s'"), range2);
300+
301+
if (!res) {
302+
find_exact_matches(&branch1, &branch2);
303+
get_correspondences(&branch1, &branch2, creation_factor);
304+
output(&branch1, &branch2);
305+
}
306+
307+
string_list_clear(&branch1, 1);
308+
string_list_clear(&branch2, 1);
309+
310+
return res;
311+
}

range-diff.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#ifndef RANGE_DIFF_H
2+
#define RANGE_DIFF_H
3+
4+
int show_range_diff(const char *range1, const char *range2,
5+
int creation_factor);
6+
7+
#endif

0 commit comments

Comments
 (0)