Skip to content

Commit a15696b

Browse files
committed
Merge branch 'ap/combine-diff-coalesce-lost'
Attempts to minimize "diff -c/--cc" output by coalescing the same lines removed from the parents better, but with an O(n^2) complexity. * ap/combine-diff-coalesce-lost: combine-diff: coalesce lost lines optimally
2 parents 0d2f94a + 99d3206 commit a15696b

File tree

2 files changed

+320
-64
lines changed

2 files changed

+320
-64
lines changed

combine-diff.c

Lines changed: 191 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,24 @@ static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr,
7474

7575
/* Lines lost from parent */
7676
struct lline {
77-
struct lline *next;
77+
struct lline *next, *prev;
7878
int len;
7979
unsigned long parent_map;
8080
char line[FLEX_ARRAY];
8181
};
8282

83+
/* Lines lost from current parent (before coalescing) */
84+
struct plost {
85+
struct lline *lost_head, *lost_tail;
86+
int len;
87+
};
88+
8389
/* Lines surviving in the merge result */
8490
struct sline {
85-
struct lline *lost_head, **lost_tail;
86-
struct lline *next_lost;
91+
/* Accumulated and coalesced lost lines */
92+
struct lline *lost;
93+
int lenlost;
94+
struct plost plost;
8795
char *bol;
8896
int len;
8997
/* bit 0 up to (N-1) are on if the parent has this line (i.e.
@@ -95,34 +103,6 @@ struct sline {
95103
unsigned long *p_lno;
96104
};
97105

98-
static char *grab_blob(const unsigned char *sha1, unsigned int mode,
99-
unsigned long *size, struct userdiff_driver *textconv,
100-
const char *path)
101-
{
102-
char *blob;
103-
enum object_type type;
104-
105-
if (S_ISGITLINK(mode)) {
106-
blob = xmalloc(100);
107-
*size = snprintf(blob, 100,
108-
"Subproject commit %s\n", sha1_to_hex(sha1));
109-
} else if (is_null_sha1(sha1)) {
110-
/* deleted blob */
111-
*size = 0;
112-
return xcalloc(1, 1);
113-
} else if (textconv) {
114-
struct diff_filespec *df = alloc_filespec(path);
115-
fill_filespec(df, sha1, 1, mode);
116-
*size = fill_textconv(textconv, df, &blob);
117-
free_filespec(df);
118-
} else {
119-
blob = read_sha1_file(sha1, &type, size);
120-
if (type != OBJ_BLOB)
121-
die("object '%s' is not a blob!", sha1_to_hex(sha1));
122-
}
123-
return blob;
124-
}
125-
126106
static int match_string_spaces(const char *line1, int len1,
127107
const char *line2, int len2,
128108
long flags)
@@ -163,36 +143,180 @@ static int match_string_spaces(const char *line1, int len1,
163143
return 0;
164144
}
165145

166-
static void append_lost(struct sline *sline, int n, const char *line, int len, long flags)
146+
enum coalesce_direction { MATCH, BASE, NEW };
147+
148+
/* Coalesce new lines into base by finding LCS */
149+
static struct lline *coalesce_lines(struct lline *base, int *lenbase,
150+
struct lline *new, int lennew,
151+
unsigned long parent, long flags)
167152
{
168-
struct lline *lline;
169-
unsigned long this_mask = (1UL<<n);
170-
if (line[len-1] == '\n')
171-
len--;
153+
int **lcs;
154+
enum coalesce_direction **directions;
155+
struct lline *baseend, *newend = NULL;
156+
int i, j, origbaselen = *lenbase;
172157

173-
/* Check to see if we can squash things */
174-
if (sline->lost_head) {
175-
lline = sline->next_lost;
176-
while (lline) {
177-
if (match_string_spaces(lline->line, lline->len,
178-
line, len, flags)) {
179-
lline->parent_map |= this_mask;
180-
sline->next_lost = lline->next;
181-
return;
158+
if (new == NULL)
159+
return base;
160+
161+
if (base == NULL) {
162+
*lenbase = lennew;
163+
return new;
164+
}
165+
166+
/*
167+
* Coalesce new lines into base by finding the LCS
168+
* - Create the table to run dynamic programing
169+
* - Compute the LCS
170+
* - Then reverse read the direction structure:
171+
* - If we have MATCH, assign parent to base flag, and consume
172+
* both baseend and newend
173+
* - Else if we have BASE, consume baseend
174+
* - Else if we have NEW, insert newend lline into base and
175+
* consume newend
176+
*/
177+
lcs = xcalloc(origbaselen + 1, sizeof(int*));
178+
directions = xcalloc(origbaselen + 1, sizeof(enum coalesce_direction*));
179+
for (i = 0; i < origbaselen + 1; i++) {
180+
lcs[i] = xcalloc(lennew + 1, sizeof(int));
181+
directions[i] = xcalloc(lennew + 1, sizeof(enum coalesce_direction));
182+
directions[i][0] = BASE;
183+
}
184+
for (j = 1; j < lennew + 1; j++)
185+
directions[0][j] = NEW;
186+
187+
for (i = 1, baseend = base; i < origbaselen + 1; i++) {
188+
for (j = 1, newend = new; j < lennew + 1; j++) {
189+
if (match_string_spaces(baseend->line, baseend->len,
190+
newend->line, newend->len, flags)) {
191+
lcs[i][j] = lcs[i - 1][j - 1] + 1;
192+
directions[i][j] = MATCH;
193+
} else if (lcs[i][j - 1] >= lcs[i - 1][j]) {
194+
lcs[i][j] = lcs[i][j - 1];
195+
directions[i][j] = NEW;
196+
} else {
197+
lcs[i][j] = lcs[i - 1][j];
198+
directions[i][j] = BASE;
199+
}
200+
if (newend->next)
201+
newend = newend->next;
202+
}
203+
if (baseend->next)
204+
baseend = baseend->next;
205+
}
206+
207+
for (i = 0; i < origbaselen + 1; i++)
208+
free(lcs[i]);
209+
free(lcs);
210+
211+
/* At this point, baseend and newend point to the end of each lists */
212+
i--;
213+
j--;
214+
while (i != 0 || j != 0) {
215+
if (directions[i][j] == MATCH) {
216+
baseend->parent_map |= 1<<parent;
217+
baseend = baseend->prev;
218+
newend = newend->prev;
219+
i--;
220+
j--;
221+
} else if (directions[i][j] == NEW) {
222+
struct lline *lline;
223+
224+
lline = newend;
225+
/* Remove lline from new list and update newend */
226+
if (lline->prev)
227+
lline->prev->next = lline->next;
228+
else
229+
new = lline->next;
230+
if (lline->next)
231+
lline->next->prev = lline->prev;
232+
233+
newend = lline->prev;
234+
j--;
235+
236+
/* Add lline to base list */
237+
if (baseend) {
238+
lline->next = baseend->next;
239+
lline->prev = baseend;
240+
if (lline->prev)
241+
lline->prev->next = lline;
182242
}
183-
lline = lline->next;
243+
else {
244+
lline->next = base;
245+
base = lline;
246+
}
247+
(*lenbase)++;
248+
249+
if (lline->next)
250+
lline->next->prev = lline;
251+
252+
} else {
253+
baseend = baseend->prev;
254+
i--;
184255
}
185256
}
186257

258+
newend = new;
259+
while (newend) {
260+
struct lline *lline = newend;
261+
newend = newend->next;
262+
free(lline);
263+
}
264+
265+
for (i = 0; i < origbaselen + 1; i++)
266+
free(directions[i]);
267+
free(directions);
268+
269+
return base;
270+
}
271+
272+
static char *grab_blob(const unsigned char *sha1, unsigned int mode,
273+
unsigned long *size, struct userdiff_driver *textconv,
274+
const char *path)
275+
{
276+
char *blob;
277+
enum object_type type;
278+
279+
if (S_ISGITLINK(mode)) {
280+
blob = xmalloc(100);
281+
*size = snprintf(blob, 100,
282+
"Subproject commit %s\n", sha1_to_hex(sha1));
283+
} else if (is_null_sha1(sha1)) {
284+
/* deleted blob */
285+
*size = 0;
286+
return xcalloc(1, 1);
287+
} else if (textconv) {
288+
struct diff_filespec *df = alloc_filespec(path);
289+
fill_filespec(df, sha1, 1, mode);
290+
*size = fill_textconv(textconv, df, &blob);
291+
free_filespec(df);
292+
} else {
293+
blob = read_sha1_file(sha1, &type, size);
294+
if (type != OBJ_BLOB)
295+
die("object '%s' is not a blob!", sha1_to_hex(sha1));
296+
}
297+
return blob;
298+
}
299+
300+
static void append_lost(struct sline *sline, int n, const char *line, int len)
301+
{
302+
struct lline *lline;
303+
unsigned long this_mask = (1UL<<n);
304+
if (line[len-1] == '\n')
305+
len--;
306+
187307
lline = xmalloc(sizeof(*lline) + len + 1);
188308
lline->len = len;
189309
lline->next = NULL;
310+
lline->prev = sline->plost.lost_tail;
311+
if (lline->prev)
312+
lline->prev->next = lline;
313+
else
314+
sline->plost.lost_head = lline;
315+
sline->plost.lost_tail = lline;
316+
sline->plost.len++;
190317
lline->parent_map = this_mask;
191318
memcpy(lline->line, line, len);
192319
lline->line[len] = 0;
193-
*sline->lost_tail = lline;
194-
sline->lost_tail = &lline->next;
195-
sline->next_lost = NULL;
196320
}
197321

198322
struct combine_diff_state {
@@ -203,7 +327,6 @@ struct combine_diff_state {
203327
int n;
204328
struct sline *sline;
205329
struct sline *lost_bucket;
206-
long flags;
207330
};
208331

209332
static void consume_line(void *state_, char *line, unsigned long len)
@@ -236,14 +359,13 @@ static void consume_line(void *state_, char *line, unsigned long len)
236359
xcalloc(state->num_parent,
237360
sizeof(unsigned long));
238361
state->sline[state->nb-1].p_lno[state->n] = state->ob;
239-
state->lost_bucket->next_lost = state->lost_bucket->lost_head;
240362
return;
241363
}
242364
if (!state->lost_bucket)
243365
return; /* not in any hunk yet */
244366
switch (line[0]) {
245367
case '-':
246-
append_lost(state->lost_bucket, state->n, line+1, len-1, state->flags);
368+
append_lost(state->lost_bucket, state->n, line+1, len-1);
247369
break;
248370
case '+':
249371
state->sline[state->lno-1].flag |= state->nmask;
@@ -276,7 +398,6 @@ static void combine_diff(const unsigned char *parent, unsigned int mode,
276398
xpp.flags = flags;
277399
memset(&xecfg, 0, sizeof(xecfg));
278400
memset(&state, 0, sizeof(state));
279-
state.flags = flags;
280401
state.nmask = nmask;
281402
state.sline = sline;
282403
state.lno = 1;
@@ -298,8 +419,18 @@ static void combine_diff(const unsigned char *parent, unsigned int mode,
298419
struct lline *ll;
299420
sline[lno].p_lno[n] = p_lno;
300421

422+
/* Coalesce new lines */
423+
if (sline[lno].plost.lost_head) {
424+
struct sline *sl = &sline[lno];
425+
sl->lost = coalesce_lines(sl->lost, &sl->lenlost,
426+
sl->plost.lost_head,
427+
sl->plost.len, n, flags);
428+
sl->plost.lost_head = sl->plost.lost_tail = NULL;
429+
sl->plost.len = 0;
430+
}
431+
301432
/* How many lines would this sline advance the p_lno? */
302-
ll = sline[lno].lost_head;
433+
ll = sline[lno].lost;
303434
while (ll) {
304435
if (ll->parent_map & nmask)
305436
p_lno++; /* '-' means parent had it */
@@ -319,7 +450,7 @@ static int interesting(struct sline *sline, unsigned long all_mask)
319450
/* If some parents lost lines here, or if we have added to
320451
* some parent, it is interesting.
321452
*/
322-
return ((sline->flag & all_mask) || sline->lost_head);
453+
return ((sline->flag & all_mask) || sline->lost);
323454
}
324455

325456
static unsigned long adjust_hunk_tail(struct sline *sline,
@@ -502,7 +633,7 @@ static int make_hunks(struct sline *sline, unsigned long cnt,
502633
has_interesting = 0;
503634
for (j = i; j < hunk_end && !has_interesting; j++) {
504635
unsigned long this_diff = sline[j].flag & all_mask;
505-
struct lline *ll = sline[j].lost_head;
636+
struct lline *ll = sline[j].lost;
506637
if (this_diff) {
507638
/* This has some changes. Is it the
508639
* same as others?
@@ -656,7 +787,7 @@ static void dump_sline(struct sline *sline, const char *line_prefix,
656787
int j;
657788
unsigned long p_mask;
658789
struct sline *sl = &sline[lno++];
659-
ll = (sl->flag & no_pre_delete) ? NULL : sl->lost_head;
790+
ll = (sl->flag & no_pre_delete) ? NULL : sl->lost;
660791
while (ll) {
661792
printf("%s%s", line_prefix, c_old);
662793
for (j = 0; j < num_parent; j++) {
@@ -707,7 +838,7 @@ static void reuse_combine_diff(struct sline *sline, unsigned long cnt,
707838
jmask = (1UL<<j);
708839

709840
for (lno = 0; lno <= cnt; lno++) {
710-
struct lline *ll = sline->lost_head;
841+
struct lline *ll = sline->lost;
711842
sline->p_lno[i] = sline->p_lno[j];
712843
while (ll) {
713844
if (ll->parent_map & jmask)
@@ -966,10 +1097,6 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
9661097

9671098
sline = xcalloc(cnt+2, sizeof(*sline));
9681099
sline[0].bol = result;
969-
for (lno = 0; lno <= cnt + 1; lno++) {
970-
sline[lno].lost_tail = &sline[lno].lost_head;
971-
sline[lno].flag = 0;
972-
}
9731100
for (lno = 0, cp = result; cp < result + result_size; cp++) {
9741101
if (*cp == '\n') {
9751102
sline[lno].len = cp - sline[lno].bol;
@@ -1019,8 +1146,8 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
10191146
free(result);
10201147

10211148
for (lno = 0; lno < cnt; lno++) {
1022-
if (sline[lno].lost_head) {
1023-
struct lline *ll = sline[lno].lost_head;
1149+
if (sline[lno].lost) {
1150+
struct lline *ll = sline[lno].lost;
10241151
while (ll) {
10251152
struct lline *tmp = ll;
10261153
ll = ll->next;

0 commit comments

Comments
 (0)