Skip to content

Commit b767aa7

Browse files
committed
built-in add -p: implement the hunk splitting feature
If this developer's workflow is any indication, then this is *the* most useful feature of Git's interactive `add `command. Note: once again, this is not a verbatim conversion from the Perl code to C: the `hunk_splittable()` function, for example, essentially did all the work of splitting the hunk, just to find out whether more than one hunk would have been the result (and then tossed that result into the trash). In C we instead count the number of resulting hunks (without actually doing the work of splitting, but just counting the transitions from non-context lines to context lines), and store that information with the hunk, and we do that *while* parsing the diff in the first place. Another deviation: the built-in `git add -p` was designed with a single strbuf holding the diff (and another one holding the colored diff, if that one was asked for) in mind, and hunks essentially store just the start and end offsets pointing into that strbuf. As a consequence, when we split hunks, we now use a special mode where the hunk header is generated dynamically, and only the rest of the hunk is stored using such start/end offsets. This way, we also avoid the frequent formatting/re-parsing of the hunk header of the Perl version. Signed-off-by: Johannes Schindelin <[email protected]>
1 parent 5e4e505 commit b767aa7

File tree

1 file changed

+182
-2
lines changed

1 file changed

+182
-2
lines changed

add-patch.c

Lines changed: 182 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ struct hunk_header {
2828
};
2929

3030
struct hunk {
31-
size_t start, end, colored_start, colored_end;
31+
size_t start, end, colored_start, colored_end, splittable_into;
3232
enum { UNDECIDED_HUNK = 0, SKIP_HUNK, USE_HUNK } use;
3333
struct hunk_header header;
3434
};
@@ -152,7 +152,7 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
152152
struct argv_array args = ARGV_ARRAY_INIT;
153153
struct strbuf *plain = &s->plain, *colored = NULL;
154154
struct child_process cp = CHILD_PROCESS_INIT;
155-
char *p, *pend, *colored_p = NULL, *colored_pend = NULL;
155+
char *p, *pend, *colored_p = NULL, *colored_pend = NULL, marker = '\0';
156156
size_t file_diff_alloc = 0, i, color_arg_index;
157157
struct file_diff *file_diff = NULL;
158158
struct hunk *hunk = NULL;
@@ -222,6 +222,13 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
222222
else if (starts_with(p, "@@ ") ||
223223
(hunk == &file_diff->head &&
224224
skip_prefix(p, "deleted file", &deleted))) {
225+
if (marker == '-' || marker == '+')
226+
/*
227+
* Should not happen; previous hunk did not end
228+
* in a context line? Handle it anyway.
229+
*/
230+
hunk->splittable_into++;
231+
225232
file_diff->hunk_nr++;
226233
ALLOC_GROW(file_diff->hunk, file_diff->hunk_nr,
227234
file_diff->hunk_alloc);
@@ -236,6 +243,12 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
236243
file_diff->deleted = 1;
237244
else if (parse_hunk_header(s, hunk) < 0)
238245
return -1;
246+
247+
/*
248+
* Start counting into how many hunks this one can be
249+
* split
250+
*/
251+
marker = *p;
239252
} else if (hunk == &file_diff->head &&
240253
((skip_prefix(p, "old mode ", &mode_change) ||
241254
skip_prefix(p, "new mode ", &mode_change)) &&
@@ -260,6 +273,12 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
260273
(int)(eol - (plain->buf + file_diff->head.start)),
261274
plain->buf + file_diff->head.start);
262275

276+
if ((marker == '-' || marker == '+') &&
277+
(*p == ' ' || *p == '\\'))
278+
hunk->splittable_into++;
279+
if (marker)
280+
marker = *p;
281+
263282
p = eol == pend ? pend : eol + 1;
264283
hunk->end = p - plain->buf;
265284

@@ -282,9 +301,25 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
282301
}
283302
}
284303

304+
if (marker == '-' || marker == '+')
305+
/*
306+
* Last hunk ended in non-context line (i.e. it appended lines
307+
* to the file, so there are no trailing context lines).
308+
*/
309+
hunk->splittable_into++;
310+
285311
return 0;
286312
}
287313

314+
static size_t find_next_line(struct strbuf *sb, size_t offset)
315+
{
316+
char *eol = memchr(sb->buf + offset, '\n', sb->len - offset);
317+
318+
if (!eol)
319+
return sb->len;
320+
return eol - sb->buf + 1;
321+
}
322+
288323
static void render_hunk(struct add_p_state *s, struct hunk *hunk,
289324
ssize_t delta, int colored, struct strbuf *out)
290325
{
@@ -380,6 +415,139 @@ static void reassemble_patch(struct add_p_state *s,
380415
}
381416
}
382417

418+
static int split_hunk(struct add_p_state *s, struct file_diff *file_diff,
419+
size_t hunk_index)
420+
{
421+
int colored = !!s->colored.len, first = 1;
422+
struct hunk *hunk = file_diff->hunk + hunk_index;
423+
size_t splittable_into;
424+
size_t end, colored_end, current, colored_current = 0, context_line_count;
425+
struct hunk_header remaining, *header;
426+
char marker, ch;
427+
428+
if (hunk_index >= file_diff->hunk_nr)
429+
BUG("invalid hunk index: %d (must be >= 0 and < %d)",
430+
(int)hunk_index, (int)file_diff->hunk_nr);
431+
432+
if (hunk->splittable_into < 2)
433+
return 0;
434+
splittable_into = hunk->splittable_into;
435+
436+
end = hunk->end;
437+
colored_end = hunk->colored_end;
438+
439+
memcpy(&remaining, &hunk->header, sizeof(remaining));
440+
441+
file_diff->hunk_nr += splittable_into - 1;
442+
ALLOC_GROW(file_diff->hunk, file_diff->hunk_nr, file_diff->hunk_alloc);
443+
if (hunk_index + splittable_into < file_diff->hunk_nr)
444+
memmove(file_diff->hunk + hunk_index + splittable_into,
445+
file_diff->hunk + hunk_index + 1,
446+
(file_diff->hunk_nr - hunk_index - splittable_into)
447+
* sizeof(*hunk));
448+
hunk = file_diff->hunk + hunk_index;
449+
hunk->splittable_into = 1;
450+
memset(hunk + 1, 0, (splittable_into - 1) * sizeof(*hunk));
451+
452+
header = &hunk->header;
453+
header->old_count = header->new_count = 0;
454+
455+
current = hunk->start;
456+
if (colored)
457+
colored_current = hunk->colored_start;
458+
marker = '\0';
459+
context_line_count = 0;
460+
461+
while (splittable_into > 1) {
462+
ch = s->plain.buf[current];
463+
if ((marker == '-' || marker == '+') && ch == ' ') {
464+
first = 0;
465+
hunk[1].start = current;
466+
if (colored)
467+
hunk[1].colored_start = colored_current;
468+
context_line_count = 0;
469+
}
470+
471+
if (marker != ' ' || (ch != '-' && ch != '+')) {
472+
next_hunk_line:
473+
/* current hunk not done yet */
474+
if (ch == ' ')
475+
context_line_count++;
476+
else if (ch == '-')
477+
header->old_count++;
478+
else if (ch == '+')
479+
header->new_count++;
480+
else
481+
BUG("unhandled diff marker: '%c'", ch);
482+
marker = ch;
483+
current = find_next_line(&s->plain, current);
484+
if (colored)
485+
colored_current =
486+
find_next_line(&s->colored,
487+
colored_current);
488+
continue;
489+
}
490+
491+
if (first) {
492+
if (header->old_count || header->new_count)
493+
BUG("counts are off: %d/%d",
494+
(int)header->old_count,
495+
(int)header->new_count);
496+
497+
header->old_count = context_line_count;
498+
header->new_count = context_line_count;
499+
context_line_count = 0;
500+
first = 0;
501+
goto next_hunk_line;
502+
}
503+
504+
remaining.old_offset += header->old_count;
505+
remaining.old_count -= header->old_count;
506+
remaining.new_offset += header->new_count;
507+
remaining.new_count -= header->new_count;
508+
509+
/* initialize next hunk header's offsets */
510+
hunk[1].header.old_offset =
511+
header->old_offset + header->old_count;
512+
hunk[1].header.new_offset =
513+
header->new_offset + header->new_count;
514+
515+
/* add one split hunk */
516+
header->old_count += context_line_count;
517+
header->new_count += context_line_count;
518+
519+
hunk->end = current;
520+
if (colored)
521+
hunk->colored_end = colored_current;
522+
523+
hunk++;
524+
hunk->splittable_into = 1;
525+
hunk->use = hunk[-1].use;
526+
header = &hunk->header;
527+
528+
header->old_count = header->new_count = context_line_count;
529+
context_line_count = 0;
530+
531+
splittable_into--;
532+
marker = ch;
533+
}
534+
535+
/* last hunk simply gets the rest */
536+
if (header->old_offset != remaining.old_offset)
537+
BUG("miscounted old_offset: %lu != %lu",
538+
header->old_offset, remaining.old_offset);
539+
if (header->new_offset != remaining.new_offset)
540+
BUG("miscounted new_offset: %lu != %lu",
541+
header->new_offset, remaining.new_offset);
542+
header->old_count = remaining.old_count;
543+
header->new_count = remaining.new_count;
544+
hunk->end = end;
545+
if (colored)
546+
hunk->colored_end = colored_end;
547+
548+
return 0;
549+
}
550+
383551
static const char help_patch_text[] =
384552
N_("y - stage this hunk\n"
385553
"n - do not stage this hunk\n"
@@ -389,6 +557,7 @@ N_("y - stage this hunk\n"
389557
"J - leave this hunk undecided, see next hunk\n"
390558
"k - leave this hunk undecided, see previous undecided hunk\n"
391559
"K - leave this hunk undecided, see previous hunk\n"
560+
"s - split the current hunk into smaller hunks\n"
392561
"? - print help\n");
393562

394563
static int patch_update_file(struct add_p_state *s,
@@ -445,6 +614,8 @@ static int patch_update_file(struct add_p_state *s,
445614
strbuf_addstr(&s->buf, ",j");
446615
if (hunk_index + 1 < file_diff->hunk_nr)
447616
strbuf_addstr(&s->buf, ",J");
617+
if (hunk->splittable_into > 1)
618+
strbuf_addstr(&s->buf, ",s");
448619

449620
if (file_diff->deleted)
450621
prompt_mode_type = PROMPT_DELETION;
@@ -505,6 +676,15 @@ static int patch_update_file(struct add_p_state *s,
505676
hunk_index = undecided_next;
506677
else
507678
err(s, _("No next hunk"));
679+
} else if (s->answer.buf[0] == 's') {
680+
size_t splittable_into = hunk->splittable_into;
681+
if (splittable_into < 2)
682+
err(s, _("Sorry, cannot split this hunk"));
683+
else if (!split_hunk(s, file_diff,
684+
hunk - file_diff->hunk))
685+
color_fprintf_ln(stdout, s->s.header_color,
686+
_("Split into %d hunks."),
687+
(int)splittable_into);
508688
} else
509689
color_fprintf(stdout, s->s.help_color,
510690
_(help_patch_text));

0 commit comments

Comments
 (0)