Skip to content

Commit 5a12c88

Browse files
jrngitster
authored and committed
apply: handle traditional patches with space in filename
To discover filenames from the --- and +++ lines in a traditional unified diff, currently "git apply" scans forward for a whitespace character on each line and stops there. It can't use the whole line because "diff -u" likes to include timestamps, like so:

    --- foo 2000-07-12 16:56:50.020000414 -0500
    +++ bar 2010-07-12 16:56:50.020000414 -0500

The whitespace-seeking heuristic works great, even when the tab has been converted to spaces by some email + copy-and-paste related corruption.

Except for one problem: if the filename itself contains whitespace, the inferred filename will be too short.

When Giuseppe ran into this problem, it was for a file creation patch (for debian/licenses/LICENSE.global BSD-style Chromium). So one can't use the list of files present in the index to deduce an appropriate filename (not to mention that way lies madness; see v0.99~402, 2005-05-31).

Instead, look for a timestamp and use that if present to mark the end of the filename. If no timestamp is present, the old heuristic is used, with one exception: the space character \040 is not considered terminating whitespace any more unless it is followed by a timestamp.

Reported-by: Giuseppe Iuculano <[email protected]>
Acked-by: Guido Günther <[email protected]>
Signed-off-by: Jonathan Nieder <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent c51c0da commit 5a12c88

File tree

2 files changed

+181
-16
lines changed

2 files changed

+181
-16
lines changed

builtin/apply.c

Lines changed: 179 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -449,23 +449,157 @@ static char *find_name_gnu(const char *line, char *def, int p_value)
449449
return squash_slash(strbuf_detach(&name, NULL));
450450
}
451451

452-
static char *find_name(const char *line, char *def, int p_value, int terminate)
452+
/*
 * Return the length of a trailing timezone of the form " +0500" or
 * " -0500" (one space, a sign, then digits) at the end of
 * line[0..len), or 0 when the text does not end with one.
 *
 * line: start of the text to inspect; only the first len bytes are read.
 * len:  number of bytes of line to consider.
 */
static size_t tz_len(const char *line, size_t len)
{
	const size_t width = strlen(" +0500");
	const char *tz, *p;

	if (len < width || line[len - width] != ' ')
		return 0;
	tz = line + len - width;

	if (tz[1] != '+' && tz[1] != '-')
		return 0;

	for (p = tz + 2; p != line + len; p++) {
		/*
		 * The cast keeps the check well defined for bytes >= 0x80
		 * even when the <ctype.h> isdigit() is the one in scope.
		 */
		if (!isdigit((unsigned char)*p))
			return 0;
	}

	return line + len - tz;
}
}
469+
470+
/*
 * Return the length of a trailing date at the end of line[0..len), or 0
 * when the text does not end with one.
 *
 * Accepted shapes are "72-02-05" (two-digit year) and "1972-02-05"
 * (four-digit year); the longer form is reported when two more digits
 * immediately precede the short form.
 *
 * line: start of the text to inspect; only the first len bytes are read.
 * len:  number of bytes of line to consider.
 */
static size_t date_len(const char *line, size_t len)
{
	const char *date, *p;

	if (len < strlen("72-02-05") || line[len - strlen("-05")] != '-')
		return 0;
	p = date = line + len - strlen("72-02-05");

	/*
	 * The casts keep isdigit() well defined for bytes >= 0x80 even
	 * when the <ctype.h> version is the one in scope.
	 */
	if (!isdigit((unsigned char)*p++) || !isdigit((unsigned char)*p++) ||
	    *p++ != '-' ||
	    !isdigit((unsigned char)*p++) || !isdigit((unsigned char)*p++) ||
	    *p++ != '-' ||
	    !isdigit((unsigned char)*p++) || !isdigit((unsigned char)*p++))
		return 0;	/* Not a date. */

	/* date >= line here, so the difference is never negative. */
	if ((size_t)(date - line) >= strlen("19") &&
	    isdigit((unsigned char)date[-1]) &&
	    isdigit((unsigned char)date[-2]))	/* 4-digit year */
		date -= strlen("19");

	return line + len - date;
}
489+
490+
/*
 * Return the length of a trailing " HH:MM:SS" (leading space included)
 * at the end of line[0..len), or 0 when the text does not end with one.
 * Two digits are required for each field, hours included.
 *
 * line: start of the text to inspect; only the first len bytes are read.
 * len:  number of bytes of line to consider.
 */
static size_t short_time_len(const char *line, size_t len)
{
	const char *time, *p;

	if (len < strlen(" 07:01:32") || line[len - strlen(":32")] != ':')
		return 0;
	p = time = line + len - strlen(" 07:01:32");

	/*
	 * Permit 1-digit hours?
	 *
	 * The casts keep isdigit() well defined for bytes >= 0x80 even
	 * when the <ctype.h> version is the one in scope.
	 */
	if (*p++ != ' ' ||
	    !isdigit((unsigned char)*p++) || !isdigit((unsigned char)*p++) ||
	    *p++ != ':' ||
	    !isdigit((unsigned char)*p++) || !isdigit((unsigned char)*p++) ||
	    *p++ != ':' ||
	    !isdigit((unsigned char)*p++) || !isdigit((unsigned char)*p++))
		return 0;	/* Not a time. */

	return line + len - time;
}
507+
508+
/*
 * Return the length of a trailing time with fractional seconds, e.g.
 * " 19:41:17.620000023", at the end of line[0..len), or 0 when the text
 * does not end with one.  The part before the '.' is validated by
 * short_time_len(), so its leading space is counted in the result.
 *
 * line: start of the text to inspect; only the first len bytes are read.
 * len:  number of bytes of line to consider.
 */
static size_t fractional_time_len(const char *line, size_t len)
{
	const char *p;
	size_t n;

	/*
	 * Expected format: 19:41:17.620000023
	 *
	 * The casts keep isdigit() well defined for bytes >= 0x80 even
	 * when the <ctype.h> version is the one in scope.
	 */
	if (!len || !isdigit((unsigned char)line[len - 1]))
		return 0;
	p = line + len - 1;

	/* Fractional seconds: walk back over the digits to the '.'. */
	while (p > line && isdigit((unsigned char)*p))
		p--;
	if (*p != '.')
		return 0;

	/* Hours, minutes, and whole seconds. */
	n = short_time_len(line, p - line);
	if (!n)
		return 0;

	/* '.' plus the fraction digits, plus the " HH:MM:SS" before them. */
	return line + len - p + n;
}
531+
532+
/*
 * Count the run of ' ' characters that ends line[0..len).
 *
 * Returns 0 when the text is empty or does not end with a space, and
 * len when the text consists of spaces only.
 */
static size_t trailing_spaces_len(const char *line, size_t len)
{
	size_t kept = len;	/* bytes left once the spaces are dropped */

	while (kept > 0 && line[kept - 1] == ' ')
		kept--;

	return len - kept;
}
550+
551+
/*
 * Return the length of the timestamp that ends line[0..len), including
 * the tab (or run of spaces) separating it from what precedes it, or 0
 * when no timestamp is recognized.
 *
 * The text is peeled from the right: an optional timezone, then an
 * optional time (with or without fractional seconds), then a mandatory
 * date.  Something must precede the date, and the byte just before it
 * must be a tab or a space.
 *
 * line: start of the text to inspect; only the first len bytes are read.
 * len:  number of bytes of line to consider.
 */
static size_t diff_timestamp_len(const char *line, size_t len)
{
	const char *end = line + len;
	size_t n;

	/*
	 * Posix: 2010-07-05 19:41:17
	 * GNU: 2010-07-05 19:41:17.620000023 -0500
	 */

	/*
	 * Every accepted form ends in a digit.  Check len first so that
	 * an empty range never reads line[-1]; the cast keeps isdigit()
	 * well defined for bytes >= 0x80 with the <ctype.h> version.
	 */
	if (!len || !isdigit((unsigned char)end[-1]))
		return 0;

	n = tz_len(line, end - line);
	end -= n;

	n = short_time_len(line, end - line);
	if (!n)
		n = fractional_time_len(line, end - line);
	end -= n;

	n = date_len(line, end - line);
	if (!n)	/* No date.  Too bad. */
		return 0;
	end -= n;

	if (end == line)	/* No space before date. */
		return 0;
	if (end[-1] == '\t') {	/* Success! */
		end--;
		return line + len - end;
	}
	if (end[-1] != ' ')	/* No space before date. */
		return 0;

	/* Whitespace damage: swallow every space before the date. */
	end -= trailing_spaces_len(line, end - line);
	return line + len - end;
}
590+
591+
static char *find_name_common(const char *line, char *def, int p_value,
592+
const char *end, int terminate)
593+
{
594+
int len;
595+
const char *start = NULL;
596+
463597
if (p_value == 0)
464598
start = line;
465-
for (;;) {
599+
while (line != end) {
466600
char c = *line;
467601

468-
if (isspace(c)) {
602+
if (!end && isspace(c)) {
469603
if (c == '\n')
470604
break;
471605
if (name_terminate(start, line-start, c, terminate))
@@ -505,6 +639,37 @@ static char *find_name(const char *line, char *def, int p_value, int terminate)
505639
return squash_slash(xmemdupz(start, len));
506640
}
507641

642+
/*
 * Extract a filename from line.  A line that starts with a double quote
 * is first handed to find_name_gnu(); if that yields nothing, or the
 * line is not quoted, find_name_common() is called with no explicit end
 * pointer and the caller's terminate flags.
 */
static char *find_name(const char *line, char *def, int p_value, int terminate)
{
	char *quoted = NULL;

	if (*line == '"')
		quoted = find_name_gnu(line, def, p_value);
	if (quoted)
		return quoted;

	return find_name_common(line, def, p_value, NULL, terminate);
}
652+
653+
static char *find_name_traditional(const char *line, char *def, int p_value)
654+
{
655+
size_t len = strlen(line);
656+
size_t date_len;
657+
658+
if (*line == '"') {
659+
char *name = find_name_gnu(line, def, p_value);
660+
if (name)
661+
return name;
662+
}
663+
664+
len = strchrnul(line, '\n') - line;
665+
date_len = diff_timestamp_len(line, len);
666+
if (!date_len)
667+
return find_name_common(line, def, p_value, NULL, TERM_TAB);
668+
len -= date_len;
669+
670+
return find_name_common(line, def, p_value, line + len, 0);
671+
}
672+
508673
static int count_slashes(const char *cp)
509674
{
510675
int cnt = 0;
@@ -527,7 +692,7 @@ static int guess_p_value(const char *nameline)
527692

528693
if (is_dev_null(nameline))
529694
return -1;
530-
name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB);
695+
name = find_name_traditional(nameline, NULL, 0);
531696
if (!name)
532697
return -1;
533698
cp = strchr(name, '/');
@@ -646,16 +811,16 @@ static void parse_traditional_patch(const char *first, const char *second, struc
646811
if (is_dev_null(first)) {
647812
patch->is_new = 1;
648813
patch->is_delete = 0;
649-
name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB);
814+
name = find_name_traditional(second, NULL, p_value);
650815
patch->new_name = name;
651816
} else if (is_dev_null(second)) {
652817
patch->is_new = 0;
653818
patch->is_delete = 1;
654-
name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
819+
name = find_name_traditional(first, NULL, p_value);
655820
patch->old_name = name;
656821
} else {
657-
name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
658-
name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB);
822+
name = find_name_traditional(first, NULL, p_value);
823+
name = find_name_traditional(second, name, p_value);
659824
if (has_epoch_timestamp(first)) {
660825
patch->is_new = 1;
661826
patch->is_delete = 0;

t/t4135-apply-weird-filenames.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ try_filename() {
5959
}
6060

6161
try_filename 'plain' 'postimage.txt'
62-
try_filename 'with spaces' 'post image.txt' '' success failure failure
63-
try_filename 'with tab' 'post image.txt' FUNNYNAMES success failure failure
62+
try_filename 'with spaces' 'post image.txt'
63+
try_filename 'with tab' 'post image.txt' FUNNYNAMES
6464
try_filename 'with backslash' 'post\image.txt' BSLASHPSPEC
6565
try_filename 'with quote' '"postimage".txt' FUNNYNAMES success failure success
6666

0 commit comments

Comments
 (0)