Skip to content

Commit 620e92b

Browse files
committed
Merge branch 'jk/parse-commit-with-malformed-ident'
The commit object parser has been taught to be a bit more lenient to parse timestamps on the author/committer line with a malformed author/committer ident. * jk/parse-commit-with-malformed-ident: parse_commit(): describe more date-parsing failure modes parse_commit(): handle broken whitespace-only timestamp parse_commit(): parse timestamp from end of line t4212: avoid putting git on left-hand side of pipe
2 parents 69c7866 + 90ef0f1 commit 620e92b

File tree

2 files changed

+98
-10
lines changed

2 files changed

+98
-10
lines changed

commit.c

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ struct commit *lookup_commit_reference_by_name(const char *name)
9696
static timestamp_t parse_commit_date(const char *buf, const char *tail)
9797
{
9898
const char *dateptr;
99+
const char *eol;
99100

100101
if (buf + 6 >= tail)
101102
return 0;
@@ -107,16 +108,56 @@ static timestamp_t parse_commit_date(const char *buf, const char *tail)
107108
return 0;
108109
if (memcmp(buf, "committer", 9))
109110
return 0;
110-
while (buf < tail && *buf++ != '>')
111-
/* nada */;
112-
if (buf >= tail)
111+
112+
/*
113+
* Jump to end-of-line so that we can walk backwards to find the
114+
* end-of-email ">". This is more forgiving of malformed cases
115+
* because unexpected characters tend to be in the name and email
116+
* fields.
117+
*/
118+
eol = memchr(buf, '\n', tail - buf);
119+
if (!eol)
113120
return 0;
114-
dateptr = buf;
115-
while (buf < tail && *buf++ != '\n')
116-
/* nada */;
117-
if (buf >= tail)
121+
dateptr = eol;
122+
while (dateptr > buf && dateptr[-1] != '>')
123+
dateptr--;
124+
if (dateptr == buf)
118125
return 0;
119-
/* dateptr < buf && buf[-1] == '\n', so parsing will stop at buf-1 */
126+
127+
/*
128+
* Trim leading whitespace, but make sure we have at least one
129+
* non-whitespace character, as parse_timestamp() will otherwise walk
130+
* right past the newline we found in "eol" when skipping whitespace
131+
* itself.
132+
*
133+
* In theory it would be sufficient to allow any character not matched
134+
* by isspace(), but there's a catch: our isspace() does not
135+
* necessarily match the behavior of parse_timestamp(), as the latter
136+
* is implemented by system routines which match more exotic control
137+
* codes, or even locale-dependent sequences.
138+
*
139+
* Since we expect the timestamp to be a number, we can check for that.
140+
* Anything else (e.g., a non-numeric token like "foo") would just
141+
* cause parse_timestamp() to return 0 anyway.
142+
*/
143+
while (dateptr < eol && isspace(*dateptr))
144+
dateptr++;
145+
if (!isdigit(*dateptr) && *dateptr != '-')
146+
return 0;
147+
148+
/*
149+
* We know there is at least one digit (or dash), so we'll begin
150+
* parsing there and stop at worst case at eol.
151+
*
152+
* Note that we may feed parse_timestamp() extra characters here if the
153+
* commit is malformed, and it will parse as far as it can. For
154+
* example, "123foo456" would return "123". That might be questionable
155+
* (versus returning "0"), but it would help in a hypothetical case
156+
* like "123456+0100", where the whitespace from the timezone is
157+
* missing. Since such syntactic errors may be baked into history and
158+
* hard to correct now, let's err on trying to make our best guess
159+
* here, rather than insist on perfect syntax.
160+
*/
120161
return parse_timestamp(dateptr, NULL, 10);
121162
}
122163

t/t4212-log-corrupt.sh

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ TEST_PASSES_SANITIZE_LEAK=true
88
test_expect_success 'setup' '
99
test_commit foo &&
1010
11-
git cat-file commit HEAD |
12-
sed "/^author /s/>/>-<>/" >broken_email.commit &&
11+
git cat-file commit HEAD >ok.commit &&
12+
sed "s/>/>-<>/" <ok.commit >broken_email.commit &&
13+
1314
git hash-object --literally -w -t commit broken_email.commit >broken_email.hash &&
1415
git update-ref refs/heads/broken_email $(cat broken_email.hash)
1516
'
@@ -43,6 +44,11 @@ test_expect_success 'git log --format with broken author email' '
4344
test_must_be_empty actual.err
4445
'
4546

47+
test_expect_success '--until handles broken email' '
48+
git rev-list --until=1980-01-01 broken_email >actual &&
49+
test_must_be_empty actual
50+
'
51+
4652
munge_author_date () {
4753
git cat-file commit "$1" >commit.orig &&
4854
sed "s/^\(author .*>\) [0-9]*/\1 $2/" <commit.orig >commit.munge &&
@@ -86,4 +92,45 @@ test_expect_success 'absurdly far-in-future date' '
8692
git log -1 --format=%ad $commit
8793
'
8894

95+
test_expect_success 'create commits with whitespace committer dates' '
96+
# It is important that this subject line is numeric, since we want to
97+
# be sure we are not confused by skipping whitespace and accidentally
98+
# parsing the subject as a timestamp.
99+
#
100+
# Do not use munge_author_date here. Besides not hitting the committer
101+
# line, it leaves the timezone intact, and we want nothing but
102+
# whitespace.
103+
#
104+
# We will make two munged commits here. The first, ws_commit, will
105+
# be purely spaces. The second contains a vertical tab, which is
106+
# considered a space by strtoumax(), but not by our isspace().
107+
test_commit 1234567890 &&
108+
git cat-file commit HEAD >commit.orig &&
109+
sed "s/>.*/> /" <commit.orig >commit.munge &&
110+
ws_commit=$(git hash-object --literally -w -t commit commit.munge) &&
111+
sed "s/>.*/> $(printf "\013")/" <commit.orig >commit.munge &&
112+
vt_commit=$(git hash-object --literally -w -t commit commit.munge)
113+
'
114+
115+
test_expect_success '--until treats whitespace date as sentinel' '
116+
echo $ws_commit >expect &&
117+
git rev-list --until=1980-01-01 $ws_commit >actual &&
118+
test_cmp expect actual &&
119+
120+
echo $vt_commit >expect &&
121+
git rev-list --until=1980-01-01 $vt_commit >actual &&
122+
test_cmp expect actual
123+
'
124+
125+
test_expect_success 'pretty-printer handles whitespace date' '
126+
# as with the %ad test above, we will show these as the empty string,
127+
# not the 1970 epoch date. This is intentional; see 7d9a281941 (t4212:
128+
# test bogus timestamps with git-log, 2014-02-24) for more discussion.
129+
echo : >expect &&
130+
git log -1 --format="%at:%ct" $ws_commit >actual &&
131+
test_cmp expect actual &&
132+
git log -1 --format="%at:%ct" $vt_commit >actual &&
133+
test_cmp expect actual
134+
'
135+
89136
test_done

0 commit comments

Comments
 (0)