Skip to content

Commit d04f998

Browse files
committed
Merge branch 'jk/read-commit-buffer-data-after-free'

"git log --grep=<pattern>" used to look for the pattern in literal bytes of the commit log message and ignored the log-output encoding.

* jk/read-commit-buffer-data-after-free:
  log: re-encode commit messages before grepping
2 parents 7b6e784 + 04deccd commit d04f998

File tree

2 files changed

+78
-7
lines changed

2 files changed

+78
-7
lines changed

revision.c

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2268,7 +2268,10 @@ static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
22682268
static int commit_match(struct commit *commit, struct rev_info *opt)
22692269
{
22702270
int retval;
2271+
const char *encoding;
2272+
char *message;
22712273
struct strbuf buf = STRBUF_INIT;
2274+
22722275
if (!opt->grep_filter.pattern_list && !opt->grep_filter.header_list)
22732276
return 1;
22742277

@@ -2279,13 +2282,23 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
22792282
strbuf_addch(&buf, '\n');
22802283
}
22812284

2285+
/*
2286+
* We grep in the user's output encoding, under the assumption that it
2287+
* is the encoding they are most likely to write their grep pattern
2288+
* for. In addition, it means we will match the "notes" encoding below,
2289+
* so we will not end up with a buffer that has two different encodings
2290+
* in it.
2291+
*/
2292+
encoding = get_log_output_encoding();
2293+
message = logmsg_reencode(commit, encoding);
2294+
22822295
/* Copy the commit to temporary if we are using "fake" headers */
22832296
if (buf.len)
2284-
strbuf_addstr(&buf, commit->buffer);
2297+
strbuf_addstr(&buf, message);
22852298

22862299
if (opt->grep_filter.header_list && opt->mailmap) {
22872300
if (!buf.len)
2288-
strbuf_addstr(&buf, commit->buffer);
2301+
strbuf_addstr(&buf, message);
22892302

22902303
commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
22912304
commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
@@ -2294,18 +2307,18 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
22942307
/* Append "fake" message parts as needed */
22952308
if (opt->show_notes) {
22962309
if (!buf.len)
2297-
strbuf_addstr(&buf, commit->buffer);
2298-
format_display_notes(commit->object.sha1, &buf,
2299-
get_log_output_encoding(), 1);
2310+
strbuf_addstr(&buf, message);
2311+
format_display_notes(commit->object.sha1, &buf, encoding, 1);
23002312
}
23012313

2302-
/* Find either in the commit object, or in the temporary */
2314+
/* Find either in the original commit message, or in the temporary */
23032315
if (buf.len)
23042316
retval = grep_buffer(&opt->grep_filter, buf.buf, buf.len);
23052317
else
23062318
retval = grep_buffer(&opt->grep_filter,
2307-
commit->buffer, strlen(commit->buffer));
2319+
message, strlen(message));
23082320
strbuf_release(&buf);
2321+
logmsg_free(message, commit);
23092322
return retval;
23102323
}
23112324

t/t4210-log-i18n.sh

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/bin/sh
2+
3+
test_description='test log with i18n features'
4+
. ./test-lib.sh
5+
6+
# two forms of é
7+
utf8_e=$(printf '\303\251')
8+
latin1_e=$(printf '\351')
9+
10+
test_expect_success 'create commits in different encodings' '
11+
test_tick &&
12+
cat >msg <<-EOF &&
13+
utf8
14+
15+
t${utf8_e}st
16+
EOF
17+
git add msg &&
18+
git -c i18n.commitencoding=utf8 commit -F msg &&
19+
cat >msg <<-EOF &&
20+
latin1
21+
22+
t${latin1_e}st
23+
EOF
24+
git add msg &&
25+
git -c i18n.commitencoding=ISO-8859-1 commit -F msg
26+
'
27+
28+
test_expect_success 'log --grep searches in log output encoding (utf8)' '
29+
cat >expect <<-\EOF &&
30+
latin1
31+
utf8
32+
EOF
33+
git log --encoding=utf8 --format=%s --grep=$utf8_e >actual &&
34+
test_cmp expect actual
35+
'
36+
37+
test_expect_success 'log --grep searches in log output encoding (latin1)' '
38+
cat >expect <<-\EOF &&
39+
latin1
40+
utf8
41+
EOF
42+
git log --encoding=ISO-8859-1 --format=%s --grep=$latin1_e >actual &&
43+
test_cmp expect actual
44+
'
45+
46+
test_expect_success 'log --grep does not find non-reencoded values (utf8)' '
47+
>expect &&
48+
git log --encoding=utf8 --format=%s --grep=$latin1_e >actual &&
49+
test_cmp expect actual
50+
'
51+
52+
test_expect_success 'log --grep does not find non-reencoded values (latin1)' '
53+
>expect &&
54+
git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual &&
55+
test_cmp expect actual
56+
'
57+
58+
test_done

0 commit comments

Comments
 (0)