Skip to content

Commit d5365b4

Browse files
committed
Merge branch 'jk/read-commit-buffer-data-after-free'
Clarify the ownership rule for commit->buffer field, which some callers incorrectly accessed without making sure it is populated. * jk/read-commit-buffer-data-after-free: logmsg_reencode: lazily load missing commit buffers logmsg_reencode: never return NULL commit: drop useless xstrdup of commit message
2 parents 27d46a7 + be5c9fb commit d5365b4

File tree

5 files changed

+85
-53
lines changed

5 files changed

+85
-53
lines changed

builtin/blame.c

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1420,32 +1420,18 @@ static void get_commit_info(struct commit *commit,
14201420
{
14211421
int len;
14221422
const char *subject, *encoding;
1423-
char *reencoded, *message;
1423+
char *message;
14241424

14251425
commit_info_init(ret);
14261426

1427-
/*
1428-
* We've operated without save_commit_buffer, so
1429-
* we now need to populate them for output.
1430-
*/
1431-
if (!commit->buffer) {
1432-
enum object_type type;
1433-
unsigned long size;
1434-
commit->buffer =
1435-
read_sha1_file(commit->object.sha1, &type, &size);
1436-
if (!commit->buffer)
1437-
die("Cannot read commit %s",
1438-
sha1_to_hex(commit->object.sha1));
1439-
}
14401427
encoding = get_log_output_encoding();
1441-
reencoded = logmsg_reencode(commit, encoding);
1442-
message = reencoded ? reencoded : commit->buffer;
1428+
message = logmsg_reencode(commit, encoding);
14431429
get_ac_line(message, "\nauthor ",
14441430
&ret->author, &ret->author_mail,
14451431
&ret->author_time, &ret->author_tz);
14461432

14471433
if (!detailed) {
1448-
free(reencoded);
1434+
logmsg_free(message, commit);
14491435
return;
14501436
}
14511437

@@ -1459,7 +1445,7 @@ static void get_commit_info(struct commit *commit,
14591445
else
14601446
strbuf_addf(&ret->summary, "(%s)", sha1_to_hex(commit->object.sha1));
14611447

1462-
free(reencoded);
1448+
logmsg_free(message, commit);
14631449
}
14641450

14651451
/*

builtin/commit.c

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -946,24 +946,14 @@ static void handle_untracked_files_arg(struct wt_status *s)
946946

947947
static const char *read_commit_message(const char *name)
948948
{
949-
const char *out_enc, *out;
949+
const char *out_enc;
950950
struct commit *commit;
951951

952952
commit = lookup_commit_reference_by_name(name);
953953
if (!commit)
954954
die(_("could not lookup commit %s"), name);
955955
out_enc = get_commit_output_encoding();
956-
out = logmsg_reencode(commit, out_enc);
957-
958-
/*
959-
* If we failed to reencode the buffer, just copy it
960-
* byte for byte so the user can try to fix it up.
961-
* This also handles the case where input and output
962-
* encodings are identical.
963-
*/
964-
if (out == NULL)
965-
out = xstrdup(commit->buffer);
966-
return out;
956+
return logmsg_reencode(commit, out_enc);
967957
}
968958

969959
static int parse_and_validate_options(int argc, const char *argv[],

commit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ extern int has_non_ascii(const char *text);
101101
struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */
102102
extern char *logmsg_reencode(const struct commit *commit,
103103
const char *output_encoding);
104+
extern void logmsg_free(char *msg, const struct commit *commit);
104105
extern void get_commit_format(const char *arg, struct rev_info *);
105106
extern const char *format_subject(struct strbuf *sb, const char *msg,
106107
const char *line_separator);

pretty.c

Lines changed: 70 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -524,10 +524,11 @@ static void add_merge_info(const struct pretty_print_context *pp,
524524
strbuf_addch(sb, '\n');
525525
}
526526

527-
static char *get_header(const struct commit *commit, const char *key)
527+
static char *get_header(const struct commit *commit, const char *msg,
528+
const char *key)
528529
{
529530
int key_len = strlen(key);
530-
const char *line = commit->buffer;
531+
const char *line = msg;
531532

532533
while (line) {
533534
const char *eol = strchr(line, '\n'), *next;
@@ -588,25 +589,77 @@ char *logmsg_reencode(const struct commit *commit,
588589
static const char *utf8 = "UTF-8";
589590
const char *use_encoding;
590591
char *encoding;
592+
char *msg = commit->buffer;
591593
char *out;
592594

595+
if (!msg) {
596+
enum object_type type;
597+
unsigned long size;
598+
599+
msg = read_sha1_file(commit->object.sha1, &type, &size);
600+
if (!msg)
601+
die("Cannot read commit object %s",
602+
sha1_to_hex(commit->object.sha1));
603+
if (type != OBJ_COMMIT)
604+
die("Expected commit for '%s', got %s",
605+
sha1_to_hex(commit->object.sha1), typename(type));
606+
}
607+
593608
if (!output_encoding || !*output_encoding)
594-
return NULL;
595-
encoding = get_header(commit, "encoding");
609+
return msg;
610+
encoding = get_header(commit, msg, "encoding");
596611
use_encoding = encoding ? encoding : utf8;
597-
if (same_encoding(use_encoding, output_encoding))
598-
if (encoding) /* we'll strip encoding header later */
599-
out = xstrdup(commit->buffer);
600-
else
601-
return NULL; /* nothing to do */
602-
else
603-
out = reencode_string(commit->buffer,
604-
output_encoding, use_encoding);
612+
if (same_encoding(use_encoding, output_encoding)) {
613+
/*
614+
* No encoding work to be done. If we have no encoding header
615+
* at all, then there's nothing to do, and we can return the
616+
* message verbatim (whether newly allocated or not).
617+
*/
618+
if (!encoding)
619+
return msg;
620+
621+
/*
622+
* Otherwise, we still want to munge the encoding header in the
623+
* result, which will be done by modifying the buffer. If we
624+
* are using a fresh copy, we can reuse it. But if we are using
625+
* the cached copy from commit->buffer, we need to duplicate it
626+
* to avoid munging commit->buffer.
627+
*/
628+
out = msg;
629+
if (out == commit->buffer)
630+
out = xstrdup(out);
631+
}
632+
else {
633+
/*
634+
* There's actual encoding work to do. Do the reencoding, which
635+
* still leaves the header to be replaced in the next step. At
636+
* this point, we are done with msg. If we allocated a fresh
637+
* copy, we can free it.
638+
*/
639+
out = reencode_string(msg, output_encoding, use_encoding);
640+
if (out && msg != commit->buffer)
641+
free(msg);
642+
}
643+
644+
/*
645+
* This replacement actually consumes the buffer we hand it, so we do
646+
* not have to worry about freeing the old "out" here.
647+
*/
605648
if (out)
606649
out = replace_encoding_header(out, output_encoding);
607650

608651
free(encoding);
609-
return out;
652+
/*
653+
* If the re-encoding failed, out might be NULL here; in that
654+
* case we just return the commit message verbatim.
655+
*/
656+
return out ? out : msg;
657+
}
658+
659+
void logmsg_free(char *msg, const struct commit *commit)
660+
{
661+
if (msg != commit->buffer)
662+
free(msg);
610663
}
611664

612665
static int mailmap_name(const char **email, size_t *email_len,
@@ -1278,14 +1331,11 @@ void format_commit_message(const struct commit *commit,
12781331
context.pretty_ctx = pretty_ctx;
12791332
context.wrap_start = sb->len;
12801333
context.message = logmsg_reencode(commit, output_enc);
1281-
if (!context.message)
1282-
context.message = commit->buffer;
12831334

12841335
strbuf_expand(sb, format, format_commit_item, &context);
12851336
rewrap_message_tail(sb, &context, 0, 0, 0);
12861337

1287-
if (context.message != commit->buffer)
1288-
free(context.message);
1338+
logmsg_free(context.message, commit);
12891339
free(context.signature.gpg_output);
12901340
free(context.signature.signer);
12911341
}
@@ -1432,7 +1482,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
14321482
{
14331483
unsigned long beginning_of_body;
14341484
int indent = 4;
1435-
const char *msg = commit->buffer;
1485+
const char *msg;
14361486
char *reencoded;
14371487
const char *encoding;
14381488
int need_8bit_cte = pp->need_8bit_cte;
@@ -1443,10 +1493,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
14431493
}
14441494

14451495
encoding = get_log_output_encoding();
1446-
reencoded = logmsg_reencode(commit, encoding);
1447-
if (reencoded) {
1448-
msg = reencoded;
1449-
}
1496+
msg = reencoded = logmsg_reencode(commit, encoding);
14501497

14511498
if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL)
14521499
indent = 0;
@@ -1503,7 +1550,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
15031550
if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
15041551
strbuf_addch(sb, '\n');
15051552

1506-
free(reencoded);
1553+
logmsg_free(reencoded, commit);
15071554
}
15081555

15091556
void pp_commit_easy(enum cmit_fmt fmt, const struct commit *commit,

t/t4042-diff-textconv-caching.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,12 @@ test_expect_success 'switching diff driver produces correct results' '
106106
test_cmp expect actual
107107
'
108108

109+
# The point here is to test that we can log the notes cache and still use it to
110+
# produce a diff later (older versions of git would segfault on this). It's
111+
# much more likely to come up in the real world with "log --all -p", but using
112+
# --no-walk lets us reliably reproduce the order of traversal.
113+
test_expect_success 'log notes cache and still use cache for -p' '
114+
git log --no-walk -p refs/notes/textconv/magic HEAD
115+
'
116+
109117
test_done

0 commit comments

Comments
 (0)