Merge branch 'jk/read-commit-buffer-data-after-free'

gitster · gitster · commit d5365b432747 · 2013-02-04T10:25:18.000-08:00
Clarify the ownership rule for commit-&gt;buffer field, which some
callers incorrectly accessed without making sure it is populated.

* jk/read-commit-buffer-data-after-free:
  logmsg_reencode: lazily load missing commit buffers
  logmsg_reencode: never return NULL
  commit: drop useless xstrdup of commit message
diff --git a/builtin/blame.c b/builtin/blame.c
@@ -1420,32 +1420,18 @@ static void get_commit_info(struct commit *commit,
 {
 	int len;
 	const char *subject, *encoding;
-	char *reencoded, *message;
+	char *message;
 
 	commit_info_init(ret);
 
-	/*
-	 * We've operated without save_commit_buffer, so
-	 * we now need to populate them for output.
-	 */
-	if (!commit->buffer) {
-		enum object_type type;
-		unsigned long size;
-		commit->buffer =
-			read_sha1_file(commit->object.sha1, &type, &size);
-		if (!commit->buffer)
-			die("Cannot read commit %s",
-			    sha1_to_hex(commit->object.sha1));
-	}
 	encoding = get_log_output_encoding();
-	reencoded = logmsg_reencode(commit, encoding);
-	message   = reencoded ? reencoded : commit->buffer;
+	message = logmsg_reencode(commit, encoding);
 	get_ac_line(message, "\nauthor ",
 		    &ret->author, &ret->author_mail,
 		    &ret->author_time, &ret->author_tz);
 
 	if (!detailed) {
-		free(reencoded);
+		logmsg_free(message, commit);
 		return;
 	}
 
@@ -1459,7 +1445,7 @@ static void get_commit_info(struct commit *commit,
 	else
 		strbuf_addf(&ret->summary, "(%s)", sha1_to_hex(commit->object.sha1));
 
-	free(reencoded);
+	logmsg_free(message, commit);
 }
 
 /*
diff --git a/builtin/commit.c b/builtin/commit.c
@@ -946,24 +946,14 @@ static void handle_untracked_files_arg(struct wt_status *s)
 
 static const char *read_commit_message(const char *name)
 {
-	const char *out_enc, *out;
+	const char *out_enc;
 	struct commit *commit;
 
 	commit = lookup_commit_reference_by_name(name);
 	if (!commit)
 		die(_("could not lookup commit %s"), name);
 	out_enc = get_commit_output_encoding();
-	out = logmsg_reencode(commit, out_enc);
-
-	/*
-	 * If we failed to reencode the buffer, just copy it
-	 * byte for byte so the user can try to fix it up.
-	 * This also handles the case where input and output
-	 * encodings are identical.
-	 */
-	if (out == NULL)
-		out = xstrdup(commit->buffer);
-	return out;
+	return logmsg_reencode(commit, out_enc);
 }
 
 static int parse_and_validate_options(int argc, const char *argv[],
diff --git a/commit.h b/commit.h
@@ -101,6 +101,7 @@ extern int has_non_ascii(const char *text);
 struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */
 extern char *logmsg_reencode(const struct commit *commit,
 			     const char *output_encoding);
+extern void logmsg_free(char *msg, const struct commit *commit);
 extern void get_commit_format(const char *arg, struct rev_info *);
 extern const char *format_subject(struct strbuf *sb, const char *msg,
 				  const char *line_separator);
diff --git a/pretty.c b/pretty.c
@@ -524,10 +524,11 @@ static void add_merge_info(const struct pretty_print_context *pp,
 	strbuf_addch(sb, '\n');
 }
 
-static char *get_header(const struct commit *commit, const char *key)
+static char *get_header(const struct commit *commit, const char *msg,
+			const char *key)
 {
 	int key_len = strlen(key);
-	const char *line = commit->buffer;
+	const char *line = msg;
 
 	while (line) {
 		const char *eol = strchr(line, '\n'), *next;
@@ -588,25 +589,77 @@ char *logmsg_reencode(const struct commit *commit,
 	static const char *utf8 = "UTF-8";
 	const char *use_encoding;
 	char *encoding;
+	char *msg = commit->buffer;
 	char *out;
 
+	if (!msg) {
+		enum object_type type;
+		unsigned long size;
+
+		msg = read_sha1_file(commit->object.sha1, &type, &size);
+		if (!msg)
+			die("Cannot read commit object %s",
+			    sha1_to_hex(commit->object.sha1));
+		if (type != OBJ_COMMIT)
+			die("Expected commit for '%s', got %s",
+			    sha1_to_hex(commit->object.sha1), typename(type));
+	}
+
 	if (!output_encoding || !*output_encoding)
-		return NULL;
-	encoding = get_header(commit, "encoding");
+		return msg;
+	encoding = get_header(commit, msg, "encoding");
 	use_encoding = encoding ? encoding : utf8;
-	if (same_encoding(use_encoding, output_encoding))
-		if (encoding) /* we'll strip encoding header later */
-			out = xstrdup(commit->buffer);
-		else
-			return NULL; /* nothing to do */
-	else
-		out = reencode_string(commit->buffer,
-				      output_encoding, use_encoding);
+	if (same_encoding(use_encoding, output_encoding)) {
+		/*
+		 * No encoding work to be done. If we have no encoding header
+		 * at all, then there's nothing to do, and we can return the
+		 * message verbatim (whether newly allocated or not).
+		 */
+		if (!encoding)
+			return msg;
+
+		/*
+		 * Otherwise, we still want to munge the encoding header in the
+		 * result, which will be done by modifying the buffer. If we
+		 * are using a fresh copy, we can reuse it. But if we are using
+		 * the cached copy from commit->buffer, we need to duplicate it
+		 * to avoid munging commit->buffer.
+		 */
+		out = msg;
+		if (out == commit->buffer)
+			out = xstrdup(out);
+	}
+	else {
+		/*
+		 * There's actual encoding work to do. Do the reencoding, which
+		 * still leaves the header to be replaced in the next step. At
+		 * this point, we are done with msg. If we allocated a fresh
+		 * copy, we can free it.
+		 */
+		out = reencode_string(msg, output_encoding, use_encoding);
+		if (out && msg != commit->buffer)
+			free(msg);
+	}
+
+	/*
+	 * This replacement actually consumes the buffer we hand it, so we do
+	 * not have to worry about freeing the old "out" here.
+	 */
 	if (out)
 		out = replace_encoding_header(out, output_encoding);
 
 	free(encoding);
-	return out;
+	/*
+	 * If the re-encoding failed, out might be NULL here; in that
+	 * case we just return the commit message verbatim.
+	 */
+	return out ? out : msg;
+}
+
+void logmsg_free(char *msg, const struct commit *commit)
+{
+	if (msg != commit->buffer)
+		free(msg);
 }
 
 static int mailmap_name(const char **email, size_t *email_len,
@@ -1278,14 +1331,11 @@ void format_commit_message(const struct commit *commit,
 	context.pretty_ctx = pretty_ctx;
 	context.wrap_start = sb->len;
 	context.message = logmsg_reencode(commit, output_enc);
-	if (!context.message)
-		context.message = commit->buffer;
 
 	strbuf_expand(sb, format, format_commit_item, &context);
 	rewrap_message_tail(sb, &context, 0, 0, 0);
 
-	if (context.message != commit->buffer)
-		free(context.message);
+	logmsg_free(context.message, commit);
 	free(context.signature.gpg_output);
 	free(context.signature.signer);
 }
@@ -1432,7 +1482,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
 {
 	unsigned long beginning_of_body;
 	int indent = 4;
-	const char *msg = commit->buffer;
+	const char *msg;
 	char *reencoded;
 	const char *encoding;
 	int need_8bit_cte = pp->need_8bit_cte;
@@ -1443,10 +1493,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
 	}
 
 	encoding = get_log_output_encoding();
-	reencoded = logmsg_reencode(commit, encoding);
-	if (reencoded) {
-		msg = reencoded;
-	}
+	msg = reencoded = logmsg_reencode(commit, encoding);
 
 	if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL)
 		indent = 0;
@@ -1503,7 +1550,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
 	if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
 		strbuf_addch(sb, '\n');
 
-	free(reencoded);
+	logmsg_free(reencoded, commit);
 }
 
 void pp_commit_easy(enum cmit_fmt fmt, const struct commit *commit,
diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh
@@ -106,4 +106,12 @@ test_expect_success 'switching diff driver produces correct results' '
 	test_cmp expect actual
 '
 
+# The point here is to test that we can log the notes cache and still use it to
+# produce a diff later (older versions of git would segfault on this). It's
+# much more likely to come up in the real world with "log --all -p", but using
+# --no-walk lets us reliably reproduce the order of traversal.
+test_expect_success 'log notes cache and still use cache for -p' '
+	git log --no-walk -p refs/notes/textconv/magic HEAD
+'
+
 test_done