Skip to content

Commit abf2bb8

Browse files
committed
Merge branch 'jk/hash-object-fsck'
"git hash-object" now checks that the resulting object is well formed with the same code as "git fsck". * jk/hash-object-fsck: fsck: do not assume NUL-termination of buffers hash-object: use fsck for object checks fsck: provide a function to fsck buffer without object struct t: use hash-object --literally when created malformed objects t7030: stop using invalid tag name t1006: stop using 0-padded timestamps t1007: modernize malformed object tests
2 parents 4ac326f + 8e43090 commit abf2bb8

21 files changed

+294
-96
lines changed

fsck.c

Lines changed: 71 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,23 @@ static int fsck_tree(const struct object_id *tree_oid,
748748
return retval;
749749
}
750750

751+
/*
752+
* Confirm that the headers of a commit or tag object end in a reasonable way,
753+
* either with the usual "\n\n" separator, or at least with a trailing newline
754+
* on the final header line.
755+
*
756+
* This property is important for the memory safety of our callers. It allows
757+
* them to scan the buffer linewise without constantly checking the remaining
758+
* size as long as:
759+
*
760+
* - they check that there are bytes left in the buffer at the start of any
761+
* line (i.e., that the last newline they saw was not the final one we
762+
* found here)
763+
*
764+
* - any intra-line scanning they do will stop at a newline, which will worst
765+
* case hit the newline we found here as the end-of-header. This makes it
766+
* OK for them to use helpers like parse_oid_hex(), or even skip_prefix().
767+
*/
751768
static int verify_headers(const void *data, unsigned long size,
752769
const struct object_id *oid, enum object_type type,
753770
struct fsck_options *options)
@@ -808,6 +825,20 @@ static int fsck_ident(const char **ident,
808825
if (*p != ' ')
809826
return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");
810827
p++;
828+
/*
829+
* Our timestamp parser is based on the C strto*() functions, which
830+
* will happily eat whitespace, including the newline that is supposed
831+
* to prevent us walking past the end of the buffer. So do our own
832+
* scan, skipping linear whitespace but not newlines, and then
833+
* confirming we found a digit. We _could_ be even more strict here,
834+
* as we really expect only a single space, but since we have
835+
* traditionally allowed extra whitespace, we'll continue to do so.
836+
*/
837+
while (*p == ' ' || *p == '\t')
838+
p++;
839+
if (!isdigit(*p))
840+
return report(options, oid, type, FSCK_MSG_BAD_DATE,
841+
"invalid author/committer line - bad date");
811842
if (*p == '0' && p[1] != ' ')
812843
return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
813844
if (date_overflows(parse_timestamp(p, &end, 10)))
@@ -834,20 +865,26 @@ static int fsck_commit(const struct object_id *oid,
834865
unsigned author_count;
835866
int err;
836867
const char *buffer_begin = buffer;
868+
const char *buffer_end = buffer + size;
837869
const char *p;
838870

871+
/*
872+
* We _must_ stop parsing immediately if this reports failure, as the
873+
* memory safety of the rest of the function depends on it. See the
874+
* comment above the definition of verify_headers() for more details.
875+
*/
839876
if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
840877
return -1;
841878

842-
if (!skip_prefix(buffer, "tree ", &buffer))
879+
if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
843880
return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
844881
if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
845882
err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
846883
if (err)
847884
return err;
848885
}
849886
buffer = p + 1;
850-
while (skip_prefix(buffer, "parent ", &buffer)) {
887+
while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
851888
if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
852889
err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
853890
if (err)
@@ -856,7 +893,7 @@ static int fsck_commit(const struct object_id *oid,
856893
buffer = p + 1;
857894
}
858895
author_count = 0;
859-
while (skip_prefix(buffer, "author ", &buffer)) {
896+
while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
860897
author_count++;
861898
err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
862899
if (err)
@@ -868,7 +905,7 @@ static int fsck_commit(const struct object_id *oid,
868905
err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
869906
if (err)
870907
return err;
871-
if (!skip_prefix(buffer, "committer ", &buffer))
908+
if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
872909
return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
873910
err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
874911
if (err)
@@ -899,13 +936,19 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
899936
int ret = 0;
900937
char *eol;
901938
struct strbuf sb = STRBUF_INIT;
939+
const char *buffer_end = buffer + size;
902940
const char *p;
903941

942+
/*
943+
* We _must_ stop parsing immediately if this reports failure, as the
944+
* memory safety of the rest of the function depends on it. See the
945+
* comment above the definition of verify_headers() for more details.
946+
*/
904947
ret = verify_headers(buffer, size, oid, OBJ_TAG, options);
905948
if (ret)
906949
goto done;
907950

908-
if (!skip_prefix(buffer, "object ", &buffer)) {
951+
if (buffer >= buffer_end || !skip_prefix(buffer, "object ", &buffer)) {
909952
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");
910953
goto done;
911954
}
@@ -916,11 +959,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
916959
}
917960
buffer = p + 1;
918961

919-
if (!skip_prefix(buffer, "type ", &buffer)) {
962+
if (buffer >= buffer_end || !skip_prefix(buffer, "type ", &buffer)) {
920963
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");
921964
goto done;
922965
}
923-
eol = strchr(buffer, '\n');
966+
eol = memchr(buffer, '\n', buffer_end - buffer);
924967
if (!eol) {
925968
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");
926969
goto done;
@@ -932,11 +975,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
932975
goto done;
933976
buffer = eol + 1;
934977

935-
if (!skip_prefix(buffer, "tag ", &buffer)) {
978+
if (buffer >= buffer_end || !skip_prefix(buffer, "tag ", &buffer)) {
936979
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");
937980
goto done;
938981
}
939-
eol = strchr(buffer, '\n');
982+
eol = memchr(buffer, '\n', buffer_end - buffer);
940983
if (!eol) {
941984
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");
942985
goto done;
@@ -952,18 +995,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
952995
}
953996
buffer = eol + 1;
954997

955-
if (!skip_prefix(buffer, "tagger ", &buffer)) {
998+
if (buffer >= buffer_end || !skip_prefix(buffer, "tagger ", &buffer)) {
956999
/* early tags do not contain 'tagger' lines; warn only */
9571000
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");
9581001
if (ret)
9591002
goto done;
9601003
}
9611004
else
9621005
ret = fsck_ident(&buffer, oid, OBJ_TAG, options);
963-
if (!*buffer)
964-
goto done;
9651006

966-
if (!starts_with(buffer, "\n")) {
1007+
if (buffer < buffer_end && !starts_with(buffer, "\n")) {
9671008
/*
9681009
* The verify_headers() check will allow
9691010
* e.g. "[...]tagger <tagger>\nsome
@@ -1237,19 +1278,26 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
12371278
if (!obj)
12381279
return report(options, NULL, OBJ_NONE, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
12391280

1240-
if (obj->type == OBJ_BLOB)
1241-
return fsck_blob(&obj->oid, data, size, options);
1242-
if (obj->type == OBJ_TREE)
1243-
return fsck_tree(&obj->oid, data, size, options);
1244-
if (obj->type == OBJ_COMMIT)
1245-
return fsck_commit(&obj->oid, data, size, options);
1246-
if (obj->type == OBJ_TAG)
1247-
return fsck_tag(&obj->oid, data, size, options);
1281+
return fsck_buffer(&obj->oid, obj->type, data, size, options);
1282+
}
1283+
1284+
int fsck_buffer(const struct object_id *oid, enum object_type type,
1285+
void *data, unsigned long size,
1286+
struct fsck_options *options)
1287+
{
1288+
if (type == OBJ_BLOB)
1289+
return fsck_blob(oid, data, size, options);
1290+
if (type == OBJ_TREE)
1291+
return fsck_tree(oid, data, size, options);
1292+
if (type == OBJ_COMMIT)
1293+
return fsck_commit(oid, data, size, options);
1294+
if (type == OBJ_TAG)
1295+
return fsck_tag(oid, data, size, options);
12481296

1249-
return report(options, &obj->oid, obj->type,
1297+
return report(options, oid, type,
12501298
FSCK_MSG_UNKNOWN_TYPE,
12511299
"unknown type '%d' (internal fsck error)",
1252-
obj->type);
1300+
type);
12531301
}
12541302

12551303
int fsck_error_function(struct fsck_options *o,

fsck.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,14 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
183183
int fsck_object(struct object *obj, void *data, unsigned long size,
184184
struct fsck_options *options);
185185

186+
/*
187+
* Same as fsck_object(), but for when the caller doesn't have an object
188+
* struct.
189+
*/
190+
int fsck_buffer(const struct object_id *oid, enum object_type,
191+
void *data, unsigned long size,
192+
struct fsck_options *options);
193+
186194
/*
187195
* fsck a tag, and pass info about it back to the caller. This is
188196
* exposed fsck_object() internals for git-mktag(1).

object-file.c

Lines changed: 23 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "object-store.h"
3434
#include "promisor-remote.h"
3535
#include "submodule.h"
36+
#include "fsck.h"
3637

3738
/* The maximum size for an object header. */
3839
#define MAX_HEADER_LEN 32
@@ -2284,32 +2285,21 @@ int repo_has_object_file(struct repository *r,
22842285
return repo_has_object_file_with_flags(r, oid, 0);
22852286
}
22862287

2287-
static void check_tree(const void *buf, size_t size)
2288-
{
2289-
struct tree_desc desc;
2290-
struct name_entry entry;
2291-
2292-
init_tree_desc(&desc, buf, size);
2293-
while (tree_entry(&desc, &entry))
2294-
/* do nothing
2295-
* tree_entry() will die() on malformed entries */
2296-
;
2297-
}
2298-
2299-
static void check_commit(const void *buf, size_t size)
2300-
{
2301-
struct commit c;
2302-
memset(&c, 0, sizeof(c));
2303-
if (parse_commit_buffer(the_repository, &c, buf, size, 0))
2304-
die(_("corrupt commit"));
2305-
}
2306-
2307-
static void check_tag(const void *buf, size_t size)
2308-
{
2309-
struct tag t;
2310-
memset(&t, 0, sizeof(t));
2311-
if (parse_tag_buffer(the_repository, &t, buf, size))
2312-
die(_("corrupt tag"));
2288+
/*
2289+
* We can't use the normal fsck_error_function() for index_mem(),
2290+
* because we don't yet have a valid oid for it to report. Instead,
2291+
* report the minimal fsck error here, and rely on the caller to
2292+
* give more context.
2293+
*/
2294+
static int hash_format_check_report(struct fsck_options *opts,
2295+
const struct object_id *oid,
2296+
enum object_type object_type,
2297+
enum fsck_msg_type msg_type,
2298+
enum fsck_msg_id msg_id,
2299+
const char *message)
2300+
{
2301+
error(_("object fails fsck: %s"), message);
2302+
return 1;
23132303
}
23142304

23152305
static int index_mem(struct index_state *istate,
@@ -2336,12 +2326,13 @@ static int index_mem(struct index_state *istate,
23362326
}
23372327
}
23382328
if (flags & HASH_FORMAT_CHECK) {
2339-
if (type == OBJ_TREE)
2340-
check_tree(buf, size);
2341-
if (type == OBJ_COMMIT)
2342-
check_commit(buf, size);
2343-
if (type == OBJ_TAG)
2344-
check_tag(buf, size);
2329+
struct fsck_options opts = FSCK_OPTIONS_DEFAULT;
2330+
2331+
opts.strict = 1;
2332+
opts.error_func = hash_format_check_report;
2333+
if (fsck_buffer(null_oid(), type, buf, size, &opts))
2334+
die(_("refusing to create malformed object"));
2335+
fsck_finish(&opts);
23452336
}
23462337

23472338
if (write_object)

t/t1006-cat-file.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,8 @@ commit_message="Initial commit"
292292
commit_sha1=$(echo_without_newline "$commit_message" | git commit-tree $tree_sha1)
293293
commit_size=$(($(test_oid hexsz) + 137))
294294
commit_content="tree $tree_sha1
295-
author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> 0000000000 +0000
296-
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> 0000000000 +0000
295+
author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> 0 +0000
296+
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> 0 +0000
297297
298298
$commit_message"
299299

@@ -304,7 +304,7 @@ type blob
304304
tag hellotag
305305
tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>"
306306
tag_description="This is a tag"
307-
tag_content="$tag_header_without_timestamp 0000000000 +0000
307+
tag_content="$tag_header_without_timestamp 0 +0000
308308
309309
$tag_description"
310310

t/t1007-hash-object.sh

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -203,23 +203,34 @@ done
203203
test_expect_success 'too-short tree' '
204204
echo abc >malformed-tree &&
205205
test_must_fail git hash-object -t tree malformed-tree 2>err &&
206-
test_i18ngrep "too-short tree object" err
206+
grep "too-short tree object" err
207207
'
208208

209209
test_expect_success 'malformed mode in tree' '
210-
hex_sha1=$(echo foo | git hash-object --stdin -w) &&
211-
bin_sha1=$(echo $hex_sha1 | hex2oct) &&
212-
printf "9100644 \0$bin_sha1" >tree-with-malformed-mode &&
210+
hex_oid=$(echo foo | git hash-object --stdin -w) &&
211+
bin_oid=$(echo $hex_oid | hex2oct) &&
212+
printf "9100644 \0$bin_oid" >tree-with-malformed-mode &&
213213
test_must_fail git hash-object -t tree tree-with-malformed-mode 2>err &&
214-
test_i18ngrep "malformed mode in tree entry" err
214+
grep "malformed mode in tree entry" err
215215
'
216216

217217
test_expect_success 'empty filename in tree' '
218-
hex_sha1=$(echo foo | git hash-object --stdin -w) &&
219-
bin_sha1=$(echo $hex_sha1 | hex2oct) &&
220-
printf "100644 \0$bin_sha1" >tree-with-empty-filename &&
218+
hex_oid=$(echo foo | git hash-object --stdin -w) &&
219+
bin_oid=$(echo $hex_oid | hex2oct) &&
220+
printf "100644 \0$bin_oid" >tree-with-empty-filename &&
221221
test_must_fail git hash-object -t tree tree-with-empty-filename 2>err &&
222-
test_i18ngrep "empty filename in tree entry" err
222+
grep "empty filename in tree entry" err
223+
'
224+
225+
test_expect_success 'duplicate filename in tree' '
226+
hex_oid=$(echo foo | git hash-object --stdin -w) &&
227+
bin_oid=$(echo $hex_oid | hex2oct) &&
228+
{
229+
printf "100644 file\0$bin_oid" &&
230+
printf "100644 file\0$bin_oid"
231+
} >tree-with-duplicate-filename &&
232+
test_must_fail git hash-object -t tree tree-with-duplicate-filename 2>err &&
233+
grep "duplicateEntries" err
223234
'
224235

225236
test_expect_success 'corrupt commit' '

0 commit comments

Comments
 (0)