@@ -748,6 +748,23 @@ static int fsck_tree(const struct object_id *tree_oid,
748
748
return retval ;
749
749
}
750
750
751
+ /*
752
+ * Confirm that the headers of a commit or tag object end in a reasonable way,
753
+ * either with the usual "\n\n" separator, or at least with a trailing newline
754
+ * on the final header line.
755
+ *
756
+ * This property is important for the memory safety of our callers. It allows
757
+ * them to scan the buffer linewise without constantly checking the remaining
758
+ * size as long as:
759
+ *
760
+ * - they check that there are bytes left in the buffer at the start of any
761
+ * line (i.e., that the last newline they saw was not the final one we
762
+ * found here)
763
+ *
764
+ * - any intra-line scanning they do will stop at a newline, which will in
765
+ * the worst case be the newline we found here as the end-of-header. This makes it
766
+ * OK for them to use helpers like parse_oid_hex(), or even skip_prefix().
767
+ */
751
768
static int verify_headers (const void * data , unsigned long size ,
752
769
const struct object_id * oid , enum object_type type ,
753
770
struct fsck_options * options )
@@ -808,6 +825,20 @@ static int fsck_ident(const char **ident,
808
825
if (* p != ' ' )
809
826
return report (options , oid , type , FSCK_MSG_MISSING_SPACE_BEFORE_DATE , "invalid author/committer line - missing space before date" );
810
827
p ++ ;
828
+ /*
829
+ * Our timestamp parser is based on the C strto*() functions, which
830
+ * will happily eat whitespace, including the newline that is supposed
831
+ * to prevent us walking past the end of the buffer. So do our own
832
+ * scan, skipping linear whitespace but not newlines, and then
833
+ * confirming we found a digit. We _could_ be even more strict here,
834
+ * as we really expect only a single space, but since we have
835
+ * traditionally allowed extra whitespace, we'll continue to do so.
836
+ */
837
+ while (* p == ' ' || * p == '\t' )
838
+ p ++ ;
839
+ if (!isdigit (* p ))
840
+ return report (options , oid , type , FSCK_MSG_BAD_DATE ,
841
+ "invalid author/committer line - bad date" );
811
842
if (* p == '0' && p [1 ] != ' ' )
812
843
return report (options , oid , type , FSCK_MSG_ZERO_PADDED_DATE , "invalid author/committer line - zero-padded date" );
813
844
if (date_overflows (parse_timestamp (p , & end , 10 )))
@@ -834,20 +865,26 @@ static int fsck_commit(const struct object_id *oid,
834
865
unsigned author_count ;
835
866
int err ;
836
867
const char * buffer_begin = buffer ;
868
+ const char * buffer_end = buffer + size ;
837
869
const char * p ;
838
870
871
+ /*
872
+ * We _must_ stop parsing immediately if this reports failure, as the
873
+ * memory safety of the rest of the function depends on it. See the
874
+ * comment above the definition of verify_headers() for more details.
875
+ */
839
876
if (verify_headers (buffer , size , oid , OBJ_COMMIT , options ))
840
877
return -1 ;
841
878
842
- if (!skip_prefix (buffer , "tree " , & buffer ))
879
+ if (buffer >= buffer_end || !skip_prefix (buffer , "tree " , & buffer ))
843
880
return report (options , oid , OBJ_COMMIT , FSCK_MSG_MISSING_TREE , "invalid format - expected 'tree' line" );
844
881
if (parse_oid_hex (buffer , & tree_oid , & p ) || * p != '\n' ) {
845
882
err = report (options , oid , OBJ_COMMIT , FSCK_MSG_BAD_TREE_SHA1 , "invalid 'tree' line format - bad sha1" );
846
883
if (err )
847
884
return err ;
848
885
}
849
886
buffer = p + 1 ;
850
- while (skip_prefix (buffer , "parent " , & buffer )) {
887
+ while (buffer < buffer_end && skip_prefix (buffer , "parent " , & buffer )) {
851
888
if (parse_oid_hex (buffer , & parent_oid , & p ) || * p != '\n' ) {
852
889
err = report (options , oid , OBJ_COMMIT , FSCK_MSG_BAD_PARENT_SHA1 , "invalid 'parent' line format - bad sha1" );
853
890
if (err )
@@ -856,7 +893,7 @@ static int fsck_commit(const struct object_id *oid,
856
893
buffer = p + 1 ;
857
894
}
858
895
author_count = 0 ;
859
- while (skip_prefix (buffer , "author " , & buffer )) {
896
+ while (buffer < buffer_end && skip_prefix (buffer , "author " , & buffer )) {
860
897
author_count ++ ;
861
898
err = fsck_ident (& buffer , oid , OBJ_COMMIT , options );
862
899
if (err )
@@ -868,7 +905,7 @@ static int fsck_commit(const struct object_id *oid,
868
905
err = report (options , oid , OBJ_COMMIT , FSCK_MSG_MULTIPLE_AUTHORS , "invalid format - multiple 'author' lines" );
869
906
if (err )
870
907
return err ;
871
- if (!skip_prefix (buffer , "committer " , & buffer ))
908
+ if (buffer >= buffer_end || !skip_prefix (buffer , "committer " , & buffer ))
872
909
return report (options , oid , OBJ_COMMIT , FSCK_MSG_MISSING_COMMITTER , "invalid format - expected 'committer' line" );
873
910
err = fsck_ident (& buffer , oid , OBJ_COMMIT , options );
874
911
if (err )
@@ -899,13 +936,19 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
899
936
int ret = 0 ;
900
937
char * eol ;
901
938
struct strbuf sb = STRBUF_INIT ;
939
+ const char * buffer_end = buffer + size ;
902
940
const char * p ;
903
941
942
+ /*
943
+ * We _must_ stop parsing immediately if this reports failure, as the
944
+ * memory safety of the rest of the function depends on it. See the
945
+ * comment above the definition of verify_headers() for more details.
946
+ */
904
947
ret = verify_headers (buffer , size , oid , OBJ_TAG , options );
905
948
if (ret )
906
949
goto done ;
907
950
908
- if (!skip_prefix (buffer , "object " , & buffer )) {
951
+ if (buffer >= buffer_end || !skip_prefix (buffer , "object " , & buffer )) {
909
952
ret = report (options , oid , OBJ_TAG , FSCK_MSG_MISSING_OBJECT , "invalid format - expected 'object' line" );
910
953
goto done ;
911
954
}
@@ -916,11 +959,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
916
959
}
917
960
buffer = p + 1 ;
918
961
919
- if (!skip_prefix (buffer , "type " , & buffer )) {
962
+ if (buffer >= buffer_end || !skip_prefix (buffer , "type " , & buffer )) {
920
963
ret = report (options , oid , OBJ_TAG , FSCK_MSG_MISSING_TYPE_ENTRY , "invalid format - expected 'type' line" );
921
964
goto done ;
922
965
}
923
- eol = strchr (buffer , '\n' );
966
+ eol = memchr (buffer , '\n' , buffer_end - buffer );
924
967
if (!eol ) {
925
968
ret = report (options , oid , OBJ_TAG , FSCK_MSG_MISSING_TYPE , "invalid format - unexpected end after 'type' line" );
926
969
goto done ;
@@ -932,11 +975,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
932
975
goto done ;
933
976
buffer = eol + 1 ;
934
977
935
- if (!skip_prefix (buffer , "tag " , & buffer )) {
978
+ if (buffer >= buffer_end || !skip_prefix (buffer , "tag " , & buffer )) {
936
979
ret = report (options , oid , OBJ_TAG , FSCK_MSG_MISSING_TAG_ENTRY , "invalid format - expected 'tag' line" );
937
980
goto done ;
938
981
}
939
- eol = strchr (buffer , '\n' );
982
+ eol = memchr (buffer , '\n' , buffer_end - buffer );
940
983
if (!eol ) {
941
984
ret = report (options , oid , OBJ_TAG , FSCK_MSG_MISSING_TAG , "invalid format - unexpected end after 'type' line" );
942
985
goto done ;
@@ -952,18 +995,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
952
995
}
953
996
buffer = eol + 1 ;
954
997
955
- if (!skip_prefix (buffer , "tagger " , & buffer )) {
998
+ if (buffer >= buffer_end || !skip_prefix (buffer , "tagger " , & buffer )) {
956
999
/* early tags do not contain 'tagger' lines; warn only */
957
1000
ret = report (options , oid , OBJ_TAG , FSCK_MSG_MISSING_TAGGER_ENTRY , "invalid format - expected 'tagger' line" );
958
1001
if (ret )
959
1002
goto done ;
960
1003
}
961
1004
else
962
1005
ret = fsck_ident (& buffer , oid , OBJ_TAG , options );
963
- if (!* buffer )
964
- goto done ;
965
1006
966
- if (!starts_with (buffer , "\n" )) {
1007
+ if (buffer < buffer_end && !starts_with (buffer , "\n" )) {
967
1008
/*
968
1009
* The verify_headers() check will allow
969
1010
* e.g. "[...]tagger <tagger>\nsome
@@ -1237,19 +1278,26 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
1237
1278
if (!obj )
1238
1279
return report (options , NULL , OBJ_NONE , FSCK_MSG_BAD_OBJECT_SHA1 , "no valid object to fsck" );
1239
1280
1240
- if (obj -> type == OBJ_BLOB )
1241
- return fsck_blob (& obj -> oid , data , size , options );
1242
- if (obj -> type == OBJ_TREE )
1243
- return fsck_tree (& obj -> oid , data , size , options );
1244
- if (obj -> type == OBJ_COMMIT )
1245
- return fsck_commit (& obj -> oid , data , size , options );
1246
- if (obj -> type == OBJ_TAG )
1247
- return fsck_tag (& obj -> oid , data , size , options );
1281
+ return fsck_buffer (& obj -> oid , obj -> type , data , size , options );
1282
+ }
1283
+
1284
/*
 * Run the type-specific fsck check on an object buffer.
 *
 * Dispatches on `type` to fsck_blob(), fsck_tree(), fsck_commit() or
 * fsck_tag(), forwarding `oid`, `data`, `size` and `options` unchanged,
 * and returns whatever the selected checker returns. For any other
 * type it returns the result of report() with FSCK_MSG_UNKNOWN_TYPE.
 *
 * NOTE(review): this span is a rendered diff; lines marked '-' are the
 * pre-change text and the bare numbers are old/new line numbers.
 */
+ int fsck_buffer (const struct object_id * oid , enum object_type type ,
1285
+ void * data , unsigned long size ,
1286
+ struct fsck_options * options )
1287
+ {
1288
+ if (type == OBJ_BLOB )
1289
+ return fsck_blob (oid , data , size , options );
1290
+ if (type == OBJ_TREE )
1291
+ return fsck_tree (oid , data , size , options );
1292
+ if (type == OBJ_COMMIT )
1293
+ return fsck_commit (oid , data , size , options );
1294
+ if (type == OBJ_TAG )
1295
+ return fsck_tag (oid , data , size , options );
1248
1296
1249
/* None of the four handled types matched: report it, printing the type as an integer. */
- return report (options , & obj -> oid , obj -> type ,
1297
+ return report (options , oid , type ,
1250
1298
FSCK_MSG_UNKNOWN_TYPE ,
1251
1299
"unknown type '%d' (internal fsck error)" ,
1252
- obj -> type );
1300
+ type );
1253
1301
}
1254
1302
1255
1303
int fsck_error_function (struct fsck_options * o ,
0 commit comments