Skip to content

Commit c39e5cb

Browse files
committed
Merge branch 'jk/zlib-inflate-fixes'
Fix our use of zlib corner cases. * jk/zlib-inflate-fixes: unpack_loose_rest(): rewrite return handling for clarity unpack_loose_rest(): simplify error handling unpack_loose_rest(): never clean up zstream unpack_loose_rest(): avoid numeric comparison of zlib status unpack_loose_header(): avoid numeric comparison of zlib status git_inflate(): skip zlib_post_call() sanity check on Z_NEED_DICT unpack_loose_header(): fix infinite loop on broken zlib input unpack_loose_header(): report headers without NUL as "bad" unpack_loose_header(): simplify next_out assignment loose_object_info(): BUG() on inflating content with unknown type
2 parents 139d703 + 1cb2f29 commit c39e5cb

File tree

3 files changed

+92
-36
lines changed

3 files changed

+92
-36
lines changed

git-zlib.c

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ static void zlib_pre_call(git_zstream *s)
4545
s->z.avail_out = zlib_buf_cap(s->avail_out);
4646
}
4747

48-
static void zlib_post_call(git_zstream *s)
48+
static void zlib_post_call(git_zstream *s, int status)
4949
{
5050
unsigned long bytes_consumed;
5151
unsigned long bytes_produced;
@@ -54,7 +54,12 @@ static void zlib_post_call(git_zstream *s)
5454
bytes_produced = s->z.next_out - s->next_out;
5555
if (s->z.total_out != s->total_out + bytes_produced)
5656
BUG("total_out mismatch");
57-
if (s->z.total_in != s->total_in + bytes_consumed)
57+
/*
58+
* zlib does not update total_in when it returns Z_NEED_DICT,
59+
* causing a mismatch here. Skip the sanity check in that case.
60+
*/
61+
if (status != Z_NEED_DICT &&
62+
s->z.total_in != s->total_in + bytes_consumed)
5863
BUG("total_in mismatch");
5964

6065
s->total_out = s->z.total_out;
@@ -72,7 +77,7 @@ void git_inflate_init(git_zstream *strm)
7277

7378
zlib_pre_call(strm);
7479
status = inflateInit(&strm->z);
75-
zlib_post_call(strm);
80+
zlib_post_call(strm, status);
7681
if (status == Z_OK)
7782
return;
7883
die("inflateInit: %s (%s)", zerr_to_string(status),
@@ -90,7 +95,7 @@ void git_inflate_init_gzip_only(git_zstream *strm)
9095

9196
zlib_pre_call(strm);
9297
status = inflateInit2(&strm->z, windowBits);
93-
zlib_post_call(strm);
98+
zlib_post_call(strm, status);
9499
if (status == Z_OK)
95100
return;
96101
die("inflateInit2: %s (%s)", zerr_to_string(status),
@@ -103,7 +108,7 @@ void git_inflate_end(git_zstream *strm)
103108

104109
zlib_pre_call(strm);
105110
status = inflateEnd(&strm->z);
106-
zlib_post_call(strm);
111+
zlib_post_call(strm, status);
107112
if (status == Z_OK)
108113
return;
109114
error("inflateEnd: %s (%s)", zerr_to_string(status),
@@ -122,7 +127,7 @@ int git_inflate(git_zstream *strm, int flush)
122127
? 0 : flush);
123128
if (status == Z_MEM_ERROR)
124129
die("inflate: out of memory");
125-
zlib_post_call(strm);
130+
zlib_post_call(strm, status);
126131

127132
/*
128133
* Let zlib work another round, while we can still
@@ -160,7 +165,7 @@ void git_deflate_init(git_zstream *strm, int level)
160165
memset(strm, 0, sizeof(*strm));
161166
zlib_pre_call(strm);
162167
status = deflateInit(&strm->z, level);
163-
zlib_post_call(strm);
168+
zlib_post_call(strm, status);
164169
if (status == Z_OK)
165170
return;
166171
die("deflateInit: %s (%s)", zerr_to_string(status),
@@ -176,7 +181,7 @@ static void do_git_deflate_init(git_zstream *strm, int level, int windowBits)
176181
status = deflateInit2(&strm->z, level,
177182
Z_DEFLATED, windowBits,
178183
8, Z_DEFAULT_STRATEGY);
179-
zlib_post_call(strm);
184+
zlib_post_call(strm, status);
180185
if (status == Z_OK)
181186
return;
182187
die("deflateInit2: %s (%s)", zerr_to_string(status),
@@ -207,7 +212,7 @@ int git_deflate_abort(git_zstream *strm)
207212

208213
zlib_pre_call(strm);
209214
status = deflateEnd(&strm->z);
210-
zlib_post_call(strm);
215+
zlib_post_call(strm, status);
211216
return status;
212217
}
213218

@@ -227,7 +232,7 @@ int git_deflate_end_gently(git_zstream *strm)
227232

228233
zlib_pre_call(strm);
229234
status = deflateEnd(&strm->z);
230-
zlib_post_call(strm);
235+
zlib_post_call(strm, status);
231236
return status;
232237
}
233238

@@ -244,7 +249,7 @@ int git_deflate(git_zstream *strm, int flush)
244249
? 0 : flush);
245250
if (status == Z_MEM_ERROR)
246251
die("deflate: out of memory");
247-
zlib_post_call(strm);
252+
zlib_post_call(strm, status);
248253

249254
/*
250255
* Let zlib work another round, while we can still

object-file.c

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,7 +1362,7 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
13621362
obj_read_unlock();
13631363
status = git_inflate(stream, 0);
13641364
obj_read_lock();
1365-
if (status < Z_OK)
1365+
if (status != Z_OK && status != Z_STREAM_END)
13661366
return ULHR_BAD;
13671367

13681368
/*
@@ -1385,20 +1385,19 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
13851385
* reading the stream.
13861386
*/
13871387
strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
1388-
stream->next_out = buffer;
1389-
stream->avail_out = bufsiz;
13901388

13911389
do {
1390+
stream->next_out = buffer;
1391+
stream->avail_out = bufsiz;
1392+
13921393
obj_read_unlock();
13931394
status = git_inflate(stream, 0);
13941395
obj_read_lock();
13951396
strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
13961397
if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
13971398
return 0;
1398-
stream->next_out = buffer;
1399-
stream->avail_out = bufsiz;
1400-
} while (status != Z_STREAM_END);
1401-
return ULHR_TOO_LONG;
1399+
} while (status == Z_OK);
1400+
return ULHR_BAD;
14021401
}
14031402

14041403
static void *unpack_loose_rest(git_zstream *stream,
@@ -1437,18 +1436,17 @@ static void *unpack_loose_rest(git_zstream *stream,
14371436
obj_read_lock();
14381437
}
14391438
}
1440-
if (status == Z_STREAM_END && !stream->avail_in) {
1441-
git_inflate_end(stream);
1442-
return buf;
1443-
}
14441439

1445-
if (status < 0)
1440+
if (status != Z_STREAM_END) {
14461441
error(_("corrupt loose object '%s'"), oid_to_hex(oid));
1447-
else if (stream->avail_in)
1442+
FREE_AND_NULL(buf);
1443+
} else if (stream->avail_in) {
14481444
error(_("garbage at end of loose object '%s'"),
14491445
oid_to_hex(oid));
1450-
free(buf);
1451-
return NULL;
1446+
FREE_AND_NULL(buf);
1447+
}
1448+
1449+
return buf;
14521450
}
14531451

14541452
/*
@@ -1580,6 +1578,8 @@ static int loose_object_info(struct repository *r,
15801578

15811579
if (!oi->contentp)
15821580
break;
1581+
if (hdrbuf.len)
1582+
BUG("unpacking content with unknown types not yet supported");
15831583
*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
15841584
if (*oi->contentp)
15851585
goto cleanup;
@@ -1600,8 +1600,8 @@ static int loose_object_info(struct repository *r,
16001600
die(_("loose object %s (stored in %s) is corrupt"),
16011601
oid_to_hex(oid), path);
16021602

1603-
git_inflate_end(&stream);
16041603
cleanup:
1604+
git_inflate_end(&stream);
16051605
munmap(map, mapsize);
16061606
if (oi->sizep == &size_scratch)
16071607
oi->sizep = NULL;
@@ -3080,7 +3080,6 @@ static int check_stream_oid(git_zstream *stream,
30803080
git_hash_update(&c, buf, stream->next_out - buf);
30813081
total_read += stream->next_out - buf;
30823082
}
3083-
git_inflate_end(stream);
30843083

30853084
if (status != Z_STREAM_END) {
30863085
error(_("corrupt loose object '%s'"), oid_to_hex(expected_oid));
@@ -3127,35 +3126,34 @@ int read_loose_object(const char *path,
31273126
if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr),
31283127
NULL) != ULHR_OK) {
31293128
error(_("unable to unpack header of %s"), path);
3130-
git_inflate_end(&stream);
3131-
goto out;
3129+
goto out_inflate;
31323130
}
31333131

31343132
if (parse_loose_header(hdr, oi) < 0) {
31353133
error(_("unable to parse header of %s"), path);
3136-
git_inflate_end(&stream);
3137-
goto out;
3134+
goto out_inflate;
31383135
}
31393136

31403137
if (*oi->typep == OBJ_BLOB && *size > big_file_threshold) {
31413138
if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0)
3142-
goto out;
3139+
goto out_inflate;
31433140
} else {
31443141
*contents = unpack_loose_rest(&stream, hdr, *size, expected_oid);
31453142
if (!*contents) {
31463143
error(_("unable to unpack contents of %s"), path);
3147-
git_inflate_end(&stream);
3148-
goto out;
3144+
goto out_inflate;
31493145
}
31503146
hash_object_file_literally(the_repository->hash_algo,
31513147
*contents, *size,
31523148
oi->type_name->buf, real_oid);
31533149
if (!oideq(expected_oid, real_oid))
3154-
goto out;
3150+
goto out_inflate;
31553151
}
31563152

31573153
ret = 0; /* everything checks out */
31583154

3155+
out_inflate:
3156+
git_inflate_end(&stream);
31593157
out:
31603158
if (map)
31613159
munmap(map, mapsize);

t/t1006-cat-file.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -903,6 +903,59 @@ test_expect_success 'cat-file -t and -s on corrupt loose object' '
903903
)
904904
'
905905

906+
test_expect_success 'truncated object with --allow-unknown-type' - <<\EOT
907+
objtype='a really long type name that exceeds the 32-byte limit' &&
908+
blob=$(git hash-object -w --literally -t "$objtype" /dev/null) &&
909+
objpath=.git/objects/$(test_oid_to_path "$blob") &&
910+
911+
# We want to truncate the object far enough in that we don't hit the
912+
# end while inflating the first 32 bytes (since we want to have to dig
913+
# for the trailing NUL of the header). But we don't want to go too far,
914+
# since our header isn't very big. And of course we are counting
915+
# deflated zlib bytes in the on-disk file, so it's a bit of a guess.
916+
# Empirically 50 seems to work.
917+
mv "$objpath" obj.bak &&
918+
test_when_finished 'mv obj.bak "$objpath"' &&
919+
test_copy_bytes 50 <obj.bak >"$objpath" &&
920+
921+
test_must_fail git cat-file --allow-unknown-type -t $blob 2>err &&
922+
test_grep "unable to unpack $blob header" err
923+
EOT
924+
925+
test_expect_success 'object reading handles zlib dictionary' - <<\EOT
926+
echo 'content that will be recompressed' >file &&
927+
blob=$(git hash-object -w file) &&
928+
objpath=.git/objects/$(test_oid_to_path "$blob") &&
929+
930+
# Recompress a loose object using a precomputed zlib dictionary.
931+
# This was originally done with:
932+
#
933+
# perl -MCompress::Raw::Zlib -e '
934+
# binmode STDIN;
935+
# binmode STDOUT;
936+
# my $data = do { local $/; <STDIN> };
937+
# my $in = new Compress::Raw::Zlib::Inflate;
938+
# my $de = new Compress::Raw::Zlib::Deflate(
939+
# -Dictionary => "anything"
940+
# );
941+
# $in->inflate($data, $raw);
942+
# $de->deflate($raw, $out);
943+
# print $out;
944+
# ' <obj.bak >$objpath
945+
#
946+
# but we do not want to require the perl module for all test runs (nor
947+
# carry a custom t/helper program that uses zlib features we don't
948+
# otherwise care about).
949+
mv "$objpath" obj.bak &&
950+
test_when_finished 'mv obj.bak "$objpath"' &&
951+
printf '\170\273\017\112\003\143' >$objpath &&
952+
953+
test_must_fail git cat-file blob $blob 2>err &&
954+
test_grep ! 'too long' err &&
955+
test_grep 'error: unable to unpack' err &&
956+
test_grep 'error: inflate: needs dictionary' err
957+
EOT
958+
906959
# Tests for git cat-file --follow-symlinks
907960
test_expect_success 'prep for symlink tests' '
908961
echo_without_newline "$hello_content" >morx &&

0 commit comments

Comments
 (0)