Skip to content

Commit 3b7d373

Browse files
committed
Merge branch 'kn/cat-file-literally'
Add the "--allow-unknown-type" option to "cat-file" to allow inspecting loose objects of an experimental or a broken type.

* kn/cat-file-literally:
  t1006: add tests for git cat-file --allow-unknown-type
  cat-file: teach cat-file a '--allow-unknown-type' option
  cat-file: make the options mutually exclusive
  sha1_file: support reading from a loose object of unknown type
2 parents 949d167 + 3e370f9 commit 3b7d373

File tree

5 files changed

+188
-43
lines changed

5 files changed

+188
-43
lines changed

Documentation/git-cat-file.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ git-cat-file - Provide content or type and size information for repository objec
99
SYNOPSIS
1010
--------
1111
[verse]
12-
'git cat-file' (-t | -s | -e | -p | <type> | --textconv ) <object>
12+
'git cat-file' (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv ) <object>
1313
'git cat-file' (--batch | --batch-check) < <list-of-objects>
1414

1515
DESCRIPTION
@@ -69,6 +69,9 @@ OPTIONS
6969
not be combined with any other options or arguments. See the
7070
section `BATCH OUTPUT` below for details.
7171

72+
--allow-unknown-type::
73+
Allow -s or -t to query broken/corrupt objects of unknown type.
74+
7275
OUTPUT
7376
------
7477
If '-t' is specified, one of the <type>.

builtin/cat-file.c

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,43 @@
99
#include "userdiff.h"
1010
#include "streaming.h"
1111

12-
static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
12+
static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
13+
int unknown_type)
1314
{
1415
unsigned char sha1[20];
1516
enum object_type type;
1617
char *buf;
1718
unsigned long size;
1819
struct object_context obj_context;
20+
struct object_info oi = {NULL};
21+
struct strbuf sb = STRBUF_INIT;
22+
unsigned flags = LOOKUP_REPLACE_OBJECT;
23+
24+
if (unknown_type)
25+
flags |= LOOKUP_UNKNOWN_OBJECT;
1926

2027
if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))
2128
die("Not a valid object name %s", obj_name);
2229

2330
buf = NULL;
2431
switch (opt) {
2532
case 't':
26-
type = sha1_object_info(sha1, NULL);
27-
if (type > 0) {
28-
printf("%s\n", typename(type));
33+
oi.typename = &sb;
34+
if (sha1_object_info_extended(sha1, &oi, flags) < 0)
35+
die("git cat-file: could not get object info");
36+
if (sb.len) {
37+
printf("%s\n", sb.buf);
38+
strbuf_release(&sb);
2939
return 0;
3040
}
3141
break;
3242

3343
case 's':
34-
type = sha1_object_info(sha1, &size);
35-
if (type > 0) {
36-
printf("%lu\n", size);
37-
return 0;
38-
}
39-
break;
44+
oi.sizep = &size;
45+
if (sha1_object_info_extended(sha1, &oi, flags) < 0)
46+
die("git cat-file: could not get object info");
47+
printf("%lu\n", size);
48+
return 0;
4049

4150
case 'e':
4251
return !has_sha1_file(sha1);
@@ -323,7 +332,7 @@ static int batch_objects(struct batch_options *opt)
323332
}
324333

325334
static const char * const cat_file_usage[] = {
326-
N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
335+
N_("git cat-file (-t [--allow-unknown-type]|-s [--allow-unknown-type]|-e|-p|<type>|--textconv) <object>"),
327336
N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
328337
NULL
329338
};
@@ -359,16 +368,19 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
359368
int opt = 0;
360369
const char *exp_type = NULL, *obj_name = NULL;
361370
struct batch_options batch = {0};
371+
int unknown_type = 0;
362372

363373
const struct option options[] = {
364374
OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
365-
OPT_SET_INT('t', NULL, &opt, N_("show object type"), 't'),
366-
OPT_SET_INT('s', NULL, &opt, N_("show object size"), 's'),
367-
OPT_SET_INT('e', NULL, &opt,
375+
OPT_CMDMODE('t', NULL, &opt, N_("show object type"), 't'),
376+
OPT_CMDMODE('s', NULL, &opt, N_("show object size"), 's'),
377+
OPT_CMDMODE('e', NULL, &opt,
368378
N_("exit with zero when there's no error"), 'e'),
369-
OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
370-
OPT_SET_INT(0, "textconv", &opt,
379+
OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
380+
OPT_CMDMODE(0, "textconv", &opt,
371381
N_("for blob objects, run textconv on object's content"), 'c'),
382+
OPT_BOOL( 0, "allow-unknown-type", &unknown_type,
383+
N_("allow -s and -t to work with broken/corrupt objects")),
372384
{ OPTION_CALLBACK, 0, "batch", &batch, "format",
373385
N_("show info and content of objects fed from the standard input"),
374386
PARSE_OPT_OPTARG, batch_option_callback },
@@ -380,9 +392,6 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
380392

381393
git_config(git_cat_file_config, NULL);
382394

383-
if (argc != 3 && argc != 2)
384-
usage_with_options(cat_file_usage, options);
385-
386395
argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
387396

388397
if (opt) {
@@ -405,5 +414,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
405414
if (batch.enabled)
406415
return batch_objects(&batch);
407416

408-
return cat_one_file(opt, exp_type, obj_name);
417+
if (unknown_type && opt != 't' && opt != 's')
418+
die("git cat-file --allow-unknown-type: use with -s or -t");
419+
return cat_one_file(opt, exp_type, obj_name, unknown_type);
409420
}

cache.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,7 @@ extern char *xdg_config_home(const char *filename);
879879

880880
/* object replacement */
881881
#define LOOKUP_REPLACE_OBJECT 1
882+
#define LOOKUP_UNKNOWN_OBJECT 2
882883
extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
883884
static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
884885
{
@@ -1351,6 +1352,7 @@ struct object_info {
13511352
unsigned long *sizep;
13521353
unsigned long *disk_sizep;
13531354
unsigned char *delta_base_sha1;
1355+
struct strbuf *typename;
13541356

13551357
/* Response */
13561358
enum {

sha1_file.c

Lines changed: 106 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1564,6 +1564,40 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
15641564
return git_inflate(stream, 0);
15651565
}
15661566

1567+
/*
 * Inflate the header of a loose object whose type name may be too long
 * to fit in the caller's fixed-size scratch buffer.
 *
 * The first chunk is inflated into buffer[0..bufsiz) through
 * unpack_sha1_header().  If that chunk already contains the NUL that
 * terminates the header, "header" is left untouched and the caller is
 * expected to parse the header straight out of "buffer".  Otherwise the
 * chunk is copied into "header" and inflation continues, bufsiz bytes at
 * a time, appending every chunk to "header" until one of them contains
 * the NUL.  Whole chunks are appended, so "header" may carry bytes that
 * follow the terminating NUL.
 *
 * Returns 0 once the terminating NUL has been seen, and -1 if zlib
 * reports an error or the stream stops producing data before a NUL
 * shows up.
 */
static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
					unsigned long mapsize, void *buffer,
					unsigned long bufsiz, struct strbuf *header)
{
	int status;

	status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
	/* Negative zlib codes are errors; nothing usable was inflated. */
	if (status < Z_OK)
		return -1;

	/*
	 * Check if entire header is unpacked in the first iteration.
	 */
	if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
		return 0;

	/*
	 * buffer[0..bufsiz] was not large enough.  Copy the partial
	 * result out to header, and then append the result of further
	 * reading the stream.
	 */
	strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
	stream->next_out = buffer;
	stream->avail_out = bufsiz;

	/*
	 * Keep going only while zlib reports plain progress (Z_OK).
	 * Looping on "status != Z_STREAM_END" would spin forever on a
	 * corrupt stream, because an error status never turns into
	 * Z_STREAM_END and no further output is produced.
	 */
	do {
		status = git_inflate(stream, 0);
		strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
		if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
			return 0;
		stream->next_out = buffer;
		stream->avail_out = bufsiz;
	} while (status == Z_OK);
	return -1;
}
1600+
15671601
static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
15681602
{
15691603
int bytes = strlen(buffer) + 1;
@@ -1614,27 +1648,38 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
16141648
* too permissive for what we want to check. So do a strict
16151649
* object header parse by hand.
16161650
*/
1617-
int parse_sha1_header(const char *hdr, unsigned long *sizep)
1651+
static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
1652+
unsigned int flags)
16181653
{
1619-
char type[10];
1620-
int i;
1654+
const char *type_buf = hdr;
16211655
unsigned long size;
1656+
int type, type_len = 0;
16221657

16231658
/*
1624-
* The type can be at most ten bytes (including the
1625-
* terminating '\0' that we add), and is followed by
1659+
* The type can be of any size but is followed by
16261660
* a space.
16271661
*/
1628-
i = 0;
16291662
for (;;) {
16301663
char c = *hdr++;
16311664
if (c == ' ')
16321665
break;
1633-
type[i++] = c;
1634-
if (i >= sizeof(type))
1635-
return -1;
1666+
type_len++;
16361667
}
1637-
type[i] = 0;
1668+
1669+
type = type_from_string_gently(type_buf, type_len, 1);
1670+
if (oi->typename)
1671+
strbuf_add(oi->typename, type_buf, type_len);
1672+
/*
1673+
* Set type to 0 if it's an unknown object and
1674+
* we're obtaining the type using '--allow-unknown-type'
1675+
* option.
1676+
*/
1677+
if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0))
1678+
type = 0;
1679+
else if (type < 0)
1680+
die("invalid object type");
1681+
if (oi->typep)
1682+
*oi->typep = type;
16381683

16391684
/*
16401685
* The length must follow immediately, and be in canonical
@@ -1652,12 +1697,24 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
16521697
size = size * 10 + c;
16531698
}
16541699
}
1655-
*sizep = size;
1700+
1701+
if (oi->sizep)
1702+
*oi->sizep = size;
16561703

16571704
/*
16581705
* The length must be followed by a zero byte
16591706
*/
1660-
return *hdr ? -1 : type_from_string(type);
1707+
return *hdr ? -1 : type;
1708+
}
1709+
1710+
int parse_sha1_header(const char *hdr, unsigned long *sizep)
1711+
{
1712+
struct object_info oi;
1713+
1714+
oi.sizep = sizep;
1715+
oi.typename = NULL;
1716+
oi.typep = NULL;
1717+
return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
16611718
}
16621719

16631720
static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
@@ -2522,13 +2579,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
25222579
}
25232580

25242581
static int sha1_loose_object_info(const unsigned char *sha1,
2525-
struct object_info *oi)
2582+
struct object_info *oi,
2583+
int flags)
25262584
{
2527-
int status;
2528-
unsigned long mapsize, size;
2585+
int status = 0;
2586+
unsigned long mapsize;
25292587
void *map;
25302588
git_zstream stream;
25312589
char hdr[32];
2590+
struct strbuf hdrbuf = STRBUF_INIT;
25322591

25332592
if (oi->delta_base_sha1)
25342593
hashclr(oi->delta_base_sha1);
@@ -2541,7 +2600,7 @@ static int sha1_loose_object_info(const unsigned char *sha1,
25412600
* return value implicitly indicates whether the
25422601
* object even exists.
25432602
*/
2544-
if (!oi->typep && !oi->sizep) {
2603+
if (!oi->typep && !oi->typename && !oi->sizep) {
25452604
struct stat st;
25462605
if (stat_sha1_file(sha1, &st) < 0)
25472606
return -1;
@@ -2555,17 +2614,26 @@ static int sha1_loose_object_info(const unsigned char *sha1,
25552614
return -1;
25562615
if (oi->disk_sizep)
25572616
*oi->disk_sizep = mapsize;
2558-
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
2617+
if ((flags & LOOKUP_UNKNOWN_OBJECT)) {
2618+
if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
2619+
status = error("unable to unpack %s header with --allow-unknown-type",
2620+
sha1_to_hex(sha1));
2621+
} else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
25592622
status = error("unable to unpack %s header",
25602623
sha1_to_hex(sha1));
2561-
else if ((status = parse_sha1_header(hdr, &size)) < 0)
2624+
if (status < 0)
2625+
; /* Do nothing */
2626+
else if (hdrbuf.len) {
2627+
if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
2628+
status = error("unable to parse %s header with --allow-unknown-type",
2629+
sha1_to_hex(sha1));
2630+
} else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
25622631
status = error("unable to parse %s header", sha1_to_hex(sha1));
2563-
else if (oi->sizep)
2564-
*oi->sizep = size;
25652632
git_inflate_end(&stream);
25662633
munmap(map, mapsize);
2567-
if (oi->typep)
2634+
if (status && oi->typep)
25682635
*oi->typep = status;
2636+
strbuf_release(&hdrbuf);
25692637
return 0;
25702638
}
25712639

@@ -2574,6 +2642,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
25742642
struct cached_object *co;
25752643
struct pack_entry e;
25762644
int rtype;
2645+
enum object_type real_type;
25772646
const unsigned char *real = lookup_replace_object_extended(sha1, flags);
25782647

25792648
co = find_cached_object(real);
@@ -2586,13 +2655,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
25862655
*(oi->disk_sizep) = 0;
25872656
if (oi->delta_base_sha1)
25882657
hashclr(oi->delta_base_sha1);
2658+
if (oi->typename)
2659+
strbuf_addstr(oi->typename, typename(co->type));
25892660
oi->whence = OI_CACHED;
25902661
return 0;
25912662
}
25922663

25932664
if (!find_pack_entry(real, &e)) {
25942665
/* Most likely it's a loose object. */
2595-
if (!sha1_loose_object_info(real, oi)) {
2666+
if (!sha1_loose_object_info(real, oi, flags)) {
25962667
oi->whence = OI_LOOSE;
25972668
return 0;
25982669
}
@@ -2603,9 +2674,18 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
26032674
return -1;
26042675
}
26052676

2677+
/*
2678+
* packed_object_info() does not follow the delta chain to
2679+
* find out the real type, unless it is given oi->typep.
2680+
*/
2681+
if (oi->typename && !oi->typep)
2682+
oi->typep = &real_type;
2683+
26062684
rtype = packed_object_info(e.p, e.offset, oi);
26072685
if (rtype < 0) {
26082686
mark_bad_packed_object(e.p, real);
2687+
if (oi->typep == &real_type)
2688+
oi->typep = NULL;
26092689
return sha1_object_info_extended(real, oi, 0);
26102690
} else if (in_delta_base_cache(e.p, e.offset)) {
26112691
oi->whence = OI_DBCACHED;
@@ -2616,6 +2696,10 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
26162696
oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
26172697
rtype == OBJ_OFS_DELTA);
26182698
}
2699+
if (oi->typename)
2700+
strbuf_addstr(oi->typename, typename(*oi->typep));
2701+
if (oi->typep == &real_type)
2702+
oi->typep = NULL;
26192703

26202704
return 0;
26212705
}

0 commit comments

Comments
 (0)