Skip to content

Commit 356df9b

Browse files
committed
Merge branch 'jk/cat-file-batch-optim'
If somebody wants to know only the on-disk footprint of an object, without having to know its type or payload size, we can bypass a lot of code to learn it cheaply.

* jk/cat-file-batch-optim:
  - Fix some sparse warnings
  - sha1_object_info_extended: pass object_info to helpers
  - sha1_object_info_extended: make type calculation optional
  - packed_object_info: make type lookup optional
  - packed_object_info: hoist delta type resolution to helper
  - sha1_loose_object_info: make type lookup optional
  - sha1_object_info_extended: rename "status" to "type"
  - cat-file: disable object/refname ambiguity check for batch mode
2 parents 2bf3501 + d099b71 commit 356df9b

File tree

6 files changed

+145
-71
lines changed

6 files changed

+145
-71
lines changed

builtin/cat-file.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,9 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
150150
if (!data->mark_query)
151151
strbuf_addstr(sb, sha1_to_hex(data->sha1));
152152
} else if (is_atom("objecttype", atom, len)) {
153-
if (!data->mark_query)
153+
if (data->mark_query)
154+
data->info.typep = &data->type;
155+
else
154156
strbuf_addstr(sb, typename(data->type));
155157
} else if (is_atom("objectsize", atom, len)) {
156158
if (data->mark_query)
@@ -229,8 +231,7 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt,
229231
return 0;
230232
}
231233

232-
data->type = sha1_object_info_extended(data->sha1, &data->info);
233-
if (data->type <= 0) {
234+
if (sha1_object_info_extended(data->sha1, &data->info) < 0) {
234235
printf("%s missing\n", obj_name);
235236
fflush(stdout);
236237
return 0;
@@ -266,6 +267,15 @@ static int batch_objects(struct batch_options *opt)
266267
strbuf_expand(&buf, opt->format, expand_format, &data);
267268
data.mark_query = 0;
268269

270+
/*
271+
* We are going to call get_sha1 on a potentially very large number of
272+
* objects. In most large cases, these will be actual object sha1s. The
273+
* cost to double-check that each one is not also a ref (just so we can
274+
* warn) ends up dwarfing the actual cost of the object lookups
275+
* themselves. We can work around it by just turning off the warning.
276+
*/
277+
warn_on_object_refname_ambiguity = 0;
278+
269279
while (strbuf_getline(&buf, stdin, '\n') != EOF) {
270280
char *p;
271281
int error;

cache.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,7 @@ extern int assume_unchanged;
577577
extern int prefer_symlink_refs;
578578
extern int log_all_ref_updates;
579579
extern int warn_ambiguous_refs;
580+
extern int warn_on_object_refname_ambiguity;
580581
extern int shared_repository;
581582
extern const char *apply_default_whitespace;
582583
extern const char *apply_default_ignorewhitespace;
@@ -1131,6 +1132,7 @@ extern int unpack_object_header(struct packed_git *, struct pack_window **, off_
11311132

11321133
struct object_info {
11331134
/* Request */
1135+
enum object_type *typep;
11341136
unsigned long *sizep;
11351137
unsigned long *disk_sizep;
11361138

environment.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ int prefer_symlink_refs;
2222
int is_bare_repository_cfg = -1; /* unspecified */
2323
int log_all_ref_updates = -1; /* unspecified */
2424
int warn_ambiguous_refs = 1;
25+
int warn_on_object_refname_ambiguity = 1;
2526
int repository_format_version;
2627
const char *git_commit_encoding;
2728
const char *git_log_output_encoding;

sha1_file.c

Lines changed: 119 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,6 +1306,26 @@ static int git_open_noatime(const char *name)
13061306
}
13071307
}
13081308

1309+
static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
1310+
{
1311+
char *name = sha1_file_name(sha1);
1312+
struct alternate_object_database *alt;
1313+
1314+
if (!lstat(name, st))
1315+
return 0;
1316+
1317+
prepare_alt_odb();
1318+
errno = ENOENT;
1319+
for (alt = alt_odb_list; alt; alt = alt->next) {
1320+
name = alt->name;
1321+
fill_sha1_path(name, sha1);
1322+
if (!lstat(alt->base, st))
1323+
return 0;
1324+
}
1325+
1326+
return -1;
1327+
}
1328+
13091329
static int open_sha1_file(const unsigned char *sha1)
13101330
{
13111331
int fd;
@@ -1693,52 +1713,21 @@ static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
16931713
return type;
16941714
}
16951715

1696-
16971716
#define POI_STACK_PREALLOC 64
16981717

1699-
static int packed_object_info(struct packed_git *p, off_t obj_offset,
1700-
unsigned long *sizep, int *rtype,
1701-
unsigned long *disk_sizep)
1718+
static enum object_type packed_to_object_type(struct packed_git *p,
1719+
off_t obj_offset,
1720+
enum object_type type,
1721+
struct pack_window **w_curs,
1722+
off_t curpos)
17021723
{
1703-
struct pack_window *w_curs = NULL;
1704-
unsigned long size;
1705-
off_t curpos = obj_offset;
1706-
enum object_type type;
17071724
off_t small_poi_stack[POI_STACK_PREALLOC];
17081725
off_t *poi_stack = small_poi_stack;
17091726
int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
17101727

1711-
type = unpack_object_header(p, &w_curs, &curpos, &size);
1712-
1713-
if (rtype)
1714-
*rtype = type; /* representation type */
1715-
1716-
if (sizep) {
1717-
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1718-
off_t tmp_pos = curpos;
1719-
off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1720-
type, obj_offset);
1721-
if (!base_offset) {
1722-
type = OBJ_BAD;
1723-
goto out;
1724-
}
1725-
*sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1726-
if (*sizep == 0) {
1727-
type = OBJ_BAD;
1728-
goto out;
1729-
}
1730-
} else {
1731-
*sizep = size;
1732-
}
1733-
}
1734-
1735-
if (disk_sizep) {
1736-
struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1737-
*disk_sizep = revidx[1].offset - obj_offset;
1738-
}
1739-
17401728
while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
17411729
off_t base_offset;
1730+
unsigned long size;
17421731
/* Push the object we're going to leave behind */
17431732
if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
17441733
poi_stack_alloc = alloc_nr(poi_stack_nr);
@@ -1749,11 +1738,11 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
17491738
}
17501739
poi_stack[poi_stack_nr++] = obj_offset;
17511740
/* If parsing the base offset fails, just unwind */
1752-
base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1741+
base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
17531742
if (!base_offset)
17541743
goto unwind;
17551744
curpos = obj_offset = base_offset;
1756-
type = unpack_object_header(p, &w_curs, &curpos, &size);
1745+
type = unpack_object_header(p, w_curs, &curpos, &size);
17571746
if (type <= OBJ_NONE) {
17581747
/* If getting the base itself fails, we first
17591748
* retry the base, otherwise unwind */
@@ -1780,7 +1769,6 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
17801769
out:
17811770
if (poi_stack != small_poi_stack)
17821771
free(poi_stack);
1783-
unuse_pack(&w_curs);
17841772
return type;
17851773

17861774
unwind:
@@ -1794,6 +1782,57 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
17941782
goto out;
17951783
}
17961784

1785+
static int packed_object_info(struct packed_git *p, off_t obj_offset,
1786+
struct object_info *oi)
1787+
{
1788+
struct pack_window *w_curs = NULL;
1789+
unsigned long size;
1790+
off_t curpos = obj_offset;
1791+
enum object_type type;
1792+
1793+
/*
1794+
* We always get the representation type, but only convert it to
1795+
* a "real" type later if the caller is interested.
1796+
*/
1797+
type = unpack_object_header(p, &w_curs, &curpos, &size);
1798+
1799+
if (oi->sizep) {
1800+
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1801+
off_t tmp_pos = curpos;
1802+
off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1803+
type, obj_offset);
1804+
if (!base_offset) {
1805+
type = OBJ_BAD;
1806+
goto out;
1807+
}
1808+
*oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1809+
if (*oi->sizep == 0) {
1810+
type = OBJ_BAD;
1811+
goto out;
1812+
}
1813+
} else {
1814+
*oi->sizep = size;
1815+
}
1816+
}
1817+
1818+
if (oi->disk_sizep) {
1819+
struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1820+
*oi->disk_sizep = revidx[1].offset - obj_offset;
1821+
}
1822+
1823+
if (oi->typep) {
1824+
*oi->typep = packed_to_object_type(p, obj_offset, type, &w_curs, curpos);
1825+
if (*oi->typep < 0) {
1826+
type = OBJ_BAD;
1827+
goto out;
1828+
}
1829+
}
1830+
1831+
out:
1832+
unuse_pack(&w_curs);
1833+
return type;
1834+
}
1835+
17971836
static void *unpack_compressed_entry(struct packed_git *p,
17981837
struct pack_window **w_curs,
17991838
off_t curpos,
@@ -2363,68 +2402,84 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
23632402

23642403
}
23652404

2366-
static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep,
2367-
unsigned long *disk_sizep)
2405+
static int sha1_loose_object_info(const unsigned char *sha1,
2406+
struct object_info *oi)
23682407
{
23692408
int status;
23702409
unsigned long mapsize, size;
23712410
void *map;
23722411
git_zstream stream;
23732412
char hdr[32];
23742413

2414+
/*
2415+
* If we don't care about type or size, then we don't
2416+
* need to look inside the object at all.
2417+
*/
2418+
if (!oi->typep && !oi->sizep) {
2419+
if (oi->disk_sizep) {
2420+
struct stat st;
2421+
if (stat_sha1_file(sha1, &st) < 0)
2422+
return -1;
2423+
*oi->disk_sizep = st.st_size;
2424+
}
2425+
return 0;
2426+
}
2427+
23752428
map = map_sha1_file(sha1, &mapsize);
23762429
if (!map)
23772430
return -1;
2378-
if (disk_sizep)
2379-
*disk_sizep = mapsize;
2431+
if (oi->disk_sizep)
2432+
*oi->disk_sizep = mapsize;
23802433
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
23812434
status = error("unable to unpack %s header",
23822435
sha1_to_hex(sha1));
23832436
else if ((status = parse_sha1_header(hdr, &size)) < 0)
23842437
status = error("unable to parse %s header", sha1_to_hex(sha1));
2385-
else if (sizep)
2386-
*sizep = size;
2438+
else if (oi->sizep)
2439+
*oi->sizep = size;
23872440
git_inflate_end(&stream);
23882441
munmap(map, mapsize);
2389-
return status;
2442+
if (oi->typep)
2443+
*oi->typep = status;
2444+
return 0;
23902445
}
23912446

23922447
/* returns enum object_type or negative */
23932448
int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
23942449
{
23952450
struct cached_object *co;
23962451
struct pack_entry e;
2397-
int status, rtype;
2452+
int rtype;
23982453

23992454
co = find_cached_object(sha1);
24002455
if (co) {
2456+
if (oi->typep)
2457+
*(oi->typep) = co->type;
24012458
if (oi->sizep)
24022459
*(oi->sizep) = co->size;
24032460
if (oi->disk_sizep)
24042461
*(oi->disk_sizep) = 0;
24052462
oi->whence = OI_CACHED;
2406-
return co->type;
2463+
return 0;
24072464
}
24082465

24092466
if (!find_pack_entry(sha1, &e)) {
24102467
/* Most likely it's a loose object. */
2411-
status = sha1_loose_object_info(sha1, oi->sizep, oi->disk_sizep);
2412-
if (status >= 0) {
2468+
if (!sha1_loose_object_info(sha1, oi)) {
24132469
oi->whence = OI_LOOSE;
2414-
return status;
2470+
return 0;
24152471
}
24162472

24172473
/* Not a loose object; someone else may have just packed it. */
24182474
reprepare_packed_git();
24192475
if (!find_pack_entry(sha1, &e))
2420-
return status;
2476+
return -1;
24212477
}
24222478

2423-
status = packed_object_info(e.p, e.offset, oi->sizep, &rtype,
2424-
oi->disk_sizep);
2425-
if (status < 0) {
2479+
rtype = packed_object_info(e.p, e.offset, oi);
2480+
if (rtype < 0) {
24262481
mark_bad_packed_object(e.p, sha1);
2427-
status = sha1_object_info_extended(sha1, oi);
2482+
return sha1_object_info_extended(sha1, oi);
24282483
} else if (in_delta_base_cache(e.p, e.offset)) {
24292484
oi->whence = OI_DBCACHED;
24302485
} else {
@@ -2435,15 +2490,19 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
24352490
rtype == OBJ_OFS_DELTA);
24362491
}
24372492

2438-
return status;
2493+
return 0;
24392494
}
24402495

24412496
int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
24422497
{
2443-
struct object_info oi = {0};
2498+
enum object_type type;
2499+
struct object_info oi = {NULL};
24442500

2501+
oi.typep = &type;
24452502
oi.sizep = sizep;
2446-
return sha1_object_info_extended(sha1, &oi);
2503+
if (sha1_object_info_extended(sha1, &oi) < 0)
2504+
return -1;
2505+
return type;
24472506
}
24482507

24492508
static void *read_packed_sha1(const unsigned char *sha1,

sha1_name.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -452,13 +452,15 @@ static int get_sha1_basic(const char *str, int len, unsigned char *sha1)
452452
int at, reflog_len, nth_prior = 0;
453453

454454
if (len == 40 && !get_sha1_hex(str, sha1)) {
455-
refs_found = dwim_ref(str, len, tmp_sha1, &real_ref);
456-
if (refs_found > 0 && warn_ambiguous_refs) {
457-
warning(warn_msg, len, str);
458-
if (advice_object_name_warning)
459-
fprintf(stderr, "%s\n", _(object_name_msg));
455+
if (warn_on_object_refname_ambiguity) {
456+
refs_found = dwim_ref(str, len, tmp_sha1, &real_ref);
457+
if (refs_found > 0 && warn_ambiguous_refs) {
458+
warning(warn_msg, len, str);
459+
if (advice_object_name_warning)
460+
fprintf(stderr, "%s\n", _(object_name_msg));
461+
}
462+
free(real_ref);
460463
}
461-
free(real_ref);
462464
return 0;
463465
}
464466

streaming.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,11 @@ static enum input_source istream_source(const unsigned char *sha1,
111111
unsigned long size;
112112
int status;
113113

114+
oi->typep = type;
114115
oi->sizep = &size;
115116
status = sha1_object_info_extended(sha1, oi);
116117
if (status < 0)
117118
return stream_error;
118-
*type = status;
119119

120120
switch (oi->whence) {
121121
case OI_LOOSE:
@@ -135,7 +135,7 @@ struct git_istream *open_istream(const unsigned char *sha1,
135135
struct stream_filter *filter)
136136
{
137137
struct git_istream *st;
138-
struct object_info oi = {0};
138+
struct object_info oi = {NULL};
139139
const unsigned char *real = lookup_replace_object(sha1);
140140
enum input_source src = istream_source(real, type, &oi);
141141

0 commit comments

Comments
 (0)