Skip to content

Commit 790d96c

Browse files
trastgitster
authored andcommitted
sha1_file: remove recursion in packed_object_info
packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <[email protected]> Signed-off-by: Thomas Rast <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 239222f commit 790d96c

File tree

1 file changed

+84
-51
lines changed

1 file changed

+84
-51
lines changed

sha1_file.c

Lines changed: 84 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,50 +1560,6 @@ static off_t get_delta_base(struct packed_git *p,
15601560
return base_offset;
15611561
}
15621562

1563-
/* forward declaration for a mutually recursive function */
1564-
static int packed_object_info(struct packed_git *p, off_t offset,
1565-
unsigned long *sizep, int *rtype);
1566-
1567-
static int packed_delta_info(struct packed_git *p,
1568-
struct pack_window **w_curs,
1569-
off_t curpos,
1570-
enum object_type type,
1571-
off_t obj_offset,
1572-
unsigned long *sizep)
1573-
{
1574-
off_t base_offset;
1575-
1576-
base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1577-
if (!base_offset)
1578-
return OBJ_BAD;
1579-
type = packed_object_info(p, base_offset, NULL, NULL);
1580-
if (type <= OBJ_NONE) {
1581-
struct revindex_entry *revidx;
1582-
const unsigned char *base_sha1;
1583-
revidx = find_pack_revindex(p, base_offset);
1584-
if (!revidx)
1585-
return OBJ_BAD;
1586-
base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1587-
mark_bad_packed_object(p, base_sha1);
1588-
type = sha1_object_info(base_sha1, NULL);
1589-
if (type <= OBJ_NONE)
1590-
return OBJ_BAD;
1591-
}
1592-
1593-
/* We choose to only get the type of the base object and
1594-
* ignore potentially corrupt pack file that expects the delta
1595-
* based on a base with a wrong size. This saves tons of
1596-
* inflate() calls.
1597-
*/
1598-
if (sizep) {
1599-
*sizep = get_size_from_delta(p, w_curs, curpos);
1600-
if (*sizep == 0)
1601-
type = OBJ_BAD;
1602-
}
1603-
1604-
return type;
1605-
}
1606-
16071563
int unpack_object_header(struct packed_git *p,
16081564
struct pack_window **w_curs,
16091565
off_t *curpos,
@@ -1630,38 +1586,115 @@ int unpack_object_header(struct packed_git *p,
16301586
return type;
16311587
}
16321588

1589+
static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
1590+
{
1591+
int type;
1592+
struct revindex_entry *revidx;
1593+
const unsigned char *sha1;
1594+
revidx = find_pack_revindex(p, obj_offset);
1595+
if (!revidx)
1596+
return OBJ_BAD;
1597+
sha1 = nth_packed_object_sha1(p, revidx->nr);
1598+
mark_bad_packed_object(p, sha1);
1599+
type = sha1_object_info(sha1, NULL);
1600+
if (type <= OBJ_NONE)
1601+
return OBJ_BAD;
1602+
return type;
1603+
}
1604+
1605+
1606+
#define POI_STACK_PREALLOC 64
1607+
16331608
static int packed_object_info(struct packed_git *p, off_t obj_offset,
16341609
unsigned long *sizep, int *rtype)
16351610
{
16361611
struct pack_window *w_curs = NULL;
16371612
unsigned long size;
16381613
off_t curpos = obj_offset;
16391614
enum object_type type;
1615+
off_t small_poi_stack[POI_STACK_PREALLOC];
1616+
off_t *poi_stack = small_poi_stack;
1617+
int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
16401618

16411619
type = unpack_object_header(p, &w_curs, &curpos, &size);
1620+
16421621
if (rtype)
16431622
*rtype = type; /* representation type */
16441623

1624+
if (sizep) {
1625+
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1626+
off_t tmp_pos = curpos;
1627+
off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1628+
type, obj_offset);
1629+
if (!base_offset) {
1630+
type = OBJ_BAD;
1631+
goto out;
1632+
}
1633+
*sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1634+
if (*sizep == 0) {
1635+
type = OBJ_BAD;
1636+
goto out;
1637+
}
1638+
} else {
1639+
*sizep = size;
1640+
}
1641+
}
1642+
1643+
while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1644+
off_t base_offset;
1645+
/* Push the object we're going to leave behind */
1646+
if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1647+
poi_stack_alloc = alloc_nr(poi_stack_nr);
1648+
poi_stack = xmalloc(sizeof(off_t)*poi_stack_alloc);
1649+
memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
1650+
} else {
1651+
ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1652+
}
1653+
poi_stack[poi_stack_nr++] = obj_offset;
1654+
/* If parsing the base offset fails, just unwind */
1655+
base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1656+
if (!base_offset)
1657+
goto unwind;
1658+
curpos = obj_offset = base_offset;
1659+
type = unpack_object_header(p, &w_curs, &curpos, &size);
1660+
if (type <= OBJ_NONE) {
1661+
/* If getting the base itself fails, we first
1662+
* retry the base, otherwise unwind */
1663+
type = retry_bad_packed_offset(p, base_offset);
1664+
if (type > OBJ_NONE)
1665+
goto out;
1666+
goto unwind;
1667+
}
1668+
}
1669+
16451670
switch (type) {
1646-
case OBJ_OFS_DELTA:
1647-
case OBJ_REF_DELTA:
1648-
type = packed_delta_info(p, &w_curs, curpos,
1649-
type, obj_offset, sizep);
1650-
break;
1671+
case OBJ_BAD:
16511672
case OBJ_COMMIT:
16521673
case OBJ_TREE:
16531674
case OBJ_BLOB:
16541675
case OBJ_TAG:
1655-
if (sizep)
1656-
*sizep = size;
16571676
break;
16581677
default:
16591678
error("unknown object type %i at offset %"PRIuMAX" in %s",
16601679
type, (uintmax_t)obj_offset, p->pack_name);
16611680
type = OBJ_BAD;
16621681
}
1682+
1683+
out:
1684+
if (poi_stack != small_poi_stack)
1685+
free(poi_stack);
16631686
unuse_pack(&w_curs);
16641687
return type;
1688+
1689+
unwind:
1690+
while (poi_stack_nr) {
1691+
obj_offset = poi_stack[--poi_stack_nr];
1692+
type = retry_bad_packed_offset(p, obj_offset);
1693+
if (type > OBJ_NONE)
1694+
goto out;
1695+
}
1696+
type = OBJ_BAD;
1697+
goto out;
16651698
}
16661699

16671700
static void *unpack_compressed_entry(struct packed_git *p,

0 commit comments

Comments
 (0)