Skip to content

Commit 898eba5

Browse files
pclouds authored and gitster committed
pack-objects: refer to delta objects by index instead of pointer
These delta pointers always point to elements in the objects[] array in the packing_data struct. We can hold at most 4G of those objects because the array size, nr_objects, is a uint32_t. We could therefore use uint32_t indexes to address these elements instead of pointers. On a 64-bit architecture (8 bytes per pointer) this would save 4 bytes per pointer.

Convert these delta pointers to indexes. Since we need to handle NULL pointers as well, the index is shifted by one [1].

[1] This means we can only index 2^32-2 objects even though nr_objects could contain 2^32-1 objects. It should not be a problem in practice because when we grow objects[], nr_alloc would probably blow up long before nr_objects hits the wall.

Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 43fa44f commit 898eba5

File tree

2 files changed

+125
-59
lines changed

2 files changed

+125
-59
lines changed

builtin/pack-objects.c

Lines changed: 63 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,12 @@
3232
#include "object-store.h"
3333

3434
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
35+
#define DELTA(obj) oe_delta(&to_pack, obj)
36+
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
37+
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
38+
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
39+
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
40+
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
3541

3642
static const char *pack_usage[] = {
3743
N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
@@ -129,10 +135,11 @@ static void *get_delta(struct object_entry *entry)
129135
buf = read_object_file(&entry->idx.oid, &type, &size);
130136
if (!buf)
131137
die("unable to read %s", oid_to_hex(&entry->idx.oid));
132-
base_buf = read_object_file(&entry->delta->idx.oid, &type, &base_size);
138+
base_buf = read_object_file(&DELTA(entry)->idx.oid, &type,
139+
&base_size);
133140
if (!base_buf)
134141
die("unable to read %s",
135-
oid_to_hex(&entry->delta->idx.oid));
142+
oid_to_hex(&DELTA(entry)->idx.oid));
136143
delta_buf = diff_delta(base_buf, base_size,
137144
buf, size, &delta_size, 0);
138145
if (!delta_buf || delta_size != entry->delta_size)
@@ -288,12 +295,12 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
288295
size = entry->delta_size;
289296
buf = entry->delta_data;
290297
entry->delta_data = NULL;
291-
type = (allow_ofs_delta && entry->delta->idx.offset) ?
298+
type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
292299
OBJ_OFS_DELTA : OBJ_REF_DELTA;
293300
} else {
294301
buf = get_delta(entry);
295302
size = entry->delta_size;
296-
type = (allow_ofs_delta && entry->delta->idx.offset) ?
303+
type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
297304
OBJ_OFS_DELTA : OBJ_REF_DELTA;
298305
}
299306

@@ -317,7 +324,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
317324
* encoding of the relative offset for the delta
318325
* base from this object's position in the pack.
319326
*/
320-
off_t ofs = entry->idx.offset - entry->delta->idx.offset;
327+
off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
321328
unsigned pos = sizeof(dheader) - 1;
322329
dheader[pos] = ofs & 127;
323330
while (ofs >>= 7)
@@ -343,7 +350,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
343350
return 0;
344351
}
345352
hashwrite(f, header, hdrlen);
346-
hashwrite(f, entry->delta->idx.oid.hash, 20);
353+
hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
347354
hdrlen += 20;
348355
} else {
349356
if (limit && hdrlen + datalen + 20 >= limit) {
@@ -379,8 +386,8 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
379386
dheader[MAX_PACK_OBJECT_HEADER];
380387
unsigned hdrlen;
381388

382-
if (entry->delta)
383-
type = (allow_ofs_delta && entry->delta->idx.offset) ?
389+
if (DELTA(entry))
390+
type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
384391
OBJ_OFS_DELTA : OBJ_REF_DELTA;
385392
hdrlen = encode_in_pack_object_header(header, sizeof(header),
386393
type, entry->size);
@@ -408,7 +415,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
408415
}
409416

410417
if (type == OBJ_OFS_DELTA) {
411-
off_t ofs = entry->idx.offset - entry->delta->idx.offset;
418+
off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
412419
unsigned pos = sizeof(dheader) - 1;
413420
dheader[pos] = ofs & 127;
414421
while (ofs >>= 7)
@@ -427,7 +434,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
427434
return 0;
428435
}
429436
hashwrite(f, header, hdrlen);
430-
hashwrite(f, entry->delta->idx.oid.hash, 20);
437+
hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
431438
hdrlen += 20;
432439
reused_delta++;
433440
} else {
@@ -467,13 +474,13 @@ static off_t write_object(struct hashfile *f,
467474
else
468475
limit = pack_size_limit - write_offset;
469476

470-
if (!entry->delta)
477+
if (!DELTA(entry))
471478
usable_delta = 0; /* no delta */
472479
else if (!pack_size_limit)
473480
usable_delta = 1; /* unlimited packfile */
474-
else if (entry->delta->idx.offset == (off_t)-1)
481+
else if (DELTA(entry)->idx.offset == (off_t)-1)
475482
usable_delta = 0; /* base was written to another pack */
476-
else if (entry->delta->idx.offset)
483+
else if (DELTA(entry)->idx.offset)
477484
usable_delta = 1; /* base already exists in this pack */
478485
else
479486
usable_delta = 0; /* base could end up in another pack */
@@ -489,7 +496,7 @@ static off_t write_object(struct hashfile *f,
489496
/* ... but pack split may override that */
490497
else if (oe_type(entry) != entry->in_pack_type)
491498
to_reuse = 0; /* pack has delta which is unusable */
492-
else if (entry->delta)
499+
else if (DELTA(entry))
493500
to_reuse = 0; /* we want to pack afresh */
494501
else
495502
to_reuse = 1; /* we have it in-pack undeltified,
@@ -541,12 +548,12 @@ static enum write_one_status write_one(struct hashfile *f,
541548
}
542549

543550
/* if we are deltified, write out base object first. */
544-
if (e->delta) {
551+
if (DELTA(e)) {
545552
e->idx.offset = 1; /* now recurse */
546-
switch (write_one(f, e->delta, offset)) {
553+
switch (write_one(f, DELTA(e), offset)) {
547554
case WRITE_ONE_RECURSIVE:
548555
/* we cannot depend on this one */
549-
e->delta = NULL;
556+
SET_DELTA(e, NULL);
550557
break;
551558
default:
552559
break;
@@ -608,34 +615,34 @@ static void add_descendants_to_write_order(struct object_entry **wo,
608615
/* add this node... */
609616
add_to_write_order(wo, endp, e);
610617
/* all its siblings... */
611-
for (s = e->delta_sibling; s; s = s->delta_sibling) {
618+
for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
612619
add_to_write_order(wo, endp, s);
613620
}
614621
}
615622
/* drop down a level to add left subtree nodes if possible */
616-
if (e->delta_child) {
623+
if (DELTA_CHILD(e)) {
617624
add_to_order = 1;
618-
e = e->delta_child;
625+
e = DELTA_CHILD(e);
619626
} else {
620627
add_to_order = 0;
621628
/* our sibling might have some children, it is next */
622-
if (e->delta_sibling) {
623-
e = e->delta_sibling;
629+
if (DELTA_SIBLING(e)) {
630+
e = DELTA_SIBLING(e);
624631
continue;
625632
}
626633
/* go back to our parent node */
627-
e = e->delta;
628-
while (e && !e->delta_sibling) {
634+
e = DELTA(e);
635+
while (e && !DELTA_SIBLING(e)) {
629636
/* we're on the right side of a subtree, keep
630637
* going up until we can go right again */
631-
e = e->delta;
638+
e = DELTA(e);
632639
}
633640
if (!e) {
634641
/* done- we hit our original root node */
635642
return;
636643
}
637644
/* pass it off to sibling at this level */
638-
e = e->delta_sibling;
645+
e = DELTA_SIBLING(e);
639646
}
640647
};
641648
}
@@ -646,7 +653,7 @@ static void add_family_to_write_order(struct object_entry **wo,
646653
{
647654
struct object_entry *root;
648655

649-
for (root = e; root->delta; root = root->delta)
656+
for (root = e; DELTA(root); root = DELTA(root))
650657
; /* nothing */
651658
add_descendants_to_write_order(wo, endp, root);
652659
}
@@ -661,8 +668,8 @@ static struct object_entry **compute_write_order(void)
661668
for (i = 0; i < to_pack.nr_objects; i++) {
662669
objects[i].tagged = 0;
663670
objects[i].filled = 0;
664-
objects[i].delta_child = NULL;
665-
objects[i].delta_sibling = NULL;
671+
SET_DELTA_CHILD(&objects[i], NULL);
672+
SET_DELTA_SIBLING(&objects[i], NULL);
666673
}
667674

668675
/*
@@ -672,11 +679,11 @@ static struct object_entry **compute_write_order(void)
672679
*/
673680
for (i = to_pack.nr_objects; i > 0;) {
674681
struct object_entry *e = &objects[--i];
675-
if (!e->delta)
682+
if (!DELTA(e))
676683
continue;
677684
/* Mark me as the first child */
678-
e->delta_sibling = e->delta->delta_child;
679-
e->delta->delta_child = e;
685+
e->delta_sibling_idx = DELTA(e)->delta_child_idx;
686+
SET_DELTA_CHILD(DELTA(e), e);
680687
}
681688

682689
/*
@@ -1493,10 +1500,10 @@ static void check_object(struct object_entry *entry)
14931500
* circular deltas.
14941501
*/
14951502
oe_set_type(entry, entry->in_pack_type);
1496-
entry->delta = base_entry;
1503+
SET_DELTA(entry, base_entry);
14971504
entry->delta_size = entry->size;
1498-
entry->delta_sibling = base_entry->delta_child;
1499-
base_entry->delta_child = entry;
1505+
entry->delta_sibling_idx = base_entry->delta_child_idx;
1506+
SET_DELTA_CHILD(base_entry, entry);
15001507
unuse_pack(&w_curs);
15011508
return;
15021509
}
@@ -1567,17 +1574,19 @@ static int pack_offset_sort(const void *_a, const void *_b)
15671574
*/
15681575
static void drop_reused_delta(struct object_entry *entry)
15691576
{
1570-
struct object_entry **p = &entry->delta->delta_child;
1577+
unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
15711578
struct object_info oi = OBJECT_INFO_INIT;
15721579
enum object_type type;
15731580

1574-
while (*p) {
1575-
if (*p == entry)
1576-
*p = (*p)->delta_sibling;
1581+
while (*idx) {
1582+
struct object_entry *oe = &to_pack.objects[*idx - 1];
1583+
1584+
if (oe == entry)
1585+
*idx = oe->delta_sibling_idx;
15771586
else
1578-
p = &(*p)->delta_sibling;
1587+
idx = &oe->delta_sibling_idx;
15791588
}
1580-
entry->delta = NULL;
1589+
SET_DELTA(entry, NULL);
15811590
entry->depth = 0;
15821591

15831592
oi.sizep = &entry->size;
@@ -1617,7 +1626,7 @@ static void break_delta_chains(struct object_entry *entry)
16171626

16181627
for (cur = entry, total_depth = 0;
16191628
cur;
1620-
cur = cur->delta, total_depth++) {
1629+
cur = DELTA(cur), total_depth++) {
16211630
if (cur->dfs_state == DFS_DONE) {
16221631
/*
16231632
* We've already seen this object and know it isn't
@@ -1642,7 +1651,7 @@ static void break_delta_chains(struct object_entry *entry)
16421651
* it's not a delta, we're done traversing, but we'll mark it
16431652
* done to save time on future traversals.
16441653
*/
1645-
if (!cur->delta) {
1654+
if (!DELTA(cur)) {
16461655
cur->dfs_state = DFS_DONE;
16471656
break;
16481657
}
@@ -1665,7 +1674,7 @@ static void break_delta_chains(struct object_entry *entry)
16651674
* We keep all commits in the chain that we examined.
16661675
*/
16671676
cur->dfs_state = DFS_ACTIVE;
1668-
if (cur->delta->dfs_state == DFS_ACTIVE) {
1677+
if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
16691678
drop_reused_delta(cur);
16701679
cur->dfs_state = DFS_DONE;
16711680
break;
@@ -1680,7 +1689,7 @@ static void break_delta_chains(struct object_entry *entry)
16801689
* an extra "next" pointer to keep going after we reset cur->delta.
16811690
*/
16821691
for (cur = entry; cur; cur = next) {
1683-
next = cur->delta;
1692+
next = DELTA(cur);
16841693

16851694
/*
16861695
* We should have a chain of zero or more ACTIVE states down to
@@ -1865,7 +1874,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
18651874

18661875
/* Now some size filtering heuristics. */
18671876
trg_size = trg_entry->size;
1868-
if (!trg_entry->delta) {
1877+
if (!DELTA(trg_entry)) {
18691878
max_size = trg_size/2 - 20;
18701879
ref_depth = 1;
18711880
} else {
@@ -1939,7 +1948,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
19391948
if (!delta_buf)
19401949
return 0;
19411950

1942-
if (trg_entry->delta) {
1951+
if (DELTA(trg_entry)) {
19431952
/* Prefer only shallower same-sized deltas. */
19441953
if (delta_size == trg_entry->delta_size &&
19451954
src->depth + 1 >= trg->depth) {
@@ -1968,7 +1977,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
19681977
free(delta_buf);
19691978
}
19701979

1971-
trg_entry->delta = src_entry;
1980+
SET_DELTA(trg_entry, src_entry);
19721981
trg_entry->delta_size = delta_size;
19731982
trg->depth = src->depth + 1;
19741983

@@ -1977,13 +1986,13 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
19771986

19781987
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
19791988
{
1980-
struct object_entry *child = me->delta_child;
1989+
struct object_entry *child = DELTA_CHILD(me);
19811990
unsigned int m = n;
19821991
while (child) {
19831992
unsigned int c = check_delta_limit(child, n + 1);
19841993
if (m < c)
19851994
m = c;
1986-
child = child->delta_sibling;
1995+
child = DELTA_SIBLING(child);
19871996
}
19881997
return m;
19891998
}
@@ -2052,7 +2061,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
20522061
* otherwise they would become too deep.
20532062
*/
20542063
max_depth = depth;
2055-
if (entry->delta_child) {
2064+
if (DELTA_CHILD(entry)) {
20562065
max_depth -= check_delta_limit(entry, 0);
20572066
if (max_depth <= 0)
20582067
goto next;
@@ -2102,15 +2111,15 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
21022111
* depth, leaving it in the window is pointless. we
21032112
* should evict it first.
21042113
*/
2105-
if (entry->delta && max_depth <= n->depth)
2114+
if (DELTA(entry) && max_depth <= n->depth)
21062115
continue;
21072116

21082117
/*
21092118
* Move the best delta base up in the window, after the
21102119
* currently deltified object, to keep it longer. It will
21112120
* be the first base object to be attempted next.
21122121
*/
2113-
if (entry->delta) {
2122+
if (DELTA(entry)) {
21142123
struct unpacked swap = array[best_base];
21152124
int dist = (window + idx - best_base) % window;
21162125
int dst = best_base;
@@ -2431,7 +2440,7 @@ static void prepare_pack(int window, int depth)
24312440
for (i = 0; i < to_pack.nr_objects; i++) {
24322441
struct object_entry *entry = to_pack.objects + i;
24332442

2434-
if (entry->delta)
2443+
if (DELTA(entry))
24352444
/* This happens if we decided to reuse existing
24362445
* delta from a pack. "reuse_delta &&" is implied.
24372446
*/

0 commit comments

Comments
 (0)