Skip to content

Commit 43fa44f

Browse files
pcloudsgitster
authored andcommitted
pack-objects: move in_pack out of struct object_entry
Instead of using 8 bytes (on 64 bit arch) to store a pointer to a pack. Use an index instead since the number of packs should be relatively small. This limits the number of packs we can handle to 1k. Since we can't be sure people can never run into the situation where they have more than 1k pack files. Provide a fall back route for it. If we find out they have too many packs, the new in_pack_by_idx[] array (which has at most 1k elements) will not be used. Instead we allocate in_pack[] array that holds nr_objects elements. This is similar to how the optional in_pack_pos field is handled. The new simple test is just to make sure the too-many-packs code path is at least executed. The true test is running make test GIT_TEST_FULL_IN_PACK_ARRAY=1 to take advantage of other special case tests. Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 06af3bb commit 43fa44f

File tree

6 files changed

+130
-12
lines changed

6 files changed

+130
-12
lines changed

builtin/pack-objects.c

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "packfile.h"
3232
#include "object-store.h"
3333

34+
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
35+
3436
static const char *pack_usage[] = {
3537
N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
3638
N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
@@ -367,7 +369,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
367369
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
368370
unsigned long limit, int usable_delta)
369371
{
370-
struct packed_git *p = entry->in_pack;
372+
struct packed_git *p = IN_PACK(entry);
371373
struct pack_window *w_curs = NULL;
372374
struct revindex_entry *revidx;
373375
off_t offset;
@@ -478,7 +480,7 @@ static off_t write_object(struct hashfile *f,
478480

479481
if (!reuse_object)
480482
to_reuse = 0; /* explicit */
481-
else if (!entry->in_pack)
483+
else if (!IN_PACK(entry))
482484
to_reuse = 0; /* can't reuse what we don't have */
483485
else if (oe_type(entry) == OBJ_REF_DELTA ||
484486
oe_type(entry) == OBJ_OFS_DELTA)
@@ -1074,7 +1076,7 @@ static void create_object_entry(const struct object_id *oid,
10741076
else
10751077
nr_result++;
10761078
if (found_pack) {
1077-
entry->in_pack = found_pack;
1079+
oe_set_in_pack(&to_pack, entry, found_pack);
10781080
entry->in_pack_offset = found_offset;
10791081
}
10801082

@@ -1399,8 +1401,8 @@ static void cleanup_preferred_base(void)
13991401

14001402
static void check_object(struct object_entry *entry)
14011403
{
1402-
if (entry->in_pack) {
1403-
struct packed_git *p = entry->in_pack;
1404+
if (IN_PACK(entry)) {
1405+
struct packed_git *p = IN_PACK(entry);
14041406
struct pack_window *w_curs = NULL;
14051407
const unsigned char *base_ref = NULL;
14061408
struct object_entry *base_entry;
@@ -1535,14 +1537,16 @@ static int pack_offset_sort(const void *_a, const void *_b)
15351537
{
15361538
const struct object_entry *a = *(struct object_entry **)_a;
15371539
const struct object_entry *b = *(struct object_entry **)_b;
1540+
const struct packed_git *a_in_pack = IN_PACK(a);
1541+
const struct packed_git *b_in_pack = IN_PACK(b);
15381542

15391543
/* avoid filesystem trashing with loose objects */
1540-
if (!a->in_pack && !b->in_pack)
1544+
if (!a_in_pack && !b_in_pack)
15411545
return oidcmp(&a->idx.oid, &b->idx.oid);
15421546

1543-
if (a->in_pack < b->in_pack)
1547+
if (a_in_pack < b_in_pack)
15441548
return -1;
1545-
if (a->in_pack > b->in_pack)
1549+
if (a_in_pack > b_in_pack)
15461550
return 1;
15471551
return a->in_pack_offset < b->in_pack_offset ? -1 :
15481552
(a->in_pack_offset > b->in_pack_offset);
@@ -1578,7 +1582,7 @@ static void drop_reused_delta(struct object_entry *entry)
15781582

15791583
oi.sizep = &entry->size;
15801584
oi.typep = &type;
1581-
if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) {
1585+
if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
15821586
/*
15831587
* We failed to get the info from this pack for some reason;
15841588
* fall back to sha1_object_info, which may find another copy.
@@ -1848,8 +1852,8 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
18481852
* it, we will still save the transfer cost, as we already know
18491853
* the other side has it and we won't send src_entry at all.
18501854
*/
1851-
if (reuse_delta && trg_entry->in_pack &&
1852-
trg_entry->in_pack == src_entry->in_pack &&
1855+
if (reuse_delta && IN_PACK(trg_entry) &&
1856+
IN_PACK(trg_entry) == IN_PACK(src_entry) &&
18531857
!src_entry->preferred_base &&
18541858
trg_entry->in_pack_type != OBJ_REF_DELTA &&
18551859
trg_entry->in_pack_type != OBJ_OFS_DELTA)
@@ -3192,6 +3196,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
31923196
}
31933197
}
31943198

3199+
prepare_packing_data(&to_pack);
3200+
31953201
if (progress)
31963202
progress_state = start_progress(_("Counting objects"), 0);
31973203
if (!use_internal_rev_list)

object-store.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ struct packed_git {
6969
int index_version;
7070
time_t mtime;
7171
int pack_fd;
72+
int index; /* for builtin/pack-objects.c */
7273
unsigned pack_local:1,
7374
pack_keep:1,
7475
freshened:1,

pack-objects.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#include "object.h"
33
#include "pack.h"
44
#include "pack-objects.h"
5+
#include "packfile.h"
6+
#include "config.h"
57

68
static uint32_t locate_object_entry_hash(struct packing_data *pdata,
79
const unsigned char *sha1,
@@ -86,6 +88,63 @@ struct object_entry *packlist_find(struct packing_data *pdata,
8688
return &pdata->objects[pdata->index[i] - 1];
8789
}
8890

91+
static void prepare_in_pack_by_idx(struct packing_data *pdata)
92+
{
93+
struct packed_git **mapping, *p;
94+
int cnt = 0, nr = 1U << OE_IN_PACK_BITS;
95+
96+
ALLOC_ARRAY(mapping, nr);
97+
/*
98+
* oe_in_pack() on an all-zero'd object_entry
99+
* (i.e. in_pack_idx also zero) should return NULL.
100+
*/
101+
mapping[cnt++] = NULL;
102+
for (p = get_packed_git(the_repository); p; p = p->next, cnt++) {
103+
if (cnt == nr) {
104+
free(mapping);
105+
return;
106+
}
107+
p->index = cnt;
108+
mapping[cnt] = p;
109+
}
110+
pdata->in_pack_by_idx = mapping;
111+
}
112+
113+
/*
114+
* A new pack appears after prepare_in_pack_by_idx() has been
115+
* run. This is likely a race.
116+
*
117+
* We could map this new pack to in_pack_by_idx[] array, but then we
118+
* have to deal with full array anyway. And since it's hard to test
119+
* this fall back code, just stay simple and fall back to using
120+
* in_pack[] array.
121+
*/
122+
void oe_map_new_pack(struct packing_data *pack,
123+
struct packed_git *p)
124+
{
125+
uint32_t i;
126+
127+
REALLOC_ARRAY(pack->in_pack, pack->nr_alloc);
128+
129+
for (i = 0; i < pack->nr_objects; i++)
130+
pack->in_pack[i] = oe_in_pack(pack, pack->objects + i);
131+
132+
FREE_AND_NULL(pack->in_pack_by_idx);
133+
}
134+
135+
/* assume pdata is already zero'd by caller */
136+
void prepare_packing_data(struct packing_data *pdata)
137+
{
138+
if (git_env_bool("GIT_TEST_FULL_IN_PACK_ARRAY", 0)) {
139+
/*
140+
* do not initialize in_pack_by_idx[] to force the
141+
* slow path in oe_in_pack()
142+
*/
143+
} else {
144+
prepare_in_pack_by_idx(pdata);
145+
}
146+
}
147+
89148
struct object_entry *packlist_alloc(struct packing_data *pdata,
90149
const unsigned char *sha1,
91150
uint32_t index_pos)
@@ -95,6 +154,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata,
95154
if (pdata->nr_objects >= pdata->nr_alloc) {
96155
pdata->nr_alloc = (pdata->nr_alloc + 1024) * 3 / 2;
97156
REALLOC_ARRAY(pdata->objects, pdata->nr_alloc);
157+
158+
if (!pdata->in_pack_by_idx)
159+
REALLOC_ARRAY(pdata->in_pack, pdata->nr_alloc);
98160
}
99161

100162
new_entry = pdata->objects + pdata->nr_objects++;
@@ -107,5 +169,8 @@ struct object_entry *packlist_alloc(struct packing_data *pdata,
107169
else
108170
pdata->index[index_pos] = pdata->nr_objects;
109171

172+
if (pdata->in_pack)
173+
pdata->in_pack[pdata->nr_objects - 1] = NULL;
174+
110175
return new_entry;
111176
}

pack-objects.h

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
#ifndef PACK_OBJECTS_H
22
#define PACK_OBJECTS_H
33

4+
#include "object-store.h"
5+
46
#define OE_DFS_STATE_BITS 2
57
#define OE_DEPTH_BITS 12
8+
#define OE_IN_PACK_BITS 10
69

710
/*
811
* State flags for depth-first search used for analyzing delta cycles.
@@ -65,7 +68,7 @@ enum dfs_state {
6568
struct object_entry {
6669
struct pack_idx_entry idx;
6770
unsigned long size; /* uncompressed size */
68-
struct packed_git *in_pack; /* already in pack */
71+
unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */
6972
off_t in_pack_offset;
7073
struct object_entry *delta; /* delta base object */
7174
struct object_entry *delta_child; /* deltified objects who bases me */
@@ -100,8 +103,18 @@ struct packing_data {
100103
uint32_t index_size;
101104

102105
unsigned int *in_pack_pos;
106+
107+
/*
108+
* Only one of these can be non-NULL and they have different
109+
* sizes. if in_pack_by_idx is allocated, oe_in_pack() returns
110+
* the pack of an object using in_pack_idx field. If not,
111+
* in_pack[] array is used the same way as in_pack_pos[]
112+
*/
113+
struct packed_git **in_pack_by_idx;
114+
struct packed_git **in_pack;
103115
};
104116

117+
void prepare_packing_data(struct packing_data *pdata);
105118
struct object_entry *packlist_alloc(struct packing_data *pdata,
106119
const unsigned char *sha1,
107120
uint32_t index_pos);
@@ -158,4 +171,27 @@ static inline void oe_set_in_pack_pos(const struct packing_data *pack,
158171
pack->in_pack_pos[e - pack->objects] = pos;
159172
}
160173

174+
static inline struct packed_git *oe_in_pack(const struct packing_data *pack,
175+
const struct object_entry *e)
176+
{
177+
if (pack->in_pack_by_idx)
178+
return pack->in_pack_by_idx[e->in_pack_idx];
179+
else
180+
return pack->in_pack[e - pack->objects];
181+
}
182+
183+
void oe_map_new_pack(struct packing_data *pack,
184+
struct packed_git *p);
185+
static inline void oe_set_in_pack(struct packing_data *pack,
186+
struct object_entry *e,
187+
struct packed_git *p)
188+
{
189+
if (!p->index)
190+
oe_map_new_pack(pack, p);
191+
if (pack->in_pack_by_idx)
192+
e->in_pack_idx = p->index;
193+
else
194+
pack->in_pack[e - pack->objects] = p;
195+
}
196+
161197
#endif

t/README

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,11 @@ environment set.
304304
GIT_TEST_SPLIT_INDEX=<boolean> forces split-index mode on the whole
305305
test suite. Accept any boolean values that are accepted by git-config.
306306

307+
GIT_TEST_FULL_IN_PACK_ARRAY=<boolean> exercises the uncommon
308+
pack-objects code path where there are more than 1024 packs even if
309+
the actual number of packs in repository is below this limit. Accept
310+
any boolean values that are accepted by git-config.
311+
307312
Naming Tests
308313
------------
309314

t/t5300-pack-object.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,11 @@ test_expect_success !PTHREADS,C_LOCALE_OUTPUT 'pack-objects --threads=N or pack.
457457
grep -F "no threads support, ignoring pack.threads" err
458458
'
459459

460+
test_expect_success 'pack-objects in too-many-packs mode' '
461+
GIT_TEST_FULL_IN_PACK_ARRAY=1 git repack -ad &&
462+
git fsck
463+
'
464+
460465
#
461466
# WARNING!
462467
#

0 commit comments

Comments
 (0)