Skip to content

Commit 41a078c

Browse files
peffgitster
authored andcommitted
fetch-pack: cache results of for_each_alternate_ref
We may run for_each_alternate_ref() twice, once in find_common() and once in everything_local(). This operation can be expensive, because it involves running a sub-process which must freshly load all of the alternate's refs from disk. Let's cache and reuse the results between the two calls. We can make some optimizations based on the particular use pattern in fetch-pack to keep our memory usage down. The first is that we only care about the sha1s, not the refs themselves. So it's OK to store only the sha1s, and to suppress duplicates. The natural fit would therefore be a sha1_array. However, sha1_array's de-duplication happens only after it has read and sorted all entries. It still stores each duplicate. For an alternate with a large number of refs pointing to the same commits, this is a needless expense. Instead, we'd prefer to eliminate duplicates before putting them in the cache, which implies using a hash. We can further note that fetch-pack will call parse_object() on each alternate sha1. We can therefore keep our cache as a set of pointers to "struct object". That gives us a place to put our "already seen" bit with an optimized hash lookup. And as a bonus, the object stores the sha1 for us, so pointer-to-object is all we need. There are two extra optimizations I didn't do here: - we actually store an array of pointer-to-object. Technically we could just walk the obj_hash table looking for entries with the ALTERNATE flag set (because our use case doesn't care about the order here). But that hash table may be mostly composed of non-ALTERNATE entries, so we'd waste time walking over them. So it would be a slight win in memory use, but a loss in CPU. - the items we pull out of the cache are actual "struct object"s, but then we feed "obj->sha1" to our sub-functions, which promptly call parse_object(). This second parse is cheap, because it starts with lookup_object() and will bail immediately when it sees we've already parsed the object. We could save the extra hash lookup, but it would involve refactoring the functions we call. It may or may not be worth the trouble. Signed-off-by: Jeff King <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent a10a178 commit 41a078c

File tree

2 files changed

+43
-11
lines changed

2 files changed

+43
-11
lines changed

fetch-pack.c

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ static const char *alternate_shallow_file;
3535
#define COMMON_REF (1U << 2)
3636
#define SEEN (1U << 3)
3737
#define POPPED (1U << 4)
38+
#define ALTERNATE (1U << 5)
3839

3940
static int marked;
4041

@@ -67,6 +68,41 @@ static inline void print_verbose(const struct fetch_pack_args *args,
6768
fputc('\n', stderr);
6869
}
6970

71+
struct alternate_object_cache {
72+
struct object **items;
73+
size_t nr, alloc;
74+
};
75+
76+
static void cache_one_alternate(const char *refname,
77+
const struct object_id *oid,
78+
void *vcache)
79+
{
80+
struct alternate_object_cache *cache = vcache;
81+
struct object *obj = parse_object(oid->hash);
82+
83+
if (!obj || (obj->flags & ALTERNATE))
84+
return;
85+
86+
obj->flags |= ALTERNATE;
87+
ALLOC_GROW(cache->items, cache->nr + 1, cache->alloc);
88+
cache->items[cache->nr++] = obj;
89+
}
90+
91+
static void for_each_cached_alternate(void (*cb)(struct object *))
92+
{
93+
static int initialized;
94+
static struct alternate_object_cache cache;
95+
size_t i;
96+
97+
if (!initialized) {
98+
for_each_alternate_ref(cache_one_alternate, &cache);
99+
initialized = 1;
100+
}
101+
102+
for (i = 0; i < cache.nr; i++)
103+
cb(cache.items[i]);
104+
}
105+
70106
static void rev_list_push(struct commit *commit, int mark)
71107
{
72108
if (!(commit->object.flags & mark)) {
@@ -253,11 +289,9 @@ static void send_request(struct fetch_pack_args *args,
253289
write_or_die(fd, buf->buf, buf->len);
254290
}
255291

256-
static void insert_one_alternate_ref(const char *refname,
257-
const struct object_id *oid,
258-
void *unused)
292+
static void insert_one_alternate_object(struct object *obj)
259293
{
260-
rev_list_insert_ref(NULL, oid->hash);
294+
rev_list_insert_ref(NULL, obj->oid.hash);
261295
}
262296

263297
#define INITIAL_FLUSH 16
@@ -300,7 +334,7 @@ static int find_common(struct fetch_pack_args *args,
300334
marked = 1;
301335

302336
for_each_ref(rev_list_insert_ref_oid, NULL);
303-
for_each_alternate_ref(insert_one_alternate_ref, NULL);
337+
for_each_cached_alternate(insert_one_alternate_object);
304338

305339
fetching = 0;
306340
for ( ; refs ; refs = refs->next) {
@@ -621,11 +655,9 @@ static void filter_refs(struct fetch_pack_args *args,
621655
*refs = newlist;
622656
}
623657

624-
static void mark_alternate_complete(const char *refname,
625-
const struct object_id *oid,
626-
void *unused)
658+
static void mark_alternate_complete(struct object *obj)
627659
{
628-
mark_complete(oid->hash);
660+
mark_complete(obj->oid.hash);
629661
}
630662

631663
static int everything_local(struct fetch_pack_args *args,
@@ -661,7 +693,7 @@ static int everything_local(struct fetch_pack_args *args,
661693

662694
if (!args->deepen) {
663695
for_each_ref(mark_complete_oid, NULL);
664-
for_each_alternate_ref(mark_alternate_complete, NULL);
696+
for_each_cached_alternate(mark_alternate_complete);
665697
commit_list_sort_by_date(&complete);
666698
if (cutoff)
667699
mark_recent_complete_commits(args, cutoff);

object.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ struct object_array {
2929
/*
3030
* object flag allocation:
3131
* revision.h: 0---------10 26
32-
* fetch-pack.c: 0---4
32+
* fetch-pack.c: 0---5
3333
* walker.c: 0-2
3434
* upload-pack.c: 4 11----------------19
3535
* builtin/blame.c: 12-13

0 commit comments

Comments
 (0)