Skip to content

Commit b9c2919

Browse files
committed
Merge branch 'jk/alternate-ref-optim'
Optimizes resource usage while enumerating refs from alternate object store, to help receiving end of "push" that hosts a repository with many "forks". * jk/alternate-ref-optim: receive-pack: avoid duplicates between our refs and alternates receive-pack: treat namespace .have lines like alternates receive-pack: fix misleading namespace/.have comment receive-pack: use oidset to de-duplicate .have lines add oidset API fetch-pack: cache results of for_each_alternate_ref for_each_alternate_ref: replace transport code with for-each-ref for_each_alternate_ref: pass name/oid instead of ref struct for_each_alternate_ref: use strbuf for path allocation for_each_alternate_ref: stop trimming trailing slashes for_each_alternate_ref: handle failure from real_pathdup()
2 parents 93e8cd8 + 63d428e commit b9c2919

File tree

9 files changed

+249
-49
lines changed

9 files changed

+249
-49
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,7 @@ LIB_OBJS += notes-cache.o
781781
LIB_OBJS += notes-merge.o
782782
LIB_OBJS += notes-utils.o
783783
LIB_OBJS += object.o
784+
LIB_OBJS += oidset.o
784785
LIB_OBJS += pack-bitmap.o
785786
LIB_OBJS += pack-bitmap-write.o
786787
LIB_OBJS += pack-check.o

builtin/receive-pack.c

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "sigchain.h"
2222
#include "fsck.h"
2323
#include "tmp-objdir.h"
24+
#include "oidset.h"
2425

2526
static const char * const receive_pack_usage[] = {
2627
N_("git receive-pack <git-dir>"),
@@ -250,8 +251,9 @@ static void show_ref(const char *path, const unsigned char *sha1)
250251
}
251252

252253
static int show_ref_cb(const char *path_full, const struct object_id *oid,
253-
int flag, void *unused)
254+
int flag, void *data)
254255
{
256+
struct oidset *seen = data;
255257
const char *path = strip_namespace(path_full);
256258

257259
if (ref_is_hidden(path, path_full))
@@ -260,37 +262,38 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid,
260262
/*
261263
* Advertise refs outside our current namespace as ".have"
262264
* refs, so that the client can use them to minimize data
263-
* transfer but will otherwise ignore them. This happens to
264-
* cover ".have" that are thrown in by add_one_alternate_ref()
265-
* to mark histories that are complete in our alternates as
266-
* well.
265+
* transfer but will otherwise ignore them.
267266
*/
268-
if (!path)
267+
if (!path) {
268+
if (oidset_insert(seen, oid))
269+
return 0;
269270
path = ".have";
271+
} else {
272+
oidset_insert(seen, oid);
273+
}
270274
show_ref(path, oid->hash);
271275
return 0;
272276
}
273277

274-
static int show_one_alternate_sha1(const unsigned char sha1[20], void *unused)
278+
static void show_one_alternate_ref(const char *refname,
279+
const struct object_id *oid,
280+
void *data)
275281
{
276-
show_ref(".have", sha1);
277-
return 0;
278-
}
282+
struct oidset *seen = data;
279283

280-
static void collect_one_alternate_ref(const struct ref *ref, void *data)
281-
{
282-
struct sha1_array *sa = data;
283-
sha1_array_append(sa, ref->old_oid.hash);
284+
if (oidset_insert(seen, oid))
285+
return;
286+
287+
show_ref(".have", oid->hash);
284288
}
285289

286290
static void write_head_info(void)
287291
{
288-
struct sha1_array sa = SHA1_ARRAY_INIT;
292+
static struct oidset seen = OIDSET_INIT;
289293

290-
for_each_alternate_ref(collect_one_alternate_ref, &sa);
291-
sha1_array_for_each_unique(&sa, show_one_alternate_sha1, NULL);
292-
sha1_array_clear(&sa);
293-
for_each_ref(show_ref_cb, NULL);
294+
for_each_ref(show_ref_cb, &seen);
295+
for_each_alternate_ref(show_one_alternate_ref, &seen);
296+
oidset_clear(&seen);
294297
if (!sent_capabilities)
295298
show_ref("capabilities^{}", null_sha1);
296299

fetch-pack.c

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ static const char *alternate_shallow_file;
3535
#define COMMON_REF (1U << 2)
3636
#define SEEN (1U << 3)
3737
#define POPPED (1U << 4)
38+
#define ALTERNATE (1U << 5)
3839

3940
static int marked;
4041

@@ -67,6 +68,41 @@ static inline void print_verbose(const struct fetch_pack_args *args,
6768
fputc('\n', stderr);
6869
}
6970

71+
struct alternate_object_cache {
72+
struct object **items;
73+
size_t nr, alloc;
74+
};
75+
76+
static void cache_one_alternate(const char *refname,
77+
const struct object_id *oid,
78+
void *vcache)
79+
{
80+
struct alternate_object_cache *cache = vcache;
81+
struct object *obj = parse_object(oid->hash);
82+
83+
if (!obj || (obj->flags & ALTERNATE))
84+
return;
85+
86+
obj->flags |= ALTERNATE;
87+
ALLOC_GROW(cache->items, cache->nr + 1, cache->alloc);
88+
cache->items[cache->nr++] = obj;
89+
}
90+
91+
static void for_each_cached_alternate(void (*cb)(struct object *))
92+
{
93+
static int initialized;
94+
static struct alternate_object_cache cache;
95+
size_t i;
96+
97+
if (!initialized) {
98+
for_each_alternate_ref(cache_one_alternate, &cache);
99+
initialized = 1;
100+
}
101+
102+
for (i = 0; i < cache.nr; i++)
103+
cb(cache.items[i]);
104+
}
105+
70106
static void rev_list_push(struct commit *commit, int mark)
71107
{
72108
if (!(commit->object.flags & mark)) {
@@ -253,9 +289,9 @@ static void send_request(struct fetch_pack_args *args,
253289
write_or_die(fd, buf->buf, buf->len);
254290
}
255291

256-
static void insert_one_alternate_ref(const struct ref *ref, void *unused)
292+
static void insert_one_alternate_object(struct object *obj)
257293
{
258-
rev_list_insert_ref(NULL, ref->old_oid.hash);
294+
rev_list_insert_ref(NULL, obj->oid.hash);
259295
}
260296

261297
#define INITIAL_FLUSH 16
@@ -298,7 +334,7 @@ static int find_common(struct fetch_pack_args *args,
298334
marked = 1;
299335

300336
for_each_ref(rev_list_insert_ref_oid, NULL);
301-
for_each_alternate_ref(insert_one_alternate_ref, NULL);
337+
for_each_cached_alternate(insert_one_alternate_object);
302338

303339
fetching = 0;
304340
for ( ; refs ; refs = refs->next) {
@@ -619,9 +655,9 @@ static void filter_refs(struct fetch_pack_args *args,
619655
*refs = newlist;
620656
}
621657

622-
static void mark_alternate_complete(const struct ref *ref, void *unused)
658+
static void mark_alternate_complete(struct object *obj)
623659
{
624-
mark_complete(ref->old_oid.hash);
660+
mark_complete(obj->oid.hash);
625661
}
626662

627663
static int everything_local(struct fetch_pack_args *args,
@@ -657,7 +693,7 @@ static int everything_local(struct fetch_pack_args *args,
657693

658694
if (!args->deepen) {
659695
for_each_ref(mark_complete_oid, NULL);
660-
for_each_alternate_ref(mark_alternate_complete, NULL);
696+
for_each_cached_alternate(mark_alternate_complete);
661697
commit_list_sort_by_date(&complete);
662698
if (cutoff)
663699
mark_recent_complete_commits(args, cutoff);

object.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ struct object_array {
2929
/*
3030
* object flag allocation:
3131
* revision.h: 0---------10 26
32-
* fetch-pack.c: 0---4
32+
* fetch-pack.c: 0---5
3333
* walker.c: 0-2
3434
* upload-pack.c: 4 11----------------19
3535
* builtin/blame.c: 12-13

oidset.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#include "cache.h"
2+
#include "oidset.h"
3+
4+
struct oidset_entry {
5+
struct hashmap_entry hash;
6+
struct object_id oid;
7+
};
8+
9+
static int oidset_hashcmp(const void *va, const void *vb,
10+
const void *vkey)
11+
{
12+
const struct oidset_entry *a = va, *b = vb;
13+
const struct object_id *key = vkey;
14+
return oidcmp(&a->oid, key ? key : &b->oid);
15+
}
16+
17+
int oidset_contains(const struct oidset *set, const struct object_id *oid)
18+
{
19+
struct hashmap_entry key;
20+
21+
if (!set->map.cmpfn)
22+
return 0;
23+
24+
hashmap_entry_init(&key, sha1hash(oid->hash));
25+
return !!hashmap_get(&set->map, &key, oid);
26+
}
27+
28+
int oidset_insert(struct oidset *set, const struct object_id *oid)
29+
{
30+
struct oidset_entry *entry;
31+
32+
if (!set->map.cmpfn)
33+
hashmap_init(&set->map, oidset_hashcmp, 0);
34+
35+
if (oidset_contains(set, oid))
36+
return 1;
37+
38+
entry = xmalloc(sizeof(*entry));
39+
hashmap_entry_init(&entry->hash, sha1hash(oid->hash));
40+
oidcpy(&entry->oid, oid);
41+
42+
hashmap_add(&set->map, entry);
43+
return 0;
44+
}
45+
46+
void oidset_clear(struct oidset *set)
47+
{
48+
hashmap_free(&set->map, 1);
49+
}

oidset.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#ifndef OIDSET_H
2+
#define OIDSET_H
3+
4+
/**
5+
* This API is similar to sha1-array, in that it maintains a set of object ids
6+
* in a memory-efficient way. The major differences are:
7+
*
8+
* 1. It uses a hash, so we can do online duplicate removal, rather than
9+
* sort-and-uniq at the end. This can reduce memory footprint if you have
10+
* a large list of oids with many duplicates.
11+
*
12+
* 2. The per-unique-oid memory footprint is slightly higher due to hash
13+
* table overhead.
14+
*/
15+
16+
/**
17+
* A single oidset; should be zero-initialized (or use OIDSET_INIT).
18+
*/
19+
struct oidset {
20+
struct hashmap map;
21+
};
22+
23+
#define OIDSET_INIT { { NULL } }
24+
25+
/**
26+
* Returns true iff `set` contains `oid`.
27+
*/
28+
int oidset_contains(const struct oidset *set, const struct object_id *oid);
29+
30+
/**
31+
* Insert the oid into the set; a copy is made, so "oid" does not need
32+
* to persist after this function is called.
33+
*
34+
* Returns 1 if the oid was already in the set, 0 otherwise. This can be used
35+
* to perform an efficient check-and-add.
36+
*/
37+
int oidset_insert(struct oidset *set, const struct object_id *oid);
38+
39+
/**
40+
* Remove all entries from the oidset, freeing any resources associated with
41+
* it.
42+
*/
43+
void oidset_clear(struct oidset *set);
44+
45+
#endif /* OIDSET_H */

t/t5400-send-pack.sh

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,4 +255,42 @@ test_expect_success 'deny pushing to delete current branch' '
255255
)
256256
'
257257

258+
extract_ref_advertisement () {
259+
perl -lne '
260+
# \\ is there to skip capabilities after \0
261+
/push< ([^\\]+)/ or next;
262+
exit 0 if $1 eq "0000";
263+
print $1;
264+
'
265+
}
266+
267+
test_expect_success 'receive-pack de-dupes .have lines' '
268+
git init shared &&
269+
git -C shared commit --allow-empty -m both &&
270+
git clone -s shared fork &&
271+
(
272+
cd shared &&
273+
git checkout -b only-shared &&
274+
git commit --allow-empty -m only-shared &&
275+
git update-ref refs/heads/foo HEAD
276+
) &&
277+
278+
# Notable things in this expectation:
279+
# - local refs are not de-duped
280+
# - .have does not duplicate locals
281+
# - .have does not duplicate itself
282+
local=$(git -C fork rev-parse HEAD) &&
283+
shared=$(git -C shared rev-parse only-shared) &&
284+
cat >expect <<-EOF &&
285+
$local refs/heads/master
286+
$local refs/remotes/origin/HEAD
287+
$local refs/remotes/origin/master
288+
$shared .have
289+
EOF
290+
291+
GIT_TRACE_PACKET=$(pwd)/trace git push fork HEAD:foo &&
292+
extract_ref_advertisement <trace >refs &&
293+
test_cmp expect refs
294+
'
295+
258296
test_done

0 commit comments

Comments
 (0)