Skip to content

Commit f1bd15a

Browse files
drafnel authored and gitster committed
remote.c: avoid O(m*n) behavior in match_push_refs
When pushing using a matching refspec or a pattern refspec, each ref in the local repository must be paired with a ref advertised by the remote server. This is accomplished by using the refspec to transform the name of the local ref into the name it should have in the remote repository, and then performing a linear search through the list of remote refs to see if the remote ref was advertised by the remote system.

Each of these lookups has O(n) complexity, which makes match_push_refs() an O(m*n) operation, where m is the number of local refs and n is the number of remote refs. If there are many refs (100,000+), then this ref matching can take a significant amount of time. Let's prepare an index of the remote refs to allow searching in O(log n) time and reduce the complexity of match_push_refs() to O(m log n).

We prepare the index lazily so that it is only created when necessary. So, there should be no impact when _not_ using a matching or pattern refspec, i.e. when pushing using only explicit refspecs.

Dry-run push of a repository with 121,913 local and remote refs:

            before     after
    real    1m40.582s  0m0.804s
    user    1m39.914s  0m0.515s
    sys     0m0.125s   0m0.106s

The creation of the index has overhead. So, if there are very few local refs, then it could take longer to create the index than it would have taken to just perform n linear lookups into the remote ref space. Using the index should provide some improvement when the number of local refs is roughly greater than the log of the number of remote refs (i.e. m >= log n). The pathological case is when there is a single local ref and very many remote refs.

Dry-run push of a repository with 121,913 remote refs and a single local ref:

            before     after
    real    0m0.525s   0m0.566s
    user    0m0.243s   0m0.279s
    sys     0m0.075s   0m0.099s

Using an index takes 41 ms longer, or roughly 7.8% longer.
Jeff King measured a no-op push of a single ref into a remote repo with 370,000 refs:

            before     after
    real    0m1.087s   0m1.156s
    user    0m1.344s   0m1.412s
    sys     0m0.288s   0m0.284s

Using an index takes 69 ms longer, or roughly 6.3% longer.

None of the measurements above required transferring any objects to the remote repository. If the push required transferring objects and updating the refs in the remote repository, the impact of preparing the search index would be even smaller.

A similar operation is performed in the reverse direction when pruning using a matching or pattern refspec. Let's avoid O(m*n) behavior in the same way by lazily preparing an index on the local refs.

Signed-off-by: Brandon Casey <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent edca415 commit f1bd15a

File tree

1 file changed

+25
-2
lines changed

1 file changed

+25
-2
lines changed

remote.c

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,6 +1317,14 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds
13171317
free(sent_tips.tip);
13181318
}
13191319

1320+
/*
 * Build a by-name lookup index over the linked list of refs that starts
 * at "ref" (following ref->next until NULL).
 *
 * Every ref is appended to "ref_index" under ref->name, and the new
 * item's util pointer is set to the ref itself so a lookup can recover
 * the struct ref.  The list is sorted once, after all appends, rather
 * than being kept sorted on each insertion.
 *
 * NOTE(review): the "_nodup" append presumably stores ref->name without
 * copying it, so the index would only be usable while the refs are
 * alive -- confirm against string-list.h.  The sort is presumably what
 * string_list_lookup() / string_list_has_string() rely on -- confirm.
 */
static void prepare_ref_index(struct string_list *ref_index, struct ref *ref)
1321+
{
1322+
for ( ; ref; ref = ref->next)
1323+
string_list_append_nodup(ref_index, ref->name)->util = ref;
1324+
1325+
sort_string_list(ref_index);
1326+
}
1327+
13201328
/*
13211329
* Given the set of refs the local repository has, the set of refs the
13221330
* remote repository has, and the refspec used for push, determine
@@ -1335,6 +1343,7 @@ int match_push_refs(struct ref *src, struct ref **dst,
13351343
int errs;
13361344
static const char *default_refspec[] = { ":", NULL };
13371345
struct ref *ref, **dst_tail = tail_ref(dst);
1346+
struct string_list dst_ref_index = STRING_LIST_INIT_NODUP;
13381347

13391348
if (!nr_refspec) {
13401349
nr_refspec = 1;
@@ -1345,6 +1354,7 @@ int match_push_refs(struct ref *src, struct ref **dst,
13451354

13461355
/* pick the remainder */
13471356
for (ref = src; ref; ref = ref->next) {
1357+
struct string_list_item *dst_item;
13481358
struct ref *dst_peer;
13491359
const struct refspec *pat = NULL;
13501360
char *dst_name;
@@ -1353,7 +1363,11 @@ int match_push_refs(struct ref *src, struct ref **dst,
13531363
if (!dst_name)
13541364
continue;
13551365

1356-
dst_peer = find_ref_by_name(*dst, dst_name);
1366+
if (!dst_ref_index.nr)
1367+
prepare_ref_index(&dst_ref_index, *dst);
1368+
1369+
dst_item = string_list_lookup(&dst_ref_index, dst_name);
1370+
dst_peer = dst_item ? dst_item->util : NULL;
13571371
if (dst_peer) {
13581372
if (dst_peer->peer_ref)
13591373
/* We're already sending something to this ref. */
@@ -1370,17 +1384,22 @@ int match_push_refs(struct ref *src, struct ref **dst,
13701384
/* Create a new one and link it */
13711385
dst_peer = make_linked_ref(dst_name, &dst_tail);
13721386
hashcpy(dst_peer->new_sha1, ref->new_sha1);
1387+
string_list_insert(&dst_ref_index,
1388+
dst_peer->name)->util = dst_peer;
13731389
}
13741390
dst_peer->peer_ref = copy_ref(ref);
13751391
dst_peer->force = pat->force;
13761392
free_name:
13771393
free(dst_name);
13781394
}
13791395

1396+
string_list_clear(&dst_ref_index, 0);
1397+
13801398
if (flags & MATCH_REFS_FOLLOW_TAGS)
13811399
add_missing_tags(src, dst, &dst_tail);
13821400

13831401
if (send_prune) {
1402+
struct string_list src_ref_index = STRING_LIST_INIT_NODUP;
13841403
/* check for missing refs on the remote */
13851404
for (ref = *dst; ref; ref = ref->next) {
13861405
char *src_name;
@@ -1391,11 +1410,15 @@ int match_push_refs(struct ref *src, struct ref **dst,
13911410

13921411
src_name = get_ref_match(rs, nr_refspec, ref, send_mirror, FROM_DST, NULL);
13931412
if (src_name) {
1394-
if (!find_ref_by_name(src, src_name))
1413+
if (!src_ref_index.nr)
1414+
prepare_ref_index(&src_ref_index, src);
1415+
if (!string_list_has_string(&src_ref_index,
1416+
src_name))
13951417
ref->peer_ref = alloc_delete_ref();
13961418
free(src_name);
13971419
}
13981420
}
1421+
string_list_clear(&src_ref_index, 0);
13991422
}
14001423
if (errs)
14011424
return -1;

0 commit comments

Comments
 (0)