Skip to content

Commit 39000e8

Browse files
committed
Merge branch 'jk/fetch-quick-tag-following' into maint
When fetching from a remote that has many tags that are irrelevant to the branches we are following, we used to waste way too many cycles by checking too carefully whether the object pointed at by a tag (that we are not going to fetch!) exists in our repository. * jk/fetch-quick-tag-following: fetch: use "quick" has_sha1_file for tag following
2 parents 96ec83c + 5827a03 commit 39000e8

File tree

4 files changed

+112
-4
lines changed

4 files changed

+112
-4
lines changed

builtin/fetch.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -233,9 +233,10 @@ static void find_non_local_tags(struct transport *transport,
233233
* as one to ignore by setting util to NULL.
234234
*/
235235
if (ends_with(ref->name, "^{}")) {
236-
if (item && !has_object_file(&ref->old_oid) &&
236+
if (item &&
237+
!has_object_file_with_flags(&ref->old_oid, HAS_SHA1_QUICK) &&
237238
!will_fetch(head, ref->old_oid.hash) &&
238-
!has_sha1_file(item->util) &&
239+
!has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
239240
!will_fetch(head, item->util))
240241
item->util = NULL;
241242
item = NULL;
@@ -248,7 +249,8 @@ static void find_non_local_tags(struct transport *transport,
248249
* to check if it is a lightweight tag that we want to
249250
* fetch.
250251
*/
251-
if (item && !has_sha1_file(item->util) &&
252+
if (item &&
253+
!has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
252254
!will_fetch(head, item->util))
253255
item->util = NULL;
254256

@@ -268,7 +270,8 @@ static void find_non_local_tags(struct transport *transport,
268270
* We may have a final lightweight tag that needs to be
269271
* checked to see if it needs fetching.
270272
*/
271-
if (item && !has_sha1_file(item->util) &&
273+
if (item &&
274+
!has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
272275
!will_fetch(head, item->util))
273276
item->util = NULL;
274277

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,7 @@ static inline int has_sha1_file(const unsigned char *sha1)
11231123

11241124
/* Same as the above, except for struct object_id. */
11251125
extern int has_object_file(const struct object_id *oid);
1126+
extern int has_object_file_with_flags(const struct object_id *oid, int flags);
11261127

11271128
/*
11281129
* Return true iff an alternate object database has a loose object

sha1_file.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3231,6 +3231,11 @@ int has_object_file(const struct object_id *oid)
32313231
return has_sha1_file(oid->hash);
32323232
}
32333233

3234+
int has_object_file_with_flags(const struct object_id *oid, int flags)
3235+
{
3236+
return has_sha1_file_with_flags(oid->hash, flags);
3237+
}
3238+
32343239
static void check_tree(const void *buf, size_t size)
32353240
{
32363241
struct tree_desc desc;

t/perf/p5550-fetch-tags.sh

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#!/bin/sh
2+
3+
test_description='performance of tag-following with many tags
4+
5+
This tests a fairly pathological case, so rather than rely on a real-world
6+
case, we will construct our own repository. The situation is roughly as
7+
follows.
8+
9+
The parent repository has a large number of tags which are disconnected from
10+
the rest of history. That makes them candidates for tag-following, but we never
11+
actually grab them (and thus they will impact each subsequent fetch).
12+
13+
The child repository is a clone of parent, without the tags, and is at least
14+
one commit behind the parent (meaning that we will fetch one object and then
15+
examine the tags to see if they need followed). Furthermore, it has a large
16+
number of packs.
17+
18+
The exact values of "large" here are somewhat arbitrary; I picked values that
19+
start to show a noticeable performance problem on my machine, but without
20+
taking too long to set up and run the tests.
21+
'
22+
. ./perf-lib.sh
23+
24+
# make a long nonsense history on branch $1, consisting of $2 commits, each
25+
# with a unique file pointing to the blob at $3.
26+
create_history () {
27+
perl -le '
28+
my ($branch, $n, $blob) = @ARGV;
29+
for (1..$n) {
30+
print "commit refs/heads/$branch";
31+
print "committer nobody <nobody\@example.com> now";
32+
print "data 4";
33+
print "foo";
34+
print "M 100644 $blob $_";
35+
}
36+
' "$@" |
37+
git fast-import --date-format=now
38+
}
39+
40+
# make a series of tags, one per commit in the revision range given by $@
41+
create_tags () {
42+
git rev-list "$@" |
43+
perl -lne 'print "create refs/tags/$. $_"' |
44+
git update-ref --stdin
45+
}
46+
47+
# create $1 nonsense packs, each with a single blob
48+
create_packs () {
49+
perl -le '
50+
my ($n) = @ARGV;
51+
for (1..$n) {
52+
print "blob";
53+
print "data <<EOF";
54+
print "$_";
55+
print "EOF";
56+
}
57+
' "$@" |
58+
git fast-import &&
59+
60+
git cat-file --batch-all-objects --batch-check='%(objectname)' |
61+
while read sha1
62+
do
63+
echo $sha1 | git pack-objects .git/objects/pack/pack
64+
done
65+
}
66+
67+
test_expect_success 'create parent and child' '
68+
git init parent &&
69+
git -C parent commit --allow-empty -m base &&
70+
git clone parent child &&
71+
git -C parent commit --allow-empty -m trigger-fetch
72+
'
73+
74+
test_expect_success 'populate parent tags' '
75+
(
76+
cd parent &&
77+
blob=$(echo content | git hash-object -w --stdin) &&
78+
create_history cruft 3000 $blob &&
79+
create_tags cruft &&
80+
git branch -D cruft
81+
)
82+
'
83+
84+
test_expect_success 'create child packs' '
85+
(
86+
cd child &&
87+
git config gc.auto 0 &&
88+
git config gc.autopacklimit 0 &&
89+
create_packs 500
90+
)
91+
'
92+
93+
test_perf 'fetch' '
94+
# make sure there is something to fetch on each iteration
95+
git -C child update-ref -d refs/remotes/origin/master &&
96+
git -C child fetch
97+
'
98+
99+
test_done

0 commit comments

Comments
 (0)