Skip to content

Commit c0c578b

Browse files
jonathantanmy authored and gitster committed
unpack-trees: batch fetching of missing blobs
When running checkout, first prefetch all blobs that are to be updated but are missing. This means that only one pack is downloaded during such operations, instead of one per missing blob.

This operates only on the blob level - if a repository has missing trees, they are still fetched one at a time.

This does not use the delayed checkout mechanism introduced in commit 2841e8f ("convert: add "status=delayed" to filter process protocol", 2017-06-30) due to significant conceptual differences - in particular, for partial clones, we already know what needs to be fetched based on the contents of the local repo alone, whereas for status=delayed, it is the filter process that tells us what needs to be checked in the end.

Signed-off-by: Jonathan Tan <[email protected]>
Signed-off-by: Jeff Hostetler <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 548719f commit c0c578b

File tree

4 files changed

+101
-4
lines changed

4 files changed

+101
-4
lines changed

fetch-object.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,10 @@
55
#include "transport.h"
66
#include "fetch-object.h"
77

8-
void fetch_object(const char *remote_name, const unsigned char *sha1)
8+
static void fetch_refs(const char *remote_name, struct ref *ref)
99
{
1010
struct remote *remote;
1111
struct transport *transport;
12-
struct ref *ref;
1312
int original_fetch_if_missing = fetch_if_missing;
1413

1514
fetch_if_missing = 0;
@@ -18,10 +17,29 @@ void fetch_object(const char *remote_name, const unsigned char *sha1)
1817
die(_("Remote with no URL"));
1918
transport = transport_get(remote, remote->url[0]);
2019

21-
ref = alloc_ref(sha1_to_hex(sha1));
22-
hashcpy(ref->old_oid.hash, sha1);
2320
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
2421
transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1");
2522
transport_fetch_refs(transport, ref);
2623
fetch_if_missing = original_fetch_if_missing;
2724
}
25+
26+
/*
 * Fetch one object, identified by its raw SHA-1, from the remote named
 * `remote_name`.
 *
 * A ref is allocated whose name is the hex form of `sha1` and whose
 * old_oid carries the same hash; that ref is then handed to fetch_refs(),
 * which performs the actual transport request.
 */
void fetch_object(const char *remote_name, const unsigned char *sha1)
27+
{
28+
struct ref *ref = alloc_ref(sha1_to_hex(sha1));
29+
hashcpy(ref->old_oid.hash, sha1);
30+
fetch_refs(remote_name, ref);
31+
}
32+
33+
/*
 * Fetch every object listed in `to_fetch` from the remote named
 * `remote_name` with a single fetch_refs() call.
 *
 * One ref is allocated per object id (named by the id's hex form) and the
 * refs are chained through their `next` pointers into one list. If
 * `to_fetch` is empty, fetch_refs() is still called, with a NULL list.
 */
void fetch_objects(const char *remote_name, const struct oid_array *to_fetch)
34+
{
35+
struct ref *ref = NULL;
36+
int i;
37+
38+
for (i = 0; i < to_fetch->nr; i++) {
39+
struct ref *new_ref = alloc_ref(oid_to_hex(&to_fetch->oid[i]));
40+
oidcpy(&new_ref->old_oid, &to_fetch->oid[i]);
41+
/* Prepend: the finished list is in reverse order of to_fetch. */
new_ref->next = ref;
42+
ref = new_ref;
43+
}
44+
/* Hand the whole list over at once instead of one call per object. */
fetch_refs(remote_name, ref);
45+
}

fetch-object.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
#ifndef FETCH_OBJECT_H
22
#define FETCH_OBJECT_H
33

4+
/* Needed for struct oid_array, used by fetch_objects() below. */
#include "sha1-array.h"
5+
46
/* Fetch the single object with raw hash `sha1` from remote `remote_name`. */
extern void fetch_object(const char *remote_name, const unsigned char *sha1);
57

8+
/* Fetch all objects listed in `to_fetch` from remote `remote_name`. */
extern void fetch_objects(const char *remote_name,
9+
const struct oid_array *to_fetch);
10+
611
#endif

t/t5601-clone.sh

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,58 @@ test_expect_success 'partial clone: warn if server does not support object filte
611611
test_i18ngrep "filtering not recognized by server" err
612612
'
613613

614+
# Checks that checking out HEAD^ in a filtered clone negotiates with the
# server only once: the packet trace must contain exactly one "done" line.
test_expect_success 'batch missing blob request during checkout' '
615+
rm -rf server client &&
616+
617+
# Server history: two commits, each one changing both a and b.
test_create_repo server &&
618+
echo a >server/a &&
619+
echo b >server/b &&
620+
git -C server add a b &&
621+
622+
git -C server commit -m x &&
623+
echo aa >server/a &&
624+
echo bb >server/b &&
625+
git -C server add a b &&
626+
git -C server commit -m x &&
627+
628+
test_config -C server uploadpack.allowfilter 1 &&
629+
test_config -C server uploadpack.allowanysha1inwant 1 &&
630+
631+
git clone --filter=blob:limit=0 "file://$(pwd)/server" client &&
632+
633+
# Ensure that there is only one negotiation by checking that there is
634+
# only one "done" line sent. ("done" marks the end of negotiation.)
635+
GIT_TRACE_PACKET="$(pwd)/trace" git -C client checkout HEAD^ &&
636+
grep "git> done" trace >done_lines &&
637+
test_line_count = 1 done_lines
638+
'
639+
640+
# Checks that a filtered clone still succeeds when the commit being checked
# out contains a gitlink (submodule entry): the gitlink refers to a commit of
# another repository and must not be requested as if it were a missing blob.
test_expect_success 'batch missing blob request does not inadvertently try to fetch gitlinks' '
641+
rm -rf server client &&
642+
643+
# Separate repository with one commit, used as the submodule target.
test_create_repo repo_for_submodule &&
644+
test_commit -C repo_for_submodule x &&
645+
646+
test_create_repo server &&
647+
echo a >server/a &&
648+
echo b >server/b &&
649+
git -C server add a b &&
650+
git -C server commit -m x &&
651+
652+
echo aa >server/a &&
653+
echo bb >server/b &&
654+
# Also add a gitlink pointing to an arbitrary repository
655+
git -C server submodule add "$(pwd)/repo_for_submodule" c &&
656+
git -C server add a b c &&
657+
git -C server commit -m x &&
658+
659+
test_config -C server uploadpack.allowfilter 1 &&
660+
test_config -C server uploadpack.allowanysha1inwant 1 &&
661+
662+
# Make sure that it succeeds
663+
git clone --filter=blob:limit=0 "file://$(pwd)/server" client
664+
'
665+
614666
. "$TEST_DIRECTORY"/lib-httpd.sh
615667
start_httpd
616668

unpack-trees.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "dir.h"
1515
#include "submodule.h"
1616
#include "submodule-config.h"
17+
#include "fetch-object.h"
1718

1819
/*
1920
* Error messages expected by scripts out of plumbing commands such as
@@ -369,6 +370,27 @@ static int check_updates(struct unpack_trees_options *o)
369370
load_gitmodules_file(index, &state);
370371

371372
enable_delayed_checkout(&state);
373+
if (repository_format_partial_clone && o->update && !o->dry_run) {
374+
/*
375+
* Prefetch the objects that are to be checked out in the loop
376+
* below.
377+
*/
378+
struct oid_array to_fetch = OID_ARRAY_INIT;
379+
int fetch_if_missing_store = fetch_if_missing;
380+
fetch_if_missing = 0;
381+
for (i = 0; i < index->cache_nr; i++) {
382+
struct cache_entry *ce = index->cache[i];
383+
if ((ce->ce_flags & CE_UPDATE) &&
384+
!S_ISGITLINK(ce->ce_mode)) {
385+
if (!has_object_file(&ce->oid))
386+
oid_array_append(&to_fetch, &ce->oid);
387+
}
388+
}
389+
if (to_fetch.nr)
390+
fetch_objects(repository_format_partial_clone,
391+
&to_fetch);
392+
fetch_if_missing = fetch_if_missing_store;
393+
}
372394
for (i = 0; i < index->cache_nr; i++) {
373395
struct cache_entry *ce = index->cache[i];
374396

0 commit comments

Comments
 (0)