Skip to content

Commit 32dc15d

Browse files
committed
Merge branch 'jt/batch-fetch-blobs-in-diff'
While running "git diff" in a lazy clone, we can know up front which missing blobs we will need, instead of waiting for the on-demand machinery to discover them one by one. Aim to achieve better performance by batching the request for these promised blobs.

* jt/batch-fetch-blobs-in-diff:
  diff: batch fetching of missing blobs
  sha1-file: support OBJECT_INFO_FOR_PREFETCH
2 parents ac70c53 + 7fbbcb2 commit 32dc15d

File tree

5 files changed

+154
-9
lines changed

5 files changed

+154
-9
lines changed

diff.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "packfile.h"
2626
#include "parse-options.h"
2727
#include "help.h"
28+
#include "fetch-object.h"
2829

2930
#ifdef NO_FAST_WORKING_DIRECTORY
3031
#define FAST_WORKING_DIRECTORY 0
@@ -6477,8 +6478,41 @@ void diffcore_fix_diff_index(void)
64776478
QSORT(q->queue, q->nr, diffnamecmp);
64786479
}
64796480

6481+
static void add_if_missing(struct repository *r,
6482+
struct oid_array *to_fetch,
6483+
const struct diff_filespec *filespec)
6484+
{
6485+
if (filespec && filespec->oid_valid &&
6486+
oid_object_info_extended(r, &filespec->oid, NULL,
6487+
OBJECT_INFO_FOR_PREFETCH))
6488+
oid_array_append(to_fetch, &filespec->oid);
6489+
}
6490+
64806491
void diffcore_std(struct diff_options *options)
64816492
{
6493+
if (options->repo == the_repository &&
6494+
repository_format_partial_clone) {
6495+
/*
6496+
* Prefetch the diff pairs that are about to be flushed.
6497+
*/
6498+
int i;
6499+
struct diff_queue_struct *q = &diff_queued_diff;
6500+
struct oid_array to_fetch = OID_ARRAY_INIT;
6501+
6502+
for (i = 0; i < q->nr; i++) {
6503+
struct diff_filepair *p = q->queue[i];
6504+
add_if_missing(options->repo, &to_fetch, p->one);
6505+
add_if_missing(options->repo, &to_fetch, p->two);
6506+
}
6507+
if (to_fetch.nr)
6508+
/*
6509+
* NEEDSWORK: Consider deduplicating the OIDs sent.
6510+
*/
6511+
fetch_objects(repository_format_partial_clone,
6512+
to_fetch.oid, to_fetch.nr);
6513+
oid_array_clear(&to_fetch);
6514+
}
6515+
64826516
/* NOTE please keep the following in sync with diff_tree_combined() */
64836517
if (options->skip_stat_unmatch)
64846518
diffcore_skip_stat_unmatch(options);

object-store.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,12 @@ struct object_info {
280280
#define OBJECT_INFO_QUICK 8
281281
/* Do not check loose object */
282282
#define OBJECT_INFO_IGNORE_LOOSE 16
283+
/*
284+
* Do not attempt to fetch the object if missing (even if fetch_if_missing is
285+
* nonzero). This is meant for bulk prefetching of missing blobs in a partial
286+
* clone. Implies OBJECT_INFO_QUICK.
287+
*/
288+
#define OBJECT_INFO_FOR_PREFETCH (32 + OBJECT_INFO_QUICK)
283289

284290
int oid_object_info_extended(struct repository *r,
285291
const struct object_id *,

sha1-file.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1378,7 +1378,8 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid,
13781378

13791379
/* Check if it is a missing object */
13801380
if (fetch_if_missing && repository_format_partial_clone &&
1381-
!already_retried && r == the_repository) {
1381+
!already_retried && r == the_repository &&
1382+
!(flags & OBJECT_INFO_FOR_PREFETCH)) {
13821383
/*
13831384
* TODO Investigate having fetch_object() return
13841385
* TODO error/success and stopping the music here.

t/t4067-diff-partial-clone.sh

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#!/bin/sh
2+
3+
test_description='behavior of diff when reading objects in a partial clone'
4+
5+
. ./test-lib.sh
6+
7+
test_expect_success 'git show batches blobs' '
8+
test_when_finished "rm -rf server client trace" &&
9+
10+
test_create_repo server &&
11+
echo a >server/a &&
12+
echo b >server/b &&
13+
git -C server add a b &&
14+
git -C server commit -m x &&
15+
16+
test_config -C server uploadpack.allowfilter 1 &&
17+
test_config -C server uploadpack.allowanysha1inwant 1 &&
18+
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
19+
20+
# Ensure that there is exactly 1 negotiation by checking that there is
21+
# only 1 "done" line sent. ("done" marks the end of negotiation.)
22+
GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
23+
grep "git> done" trace >done_lines &&
24+
test_line_count = 1 done_lines
25+
'
26+
27+
test_expect_success 'diff batches blobs' '
28+
test_when_finished "rm -rf server client trace" &&
29+
30+
test_create_repo server &&
31+
echo a >server/a &&
32+
echo b >server/b &&
33+
git -C server add a b &&
34+
git -C server commit -m x &&
35+
echo c >server/c &&
36+
echo d >server/d &&
37+
git -C server add c d &&
38+
git -C server commit -m x &&
39+
40+
test_config -C server uploadpack.allowfilter 1 &&
41+
test_config -C server uploadpack.allowanysha1inwant 1 &&
42+
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
43+
44+
# Ensure that there is exactly 1 negotiation by checking that there is
45+
# only 1 "done" line sent. ("done" marks the end of negotiation.)
46+
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
47+
grep "git> done" trace >done_lines &&
48+
test_line_count = 1 done_lines
49+
'
50+
51+
test_expect_success 'diff skips same-OID blobs' '
52+
test_when_finished "rm -rf server client trace" &&
53+
54+
test_create_repo server &&
55+
echo a >server/a &&
56+
echo b >server/b &&
57+
git -C server add a b &&
58+
git -C server commit -m x &&
59+
echo another-a >server/a &&
60+
git -C server add a &&
61+
git -C server commit -m x &&
62+
63+
test_config -C server uploadpack.allowfilter 1 &&
64+
test_config -C server uploadpack.allowanysha1inwant 1 &&
65+
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
66+
67+
echo a | git hash-object --stdin >hash-old-a &&
68+
echo another-a | git hash-object --stdin >hash-new-a &&
69+
echo b | git hash-object --stdin >hash-b &&
70+
71+
# Ensure that only a and another-a are fetched.
72+
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
73+
grep "want $(cat hash-old-a)" trace &&
74+
grep "want $(cat hash-new-a)" trace &&
75+
! grep "want $(cat hash-b)" trace
76+
'
77+
78+
test_expect_success 'diff with rename detection batches blobs' '
79+
test_when_finished "rm -rf server client trace" &&
80+
81+
test_create_repo server &&
82+
echo a >server/a &&
83+
printf "b\nb\nb\nb\nb\n" >server/b &&
84+
git -C server add a b &&
85+
git -C server commit -m x &&
86+
rm server/b &&
87+
printf "b\nb\nb\nb\nbX\n" >server/c &&
88+
git -C server add c &&
89+
git -C server commit -a -m x &&
90+
91+
test_config -C server uploadpack.allowfilter 1 &&
92+
test_config -C server uploadpack.allowanysha1inwant 1 &&
93+
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
94+
95+
# Ensure that there is exactly 1 negotiation by checking that there is
96+
# only 1 "done" line sent. ("done" marks the end of negotiation.)
97+
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
98+
grep "similarity index" out &&
99+
grep "git> done" trace >done_lines &&
100+
test_line_count = 1 done_lines
101+
'
102+
103+
test_done

unpack-trees.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -406,20 +406,21 @@ static int check_updates(struct unpack_trees_options *o)
406406
* below.
407407
*/
408408
struct oid_array to_fetch = OID_ARRAY_INIT;
409-
int fetch_if_missing_store = fetch_if_missing;
410-
fetch_if_missing = 0;
411409
for (i = 0; i < index->cache_nr; i++) {
412410
struct cache_entry *ce = index->cache[i];
413-
if ((ce->ce_flags & CE_UPDATE) &&
414-
!S_ISGITLINK(ce->ce_mode)) {
415-
if (!has_object_file(&ce->oid))
416-
oid_array_append(&to_fetch, &ce->oid);
417-
}
411+
412+
if (!(ce->ce_flags & CE_UPDATE) ||
413+
S_ISGITLINK(ce->ce_mode))
414+
continue;
415+
if (!oid_object_info_extended(the_repository, &ce->oid,
416+
NULL,
417+
OBJECT_INFO_FOR_PREFETCH))
418+
continue;
419+
oid_array_append(&to_fetch, &ce->oid);
418420
}
419421
if (to_fetch.nr)
420422
fetch_objects(repository_format_partial_clone,
421423
to_fetch.oid, to_fetch.nr);
422-
fetch_if_missing = fetch_if_missing_store;
423424
oid_array_clear(&to_fetch);
424425
}
425426
for (i = 0; i < index->cache_nr; i++) {

0 commit comments

Comments
 (0)