Skip to content

Commit 95acf11

Browse files
jonathantanmygitster
authored andcommitted
diff: restrict when prefetching occurs
Commit 7fbbcb2 ("diff: batch fetching of missing blobs", 2019-04-08) optimized "diff" by prefetching blobs in a partial clone, but there are some cases wherein blobs do not need to be prefetched. In these cases, any command that uses the diff machinery will unnecessarily fetch blobs. diffcore_std() may read blobs when it calls the following functions: (1) diffcore_skip_stat_unmatch() (controlled by the config variable diff.autorefreshindex) (2) diffcore_break() and diffcore_merge_broken() (for break-rewrite detection) (3) diffcore_rename() (for rename detection) (4) diffcore_pickaxe() (for detecting addition/deletion of specified string) Instead of always prefetching blobs, teach diffcore_skip_stat_unmatch(), diffcore_break(), and diffcore_rename() to prefetch blobs upon the first read of a missing object. This covers (1), (2), and (3): to cover the rest, teach diffcore_std() to prefetch if the output type is one that includes blob data (and hence blob data will be required later anyway), or if it knows that (4) will be run. Helped-by: Jeff King <[email protected]> Signed-off-by: Jonathan Tan <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent c14b6f8 commit 95acf11

File tree

5 files changed

+181
-28
lines changed

5 files changed

+181
-28
lines changed

diff.c

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4034,10 +4034,18 @@ int diff_populate_filespec(struct repository *r,
40344034
*/
40354035
info.contentp = &s->data;
40364036

4037+
if (options && options->missing_object_cb) {
4038+
if (!oid_object_info_extended(r, &s->oid, &info,
4039+
OBJECT_INFO_LOOKUP_REPLACE |
4040+
OBJECT_INFO_SKIP_FETCH_OBJECT))
4041+
goto object_read;
4042+
options->missing_object_cb(options->missing_object_data);
4043+
}
40374044
if (oid_object_info_extended(r, &s->oid, &info,
40384045
OBJECT_INFO_LOOKUP_REPLACE))
40394046
die("unable to read %s", oid_to_hex(&s->oid));
40404047

4048+
object_read:
40414049
if (size_only || check_binary) {
40424050
if (size_only)
40434051
return 0;
@@ -6444,6 +6452,8 @@ static int diff_filespec_check_stat_unmatch(struct repository *r,
64446452
{
64456453
struct diff_populate_filespec_options dpf_options = {
64466454
.check_size_only = 1,
6455+
.missing_object_cb = diff_queued_diff_prefetch,
6456+
.missing_object_data = r,
64476457
};
64486458

64496459
if (p->done_skip_stat_unmatch)
@@ -6520,9 +6530,9 @@ void diffcore_fix_diff_index(void)
65206530
QSORT(q->queue, q->nr, diffnamecmp);
65216531
}
65226532

6523-
static void add_if_missing(struct repository *r,
6524-
struct oid_array *to_fetch,
6525-
const struct diff_filespec *filespec)
6533+
void diff_add_if_missing(struct repository *r,
6534+
struct oid_array *to_fetch,
6535+
const struct diff_filespec *filespec)
65266536
{
65276537
if (filespec && filespec->oid_valid &&
65286538
!S_ISGITLINK(filespec->mode) &&
@@ -6531,29 +6541,48 @@ static void add_if_missing(struct repository *r,
65316541
oid_array_append(to_fetch, &filespec->oid);
65326542
}
65336543

6534-
void diffcore_std(struct diff_options *options)
6544+
void diff_queued_diff_prefetch(void *repository)
65356545
{
6536-
if (options->repo == the_repository && has_promisor_remote()) {
6537-
/*
6538-
* Prefetch the diff pairs that are about to be flushed.
6539-
*/
6540-
int i;
6541-
struct diff_queue_struct *q = &diff_queued_diff;
6542-
struct oid_array to_fetch = OID_ARRAY_INIT;
6546+
struct repository *repo = repository;
6547+
int i;
6548+
struct diff_queue_struct *q = &diff_queued_diff;
6549+
struct oid_array to_fetch = OID_ARRAY_INIT;
65436550

6544-
for (i = 0; i < q->nr; i++) {
6545-
struct diff_filepair *p = q->queue[i];
6546-
add_if_missing(options->repo, &to_fetch, p->one);
6547-
add_if_missing(options->repo, &to_fetch, p->two);
6548-
}
6549-
/*
6550-
* NEEDSWORK: Consider deduplicating the OIDs sent.
6551-
*/
6552-
promisor_remote_get_direct(options->repo,
6553-
to_fetch.oid, to_fetch.nr);
6554-
oid_array_clear(&to_fetch);
6551+
for (i = 0; i < q->nr; i++) {
6552+
struct diff_filepair *p = q->queue[i];
6553+
diff_add_if_missing(repo, &to_fetch, p->one);
6554+
diff_add_if_missing(repo, &to_fetch, p->two);
65556555
}
65566556

6557+
/*
6558+
* NEEDSWORK: Consider deduplicating the OIDs sent.
6559+
*/
6560+
promisor_remote_get_direct(repo, to_fetch.oid, to_fetch.nr);
6561+
6562+
oid_array_clear(&to_fetch);
6563+
}
6564+
6565+
void diffcore_std(struct diff_options *options)
6566+
{
6567+
int output_formats_to_prefetch = DIFF_FORMAT_DIFFSTAT |
6568+
DIFF_FORMAT_NUMSTAT |
6569+
DIFF_FORMAT_PATCH |
6570+
DIFF_FORMAT_SHORTSTAT |
6571+
DIFF_FORMAT_DIRSTAT;
6572+
6573+
/*
6574+
* Check if the user requested a blob-data-requiring diff output and/or
6575+
* break-rewrite detection (which requires blob data). If yes, prefetch
6576+
* the diff pairs.
6577+
*
6578+
* If no prefetching occurs, diffcore_rename() will prefetch if it
6579+
* decides that it needs inexact rename detection.
6580+
*/
6581+
if (options->repo == the_repository && has_promisor_remote() &&
6582+
(options->output_format & output_formats_to_prefetch ||
6583+
options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK))
6584+
diff_queued_diff_prefetch(options->repo);
6585+
65576586
/* NOTE please keep the following in sync with diff_tree_combined() */
65586587
if (options->skip_stat_unmatch)
65596588
diffcore_skip_stat_unmatch(options);

diffcore-break.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "cache.h"
55
#include "diff.h"
66
#include "diffcore.h"
7+
#include "promisor-remote.h"
78

89
static int should_break(struct repository *r,
910
struct diff_filespec *src,
@@ -49,6 +50,8 @@ static int should_break(struct repository *r,
4950
unsigned long delta_size, max_size;
5051
unsigned long src_copied, literal_added, src_removed;
5152

53+
struct diff_populate_filespec_options options = { 0 };
54+
5255
*merge_score_p = 0; /* assume no deletion --- "do not break"
5356
* is the default.
5457
*/
@@ -62,8 +65,13 @@ static int should_break(struct repository *r,
6265
oideq(&src->oid, &dst->oid))
6366
return 0; /* they are the same */
6467

65-
if (diff_populate_filespec(r, src, NULL) ||
66-
diff_populate_filespec(r, dst, NULL))
68+
if (r == the_repository && has_promisor_remote()) {
69+
options.missing_object_cb = diff_queued_diff_prefetch;
70+
options.missing_object_data = r;
71+
}
72+
73+
if (diff_populate_filespec(r, src, &options) ||
74+
diff_populate_filespec(r, dst, &options))
6775
return 0; /* error but caught downstream */
6876

6977
max_size = ((src->size > dst->size) ? src->size : dst->size);

diffcore-rename.c

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*
2+
*
23
* Copyright (C) 2005 Junio C Hamano
34
*/
45
#include "cache.h"
@@ -7,6 +8,7 @@
78
#include "object-store.h"
89
#include "hashmap.h"
910
#include "progress.h"
11+
#include "promisor-remote.h"
1012

1113
/* Table of rename/copy destinations */
1214

@@ -128,10 +130,46 @@ struct diff_score {
128130
short name_score;
129131
};
130132

133+
struct prefetch_options {
134+
struct repository *repo;
135+
int skip_unmodified;
136+
};
137+
static void prefetch(void *prefetch_options)
138+
{
139+
struct prefetch_options *options = prefetch_options;
140+
int i;
141+
struct oid_array to_fetch = OID_ARRAY_INIT;
142+
143+
for (i = 0; i < rename_dst_nr; i++) {
144+
if (rename_dst[i].pair)
145+
/*
146+
* The loop in diffcore_rename() will not need these
147+
* blobs, so skip prefetching.
148+
*/
149+
continue; /* already found exact match */
150+
diff_add_if_missing(options->repo, &to_fetch,
151+
rename_dst[i].two);
152+
}
153+
for (i = 0; i < rename_src_nr; i++) {
154+
if (options->skip_unmodified &&
155+
diff_unmodified_pair(rename_src[i].p))
156+
/*
157+
* The loop in diffcore_rename() will not need these
158+
* blobs, so skip prefetching.
159+
*/
160+
continue;
161+
diff_add_if_missing(options->repo, &to_fetch,
162+
rename_src[i].p->one);
163+
}
164+
promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr);
165+
oid_array_clear(&to_fetch);
166+
}
167+
131168
static int estimate_similarity(struct repository *r,
132169
struct diff_filespec *src,
133170
struct diff_filespec *dst,
134-
int minimum_score)
171+
int minimum_score,
172+
int skip_unmodified)
135173
{
136174
/* src points at a file that existed in the original tree (or
137175
* optionally a file in the destination tree) and dst points
@@ -151,6 +189,12 @@ static int estimate_similarity(struct repository *r,
151189
struct diff_populate_filespec_options dpf_options = {
152190
.check_size_only = 1
153191
};
192+
struct prefetch_options prefetch_options = {r, skip_unmodified};
193+
194+
if (r == the_repository && has_promisor_remote()) {
195+
dpf_options.missing_object_cb = prefetch;
196+
dpf_options.missing_object_data = &prefetch_options;
197+
}
154198

155199
/* We deal only with regular files. Symlink renames are handled
156200
* only when they are exact matches --- in other words, no edits
@@ -190,9 +234,11 @@ static int estimate_similarity(struct repository *r,
190234
if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
191235
return 0;
192236

193-
if (!src->cnt_data && diff_populate_filespec(r, src, NULL))
237+
dpf_options.check_size_only = 0;
238+
239+
if (!src->cnt_data && diff_populate_filespec(r, src, &dpf_options))
194240
return 0;
195-
if (!dst->cnt_data && diff_populate_filespec(r, dst, NULL))
241+
if (!dst->cnt_data && diff_populate_filespec(r, dst, &dpf_options))
196242
return 0;
197243

198244
if (diffcore_count_changes(r, src, dst,
@@ -569,7 +615,8 @@ void diffcore_rename(struct diff_options *options)
569615

570616
this_src.score = estimate_similarity(options->repo,
571617
one, two,
572-
minimum_score);
618+
minimum_score,
619+
skip_unmodified);
573620
this_src.name_score = basename_same(one, two);
574621
this_src.dst = i;
575622
this_src.src = j;

diffcore.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,22 @@ void free_filespec(struct diff_filespec *);
6565
void fill_filespec(struct diff_filespec *, const struct object_id *,
6666
int, unsigned short);
6767

68+
/*
69+
* Prefetch the entries in diff_queued_diff. The parameter is a pointer to a
70+
* struct repository.
71+
*/
72+
void diff_queued_diff_prefetch(void *repository);
73+
6874
struct diff_populate_filespec_options {
6975
unsigned check_size_only : 1;
7076
unsigned check_binary : 1;
77+
78+
/*
79+
* If an object is missing, diff_populate_filespec() will invoke this
80+
* callback before attempting to read that object again.
81+
*/
82+
void (*missing_object_cb)(void *);
83+
void *missing_object_data;
7184
};
7285
int diff_populate_filespec(struct repository *, struct diff_filespec *,
7386
const struct diff_populate_filespec_options *);
@@ -185,4 +198,12 @@ int diffcore_count_changes(struct repository *r,
185198
unsigned long *src_copied,
186199
unsigned long *literal_added);
187200

201+
/*
202+
* If filespec contains an OID and if that object is missing from the given
203+
* repository, add that OID to to_fetch.
204+
*/
205+
void diff_add_if_missing(struct repository *r,
206+
struct oid_array *to_fetch,
207+
const struct diff_filespec *filespec);
208+
188209
#endif

t/t4067-diff-partial-clone.sh

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,52 @@ test_expect_success 'diff with rename detection batches blobs' '
131131
test_line_count = 1 done_lines
132132
'
133133

134+
test_expect_success 'diff does not fetch anything if inexact rename detection is not needed' '
135+
test_when_finished "rm -rf server client trace" &&
136+
137+
test_create_repo server &&
138+
echo a >server/a &&
139+
printf "b\nb\nb\nb\nb\n" >server/b &&
140+
git -C server add a b &&
141+
git -C server commit -m x &&
142+
mv server/b server/c &&
143+
git -C server add c &&
144+
git -C server commit -a -m x &&
145+
146+
test_config -C server uploadpack.allowfilter 1 &&
147+
test_config -C server uploadpack.allowanysha1inwant 1 &&
148+
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
149+
150+
# Ensure no fetches.
151+
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --raw -M HEAD^ HEAD &&
152+
! test_path_exists trace
153+
'
154+
155+
test_expect_success 'diff --break-rewrites fetches only if necessary, and batches blobs if it does' '
156+
test_when_finished "rm -rf server client trace" &&
157+
158+
test_create_repo server &&
159+
echo a >server/a &&
160+
printf "b\nb\nb\nb\nb\n" >server/b &&
161+
git -C server add a b &&
162+
git -C server commit -m x &&
163+
printf "c\nc\nc\nc\nc\n" >server/b &&
164+
git -C server commit -a -m x &&
165+
166+
test_config -C server uploadpack.allowfilter 1 &&
167+
test_config -C server uploadpack.allowanysha1inwant 1 &&
168+
git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
169+
170+
# Ensure no fetches.
171+
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --raw -M HEAD^ HEAD &&
172+
! test_path_exists trace &&
173+
174+
# But with --break-rewrites, ensure that there is exactly 1 negotiation
175+
# by checking that there is only 1 "done" line sent. ("done" marks the
176+
# end of negotiation.)
177+
GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff --break-rewrites --raw -M HEAD^ HEAD &&
178+
grep "git> done" trace >done_lines &&
179+
test_line_count = 1 done_lines
180+
'
181+
134182
test_done

0 commit comments

Comments
 (0)