Skip to content

Commit e369698

Browse files
derrickstolee authored and gitster committed
diff: halt tree-diff early after max_changes
When computing the changed-paths Bloom filters for the commit-graph, we limit the size of the filter by restricting the number of paths in the diff. Instead of computing a large diff and then ignoring the result, it is better to halt the diff computation early.

Create a new "max_changes" option in struct diff_options. If non-zero, then halt the diff computation after discovering strictly more than max_changes changed paths. This count includes paths corresponding to trees that change.

Use this max_changes option in the Bloom filter calculations. This reduces the time taken to compute the filters for the Linux kernel repo from 2m50s to 2m35s. On a large internal repository with ~500 commits that perform tree-wide changes, the time was reduced from 6m15s to 3m48s.

Signed-off-by: Derrick Stolee <[email protected]>
Signed-off-by: Garima Singh <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent ed591fe commit e369698

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

bloom.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
133133
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
134134
int i;
135135
struct diff_options diffopt;
136+
int max_changes = 512;
136137

137138
if (bloom_filters.slab_size == 0)
138139
return NULL;
@@ -141,6 +142,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
141142

142143
repo_diff_setup(r, &diffopt);
143144
diffopt.flags.recursive = 1;
145+
diffopt.max_changes = max_changes;
144146
diff_setup_done(&diffopt);
145147

146148
if (c->parents)
@@ -149,7 +151,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
149151
diff_tree_oid(NULL, &c->object.oid, "", &diffopt);
150152
diffcore_std(&diffopt);
151153

152-
if (diff_queued_diff.nr <= 512) {
154+
if (diff_queued_diff.nr <= max_changes) {
153155
struct hashmap pathmap;
154156
struct pathmap_hash_entry *e;
155157
struct hashmap_iter iter;

diff.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -285,6 +285,11 @@ struct diff_options {
285285
/* Number of hexdigits to abbreviate raw format output to. */
286286
int abbrev;
287287

288+
/* If non-zero, then stop computing after this many changes. */
289+
int max_changes;
290+
/* For internal use only. */
291+
int num_changes;
292+
288293
int ita_invisible_in_index;
289294
/* white-space error highlighting */
290295
#define WSEH_NEW (1<<12)

tree-diff.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -434,6 +434,9 @@ static struct combine_diff_path *ll_diff_tree_paths(
434434
if (diff_can_quit_early(opt))
435435
break;
436436

437+
if (opt->max_changes && opt->num_changes > opt->max_changes)
438+
break;
439+
437440
if (opt->pathspec.nr) {
438441
skip_uninteresting(&t, base, opt);
439442
for (i = 0; i < nparent; i++)
@@ -518,6 +521,7 @@ static struct combine_diff_path *ll_diff_tree_paths(
518521

519522
/* t↓ */
520523
update_tree_entry(&t);
524+
opt->num_changes++;
521525
}
522526

523527
/* t > p[imin] */
@@ -535,6 +539,7 @@ static struct combine_diff_path *ll_diff_tree_paths(
535539
skip_emit_tp:
536540
/* ∀ pi=p[imin] pi↓ */
537541
update_tp_entries(tp, nparent);
542+
opt->num_changes++;
538543
}
539544
}
540545

@@ -552,6 +557,7 @@ struct combine_diff_path *diff_tree_paths(
552557
const struct object_id **parents_oid, int nparent,
553558
struct strbuf *base, struct diff_options *opt)
554559
{
560+
opt->num_changes = 0;
555561
p = ll_diff_tree_paths(p, oid, parents_oid, nparent, base, opt);
556562

557563
/*

0 commit comments

Comments
 (0)