Skip to content

Commit 1343c89

Browse files
ttaylorrgitster
authored andcommitted
revision.c: consult Bloom filters for root commits
The commit-graph stores changed-path Bloom filters which represent the set of paths included in a tree-level diff between a commit's root tree and that of its parent. When a commit has no parents, the tree-diff is computed against that commit's root tree and the empty tree. In other words, every path in that commit's tree is stored in the Bloom filter (since they all appear in the diff). Consult these filters during pathspec-limited traversals in the function `rev_same_tree_as_empty()`. Doing so yields a performance improvement where we can avoid enumerating the full set of paths in a parentless commit's root tree when we know that the path(s) of interest were not listed in that commit's changed-path Bloom filter. Suggested-by: SZEDER Gábor <[email protected]> Original-patch-by: Jonathan Tan <[email protected]> Signed-off-by: Taylor Blau <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent f88611c commit 1343c89

File tree

2 files changed

+28
-6
lines changed

2 files changed

+28
-6
lines changed

revision.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -833,17 +833,28 @@ static int rev_compare_tree(struct rev_info *revs,
833833
return tree_difference;
834834
}
835835

836-
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
836+
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
837+
int nth_parent)
837838
{
838839
struct tree *t1 = repo_get_commit_tree(the_repository, commit);
840+
int bloom_ret = -1;
839841

840842
if (!t1)
841843
return 0;
842844

845+
if (!nth_parent && revs->bloom_keys_nr) {
846+
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
847+
if (!bloom_ret)
848+
return 1;
849+
}
850+
843851
tree_difference = REV_TREE_SAME;
844852
revs->pruning.flags.has_changes = 0;
845853
diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
846854

855+
if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
856+
count_bloom_filter_false_positive++;
857+
847858
return tree_difference == REV_TREE_SAME;
848859
}
849860

@@ -881,7 +892,7 @@ static int compact_treesame(struct rev_info *revs, struct commit *commit, unsign
881892
if (nth_parent != 0)
882893
die("compact_treesame %u", nth_parent);
883894
old_same = !!(commit->object.flags & TREESAME);
884-
if (rev_same_tree_as_empty(revs, commit))
895+
if (rev_same_tree_as_empty(revs, commit, nth_parent))
885896
commit->object.flags |= TREESAME;
886897
else
887898
commit->object.flags &= ~TREESAME;
@@ -977,7 +988,14 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
977988
return;
978989

979990
if (!commit->parents) {
980-
if (rev_same_tree_as_empty(revs, commit))
991+
/*
992+
* Pretend as if we are comparing ourselves to the
993+
* (non-existent) first parent of this commit object. Even
994+
* though no such parent exists, its changed-path Bloom filter
995+
* (if one exists) is relative to the empty tree, using Bloom
996+
* filters is allowed here.
997+
*/
998+
if (rev_same_tree_as_empty(revs, commit, 0))
981999
commit->object.flags |= TREESAME;
9821000
return;
9831001
}
@@ -1058,7 +1076,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
10581076

10591077
case REV_TREE_NEW:
10601078
if (revs->remove_empty_trees &&
1061-
rev_same_tree_as_empty(revs, p)) {
1079+
rev_same_tree_as_empty(revs, p, nth_parent)) {
10621080
/* We are adding all the specified
10631081
* paths from this parent, so the
10641082
* history beyond this parent is not

t/t4216-log-bloom.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,11 @@ test_bloom_filters_not_used () {
8888
# if the Bloom filter system is initialized, ensure that no
8989
# filters were used
9090
data="statistics:{"
91-
data="$data\"filter_not_present\":0,"
91+
# unusable filters (e.g., those computed with a
92+
# different value of commitGraph.changedPathsVersion)
93+
# are counted in the filter_not_present bucket, so any
94+
# value is OK there.
95+
data="$data\"filter_not_present\":[0-9][0-9]*,"
9296
data="$data\"maybe\":0,"
9397
data="$data\"definitely_not\":0,"
9498
data="$data\"false_positive\":0}"
@@ -175,7 +179,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
175179

176180
test_bloom_filters_used_when_some_filters_are_missing () {
177181
log_args=$1
178-
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":9"
182+
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":10"
179183
setup "$log_args" &&
180184
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
181185
test_cmp log_wo_bloom log_w_bloom

0 commit comments

Comments
 (0)